namazu-ml(ring)


[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: namazu.confの REPLACEが無視されてしまう



Yu Imai <s98106yi@xxxxxxxxxxxxxx> wrote:

>/a/fs0603a/s98106yi/public_html/index.html
>
>というファイルを、
>
>http://www.sfc.keio.ac.jp/~s98106yi/index.html
>
>に置換しようと思い、namazu.confに
>
>REPLACE		/a/fs0603a/	http://www.sfc.keio.ac.jp/~
>REPLACE		/public_html	/
>
>と記述しているのですが、検索結果には

2重に置換することはできません。

  REPLACE	/a/fs0603a/s98106yi/public_html/	http://www.sfc.keio.ac.jp/~s98106yi/

と指定してくださいませ。

ちなみに、開発中のヴァージョンなら

  REPLACE	/a/fs0603a/(.*)/public_html/	http://www.sfc.keio.ac.jp/~\1/

と正規表現で置換できます。

あ、そういえば (正規表現での置換を実現する) 1.3.0.8 用のパッ
チをもらっていたのだ (忘れていてごめんなさい)。メイル末尾に
添付しておきます。うまく 1.3.0.10 に適用できるといいんだけど。

# パッチの作者は Junio Hamano <junio@xxxxxxxxxxx> さんです

-- Satoru Takabayashi

rcsdiff -r1.3.0.8 -r1.3.0.8.0 -u *,v
===================================================================
RCS file: hlist.c,v
retrieving revision 1.3.0.8
retrieving revision 1.3.0.8.0.1
diff -u -r1.3.0.8 -r1.3.0.8.0.1
--- hlist.c	1999/07/12 07:38:35	1.3.0.8
+++ hlist.c	1999/07/12 07:41:11	1.3.0.8.0.1
@@ -30,6 +30,7 @@
 #include <math.h>
 #include "namazu.h"
 #include "util.h"
+#include "regex.h"
 
 /* merge the left and  right with AND rule */
 HLIST andmerge(HLIST left, HLIST right, int *ignore)
@@ -448,6 +449,105 @@
     }
 }
 
+/* FROM_STRING_PTR and TO_STRING_PTR point to the original
+ * (replace-from, replace-to) pair.
+ * If replace-from contains '.' or '*', compiles as a regexp, and
+ * matches the target string in TMP at the beginning, fill
+ * REPLACE_FROM_BUF and REPLACE_TO_BUF with a literal
+ * (replace-from, replace-to) pair suitable for plain string
+ * substitution; otherwise leave both pointers untouched.  E.g. when:
+ *   tmp = "ab/cd", *from_string_ptr = "(.*)/(.*)", *to_string_ptr = "\1\2",
+ * store "ab/cd" in replace_from_buf[], "abcd" in replace_to_buf[],
+ * and update *from_string_ptr and *to_string_ptr to point to them.
+ */
+void replace_using_regexp (uchar *tmp,
+			   uchar **from_string_ptr,
+			   uchar **to_string_ptr,
+			   uchar *replace_from_buf,
+			   uchar *replace_to_buf)
+{
+    uchar *replace_from = *from_string_ptr;
+    uchar *replace_to = *to_string_ptr;
+    int i, j;
+
+    if (strpbrk (replace_from, ".*")) {
+      struct re_registers regs;
+      struct re_pattern_buffer *re;
+      int mlen;
+      int is_a_regexp_match = 0;
+
+      regs.allocated = 0;
+      re = malloc(sizeof (*re)); /* NOTE(review): result is used unchecked */
+      memset (re, 0, sizeof (*re));
+      re->buffer = 0;
+      re->allocated = 0;
+      if (re_compile_pattern (replace_from, strlen (replace_from), re))
+	/* re_compile_pattern failed; maybe it was not a regexp
+	 * substitution after all.  Fall back to string substitution
+	 * for backward compatibility.
+	 */
+	is_a_regexp_match = 0;
+      else if (0 < (mlen = re_match (re, tmp, strlen (tmp), 0, &regs))) {
+	/* We got a match of mlen (> 0) bytes.  Try to replace the string. */
+	uchar *subst = replace_to;
+	/* Assume we are doing regexp match for now; if any of the
+	 * substitution fails, we will switch back to the straight
+	 * string substitution.
+	 */
+	is_a_regexp_match = 1;
+	for (i = j = 0; subst[i]; i++) {
+	  /* i scans through RHS of sed-style substitution.
+	   * j points at the string being built.
+	   */
+	  if ((subst[i] == '\\') &&
+	      ('0' <= subst[++i]) &&
+	      (subst[i] <= '9')) {
+	    /* A backslash followed by a digit---regexp substitution.
+	     * For a backslash followed by anything else (including \\),
+	     * the backslash is dropped and the next character goes to
+	     * the else clause.  NOTE(review): a trailing backslash lets
+	     * i step past the terminating NUL. */
+	    int regno = subst[i] - '0';
+	    int ct;
+	    if (re->re_nsub <= regno) {
+	      /* Oops; this is a bad substitution.  Just give up
+	       * and use straight string substitution for backward
+	       * compatibility.
+	       */
+	      is_a_regexp_match = 0;
+	      break;
+	    }
+	    for (ct = regs.beg[regno]; ct < regs.end[regno]; ct++)
+	      replace_to_buf[j++] = tmp[ct];
+	  }
+	  else {
+	    /* Either ordinary character, or an unrecognized \ sequence.
+	     * Just copy it.  NOTE(review): j is never checked against
+	     * the size of replace_to_buf. */
+	    replace_to_buf[j++] = subst[i];
+	  }
+	}
+	if (is_a_regexp_match) {
+	  /* Good.  Regexp substitution worked and we now have a good
+	   * string in replace_to_buf.  Fake replace_from and replace_to
+	   * as if these matched string pairs were specified in the
+	   * replacement list as literal substitutions: replace_from_buf
+	   * becomes the first mlen bytes of tmp (the matched prefix). */
+	  replace_to_buf[j] = 0;
+	  *to_string_ptr = replace_to = replace_to_buf;
+	  strcpy (replace_from_buf, tmp);
+	  replace_from_buf[mlen] = 0;
+	  *from_string_ptr = replace_from_buf;
+	}
+	re_free_registers (&regs);
+      }
+      re_free_pattern (re);
+      /* We behave as if replace_from and replace_to specified the
+       * literal string pairs from the beginning.
+       */
+    }
+}
+
 /* replace a URL */
 void replace_url(uchar * s, int opt)
 {
@@ -458,16 +558,27 @@
     strcpy(tmp, s);
 
   for(n=0;n<url_no;n++) {
-    n_from = strlen(URL_REPLACE_FROM[n]);
-    n_to = strlen(URL_REPLACE_TO[n]);
+    uchar *replace_from = URL_REPLACE_FROM[n];
+    uchar *replace_to = URL_REPLACE_TO[n];
+    uchar replace_from_buf[BUFSIZE];
+    uchar replace_to_buf[BUFSIZE];
+
+    replace_using_regexp (tmp,
+			  &replace_from,
+			  &replace_to,
+			  replace_from_buf,
+			  replace_to_buf);
+
+    n_from = strlen(replace_from);
+    n_to = strlen(replace_to);
 
-    if (!strncmp(URL_REPLACE_FROM[n], tmp, n_from)) {
-	strcpy(s, URL_REPLACE_TO[n]);
+    if (!strncmp(replace_from, tmp, n_from)) {
+	strcpy(s, replace_to);
 	for (i = n_from, j = n_to; tmp[i] != '>'; i++, j++)
 	    s[j] = tmp[i];
 	s[j++] = tmp[i++];
-	if (opt && !strncmp(URL_REPLACE_FROM[n], tmp + i, n_from)) {
-	    strcpy(s + j, URL_REPLACE_TO[n]);
+	if (opt && !strncmp(replace_from, tmp + i, n_from)) {
+	    strcpy(s + j, replace_to);
 	    i += n_from;
 	    j += n_to;
 	}
===================================================================
RCS file: re_match.c,v
retrieving revision 1.3.0.8
retrieving revision 1.3.0.8.0.1
diff -u -r1.3.0.8 -r1.3.0.8.0.1
--- re_match.c	1999/07/12 07:38:35	1.3.0.8
+++ re_match.c	1999/07/12 07:41:11	1.3.0.8.0.1
@@ -38,6 +38,12 @@
 
 #define STEP 256
 
+void replace_using_regexp (uchar *tmp,
+			   uchar **from_string_ptr,
+			   uchar **to_string_ptr,
+			   uchar *replace_from_buf,
+			   uchar *replace_to_buf);
+
 void replace(uchar *s)
 {
     int n;
@@ -45,16 +51,26 @@
     uchar tmp[BUFSIZE];
 
     strcpy(tmp, s);
-  for(n=0;n<url_no;n++) {
-    n_from = strlen(URL_REPLACE_FROM[n]);
-    n_to = strlen(URL_REPLACE_TO[n]);
-
-    if (!strncmp(URL_REPLACE_FROM[n], tmp, n_from)) {
-	strcpy(s, URL_REPLACE_TO[n]);
-	for (i = n_from, j = n_to; tmp[i] != '\0'; i++, j++)
-	    s[j] = tmp[i];
-	s[j] = '\0';
-    }
+    for(n=0;n<url_no;n++) {
+      uchar *replace_from = URL_REPLACE_FROM[n];
+      uchar *replace_to = URL_REPLACE_TO[n];
+      uchar replace_from_buf[BUFSIZE];
+      uchar replace_to_buf[BUFSIZE];
+      replace_using_regexp (tmp,
+			    &replace_from,
+			    &replace_to,
+			    replace_from_buf,
+			    replace_to_buf);
+
+      n_from = strlen(replace_from);
+      n_to = strlen(replace_to);
+
+      if (!strncmp(replace_from, tmp, n_from)) {
+	  strcpy(s, replace_to);
+	  for (i = n_from, j = n_to; tmp[i] != '\0'; i++, j++)
+	      s[j] = tmp[i];
+	  s[j] = '\0';
+      }
   }
 }