namazu-ml(avocado)


[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: Namazu v1.2.0.7 released!



Satoru Takabayashi <ccsatoru@xxxxxxxxxxxxxxxxxx> wrote:

>前述の送り仮名については対応してみたのでパッチをつけておきます。気
>になる方はお試しください。

さきほどのものに問題があったので送り直します。

-- Satoru Takabayashi

diff -c namazu-1.2.0.7/src/cgi.c namazu-1.2.0.8-beta-1/src/cgi.c
*** namazu-1.2.0.7/src/cgi.c	Thu Sep 17 19:04:50 1998
--- namazu-1.2.0.8-beta-1/src/cgi.c	Fri Sep 18 05:42:19 1998
***************
*** 62,68 ****
      /* note that CERN HTTPD would add empty PATH_INFO */
      if (getenv("PATH_INFO")) {
          char *path_info = getenv("PATH_INFO");
!         if (strlen(path_info) > 0) {
              sprintf(tmp, "%s%s", DEFAULT_DIR, path_info);
              if ((uchar *) NULL ==
                  (DbNames[DbNumber] = (uchar *) malloc(strlen(tmp) + 1)))
--- 62,68 ----
      /* note that CERN HTTPD would add empty PATH_INFO */
      if (getenv("PATH_INFO")) {
          char *path_info = getenv("PATH_INFO");
!         if (strlen(path_info) > 0 && strlen(path_info) < 128) {
              sprintf(tmp, "%s%s", DEFAULT_DIR, path_info);
              if ((uchar *) NULL ==
                  (DbNames[DbNumber] = (uchar *) malloc(strlen(tmp) + 1)))
diff -c namazu-1.2.0.7/src/search.c namazu-1.2.0.8-beta-1/src/search.c
*** namazu-1.2.0.7/src/search.c	Thu Sep 17 19:04:50 1998
--- namazu-1.2.0.8-beta-1/src/search.c	Fri Sep 18 05:42:19 1998
***************
*** 378,387 ****
      if (!MoreShortFormat) {
          printf(" { ");
      }
      for (i = 0; ;i++) {
          q = strchr(p, '\t');
          if (q) 
!             *q = (uchar)NULL;
          if (strlen(p) > 0) {
              HLIST tmp;
  
--- 378,390 ----
      if (!MoreShortFormat) {
          printf(" { ");
      }
+     while (*p == '\t') {  /* leading tabs are skipped */
+         p++;
+     }
      for (i = 0; ;i++) {
          q = strchr(p, '\t');
          if (q) 
!             *q = '\0';
          if (strlen(p) > 0) {
              HLIST tmp;
  
diff -c namazu-1.2.0.7/src/wakati.c namazu-1.2.0.8-beta-1/src/wakati.c
*** namazu-1.2.0.7/src/wakati.c	Thu Sep 17 19:04:50 1998
--- namazu-1.2.0.8-beta-1/src/wakati.c	Fri Sep 18 05:42:19 1998
***************
*** 32,37 ****
--- 32,39 ----
  
  #define iseuc(c)  ((int)(c) >= 0xa1 && (int)(c) <= 0xfe)
  #define is_kanji(c)  (iseuc(*(c)) && iseuc(*(c + 1)))
+ #define is_choon(c) ((int)*(c) == 0xa1 && (int)*(c + 1) == 0xbc)
+ 
  int is_katakana(uchar *c)
  {
      if ((((int)*c == 0xa5 && 
***************
*** 52,75 ****
      return 0;
  }
  
  void wakati(uchar *key)
  {
!     int i, j, key_leng;
      uchar buf[BUFSIZE * 2] = "";
  
      for (i = 0; i < strlen(key); i++) {
  	if (iseuc(*(key + i))) {
! 	    key_leng = 0;
! 	    for (j = 0; is_kanji(key + i + j) && !is_katakana(key + i + j) 
!                          && !is_hiragana(key + i + j);  j += 2)
              {
  		uchar tmp[BUFSIZE];
  
  		strncpy(tmp, key + i, j + 2);
  		*(tmp + j + 2) = '\0';
  
! 		if (binsearch(tmp) != -1) {
! 		    key_leng = j + 2;
  		}
  	    }
  	    if (key_leng > 0) {
--- 54,103 ----
      return 0;
  }
  
+ 
+ #define ASCII 0
+ #define KANJI 1
+ #define KATAKANA 2
+ #define HIRAGANA 3
+ 
+ int detect_code_type(uchar *c)
+ {
+     if (is_katakana(c)) {
+         return KATAKANA;
+     } else if (is_hiragana(c)){
+         return HIRAGANA;
+     } else if (is_kanji(c)) {
+         return KANJI;
+     }
+     return ASCII;
+ }
+ 
  void wakati(uchar *key)
  {
!     int i, j;
!     int prev = 0, current = 0;
      uchar buf[BUFSIZE * 2] = "";
  
      for (i = 0; i < strlen(key); i++) {
+         current = detect_code_type(key + i);
  	if (iseuc(*(key + i))) {
!             int key_leng = 0;
! 
! 	    for (j = 0; is_kanji(key + i + j) ;  j += 2)
              {
  		uchar tmp[BUFSIZE];
  
+                 if (j == 0 && (is_katakana(key + i + j) ||
+                     is_hiragana(key + i + j))) 
+                 {
+                     /* if the first character is Katakana or Hiragana */
+                     break;
+                 }
  		strncpy(tmp, key + i, j + 2);
  		*(tmp + j + 2) = '\0';
  
! 		if (binsearch(tmp) != -1) { /* hit */
! 		    key_leng = j + 2; 
  		}
  	    }
  	    if (key_leng > 0) {
***************
*** 77,104 ****
  		    strcat(buf, "\t");
  		}
  		strncat(buf, key + i, key_leng); 
! 		if (*(key + i + key_leng) != '\0') {
! 		    strcat(buf, "\t");
! 		}
  		i += key_leng - 1;
  	    } else {
  		strncat(buf, key + i, 2);
  		i++;
  	    }
  	} else {
!             if (i > 0 && iseuc(*(key + i - 1))) {
                  strcat(buf, "\t");
              }
              while(*(key + i) && !iseuc(*(key + i))) {
                  strncat(buf, key + i, 1);
                  i++;
              }
!             if (*(key + i)) {
!                 strcat(buf, "\t");
!             }
              i--;
! 	}
      }
      if (strlen(buf) <= BUFSIZE) {
  	strcpy(key, buf);
      } else {
--- 105,140 ----
  		    strcat(buf, "\t");
  		}
  		strncat(buf, key + i, key_leng); 
!                 strcat(buf, "\t");
  		i += key_leng - 1;
  	    } else {
+                 if (prev != current) {
+                     if (is_choon(key + i)) {
+                         current = prev;
+                     } else if (strlen(buf) != 0 
+                                && *(buf + strlen(buf) -1) != '\t')
+                     {
+                         strcat(buf, "\t");
+                     }
+                 }
  		strncat(buf, key + i, 2);
  		i++;
  	    }
  	} else {
!             if (i > 0) {
                  strcat(buf, "\t");
              }
              while(*(key + i) && !iseuc(*(key + i))) {
                  strncat(buf, key + i, 1);
                  i++;
              }
!             strcat(buf, "\t");
              i--;
!         }
!         prev = current;
      }
+     chop(buf);
+ 
      if (strlen(buf) <= BUFSIZE) {
  	strcpy(key, buf);
      } else {