Namazu-devel-ja(旧)


[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

(patch) MaxHit, MaxMatch



たかく@図書館情報大です。

IGNORE_HIT・IGNORE_MATCHを越えるような検索式を与えると、
検索結果が出ないというのに、
ちょっとフラストレーションを感じたので、
これらの値を namazurc で設定可能にするパッチをざっと作ってみました。

namazurc で、以下のように指定します。
(それぞれ文書・ワード数の上限値を示します。)
--
MaxHit   30000
MaxMatch 10000
--

特に異議が出ないようなら commit しようかと思いますが、
いかがでしょうか。
--
高久 雅生 || Masao Takaku
    mailto:masao@xxxxxxxxxx  http://cosmo.ulis.ac.jp/~masao/
Index: ChangeLog
===================================================================
RCS file: /storage/cvsroot/namazu/ChangeLog,v
retrieving revision 1.687
diff -u -r1.687 ChangeLog
--- ChangeLog	2000/04/26 02:30:50	1.687
+++ ChangeLog	2000/05/02 02:50:07
@@ -1,3 +1,20 @@
+2000-05-01  Masao Takaku  <masao@xxxxxxxxxx>
+
+	* src/rcfile.c (process_rc_maxhit): New function.
+	(process_rc_maxmatch): Likewise.
+
+	* nmz/libnamazu.h (IGNORE_HIT): Abolished.
+	(IGNORE_MATCH): Likewise.
+	
+	* nmz/{hlist,re,search}.c: Call nmz_get_maxhit() instead of using
+	IGNORE_HIT.
+	Call nmz_get_maxmatch() instead of using IGNORE_MATCH.
+
+	* nmz/libnamazu.c (nmz_set_maxhit): New function.
+	(nmz_get_maxhit): Likewise.
+	(nmz_set_maxmatch): Likewise.
+	(nmz_get_maxmatch): Likewise.
+
 2000-04-26  Satoru Takabayashi  <satoru-t@xxxxxxxxxxxxxxxxxx>
 
 	* scripts/mknmz.in (load_registry): Simplified. Completely rewritten.
Index: nmz/hlist.c
===================================================================
RCS file: /storage/cvsroot/namazu/nmz/hlist.c,v
retrieving revision 1.45
diff -u -r1.45 hlist.c
--- nmz/hlist.c	2000/03/12 02:07:06	1.45
+++ nmz/hlist.c	2000/05/02 02:50:07
@@ -552,7 +552,7 @@
 	nmz_debug_printf("idf: %f (N:%d, n:%d)\n", idf, document_number, n/2);
     }
 
-    if (n >= IGNORE_HIT * 2) {  
+    if (n >= nmz_get_maxhit() * 2) {  
         /* '* 2' means NMZ.i contains a file-ID and a score. */
         hlist.stat = ERR_TOO_MUCH_HIT;
     } else {
Index: nmz/libnamazu.c
===================================================================
RCS file: /storage/cvsroot/namazu/nmz/libnamazu.c,v
retrieving revision 1.33
diff -u -r1.33 libnamazu.c
--- nmz/libnamazu.c	2000/02/20 06:35:02	1.33
+++ nmz/libnamazu.c	2000/05/02 02:50:07
@@ -61,6 +61,8 @@
 
 static enum nmz_sortmethod  sortmethod  = SORT_BY_SCORE;
 static enum nmz_sortorder   sortorder   = DESCENDING;
+static int  maxhit      = 10000;  /* Ignore if pages matched more than this. */
+static int  maxmatch    = 1000;   /* Ignore if words matched more than this. */
 static int  debugmode   = 0;
 static int  loggingmode = 1;   /* do logging with NMZ.slog */
 static char dyingmsg[BUFSIZE] = "";
@@ -106,6 +108,30 @@
 nmz_get_sortorder(void)
 {
     return sortorder;
+}
+
+void
+nmz_set_maxhit(int max)
+{
+    maxhit = max;
+}
+
+int
+nmz_get_maxhit(void)
+{
+    return maxhit;
+}
+
+void
+nmz_set_maxmatch(int max)
+{
+    maxmatch = max;
+}
+
+int
+nmz_get_maxmatch(void)
+{
+    return maxmatch;
 }
 
 void 
Index: nmz/libnamazu.h
===================================================================
RCS file: /storage/cvsroot/namazu/nmz/libnamazu.h,v
retrieving revision 1.43
diff -u -r1.43 libnamazu.h
--- nmz/libnamazu.h	2000/03/03 03:36:20	1.43
+++ nmz/libnamazu.h	2000/05/02 02:50:07
@@ -59,9 +59,6 @@
     QUERY_TOKEN_MAX =  32, /* Max number of tokens in the query. */
     QUERY_MAX       = 256, /* Max length of the query. */
 
-    IGNORE_HIT    = 10000, /* Ignore if pages matched more than this. */
-    IGNORE_MATCH  = 1000,  /* Ignore if words matched more than this. */
-
     INDEX_MAX = 64        /* Max number of databases */
 };
 
@@ -234,6 +231,10 @@
 extern enum nmz_sortmethod nmz_get_sortmethod(void);
 extern void nmz_set_sortorder ( enum nmz_sortorder order );
 extern enum nmz_sortorder nmz_get_sortorder(void);
+extern void nmz_set_maxhit ( int max );
+extern int nmz_get_maxhit ( void );
+extern void nmz_set_maxmatch ( int max );
+extern int nmz_get_maxmatch ( void );
 extern void nmz_set_debugmode ( int mode );
 extern int  nmz_is_debugmode ( void );
 extern void nmz_set_loggingmode ( int mode );
Index: nmz/re.c
===================================================================
RCS file: /storage/cvsroot/namazu/nmz/re.c,v
retrieving revision 1.29
diff -u -r1.29 re.c
--- nmz/re.c	2000/02/13 17:34:53	1.29
+++ nmz/re.c	2000/05/02 02:50:07
@@ -79,12 +79,12 @@
 	if (val.stat == ERR_FATAL)
 	    return val;
 	val.num = 0; /* set 0 for no matching case */
-        max = IGNORE_HIT;
+        max = nmz_get_maxhit();
         if (strcmp(field, "uri") == 0) {
             uri_mode = 1;
         }
     } else {
-        max = IGNORE_MATCH;
+        max = nmz_get_maxmatch();
     }
 
     nmz_re_compile_pattern(tmpexpr, strlen(tmpexpr), rp);
@@ -115,7 +115,7 @@
                 tmp = nmz_get_hlist(i);
 		if (tmp.stat == ERR_FATAL)
 		    return tmp;
-                if (tmp.num > IGNORE_HIT) {
+                if (tmp.num > nmz_get_maxhit()) {
                     nmz_free_hlist(val);
                     val.stat = ERR_TOO_MUCH_HIT;
                     val.num = 0;
@@ -137,7 +137,7 @@
 		if (val.stat == ERR_FATAL)
 		    return val;
             } 
-            if (val.num > IGNORE_HIT) {
+            if (val.num > nmz_get_maxhit()) {
                 nmz_free_hlist(val);
                 val.num = -1;
                 break;
Index: nmz/search.c
===================================================================
RCS file: /storage/cvsroot/namazu/nmz/search.c,v
retrieving revision 1.74
diff -u -r1.74 search.c
--- nmz/search.c	2000/04/05 07:07:54	1.74
+++ nmz/search.c	2000/05/02 02:50:07
@@ -178,7 +178,7 @@
 	 * Return if too much word would be hit
          * because treat 'a*' completely is too consuming 
 	 */
-	if (j > IGNORE_MATCH) {
+	if (j > nmz_get_maxmatch()) {
 	    nmz_free_hlist(val);
 	    val.stat = ERR_TOO_MUCH_MATCH;
 	    break;
@@ -192,7 +192,7 @@
 	    tmp = nmz_get_hlist(i);
 	    if (tmp.stat == ERR_FATAL)
 	        return tmp;
-	    if (tmp.num > IGNORE_HIT) {
+	    if (tmp.num > nmz_get_maxhit()) {
 		nmz_free_hlist(val);
 		val.stat = ERR_TOO_MUCH_MATCH;
 		break;
@@ -200,7 +200,7 @@
 	    val = nmz_ormerge(val, tmp);
 	    if (val.stat == ERR_FATAL)
 	        return val;
-	    if (val.num > IGNORE_HIT) {
+	    if (val.num > nmz_get_maxhit()) {
 		nmz_free_hlist(val);
 		val.stat = ERR_TOO_MUCH_MATCH;
 		break;
Index: po/namazu.pot
===================================================================
RCS file: /storage/cvsroot/namazu/po/namazu.pot,v
retrieving revision 1.237
diff -u -r1.237 namazu.pot
--- po/namazu.pot	2000/04/26 03:18:05	1.237
+++ po/namazu.pot	2000/05/02 02:50:07
@@ -6,7 +6,7 @@
 msgid ""
 msgstr ""
 "Project-Id-Version: PACKAGE VERSION\n"
-"POT-Creation-Date: 2000-04-26 12:12+0900\n"
+"POT-Creation-Date: 2000-05-02 11:11+0900\n"
 "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
 "Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
 "Language-Team: LANGUAGE <LL@xxxxxx>\n"
@@ -132,41 +132,41 @@
 msgstr ""
 
 #. terminator not matched
-#: src/rcfile.c:315
+#: src/rcfile.c:343
 msgid "can't find string terminator"
 msgstr ""
 
-#: src/rcfile.c:431
+#: src/rcfile.c:459
 msgid "invalid directive name"
 msgstr ""
 
-#: src/rcfile.c:444
+#: src/rcfile.c:472
 msgid "can't find arguments"
 msgstr ""
 
-#: src/rcfile.c:547
+#: src/rcfile.c:575
 msgid "too few arguments"
 msgstr ""
 
-#: src/rcfile.c:550
+#: src/rcfile.c:578
 msgid "too many arguments"
 msgstr ""
 
-#: src/rcfile.c:560
+#: src/rcfile.c:588
 msgid "unknown directive"
 msgstr ""
 
-#: src/rcfile.c:611
+#: src/rcfile.c:639
 #, c-format
 msgid "%s:%d: syntax error: %s"
 msgstr ""
 
-#: src/rcfile.c:716
+#: src/rcfile.c:744
 #, c-format
 msgid "Loaded rcfile: %s\n"
 msgstr ""
 
-#: src/rcfile.c:721
+#: src/rcfile.c:749
 #, c-format
 msgid ""
 "Index:        %s\n"
@@ -174,15 +174,17 @@
 "Lang:         %s\n"
 "Scoring:      %s\n"
 "Template:     %s\n"
+"MaxHit:       %d\n"
+"MaxMatch:     %d\n"
 "EmphasisTags: %s\t%s\n"
 msgstr ""
 
-#: src/rcfile.c:738
+#: src/rcfile.c:769
 #, c-format
 msgid "Alias:   %-20s\t%s\n"
 msgstr ""
 
-#: src/rcfile.c:748
+#: src/rcfile.c:779
 #, c-format
 msgid "Replace: %-20s\t%s\n"
 msgstr ""
Index: src/rcfile.c
===================================================================
RCS file: /storage/cvsroot/namazu/src/rcfile.c,v
retrieving revision 1.23
diff -u -r1.23 rcfile.c
--- src/rcfile.c	2000/02/23 08:26:03	1.23
+++ src/rcfile.c	2000/05/02 02:50:07
@@ -114,6 +114,8 @@
 static enum nmz_stat process_rc_lang ( const char *directive, const StrList *args );
 static enum nmz_stat process_rc_emphasistags ( const char *directive, const StrList *args );
 static enum nmz_stat process_rc_template ( const char *directive, const StrList *args );
+static enum nmz_stat process_rc_maxhit ( const char *directive, const StrList *args );
+static enum nmz_stat process_rc_maxmatch ( const char *directive, const StrList *args );
 
 struct conf_directive {
     char *name;
@@ -134,6 +136,8 @@
     { "LANG",          1, 0, process_rc_lang },
     { "EMPHASISTAGS",  2, 0, process_rc_emphasistags },
     { "TEMPLATE",      1, 0, process_rc_template },
+    { "MAXHIT",        1, 0, process_rc_maxhit },
+    { "MAXMATCH",      1, 0, process_rc_maxmatch },
     { NULL,            0, 0, NULL }
 };
 
@@ -262,6 +266,30 @@
     return SUCCESS;
 }
 
+static enum nmz_stat
+process_rc_maxhit(const char *directive, const StrList *args)
+{
+    int arg1 = atoi(args->value);
+
+    if (arg1 <= 0) {
+	return FAILURE;
+    }
+    nmz_set_maxhit(arg1);
+    return SUCCESS;
+}
+
+static enum nmz_stat
+process_rc_maxmatch(const char *directive, const StrList *args)
+{
+    int arg1 = atoi(args->value);
+
+    if (arg1 <= 0) {
+	return FAILURE;
+    }
+    nmz_set_maxmatch(arg1);
+    return SUCCESS;
+}
+
 /* 
  * Get the environment variable of NAMAZURC, NAMAZUCONF or
  * NAMAZUCONFPATH.  and return it. Original of this code is
@@ -724,10 +752,13 @@
 Lang:         %s\n\
 Scoring:      %s\n\
 Template:     %s\n\
+MaxHit:       %d\n\
+MaxMatch:     %d\n\
 EmphasisTags: %s\t%s\n\
 "), nmz_get_defaultidx(), nmz_is_loggingmode() ? "on" : "off",
            nmz_get_lang(), nmz_is_tfidfmode() ? "tfidf" : "simple",
 	   get_templatedir(), 
+	   nmz_get_maxhit(), nmz_get_maxmatch(),
 	   get_emphasis_tag_start(), get_emphasis_tag_end()
 	   );