Namazu-devel-ja(旧)


[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

scheme (Re: indexer.pl)



<200211181039.TAA18404@xxxxxxxxxxxxxxxxxx>の記事において
私は書きました。

>> >> > 次は非ファイルの対応に着手したいところです。
>> >> どんな仕様ですか?
>> 
>>   ファイル名で受けている個所を拡張して URI として入力を得られるように
>> すると良いかな、と思っています。あとは namazu-devel-ja#2678 にあるよう
>> な感じで scheme ごとに library を用意すれば良いかなと。

  とりあえずできました。まずは scheme に対応するための差分をつけます。
-- 
野首 貴嗣
E-mail: knok@xxxxxxxxxxxxx
	knok@xxxxxxxxxx / knok@xxxxxxxxxx

? scheme
Index: Makefile.am
===================================================================
RCS file: /storage/cvsroot/namazu/Makefile.am,v
retrieving revision 1.66
diff -u -r1.66 Makefile.am
--- Makefile.am	25 Apr 2002 08:58:40 -0000	1.66
+++ Makefile.am	5 Dec 2002 06:26:21 -0000
@@ -11,10 +11,10 @@
 endif
 
 SUBDIRS = conf doc etc filter intl po lib nmz pl src lisp scripts man \
-	  tests template contrib $(BUILT_TKNAMAZU)
+	  tests template contrib scheme $(BUILT_TKNAMAZU)
 
 DIST_SUBDIRS = conf doc etc filter intl po lib nmz pl src lisp scripts man \
-	  tests template contrib $(BUILT_TKNAMAZU)
+	  tests template contrib scheme $(BUILT_TKNAMAZU)
 
 
 bin_SCRIPTS = nmz-config
Index: configure.in
===================================================================
RCS file: /storage/cvsroot/namazu/configure.in,v
retrieving revision 1.150
diff -u -r1.150 configure.in
--- configure.in	15 Nov 2002 09:01:48 -0000	1.150
+++ configure.in	5 Dec 2002 06:26:21 -0000
@@ -419,6 +419,7 @@
 	   src/Makefile
 	   template/Makefile
 	   contrib/Makefile
+	   scheme/Makefile
 	   tests/Makefile
 	   tests/data/Makefile
 	   tests/data/ja/Makefile
Index: pl/conf.pl.in
===================================================================
RCS file: /storage/cvsroot/namazu/pl/conf.pl.in,v
retrieving revision 1.34
diff -u -r1.34 conf.pl.in
--- pl/conf.pl.in	23 Aug 2001 08:18:49 -0000	1.34
+++ pl/conf.pl.in	5 Dec 2002 06:26:21 -0000
@@ -203,6 +203,7 @@
 # $LIBDIR = "@PERLLIBDIR@";
 # $FILTERDIR = "@FILTERDIR@";
 # $TEMPLATEDIR = "@TEMPLATEDIR@";
+# $SCHEMEDIR = "@SCHEMEDIR@";
 
 1;
 
Index: pl/util.pl
===================================================================
RCS file: /storage/cvsroot/namazu/pl/util.pl,v
retrieving revision 1.26
diff -u -r1.26 util.pl
--- pl/util.pl	23 Sep 2002 08:52:33 -0000	1.26
+++ pl/util.pl	5 Dec 2002 06:26:21 -0000
@@ -307,4 +307,9 @@
     }
 }
 
+# Return true if the argument starts with a URI scheme; needs 2+ letters so "c:/..." drive paths stay files.
+sub isurl ($) {
+    return $_[0] =~ /^[a-z]{2,}:/;
+}
+
 1;
Index: pl/var.pl.in
===================================================================
RCS file: /storage/cvsroot/namazu/pl/var.pl.in,v
retrieving revision 1.12
diff -u -r1.12 var.pl.in
--- pl/var.pl.in	2 Mar 2000 02:39:33 -0000	1.12
+++ pl/var.pl.in	5 Dec 2002 06:26:21 -0000
@@ -142,6 +142,16 @@
      'text/plain' => "yes",
      );
 
+%SupportedScheme =	# scheme name => status string; mknmz calls a handler's fetch() only when it is "yes"
+    (
+     'file' => "yes",
+     );
+
+%RECURSIVE_SCHEME =	# scheme name => flag; scheme modules store their recursive() result here
+    (
+     'file' => 1,
+     );
+
 # Dummy function for gettextization.
 sub N_ {};
 
Index: scripts/mknmz.in
===================================================================
RCS file: /storage/cvsroot/namazu/scripts/mknmz.in,v
retrieving revision 1.116
diff -u -r1.116 mknmz.in
--- scripts/mknmz.in	16 Nov 2002 09:19:26 -0000	1.116
+++ scripts/mknmz.in	5 Dec 2002 06:26:22 -0000
@@ -49,6 +49,7 @@
 my $LIBDIR        = $PKGDATADIR . "/pl";      # directory where library etc. are in.
 my $FILTERDIR     = $PKGDATADIR . "/filter";   # directory where filters are in.
 my $TEMPLATEDIR   = $PKGDATADIR . "/template"; # directory where templates are in.
+my $SCHEMEDIR   = $PKGDATADIR . "/scheme"; # directory where scheme handlers are in.
 
 my $DeletedFilesCount = 0;
 my $UpdatedFilesCount = 0;
@@ -175,7 +176,10 @@
 	$field_indices, $fh_errorsfile, $total_files_num) = @_;
 
     my $processed_num = 0;
-    my $file_size = util::filesize($cfile);
+    my $file_size = 0;	# stays 0 for URL targets; only local files are measured
+    unless (util::isurl($cfile)) {	# was "! $cfile =~ ...", which negated $cfile before matching
+	$file_size = util::filesize($cfile);
+    }
 
     if ($var::Opt{'htmlsplit'} && $cfile =~ $conf::HTML_SUFFIX) {
 	my @parts = htmlsplit::split($cfile, "NMZ.partial");
@@ -186,7 +190,7 @@
 			 "$cfile#$part" =~ /$conf::EXCLUDE_PATH/);
 		my $fname = util::tmpnam("NMZ.partial.$id");
 		my $fragment  = defined $part ? $part : undef;
-		my $uri   = generate_uri($cfile, $fragment);
+		my $uri = generate_uri($cfile, $fragment);
 		my $result = namazu_core($fname, 
 					 $docid_count + $processed_num, 
 					 $docid_base, $file_count, 
@@ -430,6 +434,38 @@
     }
 }
 
+# Put $SCHEMEDIR at the front of @INC, then hand every *.pl file found
+# directly inside it to load_schemes().
+sub load_schememodules() {
+    unshift @INC, $SCHEMEDIR;
+    my @handler_files = glob "$SCHEMEDIR/*.pl";
+    load_schemes(@handler_files);
+}
+
+# Load each scheme handler file and register its schemes in %var::SupportedScheme / %var::RECURSIVE_SCHEME.
+sub load_schemes(@) {
+    for my $scheme (@_) {
+	my ($module) = $scheme =~ m!([-\w]+)\.pl$!
+	    or die "invalid scheme module name \"$scheme\"\n";
+	eval { require "$module.pl"; 1 } or die "unable to require \"$module.pl\": $@";
+	my (@protocols, $status, $recursive);
+	eval "\@protocols = Namazu::Scheme::${module}::scheme();";
+	die $@ if $@;
+	eval "\$status = Namazu::Scheme::${module}::status();";
+	die $@ if $@;
+	eval "\$recursive = Namazu::Scheme::${module}::recursive();";
+	die $@ if $@;
+	eval "Namazu::Scheme::${module}::init();";	# NOTE(review): $@ not checked -- is init() optional?
+	for my $sc (@protocols) {
+	    # Keep a scheme that is already "yes" when this handler reports "no".
+	    next if (defined $var::SupportedScheme{$sc} &&
+		     $var::SupportedScheme{$sc} eq 'yes' && $status eq 'no');
+	    $var::SupportedScheme{$sc} = $status;
+	    $var::RECURSIVE_SCHEME{$sc} = $recursive;
+	}
+    }
+}
+
 # Core routine.
 #
 # FIXME: Too many parameters. They must be cleared.
@@ -454,6 +490,20 @@
     unless ($uri) {
 	$uri = generate_uri($cfile);  # Make a URI from a file name.
     }
+    my $scheme;
+    my $mtime;
+    $mtime = (stat($cfile))[9] unless util::isurl($cfile);
+    foreach $scheme (keys %var::SupportedScheme) {
+	if ($var::SupportedScheme{$scheme} eq 'yes') {
+	    if ($uri =~ /^$scheme:/) {
+		eval "Namazu::Scheme::${scheme}::fetch(\$uri, \\\$content);";
+		die $@ if $@;
+		eval "\$mtime = Namazu::Scheme::${scheme}::mtime(\$uri);";
+		die $@ if $@;
+	    }
+	}
+    }
+    $fields{'mtime'} = $mtime;
     my ($cfile_size, $text_size, $kanji, $mtype) = 
 	load_document(\$cfile, \$content, \$weighted_str,
 		      \$headings, \%fields);
@@ -486,7 +536,7 @@
     clean_field_index(\%fields);
     put_field_index(\%fields, $field_indices);
 
-    put_dateindex($cfile);
+    put_dateindex($cfile, $fields{'mtime'});
     $content .= $weighted_str;   # add weights
     $Indexer->init(\$content, $conf::WORD_LENG_MAX, $var::Opt{'nosymbol'});
     $Indexer->noedgesymbol() if ($var::Opt{'noedgesymbol'});
@@ -594,9 +644,10 @@
 	$fields->{'title'} = gfilter::filename_to_title($cfile, $wsref);
     }
     unless (defined($fields->{'date'})) {
-	my $mtime = (stat($cfile))[9];
+	my $mtime = $fields->{'mtime'};
 	my $date = util::rfc822time($mtime);
 	$fields->{'date'} = $date;
+	$fields->{'mtime'} = $mtime;
     }
     unless (defined($fields->{'uri'})) {
 	$fields->{'uri'} = $uri;
@@ -732,8 +783,8 @@
 
 # put the date infomation into NMZ.t file
 sub put_dateindex ($) {
-    my ($cfile) = @_;
-    my $mtime = (stat($cfile))[9];
+    my ($cfile, $mtime) = @_;
+    $mtime = (stat($cfile))[9] unless defined $mtime;
 
     my $fh_dataindex = util::efopen(">>$var::NMZ{'_t'}");
     print $fh_dataindex pack("N", $mtime);
@@ -746,8 +797,7 @@
       = @_;
     my $cfile = $$orig_cfile;
 
-    return (0, 0, 0, 0) unless (-f $cfile && -r $cfile);
-
+    return (0, 0, 0, 0) unless (util::isurl($cfile) || (-f $cfile && -r $cfile));
     my $file_size;
     my $shelter_cfile = "";
 
@@ -770,6 +820,7 @@
 	$$contref = util::readfile($cfile);
     } else {
 	$file_size = length($$contref);
+	$fields->{'size'} = $file_size;
     }
 
     my ($kanji, $mtype) = apply_filter($orig_cfile, $contref, $weighted_str, $headings, $fields, $shelter_cfile);
@@ -1110,6 +1161,7 @@
       util::dprint("Override indexing language: $util::LANG\n");
     }
     load_filtermodules(); # to make effect $opt_config, $index_lang.
+    load_schememodules();
     postload_modules();
 
     if ($opt_help) {
@@ -1273,6 +1325,7 @@
 sub absolute_path($$) {
     my ($cwd, $path) = @_;
 
+    return $path if (util::isurl($path));
     $path =~ s!^\.$!\./!;
     $path =~ s!^\.[/\\]!$cwd/!;
     if (($SYSTEM eq "MSWin32") || ($SYSTEM eq "os2")) {
@@ -1321,8 +1374,10 @@
 	    print STDERR "Warning: target contains empty line, skip it\n";
 	    next;
 	}
-	
-	if (-f $target) { # target is a file.
+
+	if (util::isurl($target)) {
+	    add_target($target, \@flist, \%counts);
+	} elsif (-f $target) { # target is a file.
 	    add_target($target, \@flist, \%counts);
 	} elsif (-d $target) { # target is a directory.
 	    my @subtargets = ();
@@ -1385,11 +1440,11 @@
 	return;   # skip a file name containing LF/CR/TAB chars.
     }
 
-    return unless -f $target;  # Only file is targeted.
+    return unless util::isurl($target) || -f $target;
 
     $counts_ref->{'possible'}++;
 
-    unless (-r $target) {
+    unless (util::isurl($target) || -r $target) {
         util::vprint(sprintf(_("Unreadable:	%s"), $target));
 	$counts_ref->{'excluded'}++;
 	return;
@@ -1407,7 +1462,7 @@
     #
     # Do processing just like find's  --mtime option.
     #
-    if (defined $var::Opt{'mtime'}) {
+    if (defined $var::Opt{'mtime'} && ! util::isurl($_)) {
 	my $mtime = -M $_;
 	if ($var::Opt{'mtime'} < 0) {
 
@@ -1506,9 +1561,9 @@
     my ($cfile, $cfile_size, $text_size, $mtype, $uri) = @_;
 
     my $msg = undef;
-    if (! -e $cfile) {
+    if (! util::isurl($cfile) && ! -e $cfile) {
 	$msg = _("does NOT EXIST! skipped.");
-    } elsif (! -r $cfile) {
+    } elsif (! util::isurl($cfile) && ! -r $cfile) {
 	$msg = _("is NOT READABLE! skipped.");
     } elsif ($text_size == 0 || $cfile_size == 0) {
 	$msg = _("is 0 size! skipped.");
@@ -2427,6 +2482,7 @@
      $var::RECURSIVE_ACTIONS, $conf::META_TAGS, $var::USE_NKF_MODULE,
      $conf::ADDRESS, $var::MAILING_ADDRESS,
      $conf::FILE_SIZE_MAX,
+     $var::SupportedScheme, $var::RECURSIVE_SCHEME,
      );
 
 sub muda {}