Namazu-devel-ja(旧)


[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: zipフィルタ



臼田です。

zip フィルタに手を加え Archive::Zip モジュールでも動作するようにしてみました。

確認には Archive::Zip と Compress::Zlib が必要です。

WinZip で作成した zip アーカイブ内の日本語ファイル名の問題は起きないはずです。
処理時間も unzipを何回も呼び出すのに比べれば少しはよいようですが、
めだって速くなった気はしません。


Index: filter/zip.pl
===================================================================
RCS file: /storage/cvsroot/namazu/filter/zip.pl,v
retrieving revision 1.7
diff -u -r1.7 zip.pl
--- filter/zip.pl	4 May 2004 19:51:00 -0000	1.7
+++ filter/zip.pl	5 May 2004 05:30:43 -0000
@@ -28,13 +28,15 @@
 use strict;
 require 'util.pl';
 
-my $unzippath;
+my $unzippath = undef;
 
 sub mediatype() {
     return ('application/x-zip');
 }
 
 sub status() {
+    return 'yes' if (util::checklib('Compress/Zlib.pm') and
+		     util::checklib('Archive/Zip.pm'));
     $unzippath = util::checkcmd('unzip');
     return 'yes' if (defined $unzippath);
     return 'no';
@@ -75,9 +77,71 @@
         util::fclose($fh);
     }
 
-    util::vprint("Processing zip file ... (using  '$unzippath')\n");
+    $$contref ="";
+    my $err = undef;
+    if (util::checklib('Archive/Zip.pm')){
+	$err = az_filter($tmpfile, $contref, $weighted_str, $headings, $fields);
+    } else {
+	$err = unzip_filter($tmpfile, $contref, $weighted_str, $headings, $fields);
+    }
+    unlink($tmpfile);
+    return $err;
+}
+
+sub az_filter ($$$$$) {
+    my ($tmpfile, $contref, $weighted_str, $headings, $fields)
+      = @_;
+
+    util::vprint("Processing zip file ... (using Archive::ZIP module)\n");
+
+    eval 'use Archive::Zip;';
+    my $zip = Archive::Zip->new();
+    my $err = $zip->read( $tmpfile );
+    if ($err != 0) {
+	util::dprint("Archive::Zip: there was a error");
+	return $err;
+    }
+    {
+	my $comment = $zip->zipfileComment();
+	my @filenames = $zip->memberNames();
+	my $tmp = join(" ", @filenames);
+	$$contref = $comment . " " . codeconv::toeuc(\$tmp) . " ";
+    }
+    my @members = $zip->members();
+    my $member;
+    foreach $member (@members){
+	next if (($member->isEncrypted() or $member->isDirectory()));
+
+	my $size = $member->uncompressedSize();
+	my $fname = $member->fileName();
+	if ($size == 0) {
+	    util::dprint("$fname: filesize is 0");
+	} elsif ($size > $conf::FILE_SIZE_MAX) {
+	    util::dprint("$fname: Too large ziped file");
+	} else {
+	    my $con = $zip->contents($member);
+	    if ($con) {
+		my $unzippedname = "unzipped_content";
+		if ($fname =~ /.*(\..*)/){
+		    $unzippedname = $unzippedname . $1;
+		}
+		my $err = zip::nesting_filter($unzippedname, \$con, $weighted_str);
+		if (defined $err) {
+		    util::dprint("filter/zip.pl gets error message \"$err\"");
+		}
+		$$contref .= $con . " ";
+	    }
+	}
+    }
+    return undef;
+}
+
+
+sub unzip_filter ($$$$$) {
+    my ($tmpfile, $contref, $weighted_str, $headings, $fields)
+      = @_;
 
-    $$contref = "";
+    util::vprint("Processing zip file ... (using  '$unzippath')\n");
 
     my $status = system("$unzippath -P passwd -qq -t $tmpfile");
     if ($status != 0) {
@@ -148,7 +212,6 @@
 	    util::fclose($fh);
 	}
     };
-    unlink($tmpfile);
     return undef;
 }
 


臼田幸生