[Namazu-devel-ja 1741] Re: filter/ooo.pl を Archive::Zip に対応するパッチ
Tadamasa Teranishi
yw3t-trns @ asahi-net.or.jp
2008年 5月 3日 (土) 03:23:02 JST
寺西です。
Tadamasa Teranishi wrote:
>
> とりあえず filter/ooo.pl を Archive::Zip に対応するパッチを書きま
> した。(development-2-1)
これの改良版です。(development-2-1)
前回のものに加えて、IO::String が利用できる環境ではテンポラリファイル
を作成しない処理も加えました。
koffice.pl にも使えます。
msofficexml.pl, xps.pl については、更にzip ファイル内のメンバを検索
する機能を追加する必要はありますが、Archive::Zip に対応することは
可能だと思います。
--
=====================================================================
寺西 忠勝(TADAMASA TERANISHI) yw3t-trns @ asahi-net.or.jp
http://www.asahi-net.or.jp/~yw3t-trns/index.htm
Key fingerprint = 474E 4D93 8E97 11F6 662D 8A42 17F5 52F4 10E7 D14E
-------------- next part --------------
Index: ooo.pl
===================================================================
RCS file: /storage/cvsroot/namazu/filter/ooo.pl,v
retrieving revision 1.16.2.3
diff -u -p -r1.16.2.3 ooo.pl
--- ooo.pl 29 Apr 2008 11:20:58 -0000 1.16.2.3
+++ ooo.pl 2 May 2008 18:16:18 -0000
@@ -32,6 +32,7 @@ require 'gfilter.pl';
my $utfconvpath = undef;
my $unzippath = undef;
my @unzipopts;
+my $_zread = undef;
sub mediatype() {
# http://framework.openoffice.org/documentation/mimetypes/mimetypes.html
@@ -46,8 +47,22 @@ sub mediatype() {
}
sub status() {
+ if (util::checklib('Compress/Zlib.pm') and
+ util::checklib('Archive/Zip.pm')) {
+ if (util::checklib('IO/String.pm')) {
+ $_zread = \&_az_is_zread;
+ } else {
+ $_zread = \&_az_zread;
+ }
+ if ($English::PERL_VERSION >= 5.008) {
+ $utfconvpath = "none";
+ return 'yes';
+ }
+ }
+
$unzippath = util::checkcmd('unzip');
- if (defined $unzippath){
+ if (defined $unzippath) {
+ $_zread = \&_unzip_zread;
@unzipopts = ("-p");
if (util::islang("ja")) {
if ($English::PERL_VERSION >= 5.008) {
@@ -55,17 +70,17 @@ sub status() {
return 'yes';
}
$utfconvpath = util::checkcmd('lv');
- if ($utfconvpath){
+ if ($utfconvpath) {
return 'yes';
} else {
$utfconvpath = util::checklib('unicode.pl');
- if ($utfconvpath){
+ if ($utfconvpath) {
return 'yes';
}
}
- return 'no';
+ return 'no';
} else {
- return 'yes';
+ return 'yes';
}
}
return 'no';
@@ -109,25 +124,10 @@ sub filter ($$$$$) {
sub filter_metafile ($$$) {
my ($contref, $weighted_str, $fields) = @_;
- my $metafile = 'meta.xml';
+
my $xml = "";
- my $tmpfile = util::tmpnam('NMZ.zip');
- {
- my $fh = util::efopen("> $tmpfile");
- print $fh $$contref;
- util::fclose($fh);
- }
- my @cmd = ($unzippath, @unzipopts, $tmpfile, $metafile);
- my $status = util::syscmd(
- command => \@cmd,
- option => {
- "stdout" => \$xml,
- "stderr" => "/dev/null",
- "mode_stdout" => "wt",
- "mode_stderr" => "wt",
- },
- );
- unlink $tmpfile;
+ my $status = $_zread->($contref, 'meta.xml', \$xml);
+ codeconv::normalize_nl(\$xml);
my $authorname = ooo::get_author(\$xml);
my $title = ooo::get_title(\$xml);
@@ -145,10 +145,10 @@ sub filter_metafile ($$$) {
codeconv::normalize_eucjp(\$title);
codeconv::normalize_eucjp(\$keywords);
}
- if (!($authorname eq "")){
+ if (!($authorname eq "")) {
$fields->{'author'} = $authorname;
}
- if (!($title eq "")){
+ if (!($title eq "")) {
$fields->{'title'} = $title;
my $weight = $conf::Weight{'html'}->{'title'};
$$weighted_str .= "\x7f$weight\x7f$title\x7f/$weight\x7f\n";
@@ -162,25 +162,10 @@ sub filter_metafile ($$$) {
sub filter_contentfile ($$$$$) {
my ($contref, $weighted_str, $headings, $fields) = @_;
- my $contentfile = "content.xml";
+
my $xml = "";
- my $tmpfile = util::tmpnam('NMZ.zip');
- {
- my $fh = util::efopen("> $tmpfile");
- print $fh $$contref;
- util::fclose($fh);
- }
- my @cmd = ($unzippath, @unzipopts, $tmpfile, $contentfile);
- my $status = util::syscmd(
- command => \@cmd,
- option => {
- "stdout" => \$xml,
- "stderr" => "/dev/null",
- "mode_stdout" => "wt",
- "mode_stderr" => "wt",
- },
- );
- unlink $tmpfile;
+ my $status = $_zread->($contref, 'content.xml', \$xml);
+ codeconv::normalize_nl(\$xml);
ooo::remove_all_tag(\$xml);
ooo::decode_entity(\$xml);
@@ -209,7 +194,7 @@ sub get_author ($){
sub get_title ($){
my ($contref) = @_;
- if ($$contref =~ m!<dc:title>(.*)</dc:title>!){
+ if ($$contref =~ m!<dc:title>(.*)</dc:title>!) {
return $1;
} else {
return "";
@@ -247,7 +232,7 @@ sub utoe ($) {
my $cmd = ($utfconvpath . " -Iu8 " . "-Oej " . $tmpfile . " |");
$$tmp = "";
my $fh = util::efopen($cmd);
- while (defined(my $line = <$fh>)){
+ while (defined(my $line = <$fh>)) {
$$tmp .= $line;
}
util::fclose($fh);
@@ -286,4 +271,120 @@ sub decode_entity ($) {
$$text =~ s/ [;\s]/ /g;
}
+sub _unzip_zread($$$)
+{
+ my ($contref, $filename, $conts) = @_;
+
+ util::vprint("Processing ooo file ... (using '$unzippath')\n");
+
+ my $tmpfile = util::tmpnam('NMZ.zip');
+ {
+ my $fh = util::efopen("> $tmpfile");
+ print $fh $$contref;
+ util::fclose($fh);
+ }
+
+ my @cmd = ($unzippath, @unzipopts, $tmpfile, $filename);
+ my $status = util::syscmd(
+ command => \@cmd,
+ option => {
+ "stdout" => $conts,
+ "stderr" => "/dev/null",
+ "mode_stdout" => "wb",
+ "mode_stderr" => "wt",
+ },
+ );
+
+ unlink $tmpfile;
+
+ return $status;
+}
+
+use constant NMZ_AZ_OK => 0;
+use constant NMZ_AZ_STREAM_END => 1;
+use constant NMZ_AZ_ERROR => 2;
+use constant NMZ_AZ_FORMAT_ERROR => 3;
+use constant NMZ_AZ_IO_ERROR => 4;
+
+sub _az_zread($$$)
+{
+ eval 'use Archive::Zip;';
+ my ($contref, $filename, $conts) = @_;
+
+ util::vprint("Processing ooo file ... (using 'Archive::Zip')\n");
+
+ my $tmpfile = util::tmpnam('NMZ.zip');
+ {
+ my $fh = util::efopen("> $tmpfile");
+ print $fh $$contref;
+ util::fclose($fh);
+ }
+
+ $$conts = '';
+
+ my $zip = Archive::Zip->new();
+ my $err = $zip->read($tmpfile);
+ if ($err != NMZ_AZ_OK) {
+ util::dprint("Archive::Zip: there was a error");
+ unlink $tmpfile;
+ return 1;
+ }
+
+ my $member = $zip->memberNamed($filename);
+ my $size = $member->uncompressedSize();
+ if ($size == 0) {
+ my $fname = $member->fileName();
+ util::dprint("$fname: filesize is 0");
+ unlink $tmpfile;
+ return 1;
+ } elsif ($size > $conf::FILE_SIZE_MAX) {
+ my $fname = $member->fileName();
+ util::dprint("$fname: Too large ooo file");
+ unlink $tmpfile;
+ return 1;
+ }
+
+ $$conts = $zip->contents($member);
+
+ unlink $tmpfile;
+
+ return 0;
+}
+
+sub _az_is_zread($$$)
+{
+ eval 'use Archive::Zip;';
+ eval 'use IO::String;';
+ my ($contref, $filename, $conts) = @_;
+
+ util::vprint("Processing ooo file ... (using 'Archive::Zip, IO::String')\n");
+
+ my $io = IO::String->new($$contref);
+
+ $$conts = '';
+
+ my $zip = Archive::Zip->new();
+ my $err = $zip->readFromFileHandle($io);
+ if ($err != NMZ_AZ_OK) {
+ util::dprint("Archive::Zip: there was a error");
+ return 1;
+ }
+
+ my $member = $zip->memberNamed($filename);
+ my $size = $member->uncompressedSize();
+ if ($size == 0) {
+ my $fname = $member->fileName();
+ util::dprint("$fname: filesize is 0");
+ return 1;
+ } elsif ($size > $conf::FILE_SIZE_MAX) {
+ my $fname = $member->fileName();
+ util::dprint("$fname: Too large ooo file");
+ return 1;
+ }
+
+ $$conts = $zip->contents($member);
+
+ return 0;
+}
+
1;
Namazu-devel-ja メーリングリストの案内