Namazu-devel-ja(旧)


[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

OLE filter doesn't create index of title (namazu-bugs-ja#86)



高城と申します。

Office 文書のインデックスを OLE フィルタを使って作成したところ,
文書のプロパティとしてタイトルが設定されている場合には,文書のファイル名,
文書のタイトルともにインデックスが生成されませんでした.

環境は以下の通りです.

Windows 95
Office 98
Namazu 2.0.10
# $Id: olemsword.pl,v 1.8.4.1 2001/11/29 11:46:48 takesako Exp $
# $Id: oleexcel.pl,v 1.11.4.1 2001/11/29 11:46:48 takesako Exp $
# $Id: olepowerpoint.pl,v 1.8.4.1 2001/11/29 11:46:48 takesako Exp $

文書のタイトルをインデックス化するための即席パッチを作りましたので,よろ
しければ御活用ください.

-- 
高城 正平@元長野高専
takavoid@xxxxxxxxxxxxxxxxxxx
http://www-suzuki-lab.ei.nagano-nct.ac.jp/old/s2000/j96019/


diff -urN namazu.old/share/namazu/filter/win32/oleexcel.pl namazu/share/namazu/filter/win32/oleexcel.pl
--- namazu.old/share/namazu/filter/win32/oleexcel.pl	Sun Dec 30 23:20:42 2001
+++ namazu/share/namazu/filter/win32/oleexcel.pl	Tue Jan 29 17:18:44 2002
@@ -98,8 +98,8 @@
     return;
 }
 
-sub getProperties ($$) {
-    my ($cfile, $fields) = @_;
+sub getProperties ($$$) {
+    my ($cfile, $fields, $weighted_str) = @_;
 
     # See VBA online help using Microsoft Excel in detail.
     # Topic item: 'DocumentProperty Object'.
@@ -110,6 +110,9 @@
     $fields->{'title'} = codeconv::shiftjis_to_eucjp($title)
 	if (defined $title);
 
+    my $weight = $conf::Weight{'html'}->{'title'};
+    $$weighted_str .= "\x7f$weight\x7f$fields->{'title'}\x7f/$weight\x7f\n";
+
     my $author = $cfile->BuiltInDocumentProperties('Last Author')->{Value};
     $author = $cfile->BuiltInDocumentProperties('Author')->{Value}
 	unless (defined $author);
@@ -170,7 +173,7 @@
     return "$fileName: cannot open file\n" unless (defined $Book);
 
     # get some properties
-    getProperties($Book, $fields);
+    getProperties($Book, $fields, $weighted_str);
 
     # FileHandle for temporary file 1,2
     local (*FH1, *FH2);
diff -urN namazu.old/share/namazu/filter/win32/olemsword.pl namazu/share/namazu/filter/win32/olemsword.pl
--- namazu.old/share/namazu/filter/win32/olemsword.pl	Sun Dec 30 23:20:42 2001
+++ namazu/share/namazu/filter/win32/olemsword.pl	Tue Jan 29 17:29:24 2002
@@ -93,7 +93,7 @@
 
     $cfile =~ s/\//\\/g;
     $$cont = "";
-    ReadMSWord::ReadMSWord($cfile, $cont, $fields);
+    ReadMSWord::ReadMSWord($cfile, $cont, $fields, $weighted_str);
     $cfile = defined $orig_cfile ? $$orig_cfile : '';
 
     gfilter::line_adjust_filter($cont);
@@ -124,8 +124,8 @@
     return 1;
 }
 
-sub getProperties ($$) {
-    my ($cfile, $fields) = @_;
+sub getProperties ($$$) {
+    my ($cfile, $fields, $weighted_str) = @_;
 
     # See VBA online help using Microsoft Word in detail.
     # Topic item: 'DocumentProperty Object'.
@@ -136,6 +136,9 @@
     $fields->{'title'} = codeconv::shiftjis_to_eucjp($title)
 	if (defined $title);
 
+    my $weight = $conf::Weight{'html'}->{'title'};
+    $$weighted_str .= "\x7f$weight\x7f$fields->{'title'}\x7f/$weight\x7f\n";
+
     my $author = $cfile->BuiltInDocumentProperties('Last Author')->{Value};
     $author = $cfile->BuiltInDocumentProperties('Author')->{Value}
 	unless (defined $author);
@@ -153,8 +156,8 @@
 package ReadMSWord;
 
 my $word;
-sub ReadMSWord ($$$) {
-    my ($cfile, $cont, $fields) = @_;
+sub ReadMSWord ($$$$) {
+    my ($cfile, $cont, $fields, $weighted_str) = @_;
 
     # Copy From Win32::OLE Example Program
     # use existing instance if Word is already running
@@ -187,7 +190,7 @@
 	});
     die "Cannot open File $cfile" unless (defined $doc) ;
 
-    olemsword::getProperties($doc, $fields);
+    olemsword::getProperties($doc, $fields, $weighted_str);
     getParagraphs($doc, $cont);
     getFrames($doc, $cont);
     getShapes($doc, $cont);
diff -urN namazu.old/share/namazu/filter/win32/olepowerpoint.pl namazu/share/namazu/filter/win32/olepowerpoint.pl
--- namazu.old/share/namazu/filter/win32/olepowerpoint.pl	Sun Dec 30 23:20:42 2001
+++ namazu/share/namazu/filter/win32/olepowerpoint.pl	Tue Jan 29 17:20:42 2002
@@ -93,7 +93,7 @@
 
     $cfile =~ s/\//\\/g;
     $$cont = "";
-    ReadPPT::ReadPPT($cfile, $cont, $fields);
+    ReadPPT::ReadPPT($cfile, $cont, $fields, $weighted_str);
     $cfile = defined $orig_cfile ? $$orig_cfile : '';
 
     gfilter::line_adjust_filter($cont);
@@ -125,8 +125,8 @@
     return 1;
 }
 
-sub getProperties ($$) {
-    my ($cfile, $fields) = @_;
+sub getProperties ($$$) {
+    my ($cfile, $fields, $weighted_str) = @_;
 
     # See VBA online help using Microsoft PowerPoint in detail.
     # Topic item: 'DocumentProperty Object'.
@@ -139,6 +139,9 @@
     $fields->{'title'} = codeconv::shiftjis_to_eucjp($title)
 	if (defined $title);
 
+    my $weight = $conf::Weight{'html'}->{'title'};
+    $$weighted_str .= "\x7f$weight\x7f$fields->{'title'}\x7f/$weight\x7f\n";
+
     my $author = $cfile->BuiltInDocumentProperties('Author')->{Value};
     $author = $cfile->BuiltInDocumentProperties('Last Author')->{Value}
 	unless (defined $author);
@@ -155,8 +158,8 @@
 
 package ReadPPT;
 
-sub ReadPPT ($$$) {
-    my ($cfile, $cont, $fields) = @_;
+sub ReadPPT ($$$$) {
+    my ($cfile, $cont, $fields, $weighted_str) = @_;
 
     # Copy From Win32::OLE Example Program
     # use existing instance if PowerPoint is already running
@@ -189,7 +192,7 @@
 	});
     die "Cannot open File $cfile" unless (defined $prs);
 
-    olepowerpoint::getProperties($prs, $fields);
+    olepowerpoint::getProperties($prs, $fields, $weighted_str);
     getSlides($prs, $cont);
 
     $prs->close();
diff -urN namazu.old/share/namazu/filter/win32/olertf.pl namazu/share/namazu/filter/win32/olertf.pl
--- namazu.old/share/namazu/filter/win32/olertf.pl	Sun Dec 30 23:20:42 2001
+++ namazu/share/namazu/filter/win32/olertf.pl	Tue Jan 29 11:55:30 2002
@@ -73,7 +73,7 @@
 
     $cfile =~ s/\//\\/g;
     $$cont = "";
-    ReadMSWord::ReadMSWord($cfile, $cont, $fields);
+    ReadMSWord::ReadMSWord($cfile, $cont, $fields, $weighted_str);
     $cfile = defined $orig_cfile ? $$orig_cfile : '';
 
     gfilter::line_adjust_filter($cont);
diff -urN namazu.old/share/namazu/filter/win32/oletaro.pl namazu/share/namazu/filter/win32/oletaro.pl
--- namazu.old/share/namazu/filter/win32/oletaro.pl	Sun Dec 30 23:20:42 2001
+++ namazu/share/namazu/filter/win32/oletaro.pl	Tue Jan 29 11:56:16 2002
@@ -78,7 +78,7 @@
 
     $cfile =~ s/\//\\/g;
     $$cont = "";
-    ReadMSWord::ReadMSWord($cfile, $cont, $fields);
+    ReadMSWord::ReadMSWord($cfile, $cont, $fields, $weighted_str);
     $cfile = defined $orig_cfile ? $$orig_cfile : '';
 
     gfilter::line_adjust_filter($cont);