namazu-ml(ring)


[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: Word97の検索



倉部です。

Layout枠の中の文字列が取れないバグがありましたので、
修正版をお送りします。


# Created by Jun Kurabe
# V1.00 1999/10/30
# V1.01 1999/11/03 Add getFrames by Jun Kurabe
# V1.02 1999/11/03 Change the TextFrame check statement (logged as getProperties; the TextFrame check is in getShapes)
use Win32::OLE;
use Win32::OLE::Enum;

package ReadMSWord;

# Extract the text of a Microsoft Word document through OLE automation.
#
# Parameters:
#   $fileName - path of the document, passed unchanged to Word's
#               Documents->open.
#               NOTE(review): Word presumably resolves relative paths against
#               its own working directory - confirm, and prefer absolute paths.
# Returns:
#   A single string: the header block from getProperties followed by the text
#   from getParagraphs, getFrames, getShapes and getHeadersFooters, in that
#   order.
# Dies:
#   When Word cannot be reached or started, when the document cannot be
#   opened, or when an extraction step dies (the document is closed first and
#   the original error is re-thrown).
sub ReadMSWord {
    my $fileName = shift;

    # Adapted from the Win32::OLE example program:
    # use the existing instance if Word is already running.
    my $word;
    eval { $word = Win32::OLE->GetActiveObject('Word.Application') };
    die "MSWord not installed" if $@;
    unless (defined $word) {
        # The callback quits the Word instance that this sub started once the
        # last reference to it goes away.
        $word = Win32::OLE->new('Word.Application', sub { $_[0]->Quit; })
            or die "Oops, cannot start Word";
    }
    # For debugging:
    # $word->{Visible} = 1;

    my $doc = $word->{Documents}->open($fileName);
    # Fail here with a readable message instead of dying later on a method
    # call against an undefined value.
    die "Cannot open '$fileName': " . Win32::OLE->LastError()
        unless defined $doc;

    # Lexical on purpose: the result must not leak into a package global.
    my $allText = '';
    my $ok = eval {
        $allText .= getProperties($doc);
        $allText .= getParagraphs($doc);
        $allText .= getFrames($doc);
        $allText .= getShapes($doc);
        $allText .= getHeadersFooters($doc);
        1;
    };
    my $error = $@;

    # Always close the document, even when extraction failed, so it does not
    # stay open inside a Word instance that was already running.
    # NOTE(review): 0 is presumably wdDoNotSaveChanges - confirm.
    $doc->close(0);
    undef $doc;
    undef $word;

    die $error unless $ok;
    return $allText;
}

# Build a mail-like header block from the document's built-in properties.
#
# Parameters:
#   $doc - document object that answers BuiltInDocumentProperties($index)
#          with an object exposing a {Value} entry.
# Returns:
#   A string of the form
#       Subject: <title> <subject>
#       From: <author>,<last author>
#       Date: <creation date>
#   followed by one empty line.  Missing properties contribute an empty
#   string.
sub getProperties {
    my $doc = shift;

    # Fetch one built-in property value; undefined results become ''.
    my $value_of = sub {
        my $index = shift;
        my $prop  = $doc->BuiltInDocumentProperties($index);
        return '' unless defined $prop;
        my $value = $prop->{Value};
        return defined $value ? $value : '';
    };

    # The index-to-meaning mapping follows the original variable names.
    my $title      = $value_of->(1);
    my $subject    = $value_of->(2);
    my $author     = $value_of->(3);
    my $lastAuthor = $value_of->(7);
    my $createDate = $value_of->(11);
    # Property 13 was fetched into an unused variable before; it never
    # reached the output, so the lookup is dropped.

    my $allText = '';
    # The subject used to be lost here because of a misspelled variable.
    $allText .= 'Subject: ' . $title . ' ' . $subject . "\n";
    $allText .= 'From: ' . $author . ',' . $lastAuthor . "\n";
    $allText .= 'Date: ' . $createDate . "\n";
    $allText .= "\n";

    return $allText;
}

# Collect the text of every paragraph in the document.
#
# Parameters:
#   $doc - document object whose Paragraphs collection can be walked with
#          Win32::OLE::Enum.
# Returns:
#   The paragraph texts, each with its final character removed and followed
#   by "\n"; an empty string when there are no paragraphs.
sub getParagraphs {
    my $doc = shift;
    my $e = Win32::OLE::Enum->new($doc->Paragraphs);
    my $allText = '';

    # Loop variables are lexical so nothing leaks into package globals.
    while (my $paragraph = $e->Next) {
        my $text = $paragraph->Range->{Text};
        $text = '' unless defined $text;
        # Drop the last character of the range text.
        # NOTE(review): presumably Word's trailing paragraph mark - confirm.
        chop $text;
        $allText .= $text . "\n";
    }

    return $allText;
}

# Collect the text held in the text frames of the document's shapes.
#
# Parameters:
#   $doc - document object whose Shapes collection can be walked with
#          Win32::OLE::Enum.
# Returns:
#   The text of every shape whose TextFrame reports HasText, each with its
#   final character removed and followed by "\n".  Shapes without a usable
#   text frame or text range are skipped.
sub getShapes {
    my $doc = shift;
    my $e = Win32::OLE::Enum->new($doc->Shapes);
    my $allText = '';

    while (my $shape = $e->Next) {
        # NOTE(review): some shape kinds presumably yield no TextFrame; skip
        # them instead of dying on an undefined value and losing all text.
        my $frame = $shape->TextFrame;
        next unless defined $frame && $frame->{HasText};

        my $range = $frame->TextRange;
        next unless defined $range;

        my $text = $range->{Text};
        $text = '' unless defined $text;
        # Drop the last character of the range text.
        # NOTE(review): presumably Word's trailing paragraph mark - confirm.
        chop $text;
        $allText .= $text . "\n";
    }

    return $allText;
}

# Collect the text inside the document's frames.
#
# This sub used to be defined twice with identical bodies; the second
# definition silently replaced the first, so a single definition is kept.
#
# Parameters:
#   $doc - document object whose Frames collection can be walked with
#          Win32::OLE::Enum.
# Returns:
#   The text of every frame, each with its final character removed and
#   followed by "\n"; an empty string when there are no frames.
sub getFrames {
    my $doc = shift;
    my $e = Win32::OLE::Enum->new($doc->Frames);
    my $allText = '';

    # Loop variables are lexical so nothing leaks into package globals.
    while (my $frame = $e->Next) {
        my $text = $frame->Range->{Text};
        $text = '' unless defined $text;
        # Drop the last character of the range text.
        # NOTE(review): presumably Word's trailing paragraph mark - confirm.
        chop $text;
        $allText .= $text . "\n";
    }

    return $allText;
}

# Collect the header and footer text of every section in the document.
#
# Parameters:
#   $doc - document object whose Sections collection can be walked with
#          Win32::OLE::Enum; each section must answer Headers and Footers
#          with enumerable collections.
# Returns:
#   For each section, the text of all its headers and then of all its
#   footers, each with its final character removed and followed by "\n".
sub getHeadersFooters {
    my $doc = shift;

    my $allText = '';

    my $sections = Win32::OLE::Enum->new($doc->Sections);
    while (my $section = $sections->Next) {
        # Headers come before footers, which keeps the output order stable.
        for my $collection_name (qw(Headers Footers)) {
            my $items = Win32::OLE::Enum->new($section->$collection_name());
            while (my $item = $items->Next) {
                # Lexical, so no package global is shared with other subs.
                my $text = $item->Range->{Text};
                $text = '' unless defined $text;
                # Drop the last character of the range text.
                # NOTE(review): presumably the trailing paragraph mark -
                # confirm.
                chop $text;
                $allText .= $text . "\n";
            }
        }
    }

    return $allText;
}

#main
# Print the extracted text of the Word document named by the first
# command-line argument; refuse to run without one instead of handing an
# undefined file name to Word.
die "Usage: $0 <word-document>\n"
    unless defined $ARGV[0] && length $ARGV[0];
print ReadMSWord::ReadMSWord($ARGV[0]);