Namazu-users-ja(旧)


[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: インデックスのマージ



古川です。

From: Osamu Okano <osamu2001@xxxxxxxxxxxx>
Subject: [namazu-users-ja] Re: pnamazu-2000.10.07 (インデックスのマージ)
Date: Tue, 10 Oct 2000 23:21:36 +0900

osamu2001> かなり前の話ですが
osamu2001> インデックスのマージツールを作る話
osamu2001> があったと思うのですが
osamu2001> 今現在はどうなっているのでしょうか?

おっ、そんな話もありましたね。じゃあ、つくりましょうか。

やっつけですが…

-- 
Rei FURUKAWA 
furukawa@xxxxxxxxxxxx
#! /usr/local/bin/perl5 -w

use strict;

push(@INC, "/usr/local/share/namazu/pl");
require 'nmzidx.pl';

# Exactly three directories are required: the destination index followed by
# the two source indices.  A usage error is reported on STDERR with a
# non-zero exit status so that a calling script can detect the failure.
unless (@ARGV == 3){
    print STDERR "Usage: nmzmerge.pl dst src1 src2\n";
    exit 1;
}

nmzmerge(@ARGV);

# Merge the two indices found in $dir1 and $dir2 into a new index in $dir0.
#
# Arguments:
#   $dir0 - destination index directory (opened through nmzidx in mode 'w')
#   $dir1 - first source index directory (opened in mode 'r')
#   $dir2 - second source index directory (opened in mode 'r')
#
# The merge runs in three passes: the file list, the word index and the
# phrase index.  Document numbers coming from the second source are shifted
# by $offset, the number of file-list entries copied from the first source,
# so that they follow the first source's documents in the merged index.
# Progress messages are printed to STDOUT.
sub nmzmerge{
    my ($dir0, $dir1, $dir2) = @_;

    my $nmz0 = nmzidx->new($dir0, 'w');

    my $nmz1 = nmzidx->new($dir1, 'r');
    my $nmz2 = nmzidx->new($dir2, 'r');

    my $offset = 0;

    # Pass 1: file list.
    {
        my $nmz0_file = $nmz0->open_flist;

        # Collect the field names known to either source index.
        my $nmz1_file = $nmz1->open_flist;
        my @field = keys %{$nmz1_file->{'field'}};
        $nmz1_file->close;

        # The fields must be taken from the *second* source here; reading
        # them from $nmz1_file again would drop every field that exists only
        # in the second index.
        my $nmz2_file = $nmz2->open_flist;
        push(@field, keys %{$nmz2_file->{'field'}});
        $nmz2_file->close;

        # Open each field in the destination once; the defined() guard skips
        # names already present (including duplicates in @field).
        for my $field (@field){
            $nmz0_file->{'field'}->open($nmz0, $field) unless defined $nmz0_file->{'field'}->{$field};
        }
        @field = sort keys %{$nmz0_file->{'field'}};

        # Copy the entries of the first source; their count becomes the
        # document-number offset applied to the second source below.
        $nmz1_file = $nmz1->open_flist;
        $offset = nmzfile(\@field, $nmz0_file, $nmz1_file);
        $nmz1_file->close;

        $nmz2_file = $nmz2->open_flist;
        nmzfile(\@field, $nmz0_file, $nmz2_file);
        $nmz2_file->close;

        $nmz0_file->close;
    }

    # Pass 2: word index.  This is a two-way merge driven by wordcmp();
    # NOTE(review): it presumably relies on both word streams being stored
    # in `cmp` order, and on read() returning the word (undef at the end of
    # the stream) -- confirm against nmzidx.pl.
    {
        my $nmz0_word = $nmz0->open_word;
        my $nmz1_word = $nmz1->open_word;
        my $nmz2_word = $nmz2->open_word;

        my ($w1, $w2, %list1, %list2);
        my $word1 = $nmz1_word->read(\$w1, \%list1);
        my $word2 = $nmz2_word->read(\$w2, \%list2);

        my $ndx = 0;

        # wordcmp() yields undef once both streams are exhausted.
        while (defined(my $c = wordcmp($word1, $word2))){
            my %list;
            my $word;

            # Both branches run when $c == 0, so a word present in both
            # sources is written once with the two lists combined.
            if ($c <= 0){
                $word = $word1;
                %list = %list1;
                $word1 = $nmz1_word->read(\$w1, \%list1);
            }
            if ($c >= 0){
                $word = $word2;
                # Renumber the second source's keys past the first source.
                for my $key (keys %list2){
                    $list{$key + $offset} = $list2{$key};
                }
                $word2 = $nmz2_word->read(\$w2, \%list2);
            }
            $nmz0_word->write($word, \%list);
            print "word $ndx: $word\n" unless ++$ndx % 100;
        }
    }

    # Pass 3: phrase index.  Exactly 0x10000 lists are read from each source
    # and written to the destination, slot by slot.
    # NOTE(review): 0x10000 is presumably the fixed number of phrase hash
    # slots in an index -- confirm against nmzidx.pl.
    {
        my $nmz0_phrase = $nmz0->open_phrase;
        my $nmz1_phrase = $nmz1->open_phrase;
        my $nmz2_phrase = $nmz2->open_phrase;

        for (my $ndx = 0; $ndx < 0x10000; $ndx++){
            my (@list, @list2);
            $nmz1_phrase->read(\@list);
            $nmz2_phrase->read(\@list2);

            # Append the second source's entries, shifted by $offset.
            for my $key (@list2){
                push(@list, $key + $offset);
            }

            $nmz0_phrase->write(\@list);
            printf("phrase %04X\n", $ndx) unless $ndx & 0xff;
        }
    }

    # The status is written from the first source index only.
    $nmz0->write_status($nmz1);

    if (my $log = $nmz0->log_open("[Merge]")){
        $log->printf("%-20s %d\n", "Total Files:", $nmz0->{'flist'}->{'offset'});
        $log->printf("%-20s %d\n", "Total Keywords:", $nmz0->{'word'}->{'offset'});
        $nmz0->log_close;
    }

    # NOTE(review): presumably installs the newly written files as the live
    # index; the meaning of the 0 argument is not visible here.
    $nmz0->replace_db(0);
}

# Copy every entry of a source file list into the destination file list.
#
# Arguments (positional):
#   1. array ref of field names that every written entry must carry
#   2. destination file-list object; its write() method receives each entry
#   3. source file-list object; its read() method supplies the entries and
#      its 'dir' and 'size' members are used in the progress message
#
# Returns the number of entries copied.
sub nmzfile{
    my ($wanted, $dest, $source) = @_;

    my ($dir, $size) = ($source->{'dir'}, $source->{'size'});
    my $ndx = 0;

    # One hash is shared by all read() calls, exactly as the reader expects
    # it to be handed in; reading stops at the first undefined result.
    my %entry;

    while (defined $source->read(\%entry)){
        $ndx += 1;

        # Pad fields the entry lacks with an empty string so that every
        # entry is written with the full set of fields.
        for my $name (@$wanted){
            next if defined $entry{'field'}{$name};
            $entry{'field'}{$name} = '';
        }

        # Progress report on every 100th entry.
        if ($ndx % 100 == 0){
            print "$dir: $ndx/$size\n";
        }

        $dest->write(\%entry);
    }

    return $ndx;
}

# Three-way comparison of two words, either of which may be undefined.
#
# Returns undef when both words are undefined.  When exactly one is
# undefined, the defined word orders first (-1 if it is the first argument,
# 1 if it is the second).  Otherwise the result is the string comparison
# `cmp` of the two words.
sub wordcmp{
    my ($left, $right) = @_;

    if (defined $left){
        return defined $right ? $left cmp $right : -1;
    }

    # $left is undefined from here on.
    return defined $right ? 1 : undef;
}