namazu-dev(ring)


[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

htmlmail - a filter to convert mail/news to HTML



ふと思い立って、Mail/News のファイルを HTML に変換するフィル
タを作りました。

使い方:

  % htmlmail < ~/Mail/inbox/123 > foo.html
  % lynx foo.html

htmlmail は CGI としても使えます。(こっちが本当の目的 :-)
<http://home.jp.FreeBSD.org/cgi-bin/showmail> の真似ができま
す。References: を辿ったり、スレッドを生成したり、といった高
機能はありませんが。

Mail/Newsを CGIで全文検索したいけど、MHonArcを使うほどディス
クに余裕はない(あるいは面倒)、という状況で使えます。

簡単な perlスクリプトです。適当に修正して使ってくださいませ。

-- Satoru Takabayashi

#! /usr/bin/perl -wT
#
# htmlmail - a filter to convert mail/news to HTML.
#            It works as CGI if $ENV{SCRIPT_NAME} is defined.
#
# Copyright (C) 2000 Satoru Takabayashi <satoru-t@xxxxxxxxxxxxxxxxxx>
#     All rights reserved.
#     This is free software with ABSOLUTELY NO WARRANTY.
#
# You can redistribute it and/or modify it under the terms of 
# the GNU General Public License version 2.
#


require 5.004;
use strict;
use FileHandle;
use NKF;        # <ftp://ftp.ie.u-ryukyu.ac.jp/pub/software/kono/>

my $maildir  = "/foo/bar/Mail";  # for CGI mode.
my $fieldpat = "To:|Cc:|Newsgroups:|Subject:|From:|Date:" .
               "|X-Mailer:|User-Agent:|Message-Id:";

my $Header = << 'EOS';
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
        "http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
<title>${subject}</title>
</head>
<body>
<h1>${subject}</h1>
<hr>
EOS

my $Footer = << 'EOS';
</body>
</html>
EOS

main();

sub main () {
    my $mail = "-";  # default is STDIN

    if (defined $ENV{SCRIPT_NAME}) { # CGI mode
	print "Content-type: text/html\n\n";
	if (!defined $ENV{"PATH_INFO"}) {
	    print "No mail specified.";
	    exit 1;
	}
	$mail = $maildir . $ENV{"PATH_INFO"};
    } else {
	$mail = $ARGV[0] if defined $ARGV[0];
    }

    show_mail($mail);
}

sub show_mail ($) {
    my ($mail) = @_;

    my $fh = new FileHandle;
    $fh->open($mail) || die "$mail: $!";
    my @lines = map { chomp; nkf("-emXZ1", $_) } <$fh>;
    return if @lines == 0;

    # Remove very first "From " line.
    shift @lines if $lines[0] =~ /^From /i;

    my ($subject, $headers, $boundary) = handle_headers(\@lines);

    $Header =~ s/\$\{subject\}/$subject/g;
    print $Header;

    print "<ul>\n";
    print $headers;
    print "</ul>\n";
    print "<hr>\n";

    my $body = handle_body(\@lines, $boundary);

    print "<pre>\n";
    print $body;
    print "</pre>\n";

    print $Footer;
}

sub handle_headers (\@) {
    my ($lines_ref) = @_;

    my $subject  = "";
    my %headers  = ();
    my $boundary = "";
    while (@$lines_ref) {
	my $line = shift @$lines_ref;
	last if $line =~ /^$/;

	# Connect if the next line has leading spaces.
	while (defined($$lines_ref[0]) && $$lines_ref[0] =~ /^\s+/) {
	    my $nextline = shift @$lines_ref;
	    $line =~ s/([\xa1-\xfe])\s+$/$1/;
	    $nextline =~ s/^\s+([\xa1-\xfe])/$1/;
	    $line .= $nextline;
	}

	unless ($line =~ /^(\S+:) (.*)/) {
	    print STDERR ">> $line\n";
	    die;
	}
	my $field = $1;
	my $value = encode_entity($2);

	if ($field =~ /^($fieldpat)$/) {
	    $headers{$field} = $value;
	    $subject = $value if $field eq "Subject:";
	}

	if ($field eq "Content-Type:" && 
	    $value =~ /multipart.*boundary="(.*)"/i) 
	{
	    $boundary = $1;
	}

    }

    # Sort by $fieldpat order.
    my $headers = "";
    my @fields = split '\|', $fieldpat;
    for my $field (@fields) {
	if (defined $headers{$field}) {
	    $headers .= "<li><em>$field</em> " 
		. $headers{$field} . "\n";	
	}
    }

    return ($subject, $headers, $boundary);
}

sub handle_body (\@$) {
    my ($lines_ref, $boundary) = @_;

    my $body   = "";
    while (@$lines_ref) {
	my $line = shift @$lines_ref;
	$body .= $line . "\n";
    }

    # Handle MIME multipart message.
    if ($boundary ne "") {
	$body =~ s/This is multipart message.\n//i;
	$body =~ s/--\Q$boundary\E(--)?\n?/\xff/g;

	my (@parts) = split(/\xff/, $body);
	$body = "";
	for my $part (@parts){
	    if ($part =~ s/^(.*?\n\n)//s){
		my $head = $1;
		$body .= $part if $head =~ m!^content-type:.*text/plain!mi;
	    }
	}
    }
    $body = encode_entity($body);
    $body = hyperlink($body);

    return $body;
}

sub encode_entity() {
    my ($str) = @_;

    $str =~ s/&/&amp;/g;
    $str =~ s/</&lt;/g;
    $str =~ s/>/&gt;/g;
    
    return $str;
}


#  hyperlink() subroutine uses codes of MHonArc's mhtxtplain.pl.
#  <http://www.oac.uci.edu/indiv/ehood/mhonarc.html>

##---------------------------------------------------------------------------##
##  File:
##	@(#) mhtxtplain.pl 2.8 99/08/15 22:19:04
##  Author:
##      Earl Hood       mhonarc@xxxxxxxxx
##  Description:
##	Library defines routine to filter text/plain body parts to HTML
##	for MHonArc.
##	Filter routine can be registered with the following:
##              <MIMEFILTERS>
##              text/plain:m2h_text_plain'filter:mhtxtplain.pl
##              </MIMEFILTERS>
##---------------------------------------------------------------------------##
##    MHonArc -- Internet mail-to-HTML converter
##    Copyright (C) 1995-1999	Earl Hood, mhonarc@xxxxxxxxx
##
##    This program is free software; you can redistribute it and/or modify
##    it under the terms of the GNU General Public License as published by
##    the Free Software Foundation; either version 2 of the License, or
##    (at your option) any later version.
##
##    This program is distributed in the hope that it will be useful,
##    but WITHOUT ANY WARRANTY; without even the implied warranty of
##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
##    GNU General Public License for more details.
##
##    You should have received a copy of the GNU General Public License
##    along with this program; if not, write to the Free Software
##    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
##    02111-1307, USA
##---------------------------------------------------------------------------##

sub hyperlink($) {
    my ($str) = @_;

    my $Url    	= '(http://|https://|ftp://|afs://|wais://|telnet://|ldap://' .
		  '|gopher://|news:|nntp:|mid:|cid:|mailto:|prospero:)';
    my $UrlExp 	= $Url . q%[^\s\(\)\|<>"']*[^\.?!;,"'\|\[\]\(\)\s<>]%;
    my $HUrlExp = $Url . q%(?:&(?![gl]t;)|[^\s\(\)\|<>"'\&])+% .
			 q%[^\.?!;,"'\|\[\]\(\)\s<>\&]%;

    ## Convert URLs to hyperlinks
    $str =~ s@($HUrlExp)@<a href="$1">$1</a>@gio;

    return $str;
}