[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Misc



Hi, Itīs again me from Sweden

I saw that post concerning the wanted X11 tutorial stuff. Paul/Anni, can
you put up that "Articles wanted" page? I hacked some simple news2html
script (attached). Unfortunately I couldnt test it, mainly because I
couldnt find that ("/#/(Ī&) Net::NNTP module on my CPAN cd. Anyway, if
you want to use it right now, here it is. If not, a simple page just
listing the topics will be enough for now.

    Christian
#!/usr/bin/perl -w
#
# Read thread from newsgroup and wrap all the messages into a HTML page
# The messages are also stored in a DB (Key is the msgid) to avoid costly
# refetching of already known ones.
#
# Actually two databases are maintained: n2h-meta (containing statistics
# about the newsgroups and subjects already processed) and n2h-messages
# (containing the known messages)
#
# To be run locally (no CGI)
#

use strict;
use DB_File;
use Net::NNTP;   # CPAN Module !!



# Some internal flags ------------------------------------------------

my $Debug    = 0;                           # print debugging info ?
my $Verbose  = 0;                           # print process info ?
my $Version  = "0.1";


# Operation parameters, configurable --------------------------------

my $Server   = "sunsite.auc.dk";            # newsserver to contact
my $Group    = "sunsite.linux.linuxgames";  # newsgroup to work on
my $Fetch    = 1;                           # fetch messages ?
my $Write    = 1;                           # write HTML ?
my $FetchAll = 0;                           # fetch even already known msgs?
my $DBase    = "~/n2h";                     # database basename
my $Output   = "";                          # output file name
my $Subject  = "foobar";                    # subject to search for


# Some runtime vars -------------------------------------------------

my %Messages = ();        # known messages. tied to the database.
my %Metainfo = ();        # misc persistent data. tied to database.

# Contents of Metainfo:
#
# <groupname>  =>  Number of last message read
# <subject>    =>  MsgIDs of matching messages (seperated by newline)
#






#
# ================================= main =====================================
#



GetOpts ();
InitDB ();
FetchMessages () if $Fetch;
WriteHTML ()     if $Write;
CloseDB ();



#
# ============================== subroutines =================================
#




######################
#
# Pack the stuff into a HTML page
#

sub WriteHTML
{
	my $OldHandle;
	my $MsgID;
	my $Article;
	my $Header;
	my $Body;
	my $From;
	my $Date;

	if ($Output)
	{
		open (OUTPUT, ">$Output")
			or die "Can't open output file $Output: $!\n";
		$OldHandle = select (OUTPUT);
	}

	WriteHTMLHeader ();

	foreach $MsgID (split /\n/ $Metainfo {$Subject})
	{
		chomp $MsgID;
		$Verbose && print STDERR "HTMLizing message $MsgID\n";

		$Article = $Messages {$MsgID};
		$Article =~ /(.+?)\n\n(.+)/s;
		$Header = $1;
		$Body   = $2;

		print "<hr>";
		$Header =~ m/^From: (.+)$/m;
		$From = QuoteMeta ($1);
		print "<b>From: $From </b> <br>";

		$Header =~ m/^Date: (.+)$/m;
		$Date = QuoteMeta ($1);
		print "<b>Date: $Date </b> <br>";

		print "<br><pre>";
		print $Body;
		print "</pre><br>";
	}

	WriteHTMLFooter ();

	if ($Output) select ($OldHandle);

	$Verbose && print STDERR "Done writing HTML\n\n";
}


######################
#
# Quote HTML special chars
#
# Parameter1: String to process
# Returns:    processed string
#

sub Quotemeta
{
	shift;

	# replace angled brackets with proper html codes
	s#<#&lt;#g;
	s#>#&gt;#g;

	return $_;
}



######################
#
# Write HTML Header
#

sub WriteHTMLHeader
{
	print "<html>";
	print "<head><title>Thread listing for subject $Subject</title></head>";
	print "<body>";
	print "<h1>Thread listing for subject $Subject</h1>";
	print "<b>Newsgroup :</b> $Group <br>";
	print "<b>Newsserver:</b> $Group <br>";
	print "<br>";
}




######################
#
# Write HTML Footer
#

sub WriteHTMLFooter
{
	print "<br><br>";
	print "<font size=-2>Generated  with mail2html V $Version</font>";
	print "</body>";
	print "</html>\n";
}




######################
#
# Fetch Messages
# [inspired by Perl Cookbook Recipe 18.4]
#

sub FetchMessages
{
	my $ArticleCount;
	my $First;
	my $Last;
	my @Header;
	my $Article;
	my $TheServer;
	my $MsgID;
	my $i;

	$TheServer = Net::NNTP->new ($Server)
		or die "Can't connect to server $Server: $!\n";

	$Verbose && print STDERR "Connected to $Server\n";

	($ArticleCount, $First, $Last, undef) = $TheServer->group ($Group)
		or die "Can't access newsgroup $Group: $!\n";

	$Verbose && print STDERR "Accessing newsgroup $Group\n";

	if (($Last <= $Metainfo {$Group}) && (!$FetchAll))
	{
		$Verbose && print STDERR "No new articles\n";
		return;   # all articles are already known
	}

	if (!$FetchAll && ($First < $Metainfo {$Group}))
	{
		$First = $Metainfo {$Group};
	}

	$Verbose && print STDERR "Preparing to fetch ", $Last - $First, " articles\n";

	for ($i=$First ; $i <= $Last ; $i++)
	{
		$Verbose && print STDERR "Fetching Message Nr. $i\n";

		if (!(@Header = $TheServer->head ($i)))
		{
			$Metainfo {$Group} = $i - 1;
			die "Couldn't read message # $i: $!\n";
		}

		if (defined ($MsgID = MatchHeader (@Header)))
		{
			$Verbose && print STDERR "   Match! MsgID = $MsgID\n";
			$Article = join "\n", $TheServer->article ($i);
			$Messages {$MsgID} = $Article;
			$Metainfo {$Subject} .= "$MsgID\n";
		}
	}

	$Metainfo {$Group} = $Last;

	$Verbose && print STDERR "Done fetching articles\n\n";
}



######################
#
# Match message header against Subject pattern
#
# Parameter1: message header as array of lines
# Returns:    messageID if matched, else undef
#

sub MatchHeader
{
	my $MsgID   = undef;
	my $Matched = 0;

	while (shift)
	{
		if (/^Subject: .*$Subject/i)
		{ # Cool, a match!
			return $MsgID if $MsgID; # do we already have it?
			$Matched = 1;            # Uh, no
		}
		elsif (/^Message-Id: (.+)$/i)
		{
			$MsgID = $1;
			chomp $MsgID;
			return $MsgID if $Matched;
		}
	}

	return undef; # no match :(
}



######################
#
# Initialize Database
# [inspired by Perl Cookbook Recipe 14.1]
#

sub InitDB
{
	$DBase = ExpandTilde ($DBase);

	tie (%Metainfo, "DB_File", $DBase."-meta.db")
		or die "Can't open database ${DBase}-meta.db: $!\n";

	tie (%Messages, "DB_File", $DBase."-messages.db")
		or die "Can't open database ${DBase}-messages.db: $!\n";

	if ((!defined $Metainfo {$Group}) or ($FetchAll))
		$Metainfo {$Group} = 0;
}



######################
#
# Close Database
#

sub CloseDB
{
	untie %Messages;
	untie %Metainfo;
}



######################
#
# Expand '~' to proper home dir
# [Perl Cookbook Recipe 7.3]
#
# Parameter1 : String to process
# Returns    : Processed string
#

sub ExpandTilde
{
	my $TheString = shift;

	$TheString =~ s#^~([^/]*)#
		$1 ? (getpwnam ($1)) [7] :
		($ENV {HOME} || $ENV {LOGDIR} || (getpwuid ($>)) [7])
		#ex;

	return $TheString;
}

}


######################
#
# parse cmdline
#

sub GetOpts
{
	my ($Arg)  = "";
	my ($SPos) = 0;

	while ($Arg = shift (@ARGV))
	{
		if ($Arg =~ /^-[sgadofwvhV]{2,}/)
		{
			for ($SPos = length ($Arg) ; $SPos > 1 ; $SPos--)
			{
				unshift (@ARGV, "-" . substr ($Arg, $SPos-1, 1));
				$ODebug && warn "Option -".substr ($Arg, $SPos-1, 1)." found in multi\n";
			}
		}
		elsif (($Arg eq "-s") or ($Arg eq "--server"))
		{
			$Server = shift (@ARGV);
		}
		elsif (($Arg eq "-g") or ($Arg eq "--group"))
		{
			$Group = shift (@ARGV);
		}
		elsif (($Arg eq "-a") or ($Arg eq "--all"))
		{
			$FetchAll = 1;
		}
		elsif (($Arg eq "-d") or ($Arg eq "--database"))
		{
			$DBase = shift (@ARGV);
		}
		elsif (($Arg eq "-o") or ($Arg eq "--outputfile"))
		{
			$Output = shift (@ARGV);
		}
		elsif (($Arg eq "-f") or ($Arg eq "--fetchonly"))
		{
			$Fetch = 1;
			$Write = 0;
		}
		elsif (($Arg eq "-w") or ($Arg eq "--convertonly"))
		{
			$Fetch = 0;
			$Write = 1;
		}
		elsif (($Arg eq "-v") or ($Arg eq "--verbose"))
		{
			$Verbose = 1;
		}
		elsif (($Arg eq "-h") or ($Arg eq "--help"))
		{
			Usage ();
			die;
		}
		elsif (($Arg eq "-V") or ($Arg eq "--version"))
		{
			Version ();
			die;
		}
		else
		{
			$Subject = $Arg;
		}
	}
}


######################
#
# Display version info
#

sub Version
{
	my $OldHandle = select (STDERR);

	print "news2html V $Version   (C) Christian Reiniger\n";
	print "   may be redistributed/changed under the terms of the GPL\n";^
	print "\n";

	select ($OldHandle);
}



######################
#
# Display usage info
#

sub Usage
{
	my $OldHandle = select (STDERR);

	Version ();

print <<'USAGEEND';
Usage:
    news2html [options] "Thread subject"

Options  (short | long : argument (default value)):
  -s | --server      : string  ("sunsite.auc.dk")
       Newsserver to connect to
  -g | --group       : string  ("sunsite.linux.linuxgames")
       Newsgroup to browse
  -a | --all         : none
       Fetch complete thread, incl. already known msgs
  -d | --database    : string  ("~/n2h")
       Basename to use for the database
  -o | --outputfile  : string  (STDOUT)
       Name of output file (HTML)
  -f | --fetchonly   : none
       Only fetch messages, do not generate HTML
  -w | --convertonly : none
       Only generate HTML, do not fetch messages
  -v | --verbose     : none
       Print some process info
  -h | --help        : none
       This info
  -V | --version     : none
       Print version info

USAGEEND

	select ($OldHandle);
}