#!/usr/bin/perl
#
# nntp2jam.pl version 1.0 by Johan Billing
#
# This program has been placed in the public domain, used it any way you want.
#
# The nntp2jam.pl program can connect to a NNTP server of your choice and 
# download Usenet messages to JAM messagebases for local use. It is not 
# intended to be used as a gateway. 
#
# The TimeDate module is required. It can be found packaged for most Linux
# distributions or can be installed using the following command:
#
# perl -MCPAN -e "install TimeDate"
#
# Users of ActivePerl for Windows can install it using "ppm install TimeDate".
#
# Limitations:
# * Uploading of new posts is not supported
# * No charset support
# * Only very limited MIME support
# * No reply links are created

use strict;

use Net::NNTP;
use MIME::QuotedPrint;
use MIME::Base64;
use Time::Local;
use Date::Parse;
use Text::ParseWords;
use Getopt::Long;

use JAM;

# Primitive MIME header decoder 

sub unmime {
   my @words=split(/\s/,$_[0]);
   my $res='';

   for(my $i=0;$i <= $#words;$i++) {
      if ( $words[$i] =~ /^=\?(\S+)\?(\S+)\?(\S+)\?=/ ) {
         if(lc($2) eq 'q') {
            my $dec = decode_qp($3);
            $dec =~ s/_/ /g;
            $res .= $dec;
         }

         if(lc($2) eq 'b') {
            $res .= decode_base64($3);
         }
      }
      else {
         if($i) {
            $res .= ' ';
         }
         
         $res .= $words[$i];
      }
   }

   return $res;
}

# Parse command-line arguments 

my %opts = ( server  => $ENV{NNTPSERVER},
             port    => 119,
             maxnum  => 1000,
             initnum => 100 );

my $getopt_res = GetOptions(\%opts, 'server=s', 
                                    'port=i', 
                                    'username=s',
                                    'password=s',
                                    'initnum=i',
                                    'maxnum=i',
                                    'fullheaders',  
                                    'debug',
                                    'help|h');
            
if ( !$getopt_res or $opts{help} or $#ARGV != 0) {
   print "\n";
   print "Usage: [perl] nntp2jam.pl [<options>] <groups file>\n";
   print "\n";
   print "Valid options:\n";
   print "\n";
   print " --server <name>     NNTP server to use (default: NNTPSERVER env variable)\n";
   print " --port <port>       Port number of NNTP server (default: 119)\n";
   print " --username <name>   Username if the NNTP server requires authentication\n";
   print " --password <pass>   Password if the NNTP server requires authentication\n";
   print " --maxnum <num>      Maximum messages to download per group (default: 1000)\n";
   print " --initnum <num>     Number of messages to download first time (default: 100)\n";
   print " --fullheaders       Import all news headers into the JAM messagebase\n";   
   print " --debug             Dump all NNTP communication to stderr\n";
   print "\n";
   print "Each line of the groups file should contain the newsgroup name and the path\n";
   print "to the JAM messagebase where downloaded news articles should be stored.\n";
   print "\n";
   print "Example:\n";
   print "\n";
   print "rec.travel.europe /home/bbs/areas/rec.travel.europe\n";
   print "soc.culture.nordic /home/bbs/areas/soc.culture.nordic\n";   
   print "\n";
   print "nntp2jam.pl will automatically add the number of the last downloaded message\n";
   print "for each newsgroup to this file, do not remove these numbers.\n";
   print "\n";
   exit;
}

# Read groups file

print "Reading groups file $ARGV[0]\n";

my @groups_name;
my @groups_path;
my @groups_max;

if (!open(GROUPSFILE, $ARGV[0]))
{
   print "Failed to open file";
   exit;
}        
        
while(<GROUPSFILE>) {
   s/\s+$//; # strip trailing whitespace

   my @words = quotewords('\s+', 0, $_);
            
   if($#words >= 1)
   {
      push(@groups_name,$words[0]);
      push(@groups_path,$words[1]);
      push(@groups_max,$words[2]);
   }
}

close(GROUPSFILE);
            
# Connect to NNTP server 

my $nntp = Net::NNTP->new($opts{server},(Port => $opts{port}, Debug => $opts{debug}));

print "Connecting to server $opts{server} at port $opts{port}\n";

if(!$nntp) {
   print "Failed to connect to server\n";
   exit;
}

if (defined($opts{username}) and defined($opts{password})) {
   print "Logging in\n";

   if(!($nntp->authinfo($opts{username},$opts{password}))) {
      print "Login failed\n";
      $nntp->quit();
      exit;
   }
}

# Download news

for(my $group = 0; $group <= $#groups_name; $group++) {
   my ($num,$first,$last) = $nntp->group($groups_name[$group]);

   if (!defined($num)) {
      print "Failed to download news from $groups_name[$group]\n";
   }
   else {
      my $start;
      
      if ($groups_max[$group]) {
         $start = $groups_max[$group]+1;
        
         if($last-$start+1 > $opts{maxnum}) {
            $start = $last-$opts{maxnum}+1;
         }
      }
      else {
         $start = $last-$opts{initnum}+1; 
      }

      if ($start < $first) {
         $start = $first;
      }

      if ($start > $last or $start == 0) {
         print "No new articles to download in $groups_name[$group]\n";
      }
      else {
         print "Downloading ",$last-$start+1," new articles in $groups_name[$group]\n";
      
         my $jamhandle = JAM::OpenMB($groups_path[$group]);

         if (!$jamhandle) {
            print "Creating messagebase $groups_path[$group]\n";
            
            $jamhandle = JAM::CreateMB($groups_path[$group],1);
            
            if(!$jamhandle) {
               print "Failed to create JAM messagebase $groups_path[$group], skipping group\n";
            }
         }
         
         if($jamhandle) {
            for(my $i=$start; $i <= $last; $i++) {
               $|=1; printf "%d\r",$i-$start+1; $|=0;

               my $headref = $nntp->head($i);
               my $bodyref = $nntp->body($i);
      
               if (!defined($headref) || !defined($bodyref)) {
                  print "Failed to download article $i (article deleted on server?)\n";
               }
               else {
                  my $header = join("",@$headref);
                  my $body   = join("",@$bodyref);
                  
                  $header =~ s/\n\s+/ /g;  # fix continuation lines
      
                  my %hdrs = (UNIX_FROM => split /^(\S*?):\s*/m, $header); # from perldoc -f split
                  chomp(%hdrs);
      
                  my $jamfrom = 'Unknown';
                  my $jamaddr = 'unknown@unknown';
                  my $jamsubj = '';
                  my $jamtzutc = '';
      
                  my $jamdate = time();
      
                  # Parse From: header line. Not perfect, but good enough for most cases.
               
                  if($hdrs{From}) {
                     if ($hdrs{From} =~ /^(.+)\s<(.+)>$/ ) {
                        $jamfrom = $1;
                        $jamaddr = $2;
                     }
                     elsif ($hdrs{From} =~ /^(.+)\s\((.+)\)$/ ) {
                        $jamfrom = $2;
                        $jamaddr = $1;
                     }
                     else {
                        $jamaddr = $hdrs{From};
                        $jamfrom = $hdrs{From};
                     }
      
                     if ( $jamfrom =~ /^\"(.+)\"$/ ) {
                        $jamfrom = $1;
                        $jamfrom =~ s/\\(.)/$1/g;
                     }
                  }
      
                  if($hdrs{Subject}) {
                     $jamsubj = $hdrs{Subject};
                  }
      
                  if($hdrs{Date}) {
                     my ($ss,$mm,$hh,$day,$month,$year,$zone) = strptime($hdrs{Date});
      
                     if(defined($ss)) {
                        if($zone < 0) {
                           $jamtzutc = sprintf("%03d%02d",$zone/3600,($zone/60)%60);
                        }
                        else {
                           $jamtzutc = sprintf("%02d%02d",$zone/3600,($zone/60)%60);
                        }
      
                        $jamdate = timelocal($ss,$mm,$hh,$day,$month,$year);
                     }
                  }
      
                  $jamfrom = unmime($jamfrom);
                  $jamsubj = unmime($jamsubj);
                  
                  my %msgheader;
                  my %subfields;
                  my @subfields;
   
                  if($jamfrom) {
                     push(@subfields,JAM::Subfields::SENDERNAME,$jamfrom);
                  }
               
                  if($jamsubj) {
                     push(@subfields,JAM::Subfields::SUBJECT,$jamsubj);
                  }
               
                  if($jamaddr) {
                     push(@subfields,JAM::Subfields::FTSKLUDGE,"REPLYADDR: ".$jamaddr); 
                  }
               
                  if($jamtzutc) {
                     push(@subfields,JAM::Subfields::FTSKLUDGE,"TZUTC: ".$jamtzutc); 
                  }

                  if($opts{fullheaders}) {
                     for my $line (@$headref) {
                        chomp $line;
                        push(@subfields,JAM::Subfields::FTSKLUDGE,"RFC-".$line);
                     }
                  }

                  $msgheader{DateWritten}   = JAM::TimeToLocal($jamdate);
                  $msgheader{DateReceived}  = JAM::TimeToLocal(time());
                  $msgheader{DateProcessed} = JAM::TimeToLocal(time());

                  $msgheader{Attributes} = JAM::Attr::SENT;

                  if($hdrs{'Content-Type'} =~ /text\/plain/ ) {
                     if($hdrs{'Content-Transfer-Encoding'} eq 'quoted-printable') {
                        $body = decode_qp($body);
                     }
                     
                     if($hdrs{'Content-Transfer-Encoding'} eq 'base64') {
                        $body = decode_base64($body);
                     }
                  }
                  
                  $body =~ s/\x0a/\x0d/g;
               
                  if (!JAM::LockMB($jamhandle,10)) {
                     print "Failed to lock message base $groups_path[$group]\n";
                     $nntp->quit();
                     exit;
                  }
                  else
                  {
                     my $num = JAM::AddMessage($jamhandle,\%msgheader,\@subfields,\$body);

                     if(!$num) {
                        print "Failed to add message to $groups_path[$group]\n";
                        JAM::UnlockMB($jamhandle);
                        $nntp->quit();
                        exit;
                     }
                  
                     JAM::UnlockMB($jamhandle);
                  }
               }
            }
         
            $groups_max[$group]=$last;
         }
      }
   }
}

print "Closing connection to server\n";
$nntp->quit();

# Write groups file

print "Writing groups file $ARGV[0]\n";

if (!open(GROUPSFILE,">$ARGV[0]"))
{
   print "Failed to open file";
   exit;
}        

for (my $i=0; $i <= $#groups_name; $i++) {
   if($groups_max[$i]) {
      print GROUPSFILE "$groups_name[$i] $groups_path[$i] $groups_max[$i]\n";
   }
   else {
      print GROUPSFILE "$groups_name[$i] $groups_path[$i]\n";
   }
}
        
close(GROUPSFILE);

