#!/usr/bin/perl -w

BEGIN {
    use constant EXTRADIR => "/var/ispman/customer/m_thiesen/perl";
    push @INC, EXTRADIR if ( -e EXTRADIR);
}

use strict; use warnings;
use XML::RSS;
use DateTime;
use Date::Parse;
use DateTime::Format::Mail;

my $VERSION = "0.2";

## methods:
sub mk_rss {
    my $data = shift;

    # some common errors
    $data =~ s/ & / &amp; /gi;

    $data =~ s/&ouml;/ö/g;
    $data =~ s/&Ouml;/Ö/g;

    $data =~ s/&auml;/ä/g;
    $data =~ s/&Auml;/Ä/g;

    $data =~ s/&uuml;/ü/g;
    $data =~ s/&Uuml;/Ü/g;

    $data =~ s/&szlig;/\x{230}/g;

    #ok, first try clean up the data
    eval { require XML::LibXML; };
    if (!$@) {
	my $parser = XML::LibXML->new();
	$parser->recover(1);
	my $doc = $parser->parse_string($data);
	$data = $doc->toString();
    } else {
	undef $@;
    }

    #check for HTML (404 pages, etc)
    if ($data =~ m|<!DOCTYPE (\S+) PUBLIC( [^>]+)+>|i) {
	my $doctype = $1;
	if ($doctype =~ /html/i) {
	    warn "Bailing out because of doctype $doctype:\n";
	    require HTML::Strip;
	    my $s = new HTML::Strip;
	    warn $s->parse($data);
	    $s->eof;
	    return undef;
	}
    }

    my $rss;

    if ($data =~ m|http://purl.org/atom/|) {
	require XML::RSS::FromAtom;
	my $atom2rss = new XML::RSS::FromAtom;
	$rss = $atom2rss->parse($data);
    } elsif ($data =~ /rss|rdf/) {
	$rss = new XML::RSS;
	$rss->parse($data);
    } 

    return $rss;
}


## main:

my $configfile = shift;

if (! defined $configfile) {
    die "Usage: $0 <configfile>\n";
}

use Config::IniHash;

my $config = ReadINI $configfile, ( 'case' => 'tolower') ;
my $planet = $config->{planet};

use FindBin;
use File::Basename;
use File::Spec::Functions;

my $basedir = $FindBin::RealBin;

# setup a caching user agent to laod the feeds
use LWP::UserAgent::WithCache;
my %cache_opt = (
		 'cache_root' => catdir($basedir, $planet->{cache_directory}),
		 'default_expires_in' => 5,
		 'namespace' => $planet->{name},
		 );

my $ua = LWP::UserAgent::WithCache->new(\%cache_opt);

# get the feeds and parse them:
my $feed_data = {};
my @feeds = grep /^http/, keys %$config;
my @channels;
foreach my $feed (@feeds) {
    my $response = $ua->get( $feed );
    if (!$response->is_success) {
	warn "Couldn't load $feed\n";
	next;
    } else {
	my $data = $response->content;
	my $rss = mk_rss $data;
	my $channel_name = $config->{$feed}->{name};
	my $channel_link = $rss->{'channel'}->{'link'};
	my $channel_title = $rss->{'channel'}->{'description'};

	my $prefix = $channel_link;
	$prefix =~ s|([^:/])/.+|$1|;

	push @channels, {title => $channel_title, name => $channel_name, 
			 uri => $feed, link => $channel_link };

        foreach my $item (@{$rss->{'items'}}) {
            my $headline = $item->{'title'};

            my $dt = DateTime->now();
            if (exists $item->{'pubDate'}) {
                #at lest RSS 2.0 says RFC 822
                eval {
		    my $p = DateTime::Format::Mail->new->loose;
                    $dt = $p->parse_datetime($item->{'pubDate'});
                };
                if ($@) {
                    #bad, stick to the standard bastard!
		    warn $@;
                    undef $@;
                    my $time = str2time($item->{'pubDate'});
                    if ($time) {
                        $dt = DateTime->from_epoch($time);
                    } else {
                        warn "Strange date format found: ". $item->{'pubDate'} . "\n";
                        $dt = DateTime->now;
                    }
                }
            } else {
		warn "Item has no pubdate: $channel_name\n";
	    }

            my $description = $item->{'description'};
            $description ||= '';
            if (exists $item->{'http://purl.org/rss/1.0/modules/content/'}) {
                my $data = $item->{'http://purl.org/rss/1.0/modules/content/'}->{'encoded'};
                if ($data) {
                    my $text = $data;
                    $text ||= '';
                    $description = $text if (length $text > length $description);
                }
            }

	    $description =~ s|(href=")(?!http://)|$1$prefix|gsi;
	    $description =~ s|(src=")(?!http://)|$1$prefix|gsi;

            my $link = $item->{'link'};
	    my $author = $item->{'author'};

	    my $date = $dt->strftime('%Y%m%d');

	    my $entry = {
		channel_link => $channel_link,
		channel_title => $channel_title,
		channel_name => $channel_name,
		title => $headline,
		content => $description,
		creator => $author,
		link => $link,
		date => $dt->strftime($planet->{date_format}),
		dt => $dt,
	    };

	    if (exists $feed_data->{$date}) {
		push @{$feed_data->{$date}}, $entry;
	    } else {
		$feed_data->{$date} = [ $entry ];
	    }
	}
    }
}
my @items;
my $lastdate = "";;
my $lastchannel = "";
foreach my $date (reverse sort keys %$feed_data) {
    my @list = reverse sort { $a->{dt} <=> $b->{dt}} @{$feed_data->{$date}};
    foreach my $item (@list) {
	if ($date ne $lastdate) {
	    $item->{'new_date'} = $item->{'dt'}->strftime('%B %d, %Y');
	    $lastdate = $date;
	    $lastchannel = "";
	}
	if ($item->{'channel_name'} ne $lastchannel) {
	    $item->{'new_channel'}++;
	    $lastchannel = $item->{'channel_name'};
	}
	delete $item->{'dt'};
	push @items, $item;
    }
}

# create the output
my @templates = split / /, $planet->{'template_files'};
use HTML::Template;

@channels = sort { $a->{'name'} cmp $b->{'name'} } @channels;

foreach my $template_file (@templates) {
    my $template = HTML::Template->new( 'filename' => $template_file,  die_on_bad_params => 0);

    $template->param("name", $planet->{'name'});

    my @items_paged;
    if (exists $planet->{items_per_page}) {
	my $count = $planet->{items_per_page};
	@items_paged = @items[0 .. $count];
    } else {
	@items_paged = @items;
    }

    if (exists $config->{$template_file}->{items_per_page}) {
	my $count = $config->{$template_file}->{items_per_page};
	@items_paged = @items[0 .. $count];
    }

    $template->param("Items", \@items_paged);
    $template->param("date", DateTime->now()->strftime($planet->{date_format}));
    $template->param("Channels", \@channels);

    my $filename = basename $template_file;
    $filename =~ s/\.tmpl//gi;
    my $ofname = catdir($planet->{output_dir}, $filename);
    open my $ofh, ">:utf8", $ofname or die "Couln't create file $ofname for writing: $!\n";
    print $ofh $template->output;
    close $ofh;
}
