#!/usr/bin/perl -w

use lib qw(/home/blech/perllib/perl5
           /home/blech/perllib/share/perl/5.8.4
           /home/blech/perllib/lib/perl/5.8.4
           /home/blech/perllib/lib/home/blech/perllib/); 

use strict;

use CGI;
use CGI::Cache;
use Data::Dumper;
use DateTime;
use DateTime::Format::Strptime;
use Digest::MD5 qw(md5_hex);
use Template;
use XML::Feed;
use URI;

# Set to a true value to bypass CGI::Cache in html() (useful when debugging).
my $nocache = 0;

# Request object shared by the subs below; the 'view' parameter selects the
# output format.
my $q    = CGI->new;
my $view = $q->param('view');

# Formatter that stringifies DateTime objects as e.g. 2008-01-31T12:34:56Z.
# The trailing Z is a literal in the pattern; no time-zone conversion is done.
my $formatter = DateTime::Format::Strptime->new('pattern' => "%FT%TZ");

# Dispatch: 'atom', or anything containing 'rss', produces a feed; everything
# else (including no 'view' parameter at all) renders the HTML page.
if ($view && ($view eq 'atom' || $view =~ m/rss/)) {
  feed($view);
} else {
  html();
}

exit;

### output types

# Render the HTML front page from the index.tt template and print it.
#
# Takes no arguments. Output is routed through CGI::Cache (namespace
# 'huskfront', two hour expiry) unless the file-level $nocache flag is set.
# Dies if the template object cannot be created or the template fails to
# process, rather than printing an empty page.
sub html {
  CGI::Cache::setup( { cache_options =>
                         { cache_root         => '/tmp/CGI_Cache',
                           namespace          => 'huskfront',
                           directory_umask    => 077,
                           max_size           => 20 * 1024 * 1024,
                           default_expires_in => '2 hours',
                         }
                     } );
  CGI::Cache::set_key("default_key");
  unless ($nocache) {
    # Per the CGI::Cache synopsis, start() returns false once it has served a
    # cached copy, in which case there is nothing left to generate.
    CGI::Cache::start() or exit;
  }

  print CGI->header;

  # The text sources feed the main columns; Flickr photos are fetched
  # separately and handed to the template as a plain list of entries.
  my ($sources, $list, $entries) = main('wanted' => ['chaff', 'vox', 'delicious']);
  my $flickr = flickr();

  my $template = Template->new()
    or die "template error: " . Template->error() . "\n";

  my $output;
  my %vars = ( 'entries' => $entries,
               'list'    => $list,
               'sources' => $sources,
               'flickr'  => [ $flickr->entries ],
             );

  # process() returns false on failure and leaves the reason in error()
  $template->process("index.tt", \%vars, \$output)
    or die "template error: " . $template->error() . "\n";

  print $output;

  unless ($nocache) {
    CGI::Cache::stop();
  }
}

# Build a combined feed of the selected sources and print it as XML.
#
# Arguments:
#   $view - the raw 'view' request parameter; anything matching /rss/ yields
#           an RSS feed, everything else an Atom feed.
#
# Request parameters read from $q:
#   no_<source> - any parameter named like this excludes <source>
#   type        - handed to main() as the source-type filter; 'all' or a
#                 missing parameter means no filtering
#
# Prints the HTTP header and then the feed document to STDOUT.
sub feed {
  my $view = shift;
  $view = ($view =~ m/rss/) ? "RSS" : "Atom";

  # advertise the media type of the format that is actually produced
  my $media_type = ($view eq 'RSS') ? 'application/rss+xml' : 'application/atom+xml';
  print CGI->header(qq($media_type; charset="utf-8"));

  my $feed = XML::Feed->new($view);
  $feed->link("http://husk.org/");
  $feed->title("husk.org - collected output");
  $feed->tagline("all the content that Paul Mison generates in one handy feed");
  $feed->generator("XML::Feed");

  # TODO give the option of defining by inclusion not exclusion
  # work out what we need to exclude: every parameter named no_<source>
  my @remove = map { /^no_(.*)/s ? ($1) : () } $q->param;

  # get type; the parameter is optional, so guard against undef
  my $type = $q->param('type');
  $type = "" if (!defined $type || $type eq "all");

  # fetch data
  my ($sources, $list, $entries) = main('type'   => $type,
                                        'remove' => \@remove,
                                        'view'   => $view);

  if ($view eq 'Atom') {
    # add link rel="self"
    my $link = XML::Atom::Link->new('Namespace' => 'http://www.w3.org/2005/Atom');
    $link->set_ns();
    $link->type('application/atom+xml');
    $link->rel('self');
    $link->href('http://husk.org/?view=atom'); # TODO repeat params
    $feed->add_link($link);

    # add ID and updated
    my $now = DateTime->now;
    $now->set_formatter($formatter);

    $feed->id("tag:husk.org,2008:".join("/", sort @{$sources}));
    $feed->updated($now);
  }

  # replicate template ordering: newest date first, and within each date the
  # sources in the order main() returned them
  foreach my $date (reverse sort keys %{ $entries || {} }) {
    foreach my $source (@{ $sources }) {
      my $bucket = $entries->{$date}{$source} or next;
      $feed->add_entry($_) foreach @{ $bucket };
    }
  }

  print $feed->as_xml;
}

### fetch data

# Fetch the configured source feeds and organise their entries.
#
# Named arguments (all optional):
#   view   - 'html' (default), 'Atom' or 'RSS'; for the feed views, any source
#            whose configured format differs is converted to that format
#   type   - keep only sources whose configured 'type' contains this string
#   wanted - array ref of source names to fetch (default: every source
#            returned by sources())
#   remove - array ref of source names to drop from the wanted set
#
# Returns a three-element list:
#   \@sources - every configured source name, ordered by priority
#   $list     - array ref of all fetched entries, newest first
#   $entries  - hash ref {ymd date}{source name} => [ entries ]; entries whose
#               age exceeds 7 months are left out of this structure
#
# Dies if a feed cannot be fetched or parsed.
sub main {
  my %args = @_;
  my $view = $args{'view'} || 'html';
  my $type = $args{'type'} || "";

  warn "Producing view '$view' with type '$type'";

  my $sources = sources();

  # Start from empty containers so that an empty selection (e.g. a type that
  # matches no source) yields empty results instead of dereferencing undef.
  my $list    = [];
  my $entries = {};

  my $now = DateTime->now();
  $now->set_formatter($formatter);

  my @wanted = $args{'wanted'} ? @{ $args{'wanted'} } : keys %{ $sources };
  if ($args{'remove'}) {
    my %removed = map { ($_ => 1) } @{ $args{'remove'} };
    @wanted = grep { !$removed{$_} } @wanted;
  }

  # The type filter can come straight from the request, so quote it: regex
  # metacharacters are matched literally, while substring matching (e.g.
  # 'text' selecting both 'longtext' and 'shorttext') keeps working.
  my $type_re = $type ? qr/\Q$type\E/ : undef;

  foreach my $source (@wanted) {
    next if ($type_re && $sources->{$source}{'type'} !~ $type_re);

    my $uri  = URI->new($sources->{$source}{'url'});
    my $feed = XML::Feed->parse($uri)
      or die "can't parse $source: ".XML::Feed->errstr;

    # convert everything to the requested feed format, if needed
    if ($view ne 'html' && lc $sources->{$source}{'format'} ne lc $view) {
      $feed = $feed->convert($view);
    }

    # Fetch the entry objects once, so the objects tagged with 'source' here
    # are the same ones that end up in both $list and the by-date index.
    my @entries = $feed->entries;

    # group by date
    foreach my $entry (@entries) {
      $entry->{source} = $source;

      # add ID if needed; set author and source
      $entry->author("Paul Mison ($source)");
      # ok, this is harder than I thought
      # http://atomenabled.org/developers/syndication/atom-format-spec.php#element.source
      # $entry->source($source);

      # NOTE(review): if issued() returns undef for an entry, the ->ymd call
      # below dies; confirm every source always supplies a date.
      my $issued = $entry->issued;
      my $date   = $issued->ymd;
      my $gap    = $now - $issued;

      # fix up Atom feed
      if ($view eq 'Atom') {
        $issued->set_formatter($formatter);
        $entry->updated($issued) if (!$entry->updated());

        $entry->id(make_tag($entry, $source)) if (!$entry->id || $entry->id =~ m(^http://));
      }

      # skip really old stuff
      # TODO stop after the end of delicious entries (or something)
      # rather than fixed time period
      next if ($gap->delta_months > 7);

      if ($source eq 'flickr') {
        # trim some crap from the feed
        my $desc = $entry->content->body;
        $desc =~ s(<p><a href="http://www.flickr.com/people/blech/">.*</a> posted a photo:</p>)()i;
        $entry->content($desc);
      }

      push @{ $entries->{$date}{$source} }, $entry;
    }

    push @{ $list }, @entries;
  }

  # sort list by date, newest first
  $list = [ sort { $b->issued <=> $a->issued } @{ $list } ];

  # sort sources by priority
  my @sources = sort { $sources->{$a}{priority} <=> $sources->{$b}{priority} } keys %{ $sources };

  return (\@sources, $list, $entries);
}

# Fetch the public Flickr photo stream as an RSS 2.0 feed.
#
# Takes no arguments. Returns the object produced by XML::Feed->parse and
# dies with the XML::Feed error text if the feed cannot be fetched or parsed.
sub flickr {
  # get Flickr photos (REST, JSON, unused)
  # my $url  = "http://api.flickr.com/services/rest/?method=flickr.photos.search&api_key=86a4e36f0d24a3ecdeec19c32e6fa89e&user_id=48600109393%40N01&per_page=10&format=json&nojsoncallback=1";
  # my $json = get($url);
  # my $data = jsonToObj($json);

  warn "Getting Flickr photos";

  # get Flickr photos (RSS)
  my $feed_url = 'http://api.flickr.com/services/feeds/photos_public.gne?id=48600109393@N01&format=rss_200';
  my $photos   = XML::Feed->parse(URI->new($feed_url));

  die "can't parse Flickr: ".XML::Feed->errstr unless $photos;

  return $photos;
}

# Build a tag: URI to use as the Atom id of an entry.
#
# Arguments:
#   $entry  - object providing link() and issued(); issued() must return an
#             object with a ymd() method
#   $source - name of the source the entry came from
#
# Returns the tag URI as a string.
#
# For every source except 'delicious' the id is derived from the entry link,
# following http://diveintomark.org/archives/2004/05/28/howto-atom-id :
# drop the scheme, turn every '#' into '/', and insert ",<date>:" straight
# after the host name. 'delicious' entries get an id under husk.org made from
# the MD5 hex digest of the link.
sub make_tag {
  my $entry  = shift;
  my $source = shift;

  my $url = $entry->link();
  $url = '' unless defined $url;
  my $time = $entry->issued->ymd;

  if ($source eq 'delicious') {
    # delicious special case, because I want to have an internal record
    return "tag:husk.org,${time}:links/" . md5_hex($url);
  }

  my $tag = $url;

  # Strip only a leading scheme. A greedy match would also swallow the host
  # when the link carries another URL in its path or query string.
  $tag =~ s(^[^:/?#]*://)();

  # every fragment marker becomes a path separator, not just the first
  $tag =~ s(#)(/)g;

  # Put the date right after the host part; this also works when the link
  # has no path at all.
  $tag =~ s(^([^/]*))($1,${time}:);

  return "tag:$tag";
}

# Return the table of feeds this site aggregates.
#
# Takes no arguments. Returns a reference to a freshly built hash keyed by
# source name; each value is a hash ref with these keys:
#   url      - address of the feed
#   priority - number used to order the sources (lower sorts first)
#   format   - feed format of the source, 'rss' or 'atom'
#   type     - kind of content: 'longtext', 'shorttext' or 'image'
sub sources {
  my %feed_for = (
    chaff => {
      url      => 'http://husk.org/blog/index_full.rdf',
      priority => 1,
      format   => 'rss',
      type     => 'longtext',
    },
    vox => {
      url      => 'http://blech.vox.com/library/posts/atom-full.xml',
      priority => 2,
      format   => 'atom',
      type     => 'longtext',
    },
    delicious => {
      url      => 'http://del.icio.us/rss/blech',
      priority => 3,
      format   => 'rss',
      type     => 'shorttext',
    },
    flickr => {
      url      => 'http://api.flickr.com/services/feeds/photos_public.gne?id=48600109393@N01&format=atom',
      priority => 4,
      format   => 'atom',
      type     => 'image',
    },

    # Disabled sources, kept for reference:
    # flickr => {
    #   url      => 'http://api.flickr.com/services/feeds/photos_public.gne?id=48600109393@N01&format=rss_200',
    #   priority => 4,
    #   format   => 'rss',
    #   type     => 'image',
    # },
    # flickrfaves => {
    #   url      => 'http://api.flickr.com/services/feeds/photos_public.gne?id=48600109393@N01&format=rss_200',
    #   priority => 5,
    #   format   => 'rss',
    #   type     => 'image',
    # },
    # ffffound => {
    #   url      => 'http://api.flickr.com/services/feeds/photos_public.gne?id=48600109393@N01&format=rss_200',
    #   priority => 6,
    #   format   => 'rss',
    #   type     => 'image',
    # },
  );

  return \%feed_for;
}