#!/usr/bin/perl -w

# Front page and combined feed for husk.org.
#
# Fetches several remote feeds (see sources()), groups their entries by day
# and source, and renders either an HTML page through the "index.tt"
# template or a single merged Atom/RSS feed.
#
# CGI parameters read by this script:
#   view    - 'atom' or anything containing 'rss' selects feed output;
#             anything else (or nothing) selects the HTML page
#   type    - feed output only: keep sources whose 'type' contains this
#             string; 'all' (or absent) disables the filter
#   no_NAME - feed output only: exclude the source called NAME

use lib qw(
    /home/blech/perllib/perl5
    /home/blech/perllib/share/perl/5.8.4
    /home/blech/perllib/lib/perl/5.8.4
    /home/blech/perllib/lib/home/blech/perllib/
);

use strict;
use warnings;

use CGI;
use CGI::Cache;
use Data::Dumper;
use DateTime;
use DateTime::Format::Strptime;
use Digest::MD5 qw(md5_hex);
use Template;
use XML::Feed;
use URI;

# set to a true value to bypass CGI::Cache for the HTML view
my $nocache = 0;

my $q    = CGI->new;
my $view = $q->param('view');

# formatter attached to DateTime objects that end up in feed output
my $formatter = DateTime::Format::Strptime->new('pattern' => "%FT%TZ");

if ($view && ($view eq 'atom' || $view =~ m/rss/)) {
    feed($view);
}
else {
    html();
}

exit;

### output types

# html()
#
# Print the HTML front page: an HTTP header followed by the "index.tt"
# template filled with the chaff, vox and delicious entries plus the Flickr
# photo feed. Unless $nocache is set the output goes through CGI::Cache
# (key "default_key", two hour expiry); if CGI::Cache::start() returns
# false the script exits straight away.
#
# Dies if the template object cannot be built or the template fails to
# process, so a broken page is never printed in place of the real one.
sub html {
    # HTML output
    CGI::Cache::setup(
        {
            cache_options => {
                cache_root         => '/tmp/CGI_Cache',
                namespace          => 'huskfront',
                directory_umask    => 077,
                max_size           => 20 * 1024 * 1024,
                default_expires_in => '2 hours',
            }
        }
    );
    CGI::Cache::set_key("default_key");

    unless ($nocache) {
        CGI::Cache::start() or exit;
    }

    print CGI->header;

    my ($sources, $list, $entries)
        = main('wanted' => ['chaff', 'vox', 'delicious']);
    my $flickr = flickr();

    my $output;
    my $template = Template->new()
        || die "template error: " . Template->error() . "\n";

    # process() returns false on failure; report it instead of printing an
    # undefined $output
    $template->process(
        "index.tt",
        {
            'entries' => $entries,
            'list'    => $list,
            'sources' => $sources,
            'flickr'  => [ $flickr->entries ],
        },
        \$output
    ) or die "template error: " . $template->error() . "\n";

    print $output;

    unless ($nocache) {
        CGI::Cache::stop();
    }
}

# feed($view)
#
# Print a merged feed of every selected source.
#
#   $view - the raw 'view' CGI parameter; anything containing 'rss'
#           produces RSS, everything else produces Atom
#
# Honours the 'type' and 'no_NAME' CGI parameters described at the top of
# the file. Entries are emitted newest day first and, within a day, in
# source priority order, mirroring the HTML template.
sub feed {
    my $view = shift;
    $view = ($view =~ m/rss/) ? "RSS" : "Atom";

    # serve each format under its own media type
    # print CGI->header('text/xml; charset="utf-8"');
    my $content_type
        = ($view eq 'RSS') ? 'application/rss+xml' : 'application/atom+xml';
    print CGI->header(qq{$content_type; charset="utf-8"});

    my $feed = XML::Feed->new($view);
    $feed->link("http://husk.org/");
    $feed->title("husk.org - collected output");
    $feed->tagline("all the content that Paul Mison generates in one handy feed");
    $feed->generator("XML::Feed");

    # TODO give the option of defining by inclusion not exclusion
    # work out what we need to exclude: every no_NAME parameter names a
    # source to drop
    my @remove = map { substr($_, 3) } grep { /^no_/ } $q->param;

    # get type; an absent parameter means "no filter", the same as 'all'
    my $type = $q->param('type');
    $type = "" if (!defined $type || $type eq "all");

    # fetch data
    my ($sources, $list, $entries) = main(
        'type'   => $type,
        'remove' => \@remove,
        'view'   => $view,
    );

    if ($view eq 'Atom') {
        # add link rel="self"
        my $link = XML::Atom::Link->new('Namespace' => 'http://www.w3.org/2005/Atom');
        $link->set_ns();
        $link->type('application/atom+xml');
        $link->rel('self');
        $link->href('http://husk.org/?view=atom');    # TODO repeat params
        $feed->add_link($link);

        # add ID and updated
        my $now = DateTime->now;
        $now->set_formatter($formatter);
        $feed->id("tag:husk.org,2008:" . join("/", sort @{ $sources }));
        $feed->updated($now);
    }

    # replicate template ordering
    foreach my $date (reverse sort keys %{ $entries }) {
        foreach my $source (@{ $sources }) {
            my $dated = $entries->{$date}{$source} or next;
            $feed->add_entry($_) for @{ $dated };
        }
    }

    print $feed->as_xml;
}

### fetch data

# main(%args)
#
# Fetch and organise the entries of the selected sources.
#
# Named arguments (all optional):
#   view   - 'html' (default), 'Atom' or 'RSS'; for non-html views a source
#            whose format differs from the view is converted to it
#   type   - keep only sources whose 'type' contains this literal string
#   wanted - array ref of source names to use (default: every source)
#   remove - array ref of source names to drop from the wanted set
#
# Returns a three element list:
#   1. array ref of ALL source names ordered by priority (not only the
#      ones that were fetched)
#   2. array ref of every fetched entry, newest first
#   3. hash ref { 'YYYY-MM-DD' => { source => [ entries ] } } holding only
#      entries whose age is at most 7 months
#
# Dies if a source cannot be parsed.
sub main {
    my %args = @_;

    my $view = $args{'view'} || 'html';
    my $type = $args{'type'} || "";

    warn "Producing view '$view' with type '$type'";

    # start from empty containers so that a request selecting no source at
    # all still returns usable references rather than dying in the sort
    my $list    = [];
    my $entries = {};
    my $sources = sources();

    my $now = DateTime->now();
    $now->set_formatter($formatter);

    my @wanted = keys %{ $sources };
    if ($args{'wanted'}) {
        @wanted = @{ $args{wanted} };
    }

    if ($args{'remove'}) {
        my %removed = map { $_ => 1 } @{ $args{'remove'} };
        @wanted = grep { !$removed{$_} } @wanted;
    }

    foreach my $source (@wanted) {
        # $type comes from a CGI parameter, so match it as literal text
        # rather than letting it be compiled as a pattern
        next if ($type && $sources->{$source}{'type'} !~ /\Q$type\E/);

        my $url  = $sources->{$source}{'url'};
        my $uri  = URI->new($url);
        my $feed = XML::Feed->parse($uri)
            or die "can't parse $source: " . XML::Feed->errstr;

        # convert everything to Atom, if needed
        if ($view ne 'html' && lc $sources->{$source}{'format'} ne lc $view) {
            $feed = $feed->convert($view);
        }

        my @entries = $feed->entries;
        $_->{source} = $source for @entries;

        # group by date
        foreach my $entry ($feed->entries) {
            # add ID if needed; set author and source
            $entry->author("Paul Mison ($source)");

            # ok, this is harder than I thought
            # http://atomenabled.org/developers/syndication/atom-format-spec.php#element.source
            # $entry->source($source);

            # fix up Atom feed
            my $issued = $entry->issued;
            my $date   = $issued->ymd;
            my $gap    = $now - $issued;

            if ($view eq 'Atom') {
                $issued->set_formatter($formatter);
                $entry->updated($issued) if (!$entry->updated());
                $entry->id(make_tag($entry, $source))
                    if (!$entry->id || $entry->id =~ m(^http://));
            }

            # skip really old stuff
            # TODO stop after the end of delicious entries (or something)
            # rather than fixed time period
            next if ($gap->delta_months > 7);

            if ($source eq 'flickr') {
                # trim some crap from the feed: the "... posted a photo:"
                # line together with the newlines on either side of it
                my $desc = $entry->content->body;
                $desc =~ s(\n.* posted a photo:\n)()i;
                $entry->content($desc);
            }

            push @{ $entries->{$date}{$source} }, $entry;
        }

        push @{ $list }, @entries;
    }

    # sort list by date
    $list = [ sort { $b->issued <=> $a->issued } @{ $list } ];

    # sort sources by priority
    my @sources
        = sort { $sources->{$a}{priority} <=> $sources->{$b}{priority} }
        keys %{ $sources };

    return (\@sources, $list, $entries);
}

# flickr()
#
# Fetch the public Flickr photo feed (RSS 2.0) and return the parsed
# XML::Feed object. Dies if the feed cannot be parsed.
sub flickr {
    # get Flickr photos (REST, JSON, unused)
    # my $url = "http://api.flickr.com/services/rest/?method=flickr.photos.search&api_key=86a4e36f0d24a3ecdeec19c32e6fa89e&user_id=48600109393%40N01&per_page=10&format=json&nojsoncallback=1";
    # my $json = get($url);
    # my $data = jsonToObj($json);

    warn "Getting Flickr photos";

    # get Flickr photos (RSS)
    my $uri = URI->new('http://api.flickr.com/services/feeds/photos_public.gne?id=48600109393@N01&format=rss_200');
    my $flickr = XML::Feed->parse($uri)
        or die "can't parse Flickr: " . XML::Feed->errstr;

    return $flickr;
}

# make_tag($entry, $source)
#
# Build a tag: URI to use as an Atom entry ID.
#
#   $entry  - feed entry; its link() and issued() date are read
#   $source - name of the source the entry came from
#
# For 'delicious' the ID is "tag:husk.org,<date>:links/<md5 of the link>".
# For every other source the ID is derived from the entry link: the scheme
# is stripped, the first '#' becomes '/', and ",<date>:" is inserted before
# the first '/'.
#
# The substitutions are kept exactly as they are because changing them
# would change the IDs this function generates for existing entries.
sub make_tag {
    my $entry  = shift;
    my $source = shift;

    my $tag;
    my $url  = $entry->link();
    my $time = $entry->issued->ymd;

    if ($source ne 'delicious') {
        # http://diveintomark.org/archives/2004/05/28/howto-atom-id
        $tag = $url;
        $tag =~ s(^.*://)();
        $tag =~ s(#)(/);
        $tag =~ s(/)(,${time}:/);
        $tag = "tag:$tag";
    }
    else {
        # delicious special case, because I want to have an internal record
        $tag = "tag:husk.org,$time:links/";
        $tag .= md5_hex($url);
    }

    return $tag;
}

# sources()
#
# Return a hash ref describing every known source, keyed by source name.
# Each value holds:
#   url      - location of the feed
#   priority - sort position (lower comes first)
#   format   - 'rss' or 'atom', the format the feed is served in
#   type     - content category, matched against the 'type' filter
sub sources {
    my $sources = {
        'chaff' => {
            url      => 'http://husk.org/blog/index_full.rdf',
            priority => 1,
            format   => 'rss',
            type     => 'longtext',
        },
        'vox' => {
            url      => 'http://blech.vox.com/library/posts/atom-full.xml',
            priority => 2,
            format   => 'atom',
            type     => 'longtext',
        },
        'delicious' => {
            url      => 'http://del.icio.us/rss/blech',
            priority => 3,
            format   => 'rss',
            type     => 'shorttext',
        },
        'flickr' => {
            url      => 'http://api.flickr.com/services/feeds/photos_public.gne?id=48600109393@N01&format=atom',
            priority => 4,
            format   => 'atom',
            type     => 'image',
        },
        # 'flickr' => { url => 'http://api.flickr.com/services/feeds/photos_public.gne?id=48600109393@N01&format=rss_200',
        #               priority => 4,
        #               format => 'rss',
        #               type => 'image',
        #             },
        # 'flickrfaves'=> { url => 'http://api.flickr.com/services/feeds/photos_public.gne?id=48600109393@N01&format=rss_200',
        #               priority => 5,
        #               format => 'rss',
        #               type => 'image',
        #             },
        # 'ffffound' => { url => 'http://api.flickr.com/services/feeds/photos_public.gne?id=48600109393@N01&format=rss_200',
        #               priority => 6,
        #               format => 'rss',
        #               type => 'image',
        #             },
    };

    return $sources;
}