#!/usr/bin/perl
#
# bbcnews.pl - turn the BBC News headline to an RSS feed
use strict;
use XML::RSS;
use LWP::UserAgent;
# Enable warnings from here to the end of the file (the pragma is lexically
# scoped, so it also covers the utility sub defined further down).
use warnings;

# --- get file -------------------------------------------------------------
# Fetch the text-only BBC News front page.  $root is also used below to
# turn site-relative hrefs into absolute URLs and as the channel link.
my $root = "http://news.bbc.co.uk";
my $ua   = LWP::UserAgent->new;
$ua->agent("BlechRSS/0.1 " . $ua->agent);
my $req = HTTP::Request->new(GET => $root . '/text_only.stm');
my $res = $ua->request($req);
# Include the HTTP status line so a failure can actually be diagnosed.
die "error fetching: " . $res->status_line . "\n" unless $res->is_success;
my $content = $res->content;

# --- grep data ------------------------------------------------------------
# Note this is an Ugly Hack and will break horrifically if the BBC ever
# change their text formatting: every <h3> headline is paired with the most
# recent <a href="..."> seen on the same or an earlier line.
my @items;            # list of { title => ..., link => ... } in page order
my $templink = '';    # last href seen; empty until the first link appears
for my $line (split /\n/, $content) {
    if ($line =~ m!<a href="([^"]*)">!i) {
        $templink = $1;
        # print STDERR "Found link: $templink\n";
    }
    if ($line =~ m!<h3>([^<]*)</h3>!i) {
        my $headline = $1;
        $headline =~ s/^\s+//;
        $headline =~ s/\s+$//;

        # Build an absolute URL without mangling hrefs that already are one.
        my $link;
        if ($templink eq '') {
            $link = $root;                   # headline seen before any link
        }
        elsif ($templink =~ m{^[a-z][a-z0-9+.\-]*:}i) {
            $link = $templink;               # already has a scheme
        }
        elsif ($templink =~ m{^/}) {
            $link = $root . $templink;       # site-relative path
        }
        else {
            $link = $root . '/' . $templink; # relative path, no leading slash
        }

        push @items, { title => $headline, link => $link };
        # print STDERR "Found headline '$headline'\n";
        # print STDERR "Associating link '$link'\n";
    }
}

# --- build the feed -------------------------------------------------------
my $rss = XML::RSS->new(version => '0.91');
# The channel link must be a full URL, and RSS 0.91 expects a description.
$rss->channel(
    title       => "BBC News",
    link        => $root,
    description => "BBC News headlines",
);
for my $item (@items) {
    # NOTE(review): some XML::RSS versions entity-encode output themselves;
    # confirm against the installed version to avoid double escaping.
    $rss->add_item(
        title => xml_protect($item->{title}),
        link  => $item->{link},
    );
}
# $rss->save("/home/www/html/bbcnews.xml");

# print it out RSS stylee
print "Content-type:text/plain\n\n";
print $rss->as_string, "\n";
exit;
# utility sub (nicked from Trelane)
#
# xml_protect($text)
#   Returns a copy of $text with every XML-special character escaped:
#   '&' becomes '&amp;', '<' becomes '&lt;' and '>' becomes '&gt;'.
#   Escaping is unconditional, so an '&' that already starts an entity is
#   escaped again.  An undefined argument is returned unchanged.
#   Works on a lexical copy, so the caller's $_ is left alone.
sub xml_protect {
    my ($text) = @_;
    return $text unless defined $text;
    # '&' must be handled first, otherwise the ampersands introduced by the
    # two substitutions below would themselves be escaped.
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    return $text;
}
# syntax highlighted by Code2HTML, v. 0.9