#!/usr/bin/perl
#
# bbcnews.pl - turn the BBC News headline to an RSS feed

use strict;

use XML::RSS;
use LWP::UserAgent;

# get file
#
# Builds an LWP user agent that announces itself as BlechRSS (prepended to
# LWP's own agent string), requests the text-only front page under $root and
# aborts the script when the response is not a success. On success the
# response body is left in $content for the scraping pass that follows.

my $ua = LWP::UserAgent->new;
$ua->agent("BlechRSS/0.1 " . $ua->agent);

my $root = "http://news.bbc.co.uk";
my $url  = $root . '/text_only.stm';

# direct method-call syntax instead of the ambiguous indirect-object form
# ("new Class ARGS"), which Perl can mis-parse
my $req = HTTP::Request->new(GET => $url);

my $res = $ua->request($req);

# report the URL and the HTTP status line so a failure can be diagnosed
die "error fetching $url: " . $res->status_line . "\n" if (!$res->is_success);
my $content = $res->content;

# grep data - note this is an Ugly Hack and will break horrifically if
#             the BBC ever change their text formatting
#
# Walks the fetched page one line at a time. The most recent <a href="...">
# seen is remembered in $templink, and every <h3>...</h3> headline is paired
# with it. Results are collected in the parallel arrays @headlines and @links
# (same index = same story).

my (@headlines, @links, $templink);
foreach my $line (split /\n/, $content) {

	if ($line =~ m!<a href="([^"]*)">!i) {
		$templink = $1;
#		print STDERR "Found link: $templink\n";
	}

	if ($line =~ m!<h3>([^<]*)</h3>!i) {
		my $headline = $1;
		$headline =~ s/^\s+//;
		$headline =~ s/\s+$//;

		# a headline seen before any link falls back to the bare site root
		my $href = defined $templink ? $templink : '';

		# only site-relative links get $root prepended; a link that already
		# carries a scheme (http:, https:, ...) is used untouched
		my $link = ($href =~ m!^[a-z][a-z0-9+.-]*:!i) ? $href : $root.$href;

		push (@headlines, $headline);
		push (@links, $link);

#		print STDERR "Found headline '$headline'\n";
#		print STDERR "Associating link '$link'\n";
	}

}

# Build an RSS 0.91 feed from the scraped @headlines / @links pairs and print
# it to STDOUT behind a CGI content-type header.

my $rss = XML::RSS->new(version => '0.91');

# the channel link has to be a full URL, so reuse $root (which includes the
# scheme) rather than a bare host name
$rss->channel(title => "BBC News",
			  link  => $root . '/');

# one feed item per headline; titles are entity-escaped by xml_protect
for my $i (0 .. $#headlines) {
	$rss->add_item(title => xml_protect($headlines[$i]),
				   link  => $links[$i]);
}

# $rss->save("/home/www/html/bbcnews.xml");

# print it out RSS stylee

print "Content-type:text/plain\n\n";

print $rss->as_string, "\n";

exit;

# utility sub (nicked from Trelane)
#
# xml_protect($text)
#   Returns a copy of $text in which every '&', '<' and '>' is replaced by
#   the matching XML entity (&amp;, &lt;, &gt;). The argument itself and the
#   caller's $_ are left untouched. An undefined argument is returned as is.

sub xml_protect {
    my ($text) = @_;
    return $text unless defined $text;

    # '&' must go first, otherwise the ampersands introduced by the other
    # two replacements would be escaped a second time
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;

    return $text;
}


# syntax highlighted by Code2HTML, v. 0.9