#!/usr/bin/perl -w
#
# gloop.pl - yet another blogbot
### setup
use strict;
use POE::Kernel;
use POE::Session;
use POE::Component::IRC;
use URI::Find;
use XML::XPath;
use Data::Dumper;
# options to set
my $debug=1;
# these need to be getopts
my $bot_nick = "gloop";
my $channel = '#gloop';
my $server = 'london.rhizomatic.net';
my $port = '6667';
my $xmlstore = '/home/blech/projects/gloop/xml/gloop/';
# my $xmlstore = '/home/paulm/personal/scribot/xml/gloop/';
### initilisation POE stuff
# This gets executed as soon as the kernel sets up this session.
sub _start {
my ($kernel, $session) = @_[KERNEL, SESSION];
my (%data);
my $xmlstore = '/home/blech/projects/gloop/xml/gloop/';
my %status = ( replies => 'notice',
addressing => 'optional',
formality => 'formal',
);
$kernel->alias_set( 'gloop_bot' );
$kernel->post( 'gloop_core', 'register', 'all');
# qw(snotice 332 public msg connected join)); should work, sigh
$kernel->post( 'gloop_core', 'connect', { Debug => 0,
Nick => $bot_nick,
Server => $server,
Port => $port,
Username => $bot_nick,
Ircname =>
'yet another blogbot', }
);
$_[HEAP]->{data} = \%data;
$_[HEAP]->{topic} = 'no topic is set';
$_[HEAP]->{xmlstore} = $xmlstore;
$_[HEAP]->{status} = \%status;
$_[HEAP]->{ignore} = [qw(dipsy slavorg scribot dadadodo)];
$kernel->post( 'gloop_bot', 'read_xml' );
print Dumper($_[HEAP]);
}
# After we successfully log into the IRC server, join a channel.
sub irc_001 {
my ($kernel) = $_[KERNEL];
$kernel->post( 'gloop_core', 'mode', $bot_nick, '+i' );
$kernel->post( 'gloop_core', 'join', $channel );
# $kernel->post( 'gloop_core', 'away', 'run away! run away!' );
}
sub irc_dcc_send {
my ($nick, $port, $file, $size, $done) = @_[ARG0 .. $#_];
printf "DCC SEND to $nick ($file): $done bytes of $size sent. %d%%\n",
($done / $size) * 100;
}
sub _default {
my ($state, $event, $args) = @_[STATE, ARG0, ARG1];
$args ||= [];
print "$state -- $event @$args\n" if $debug;
}
sub _stop {
my ($kernel) = $_[KERNEL];
print "Control session stopped.\n";
$kernel->post( 'gloop_core', 'quit', 'Regrouping (bbiab)' );
$kernel->alias_remove( 'gloop_kernel' );
}
sub irc_disconnected {
my ($server) = $_[ARG0];
# need reconnect code here
print "Lost connection to server $server.\n";
}
sub irc_error {
my $err = $_[ARG0];
print "Server error occurred! $err\n";
}
sub irc_kick {
my ($who, $where, $isitme, $reason) = @_[ARG0 .. ARG4];
print "Kicked from $where by $who: $reason\n" if $isitme eq $bot_nick;
}
### gloop stuff :: irc handlers
sub irc_msg {
my ($kernel, $session) = @_[KERNEL, SESSION];
my ($who, $where, $what) = @_[ARG0 .. ARG2];
print "irc_msg :: $who sent me >$what< on @$where\n";
$kernel->post( 'gloop_bot', 'parser', $who, $what, 'sender');
}
sub irc_public {
my ($kernel, $session, $heap) = @_[KERNEL, SESSION, HEAP];
my ($who, $where, $what) = @_[ARG0 .. ARG2];
print "irc_public:: $who sent me >$what< on @$where\n";
if ($what =~ m/^$bot_nick[,:;-] ?/i
|| $heap->{status}{addressing} eq 'optional') {
print q!$kernel->post( 'gloop_bot', 'parser', $who, $what,!,
q! 'default');!,"\n";
$kernel->post( 'gloop_bot', 'parser', $who, $what, 'default');
}
}
sub irc_topic {
my ($kernel, $heap, $topic) = @_[KERNEL, HEAP, ARG2];
$heap->{'topic'} = $topic;
$kernel->post( 'gloop_bot', 'dump_xml' );
}
#sub irc_333 {
# print "ping\n" if $debug;
# return;
#}
sub irc_331 {
my ($kernel, $heap, $topic) = @_[KERNEL, HEAP, ARG1];
$topic = "no topic is set";
$heap->{'topic'} = $topic;
$kernel->post( 'gloop_bot', 'dump_xml' );
}
### gloop stuff :: utility subs (not kernel posted)
sub naturally {
# just a sort sub
if (length $a != length $b) {
return length $a <=> length $b;
} else {
return $a cmp $b;
}
}
sub iso {
# convert given time to HHHH-MM-DD YY:MM
my $time = shift @_;
my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) =
gmtime($time);
$year+=1900; $mon++;
return sprintf '%04.f-%02.f-%02.f %02.f:%02.f',
$year, $mon, $mday, $hour,$min;
}
sub uris {
# use URI::Find to any URIs
my $text = shift;
my @uris;
find_uris($text,
sub { push @uris, $_[0]; $_[1] }
);
# print "Contains uris '",join("', '", @uris),"'\n" if $debug;
return @uris;
}
sub utterance_to_xml {
# convert utterances (ie what we hear) to XML
my $in = shift;
# entitites
$in =~ s/\>/g;
$in =~ s/>/\</g;
$in =~ s/&/\&/g;
$in =~ s/'/\'/g;
$in =~ s/"/\"/g; # " bbedit syntax fix (sigh)
# first let's mark up our wiki
$in =~ s!\*([^*]+)\*!$1!g;
# now we find URIs
my @uris = &uris($in);
my $uri_match;
foreach my $uri (@uris) {
# some people send URLs without trailing slashes. URI::Find adds,
# so we have to match them in the incoming and replace them in the
# outgoing text. Sigh.
($uri_match = $uri) =~ s!/$!!;
# image with comments
if ($in =~ m!\+\[([^|]*)\|$uri\]!) {
$in =~ s!\+\[([^|]*)\|$uri\]!!;
}
# image without comments
elsif ($in =~ m!\+\[$uri\]!) {
$in =~ s!\+\[$uri\]!!;
}
# url with title
elsif ($in =~ m!\[([^|]*)\|$uri_match/?\]!) {
$in =~ s!\[([^|]*)\|$uri_match/?\]!$1!;
}
# url without title. unlike chump, we pick up any old URL. Rar.
elsif ($in =~ m!\[?$uri_match/?\]?!) {
$in =~ s!\[?$uri_match/?\]?!$uri!;
}
}
return wantarray ? ($in, @uris) : $in;
}
sub lastitem {
# get the indetifier of the last n items, or the last one
my ($heap, $num) = @_;
$num = 1 if ($num !~ m/^(\d*)$/ || $num == 0);
my $items = scalar(keys(%{ $heap->{'data'} }));
$num = ($num > $items) ? $items : $num;
if (wantarray) {
return (sort naturally (keys(%{ $heap->{'data'} })))[-$num..-1];
} else {
return (sort naturally (keys(%{ $heap->{'data'} })))[-$num];
}
}
### gloop stuff :: command information
sub commands {
# various settings things gloop can respond to
my ($heap, $kernel, $type, $body, $target) = @_[HEAP, KERNEL, ARG0..ARG2];
if ($type eq 'view') {
# view a list of articles
$body =~ s/^view (\d+)//;
my $num = $1;
$num = 5 if !defined($num);
my @items = &lastitem($heap, $num);
print join(', ',@items), "\n";
foreach my $item (@items) {
$kernel->post( 'gloop_bot', 'viewlink', $item, $target );
}
return;
} elsif ($type =~ m/help/) {
# view some help text
$kernel->post( 'gloop_bot', 'help', $target, $body );
} elsif ($type eq 'status') {
# status
$kernel->post( 'gloop_bot', 'status', $target, $body );
} elsif ($type eq 'be') {
# change behaviour
$kernel->post( 'gloop_bot', 'change', $target, $body );
} elsif ($type eq 'shut' || $type eq 'wake') {
# also change, really
$body = ($type eq 'shut') ? 'be optional' : 'be required';
$kernel->post( 'gloop_bot', 'change', $target, $body );
} elsif ($type eq 'showmode') {
# compact status
$kernel->post( 'gloop_bot', 'showmode', $target, $body );
}
}
sub spew {
# tell the channel/enquirer about the stuff we've recorded
# deprecated
}
sub viewitem {
# view a single item in spew mode
my ($heap, $kernel, $item, $target) = @_[HEAP, KERNEL, ARG0..ARG1];
print "VIEWING ITEM $item\n";
print Dumper($heap);
my $message = "$heap->{'data'}{$item}{'uri'}\n";
$message .= $heap->{'data'}{$item}{'title'}."\n"
if (exists($heap->{'data'}{$item}{'title'}));
foreach my $comment (@{ $heap->{'data'}{$item}{'comment'} }) {
my ($nick) = keys(%{ $comment });
$message .= "($nick) $comment->{$nick}\n";
}
print "Sending:\n$message";
$kernel->post( 'gloop_core', $heap->{'status'}{'replies'}, $target,
$message);
}
sub viewlink {
# view a single link in spew mode
my ($heap, $kernel, $item, $target) = @_[HEAP, KERNEL, ARG0..ARG1];
print "VIEWING LINK $item\n";
if (defined($item) && exists($heap->{'data'}{$item})) {
my $message = $heap->{'data'}{$item}{'uri'};
if (exists($heap->{'data'}{$item}{'title'})) {
$message = "$heap->{'data'}{$item}{'title'} ($message)";
}
$kernel->post( 'gloop_core', $heap->{'status'}{'replies'}, $target,
"$item: $message");
}
}
sub help {
# tell people about how to use the bot
my ($heap, $kernel, $target, $body) = @_[HEAP, KERNEL, ARG0..ARG1];
my @help = (
"Post a URL by saying it on a line on its own",
"To post an item without a URL, say BLURB:This is the title",
"I will reply with a label, for example A",
"You can then append comments by saying A:This is a comment",
"To title a link, use a pipe as the first character of the comment",
"Eg. A:|This is the title",
"To see the last 5 links posted, say $bot_nick:view",
"For morefeatures, say $bot_nick:morehelp"
);
my @morehelp = (
"Put emphasis in a comment by using *asterisks*",
"To create an inline link in a comment, say:",
"A:Look at [this thing here|http://pants.heddley.com]",
"You can also link to inline images in a comment:",
"A:Chump logo [alt-text|http://pants.heddley.com/chump.png]",
"To see the last n links, say $bot_nick:view n (where n is a number)",
"To see the details of a link labelled A, say A: on a line on its own",
"Send any comments or questions to gloop\@husk.org"
);
$kernel->post( 'gloop_core', $heap->{'status'}{'replies'}, $target,
"Help [TODO]");
}
sub status {
# infobot-esque status info
my ($heap, $kernel, $target, $body) = @_[HEAP, KERNEL, ARG0..ARG1];
my $itemcount = scalar(keys(%{ $heap->{'data'} }));
my $method = ($heap->{'status'}{'replies'} eq 'notice')
? 'by notices'
: 'by talking';
my $awake = 'some time';
my $message = <{'status'}{'addressing'} mode, I am replying
$heap->{'status'}{formality}ly $method.
MESSAGE
$message =~ s/\n/ /g;
$kernel->post( 'gloop_core', $heap->{'status'}{'replies'}, $target,
$message);
}
sub change {
# set global config stuff
my ($heap, $kernel, $target, $command) = @_[HEAP, KERNEL, ARG0..ARG1];
$command =~ s/^be *//;
$heap->{'status'}{'replies'} = 'notice' if $command =~ m'notice';
$heap->{'status'}{'replies'} = 'privmsg' if $command =~ m'privmsg';
$heap->{'status'}{'addressing'} = 'optional' if $command =~ m'optional';
$heap->{'status'}{'addressing'} = 'required' if $command =~ m'required';
$heap->{'status'}{'formality'} = 'formal' if $command =~ m'formal';
$heap->{'status'}{'formality'} = 'informal' if $command =~ m'informal';
$heap->{'status'}{'formality'} = 'silent' if $command =~ m'silent';
}
sub showmode {
# show some mode information
my ($heap, $kernel, $target, $body) = @_[HEAP, KERNEL, ARG0..ARG1];
$kernel->post( 'gloop_core', $heap->{'status'}{'replies'}, $target,
'addressing is currently ',
uc($heap->{'status'}{'addressing'}));
}
### gloop stuff :: xml output
sub dump_xml {
# take %data and output it
my ($heap, $kernel) = @_[HEAP, KERNEL];
open(XML_OUT, ">".$xmlstore."index.xml")
or return "Error opening XML data for writing\n";
select XML_OUT;
my $time = time;
my $iso = &iso($time);
my $itemcount = scalar(keys(%{ $heap->{'data'} }));
$kernel->post( 'gloop_core', 'topic', $channel );
print <$iso$heap->{'topic'}
HEADER
foreach my $item (reverse sort naturally (keys %{ $heap->{'data'} })) {
if (defined($heap->{'data'}{$item}{'uri'})
&& $heap->{'data'}{$item}{'uri'} ne 'blurb') {
# standard link
print <
$heap->{'data'}{$item}{'uri'}$heap->{'data'}{$item}{'nick'}
LINK
} else {
# just blurb
print <
$heap->{'data'}{$item}{'nick'}
LINK
}
print qq!$heap->{'data'}{$item}{'title'}!
if (exists($heap->{'data'}{$item}{'title'}));
foreach my $comment (@{ $heap->{'data'}{$item}{'comment'} }) {
# reel around the comments
my ($nick) = keys(%{ $comment });
my $body = &utterance_to_xml($comment->{$nick});
print qq!$body\n!;
}
print "\n";
}
print "\n";
close(XML_OUT);
select STDOUT;
return;
}
sub read_xml {
my ($heap, $kernel) = @_[HEAP, KERNEL];
print "Start importing data\n" if $debug;
my $file = $xmlstore."index.xml";
my $xp = XML::XPath->new(filename => $file) or return;
my $item;
my %path = ('uri' => 'url',
'iso' => 'time',
'nick' => 'nick',
'title' => 'title',
'epoch' => 'time/@value',
);
# key xpath
my $nodeset = $xp->find('/churn/link'); # find all links
foreach my $node ($nodeset->get_nodelist) {
# now process down to data format
my %item = ();
# do main entries
map { $item{$_} = $xp->findvalue($path{$_}, $node) } keys %path;
# fix blurb
$item{'uri'} = 'blurb' if ($xp->findvalue('@type', $node) eq 'blurb');
# do comments
$item{'comment'} = [];
foreach my $comment ($xp->findnodes('comment', $node)) {
push @{ $item{'comment'} }, { $xp->findvalue('@nick', $comment) =>
$comment->string_value() };
}
# build structure
$item = (defined($item)) ? ++$item : 'A';
$heap->{'data'}{$item} = \%item;
}
print "Done importing data (last item $item)\n" if $debug;
return;
}
### gloop stuff :: entry modification bits
sub add {
# add or modify an entry
my ($heap, $kernel, $type, $item, $nick, $body) = @_[HEAP, KERNEL, ARG0..ARG3];
if ($type eq 'new') {
# it's a new entry, so we use the time
my $time = time;
my $iso = &iso($time);
my $item = $item;
print "$item\n";
$heap->{'data'}{$item}{'epoch'} = $time;
$heap->{'data'}{$item}{'iso'} = $iso;
$heap->{'data'}{$item}{'nick'} = $nick;
# now we do the processing
my @uris = &uris($body);
if (@uris) {
# ok, we handle this differently to chump
# if sent 'http://husk.org/ # my site'
# we make 'my site' the first entry; similarly, any other
# uris afterwards
my $body = $body;
my $uri = shift(@uris);
my $shorter_uri;
($shorter_uri = $uri) =~ s!/$!!;
$body =~ s!$shorter_uri/?!!;
$body =~ s!^ ?# ?!!;
# stringify the URI::Find object with ="$uri". Ta brev
$heap->{'data'}{$item}{'uri'} = "$uri";
if ($body) {
$kernel->post( 'gloop_bot', 'body', $item, $nick, $body );
}
} else {
# it's a blurb
$heap->{'data'}{$item}{'uri'} = 'blurb';
$heap->{'data'}{$item}{'title'} = $body;
}
$kernel->post( 'gloop_bot', 'dump_xml' );
print "Just added\n",Dumper($heap->{'data'}{$item}),"\n" if $debug;
}
if ($type eq 'comment') {
$kernel->post( 'gloop_bot', 'body', $item, $nick, $body );
$kernel->post( 'gloop_bot', 'dump_xml' );
print "Just added to\n",Dumper($heap->{'data'}{$item}),"\n" if $debug;
}
if ($type eq 'title') {
$heap->{'data'}{$item}{'title'} = $body;
$kernel->post( 'gloop_bot', 'dump_xml' );
print "Just added to\n",Dumper($heap->{'data'}{$item}),"\n" if $debug;
}
}
sub body {
# add comments to a pre-existing link
my ($heap, $kernel, $item, $nick, $body) = @_[HEAP, KERNEL, ARG0..ARG2];
print "Adding comment '$body' by '$nick' to '$item'\n";
push @{ $heap->{'data'}{$item}{'comment'} }, { "$nick" => "$body" };
}
sub parser {
# main command parser; this is getting a bit big really...
my ($heap, $kernel, $who, $what, $where) = @_[HEAP, KERNEL, ARG0..ARG2];
# who sent us stuff?
my $nick = $who;
$nick =~ s/!.*$//;
if (grep { $nick =~ $_ } @{ $heap->{'ignore'} }) {
print "### We ignore bots\n";
return;
}
my $target = ($where eq 'sender') ? $nick : $channel;
print "### PARSING\n";
print "Saw '$nick' tell me '$what'\n";
print "Will send replies to '$target' (from '$where')\n";
# first we parse the commands, as these are a bit odd in compatibility
# mode (need the bot name and a colon)
my @commands = qw(help morehelp view status be shut wake showmode);
if ($where ne 'sender') {
# we only accept things without names in private
foreach my $com (@commands) {
if ($what =~ m/^$bot_nick[,:;-] *$com/i) {
$what =~ s/^$bot_nick[,:;-] *$com/$com/i;
print "### COMMAND $com; passing on '$what'\n";
$kernel->post( 'gloop_bot', 'commands', $com, $what, $target );
return;
}
}
} else {
# we're in a privmsg
# we can be way more lenient in our processing
foreach my $com (@commands) {
if ($what =~ m/$com/i) {
$what =~ s/^$bot_nick://i;
print "### COMMAND $com; passing on '$what'\n";
$kernel->post( 'gloop_bot', 'commands', $com, $what, $target );
return;
}
}
}
# now we don't care if the bot nick is there, as we shouldn't
# be here otherwise
$what =~ s/^$bot_nick[,:;-] ?//i;
print "what now '$what'\n";
# is there a prefix letter?
my $item;
if ($heap->{'status'}{'addressing'} eq 'optional') {
# ok, we're stricter here. you *have* to be caps and you
# *have* to terminate with a :
if ($what =~ s/^([A-Z][A-Z]?): *//) {
$item = $1;
}
} else {
# we're far looser; you can be lowercase, and you can
# have one of ,:;- as in the bot's addressing, plus :
if ($what =~ s/^([A-Z][A-Z]?)[,:;-] *//) {
$item = uc($1);
}
}
if (defined($item)) {
# we're trying to actually address the bot
print "### GOT ITEM '$item': passing on '$what'\n";
if (exists $heap->{'data'}{$item}) {
# we can add this
# add stuff to $heap->{'data'}{$item}
if ($what eq '') {
# we're being asked to spew a single item
print "### VIEW ITEM\n";
$kernel->post( 'gloop_bot', 'viewitem', $item, $target );
return;
}
print "### ADD ITEM\n";
my $type = ($what =~ s/^\| *//) ? 'title' : 'comment';
$kernel->post( 'gloop_bot', 'add', $type, $item, $nick, $what );
return if $heap->{'status'}{'formality'} eq 'silent';
# tell the channel this amazing fact
my $message = ($type eq 'title') ? 'titled' : 'commented';
$message .= " item $item";
$message = "$nick $message" if ($heap->{'status'}{'formality'} eq 'informal');
$kernel->post( 'gloop_core', $heap->{'status'}{'replies'}, $target, $message);
return;
} else {
print "### NO SUCH ITEM\n";
# drop it on the floor, but let the user know
$kernel->post( 'gloop_core', $heap->{'status'}{'replies'}, $target,
"Label $item not found.");
return;
}
}
# otherwise, it's a new blurb or URI, we'll make it a title
# note that if we got here under optional addressing, we need
# to make sure there's either a URL or it starts with BLURB
if ($heap->{'status'}{'addressing'} eq 'optional' || $where eq 'sender') {
my $pass = 0;
$pass = 1 if ($what =~ s/^BLURB[:;-] *//);
my @uris = &uris($what);
$pass = 1 if (@uris);
return unless $pass;
}
# so first we find the last item in %data
$item = &lastitem($heap, 1);
$item = (defined($item)) ? ++$item : 'A';
print "ADDING '$item' ($heap->{'status'}{'addressing'}, $where)\n";
$kernel->post( 'gloop_bot', 'add', 'new', $item, $nick, $what );
# now we add it as a title
my $message;
if ($heap->{'status'}{'formality'} eq "informal") {
$message = "$nick added item $item, $what";
} else {
$message = "$item: $what from $nick";
}
$kernel->post( 'gloop_core', $heap->{'status'}{'replies'}, $target,
$message);
}
### here's where execution starts.
POE::Component::IRC->new( 'gloop_core' ) or
die "Can't instantiate new IRC component!\n";
POE::Session->new( 'main' => [qw(_start _stop _default irc_001 irc_kick
irc_public irc_msg irc_topic
irc_331 irc_error irc_disconnected
commands viewlink viewitem help status
change showmode dump_xml read_xml add
body parser)
]
);
$poe_kernel->run();
exit 0;
__END__
=head 1 TODO
=over 4
=item topic done
=item XML import
=item autoreboot
=item the surrounding stuff
=item split parse
=item split app
=item precise times (?)
=end