#!/usr/bin/perl -w # # gloop.pl - yet another blogbot ### setup use strict; use POE::Kernel; use POE::Session; use POE::Component::IRC; use URI::Find; use XML::XPath; use Data::Dumper; # options to set my $debug=1; # these need to be getopts my $bot_nick = "gloop"; my $channel = '#gloop'; my $server = 'london.rhizomatic.net'; my $port = '6667'; my $xmlstore = '/home/blech/projects/gloop/xml/gloop/'; # my $xmlstore = '/home/paulm/personal/scribot/xml/gloop/'; ### initilisation POE stuff # This gets executed as soon as the kernel sets up this session. sub _start { my ($kernel, $session) = @_[KERNEL, SESSION]; my (%data); my $xmlstore = '/home/blech/projects/gloop/xml/gloop/'; my %status = ( replies => 'notice', addressing => 'optional', formality => 'formal', ); $kernel->alias_set( 'gloop_bot' ); $kernel->post( 'gloop_core', 'register', 'all'); # qw(snotice 332 public msg connected join)); should work, sigh $kernel->post( 'gloop_core', 'connect', { Debug => 0, Nick => $bot_nick, Server => $server, Port => $port, Username => $bot_nick, Ircname => 'yet another blogbot', } ); $_[HEAP]->{data} = \%data; $_[HEAP]->{topic} = 'no topic is set'; $_[HEAP]->{xmlstore} = $xmlstore; $_[HEAP]->{status} = \%status; $_[HEAP]->{ignore} = [qw(dipsy slavorg scribot dadadodo)]; $kernel->post( 'gloop_bot', 'read_xml' ); print Dumper($_[HEAP]); } # After we successfully log into the IRC server, join a channel. sub irc_001 { my ($kernel) = $_[KERNEL]; $kernel->post( 'gloop_core', 'mode', $bot_nick, '+i' ); $kernel->post( 'gloop_core', 'join', $channel ); # $kernel->post( 'gloop_core', 'away', 'run away! run away!' ); } sub irc_dcc_send { my ($nick, $port, $file, $size, $done) = @_[ARG0 .. $#_]; printf "DCC SEND to $nick ($file): $done bytes of $size sent. %d%%\n", ($done / $size) * 100; } sub _default { my ($state, $event, $args) = @_[STATE, ARG0, ARG1]; $args ||= []; print "$state -- $event @$args\n" if $debug; } sub _stop { my ($kernel) = $_[KERNEL]; print "Control session stopped.\n"; $kernel->post( 'gloop_core', 'quit', 'Regrouping (bbiab)' ); $kernel->alias_remove( 'gloop_kernel' ); } sub irc_disconnected { my ($server) = $_[ARG0]; # need reconnect code here print "Lost connection to server $server.\n"; } sub irc_error { my $err = $_[ARG0]; print "Server error occurred! $err\n"; } sub irc_kick { my ($who, $where, $isitme, $reason) = @_[ARG0 .. ARG4]; print "Kicked from $where by $who: $reason\n" if $isitme eq $bot_nick; } ### gloop stuff :: irc handlers sub irc_msg { my ($kernel, $session) = @_[KERNEL, SESSION]; my ($who, $where, $what) = @_[ARG0 .. ARG2]; print "irc_msg :: $who sent me >$what< on @$where\n"; $kernel->post( 'gloop_bot', 'parser', $who, $what, 'sender'); } sub irc_public { my ($kernel, $session, $heap) = @_[KERNEL, SESSION, HEAP]; my ($who, $where, $what) = @_[ARG0 .. ARG2]; print "irc_public:: $who sent me >$what< on @$where\n"; if ($what =~ m/^$bot_nick[,:;-] ?/i || $heap->{status}{addressing} eq 'optional') { print q!$kernel->post( 'gloop_bot', 'parser', $who, $what,!, q! 'default');!,"\n"; $kernel->post( 'gloop_bot', 'parser', $who, $what, 'default'); } } sub irc_topic { my ($kernel, $heap, $topic) = @_[KERNEL, HEAP, ARG2]; $heap->{'topic'} = $topic; $kernel->post( 'gloop_bot', 'dump_xml' ); } #sub irc_333 { # print "ping\n" if $debug; # return; #} sub irc_331 { my ($kernel, $heap, $topic) = @_[KERNEL, HEAP, ARG1]; $topic = "no topic is set"; $heap->{'topic'} = $topic; $kernel->post( 'gloop_bot', 'dump_xml' ); } ### gloop stuff :: utility subs (not kernel posted) sub naturally { # just a sort sub if (length $a != length $b) { return length $a <=> length $b; } else { return $a cmp $b; } } sub iso { # convert given time to HHHH-MM-DD YY:MM my $time = shift @_; my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = gmtime($time); $year+=1900; $mon++; return sprintf '%04.f-%02.f-%02.f %02.f:%02.f', $year, $mon, $mday, $hour,$min; } sub uris { # use URI::Find to any URIs my $text = shift; my @uris; find_uris($text, sub { push @uris, $_[0]; $_[1] } ); # print "Contains uris '",join("', '", @uris),"'\n" if $debug; return @uris; } sub utterance_to_xml { # convert utterances (ie what we hear) to XML my $in = shift; # entitites $in =~ s//\</g; $in =~ s/&/\&/g; $in =~ s/'/\'/g; $in =~ s/"/\"/g; # " bbedit syntax fix (sigh) # first let's mark up our wiki $in =~ s!\*([^*]+)\*!$1!g; # now we find URIs my @uris = &uris($in); my $uri_match; foreach my $uri (@uris) { # some people send URLs without trailing slashes. URI::Find adds, # so we have to match them in the incoming and replace them in the # outgoing text. Sigh. ($uri_match = $uri) =~ s!/$!!; # image with comments if ($in =~ m!\+\[([^|]*)\|$uri\]!) { $in =~ s!\+\[([^|]*)\|$uri\]!$1!; } # image without comments elsif ($in =~ m!\+\[$uri\]!) { $in =~ s!\+\[$uri\]!!; } # url with title elsif ($in =~ m!\[([^|]*)\|$uri_match/?\]!) { $in =~ s!\[([^|]*)\|$uri_match/?\]!$1!; } # url without title. unlike chump, we pick up any old URL. Rar. elsif ($in =~ m!\[?$uri_match/?\]?!) { $in =~ s!\[?$uri_match/?\]?!$uri!; } } return wantarray ? ($in, @uris) : $in; } sub lastitem { # get the indetifier of the last n items, or the last one my ($heap, $num) = @_; $num = 1 if ($num !~ m/^(\d*)$/ || $num == 0); my $items = scalar(keys(%{ $heap->{'data'} })); $num = ($num > $items) ? $items : $num; if (wantarray) { return (sort naturally (keys(%{ $heap->{'data'} })))[-$num..-1]; } else { return (sort naturally (keys(%{ $heap->{'data'} })))[-$num]; } } ### gloop stuff :: command information sub commands { # various settings things gloop can respond to my ($heap, $kernel, $type, $body, $target) = @_[HEAP, KERNEL, ARG0..ARG2]; if ($type eq 'view') { # view a list of articles $body =~ s/^view (\d+)//; my $num = $1; $num = 5 if !defined($num); my @items = &lastitem($heap, $num); print join(', ',@items), "\n"; foreach my $item (@items) { $kernel->post( 'gloop_bot', 'viewlink', $item, $target ); } return; } elsif ($type =~ m/help/) { # view some help text $kernel->post( 'gloop_bot', 'help', $target, $body ); } elsif ($type eq 'status') { # status $kernel->post( 'gloop_bot', 'status', $target, $body ); } elsif ($type eq 'be') { # change behaviour $kernel->post( 'gloop_bot', 'change', $target, $body ); } elsif ($type eq 'shut' || $type eq 'wake') { # also change, really $body = ($type eq 'shut') ? 'be optional' : 'be required'; $kernel->post( 'gloop_bot', 'change', $target, $body ); } elsif ($type eq 'showmode') { # compact status $kernel->post( 'gloop_bot', 'showmode', $target, $body ); } } sub spew { # tell the channel/enquirer about the stuff we've recorded # deprecated } sub viewitem { # view a single item in spew mode my ($heap, $kernel, $item, $target) = @_[HEAP, KERNEL, ARG0..ARG1]; print "VIEWING ITEM $item\n"; print Dumper($heap); my $message = "$heap->{'data'}{$item}{'uri'}\n"; $message .= $heap->{'data'}{$item}{'title'}."\n" if (exists($heap->{'data'}{$item}{'title'})); foreach my $comment (@{ $heap->{'data'}{$item}{'comment'} }) { my ($nick) = keys(%{ $comment }); $message .= "($nick) $comment->{$nick}\n"; } print "Sending:\n$message"; $kernel->post( 'gloop_core', $heap->{'status'}{'replies'}, $target, $message); } sub viewlink { # view a single link in spew mode my ($heap, $kernel, $item, $target) = @_[HEAP, KERNEL, ARG0..ARG1]; print "VIEWING LINK $item\n"; if (defined($item) && exists($heap->{'data'}{$item})) { my $message = $heap->{'data'}{$item}{'uri'}; if (exists($heap->{'data'}{$item}{'title'})) { $message = "$heap->{'data'}{$item}{'title'} ($message)"; } $kernel->post( 'gloop_core', $heap->{'status'}{'replies'}, $target, "$item: $message"); } } sub help { # tell people about how to use the bot my ($heap, $kernel, $target, $body) = @_[HEAP, KERNEL, ARG0..ARG1]; my @help = ( "Post a URL by saying it on a line on its own", "To post an item without a URL, say BLURB:This is the title", "I will reply with a label, for example A", "You can then append comments by saying A:This is a comment", "To title a link, use a pipe as the first character of the comment", "Eg. A:|This is the title", "To see the last 5 links posted, say $bot_nick:view", "For morefeatures, say $bot_nick:morehelp" ); my @morehelp = ( "Put emphasis in a comment by using *asterisks*", "To create an inline link in a comment, say:", "A:Look at [this thing here|http://pants.heddley.com]", "You can also link to inline images in a comment:", "A:Chump logo [alt-text|http://pants.heddley.com/chump.png]", "To see the last n links, say $bot_nick:view n (where n is a number)", "To see the details of a link labelled A, say A: on a line on its own", "Send any comments or questions to gloop\@husk.org" ); $kernel->post( 'gloop_core', $heap->{'status'}{'replies'}, $target, "Help [TODO]"); } sub status { # infobot-esque status info my ($heap, $kernel, $target, $body) = @_[HEAP, KERNEL, ARG0..ARG1]; my $itemcount = scalar(keys(%{ $heap->{'data'} })); my $method = ($heap->{'status'}{'replies'} eq 'notice') ? 'by notices' : 'by talking'; my $awake = 'some time'; my $message = <{'status'}{'addressing'} mode, I am replying $heap->{'status'}{formality}ly $method. MESSAGE $message =~ s/\n/ /g; $kernel->post( 'gloop_core', $heap->{'status'}{'replies'}, $target, $message); } sub change { # set global config stuff my ($heap, $kernel, $target, $command) = @_[HEAP, KERNEL, ARG0..ARG1]; $command =~ s/^be *//; $heap->{'status'}{'replies'} = 'notice' if $command =~ m'notice'; $heap->{'status'}{'replies'} = 'privmsg' if $command =~ m'privmsg'; $heap->{'status'}{'addressing'} = 'optional' if $command =~ m'optional'; $heap->{'status'}{'addressing'} = 'required' if $command =~ m'required'; $heap->{'status'}{'formality'} = 'formal' if $command =~ m'formal'; $heap->{'status'}{'formality'} = 'informal' if $command =~ m'informal'; $heap->{'status'}{'formality'} = 'silent' if $command =~ m'silent'; } sub showmode { # show some mode information my ($heap, $kernel, $target, $body) = @_[HEAP, KERNEL, ARG0..ARG1]; $kernel->post( 'gloop_core', $heap->{'status'}{'replies'}, $target, 'addressing is currently ', uc($heap->{'status'}{'addressing'})); } ### gloop stuff :: xml output sub dump_xml { # take %data and output it my ($heap, $kernel) = @_[HEAP, KERNEL]; open(XML_OUT, ">".$xmlstore."index.xml") or return "Error opening XML data for writing\n"; select XML_OUT; my $time = time; my $iso = &iso($time); my $itemcount = scalar(keys(%{ $heap->{'data'} })); $kernel->post( 'gloop_core', 'topic', $channel ); print < $iso $heap->{'topic'} HEADER foreach my $item (reverse sort naturally (keys %{ $heap->{'data'} })) { if (defined($heap->{'data'}{$item}{'uri'}) && $heap->{'data'}{$item}{'uri'} ne 'blurb') { # standard link print < $heap->{'data'}{$item}{'uri'} $heap->{'data'}{$item}{'nick'} LINK } else { # just blurb print < $heap->{'data'}{$item}{'nick'} LINK } print qq!$heap->{'data'}{$item}{'title'}! if (exists($heap->{'data'}{$item}{'title'})); foreach my $comment (@{ $heap->{'data'}{$item}{'comment'} }) { # reel around the comments my ($nick) = keys(%{ $comment }); my $body = &utterance_to_xml($comment->{$nick}); print qq!$body\n!; } print "\n"; } print "\n"; close(XML_OUT); select STDOUT; return; } sub read_xml { my ($heap, $kernel) = @_[HEAP, KERNEL]; print "Start importing data\n" if $debug; my $file = $xmlstore."index.xml"; my $xp = XML::XPath->new(filename => $file) or return; my $item; my %path = ('uri' => 'url', 'iso' => 'time', 'nick' => 'nick', 'title' => 'title', 'epoch' => 'time/@value', ); # key xpath my $nodeset = $xp->find('/churn/link'); # find all links foreach my $node ($nodeset->get_nodelist) { # now process down to data format my %item = (); # do main entries map { $item{$_} = $xp->findvalue($path{$_}, $node) } keys %path; # fix blurb $item{'uri'} = 'blurb' if ($xp->findvalue('@type', $node) eq 'blurb'); # do comments $item{'comment'} = []; foreach my $comment ($xp->findnodes('comment', $node)) { push @{ $item{'comment'} }, { $xp->findvalue('@nick', $comment) => $comment->string_value() }; } # build structure $item = (defined($item)) ? ++$item : 'A'; $heap->{'data'}{$item} = \%item; } print "Done importing data (last item $item)\n" if $debug; return; } ### gloop stuff :: entry modification bits sub add { # add or modify an entry my ($heap, $kernel, $type, $item, $nick, $body) = @_[HEAP, KERNEL, ARG0..ARG3]; if ($type eq 'new') { # it's a new entry, so we use the time my $time = time; my $iso = &iso($time); my $item = $item; print "$item\n"; $heap->{'data'}{$item}{'epoch'} = $time; $heap->{'data'}{$item}{'iso'} = $iso; $heap->{'data'}{$item}{'nick'} = $nick; # now we do the processing my @uris = &uris($body); if (@uris) { # ok, we handle this differently to chump # if sent 'http://husk.org/ # my site' # we make 'my site' the first entry; similarly, any other # uris afterwards my $body = $body; my $uri = shift(@uris); my $shorter_uri; ($shorter_uri = $uri) =~ s!/$!!; $body =~ s!$shorter_uri/?!!; $body =~ s!^ ?# ?!!; # stringify the URI::Find object with ="$uri". Ta brev $heap->{'data'}{$item}{'uri'} = "$uri"; if ($body) { $kernel->post( 'gloop_bot', 'body', $item, $nick, $body ); } } else { # it's a blurb $heap->{'data'}{$item}{'uri'} = 'blurb'; $heap->{'data'}{$item}{'title'} = $body; } $kernel->post( 'gloop_bot', 'dump_xml' ); print "Just added\n",Dumper($heap->{'data'}{$item}),"\n" if $debug; } if ($type eq 'comment') { $kernel->post( 'gloop_bot', 'body', $item, $nick, $body ); $kernel->post( 'gloop_bot', 'dump_xml' ); print "Just added to\n",Dumper($heap->{'data'}{$item}),"\n" if $debug; } if ($type eq 'title') { $heap->{'data'}{$item}{'title'} = $body; $kernel->post( 'gloop_bot', 'dump_xml' ); print "Just added to\n",Dumper($heap->{'data'}{$item}),"\n" if $debug; } } sub body { # add comments to a pre-existing link my ($heap, $kernel, $item, $nick, $body) = @_[HEAP, KERNEL, ARG0..ARG2]; print "Adding comment '$body' by '$nick' to '$item'\n"; push @{ $heap->{'data'}{$item}{'comment'} }, { "$nick" => "$body" }; } sub parser { # main command parser; this is getting a bit big really... my ($heap, $kernel, $who, $what, $where) = @_[HEAP, KERNEL, ARG0..ARG2]; # who sent us stuff? my $nick = $who; $nick =~ s/!.*$//; if (grep { $nick =~ $_ } @{ $heap->{'ignore'} }) { print "### We ignore bots\n"; return; } my $target = ($where eq 'sender') ? $nick : $channel; print "### PARSING\n"; print "Saw '$nick' tell me '$what'\n"; print "Will send replies to '$target' (from '$where')\n"; # first we parse the commands, as these are a bit odd in compatibility # mode (need the bot name and a colon) my @commands = qw(help morehelp view status be shut wake showmode); if ($where ne 'sender') { # we only accept things without names in private foreach my $com (@commands) { if ($what =~ m/^$bot_nick[,:;-] *$com/i) { $what =~ s/^$bot_nick[,:;-] *$com/$com/i; print "### COMMAND $com; passing on '$what'\n"; $kernel->post( 'gloop_bot', 'commands', $com, $what, $target ); return; } } } else { # we're in a privmsg # we can be way more lenient in our processing foreach my $com (@commands) { if ($what =~ m/$com/i) { $what =~ s/^$bot_nick://i; print "### COMMAND $com; passing on '$what'\n"; $kernel->post( 'gloop_bot', 'commands', $com, $what, $target ); return; } } } # now we don't care if the bot nick is there, as we shouldn't # be here otherwise $what =~ s/^$bot_nick[,:;-] ?//i; print "what now '$what'\n"; # is there a prefix letter? my $item; if ($heap->{'status'}{'addressing'} eq 'optional') { # ok, we're stricter here. you *have* to be caps and you # *have* to terminate with a : if ($what =~ s/^([A-Z][A-Z]?): *//) { $item = $1; } } else { # we're far looser; you can be lowercase, and you can # have one of ,:;- as in the bot's addressing, plus : if ($what =~ s/^([A-Z][A-Z]?)[,:;-] *//) { $item = uc($1); } } if (defined($item)) { # we're trying to actually address the bot print "### GOT ITEM '$item': passing on '$what'\n"; if (exists $heap->{'data'}{$item}) { # we can add this # add stuff to $heap->{'data'}{$item} if ($what eq '') { # we're being asked to spew a single item print "### VIEW ITEM\n"; $kernel->post( 'gloop_bot', 'viewitem', $item, $target ); return; } print "### ADD ITEM\n"; my $type = ($what =~ s/^\| *//) ? 'title' : 'comment'; $kernel->post( 'gloop_bot', 'add', $type, $item, $nick, $what ); return if $heap->{'status'}{'formality'} eq 'silent'; # tell the channel this amazing fact my $message = ($type eq 'title') ? 'titled' : 'commented'; $message .= " item $item"; $message = "$nick $message" if ($heap->{'status'}{'formality'} eq 'informal'); $kernel->post( 'gloop_core', $heap->{'status'}{'replies'}, $target, $message); return; } else { print "### NO SUCH ITEM\n"; # drop it on the floor, but let the user know $kernel->post( 'gloop_core', $heap->{'status'}{'replies'}, $target, "Label $item not found."); return; } } # otherwise, it's a new blurb or URI, we'll make it a title # note that if we got here under optional addressing, we need # to make sure there's either a URL or it starts with BLURB if ($heap->{'status'}{'addressing'} eq 'optional' || $where eq 'sender') { my $pass = 0; $pass = 1 if ($what =~ s/^BLURB[:;-] *//); my @uris = &uris($what); $pass = 1 if (@uris); return unless $pass; } # so first we find the last item in %data $item = &lastitem($heap, 1); $item = (defined($item)) ? ++$item : 'A'; print "ADDING '$item' ($heap->{'status'}{'addressing'}, $where)\n"; $kernel->post( 'gloop_bot', 'add', 'new', $item, $nick, $what ); # now we add it as a title my $message; if ($heap->{'status'}{'formality'} eq "informal") { $message = "$nick added item $item, $what"; } else { $message = "$item: $what from $nick"; } $kernel->post( 'gloop_core', $heap->{'status'}{'replies'}, $target, $message); } ### here's where execution starts. POE::Component::IRC->new( 'gloop_core' ) or die "Can't instantiate new IRC component!\n"; POE::Session->new( 'main' => [qw(_start _stop _default irc_001 irc_kick irc_public irc_msg irc_topic irc_331 irc_error irc_disconnected commands viewlink viewitem help status change showmode dump_xml read_xml add body parser) ] ); $poe_kernel->run(); exit 0; __END__ =head 1 TODO =over 4 =item topic done =item XML import =item autoreboot =item the surrounding stuff =item split parse =item split app =item precise times (?) =end