#!/usr/bin/perl -w
###
# This is a script to import Thingamablog posts to textpattern.
#
# The script parses the Thingamablog (http://thingamablog.sf.net/)
# RSS feed exports and writes SQL statements to standard output. The
# statements import categories and posts to textpattern
# (http://textpattern.com). If one adjusts the statements the whole thing
# should work with any RSS feed and any blog tool. Further descriptions
# come within the code.
#
# Shortcomings, TODOs:
# - By now posts can only belong to two or fewer categories to be handled
#   correctly.
# - Figure out what the values $flt, $rgt are meant for in textpattern
# - Write dummy statements for authors that appeared in the feed
# - unHTML the posts for textpattern's 'Body' column
#
# Author: joerg.feuerhake@free-penguin.org
# Date: 2004/10/15
# License: GPL http://fsf.org/gpl.txt
# Version: 0.1
# Status: Alpha
#
###

use strict;
use warnings;

use XML::RSS::Parser;    # maybe you have to download this
use URI;
use LWP::UserAgent;

# Please give correct parameters here.

# The URL(s) to retrieve the feed from.
my @places = ( 'http://localhost/xml/FeuerhakeLog.xml' );

# The MySQL database to import to.
my $dbname = "blog";

# Apparently the RSS parser used here merges the several category nodes
# into one string. To divide that string one has to know the true category
# names; put them here:
my @catwords = (
    "Deutsch",      "English",       "Rechnerstoff", "GeekyStuff",
    "EveryDayLife", "Begebenheiten", "Creatives",    "Economics",
    "Movies",       "Politics",
);

# These numbers are important for the import of the categories, somehow.
# If there are no categories defined yet let $flt be 2 and $rgt be 3.
# NOTE(review): they end up in the lft/rgt columns of the category table,
# which look like nested-set (tree) bounds -- confirm against the
# textpattern schema before relying on the values generated below.
my $flt = 4;
my $rgt = 5;

# You should not edit variables below this line.

my $ua = LWP::UserAgent->new;
$ua->agent('XML::RSS::ThinGamaBlogParser');

my $p = XML::RSS::Parser->new;

# The article table of textpattern. This should be left unedited.
my $tablename = "textpattern";

# The category table of textpattern. This should be left unedited.
my $cattablename = "txp_category";

# Escape a value so it can be embedded in a quoted MySQL string literal.
# Backslashes are escaped together with both quote characters; otherwise a
# value ending in a backslash would swallow the closing quote of the
# literal. An undefined value is treated as the empty string.
sub sql_escape {
    my ($text) = @_;
    $text = '' unless defined $text;
    $text =~ s/(["'\\])/\\$1/g;
    return $text;
}

# Work out which known categories occur in the merged category string.
# Returns a two-element list: the first known category found and, if more
# than one matched, the last one found (anything in between is dropped --
# only two category columns are filled). Unmatched slots are ''.
sub match_categories {
    my ( $merged, @known ) = @_;
    $merged = '' unless defined $merged;

    # \Q..\E keeps regex metacharacters in a category name literal.
    my @found = grep { $merged =~ /\Q$_\E/ } @known;

    my $first  = @found     ? $found[0]  : '';
    my $second = @found > 1 ? $found[-1] : '';
    return ( $first, $second );
}

foreach my $place (@places) {

    # Get the feed.
    my $req  = HTTP::Request->new( GET => URI->new($place) );
    my $feed = $ua->request($req);

    # Do not hand an HTTP error page to the XML parser.
    unless ( $feed->is_success ) {
        warn "Could not fetch $place: " . $feed->status_line . "\n";
        next;
    }

    # Parse the feed.
    $p->parse( $feed->content );

    foreach my $item ( @{ $p->items } ) {

        # Match categories against the raw (unescaped) category string.
        my ( $cat1, $cat2 ) =
          match_categories( $item->{'category'}, @catwords );

        my $title       = sql_escape( $item->{'title'} );
        my $author      = sql_escape( $item->{'author'} );
        my $description = sql_escape( $item->{'description'} );

        # NOTE(review): pubDate is inserted as delivered by the feed. RSS
        # dates are usually RFC 822 formatted, which is not the MySQL
        # DATETIME format -- confirm the database accepts them.
        my $posted = sql_escape( $item->{'pubDate'} );

        # Write the insert statement.
        print "INSERT INTO " . $dbname . "." . $tablename
          . " (Title, AuthorID, Posted, LastMod, Category1, Category2, Body, Body_html, AnnotateInvite, Section) Values ('"
          . join( "', '",
            $title, $author, $posted, $posted,
            sql_escape($cat1), sql_escape($cat2),
            $description, $description )
          . "', 'Comment', 'article');\n";
    }
}

# Write an insert statement for every known category.
foreach my $cate (@catwords) {
    print "INSERT INTO " . $dbname . "." . $cattablename
      . " (name, type, parent, lft, rgt) VALUES('"
      . sql_escape($cate)
      . "','article','root',"
      . $flt . ", "
      . $rgt . ");\n";
    $flt = $flt + 2;
    $rgt = $rgt + 2;
}

# done