#!/usr/bin/python
import cgi
import ftplib
import re
import string
import sys
import urllib
from HTMLParser import HTMLParser
from xml.sax import saxutils

# Simple Python app to generate an RSS (version 0.92) site summary for http://www.cix.co.uk/~jimh/weblog/blogger.html
# It can also FTP the generated file if required (see the "Expected params" comment later).
#
# Based on a script by Mark Pilgrim: http://diveintomark.org/archives/2002/08/06.html#feeding_zeldman
#
# Jim Hughes - Email: jim@fineway.cx - Jabber: jimh@amessage.de or jimh@jabber.at

class MyHTMLParser(HTMLParser):
    """Reduce an HTML fragment to entity-escaped text for an RSS item.

    Most tags are simply dropped.  <br> and <p> become CRLF line breaks,
    and <a> tags are kept but written in escaped form (&lt;a ...&gt;) so
    that they can be embedded in XML text.  Whatever has been collected
    when the first </b> is seen becomes the title; everything collected
    after that point is the content.  Retrieve the two parts with the
    get_title() and get_content() methods.
    """

    # Extra replacements applied to text on top of the standard &, < and >
    # handling done by saxutils.escape().
    _QUOTE_ENTITIES = {"'": "&#39;", '"': "&quot;"}

    def reset(self):
        """Reset the base parser and start with an empty title and content."""
        HTMLParser.reset(self)
        self._title = ""
        self._content = ""
        self._hadtitle = False

    def handle_starttag(self, tag, attrs):
        """Turn <br>/<p> into CRLF and copy <a ...> through in escaped form.

        attrs is the list of (name, value) pairs supplied by HTMLParser;
        value is None for an attribute written without a value.
        """
        if tag in ("br", "p"):
            self._content += "\r\n"
        elif tag == "a":
            pieces = ["&lt;a"]
            for k, v in attrs:
                if v is None:
                    # Valueless attribute such as <a name>: there is nothing
                    # to escape, so emit just the attribute name.
                    pieces.append(' ' + k)
                else:
                    pieces.append(' ' + k + '=&quot;' + saxutils.escape(v) + '&quot;')
            pieces.append("&gt;")
            self._content += "".join(pieces)

    def handle_endtag(self, tag):
        """Capture the title at the first </b> and copy </a> through escaped."""
        if not self._hadtitle and tag == "b":
            # Everything gathered so far is the title; the content starts
            # again from empty.  Later </b> tags are ignored.
            self._hadtitle = True
            self._title = self._content
            self._content = ""
        if tag == "a":
            self._content += "&lt;/a&gt;"

    def handle_data(self, data):
        """Append text, escaping &, <, > and both kinds of quote."""
        self._content += saxutils.escape(data, self._QUOTE_ENTITIES)

    def handle_entityref(self, name):
        """Render &amp; as the word "and"; drop every other named entity."""
        if name == "amp":
            self._content += "and"
        # Other entities (e.g. &nbsp;) are deliberately not copied through.

    def get_title(self):
        """Return the captured title with surrounding whitespace removed."""
        return self._title.strip()

    def get_content(self):
        """Return the captured content with surrounding whitespace removed."""
        return self._content.strip()


# Trivial function to put an ASCII file onto an FTP site
def ftpPut( site, user, pwd, path, srcfile, destfile ):
    """Upload the text file srcfile to an FTP server as destfile.

    site     - host name of the FTP server
    user/pwd - login credentials
    path     - remote directory to change into before uploading
    srcfile  - local file to send (transferred line by line, ASCII mode)
    destfile - name to store the file under on the server

    The file is only sent if, after the cwd, the server reports exactly
    `path` as its working directory; otherwise nothing is uploaded.
    Errors from ftplib or from opening the local file propagate to the
    caller; the local file and the connection are released either way.
    """
    ftp = ftplib.FTP( site )	# connect to host
    try:
#        ftp.set_debuglevel(1)	# uncomment this line when testing
        ftp.login( user, pwd )
        ftp.cwd( path )
        if path == ftp.pwd():
            # Binary mode: storlines() normalises the line endings itself,
            # and on Python 3 it requires a file opened in binary mode.
            fileA = open( srcfile, "rb" )
            try:
                ftp.storlines("STOR " + destfile, fileA)
            finally:
                fileA.close()
    finally:
        # The original read `ftp.quit` without parentheses, so QUIT was
        # never sent.  Fall back to a plain close() if the polite QUIT
        # fails (e.g. the connection is already broken) so that this
        # cleanup does not hide the original error.
        try:
            ftp.quit()
        except ftplib.all_errors:
            ftp.close()

#
# Expected params:
#
# 1 - command (gen, put or both)
# 2 - file name
# 3 - ftp user
# 4 - ftp password
# 5 - ftp directory
#

#
# Check the command line
#

# Fail with a usage message, rather than an IndexError part-way through the
# run, when the command or one of the arguments it needs is missing.
_usage = "usage: %s gen|put|both file [ftp-user ftp-password ftp-directory]" % sys.argv[0]
if len(sys.argv) < 3 or sys.argv[1] not in ("gen", "put", "both"):
    sys.exit(_usage)
if sys.argv[1] != "gen" and len(sys.argv) < 6:
    sys.exit(_usage)

#
# Generate RSS file
#

if sys.argv[1] in ("gen", "both"):

    # Each match yields (html of one posting, permalink url).
    # NOTE(review): the range A-z also matches [ \ ] ^ _ and `; it is kept
    # unchanged so that exactly the same postings are matched as before.
    _pattern = re.compile(r'<a name=".*?">&nbsp;</a><br>(.*?).posted by [ A-z]* at <a title="permalink" href="(.*?)">..:.. UTC', re.DOTALL)

    # Download the page before opening the output file, so that a failed
    # download does not leave an existing feed file truncated.
    _page = urllib.urlopen('http://www.cix.co.uk/~jimh/weblog/blogger.html')
    try:
        data = _page.read()
    finally:
        _page.close()

    opFile = open( sys.argv[2], "w" )
    try:
        opFile.write( """<?xml version="1.0" encoding="iso-8859-1"?>

<rss version="0.92">
<channel>
<title>Feet Up!</title>
<link>http://www.cix.co.uk/~jimh/weblog/blogger.html</link>
<description>Feet Up!</description>
<language>en-GB</language>\n""" )

        _parser = MyHTMLParser()
        for description, link in _pattern.findall(data):
            # One <item> per posting; the parser is reset so that title and
            # content from the previous posting do not leak into this one.
            opFile.write( "<item>\n" )
            _parser.reset()
            _parser.feed(description)
            opFile.write( "<title>%s</title>\n" % _parser.get_title() )
            opFile.write( "<description>%s</description>\n" % _parser.get_content() )
            opFile.write( "<link>%s</link>\n" % link )
            opFile.write( "</item>\n" )
        opFile.write( "</channel>\n</rss>\n" )
    finally:
        # Close the file even if parsing or writing fails part-way.
        opFile.close()

#
# FTP RSS file
#

if sys.argv[1] in ("put", "both"):
    # The file is stored on the server under the same name it has locally.
    ftpPut( "www.cix.co.uk", sys.argv[3], sys.argv[4], sys.argv[5], sys.argv[2], sys.argv[2] )

