#!/usr/bin/python
"""Turn the NewsNow "Southampton" newsfeed page into an RSS 1.0 document.

The page is downloaded, (link id, headline, source) triples are pulled out
of it with a regular expression, and the resulting feed is written to
standard output.

NOTE(review): this script was recovered from a copy in which the whole
program sat on the shebang line (so nothing executed) and every piece of
markup inside the string literals had been stripped.  The control flow, the
URLs and the channel texts below come from that copy.  The regular
expression, the entity handling and the XML element names are
reconstructions -- check them against a saved copy of the page and against
whatever consumes the feed before relying on them.

The code runs unchanged on Python 2 and Python 3.
"""

import re
import sys
import urllib
from xml.sax.saxutils import escape

try:
    # Imported by the original script, although nothing visible uses it.
    # The module no longer exists in recent Python 3 releases.
    import cgi
except ImportError:
    cgi = None

try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    urlopen = urllib.urlopen  # Python 2

try:
    from html import unescape as _unescape  # Python 3
except ImportError:
    from HTMLParser import HTMLParser  # Python 2
    _unescape = HTMLParser().unescape

FEED_URL = "http://www.newsnow.co.uk/newsfeed/?name=Southampton"
GOTO_URL = "http://www.newsnow.co.uk/cgi/NGoto/%s"
CHANNEL_URL = "http://www.cix.co.uk/~jimh/weblog/NewsNow.xml"

# _pattern changed 17/01/2003 JMH
# NOTE(review): the original expression was lost (its markup was stripped);
# only "three groups -- link id, headline, source -- matched with DOTALL"
# is known.  This version assumes each headline is an anchor whose href
# contains /cgi/NGoto/<id>, followed by an element holding the source name.
_pattern = re.compile(
    r"""<a\s[^>]*?href=["'][^"'>]*?/cgi/NGoto/([^"'>]+)["'][^>]*>(.*?)</a>"""
    r""".*?<[a-z][^>]*>(.*?)</""",
    re.DOTALL | re.IGNORECASE,
)

_TAG = re.compile(r"<[^>]*>")


def _clean(text):
    """Decode HTML character references in *text*.

    The original replaced a fixed list of references yielding ' ', '?', ':',
    '/', "'" and '!'; the exact spellings were lost, so every reference is
    decoded and a non-breaking space becomes a plain one.
    """
    return _unescape(text).replace(u"\xa0", u" ")


def _xml(text):
    """Escape *text* for XML content or a double-quoted attribute.

    Non-ASCII characters become numeric references, so the result can be
    written to any byte stream without choosing an output encoding.
    """
    escaped = escape(text, {'"': "&quot;"})
    return escaped.encode("ascii", "xmlcharrefreplace").decode("ascii")


def extract_links(data, pattern=_pattern):
    """Return ``{link id: "source - headline"}`` for every match in *data*.

    *data* is the page text.  *pattern* must yield (link, headline, source)
    groups in that order.  When a link id occurs more than once the last
    occurrence wins, as in the original dict assignment.
    """
    links = {}
    for link, desc, src in pattern.findall(data):
        # The original removed the first occurrence of one (lost) literal
        # from the source name, presumably a tag; all tags are removed here.
        src = _clean(_TAG.sub("", src))
        links[_unescape(link)] = "%s - %s" % (src, _clean(desc))
    return links


def build_feed(links):
    """Render *links* (as returned by extract_links) as RSS 1.0 text.

    Items are ordered by link id.  Each combined "source - headline" string
    is used as both the title and the description of its item.
    """
    ordered = sorted(links)
    out = [
        '<?xml version="1.0"?>',
        '<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"',
        '         xmlns:dc="http://purl.org/dc/elements/1.1/"',
        '         xmlns="http://purl.org/rss/1.0/">',
        '<channel rdf:about="%s">' % CHANNEL_URL,
        "<title>NewsNow - Southampton FC</title>",
        "<description>Southampton FC news - pulled from "
        "http://www.NewsNow.co.uk/</description>",
        "<link>%s</link>" % CHANNEL_URL,
        "<dc:language>en-GB</dc:language>",
        "<items>",
        "<rdf:Seq>",
    ]
    for link in ordered:
        out.append('<rdf:li resource="%s" />' % _xml(GOTO_URL % link))
    out.extend(["</rdf:Seq>", "</items>", "</channel>"])
    for link in ordered:
        url = _xml(GOTO_URL % link)
        text = _xml(links[link])
        out.extend([
            '<item rdf:about="%s">' % url,
            "<title>%s</title>" % text,
            "<description>%s</description>" % text,
            "<link>%s</link>" % url,
            "</item>",
        ])
    out.append("</rdf:RDF>")
    return "\n".join(out)


def main():
    """Fetch the NewsNow page and write the feed to standard output.

    For offline testing, pass the text of a saved page (the original used
    "nn.txt") to extract_links() instead of downloading it.
    """
    response = urlopen(FEED_URL)
    try:
        raw = response.read()
    finally:
        response.close()
    # NOTE(review): the page's character set is not known from this script;
    # ISO-8859-1 is assumed because it decodes any byte sequence.
    page = raw.decode("iso-8859-1")
    sys.stdout.write(build_feed(extract_links(page)) + "\n")


if __name__ == "__main__":
    main()