593 lines
14 KiB
Python
593 lines
14 KiB
Python
#!/usr/bin/env python
|
|
# -*- coding: utf-8 -*-
|
|
|
|
"""PyRSS2Gen - A Python library for generating RSS 2.0 feeds."""
|
|
|
|
__name__ = 'PyRSS2Gen'
|
|
__version__ = (1, 0, 0)
|
|
__author__ = 'Andrew Dalke <dalke@dalkescientific.com>'
|
|
|
|
_generator_name = __name__ + '-' + '.'.join(map(str, __version__))
|
|
|
|
import datetime

try:
    import cStringIO
except ImportError:
    # Python 3 has no cStringIO module; keep the name defined so that
    # importing this file does not fail there.
    cStringIO = None
|
|
|
|
# Could make this the base class; will need to add 'publish'


class WriteXmlMixin:
    """Mixin that serializes an object exposing ``publish(handler)`` to XML.

    The host class must define ``publish(handler)``; it receives the
    ``xml.sax.saxutils.XMLGenerator`` created here and emits the element
    events for the object.
    """

    def write_xml(self, outfile, encoding='iso-8859-1'):
        """Write the object as an XML document to *outfile*.

        outfile  -- output object handed to ``XMLGenerator``
        encoding -- encoding name handed to ``XMLGenerator``
        """
        from xml.sax import saxutils
        handler = saxutils.XMLGenerator(outfile, encoding)
        handler.startDocument()
        self.publish(handler)
        handler.endDocument()

    def to_xml(self, encoding='iso-8859-1'):
        """Return the document written by ``write_xml`` as a string."""
        # Prefer cStringIO, then StringIO (both Python 2 only).  When
        # neither module can be imported, fall back to io.StringIO
        # instead of failing with ImportError.
        try:
            from cStringIO import StringIO
        except ImportError:
            try:
                from StringIO import StringIO
            except ImportError:
                from io import StringIO
        f = StringIO()
        self.write_xml(f, encoding)
        return f.getvalue()
|
|
|
|
|
|
def _element(
|
|
handler,
|
|
name,
|
|
obj,
|
|
d={},
|
|
):
|
|
if isinstance(obj, basestring) or obj is None:
|
|
|
|
# special-case handling to make the API easier
|
|
# to use for the common case.
|
|
|
|
handler.startElement(name, d)
|
|
if obj is not None:
|
|
handler.characters(obj)
|
|
handler.endElement(name)
|
|
else:
|
|
|
|
# It better know how to emit the correct XML.
|
|
|
|
obj.publish(handler)
|
|
|
|
|
|
def _opt_element(handler, name, obj):
|
|
if obj is None:
|
|
return
|
|
_element(handler, name, obj)
|
|
|
|
|
|
def _format_date(dt):
|
|
"""convert a datetime into an RFC 822 formatted date
|
|
|
|
Input date must be in GMT.
|
|
"""
|
|
|
|
# Looks like:
|
|
# Sat, 07 Sep 2002 00:00:01 GMT
|
|
# Can't use strftime because that's locale dependent
|
|
#
|
|
# Isn't there a standard way to do this for Python? The
|
|
# rfc822 and email.Utils modules assume a timestamp. The
|
|
# following is based on the rfc822 module.
|
|
|
|
return '%s, %02d %s %04d %02d:%02d:%02d GMT' % (
|
|
[
|
|
'Mon',
|
|
'Tue',
|
|
'Wed',
|
|
'Thu',
|
|
'Fri',
|
|
'Sat',
|
|
'Sun',
|
|
][dt.weekday()],
|
|
dt.day,
|
|
[
|
|
'Jan',
|
|
'Feb',
|
|
'Mar',
|
|
'Apr',
|
|
'May',
|
|
'Jun',
|
|
'Jul',
|
|
'Aug',
|
|
'Sep',
|
|
'Oct',
|
|
'Nov',
|
|
'Dec',
|
|
][dt.month - 1],
|
|
dt.year,
|
|
dt.hour,
|
|
dt.minute,
|
|
dt.second,
|
|
)
|
|
|
|
|
|
##
# A couple of simple wrapper objects for the fields which
# take a simple value other than a string.
|
|
|
|
|
|
class IntElement:
    """Publish an integer as the text content of a single element.

    Constructed with the tag name and the integer value.  The value is
    converted with ``str()`` when published, so anything with a suitable
    string form can be published the same way.
    """

    # Attribute mapping passed to startElement for this element.
    element_attrs = {}

    def __init__(self, name, val):
        self.name, self.val = name, val

    def publish(self, handler):
        """Emit start tag, ``str(val)`` as characters, then end tag."""
        tag = self.name
        handler.startElement(tag, self.element_attrs)
        handler.characters(str(self.val))
        handler.endElement(tag)
|
|
|
|
|
|
class DateElement:
    """Publish a ``datetime.datetime`` as the text of a single element.

    Constructed with the tag name and the datetime.  On publish the
    datetime is rendered by ``_format_date`` (4-digit year, ``GMT``
    suffix) and written under the stored tag name.
    """

    def __init__(self, name, dt):
        self.name, self.dt = name, dt

    def publish(self, handler):
        """Format the stored datetime and emit it via ``_element``."""
        stamp = _format_date(self.dt)
        _element(handler, self.name, stamp)
|
|
|
|
|
|
# ###
|
|
|
|
|
|
class Category:
    """Publish a <category> element, optionally with a ``domain`` attribute."""

    def __init__(self, category, domain=None):
        self.category, self.domain = category, domain

    def publish(self, handler):
        """Emit the category text; ``domain`` is written only when set."""
        attrs = {} if self.domain is None else {'domain': self.domain}
        _element(handler, 'category', self.category, attrs)
|
|
|
|
|
|
class Cloud:
    """Publish a <cloud> element; all of its data is carried in attributes."""

    def __init__(self, domain, port, path, registerProcedure, protocol):
        self.domain, self.port, self.path = domain, port, path
        self.registerProcedure = registerProcedure
        self.protocol = protocol

    def publish(self, handler):
        """Emit an empty <cloud> element with the five stored attributes."""
        attrs = {
            'domain': self.domain,
            # port is converted with str() before being used as an
            # attribute value.
            'port': str(self.port),
            'path': self.path,
            'registerProcedure': self.registerProcedure,
            'protocol': self.protocol,
        }
        _element(handler, 'cloud', None, attrs)
|
|
|
|
|
|
class Image:
    """Publish a channel <image> element."""

    # Attribute mapping passed to startElement for <image>.
    element_attrs = {}

    def __init__(self, url, title, link,
                 width=None, height=None, description=None):
        self.url, self.title, self.link = url, title, link
        self.width, self.height = width, height
        self.description = description

    def publish(self, handler):
        """Emit <image> with url/title/link plus the optional children."""
        handler.startElement('image', self.element_attrs)

        # Children that are always written.
        for tag in ('url', 'title', 'link'):
            _element(handler, tag, getattr(self, tag))

        # Optional dimensions: an int is wrapped so it is published as
        # text; any other non-None value goes to _opt_element unchanged.
        for tag in ('width', 'height'):
            value = getattr(self, tag)
            if isinstance(value, int):
                value = IntElement(tag, value)
            _opt_element(handler, tag, value)

        _opt_element(handler, 'description', self.description)

        handler.endElement('image')
|
|
|
|
|
|
class Guid:
    """Publish a <guid> element.

    ``isPermaLink`` defaults to true, which is also what is assumed when
    the attribute is omitted; hence plain strings are always permalinks.
    """

    def __init__(self, guid, isPermaLink=1):
        self.guid, self.isPermaLink = guid, isPermaLink

    def publish(self, handler):
        """Emit the guid with isPermaLink written as 'true' or 'false'."""
        flag = 'true' if self.isPermaLink else 'false'
        _element(handler, 'guid', self.guid, {'isPermaLink': flag})
|
|
|
|
|
|
class TextInput:
    """Publish a <textInput> element.

    Apparently this is rarely used.
    """

    # Attribute mapping passed to startElement for <textInput>.
    element_attrs = {}

    def __init__(self, title, description, name, link):
        self.title, self.description = title, description
        self.name, self.link = name, link

    def publish(self, handler):
        """Emit <textInput> with its four children in a fixed order."""
        handler.startElement('textInput', self.element_attrs)
        for tag in ('title', 'description', 'name', 'link'):
            _element(handler, tag, getattr(self, tag))
        handler.endElement('textInput')
|
|
|
|
|
|
class Enclosure:
    """Publish an <enclosure> element; its data is carried in attributes."""

    def __init__(self, url, length, type):
        self.url, self.length, self.type = url, length, type

    def publish(self, handler):
        """Emit an empty <enclosure> with url, length and type attributes."""
        attrs = {
            'url': self.url,
            # length is converted with str() before being used as an
            # attribute value.
            'length': str(self.length),
            'type': self.type,
        }
        _element(handler, 'enclosure', None, attrs)
|
|
|
|
|
|
class Source:
    """Publish the item's original <source>, used by aggregators."""

    def __init__(self, name, url):
        self.name, self.url = name, url

    def publish(self, handler):
        """Emit <source> with the name as text and the url as an attribute."""
        attrs = {'url': self.url}
        _element(handler, 'source', self.name, attrs)
|
|
|
|
|
|
class SkipHours:
    """Publish the <skipHours> element.

    Takes a list of hours, as integers.
    """

    # Attribute mapping passed to startElement for <skipHours>.
    element_attrs = {}

    def __init__(self, hours):
        self.hours = hours

    def publish(self, handler):
        """Emit one <hour> per entry; nothing is written for an empty list."""
        if not self.hours:
            return
        handler.startElement('skipHours', self.element_attrs)
        for hour in self.hours:
            # Each hour is converted with str() so it is written as text.
            _element(handler, 'hour', str(hour))
        handler.endElement('skipHours')
|
|
|
|
|
|
class SkipDays:
    """Publish the <skipDays> element.

    Takes a list of days, as strings.
    """

    # Attribute mapping passed to startElement for <skipDays>.
    element_attrs = {}

    def __init__(self, days):
        self.days = days

    def publish(self, handler):
        """Emit one <day> per entry; nothing is written for an empty list."""
        if not self.days:
            return
        handler.startElement('skipDays', self.element_attrs)
        for day in self.days:
            _element(handler, 'day', day)
        handler.endElement('skipDays')
|
|
|
|
|
|
class RSS2(WriteXmlMixin):
    """The main RSS class.

    Stores the channel attributes, with the "category" elements under
    ".categories" and the RSS items under ".items".

    ``title``, ``link`` and ``description`` are always published; every
    other field is skipped when it is ``None``.  ``pubDate`` and
    ``lastBuildDate`` may be ``datetime.datetime`` objects (formatted on
    publish), ``ttl`` may be an ``int``, and entries of ``categories``
    may be plain strings; other values are published as given.
    """

    # Attributes of the outer <rss> element.
    rss_attrs = {'version': '2.0'}
    # Attributes of the <channel> element.
    element_attrs = {}

    def __init__(
        self,
        title,
        link,
        description,
        language=None,
        copyright=None,
        managingEditor=None,
        webMaster=None,
        pubDate=None,
        lastBuildDate=None,
        categories=None,
        generator=_generator_name,
        docs='http://blogs.law.harvard.edu/tech/rss',
        cloud=None,
        ttl=None,
        image=None,
        rating=None,
        textInput=None,
        skipHours=None,
        skipDays=None,
        items=None,
    ):

        self.title = title
        self.link = link
        self.description = description
        self.language = language
        self.copyright = copyright
        self.managingEditor = managingEditor

        self.webMaster = webMaster
        self.pubDate = pubDate
        self.lastBuildDate = lastBuildDate

        # A new list per instance, so feeds never share one list object.
        if categories is None:
            categories = []
        self.categories = categories
        self.generator = generator
        self.docs = docs
        self.cloud = cloud
        self.ttl = ttl
        self.image = image
        self.rating = rating
        self.textInput = textInput
        self.skipHours = skipHours
        self.skipDays = skipDays

        if items is None:
            items = []
        self.items = items

    def publish(self, handler):
        """Emit the whole <rss><channel>...</channel></rss> tree on *handler*."""
        handler.startElement('rss', self.rss_attrs)
        handler.startElement('channel', self.element_attrs)
        _element(handler, 'title', self.title)
        _element(handler, 'link', self.link)
        _element(handler, 'description', self.description)

        self.publish_extensions(handler)

        _opt_element(handler, 'language', self.language)
        _opt_element(handler, 'copyright', self.copyright)
        _opt_element(handler, 'managingEditor', self.managingEditor)
        _opt_element(handler, 'webMaster', self.webMaster)

        # datetime values are wrapped so they are formatted on publish.
        pubDate = self.pubDate
        if isinstance(pubDate, datetime.datetime):
            pubDate = DateElement('pubDate', pubDate)
        _opt_element(handler, 'pubDate', pubDate)

        lastBuildDate = self.lastBuildDate
        if isinstance(lastBuildDate, datetime.datetime):
            lastBuildDate = DateElement('lastBuildDate', lastBuildDate)
        _opt_element(handler, 'lastBuildDate', lastBuildDate)

        for category in self.categories:
            # Entries that cannot publish themselves (normally plain
            # strings) are wrapped in a Category.  Checking for the
            # method avoids the Python 2-only name ``basestring``.
            if not hasattr(category, 'publish'):
                category = Category(category)
            category.publish(handler)

        _opt_element(handler, 'generator', self.generator)
        _opt_element(handler, 'docs', self.docs)

        if self.cloud is not None:
            self.cloud.publish(handler)

        ttl = self.ttl
        if isinstance(ttl, int):
            ttl = IntElement('ttl', ttl)
        # The tag name must be 'ttl': a ttl supplied as a string is
        # written under the name given here (previously the typo 'tt').
        _opt_element(handler, 'ttl', ttl)

        if self.image is not None:
            self.image.publish(handler)

        _opt_element(handler, 'rating', self.rating)
        if self.textInput is not None:
            self.textInput.publish(handler)
        if self.skipHours is not None:
            self.skipHours.publish(handler)
        if self.skipDays is not None:
            self.skipDays.publish(handler)

        for item in self.items:
            item.publish(handler)

        handler.endElement('channel')
        handler.endElement('rss')

    def publish_extensions(self, handler):
        """Hook called right after the three required channel fields."""
        # Derived classes can hook into this to insert
        # output after the three required fields.
        pass
|
|
|
|
|
|
class RSSItem(WriteXmlMixin):
    """Publish an RSS <item>.

    Every field is optional, but at least one of ``title`` or
    ``description`` must be given.  ``pubDate`` may be a
    ``datetime.datetime`` (formatted on publish) and entries of
    ``categories`` may be plain strings; other values are published as
    given.
    """

    # Attribute mapping passed to startElement for <item>.
    element_attrs = {}

    def __init__(
        self,
        title=None,
        link=None,
        description=None,
        author=None,
        categories=None,
        comments=None,
        enclosure=None,
        guid=None,
        pubDate=None,
        source=None,
    ):
        """Store the item fields.

        Raises TypeError when both ``title`` and ``description`` are None.
        """
        if title is None and description is None:
            raise TypeError(
                "RSSItem must define at least one of 'title' or 'description'")
        self.title = title
        self.link = link
        self.description = description
        self.author = author
        # A new list per instance, so items never share one list object.
        if categories is None:
            categories = []
        self.categories = categories
        self.comments = comments
        self.enclosure = enclosure
        self.guid = guid
        self.pubDate = pubDate
        self.source = source

    # It sure does get tedious typing these names three times...

    def publish(self, handler):
        """Emit the <item> element and all of its set fields on *handler*."""
        handler.startElement('item', self.element_attrs)
        _opt_element(handler, 'title', self.title)
        _opt_element(handler, 'link', self.link)
        self.publish_extensions(handler)
        _opt_element(handler, 'description', self.description)
        _opt_element(handler, 'author', self.author)

        for category in self.categories:
            # Entries that cannot publish themselves (normally plain
            # strings) are wrapped in a Category.  Checking for the
            # method avoids the Python 2-only name ``basestring``.
            if not hasattr(category, 'publish'):
                category = Category(category)
            category.publish(handler)

        _opt_element(handler, 'comments', self.comments)
        if self.enclosure is not None:
            self.enclosure.publish(handler)
        _opt_element(handler, 'guid', self.guid)

        # A datetime value is wrapped so it is formatted on publish.
        pubDate = self.pubDate
        if isinstance(pubDate, datetime.datetime):
            pubDate = DateElement('pubDate', pubDate)
        _opt_element(handler, 'pubDate', pubDate)

        if self.source is not None:
            self.source.publish(handler)

        handler.endElement('item')

    def publish_extensions(self, handler):
        """Hook called right after the title and link elements."""
        # Derived classes can hook into this to insert
        # output after the title and link elements
        pass
|
|
|
|
|
|
def dumps(rss, encoding='utf-8'):
    """Serialize *rss* to an XML string.

    rss      -- object providing ``write_xml(outfile, encoding)``
    encoding -- encoding forwarded to ``write_xml`` (default 'utf-8')

    Returns the contents of the in-memory buffer that ``write_xml``
    filled.
    """
    # cStringIO only exists on Python 2; fall back to io.StringIO when it
    # cannot be imported instead of failing.
    try:
        from cStringIO import StringIO
    except ImportError:
        from io import StringIO
    s = StringIO()
    rss.write_xml(s, encoding)
    return s.getvalue()
|
|
|
|
|
|
def test():
    """Build a one-item sample feed and return it serialized by ``dumps``."""
    sample_item = RSSItem(
        title='web2py and PyRSS2Gen-0.0',
        link='http://www.web2py.com/examples/simple_examples/getrss',
        description='web2py can now make rss feeds!',
        guid=Guid('http://www.web2py.com/'),
        pubDate=datetime.datetime(2007, 11, 14, 10, 30),
    )
    feed = RSS2(
        title='web2py feed',
        link='http://www.web2py.com',
        description='About web2py',
        # Current local time, so the output differs from run to run.
        lastBuildDate=datetime.datetime.now(),
        items=[sample_item],
    )
    return dumps(feed)
|
|
|
|
|
|
# NOTE(review): __name__ is rebound to 'PyRSS2Gen' near the top of this
# file, so this comparison is made against that value -- confirm the
# guard can still fire when the file is run as a script.
if __name__ == '__main__':
    # Parenthesized so the line also parses on Python 3; with a single
    # argument Python 2 prints exactly the same text.
    print(test())
|
|
|
|
|
|
|
|
|