diff --git a/gluon/contrib/feedparser.py b/gluon/contrib/feedparser.py
old mode 100755
new mode 100644
index 2ef24a9d..c78e6a39
--- a/gluon/contrib/feedparser.py
+++ b/gluon/contrib/feedparser.py
@@ -9,7 +9,7 @@ Required: Python 2.4 or later
Recommended: iconv_codec
"""
-__version__ = "5.1.2"
+__version__ = "5.1.3"
__license__ = """
Copyright (c) 2010-2012 Kurt McKee
Copyright (c) 2002-2008 Mark Pilgrim
@@ -44,7 +44,8 @@ __contributors__ = ["Jason Diamond ",
"Sam Ruby ",
"Ade Oshineye ",
"Martin Pool ",
- "Kurt McKee "]
+ "Kurt McKee ",
+ "Bernd Schlapsi ",]
# HTTP "User-Agent" header to send to servers when downloading feeds.
# If you are embedding feedparser in a larger application, you should
@@ -1971,6 +1972,7 @@ class _BaseHTMLProcessor(sgmllib.SGMLParser):
def handle_charref(self, ref):
# called for each character reference, e.g. for '&#160;', ref will be '160'
# Reconstruct the original character reference.
+ ref = ref.lower()
if ref.startswith('x'):
value = int(ref[1:], 16)
else:
@@ -2455,7 +2457,10 @@ class _MicroformatsParser:
linktype.startswith('video/') or \
(linktype.startswith('application/') and not linktype.endswith('xml')):
return 1
- path = urlparse.urlparse(attrsD['href'])[2]
+ try:
+ path = urlparse.urlparse(attrsD['href'])[2]
+ except ValueError:
+ return 0
if path.find('.') == -1:
return 0
fileext = path.split('.').pop().lower()
@@ -2541,7 +2546,8 @@ class _RelativeURIResolver(_BaseHTMLProcessor):
('object', 'data'),
('object', 'usemap'),
('q', 'cite'),
- ('script', 'src')])
+ ('script', 'src'),
+ ('video', 'poster')])
def __init__(self, baseuri, encoding, _type):
_BaseHTMLProcessor.__init__(self, encoding, _type)
@@ -2618,13 +2624,13 @@ class _HTMLSanitizer(_BaseHTMLProcessor):
'loop', 'loopcount', 'loopend', 'loopstart', 'low', 'lowsrc', 'max',
'maxlength', 'media', 'method', 'min', 'multiple', 'name', 'nohref',
'noshade', 'nowrap', 'open', 'optimum', 'pattern', 'ping', 'point-size',
- 'prompt', 'pqg', 'radiogroup', 'readonly', 'rel', 'repeat-max',
- 'repeat-min', 'replace', 'required', 'rev', 'rightspacing', 'rows',
- 'rowspan', 'rules', 'scope', 'selected', 'shape', 'size', 'span', 'src',
- 'start', 'step', 'summary', 'suppress', 'tabindex', 'target', 'template',
- 'title', 'toppadding', 'type', 'unselectable', 'usemap', 'urn', 'valign',
- 'value', 'variable', 'volume', 'vspace', 'vrml', 'width', 'wrap',
- 'xml:lang'])
+ 'poster', 'pqg', 'preload', 'prompt', 'radiogroup', 'readonly', 'rel',
+ 'repeat-max', 'repeat-min', 'replace', 'required', 'rev', 'rightspacing',
+ 'rows', 'rowspan', 'rules', 'scope', 'selected', 'shape', 'size', 'span',
+ 'src', 'start', 'step', 'summary', 'suppress', 'tabindex', 'target',
+ 'template', 'title', 'toppadding', 'type', 'unselectable', 'usemap',
+ 'urn', 'valign', 'value', 'variable', 'volume', 'vspace', 'vrml',
+ 'width', 'wrap', 'xml:lang'])
unacceptable_elements_with_end_tag = set(['script', 'applet', 'style'])
@@ -2976,9 +2982,9 @@ def _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, h
url_file_stream_or_string = 'http:' + url_file_stream_or_string[5:]
if not agent:
agent = USER_AGENT
- # test for inline user:password for basic auth
+ # Test for inline user:password credentials for HTTP basic auth
auth = None
- if base64:
+ if base64 and not url_file_stream_or_string.startswith('ftp:'):
urltype, rest = urllib.splittype(url_file_stream_or_string)
realhost, rest = urllib.splithost(rest)
if realhost:
@@ -3471,15 +3477,7 @@ _rfc822_match = re.compile(
"(?:%s, )?%s(?: %s)?" % (_rfc822_dayname, _rfc822_date, _rfc822_time)
).match
-def _parse_date_rfc822(dt):
- """Parse RFC 822 dates and times, with one minor
- difference: years may be 4DIGIT or 2DIGIT.
- http://tools.ietf.org/html/rfc822#section-5"""
- try:
- m = _rfc822_match(dt.lower()).groupdict(0)
- except AttributeError:
- return None
-
+def _parse_date_group_rfc822(m):
# Calculate a date and timestamp
for k in ('year', 'day', 'hour', 'minute', 'second'):
m[k] = int(m[k])
@@ -3487,7 +3485,7 @@ def _parse_date_rfc822(dt):
# If the year is 2 digits, assume everything in the 90's is the 1990's
if m['year'] < 100:
m['year'] += (1900, 2000)[m['year'] < 90]
- stamp = datetime.datetime(*[m[i] for i in
+ stamp = datetime.datetime(*[m[i] for i in
('year', 'month', 'day', 'hour', 'minute', 'second')])
# Use the timezone information to calculate the difference between
@@ -3512,8 +3510,36 @@ def _parse_date_rfc822(dt):
# Return the date and timestamp in UTC
return (stamp - delta).utctimetuple()
+
+def _parse_date_rfc822(dt):
+    """Parse an RFC 822 date/time string into a UTC time tuple.
+
+    One deliberate deviation from the RFC: the year may be
+    4DIGIT or 2DIGIT. Returns None when dt does not match.
+    http://tools.ietf.org/html/rfc822#section-5"""
+    try:
+        fields = _rfc822_match(dt.lower()).groupdict(0)
+    except AttributeError:
+        # match() returned None (no match), or dt has no lower()
+        return None
+    else:
+        return _parse_date_group_rfc822(fields)
registerDateHandler(_parse_date_rfc822)
+# "Grubby" variant of the RFC 822 date pattern: the month precedes the
+# day and the comma after the day name is optional. Built once at import
+# time, like _rfc822_match, instead of on every call.
+_rfc822_date_grubby = "%s %s %s" % (_rfc822_month, _rfc822_day, _rfc822_year)
+_rfc822_match_grubby = re.compile(
+    "(?:%s[,]? )?%s(?: %s)?" % (_rfc822_dayname, _rfc822_date_grubby, _rfc822_time)
+).match
+
+def _parse_date_rfc822_grubby(dt):
+    """Parse date format similar to RFC 822, but
+    the comma after the dayname is optional and
+    month/day are inverted.
+
+    Returns the result of _parse_date_group_rfc822 for the matched
+    fields, or None when dt does not match the pattern."""
+    try:
+        m = _rfc822_match_grubby(dt.lower()).groupdict(0)
+    except AttributeError:
+        # match() returned None (no match), or dt has no lower()
+        return None
+
+    return _parse_date_group_rfc822(m)
+registerDateHandler(_parse_date_rfc822_grubby)
+
def _parse_date_asctime(dt):
"""Parse asctime-style dates"""
dayname, month, day, remainder = dt.split(None, 3)
@@ -3699,7 +3725,7 @@ def convert_to_utf8(http_headers, data):
u'application/xml-external-parsed-entity')
text_content_types = (u'text/xml', u'text/xml-external-parsed-entity')
if (http_content_type in application_content_types) or \
- (http_content_type.startswith(u'application/') and
+ (http_content_type.startswith(u'application/') and
http_content_type.endswith(u'+xml')):
acceptable_content_type = 1
rfc3023_encoding = http_encoding or xml_encoding or u'utf-8'