Merge pull request #1625 from Scimonster/master

Fix #1624 -- Unicode in XML sanitizing causes error
This commit is contained in:
mdipierro
2017-06-05 11:39:52 -05:00
committed by GitHub
2 changed files with 4 additions and 2 deletions

View File

@@ -596,10 +596,10 @@ class XML(XmlComponent):
for A, IMG and BlockQuote).
The key is the tag; the value is a list of allowed attributes.
"""
if sanitize:
text = sanitizer.sanitize(text, permitted_tags, allowed_attributes)
if isinstance(text, unicodeT):
text = to_native(text.encode('utf8', 'xmlcharrefreplace'))
if sanitize:
text = sanitizer.sanitize(text, permitted_tags, allowed_attributes)
elif isinstance(text, bytes):
text = to_native(text)
elif not isinstance(text, str):

View File

@@ -170,6 +170,8 @@ class TestBareHelpers(unittest.TestCase):
# seams that __repr__ is no longer enough
##self.assertEqual(XML('1.3'), '1.3')
self.assertEqual(XML(u'<div>è</div>').xml(), b'<div>\xc3\xa8</div>')
# make sure unicode works with sanitize
self.assertEqual(XML(u'<div>è</div>', sanitize=True).xml(), b'<div>\xc3\xa8</div>')
# you can calc len on the class, that equals the xml() and the str()
##self.assertEqual(len(XML('1.3')), len('1.3'))
self.assertEqual(len(XML('1.3').xml()), len('1.3'))