Fix #1624 -- Unicode in XML sanitizing causes error

Add unit test for it
This commit is contained in:
Scimonster
2017-05-07 14:32:52 +03:00
parent 8a741023d8
commit 49bf14e79a
2 changed files with 4 additions and 2 deletions

View File

@@ -596,10 +596,10 @@ class XML(XmlComponent):
for A, IMG and BlockQuote).
The key is the tag; the value is a list of allowed attributes.
"""
if sanitize:
text = sanitizer.sanitize(text, permitted_tags, allowed_attributes)
if isinstance(text, unicodeT):
text = to_native(text.encode('utf8', 'xmlcharrefreplace'))
if sanitize:
text = sanitizer.sanitize(text, permitted_tags, allowed_attributes)
elif isinstance(text, bytes):
text = to_native(text)
elif not isinstance(text, str):

View File

@@ -168,6 +168,8 @@ class TestBareHelpers(unittest.TestCase):
# seems that __repr__ is no longer enough
##self.assertEqual(XML('1.3'), '1.3')
self.assertEqual(XML(u'<div>è</div>').xml(), b'<div>\xc3\xa8</div>')
# make sure unicode works with sanitize
self.assertEqual(XML(u'<div>è</div>', sanitize=True).xml(), b'<div>\xc3\xa8</div>')
# you can calc len on the class, that equals the xml() and the str()
##self.assertEqual(len(XML('1.3')), len('1.3'))
self.assertEqual(len(XML('1.3').xml()), len('1.3'))