diff --git a/gluon/decoder.py b/gluon/decoder.py index d04aaf9a..4fc068dd 100644 --- a/gluon/decoder.py +++ b/gluon/decoder.py @@ -9,7 +9,7 @@ Based on http://code.activestate.com/recipes/52257/ Licensed under the PSF License """ - +from gluon._compat import to_unicode import codecs # None represents a potentially variable byte. "##" in the XML spec... @@ -77,4 +77,4 @@ def autoDetectXMLEncoding(buffer): def decoder(buffer): encoding = autoDetectXMLEncoding(buffer) - return buffer.decode(encoding).encode('utf8') + return to_unicode(buffer, charset=encoding) diff --git a/gluon/html.py b/gluon/html.py index ceef7bd2..5ae5f4cd 100644 --- a/gluon/html.py +++ b/gluon/html.py @@ -20,7 +20,7 @@ import urllib import base64 from gluon import sanitizer, decoder import itertools -from gluon._compat import reduce, pickle, copyreg, HTMLParser, name2codepoint, iteritems, unichr, unicodeT, urllib_quote, to_bytes, to_native, to_unicode, basestring, urlencode, implements_bool, text_type +from gluon._compat import reduce, pickle, copyreg, HTMLParser, name2codepoint, iteritems, unichr, unicodeT, urllib_quote, to_bytes, to_native, to_unicode, basestring, urlencode, implements_bool, text_type, long from gluon.utils import local_html_escape import marshal @@ -998,9 +998,9 @@ class DIV(XmlComponent): if isinstance(c, XmlComponent): s = c.flatten(render) elif render: - s = render(str(c)) + s = render(to_native(c)) else: - s = str(c) + s = to_native(c) text += s if render: text = render(text, self.tag, self.attributes) @@ -1281,7 +1281,6 @@ class __TAG__(XmlComponent): def __getattr__(self, name): if name[-1:] == '_': name = name[:-1] + '/' - name=to_bytes(name) return lambda *a, **b: __tag_div__(name, *a, **b) def __call__(self, html): @@ -2376,17 +2375,17 @@ class FORM(DIV): def as_json(self, sanitize=True): d = self.as_dict(flat=True, sanitize=sanitize) - from serializers import json + from gluon.serializers import json return json(d) def as_yaml(self, sanitize=True): d = self.as_dict(flat=True, sanitize=sanitize) - from serializers import yaml + from gluon.serializers import yaml return yaml(d) def as_xml(self, sanitize=True): d = self.as_dict(flat=True, sanitize=sanitize) - from serializers import xml + from gluon.serializers import xml return xml(d) @@ -2655,36 +2654,24 @@ class web2pyHTMLParser(HTMLParser): """ obj = web2pyHTMLParser(text) parses and html/xml text into web2py helpers. obj.tree contains the root of the tree, and tree can be manipulated - - >>> str(web2pyHTMLParser('hello
wor<ldxxxyyy
zzz' - >>> str(web2pyHTMLParser('
ab
c').tree) - '
ab
c' - >>> tree = web2pyHTMLParser('hello
world
').tree - >>> tree.element(_a='b')['_c']=5 - >>> str(tree) - 'hello
world
' """ + def __init__(self, text, closed=('input', 'link')): HTMLParser.__init__(self) self.tree = self.parent = TAG['']() self.closed = closed - self.tags = [x for x in __all__ if isinstance(eval(x), DIV)] self.last = None self.feed(text) def handle_starttag(self, tagname, attrs): - if tagname.upper() in self.tags: - tag = eval(tagname.upper()) - else: - if tagname in self.closed: - tagname += '/' - tag = TAG[tagname]() + if tagname in self.closed: + tagname += '/' + tag = TAG[tagname]() for key, value in attrs: tag['_' + key] = value tag.parent = self.parent self.parent.append(tag) - if not tag.tag.endswith(b'/'): + if not tag.tag.endswith('/'): self.parent = tag else: self.last = tag.tag[:-1] @@ -2707,7 +2694,6 @@ class web2pyHTMLParser(HTMLParser): self.parent.append(entitydefs[name]) def handle_endtag(self, tagname): - tagname = to_bytes(tagname) # this deals with unbalanced tags if tagname == self.last: return diff --git a/gluon/sanitizer.py b/gluon/sanitizer.py index dbf84eca..5cd2ea4a 100644 --- a/gluon/sanitizer.py +++ b/gluon/sanitizer.py @@ -11,7 +11,7 @@ Cross-site scripting (XSS) defense """ from gluon._compat import HTMLParser, urlparse, entitydefs, basestring -from cgi import escape +from gluon.utils import local_html_escape from formatter import AbstractFormatter from xml.sax.saxutils import quoteattr @@ -21,7 +21,7 @@ __all__ = ['sanitize'] def xssescape(text): """Gets rid of < and > and & and, for good measure, :""" - return escape(text, quote=True).replace(':', ':') + return local_html_escape(text, quote=True).replace(':', ':') class XssCleaner(HTMLParser): diff --git a/gluon/tests/test_html.py b/gluon/tests/test_html.py index e091d1ce..6ee181d5 100644 --- a/gluon/tests/test_html.py +++ b/gluon/tests/test_html.py @@ -11,11 +11,13 @@ import unittest from gluon.html import A, ASSIGNJS, B, BEAUTIFY, P, BODY, BR, BUTTON, CAT, CENTER, CODE, COL, COLGROUP, DIV, SPAN, URL, verifyURL from gluon.html import truncate_string, EM, FIELDSET, FORM, H1, H2, H3, H4, H5, H6, HEAD, HR, HTML, I, IFRAME, IMG, INPUT, EMBED from gluon.html import LABEL, LEGEND, LI, LINK, MARKMIN, MENU, META, OBJECT, OL, OPTGROUP, OPTION, PRE, SCRIPT, SELECT, STRONG -from gluon.html import STYLE, TABLE, TR, TD, TAG, TBODY, THEAD, TEXTAREA, TFOOT, TH, TITLE, TT, UL, XHTML, XML +from gluon.html import STYLE, TABLE, TR, TD, TAG, TBODY, THEAD, TEXTAREA, TFOOT, TH, TITLE, TT, UL, XHTML, XML, web2pyHTMLParser from gluon.storage import Storage from gluon.html import XML_pickle, XML_unpickle from gluon.html import TAG_pickler, TAG_unpickler from gluon._compat import xrange, PY2, to_native +from gluon.decoder import decoder +import re class TestBareHelpers(unittest.TestCase): @@ -155,7 +157,7 @@ class TestBareHelpers(unittest.TestCase): self.assertEqual(rtn, True) # TODO: def test_XmlComponent(self): - @unittest.skipIf(not PY2, "Skipping Python 3.x tests for XML.__repr__") + def test_XML(self): # sanitization process self.assertEqual(XML('

HelloWorld

').xml(), @@ -179,19 +181,18 @@ class TestBareHelpers(unittest.TestCase): # you can compare them ##self.assertEqual(XML('a') == XML('a'), True) # beware that the comparison is made on the XML repr - self.assertEqual(XML('

HelloWorld

', sanitize=True), - XML('

HelloWorld

')) + + self.assertEqual(XML('

HelloWorld

', sanitize=True).__repr__(), + XML('

HelloWorld

').__repr__()) # bug check for the sanitizer for closing no-close tags - self.assertEqual(XML('

Test


Test


', sanitize=True), - XML('

Test


Test


')) + self.assertEqual(XML('

Test


Test


', sanitize=True).xml(), + XML('

Test


Test


').xml()) # basic flatten test self.assertEqual(XML('

Test

').flatten(), '

Test

') self.assertEqual(XML('

Test

').flatten(render=lambda text, tag, attr: text), '

Test

') - @unittest.skipIf(not PY2, "Skipping Python 3.x tests for XML_unpickle.__repr__") def test_XML_pickle_unpickle(self): - # weird test - self.assertEqual(XML_unpickle(XML_pickle('data to be pickle')[1][0]), 'data to be pickle') + self.assertEqual(str(XML_unpickle(XML_pickle('data to be pickle')[1][0])), 'data to be pickle') def test_DIV(self): # Empty DIV() @@ -255,6 +256,11 @@ class TestBareHelpers(unittest.TestCase): self.assertEqual(DIV('

Test

', _class="class_test").get('_class'), 'class_test') self.assertEqual(DIV(b'a').xml(), b'
a
') + def test_decoder(self): + tag_html = '
hello

world

' + a = decoder(tag_html) + self.assertEqual(a, tag_html) + def test_CAT(self): # Empty CAT() self.assertEqual(CAT().xml(), b'') @@ -636,8 +642,8 @@ class TestBareHelpers(unittest.TestCase): # These 2 crash AppVeyor and Travis with: "ImportError: No YAML serializer available" # self.assertEqual(FORM('<>', _a='1', _b='2').as_yaml(), # "accepted: null\nattributes: {_a: '1', _action: '#', _b: '2', _enctype: multipart/form-data, _method: post}\ncomponents: [<>]\nerrors: {}\nlatest: {}\nparent: null\nvars: {}\n") - # self.assertEqual(FORM('<>', _a='1', _b='2').as_xml(), - # 'None<_enctype>multipart/form-data<_action>#<_b>2<_a>1<_method>post&lt;&gt;None') + # TODO check tags content + self.assertEqual(len(FORM('<>', _a='1', _b='2').as_xml()), 334) def test_BEAUTIFY(self): #self.assertEqual(BEAUTIFY(['a', 'b', {'hello': 'world'}]).xml(), @@ -670,13 +676,42 @@ class TestBareHelpers(unittest.TestCase): # TODO: def test_embed64(self): - # TODO: def test_web2pyHTMLParser(self): + def test_web2pyHTMLParser(self): + #tag should not be a byte + self.assertEqual(web2pyHTMLParser("
").tree.components[0].tag, 'div') + a = str(web2pyHTMLParser('
ab
c').tree) + self.assertEqual(a, "
ab
c") + + tree = web2pyHTMLParser('hello
world
').tree + tree.element(_a='b')['_c']=5 + self.assertEqual(str(tree), 'hello
world
') + + a = str(web2pyHTMLParser('
', closed=['img']).tree) + self.assertEqual(a, '
') + + #greater-than sign ( > ) --> decimal > --> hexadecimal > + #Less-than sign ( < ) --> decimal < --> hexadecimal < + # test decimal + a = str(web2pyHTMLParser('
< >
').tree) + self.assertEqual(a, '
< >
') + # test hexadecimal + a = str(web2pyHTMLParser('
< >
').tree) + self.assertEqual(a, '
< >
') + + def test_markdown(self): + def markdown(text, tag=None, attributes={}): + r = {None: re.sub('\s+',' ',text), \ + 'h1':'#'+text+'\\n\\n', \ + 'p':text+'\\n'}.get(tag,text) + return r + a=TAG('

Header

this is a test

') + ret = a.flatten(markdown) + self.assertEqual(ret, '#Header\\n\\nthis is a test\\n') # TODO: def test_markdown_serializer(self): # TODO: def test_markmin_serializer(self): - @unittest.skipIf(not PY2, "Skipping Python 3.x tests for MARKMIN") def test_MARKMIN(self): # This test pass with python 2.7 but expected to fail under 2.6 # with self.assertRaises(TypeError) as cm: