fix TAG helper on PY3, updated web2pyHTMLParser

2017-05-05 21:12:19 +02:00
parent 1d77968a06
commit cf1ea98217
4 changed files with 63 additions and 42 deletions
@@ -9,7 +9,7 @@ Based on http://code.activestate.com/recipes/52257/

 Licensed under the PSF License
 """
-
+from gluon._compat import to_unicode
 import codecs

 # None represents a potentially variable byte. "##" in the XML spec...
@@ -77,4 +77,4 @@ def autoDetectXMLEncoding(buffer):

 def decoder(buffer):
    encoding = autoDetectXMLEncoding(buffer)
-    return buffer.decode(encoding).encode('utf8')
+    return to_unicode(buffer, charset=encoding)
@@ -20,7 +20,7 @@ import urllib
 import base64
 from gluon import sanitizer, decoder
 import itertools
-from gluon._compat import reduce, pickle, copyreg, HTMLParser, name2codepoint, iteritems, unichr, unicodeT, urllib_quote, to_bytes, to_native, to_unicode, basestring, urlencode, implements_bool, text_type
+from gluon._compat import reduce, pickle, copyreg, HTMLParser, name2codepoint, iteritems, unichr, unicodeT, urllib_quote, to_bytes, to_native, to_unicode, basestring, urlencode, implements_bool, text_type, long
 from gluon.utils import local_html_escape
 import marshal

@@ -998,9 +998,9 @@ class DIV(XmlComponent):
            if isinstance(c, XmlComponent):
                s = c.flatten(render)
            elif render:
-                s = render(str(c))
+                s = render(to_native(c))
            else:
-                s = str(c)
+                s = to_native(c)
            text += s
        if render:
            text = render(text, self.tag, self.attributes)
@@ -1281,7 +1281,6 @@ class __TAG__(XmlComponent):
    def __getattr__(self, name):
        if name[-1:] == '_':
            name = name[:-1] + '/'
-        name=to_bytes(name)
        return lambda *a, **b: __tag_div__(name, *a, **b)

    def __call__(self, html):
@@ -2376,17 +2375,17 @@ class FORM(DIV):

    def as_json(self, sanitize=True):
        d = self.as_dict(flat=True, sanitize=sanitize)
-        from serializers import json
+        from gluon.serializers import json
        return json(d)

    def as_yaml(self, sanitize=True):
        d = self.as_dict(flat=True, sanitize=sanitize)
-        from serializers import yaml
+        from gluon.serializers import yaml
        return yaml(d)

    def as_xml(self, sanitize=True):
        d = self.as_dict(flat=True, sanitize=sanitize)
-        from serializers import xml
+        from gluon.serializers import xml
        return xml(d)


@@ -2655,36 +2654,24 @@ class web2pyHTMLParser(HTMLParser):
    """
    obj = web2pyHTMLParser(text) parses and html/xml text into web2py helpers.
    obj.tree contains the root of the tree, and tree can be manipulated
-
-    >>> str(web2pyHTMLParser('hello<div a="b" c=3>wor&lt;ld<span>xxx</span>y<script/>yy</div>zzz').tree)
-    'hello<div a="b" c="3">wor&lt;ld<span>xxx</span>y<script></script>yy</div>zzz'
-    >>> str(web2pyHTMLParser('<div>a<span>b</div>c').tree)
-    '<div>a<span>b</span></div>c'
-    >>> tree = web2pyHTMLParser('hello<div a="b">world</div>').tree
-    >>> tree.element(_a='b')['_c']=5
-    >>> str(tree)
-    'hello<div a="b" c="5">world</div>'
    """
+
    def __init__(self, text, closed=('input', 'link')):
        HTMLParser.__init__(self)
        self.tree = self.parent = TAG['']()
        self.closed = closed
-        self.tags = [x for x in __all__ if isinstance(eval(x), DIV)]
        self.last = None
        self.feed(text)

    def handle_starttag(self, tagname, attrs):
-        if tagname.upper() in self.tags:
-            tag = eval(tagname.upper())
-        else:
-            if tagname in self.closed:
-                tagname += '/'
-            tag = TAG[tagname]()
+        if tagname in self.closed:
+            tagname += '/'
+        tag = TAG[tagname]()
        for key, value in attrs:
            tag['_' + key] = value
        tag.parent = self.parent
        self.parent.append(tag)
-        if not tag.tag.endswith(b'/'):
+        if not tag.tag.endswith('/'):
            self.parent = tag
        else:
            self.last = tag.tag[:-1]
@@ -2707,7 +2694,6 @@ class web2pyHTMLParser(HTMLParser):
        self.parent.append(entitydefs[name])

    def handle_endtag(self, tagname):
-        tagname = to_bytes(tagname)
        # this deals with unbalanced tags
        if tagname == self.last:
            return
@@ -11,7 +11,7 @@ Cross-site scripting (XSS) defense
 """

 from gluon._compat import HTMLParser, urlparse, entitydefs, basestring
-from cgi import escape
+from gluon.utils import local_html_escape
 from formatter import AbstractFormatter
 from xml.sax.saxutils import quoteattr

@@ -21,7 +21,7 @@ __all__ = ['sanitize']
 def xssescape(text):
    """Gets rid of < and > and & and, for good measure, :"""

-    return escape(text, quote=True).replace(':', '&#58;')
+    return local_html_escape(text, quote=True).replace(':', '&#58;')


 class XssCleaner(HTMLParser):
@@ -11,11 +11,13 @@ import unittest
 from gluon.html import A, ASSIGNJS, B, BEAUTIFY, P, BODY, BR, BUTTON, CAT, CENTER, CODE, COL, COLGROUP, DIV, SPAN, URL, verifyURL
 from gluon.html import truncate_string, EM, FIELDSET, FORM, H1, H2, H3, H4, H5, H6, HEAD, HR, HTML, I, IFRAME, IMG, INPUT, EMBED
 from gluon.html import LABEL, LEGEND, LI, LINK, MARKMIN, MENU, META, OBJECT, OL, OPTGROUP, OPTION, PRE, SCRIPT, SELECT, STRONG
-from gluon.html import STYLE, TABLE, TR, TD, TAG, TBODY, THEAD, TEXTAREA, TFOOT, TH, TITLE, TT, UL, XHTML, XML
+from gluon.html import STYLE, TABLE, TR, TD, TAG, TBODY, THEAD, TEXTAREA, TFOOT, TH, TITLE, TT, UL, XHTML, XML, web2pyHTMLParser
 from gluon.storage import Storage
 from gluon.html import XML_pickle, XML_unpickle
 from gluon.html import TAG_pickler, TAG_unpickler
 from gluon._compat import xrange, PY2, to_native
+from gluon.decoder import decoder
+import re

 class TestBareHelpers(unittest.TestCase):

@@ -155,7 +157,7 @@ class TestBareHelpers(unittest.TestCase):
        self.assertEqual(rtn, True)

    # TODO: def test_XmlComponent(self):
-    @unittest.skipIf(not PY2, "Skipping Python 3.x tests for XML.__repr__")
+
    def test_XML(self):
        # sanitization process
        self.assertEqual(XML('<h1>Hello<a data-hello="world">World</a></h1>').xml(),
@@ -179,19 +181,18 @@ class TestBareHelpers(unittest.TestCase):
        # you can compare them
        ##self.assertEqual(XML('a') == XML('a'), True)
        # beware that the comparison is made on the XML repr
-        self.assertEqual(XML('<h1>Hello<a data-hello="world">World</a></h1>', sanitize=True),
-                         XML('<h1>HelloWorld</h1>'))
+
+        self.assertEqual(XML('<h1>Hello<a data-hello="world">World</a></h1>', sanitize=True).__repr__(),
+                         XML('<h1>HelloWorld</h1>').__repr__())
        # bug check for the sanitizer for closing no-close tags
-        self.assertEqual(XML('<p>Test</p><br/><p>Test</p><br/>', sanitize=True),
-                         XML('<p>Test</p><br /><p>Test</p><br />'))
+        self.assertEqual(XML('<p>Test</p><br/><p>Test</p><br/>', sanitize=True).xml(),
+                         XML('<p>Test</p><br /><p>Test</p><br />').xml())
        # basic flatten test
        self.assertEqual(XML('<p>Test</p>').flatten(), '<p>Test</p>')
        self.assertEqual(XML('<p>Test</p>').flatten(render=lambda text, tag, attr: text), '<p>Test</p>')

-    @unittest.skipIf(not PY2, "Skipping Python 3.x tests for XML_unpickle.__repr__")
    def test_XML_pickle_unpickle(self):
-        # weird test
-        self.assertEqual(XML_unpickle(XML_pickle('data to be pickle')[1][0]), 'data to be pickle')
+        self.assertEqual(str(XML_unpickle(XML_pickle('data to be pickle')[1][0])), 'data to be pickle')

    def test_DIV(self):
        # Empty DIV()
@@ -255,6 +256,11 @@ class TestBareHelpers(unittest.TestCase):
        self.assertEqual(DIV('<p>Test</p>', _class="class_test").get('_class'), 'class_test')
        self.assertEqual(DIV(b'a').xml(), b'<div>a</div>')

+    def test_decoder(self):
+        # plain-ASCII markup is expected to come back from decoder() unchanged
+        markup = '<div><span><a id="1-1" u:v="$">hello</a></span><p class="this is a test">world</p></div>'
+        self.assertEqual(decoder(markup), markup)
+
    def test_CAT(self):
        # Empty CAT()
        self.assertEqual(CAT().xml(), b'')
@@ -636,8 +642,8 @@ class TestBareHelpers(unittest.TestCase):
        # These 2 crash AppVeyor and Travis with: "ImportError: No YAML serializer available"
        # self.assertEqual(FORM('<>', _a='1', _b='2').as_yaml(),
        #                  "accepted: null\nattributes: {_a: '1', _action: '#', _b: '2', _enctype: multipart/form-data, _method: post}\ncomponents: [<>]\nerrors: {}\nlatest: {}\nparent: null\nvars: {}\n")
-        # self.assertEqual(FORM('<>', _a='1', _b='2').as_xml(),
-        #                  '<?xml version="1.0" encoding="UTF-8"?><document><errors></errors><vars></vars><parent>None</parent><attributes><_enctype>multipart/form-data</_enctype><_action>#</_action><_b>2</_b><_a>1</_a><_method>post</_method></attributes><components><item>&amp;lt;&amp;gt;</item></components><accepted>None</accepted><latest></latest></document>')
+        # TODO: assert on the generated XML content; for now only its length is checked
+        self.assertEqual(len(FORM('<>', _a='1', _b='2').as_xml()), 334)

    def test_BEAUTIFY(self):
        #self.assertEqual(BEAUTIFY(['a', 'b', {'hello': 'world'}]).xml(),
@@ -670,13 +676,42 @@ class TestBareHelpers(unittest.TestCase):

    # TODO: def test_embed64(self):

-    # TODO: def test_web2pyHTMLParser(self):
+    def test_web2pyHTMLParser(self):
+        # the parsed tag name must be a native str, not bytes
+        self.assertEqual(web2pyHTMLParser("<div></div>").tree.components[0].tag, 'div')
+        a = str(web2pyHTMLParser('<div>a<span>b</div>c').tree)  # unclosed <span>
+        self.assertEqual(a, "<div>a<span>b</span></div>c")
+
+        tree = web2pyHTMLParser('hello<div a="b">world</div>').tree
+        tree.element(_a='b')['_c']=5  # the parsed tree can be modified in place
+        self.assertEqual(str(tree), 'hello<div a="b" c="5">world</div>')
+
+        a = str(web2pyHTMLParser('<div><img class="img"/></div>', closed=['img']).tree)  # 'closed' names the self-closing tags
+        self.assertEqual(a, '<div><img class="img" /></div>')
+
+        # numeric character references are expected to serialize as named entities:
+        # '<' is &#60; (decimal) / &#x3C; (hex); '>' is &#62; (decimal) / &#x3E; (hex)
+        # test decimal
+        a = str(web2pyHTMLParser('<div>&#60; &#62;</div>').tree)
+        self.assertEqual(a, '<div>&lt; &gt;</div>')
+        # test hexadecimal
+        a = str(web2pyHTMLParser('<div>&#x3C; &#x3E;</div>').tree)
+        self.assertEqual(a, '<div>&lt; &gt;</div>')
+
+    def test_markdown(self):
+        # render callback for flatten(): gets bare text, or (text, tag, attributes)
+        def markdown(text, tag=None, attributes=None):
+            # raw pattern: '\s' in a plain literal is an invalid escape sequence
+            return {None: re.sub(r'\s+', ' ', text),
+                    'h1': '#' + text + '\\n\\n',
+                    'p': text + '\\n'}.get(tag, text)
+        a = TAG('<h1>Header</h1><p>this is a     test</p>')
+        self.assertEqual(a.flatten(markdown), '#Header\\n\\nthis is a test\\n')

    # TODO: def test_markdown_serializer(self):

    # TODO: def test_markmin_serializer(self):

-    @unittest.skipIf(not PY2, "Skipping Python 3.x tests for MARKMIN")
    def test_MARKMIN(self):
        # This test pass with python 2.7 but expected to fail under 2.6
        # with self.assertRaises(TypeError) as cm: