fix TAG helper on PY3, updated web2pyHTMLParser

This commit is contained in:
ilvalle
2017-05-05 21:12:19 +02:00
parent 1d77968a06
commit cf1ea98217
4 changed files with 63 additions and 42 deletions
+2 -2
View File
@@ -9,7 +9,7 @@ Based on http://code.activestate.com/recipes/52257/
Licensed under the PSF License
"""
from gluon._compat import to_unicode
import codecs
# None represents a potentially variable byte. "##" in the XML spec...
@@ -77,4 +77,4 @@ def autoDetectXMLEncoding(buffer):
def decoder(buffer):
encoding = autoDetectXMLEncoding(buffer)
return buffer.decode(encoding).encode('utf8')
return to_unicode(buffer, charset=encoding)
+11 -25
View File
@@ -20,7 +20,7 @@ import urllib
import base64
from gluon import sanitizer, decoder
import itertools
from gluon._compat import reduce, pickle, copyreg, HTMLParser, name2codepoint, iteritems, unichr, unicodeT, urllib_quote, to_bytes, to_native, to_unicode, basestring, urlencode, implements_bool, text_type
from gluon._compat import reduce, pickle, copyreg, HTMLParser, name2codepoint, iteritems, unichr, unicodeT, urllib_quote, to_bytes, to_native, to_unicode, basestring, urlencode, implements_bool, text_type, long
from gluon.utils import local_html_escape
import marshal
@@ -998,9 +998,9 @@ class DIV(XmlComponent):
if isinstance(c, XmlComponent):
s = c.flatten(render)
elif render:
s = render(str(c))
s = render(to_native(c))
else:
s = str(c)
s = to_native(c)
text += s
if render:
text = render(text, self.tag, self.attributes)
@@ -1281,7 +1281,6 @@ class __TAG__(XmlComponent):
def __getattr__(self, name):
if name[-1:] == '_':
name = name[:-1] + '/'
name=to_bytes(name)
return lambda *a, **b: __tag_div__(name, *a, **b)
def __call__(self, html):
@@ -2376,17 +2375,17 @@ class FORM(DIV):
def as_json(self, sanitize=True):
d = self.as_dict(flat=True, sanitize=sanitize)
from serializers import json
from gluon.serializers import json
return json(d)
def as_yaml(self, sanitize=True):
d = self.as_dict(flat=True, sanitize=sanitize)
from serializers import yaml
from gluon.serializers import yaml
return yaml(d)
def as_xml(self, sanitize=True):
d = self.as_dict(flat=True, sanitize=sanitize)
from serializers import xml
from gluon.serializers import xml
return xml(d)
@@ -2655,36 +2654,24 @@ class web2pyHTMLParser(HTMLParser):
"""
obj = web2pyHTMLParser(text) parses and html/xml text into web2py helpers.
obj.tree contains the root of the tree, and tree can be manipulated
>>> str(web2pyHTMLParser('hello<div a="b" c=3>wor&lt;ld<span>xxx</span>y<script/>yy</div>zzz').tree)
'hello<div a="b" c="3">wor&lt;ld<span>xxx</span>y<script></script>yy</div>zzz'
>>> str(web2pyHTMLParser('<div>a<span>b</div>c').tree)
'<div>a<span>b</span></div>c'
>>> tree = web2pyHTMLParser('hello<div a="b">world</div>').tree
>>> tree.element(_a='b')['_c']=5
>>> str(tree)
'hello<div a="b" c="5">world</div>'
"""
def __init__(self, text, closed=('input', 'link')):
HTMLParser.__init__(self)
self.tree = self.parent = TAG['']()
self.closed = closed
self.tags = [x for x in __all__ if isinstance(eval(x), DIV)]
self.last = None
self.feed(text)
def handle_starttag(self, tagname, attrs):
if tagname.upper() in self.tags:
tag = eval(tagname.upper())
else:
if tagname in self.closed:
tagname += '/'
tag = TAG[tagname]()
if tagname in self.closed:
tagname += '/'
tag = TAG[tagname]()
for key, value in attrs:
tag['_' + key] = value
tag.parent = self.parent
self.parent.append(tag)
if not tag.tag.endswith(b'/'):
if not tag.tag.endswith('/'):
self.parent = tag
else:
self.last = tag.tag[:-1]
@@ -2707,7 +2694,6 @@ class web2pyHTMLParser(HTMLParser):
self.parent.append(entitydefs[name])
def handle_endtag(self, tagname):
tagname = to_bytes(tagname)
# this deals with unbalanced tags
if tagname == self.last:
return
+2 -2
View File
@@ -11,7 +11,7 @@ Cross-site scripting (XSS) defense
"""
from gluon._compat import HTMLParser, urlparse, entitydefs, basestring
from cgi import escape
from gluon.utils import local_html_escape
from formatter import AbstractFormatter
from xml.sax.saxutils import quoteattr
@@ -21,7 +21,7 @@ __all__ = ['sanitize']
def xssescape(text):
"""Gets rid of < and > and & and, for good measure, :"""
return escape(text, quote=True).replace(':', '&#58;')
return local_html_escape(text, quote=True).replace(':', '&#58;')
class XssCleaner(HTMLParser):
+48 -13
View File
@@ -11,11 +11,13 @@ import unittest
from gluon.html import A, ASSIGNJS, B, BEAUTIFY, P, BODY, BR, BUTTON, CAT, CENTER, CODE, COL, COLGROUP, DIV, SPAN, URL, verifyURL
from gluon.html import truncate_string, EM, FIELDSET, FORM, H1, H2, H3, H4, H5, H6, HEAD, HR, HTML, I, IFRAME, IMG, INPUT, EMBED
from gluon.html import LABEL, LEGEND, LI, LINK, MARKMIN, MENU, META, OBJECT, OL, OPTGROUP, OPTION, PRE, SCRIPT, SELECT, STRONG
from gluon.html import STYLE, TABLE, TR, TD, TAG, TBODY, THEAD, TEXTAREA, TFOOT, TH, TITLE, TT, UL, XHTML, XML
from gluon.html import STYLE, TABLE, TR, TD, TAG, TBODY, THEAD, TEXTAREA, TFOOT, TH, TITLE, TT, UL, XHTML, XML, web2pyHTMLParser
from gluon.storage import Storage
from gluon.html import XML_pickle, XML_unpickle
from gluon.html import TAG_pickler, TAG_unpickler
from gluon._compat import xrange, PY2, to_native
from gluon.decoder import decoder
import re
class TestBareHelpers(unittest.TestCase):
@@ -155,7 +157,7 @@ class TestBareHelpers(unittest.TestCase):
self.assertEqual(rtn, True)
# TODO: def test_XmlComponent(self):
@unittest.skipIf(not PY2, "Skipping Python 3.x tests for XML.__repr__")
def test_XML(self):
# sanitization process
self.assertEqual(XML('<h1>Hello<a data-hello="world">World</a></h1>').xml(),
@@ -179,19 +181,18 @@ class TestBareHelpers(unittest.TestCase):
# you can compare them
##self.assertEqual(XML('a') == XML('a'), True)
# beware that the comparison is made on the XML repr
self.assertEqual(XML('<h1>Hello<a data-hello="world">World</a></h1>', sanitize=True),
XML('<h1>HelloWorld</h1>'))
self.assertEqual(XML('<h1>Hello<a data-hello="world">World</a></h1>', sanitize=True).__repr__(),
XML('<h1>HelloWorld</h1>').__repr__())
# bug check for the sanitizer for closing no-close tags
self.assertEqual(XML('<p>Test</p><br/><p>Test</p><br/>', sanitize=True),
XML('<p>Test</p><br /><p>Test</p><br />'))
self.assertEqual(XML('<p>Test</p><br/><p>Test</p><br/>', sanitize=True).xml(),
XML('<p>Test</p><br /><p>Test</p><br />').xml())
# basic flatten test
self.assertEqual(XML('<p>Test</p>').flatten(), '<p>Test</p>')
self.assertEqual(XML('<p>Test</p>').flatten(render=lambda text, tag, attr: text), '<p>Test</p>')
@unittest.skipIf(not PY2, "Skipping Python 3.x tests for XML_unpickle.__repr__")
def test_XML_pickle_unpickle(self):
# weird test
self.assertEqual(XML_unpickle(XML_pickle('data to be pickle')[1][0]), 'data to be pickle')
self.assertEqual(str(XML_unpickle(XML_pickle('data to be pickle')[1][0])), 'data to be pickle')
def test_DIV(self):
# Empty DIV()
@@ -255,6 +256,11 @@ class TestBareHelpers(unittest.TestCase):
self.assertEqual(DIV('<p>Test</p>', _class="class_test").get('_class'), 'class_test')
self.assertEqual(DIV(b'a').xml(), b'<div>a</div>')
def test_decoder(self):
tag_html = '<div><span><a id="1-1" u:v="$">hello</a></span><p class="this is a test">world</p></div>'
a = decoder(tag_html)
self.assertEqual(a, tag_html)
def test_CAT(self):
# Empty CAT()
self.assertEqual(CAT().xml(), b'')
@@ -636,8 +642,8 @@ class TestBareHelpers(unittest.TestCase):
# These 2 crash AppVeyor and Travis with: "ImportError: No YAML serializer available"
# self.assertEqual(FORM('<>', _a='1', _b='2').as_yaml(),
# "accepted: null\nattributes: {_a: '1', _action: '#', _b: '2', _enctype: multipart/form-data, _method: post}\ncomponents: [<>]\nerrors: {}\nlatest: {}\nparent: null\nvars: {}\n")
# self.assertEqual(FORM('<>', _a='1', _b='2').as_xml(),
# '<?xml version="1.0" encoding="UTF-8"?><document><errors></errors><vars></vars><parent>None</parent><attributes><_enctype>multipart/form-data</_enctype><_action>#</_action><_b>2</_b><_a>1</_a><_method>post</_method></attributes><components><item>&amp;lt;&amp;gt;</item></components><accepted>None</accepted><latest></latest></document>')
# TODO check tags content
self.assertEqual(len(FORM('<>', _a='1', _b='2').as_xml()), 334)
def test_BEAUTIFY(self):
#self.assertEqual(BEAUTIFY(['a', 'b', {'hello': 'world'}]).xml(),
@@ -670,13 +676,42 @@ class TestBareHelpers(unittest.TestCase):
# TODO: def test_embed64(self):
# TODO: def test_web2pyHTMLParser(self):
def test_web2pyHTMLParser(self):
#tag should not be a byte
self.assertEqual(web2pyHTMLParser("<div></div>").tree.components[0].tag, 'div')
a = str(web2pyHTMLParser('<div>a<span>b</div>c').tree)
self.assertEqual(a, "<div>a<span>b</span></div>c")
tree = web2pyHTMLParser('hello<div a="b">world</div>').tree
tree.element(_a='b')['_c']=5
self.assertEqual(str(tree), 'hello<div a="b" c="5">world</div>')
a = str(web2pyHTMLParser('<div><img class="img"/></div>', closed=['img']).tree)
self.assertEqual(a, '<div><img class="img" /></div>')
#greater-than sign ( > ) --> decimal &#62; --> hexadecimal &#x3E;
#Less-than sign ( < ) --> decimal &#60; --> hexadecimal &#x3C;
# test decimal
a = str(web2pyHTMLParser('<div>&#60; &#62;</div>').tree)
self.assertEqual(a, '<div>&lt; &gt;</div>')
# test hexadecimal
a = str(web2pyHTMLParser('<div>&#x3C; &#x3E;</div>').tree)
self.assertEqual(a, '<div>&lt; &gt;</div>')
def test_markdown(self):
def markdown(text, tag=None, attributes={}):
r = {None: re.sub('\s+',' ',text), \
'h1':'#'+text+'\\n\\n', \
'p':text+'\\n'}.get(tag,text)
return r
a=TAG('<h1>Header</h1><p>this is a test</p>')
ret = a.flatten(markdown)
self.assertEqual(ret, '#Header\\n\\nthis is a test\\n')
# TODO: def test_markdown_serializer(self):
# TODO: def test_markmin_serializer(self):
@unittest.skipIf(not PY2, "Skipping Python 3.x tests for MARKMIN")
def test_MARKMIN(self):
# This test pass with python 2.7 but expected to fail under 2.6
# with self.assertRaises(TypeError) as cm: