fix TAG helper on PY3, updated web2pyHTMLParser
This commit is contained in:
+2
-2
@@ -9,7 +9,7 @@ Based on http://code.activestate.com/recipes/52257/
|
||||
|
||||
Licensed under the PSF License
|
||||
"""
|
||||
|
||||
from gluon._compat import to_unicode
|
||||
import codecs
|
||||
|
||||
# None represents a potentially variable byte. "##" in the XML spec...
|
||||
@@ -77,4 +77,4 @@ def autoDetectXMLEncoding(buffer):
|
||||
|
||||
def decoder(buffer):
|
||||
encoding = autoDetectXMLEncoding(buffer)
|
||||
return buffer.decode(encoding).encode('utf8')
|
||||
return to_unicode(buffer, charset=encoding)
|
||||
|
||||
+11
-25
@@ -20,7 +20,7 @@ import urllib
|
||||
import base64
|
||||
from gluon import sanitizer, decoder
|
||||
import itertools
|
||||
from gluon._compat import reduce, pickle, copyreg, HTMLParser, name2codepoint, iteritems, unichr, unicodeT, urllib_quote, to_bytes, to_native, to_unicode, basestring, urlencode, implements_bool, text_type
|
||||
from gluon._compat import reduce, pickle, copyreg, HTMLParser, name2codepoint, iteritems, unichr, unicodeT, urllib_quote, to_bytes, to_native, to_unicode, basestring, urlencode, implements_bool, text_type, long
|
||||
from gluon.utils import local_html_escape
|
||||
import marshal
|
||||
|
||||
@@ -998,9 +998,9 @@ class DIV(XmlComponent):
|
||||
if isinstance(c, XmlComponent):
|
||||
s = c.flatten(render)
|
||||
elif render:
|
||||
s = render(str(c))
|
||||
s = render(to_native(c))
|
||||
else:
|
||||
s = str(c)
|
||||
s = to_native(c)
|
||||
text += s
|
||||
if render:
|
||||
text = render(text, self.tag, self.attributes)
|
||||
@@ -1281,7 +1281,6 @@ class __TAG__(XmlComponent):
|
||||
def __getattr__(self, name):
|
||||
if name[-1:] == '_':
|
||||
name = name[:-1] + '/'
|
||||
name=to_bytes(name)
|
||||
return lambda *a, **b: __tag_div__(name, *a, **b)
|
||||
|
||||
def __call__(self, html):
|
||||
@@ -2376,17 +2375,17 @@ class FORM(DIV):
|
||||
|
||||
def as_json(self, sanitize=True):
|
||||
d = self.as_dict(flat=True, sanitize=sanitize)
|
||||
from serializers import json
|
||||
from gluon.serializers import json
|
||||
return json(d)
|
||||
|
||||
def as_yaml(self, sanitize=True):
|
||||
d = self.as_dict(flat=True, sanitize=sanitize)
|
||||
from serializers import yaml
|
||||
from gluon.serializers import yaml
|
||||
return yaml(d)
|
||||
|
||||
def as_xml(self, sanitize=True):
|
||||
d = self.as_dict(flat=True, sanitize=sanitize)
|
||||
from serializers import xml
|
||||
from gluon.serializers import xml
|
||||
return xml(d)
|
||||
|
||||
|
||||
@@ -2655,36 +2654,24 @@ class web2pyHTMLParser(HTMLParser):
|
||||
"""
|
||||
obj = web2pyHTMLParser(text) parses an html/xml text into web2py helpers.
|
||||
obj.tree contains the root of the tree, and tree can be manipulated
|
||||
|
||||
>>> str(web2pyHTMLParser('hello<div a="b" c=3>wor&lt;ld<span>xxx</span>y<script/>yy</div>zzz').tree)
|
||||
'hello<div a="b" c="3">wor&lt;ld<span>xxx</span>y<script></script>yy</div>zzz'
|
||||
>>> str(web2pyHTMLParser('<div>a<span>b</div>c').tree)
|
||||
'<div>a<span>b</span></div>c'
|
||||
>>> tree = web2pyHTMLParser('hello<div a="b">world</div>').tree
|
||||
>>> tree.element(_a='b')['_c']=5
|
||||
>>> str(tree)
|
||||
'hello<div a="b" c="5">world</div>'
|
||||
"""
|
||||
|
||||
def __init__(self, text, closed=('input', 'link')):
|
||||
HTMLParser.__init__(self)
|
||||
self.tree = self.parent = TAG['']()
|
||||
self.closed = closed
|
||||
self.tags = [x for x in __all__ if isinstance(eval(x), DIV)]
|
||||
self.last = None
|
||||
self.feed(text)
|
||||
|
||||
def handle_starttag(self, tagname, attrs):
|
||||
if tagname.upper() in self.tags:
|
||||
tag = eval(tagname.upper())
|
||||
else:
|
||||
if tagname in self.closed:
|
||||
tagname += '/'
|
||||
tag = TAG[tagname]()
|
||||
if tagname in self.closed:
|
||||
tagname += '/'
|
||||
tag = TAG[tagname]()
|
||||
for key, value in attrs:
|
||||
tag['_' + key] = value
|
||||
tag.parent = self.parent
|
||||
self.parent.append(tag)
|
||||
if not tag.tag.endswith(b'/'):
|
||||
if not tag.tag.endswith('/'):
|
||||
self.parent = tag
|
||||
else:
|
||||
self.last = tag.tag[:-1]
|
||||
@@ -2707,7 +2694,6 @@ class web2pyHTMLParser(HTMLParser):
|
||||
self.parent.append(entitydefs[name])
|
||||
|
||||
def handle_endtag(self, tagname):
|
||||
tagname = to_bytes(tagname)
|
||||
# this deals with unbalanced tags
|
||||
if tagname == self.last:
|
||||
return
|
||||
|
||||
+2
-2
@@ -11,7 +11,7 @@ Cross-site scripting (XSS) defense
|
||||
"""
|
||||
|
||||
from gluon._compat import HTMLParser, urlparse, entitydefs, basestring
|
||||
from cgi import escape
|
||||
from gluon.utils import local_html_escape
|
||||
from formatter import AbstractFormatter
|
||||
from xml.sax.saxutils import quoteattr
|
||||
|
||||
@@ -21,7 +21,7 @@ __all__ = ['sanitize']
|
||||
def xssescape(text):
|
||||
"""Gets rid of < and > and & and, for good measure, :"""
|
||||
|
||||
return escape(text, quote=True).replace(':', ':')
|
||||
return local_html_escape(text, quote=True).replace(':', ':')
|
||||
|
||||
|
||||
class XssCleaner(HTMLParser):
|
||||
|
||||
+48
-13
@@ -11,11 +11,13 @@ import unittest
|
||||
from gluon.html import A, ASSIGNJS, B, BEAUTIFY, P, BODY, BR, BUTTON, CAT, CENTER, CODE, COL, COLGROUP, DIV, SPAN, URL, verifyURL
|
||||
from gluon.html import truncate_string, EM, FIELDSET, FORM, H1, H2, H3, H4, H5, H6, HEAD, HR, HTML, I, IFRAME, IMG, INPUT, EMBED
|
||||
from gluon.html import LABEL, LEGEND, LI, LINK, MARKMIN, MENU, META, OBJECT, OL, OPTGROUP, OPTION, PRE, SCRIPT, SELECT, STRONG
|
||||
from gluon.html import STYLE, TABLE, TR, TD, TAG, TBODY, THEAD, TEXTAREA, TFOOT, TH, TITLE, TT, UL, XHTML, XML
|
||||
from gluon.html import STYLE, TABLE, TR, TD, TAG, TBODY, THEAD, TEXTAREA, TFOOT, TH, TITLE, TT, UL, XHTML, XML, web2pyHTMLParser
|
||||
from gluon.storage import Storage
|
||||
from gluon.html import XML_pickle, XML_unpickle
|
||||
from gluon.html import TAG_pickler, TAG_unpickler
|
||||
from gluon._compat import xrange, PY2, to_native
|
||||
from gluon.decoder import decoder
|
||||
import re
|
||||
|
||||
class TestBareHelpers(unittest.TestCase):
|
||||
|
||||
@@ -155,7 +157,7 @@ class TestBareHelpers(unittest.TestCase):
|
||||
self.assertEqual(rtn, True)
|
||||
|
||||
# TODO: def test_XmlComponent(self):
|
||||
@unittest.skipIf(not PY2, "Skipping Python 3.x tests for XML.__repr__")
|
||||
|
||||
def test_XML(self):
|
||||
# sanitization process
|
||||
self.assertEqual(XML('<h1>Hello<a data-hello="world">World</a></h1>').xml(),
|
||||
@@ -179,19 +181,18 @@ class TestBareHelpers(unittest.TestCase):
|
||||
# you can compare them
|
||||
##self.assertEqual(XML('a') == XML('a'), True)
|
||||
# beware that the comparison is made on the XML repr
|
||||
self.assertEqual(XML('<h1>Hello<a data-hello="world">World</a></h1>', sanitize=True),
|
||||
XML('<h1>HelloWorld</h1>'))
|
||||
|
||||
self.assertEqual(XML('<h1>Hello<a data-hello="world">World</a></h1>', sanitize=True).__repr__(),
|
||||
XML('<h1>HelloWorld</h1>').__repr__())
|
||||
# bug check for the sanitizer for closing no-close tags
|
||||
self.assertEqual(XML('<p>Test</p><br/><p>Test</p><br/>', sanitize=True),
|
||||
XML('<p>Test</p><br /><p>Test</p><br />'))
|
||||
self.assertEqual(XML('<p>Test</p><br/><p>Test</p><br/>', sanitize=True).xml(),
|
||||
XML('<p>Test</p><br /><p>Test</p><br />').xml())
|
||||
# basic flatten test
|
||||
self.assertEqual(XML('<p>Test</p>').flatten(), '<p>Test</p>')
|
||||
self.assertEqual(XML('<p>Test</p>').flatten(render=lambda text, tag, attr: text), '<p>Test</p>')
|
||||
|
||||
@unittest.skipIf(not PY2, "Skipping Python 3.x tests for XML_unpickle.__repr__")
|
||||
def test_XML_pickle_unpickle(self):
|
||||
# weird test
|
||||
self.assertEqual(XML_unpickle(XML_pickle('data to be pickle')[1][0]), 'data to be pickle')
|
||||
self.assertEqual(str(XML_unpickle(XML_pickle('data to be pickle')[1][0])), 'data to be pickle')
|
||||
|
||||
def test_DIV(self):
|
||||
# Empty DIV()
|
||||
@@ -255,6 +256,11 @@ class TestBareHelpers(unittest.TestCase):
|
||||
self.assertEqual(DIV('<p>Test</p>', _class="class_test").get('_class'), 'class_test')
|
||||
self.assertEqual(DIV(b'a').xml(), b'<div>a</div>')
|
||||
|
||||
def test_decoder(self):
|
||||
tag_html = '<div><span><a id="1-1" u:v="$">hello</a></span><p class="this is a test">world</p></div>'
|
||||
a = decoder(tag_html)
|
||||
self.assertEqual(a, tag_html)
|
||||
|
||||
def test_CAT(self):
|
||||
# Empty CAT()
|
||||
self.assertEqual(CAT().xml(), b'')
|
||||
@@ -636,8 +642,8 @@ class TestBareHelpers(unittest.TestCase):
|
||||
# These 2 crash AppVeyor and Travis with: "ImportError: No YAML serializer available"
|
||||
# self.assertEqual(FORM('<>', _a='1', _b='2').as_yaml(),
|
||||
# "accepted: null\nattributes: {_a: '1', _action: '#', _b: '2', _enctype: multipart/form-data, _method: post}\ncomponents: [<>]\nerrors: {}\nlatest: {}\nparent: null\nvars: {}\n")
|
||||
# self.assertEqual(FORM('<>', _a='1', _b='2').as_xml(),
|
||||
# '<?xml version="1.0" encoding="UTF-8"?><document><errors></errors><vars></vars><parent>None</parent><attributes><_enctype>multipart/form-data</_enctype><_action>#</_action><_b>2</_b><_a>1</_a><_method>post</_method></attributes><components><item>&lt;&gt;</item></components><accepted>None</accepted><latest></latest></document>')
|
||||
# TODO check tags content
|
||||
self.assertEqual(len(FORM('<>', _a='1', _b='2').as_xml()), 334)
|
||||
|
||||
def test_BEAUTIFY(self):
|
||||
#self.assertEqual(BEAUTIFY(['a', 'b', {'hello': 'world'}]).xml(),
|
||||
@@ -670,13 +676,42 @@ class TestBareHelpers(unittest.TestCase):
|
||||
|
||||
# TODO: def test_embed64(self):
|
||||
|
||||
# TODO: def test_web2pyHTMLParser(self):
|
||||
def test_web2pyHTMLParser(self):
|
||||
#tag should not be a byte
|
||||
self.assertEqual(web2pyHTMLParser("<div></div>").tree.components[0].tag, 'div')
|
||||
a = str(web2pyHTMLParser('<div>a<span>b</div>c').tree)
|
||||
self.assertEqual(a, "<div>a<span>b</span></div>c")
|
||||
|
||||
tree = web2pyHTMLParser('hello<div a="b">world</div>').tree
|
||||
tree.element(_a='b')['_c']=5
|
||||
self.assertEqual(str(tree), 'hello<div a="b" c="5">world</div>')
|
||||
|
||||
a = str(web2pyHTMLParser('<div><img class="img"/></div>', closed=['img']).tree)
|
||||
self.assertEqual(a, '<div><img class="img" /></div>')
|
||||
|
||||
#greater-than sign ( > ) --> decimal &#62; --> hexadecimal &#x3E;
|
||||
#Less-than sign ( < ) --> decimal &#60; --> hexadecimal &#x3C;
|
||||
# test decimal
|
||||
a = str(web2pyHTMLParser('<div>< ></div>').tree)
|
||||
self.assertEqual(a, '<div>< ></div>')
|
||||
# test hexadecimal
|
||||
a = str(web2pyHTMLParser('<div>< ></div>').tree)
|
||||
self.assertEqual(a, '<div>< ></div>')
|
||||
|
||||
def test_markdown(self):
|
||||
def markdown(text, tag=None, attributes={}):
|
||||
r = {None: re.sub('\s+',' ',text), \
|
||||
'h1':'#'+text+'\\n\\n', \
|
||||
'p':text+'\\n'}.get(tag,text)
|
||||
return r
|
||||
a=TAG('<h1>Header</h1><p>this is a test</p>')
|
||||
ret = a.flatten(markdown)
|
||||
self.assertEqual(ret, '#Header\\n\\nthis is a test\\n')
|
||||
|
||||
# TODO: def test_markdown_serializer(self):
|
||||
|
||||
# TODO: def test_markmin_serializer(self):
|
||||
|
||||
@unittest.skipIf(not PY2, "Skipping Python 3.x tests for MARKMIN")
|
||||
def test_MARKMIN(self):
|
||||
# This test passes with Python 2.7 but is expected to fail under 2.6
|
||||
# with self.assertRaises(TypeError) as cm:
|
||||
|
||||
Reference in New Issue
Block a user