diff --git a/gluon/decoder.py b/gluon/decoder.py
index d04aaf9a..4fc068dd 100644
--- a/gluon/decoder.py
+++ b/gluon/decoder.py
@@ -9,7 +9,7 @@ Based on http://code.activestate.com/recipes/52257/
Licensed under the PSF License
"""
-
+from gluon._compat import to_unicode
import codecs
# None represents a potentially variable byte. "##" in the XML spec...
@@ -77,4 +77,4 @@ def autoDetectXMLEncoding(buffer):
def decoder(buffer):
encoding = autoDetectXMLEncoding(buffer)
- return buffer.decode(encoding).encode('utf8')
+ return to_unicode(buffer, charset=encoding)
diff --git a/gluon/html.py b/gluon/html.py
index ceef7bd2..5ae5f4cd 100644
--- a/gluon/html.py
+++ b/gluon/html.py
@@ -20,7 +20,7 @@ import urllib
import base64
from gluon import sanitizer, decoder
import itertools
-from gluon._compat import reduce, pickle, copyreg, HTMLParser, name2codepoint, iteritems, unichr, unicodeT, urllib_quote, to_bytes, to_native, to_unicode, basestring, urlencode, implements_bool, text_type
+from gluon._compat import reduce, pickle, copyreg, HTMLParser, name2codepoint, iteritems, unichr, unicodeT, urllib_quote, to_bytes, to_native, to_unicode, basestring, urlencode, implements_bool, text_type, long
from gluon.utils import local_html_escape
import marshal
@@ -998,9 +998,9 @@ class DIV(XmlComponent):
if isinstance(c, XmlComponent):
s = c.flatten(render)
elif render:
- s = render(str(c))
+ s = render(to_native(c))
else:
- s = str(c)
+ s = to_native(c)
text += s
if render:
text = render(text, self.tag, self.attributes)
@@ -1281,7 +1281,6 @@ class __TAG__(XmlComponent):
def __getattr__(self, name):
if name[-1:] == '_':
name = name[:-1] + '/'
- name=to_bytes(name)
return lambda *a, **b: __tag_div__(name, *a, **b)
def __call__(self, html):
@@ -2376,17 +2375,17 @@ class FORM(DIV):
def as_json(self, sanitize=True):
d = self.as_dict(flat=True, sanitize=sanitize)
- from serializers import json
+ from gluon.serializers import json
return json(d)
def as_yaml(self, sanitize=True):
d = self.as_dict(flat=True, sanitize=sanitize)
- from serializers import yaml
+ from gluon.serializers import yaml
return yaml(d)
def as_xml(self, sanitize=True):
d = self.as_dict(flat=True, sanitize=sanitize)
- from serializers import xml
+ from gluon.serializers import xml
return xml(d)
@@ -2655,36 +2654,24 @@ class web2pyHTMLParser(HTMLParser):
"""
obj = web2pyHTMLParser(text) parses and html/xml text into web2py helpers.
obj.tree contains the root of the tree, and tree can be manipulated
-
- >>> str(web2pyHTMLParser('hello
wor<ldxxxyyy
zzz').tree)
- 'hellowor<ldxxxyyy
zzz'
- >>> str(web2pyHTMLParser('ab
c').tree)
- 'ab
c'
- >>> tree = web2pyHTMLParser('helloworld
').tree
- >>> tree.element(_a='b')['_c']=5
- >>> str(tree)
- 'helloworld
'
"""
+
def __init__(self, text, closed=('input', 'link')):
HTMLParser.__init__(self)
self.tree = self.parent = TAG['']()
self.closed = closed
- self.tags = [x for x in __all__ if isinstance(eval(x), DIV)]
self.last = None
self.feed(text)
def handle_starttag(self, tagname, attrs):
- if tagname.upper() in self.tags:
- tag = eval(tagname.upper())
- else:
- if tagname in self.closed:
- tagname += '/'
- tag = TAG[tagname]()
+ if tagname in self.closed:
+ tagname += '/'
+ tag = TAG[tagname]()
for key, value in attrs:
tag['_' + key] = value
tag.parent = self.parent
self.parent.append(tag)
- if not tag.tag.endswith(b'/'):
+ if not tag.tag.endswith('/'):
self.parent = tag
else:
self.last = tag.tag[:-1]
@@ -2707,7 +2694,6 @@ class web2pyHTMLParser(HTMLParser):
self.parent.append(entitydefs[name])
def handle_endtag(self, tagname):
- tagname = to_bytes(tagname)
# this deals with unbalanced tags
if tagname == self.last:
return
diff --git a/gluon/sanitizer.py b/gluon/sanitizer.py
index dbf84eca..5cd2ea4a 100644
--- a/gluon/sanitizer.py
+++ b/gluon/sanitizer.py
@@ -11,7 +11,7 @@ Cross-site scripting (XSS) defense
"""
from gluon._compat import HTMLParser, urlparse, entitydefs, basestring
-from cgi import escape
+from gluon.utils import local_html_escape
from formatter import AbstractFormatter
from xml.sax.saxutils import quoteattr
@@ -21,7 +21,7 @@ __all__ = ['sanitize']
def xssescape(text):
"""Gets rid of < and > and & and, for good measure, :"""
- return escape(text, quote=True).replace(':', ':')
+ return local_html_escape(text, quote=True).replace(':', ':')
class XssCleaner(HTMLParser):
diff --git a/gluon/tests/test_html.py b/gluon/tests/test_html.py
index e091d1ce..6ee181d5 100644
--- a/gluon/tests/test_html.py
+++ b/gluon/tests/test_html.py
@@ -11,11 +11,13 @@ import unittest
from gluon.html import A, ASSIGNJS, B, BEAUTIFY, P, BODY, BR, BUTTON, CAT, CENTER, CODE, COL, COLGROUP, DIV, SPAN, URL, verifyURL
from gluon.html import truncate_string, EM, FIELDSET, FORM, H1, H2, H3, H4, H5, H6, HEAD, HR, HTML, I, IFRAME, IMG, INPUT, EMBED
from gluon.html import LABEL, LEGEND, LI, LINK, MARKMIN, MENU, META, OBJECT, OL, OPTGROUP, OPTION, PRE, SCRIPT, SELECT, STRONG
-from gluon.html import STYLE, TABLE, TR, TD, TAG, TBODY, THEAD, TEXTAREA, TFOOT, TH, TITLE, TT, UL, XHTML, XML
+from gluon.html import STYLE, TABLE, TR, TD, TAG, TBODY, THEAD, TEXTAREA, TFOOT, TH, TITLE, TT, UL, XHTML, XML, web2pyHTMLParser
from gluon.storage import Storage
from gluon.html import XML_pickle, XML_unpickle
from gluon.html import TAG_pickler, TAG_unpickler
from gluon._compat import xrange, PY2, to_native
+from gluon.decoder import decoder
+import re
class TestBareHelpers(unittest.TestCase):
@@ -155,7 +157,7 @@ class TestBareHelpers(unittest.TestCase):
self.assertEqual(rtn, True)
# TODO: def test_XmlComponent(self):
- @unittest.skipIf(not PY2, "Skipping Python 3.x tests for XML.__repr__")
+
def test_XML(self):
# sanitization process
self.assertEqual(XML('').xml(),
@@ -179,19 +181,18 @@ class TestBareHelpers(unittest.TestCase):
# you can compare them
##self.assertEqual(XML('a') == XML('a'), True)
# beware that the comparison is made on the XML repr
- self.assertEqual(XML('', sanitize=True),
- XML('HelloWorld
'))
+
+ self.assertEqual(XML('', sanitize=True).__repr__(),
+ XML('HelloWorld
').__repr__())
# bug check for the sanitizer for closing no-close tags
- self.assertEqual(XML('Test
Test
', sanitize=True),
- XML('Test
Test
'))
+ self.assertEqual(XML('Test
Test
', sanitize=True).xml(),
+ XML('Test
Test
').xml())
# basic flatten test
self.assertEqual(XML('Test
').flatten(), 'Test
')
self.assertEqual(XML('Test
').flatten(render=lambda text, tag, attr: text), 'Test
')
- @unittest.skipIf(not PY2, "Skipping Python 3.x tests for XML_unpickle.__repr__")
def test_XML_pickle_unpickle(self):
- # weird test
- self.assertEqual(XML_unpickle(XML_pickle('data to be pickle')[1][0]), 'data to be pickle')
+ self.assertEqual(str(XML_unpickle(XML_pickle('data to be pickle')[1][0])), 'data to be pickle')
def test_DIV(self):
# Empty DIV()
@@ -255,6 +256,11 @@ class TestBareHelpers(unittest.TestCase):
self.assertEqual(DIV('Test
', _class="class_test").get('_class'), 'class_test')
self.assertEqual(DIV(b'a').xml(), b'a
')
+ def test_decoder(self):
+ tag_html = ''
+ a = decoder(tag_html)
+ self.assertEqual(a, tag_html)
+
def test_CAT(self):
# Empty CAT()
self.assertEqual(CAT().xml(), b'')
@@ -636,8 +642,8 @@ class TestBareHelpers(unittest.TestCase):
# These 2 crash AppVeyor and Travis with: "ImportError: No YAML serializer available"
# self.assertEqual(FORM('<>', _a='1', _b='2').as_yaml(),
# "accepted: null\nattributes: {_a: '1', _action: '#', _b: '2', _enctype: multipart/form-data, _method: post}\ncomponents: [<>]\nerrors: {}\nlatest: {}\nparent: null\nvars: {}\n")
- # self.assertEqual(FORM('<>', _a='1', _b='2').as_xml(),
- # 'None<_enctype>multipart/form-data<_action>#<_b>2<_a>1<_method>post- <>
None')
+ # TODO: assert the actual XML content instead of only its length
+ self.assertEqual(len(FORM('<>', _a='1', _b='2').as_xml()), 334)
def test_BEAUTIFY(self):
#self.assertEqual(BEAUTIFY(['a', 'b', {'hello': 'world'}]).xml(),
@@ -670,13 +676,42 @@ class TestBareHelpers(unittest.TestCase):
# TODO: def test_embed64(self):
- # TODO: def test_web2pyHTMLParser(self):
+ def test_web2pyHTMLParser(self):
+ # the parsed tag name must be a native str, not bytes
+ self.assertEqual(web2pyHTMLParser("").tree.components[0].tag, 'div')
+ a = str(web2pyHTMLParser('ab
c').tree)
+ self.assertEqual(a, "ab
c")
+
+ tree = web2pyHTMLParser('helloworld
').tree
+ tree.element(_a='b')['_c']=5
+ self.assertEqual(str(tree), 'helloworld
')
+
+ a = str(web2pyHTMLParser('', closed=['img']).tree)
+ self.assertEqual(a, '')
+
+ # greater-than sign ( > ) --> decimal &#62; --> hexadecimal &#x3E;
+ # less-than sign ( < ) --> decimal &#60; --> hexadecimal &#x3C;
+ # test decimal
+ a = str(web2pyHTMLParser('< >
').tree)
+ self.assertEqual(a, '< >
')
+ # test hexadecimal
+ a = str(web2pyHTMLParser('< >
').tree)
+ self.assertEqual(a, '< >
')
+
+ def test_markdown(self):
+        def markdown(text, tag=None, attributes=None):
+            # Render callback handed to flatten(): the flatten code in this
+            # patch calls render(text) for leaf content and
+            # render(text, tag, attributes) for each element.
+            # `attributes` exists only to match that call shape and is unused,
+            # so a None default replaces the shared mutable {} default.
+            rendered = {
+                # raw string: '\s' in a plain literal is an invalid escape
+                None: re.sub(r'\s+', ' ', text),
+                'h1': '#' + text + '\\n\\n',
+                'p': text + '\\n',
+            }
+            # any other tag passes its text through unchanged
+            return rendered.get(tag, text)
+ a=TAG('Header
this is a test
')
+ ret = a.flatten(markdown)
+ self.assertEqual(ret, '#Header\\n\\nthis is a test\\n')
# TODO: def test_markdown_serializer(self):
# TODO: def test_markmin_serializer(self):
- @unittest.skipIf(not PY2, "Skipping Python 3.x tests for MARKMIN")
def test_MARKMIN(self):
# This test pass with python 2.7 but expected to fail under 2.6
# with self.assertRaises(TypeError) as cm: