Update library: html5lib

This commit is contained in:
Ruud
2014-01-20 16:50:21 +01:00
parent 04539edb45
commit f318524070
6 changed files with 8 additions and 7 deletions

View File

@@ -20,4 +20,4 @@ from .serializer import serialize
__all__ = ["HTMLParser", "parse", "parseFragment", "getTreeBuilder",
"getTreeWalker", "serialize"]
-__version__ = "0.99"
+__version__ = "0.999"

View File

@@ -1,5 +1,6 @@
from __future__ import absolute_import, division, unicode_literals
from six import text_type
+from six.moves import http_client
import codecs
import re
@@ -118,7 +119,11 @@ class BufferedStream(object):
def HTMLInputStream(source, encoding=None, parseMeta=True, chardet=True):
-if hasattr(source, "read"):
+if isinstance(source, http_client.HTTPResponse):
+# Work around Python bug #20007: read(0) closes the connection.
+# http://bugs.python.org/issue20007
+isUnicode = False
+elif hasattr(source, "read"):
isUnicode = isinstance(source.read(0), text_type)
else:
isUnicode = isinstance(source, text_type)

0
libs/html5lib/treebuilders/__init__.py Executable file → Normal file
View File

0
libs/html5lib/treebuilders/_base.py Executable file → Normal file
View File

0
libs/html5lib/treebuilders/etree.py Executable file → Normal file
View File

View File

@@ -87,10 +87,6 @@ class FragmentWrapper(object):
self.tail = ensure_str(self.obj.tail)
else:
self.tail = None
-self.isstring = isinstance(obj, str) or isinstance(obj, bytes)
-# Support for bytes here is Py2
-if self.isstring:
-self.obj = ensure_str(self.obj)
def __getattr__(self, name):
return getattr(self.obj, name)
@@ -143,7 +139,7 @@ class TreeWalker(_base.NonRecursiveTreeWalker):
elif isinstance(node, Doctype):
return _base.DOCTYPE, node.name, node.public_id, node.system_id
-elif isinstance(node, FragmentWrapper) and node.isstring:
+elif isinstance(node, FragmentWrapper) and not hasattr(node, "tag"):
return _base.TEXT, node.obj
elif node.tag == etree.Comment: