diff --git a/libs/html5lib/__init__.py b/libs/html5lib/__init__.py index 66c1a8eb..19a4b7d6 100644 --- a/libs/html5lib/__init__.py +++ b/libs/html5lib/__init__.py @@ -20,4 +20,4 @@ from .serializer import serialize __all__ = ["HTMLParser", "parse", "parseFragment", "getTreeBuilder", "getTreeWalker", "serialize"] -__version__ = "0.99" +__version__ = "0.999" diff --git a/libs/html5lib/inputstream.py b/libs/html5lib/inputstream.py index 004bdd4a..9e03b931 100644 --- a/libs/html5lib/inputstream.py +++ b/libs/html5lib/inputstream.py @@ -1,5 +1,6 @@ from __future__ import absolute_import, division, unicode_literals from six import text_type +from six.moves import http_client import codecs import re @@ -118,7 +119,11 @@ class BufferedStream(object): def HTMLInputStream(source, encoding=None, parseMeta=True, chardet=True): - if hasattr(source, "read"): + if isinstance(source, http_client.HTTPResponse): + # Work around Python bug #20007: read(0) closes the connection. + # http://bugs.python.org/issue20007 + isUnicode = False + elif hasattr(source, "read"): isUnicode = isinstance(source.read(0), text_type) else: isUnicode = isinstance(source, text_type) diff --git a/libs/html5lib/treebuilders/__init__.py b/libs/html5lib/treebuilders/__init__.py old mode 100755 new mode 100644 diff --git a/libs/html5lib/treebuilders/_base.py b/libs/html5lib/treebuilders/_base.py old mode 100755 new mode 100644 diff --git a/libs/html5lib/treebuilders/etree.py b/libs/html5lib/treebuilders/etree.py old mode 100755 new mode 100644 diff --git a/libs/html5lib/treewalkers/lxmletree.py b/libs/html5lib/treewalkers/lxmletree.py index 375cc2e8..bc934ac0 100644 --- a/libs/html5lib/treewalkers/lxmletree.py +++ b/libs/html5lib/treewalkers/lxmletree.py @@ -87,10 +87,6 @@ class FragmentWrapper(object): self.tail = ensure_str(self.obj.tail) else: self.tail = None - self.isstring = isinstance(obj, str) or isinstance(obj, bytes) - # Support for bytes here is Py2 - if self.isstring: - self.obj = ensure_str(self.obj) def __getattr__(self, name): return getattr(self.obj, name) @@ -143,7 +139,7 @@ class TreeWalker(_base.NonRecursiveTreeWalker): elif isinstance(node, Doctype): return _base.DOCTYPE, node.name, node.public_id, node.system_id - elif isinstance(node, FragmentWrapper) and node.isstring: + elif isinstance(node, FragmentWrapper) and not hasattr(node, "tag"): return _base.TEXT, node.obj elif node.tag == etree.Comment: