From 4008774908e6838aa27ffb2d3cf54d3f730a5bb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joel=20K=C3=A5berg?= Date: Tue, 19 Nov 2013 18:51:28 +0100 Subject: [PATCH 01/32] append label unnecessary just set the full path to the dir --- couchpotato/core/downloaders/rtorrent/__init__.py | 8 -------- couchpotato/core/downloaders/rtorrent/main.py | 4 +--- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/couchpotato/core/downloaders/rtorrent/__init__.py b/couchpotato/core/downloaders/rtorrent/__init__.py index 026a56c6..684ea45e 100755 --- a/couchpotato/core/downloaders/rtorrent/__init__.py +++ b/couchpotato/core/downloaders/rtorrent/__init__.py @@ -58,14 +58,6 @@ config = [{ 'advanced': True, 'description': 'Also remove the leftover files.', }, - { - 'name': 'append_label', - 'label': 'Append Label', - 'default': False, - 'advanced': True, - 'type': 'bool', - 'description': 'Append label to download location. Requires you to set the download location above.', - }, { 'name': 'paused', 'type': 'bool', diff --git a/couchpotato/core/downloaders/rtorrent/main.py b/couchpotato/core/downloaders/rtorrent/main.py index d7ae589f..8381f0a2 100755 --- a/couchpotato/core/downloaders/rtorrent/main.py +++ b/couchpotato/core/downloaders/rtorrent/main.py @@ -125,9 +125,7 @@ class rTorrent(Downloader): if self.conf('label'): torrent.set_custom(1, self.conf('label')) - if self.conf('directory') and self.conf('append_label'): - torrent.set_directory(os.path.join(self.conf('directory'), self.conf('label'))) - elif self.conf('directory'): + if self.conf('directory'): torrent.set_directory(self.conf('directory')) # Set Ratio Group From b7d93b84dd74cdbee01543be17053841de740455 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joel=20K=C3=A5berg?= Date: Tue, 19 Nov 2013 18:59:54 +0100 Subject: [PATCH 02/32] option to set download directory in utorrent --- couchpotato/core/downloaders/utorrent/__init__.py | 5 +++++ couchpotato/core/downloaders/utorrent/main.py | 15 +++++++++++---- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/couchpotato/core/downloaders/utorrent/__init__.py b/couchpotato/core/downloaders/utorrent/__init__.py index d45e2e6c..0c4c323c 100644 --- a/couchpotato/core/downloaders/utorrent/__init__.py +++ b/couchpotato/core/downloaders/utorrent/__init__.py @@ -36,6 +36,11 @@ config = [{ 'name': 'label', 'description': 'Label to add torrent as.', }, + { + 'name': 'directory', + 'type': 'directory', + 'description': 'Download to this directory. Keep empty for default uTorrent download directory.', + }, { 'name': 'remove_complete', 'label': 'Remove torrent', diff --git a/couchpotato/core/downloaders/utorrent/main.py b/couchpotato/core/downloaders/utorrent/main.py index 1db1b8a3..e05d1043 100644 --- a/couchpotato/core/downloaders/utorrent/main.py +++ b/couchpotato/core/downloaders/utorrent/main.py @@ -77,6 +77,7 @@ class uTorrent(Downloader): else: info = bdecode(filedata)["info"] torrent_hash = sha1(benc(info)).hexdigest().upper() + torrent_filename = self.createFileName(data, filedata, movie) if data.get('seed_ratio'): @@ -91,11 +92,17 @@ class uTorrent(Downloader): if len(torrent_hash) == 32: torrent_hash = b16encode(b32decode(torrent_hash)) + # Set download directory + if self.conf('directory'): + directory = self.conf('directory') + else: + directory = False + # Send request to uTorrent if data.get('protocol') == 'torrent_magnet': - self.utorrent_api.add_torrent_uri(torrent_filename, data.get('url')) + self.utorrent_api.add_torrent_uri(torrent_filename, data.get('url'), directory) else: - self.utorrent_api.add_torrent_file(torrent_filename, filedata) + self.utorrent_api.add_torrent_file(torrent_filename, filedata, directory) # Change settings of added torrent self.utorrent_api.set_torrent(torrent_hash, torrent_params) @@ -249,13 +256,13 @@ class uTorrentAPI(object): def add_torrent_uri(self, filename, torrent, add_folder = False): action = "action=add-url&s=%s" % urllib.quote(torrent) if add_folder: - action += "&path=%s" % urllib.quote(filename) + action += "&path=%s" % urllib.quote(add_folder) return self._request(action) def add_torrent_file(self, filename, filedata, add_folder = False): action = "action=add-file" if add_folder: - action += "&path=%s" % urllib.quote(filename) + action += "&path=%s" % urllib.quote(add_folder) return self._request(action, {"torrent_file": (ss(filename), filedata)}) def set_torrent(self, hash, params): From d31b7eb72d811852634c3cf3af47046450cd9117 Mon Sep 17 00:00:00 2001 From: Ruud Date: Tue, 19 Nov 2013 23:45:12 +0100 Subject: [PATCH 03/32] Add date and message-id to email notification --- couchpotato/core/notifications/email/main.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/couchpotato/core/notifications/email/main.py b/couchpotato/core/notifications/email/main.py index c67ac97d..41a4323b 100644 --- a/couchpotato/core/notifications/email/main.py +++ b/couchpotato/core/notifications/email/main.py @@ -4,6 +4,7 @@ from couchpotato.core.logger import CPLog from couchpotato.core.notifications.base import Notification from couchpotato.environment import Env from email.mime.text import MIMEText +from email.utils import formatdate, make_msgid import smtplib import traceback @@ -30,6 +31,8 @@ class Email(Notification): message['Subject'] = self.default_title message['From'] = from_address message['To'] = to_address + message['Date'] = formatdate(localtime = 1) + message['Message-ID'] = make_msgid() try: # Open the SMTP connection, via SSL if requested From ed19fd0254b1301b5ba73619530f6cb792870c98 Mon Sep 17 00:00:00 2001 From: Dean Gardiner Date: Wed, 20 Nov 2013 22:04:11 +1300 Subject: [PATCH 04/32] Added Pushbullet notifications --- .../core/notifications/pushbullet/__init__.py | 39 +++++++++ .../core/notifications/pushbullet/main.py | 86 +++++++++++++++++++ 2 files changed, 125 insertions(+) create mode 100644 couchpotato/core/notifications/pushbullet/__init__.py create mode 100644 couchpotato/core/notifications/pushbullet/main.py diff --git a/couchpotato/core/notifications/pushbullet/__init__.py b/couchpotato/core/notifications/pushbullet/__init__.py new file mode 100644 index 00000000..e61a44e3 --- /dev/null +++ b/couchpotato/core/notifications/pushbullet/__init__.py @@ -0,0 +1,39 @@ +from .main import Pushbullet + +def start(): + return Pushbullet() + +config = [{ + 'name': 'pushbullet', + 'groups': [ + { + 'tab': 'notifications', + 'list': 'notification_providers', + 'name': 'pushbullet', + 'options': [ + { + 'name': 'enabled', + 'default': 0, + 'type': 'enabler', + }, + { + 'name': 'api_key', + 'label': 'User API Key' + }, + { + 'name': 'devices', + 'default': '', + 'advanced': True, + 'description': 'IDs of devices to send notifications to, empty = all devices' + }, + { + 'name': 'on_snatch', + 'default': 0, + 'type': 'bool', + 'advanced': True, + 'description': 'Also send message when movie is snatched.', + }, + ], + } + ], +}] diff --git a/couchpotato/core/notifications/pushbullet/main.py b/couchpotato/core/notifications/pushbullet/main.py new file mode 100644 index 00000000..2e6db29d --- /dev/null +++ b/couchpotato/core/notifications/pushbullet/main.py @@ -0,0 +1,86 @@ +from couchpotato.core.helpers.encoding import toUnicode +from couchpotato.core.helpers.variable import tryInt +from couchpotato.core.logger import CPLog +from couchpotato.core.notifications.base import Notification +import base64 +import json + +log = CPLog(__name__) + + +class Pushbullet(Notification): + + url = 'https://api.pushbullet.com/api/%s' + + def notify(self, message = '', data = None, listener = None): + if not data: data = {} + + devices = self.getDevices() + if devices is None: + return False + + # Get all the device IDs linked to this user + if not len(devices): + response = self.request('devices') + if not response: + return False + + devices += [device.get('id') for device in response['devices']] + + successful = 0 + for device in devices: + response = self.request( + 'pushes', + cache = False, + device_id = device, + type = 'note', + title = self.default_title, + body = toUnicode(message) + ) + + if response: + successful += 1 + else: + log.error('Unable to push notification to Pushbullet device with ID %s' % device) + + return successful == len(devices) + + def getDevices(self): + devices = [d.strip() for d in self.conf('devices').split(',')] + + # Remove empty items + devices = [d for d in devices if len(d)] + + # Break on any ids that aren't integers + valid_devices = [] + + for device_id in devices: + d = tryInt(device_id, None) + + if not d: + log.error('Device ID "%s" is not valid', device_id) + return None + + valid_devices.append(d) + + return valid_devices + + def request(self, method, cache = True, **kwargs): + try: + base64string = base64.encodestring('%s:' % self.conf('api_key'))[:-1] + + headers = { + "Authorization": "Basic %s" % base64string + } + + if cache: + return self.getJsonData(self.url % method, headers = headers, params = kwargs) + else: + data = self.urlopen(self.url % method, headers = headers, params = kwargs) + return json.loads(data) + + except Exception, ex: + log.error('Pushbullet request failed') + log.debug(ex) + + return None From f865484182f80b777cf5b3c4c9acaacb71ccda67 Mon Sep 17 00:00:00 2001 From: Kate von Roeder Date: Wed, 20 Nov 2013 05:47:36 -0800 Subject: [PATCH 05/32] Add Array.stableSort from mootools forge. Change calls to Array.sort to use new Array.stableSort. Fixes sorting problems on Chrome --- couchpotato/core/_base/clientscript/main.py | 1 + .../scripts/library/Array.stableSort.js | 56 +++++++++++++++++++ couchpotato/static/scripts/page/settings.js | 14 ++--- 3 files changed, 64 insertions(+), 7 deletions(-) create mode 100644 couchpotato/static/scripts/library/Array.stableSort.js diff --git a/couchpotato/core/_base/clientscript/main.py b/couchpotato/core/_base/clientscript/main.py index 1b7f1636..a0be0d02 100644 --- a/couchpotato/core/_base/clientscript/main.py +++ b/couchpotato/core/_base/clientscript/main.py @@ -34,6 +34,7 @@ class ClientScript(Plugin): 'scripts/library/question.js', 'scripts/library/scrollspy.js', 'scripts/library/spin.js', + 'scripts/library/Array.stableSort.js', 'scripts/couchpotato.js', 'scripts/api.js', 'scripts/library/history.js', diff --git a/couchpotato/static/scripts/library/Array.stableSort.js b/couchpotato/static/scripts/library/Array.stableSort.js new file mode 100644 index 00000000..062c7566 --- /dev/null +++ b/couchpotato/static/scripts/library/Array.stableSort.js @@ -0,0 +1,56 @@ +/* +--- + +script: Array.stableSort.js + +description: Add a stable sort algorithm for all browsers + +license: MIT-style license. + +authors: + - Yorick Sijsling + +requires: + core/1.3: '*' + +provides: + - [Array.stableSort, Array.mergeSort] + +... +*/ + +(function() { + + var defaultSortFunction = function(a, b) { + return a > b ? 1 : (a < b ? -1 : 0); + } + + Array.implement({ + + stableSort: function(compare) { + // I would love some real feature recognition. Problem is that an unstable algorithm sometimes/often gives the same result as an unstable algorithm. + return (Browser.chrome || Browser.firefox2 || Browser.opera9) ? this.mergeSort(compare) : this.sort(compare); + }, + + mergeSort: function(compare, token) { + compare = compare || defaultSortFunction; + if (this.length > 1) { + // Split and sort both parts + var right = this.splice(Math.floor(this.length / 2)).mergeSort(compare); + var left = this.splice(0).mergeSort(compare); // 'this' is now empty. + + // Merge parts together + while (left.length > 0 || right.length > 0) { + this.push( + right.length === 0 ? left.shift() + : left.length === 0 ? right.shift() + : compare(left[0], right[0]) > 0 ? right.shift() + : left.shift()); + } + } + return this; + } + + }); +})(); + diff --git a/couchpotato/static/scripts/page/settings.js b/couchpotato/static/scripts/page/settings.js index 68b41d0a..213c0d96 100644 --- a/couchpotato/static/scripts/page/settings.js +++ b/couchpotato/static/scripts/page/settings.js @@ -111,6 +111,10 @@ Page.Settings = new Class({ Cookie.write('advanced_toggle_checked', +self.advanced_toggle.checked, {'duration': 365}); }, + sortByOrder: function(a, b){ + return (a.order || 100) - (b.order || 100) + }, + create: function(json){ var self = this; @@ -141,13 +145,11 @@ Page.Settings = new Class({ options.include(section); }); - options.sort(function(a, b){ - return (a.order || 100) - (b.order || 100) - }).each(function(section){ + options.stableSort(self.sortByOrder).each(function(section){ var section_name = section.section_name; // Add groups to content - section.groups.sortBy('order').each(function(group){ + section.groups.stableSort(self.sortByOrder).each(function(group){ if(group.hidden) return; if(self.wizard_only && !group.wizard) @@ -184,9 +186,7 @@ Page.Settings = new Class({ } // Add options to group - group.options.sort(function(a, b){ - return (a.order || 100) - (b.order || 100) - }).each(function(option){ + group.options.stableSort(self.sortByOrder).each(function(option){ if(option.hidden) return; var class_name = (option.type || 'string').capitalize(); var input = new Option[class_name](section_name, option.name, self.getValue(section_name, option.name), option); From 309ec50691501a96a6bf50c99b8ca5fe09a5b583 Mon Sep 17 00:00:00 2001 From: Kate von Roeder Date: Wed, 20 Nov 2013 09:15:25 -0800 Subject: [PATCH 06/32] Array.sortBy should also use the new stablesort. --- couchpotato/static/scripts/couchpotato.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/couchpotato/static/scripts/couchpotato.js b/couchpotato/static/scripts/couchpotato.js index 59fac34b..d8d2655d 100644 --- a/couchpotato/static/scripts/couchpotato.js +++ b/couchpotato/static/scripts/couchpotato.js @@ -503,7 +503,7 @@ function randomString(length, extra) { case "string": saveKeyPath(argument.match(/[+-]|[^.]+/g)); break; } }); - return this.sort(comparer); + return this.stableSort(comparer); } }); From 185cb0196a29f5f7326b56bed6b5f84ee0e57d1d Mon Sep 17 00:00:00 2001 From: Kate von Roeder Date: Wed, 20 Nov 2013 13:36:08 -0800 Subject: [PATCH 07/32] Fix for #1578 - Depends on stableSort, so added to PR#2500. Object.each is not necessarily alphabetic when iterating an object's properties, so we pull the folders out of the object, add them to an array, and sort that. --- couchpotato/static/scripts/page/manage.js | 25 ++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/couchpotato/static/scripts/page/manage.js b/couchpotato/static/scripts/page/manage.js index 4827f51d..6955cf21 100644 --- a/couchpotato/static/scripts/page/manage.js +++ b/couchpotato/static/scripts/page/manage.js @@ -102,6 +102,8 @@ Page.Manage = new Class({ } } else { + // Capture progress so we can use it in our *each* closure + var progress = json.progress // Don't add loader when page is loading still if(!self.list.navigation) @@ -112,10 +114,13 @@ Page.Manage = new Class({ self.progress_container.empty(); - Object.each(json.progress, function(progress, folder){ + var sorted_table = self.parseProgress(json.progress) + + sorted_table.each(function(folder){ + var folder_progress = progress[folder] new Element('div').adopt( new Element('span.folder', {'text': folder}), - new Element('span.percentage', {'text': progress.total ? (((progress.total-progress.to_go)/progress.total)*100).round() + '%' : '0%'}) + new Element('span.percentage', {'text': folder_progress.total ? (((folder_progress.total-folder_progress.to_go)/folder_progress.total)*100).round() + '%' : '0%'}) ).inject(self.progress_container) }); @@ -124,7 +129,21 @@ Page.Manage = new Class({ }) }, 1000); + }, - } + parseProgress: function (progress_object) { + var folder, temp_array = []; + + /* Sort the properties on the progress object into an alphabetic array, ensuring that our folders display in appropriate alphabetic order. + + Bugfix for https://github.com/RuudBurger/CouchPotatoServer/issues/1578 + */ + for (folder in progress_object) { + if (progress_object.hasOwnProperty(folder)) { + temp_array.push(folder) + } + } + return temp_array.stableSort() + } }); From 99947fb135d8f526af59d8dfe0019406f0114fc0 Mon Sep 17 00:00:00 2001 From: Kate von Roeder Date: Wed, 20 Nov 2013 13:47:40 -0800 Subject: [PATCH 08/32] CSS fix for #1578 part 2 - Change text direction from RTL to LTR, fixing issue where root drives would show up as '\C:'. Weird! --- couchpotato/core/media/movie/_base/static/movie.css | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/couchpotato/core/media/movie/_base/static/movie.css b/couchpotato/core/media/movie/_base/static/movie.css index c013bd80..a88a2077 100644 --- a/couchpotato/core/media/movie/_base/static/movie.css +++ b/couchpotato/core/media/movie/_base/static/movie.css @@ -1036,7 +1036,7 @@ text-overflow: ellipsis; overflow: hidden; width: 85%; - direction: rtl; + direction: ltr; vertical-align: middle; } From ab923cc592858545992796a49bc7c90a82496966 Mon Sep 17 00:00:00 2001 From: Kate von Roeder Date: Wed, 20 Nov 2013 18:47:09 -0800 Subject: [PATCH 09/32] Sort directories so that we scan them in alphabetical order as well (keeps things nice and well ordered!) --- couchpotato/core/plugins/manage/main.py | 1 + 1 file changed, 1 insertion(+) diff --git a/couchpotato/core/plugins/manage/main.py b/couchpotato/core/plugins/manage/main.py index e8ccaf7e..87207615 100644 --- a/couchpotato/core/plugins/manage/main.py +++ b/couchpotato/core/plugins/manage/main.py @@ -79,6 +79,7 @@ class Manage(Plugin): try: directories = self.directories() + directories.sort() added_identifiers = [] # Add some progress From e1a311de40ab787f862f585ad41b44117b0b247c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joel=20K=C3=A5berg?= Date: Thu, 21 Nov 2013 19:55:36 +0100 Subject: [PATCH 10/32] initial couchtarter provider (torrent newznab) initial ground work based on newznab provider needs UI changes: http://i.imgur.com/4MiJUH5.png (need to add ratio and seed hours also) untested code --- .../providers/torrent/couchtater/__init__.py | 53 ++++++++ .../core/providers/torrent/couchtater/main.py | 121 ++++++++++++++++++ 2 files changed, 174 insertions(+) create mode 100644 couchpotato/core/providers/torrent/couchtater/__init__.py create mode 100644 couchpotato/core/providers/torrent/couchtater/main.py diff --git a/couchpotato/core/providers/torrent/couchtater/__init__.py b/couchpotato/core/providers/torrent/couchtater/__init__.py new file mode 100644 index 00000000..1b1cf9c5 --- /dev/null +++ b/couchpotato/core/providers/torrent/couchtater/__init__.py @@ -0,0 +1,53 @@ +from .main import Couchtarter + +def start(): + return Couchtarter() + +config = [{ + 'name': 'couchtart', + 'groups': [ + { + 'tab': 'searcher', + 'list': 'torrent_providers', + 'name': 'couchtart', + 'order': 10, + 'description': 'Cocuhtart providers.', + 'wizard': True, + 'options': [ + { + 'name': 'enabled', + 'type': 'enabler', + 'default': True, + }, + { + 'name': 'use', + 'default': '0,0,0,0,0,0' + }, + { + 'name': 'host', + 'default': '', + 'description': 'The url path of your Couchtart provider.', + }, + { + 'name': 'extra_score', + 'advanced': True, + 'label': 'Extra Score', + 'default': '0', + 'description': 'Starting score for each release found via this provider.', + }, + { + 'name': 'username', + 'default': '', + }, + { + 'name': 'pass_key', + 'default': ',', + 'label': 'Pass Key', + 'description': 'Can be found on your profile page', + 'type': 'combined', + 'combine': ['use', 'host', 'username', 'pass_key', 'extra_score'], + }, + ], + }, + ], +}] diff --git a/couchpotato/core/providers/torrent/couchtater/main.py b/couchpotato/core/providers/torrent/couchtater/main.py new file mode 100644 index 00000000..c7d2fa5a --- /dev/null +++ b/couchpotato/core/providers/torrent/couchtater/main.py @@ -0,0 +1,121 @@ +from couchpotato.core.helpers.encoding import tryUrlencode, toUnicode +from couchpotato.core.helpers.variable import splitString, tryInt +from couchpotato.core.logger import CPLog +from couchpotato.core.providers.base import ResultList +from couchpotato.core.providers.torrent.base import TorrentProvider +from couchpotato.environment import Env +import traceback + +log = CPLog(__name__) + +class Couchtarter(TorrentProvider): + + limits_reached = {} + + http_time_between_calls = 1 # Seconds + + def search(self, movie, quality): + hosts = self.getHosts() + + results = ResultList(self, movie, quality, imdb_results = True) + + for host in hosts: + if self.isDisabled(host): + continue + + self._searchOnHost(host, movie, quality, results) + + return results + + def _searchOnHost(self, host, movie, quality, results): + + arguments = tryUrlencode({ + 'user': host['username'], + 'passkey': host['pass_key'], + 'imdbid': movie['library']['identifier'].replace('tt', '') + }) + url = '%s&%s' % (host['host'], arguments) + + torrents = self.getJsonData(url, cache_timeout = 1800) + + if torrents: + try: + if torrents.get('Error'): + if 'Incorrect parameters.' in torrents['Error']: + log.error('Wrong parameters passed to: %s', host['host']) + elif 'Death by authorization.' in torrents['Error']: + log.error('Wrong username or pass key for: %s', host['host']) + else: + log.error('Unknown error for: %s', host['host']) + return #(can I disable this host somehow? and notify user?) + + elif torrents.get('Results'): + if 'None found' in torrents['Results']: + return + else: + for torrent in torrents['Results']: + print torrent['ReleaseName'] + print torrent['Size'] + print torrent['DownloadURL'] + #results.append({ + # 'id': tryInt(result['TorrentID']), + # 'name': toUnicode(result['ReleaseName']), + # 'size': tryInt(self.parseSize(result['Size'])), + # 'url': result['DownloadURL'], + # 'detail_url': result['DetailURL'], + # 'resoultion': result['Resolution'], + # 'score': host['extra_score'], + # 'get_more_info': result['IMDbID'] + #}) + + except: + log.error('Failed getting results from %s: %s', (host['host'], traceback.format_exc())) + + def getHosts(self): + + uses = splitString(str(self.conf('use')), clean = False) + hosts = splitString(self.conf('host'), clean = False) + usernames = splitString(self.conf('username'), clean = False) + pass_keys = splitString(self.conf('pass_key'), clean = False) + extra_score = splitString(self.conf('extra_score'), clean = False) + + list = [] + for nr in range(len(hosts)): + + try: key = pass_keys[nr] + except: key = '' + + try: host = hosts[nr] + except: host = '' + + list.append({ + 'use': uses[nr], + 'host': host, + 'pass_key': key, + 'extra_score': tryInt(extra_score[nr]) if len(extra_score) > nr else 0 + }) + + return list + + def belongsTo(self, url, provider = None, host = None): + + hosts = self.getHosts() + + for host in hosts: + result = super(Couchtater, self).belongsTo(url, host = host['host'], provider = provider) + if result: + return result + + def isDisabled(self, host = None): + return not self.isEnabled(host) + + def isEnabled(self, host = None): + + # Return true if at least one is enabled and no host is given + if host is None: + for host in self.getHosts(): + if self.isEnabled(host): + return True + return False + + return TorrentProvider.isEnabled(self) and host['host'] and host['pass_key'] and int(host['use']) From 357166414cd2946656da6d75bac163741d6fc6ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joel=20K=C3=A5berg?= Date: Thu, 21 Nov 2013 22:20:45 +0100 Subject: [PATCH 11/32] use .get() and added more options --- .../core/providers/torrent/couchtater/main.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/couchpotato/core/providers/torrent/couchtater/main.py b/couchpotato/core/providers/torrent/couchtater/main.py index c7d2fa5a..79c65110 100644 --- a/couchpotato/core/providers/torrent/couchtater/main.py +++ b/couchpotato/core/providers/torrent/couchtater/main.py @@ -58,14 +58,17 @@ class Couchtarter(TorrentProvider): print torrent['Size'] print torrent['DownloadURL'] #results.append({ - # 'id': tryInt(result['TorrentID']), - # 'name': toUnicode(result['ReleaseName']), - # 'size': tryInt(self.parseSize(result['Size'])), - # 'url': result['DownloadURL'], - # 'detail_url': result['DetailURL'], - # 'resoultion': result['Resolution'], + # 'id': tryInt(result.get('TorrentID')), + # 'name': toUnicode(result.get('ReleaseName')), + # 'url': result.get('DownloadURL'), + # 'detail_url': result.get('DetailURL'), + # 'size': tryInt(self.parseSize(result.get('Size'))), # 'score': host['extra_score'], - # 'get_more_info': result['IMDbID'] + # 'seeders': tryInt(result.get('Seeders'), + # 'leechers': tryInt(result.get('leechers'), + # 'resoultion': result.get('Resolution'), + # 'source': result.get('Media'), + # 'get_more_info': result.get('IMDbID') #}) except: From 8951e9fc9007a4bb08ede97af6493404c38664d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joel=20K=C3=A5berg?= Date: Thu, 21 Nov 2013 22:22:19 +0100 Subject: [PATCH 12/32] typo --- couchpotato/core/providers/torrent/couchtater/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/couchpotato/core/providers/torrent/couchtater/main.py b/couchpotato/core/providers/torrent/couchtater/main.py index 79c65110..e0262a5c 100644 --- a/couchpotato/core/providers/torrent/couchtater/main.py +++ b/couchpotato/core/providers/torrent/couchtater/main.py @@ -65,7 +65,7 @@ class Couchtarter(TorrentProvider): # 'size': tryInt(self.parseSize(result.get('Size'))), # 'score': host['extra_score'], # 'seeders': tryInt(result.get('Seeders'), - # 'leechers': tryInt(result.get('leechers'), + # 'leechers': tryInt(result.get('Leechers'), # 'resoultion': result.get('Resolution'), # 'source': result.get('Media'), # 'get_more_info': result.get('IMDbID') From 31a1af43d53c0f06310098caa4533597d54dd03f Mon Sep 17 00:00:00 2001 From: jchristi Date: Wed, 20 Nov 2013 23:51:14 -0500 Subject: [PATCH 13/32] Update fedora init file This took me awhile to figure out when trying to install for the first time. Luckily, I had the sickbeard init file to reference. --- init/fedora | 2 ++ 1 file changed, 2 insertions(+) diff --git a/init/fedora b/init/fedora index 47352471..ec8a9ccd 100644 --- a/init/fedora +++ b/init/fedora @@ -1,3 +1,5 @@ +#!/bin/sh +# ### BEGIN INIT INFO # Provides: CouchPotato application instance # Required-Start: $all From 0065ff5086fa6f738fa7dfb49869c98f32b64ea7 Mon Sep 17 00:00:00 2001 From: Ruud Date: Fri, 22 Nov 2013 01:34:50 +0100 Subject: [PATCH 14/32] Indentation cleanup --- couchpotato/static/scripts/page/manage.js | 32 ++++++++++------------- 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/couchpotato/static/scripts/page/manage.js b/couchpotato/static/scripts/page/manage.js index 6955cf21..eeeef628 100644 --- a/couchpotato/static/scripts/page/manage.js +++ b/couchpotato/static/scripts/page/manage.js @@ -102,8 +102,8 @@ Page.Manage = new Class({ } } else { - // Capture progress so we can use it in our *each* closure - var progress = json.progress + // Capture progress so we can use it in our *each* closure + var progress = json.progress // Don't add loader when page is loading still if(!self.list.navigation) @@ -114,10 +114,10 @@ Page.Manage = new Class({ self.progress_container.empty(); - var sorted_table = self.parseProgress(json.progress) + var sorted_table = self.parseProgress(json.progress) sorted_table.each(function(folder){ - var folder_progress = progress[folder] + var folder_progress = progress[folder] new Element('div').adopt( new Element('span.folder', {'text': folder}), new Element('span.percentage', {'text': folder_progress.total ? (((folder_progress.total-folder_progress.to_go)/folder_progress.total)*100).round() + '%' : '0%'}) @@ -129,21 +129,17 @@ Page.Manage = new Class({ }) }, 1000); - }, + }, - parseProgress: function (progress_object) { - var folder, temp_array = []; + parseProgress: function (progress_object) { + var folder, temp_array = []; - /* Sort the properties on the progress object into an alphabetic array, ensuring that our folders display in appropriate alphabetic order. - - Bugfix for https://github.com/RuudBurger/CouchPotatoServer/issues/1578 - */ - for (folder in progress_object) { - if (progress_object.hasOwnProperty(folder)) { - temp_array.push(folder) - } - } - return temp_array.stableSort() - } + for (folder in progress_object) { + if (progress_object.hasOwnProperty(folder)) { + temp_array.push(folder) + } + } + return temp_array.stableSort() + } }); From bb6e1e2909421b9d5e937277cc0da37dac68ed6b Mon Sep 17 00:00:00 2001 From: Ruud Date: Fri, 22 Nov 2013 15:17:35 +0100 Subject: [PATCH 15/32] Don't propagate core messages to other notification providers. --- couchpotato/core/notifications/base.py | 2 +- couchpotato/core/notifications/core/main.py | 11 ++++++++++- .../core/notifications/core/static/notification.js | 2 +- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/couchpotato/core/notifications/base.py b/couchpotato/core/notifications/base.py index 4c0d0992..63d2075e 100644 --- a/couchpotato/core/notifications/base.py +++ b/couchpotato/core/notifications/base.py @@ -17,7 +17,7 @@ class Notification(Provider): listen_to = [ 'renamer.after', 'movie.snatched', 'updater.available', 'updater.updated', - 'core.message', + 'core.message.important', ] dont_listen_to = [] diff --git a/couchpotato/core/notifications/core/main.py b/couchpotato/core/notifications/core/main.py index 04acf284..cd63c2cb 100644 --- a/couchpotato/core/notifications/core/main.py +++ b/couchpotato/core/notifications/core/main.py @@ -21,6 +21,12 @@ class CoreNotifier(Notification): m_lock = None + listen_to = [ + 'renamer.after', 'movie.snatched', + 'updater.available', 'updater.updated', + 'core.message', 'core.message.important', + ] + def __init__(self): super(CoreNotifier, self).__init__() @@ -121,7 +127,10 @@ class CoreNotifier(Notification): for message in messages: if message.get('time') > last_check: - fireEvent('core.message', message = message.get('message'), data = message) + message['sticky'] = True # Always sticky core messages + + message_type = 'core.message.important' if message.get('important') else 'core.message' + fireEvent(message_type, message = message.get('message'), data = message) if last_check < message.get('time'): last_check = message.get('time') diff --git a/couchpotato/core/notifications/core/static/notification.js b/couchpotato/core/notifications/core/static/notification.js index e485976e..79d199c6 100644 --- a/couchpotato/core/notifications/core/static/notification.js +++ b/couchpotato/core/notifications/core/static/notification.js @@ -50,7 +50,7 @@ var NotificationBase = new Class({ , 'top'); self.notifications.include(result); - if(result.data.important !== undefined && !result.read){ + if((result.data.important !== undefined || result.data.sticky !== undefined) && !result.read){ var sticky = true App.fireEvent('message', [result.message, sticky, result]) } From b8f78e311d3d41a1c5c539b08bd1e21cef749ec5 Mon Sep 17 00:00:00 2001 From: Ruud Date: Fri, 22 Nov 2013 15:38:33 +0100 Subject: [PATCH 16/32] Update scheduler module --- couchpotato/core/_base/scheduler/main.py | 2 +- libs/apscheduler/__init__.py | 4 +- libs/apscheduler/job.py | 29 +++--- libs/apscheduler/jobstores/ram_store.py | 2 +- libs/apscheduler/jobstores/redis_store.py | 91 +++++++++++++++++++ libs/apscheduler/jobstores/shelve_store.py | 5 +- .../apscheduler/jobstores/sqlalchemy_store.py | 18 ++-- libs/apscheduler/scheduler.py | 70 +++++++++++--- libs/apscheduler/triggers/cron/__init__.py | 16 ++-- libs/apscheduler/triggers/cron/expressions.py | 18 +++- libs/apscheduler/triggers/cron/fields.py | 3 +- libs/apscheduler/util.py | 10 +- 12 files changed, 215 insertions(+), 53 deletions(-) create mode 100644 libs/apscheduler/jobstores/redis_store.py diff --git a/couchpotato/core/_base/scheduler/main.py b/couchpotato/core/_base/scheduler/main.py index 2c97e1b4..87b05335 100644 --- a/couchpotato/core/_base/scheduler/main.py +++ b/couchpotato/core/_base/scheduler/main.py @@ -31,8 +31,8 @@ class Scheduler(Plugin): pass def doShutdown(self): - super(Scheduler, self).doShutdown() self.stop() + return super(Scheduler, self).doShutdown() def stop(self): if self.started: diff --git a/libs/apscheduler/__init__.py b/libs/apscheduler/__init__.py index a55959fe..d93e1b3b 100644 --- a/libs/apscheduler/__init__.py +++ b/libs/apscheduler/__init__.py @@ -1,3 +1,3 @@ -version_info = (2, 0, 2) +version_info = (2, 1, 1) version = '.'.join(str(n) for n in version_info[:3]) -release = version + ''.join(str(n) for n in version_info[3:]) +release = '.'.join(str(n) for n in version_info) diff --git a/libs/apscheduler/job.py b/libs/apscheduler/job.py index 868e7234..cfc09a2f 100644 --- a/libs/apscheduler/job.py +++ b/libs/apscheduler/job.py @@ -16,22 +16,25 @@ class MaxInstancesReachedError(Exception): class Job(object): """ Encapsulates the actual Job along with its metadata. Job instances - are created by the scheduler when adding jobs, and it should not be - directly instantiated. + are created by the scheduler when adding jobs, and should not be + directly instantiated. These options can be set when adding jobs + to the scheduler (see :ref:`job_options`). - :param trigger: trigger that determines the execution times - :param func: callable to call when the trigger is triggered - :param args: list of positional arguments to call func with - :param kwargs: dict of keyword arguments to call func with - :param name: name of the job (optional) - :param misfire_grace_time: seconds after the designated run time that + :var trigger: trigger that determines the execution times + :var func: callable to call when the trigger is triggered + :var args: list of positional arguments to call func with + :var kwargs: dict of keyword arguments to call func with + :var name: name of the job + :var misfire_grace_time: seconds after the designated run time that the job is still allowed to be run - :param coalesce: run once instead of many times if the scheduler determines + :var coalesce: run once instead of many times if the scheduler determines that the job should be run more than once in succession - :param max_runs: maximum number of times this job is allowed to be + :var max_runs: maximum number of times this job is allowed to be triggered - :param max_instances: maximum number of concurrently running + :var max_instances: maximum number of concurrently running instances allowed for this job + :var runs: number of times this job has been triggered + :var instances: number of concurrently running instances of this job """ id = None next_run_time = None @@ -130,5 +133,5 @@ class Job(object): return '' % (self.name, repr(self.trigger)) def __str__(self): - return '%s (trigger: %s, next run at: %s)' % (self.name, - str(self.trigger), str(self.next_run_time)) + return '%s (trigger: %s, next run at: %s)' % ( + self.name, str(self.trigger), str(self.next_run_time)) diff --git a/libs/apscheduler/jobstores/ram_store.py b/libs/apscheduler/jobstores/ram_store.py index 85091fe8..60458fba 100644 --- a/libs/apscheduler/jobstores/ram_store.py +++ b/libs/apscheduler/jobstores/ram_store.py @@ -8,7 +8,7 @@ from apscheduler.jobstores.base import JobStore class RAMJobStore(JobStore): def __init__(self): self.jobs = [] - + def add_job(self, job): self.jobs.append(job) diff --git a/libs/apscheduler/jobstores/redis_store.py b/libs/apscheduler/jobstores/redis_store.py new file mode 100644 index 00000000..5eabf4b1 --- /dev/null +++ b/libs/apscheduler/jobstores/redis_store.py @@ -0,0 +1,91 @@ +""" +Stores jobs in a Redis database. +""" +from uuid import uuid4 +from datetime import datetime +import logging + +from apscheduler.jobstores.base import JobStore +from apscheduler.job import Job + +try: + import cPickle as pickle +except ImportError: # pragma: nocover + import pickle + +try: + from redis import StrictRedis +except ImportError: # pragma: nocover + raise ImportError('RedisJobStore requires redis installed') + +try: + long = long +except NameError: + long = int + +logger = logging.getLogger(__name__) + + +class RedisJobStore(JobStore): + def __init__(self, db=0, key_prefix='jobs.', + pickle_protocol=pickle.HIGHEST_PROTOCOL, **connect_args): + self.jobs = [] + self.pickle_protocol = pickle_protocol + self.key_prefix = key_prefix + + if db is None: + raise ValueError('The "db" parameter must not be empty') + if not key_prefix: + raise ValueError('The "key_prefix" parameter must not be empty') + + self.redis = StrictRedis(db=db, **connect_args) + + def add_job(self, job): + job.id = str(uuid4()) + job_state = job.__getstate__() + job_dict = { + 'job_state': pickle.dumps(job_state, self.pickle_protocol), + 'runs': '0', + 'next_run_time': job_state.pop('next_run_time').isoformat()} + self.redis.hmset(self.key_prefix + job.id, job_dict) + self.jobs.append(job) + + def remove_job(self, job): + self.redis.delete(self.key_prefix + job.id) + self.jobs.remove(job) + + def load_jobs(self): + jobs = [] + keys = self.redis.keys(self.key_prefix + '*') + pipeline = self.redis.pipeline() + for key in keys: + pipeline.hgetall(key) + results = pipeline.execute() + + for job_dict in results: + job_state = {} + try: + job = Job.__new__(Job) + job_state = pickle.loads(job_dict['job_state'.encode()]) + job_state['runs'] = long(job_dict['runs'.encode()]) + dateval = job_dict['next_run_time'.encode()].decode() + job_state['next_run_time'] = datetime.strptime( + dateval, '%Y-%m-%dT%H:%M:%S') + job.__setstate__(job_state) + jobs.append(job) + except Exception: + job_name = job_state.get('name', '(unknown)') + logger.exception('Unable to restore job "%s"', job_name) + self.jobs = jobs + + def update_job(self, job): + attrs = { + 'next_run_time': job.next_run_time.isoformat(), + 'runs': job.runs} + self.redis.hmset(self.key_prefix + job.id, attrs) + + def close(self): + self.redis.connection_pool.disconnect() + + def __repr__(self): + return '<%s>' % self.__class__.__name__ diff --git a/libs/apscheduler/jobstores/shelve_store.py b/libs/apscheduler/jobstores/shelve_store.py index 87c95f8f..bd68333f 100644 --- a/libs/apscheduler/jobstores/shelve_store.py +++ b/libs/apscheduler/jobstores/shelve_store.py @@ -32,17 +32,20 @@ class ShelveJobStore(JobStore): def add_job(self, job): job.id = self._generate_id() - self.jobs.append(job) self.store[job.id] = job.__getstate__() + self.store.sync() + self.jobs.append(job) def update_job(self, job): job_dict = self.store[job.id] job_dict['next_run_time'] = job.next_run_time job_dict['runs'] = job.runs self.store[job.id] = job_dict + self.store.sync() def remove_job(self, job): del self.store[job.id] + self.store.sync() self.jobs.remove(job) def load_jobs(self): diff --git a/libs/apscheduler/jobstores/sqlalchemy_store.py b/libs/apscheduler/jobstores/sqlalchemy_store.py index 41ed4c7a..5b64a35a 100644 --- a/libs/apscheduler/jobstores/sqlalchemy_store.py +++ b/libs/apscheduler/jobstores/sqlalchemy_store.py @@ -4,6 +4,8 @@ Stores jobs in a database table using SQLAlchemy. import pickle import logging +import sqlalchemy + from apscheduler.jobstores.base import JobStore from apscheduler.job import Job @@ -28,17 +30,19 @@ class SQLAlchemyJobStore(JobStore): else: raise ValueError('Need either "engine" or "url" defined') - self.jobs_t = Table(tablename, metadata or MetaData(), + if sqlalchemy.__version__ < '0.7': + pickle_coltype = PickleType(pickle_protocol, mutable=False) + else: + pickle_coltype = PickleType(pickle_protocol) + self.jobs_t = Table( + tablename, metadata or MetaData(), Column('id', Integer, Sequence(tablename + '_id_seq', optional=True), primary_key=True), - Column('trigger', PickleType(pickle_protocol, mutable=False), - nullable=False), + Column('trigger', pickle_coltype, nullable=False), Column('func_ref', String(1024), nullable=False), - Column('args', PickleType(pickle_protocol, mutable=False), - nullable=False), - Column('kwargs', PickleType(pickle_protocol, mutable=False), - nullable=False), + Column('args', pickle_coltype, nullable=False), + Column('kwargs', pickle_coltype, nullable=False), Column('name', Unicode(1024)), Column('misfire_grace_time', Integer, nullable=False), Column('coalesce', Boolean, nullable=False), diff --git a/libs/apscheduler/scheduler.py b/libs/apscheduler/scheduler.py index 50769e4d..d6afcad2 100644 --- a/libs/apscheduler/scheduler.py +++ b/libs/apscheduler/scheduler.py @@ -35,7 +35,7 @@ class Scheduler(object): their execution. """ - _stopped = False + _stopped = True _thread = None def __init__(self, gconfig={}, **options): @@ -60,6 +60,7 @@ class Scheduler(object): self.misfire_grace_time = int(config.pop('misfire_grace_time', 1)) self.coalesce = asbool(config.pop('coalesce', True)) self.daemonic = asbool(config.pop('daemonic', True)) + self.standalone = asbool(config.pop('standalone', False)) # Configure the thread pool if 'threadpool' in config: @@ -85,6 +86,12 @@ class Scheduler(object): def start(self): """ Starts the scheduler in a new thread. + + In threaded mode (the default), this method will return immediately + after starting the scheduler thread. + + In standalone mode, this method will block until there are no more + scheduled jobs. """ if self.running: raise SchedulerAlreadyRunningError @@ -99,11 +106,15 @@ class Scheduler(object): del self._pending_jobs[:] self._stopped = False - self._thread = Thread(target=self._main_loop, name='APScheduler') - self._thread.setDaemon(self.daemonic) - self._thread.start() + if self.standalone: + self._main_loop() + else: + self._thread = Thread(target=self._main_loop, name='APScheduler') + self._thread.setDaemon(self.daemonic) + self._thread.start() - def shutdown(self, wait=True, shutdown_threadpool=True): + def shutdown(self, wait=True, shutdown_threadpool=True, + close_jobstores=True): """ Shuts down the scheduler and terminates the thread. Does not interrupt any currently running jobs. @@ -111,6 +122,7 @@ class Scheduler(object): :param wait: ``True`` to wait until all currently executing jobs have finished (if ``shutdown_threadpool`` is also ``True``) :param shutdown_threadpool: ``True`` to shut down the thread pool + :param close_jobstores: ``True`` to close all job stores after shutdown """ if not self.running: return @@ -123,11 +135,19 @@ class Scheduler(object): self._threadpool.shutdown(wait) # Wait until the scheduler thread terminates - self._thread.join() + if self._thread: + self._thread.join() + + # Close all job stores + if close_jobstores: + for jobstore in itervalues(self._jobstores): + jobstore.close() @property def running(self): - return not self._stopped and self._thread and self._thread.isAlive() + thread_alive = self._thread and self._thread.isAlive() + standalone = getattr(self, 'standalone', False) + return not self._stopped and (standalone or thread_alive) def add_jobstore(self, jobstore, alias, quiet=False): """ @@ -156,21 +176,25 @@ class Scheduler(object): if not quiet: self._wakeup.set() - def remove_jobstore(self, alias): + def remove_jobstore(self, alias, close=True): """ Removes the job store by the given alias from this scheduler. + :param close: ``True`` to close the job store after removing it :type alias: str """ self._jobstores_lock.acquire() try: - try: - del self._jobstores[alias] - except KeyError: + jobstore = self._jobstores.pop(alias) + if not jobstore: raise KeyError('No such job store: %s' % alias) finally: self._jobstores_lock.release() + # Close the job store if requested + if close: + jobstore.close() + # Notify listeners that a job store has been removed self._notify_listeners(JobStoreEvent(EVENT_JOBSTORE_REMOVED, alias)) @@ -245,8 +269,10 @@ class Scheduler(object): **options): """ Adds the given job to the job list and notifies the scheduler thread. + Any extra keyword arguments are passed along to the constructor of the + :class:`~apscheduler.job.Job` class (see :ref:`job_options`). - :param trigger: alias of the job store to store the job in + :param trigger: trigger that determines when ``func`` is called :param func: callable to run at the given time :param args: list of positional arguments to call func with :param kwargs: dict of keyword arguments to call func with @@ -276,6 +302,8 @@ class Scheduler(object): def add_date_job(self, func, date, args=None, kwargs=None, **options): """ Schedules a job to be completed on a specific date and time. + Any extra keyword arguments are passed along to the constructor of the + :class:`~apscheduler.job.Job` class (see :ref:`job_options`). :param func: callable to run at the given time :param date: the date/time to run the job at @@ -294,6 +322,8 @@ class Scheduler(object): **options): """ Schedules a job to be completed on specified intervals. + Any extra keyword arguments are passed along to the constructor of the + :class:`~apscheduler.job.Job` class (see :ref:`job_options`). :param func: callable to run :param weeks: number of weeks to wait @@ -322,6 +352,8 @@ class Scheduler(object): """ Schedules a job to be completed on times that match the given expressions. + Any extra keyword arguments are passed along to the constructor of the + :class:`~apscheduler.job.Job` class (see :ref:`job_options`). :param func: callable to run :param year: year to run on @@ -352,6 +384,8 @@ class Scheduler(object): This decorator does not wrap its host function. Unscheduling decorated functions is possible by passing the ``job`` attribute of the scheduled function to :meth:`unschedule_job`. + Any extra keyword arguments are passed along to the constructor of the + :class:`~apscheduler.job.Job` class (see :ref:`job_options`). """ def inner(func): func.job = self.add_cron_job(func, **options) @@ -364,6 +398,8 @@ class Scheduler(object): This decorator does not wrap its host function. Unscheduling decorated functions is possible by passing the ``job`` attribute of the scheduled function to :meth:`unschedule_job`. + Any extra keyword arguments are passed along to the constructor of the + :class:`~apscheduler.job.Job` class (see :ref:`job_options`). """ def inner(func): func.job = self.add_interval_job(func, **options) @@ -517,7 +553,8 @@ class Scheduler(object): job.runs += len(run_times) # Update the job, but don't keep finished jobs around - if job.compute_next_run_time(now + timedelta(microseconds=1)): + if job.compute_next_run_time( + now + timedelta(microseconds=1)): jobstore.update_job(job) else: self._remove_job(job, alias, jobstore) @@ -550,10 +587,15 @@ class Scheduler(object): logger.debug('Next wakeup is due at %s (in %f seconds)', next_wakeup_time, wait_seconds) self._wakeup.wait(wait_seconds) + self._wakeup.clear() + elif self.standalone: + logger.debug('No jobs left; shutting down scheduler') + self.shutdown() + break else: logger.debug('No jobs; waiting until a job is added') self._wakeup.wait() - self._wakeup.clear() + self._wakeup.clear() logger.info('Scheduler has been shut down') self._notify_listeners(SchedulerEvent(EVENT_SCHEDULER_SHUTDOWN)) diff --git a/libs/apscheduler/triggers/cron/__init__.py b/libs/apscheduler/triggers/cron/__init__.py index 763edb1e..9e69f720 100644 --- a/libs/apscheduler/triggers/cron/__init__.py +++ b/libs/apscheduler/triggers/cron/__init__.py @@ -21,8 +21,10 @@ class CronTrigger(object): if self.start_date: self.start_date = convert_to_datetime(self.start_date) - # Yank out all None valued fields + # Check field names and yank out all None valued fields for key, value in list(iteritems(values)): + if key not in self.FIELD_NAMES: + raise TypeError('Invalid field name: %s' % key) if value is None: del values[key] @@ -111,17 +113,17 @@ class CronTrigger(object): if next_value is None: # No valid value was found - next_date, fieldnum = self._increment_field_value(next_date, - fieldnum - 1) + next_date, fieldnum = self._increment_field_value( + next_date, fieldnum - 1) elif next_value > curr_value: # A valid, but higher than the starting value, was found if field.REAL: - next_date = self._set_field_value(next_date, fieldnum, - next_value) + next_date = self._set_field_value( + next_date, fieldnum, next_value) fieldnum += 1 else: - next_date, fieldnum = self._increment_field_value(next_date, - fieldnum) + next_date, fieldnum = self._increment_field_value( + next_date, fieldnum) else: # A valid value was found, no changes necessary fieldnum += 1 diff --git a/libs/apscheduler/triggers/cron/expressions.py b/libs/apscheduler/triggers/cron/expressions.py index 018c7a30..b5d29195 100644 --- a/libs/apscheduler/triggers/cron/expressions.py +++ b/libs/apscheduler/triggers/cron/expressions.py @@ -8,7 +8,7 @@ import re from apscheduler.util import asint __all__ = ('AllExpression', 'RangeExpression', 'WeekdayRangeExpression', - 'WeekdayPositionExpression') + 'WeekdayPositionExpression', 'LastDayOfMonthExpression') WEEKDAYS = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun'] @@ -176,3 +176,19 @@ class WeekdayPositionExpression(AllExpression): return "%s('%s', '%s')" % (self.__class__.__name__, self.options[self.option_num], WEEKDAYS[self.weekday]) + + +class LastDayOfMonthExpression(AllExpression): + value_re = re.compile(r'last', re.IGNORECASE) + + def __init__(self): + pass + + def get_next_value(self, date, field): + return monthrange(date.year, date.month)[1] + + def __str__(self): + return 'last' + + def __repr__(self): + return "%s()" % self.__class__.__name__ diff --git a/libs/apscheduler/triggers/cron/fields.py b/libs/apscheduler/triggers/cron/fields.py index ef970cc9..be5e5e33 100644 --- a/libs/apscheduler/triggers/cron/fields.py +++ b/libs/apscheduler/triggers/cron/fields.py @@ -85,7 +85,8 @@ class WeekField(BaseField): class DayOfMonthField(BaseField): - COMPILERS = BaseField.COMPILERS + [WeekdayPositionExpression] + COMPILERS = BaseField.COMPILERS + [WeekdayPositionExpression, + LastDayOfMonthExpression] def get_max(self, dateval): return monthrange(dateval.year, dateval.month)[1] diff --git a/libs/apscheduler/util.py b/libs/apscheduler/util.py index a49aaed8..dcede4c3 100644 --- a/libs/apscheduler/util.py +++ b/libs/apscheduler/util.py @@ -6,7 +6,6 @@ from datetime import date, datetime, timedelta from time import mktime import re import sys -from types import MethodType __all__ = ('asint', 'asbool', 'convert_to_datetime', 'timedelta_seconds', 'time_difference', 'datetime_ceil', 'combine_opts', @@ -64,7 +63,7 @@ def convert_to_datetime(input): return input elif isinstance(input, date): return datetime.fromordinal(input.toordinal()) - elif isinstance(input, str): + elif isinstance(input, basestring): m = _DATE_REGEX.match(input) if not m: raise ValueError('Invalid date string') @@ -109,7 +108,7 @@ def datetime_ceil(dateval): """ if dateval.microsecond > 0: return dateval + timedelta(seconds=1, - microseconds= -dateval.microsecond) + microseconds=-dateval.microsecond) return dateval @@ -143,7 +142,8 @@ def get_callable_name(func): if f_self and hasattr(func, '__name__'): if isinstance(f_self, type): # class method - return '%s.%s' % (f_self.__name__, func.__name__) + clsname = getattr(f_self, '__qualname__', None) or f_self.__name__ + return '%s.%s' % (clsname, func.__name__) # bound method return '%s.%s' % (f_self.__class__.__name__, func.__name__) @@ -169,7 +169,7 @@ def obj_to_ref(obj): raise ValueError except Exception: raise ValueError('Cannot determine the reference to %s' % repr(obj)) - + return ref From f53364eb6c75bcf46de3c491dab1e075cf0899c7 Mon Sep 17 00:00:00 2001 From: Ruud Date: Fri, 22 Nov 2013 16:08:54 +0100 Subject: [PATCH 17/32] Update Tornado --- libs/backports/__init__.py | 3 + libs/backports/ssl_match_hostname/README.txt | 42 +++++ libs/backports/ssl_match_hostname/__init__.py | 60 ++++++++ libs/tornado/auth.py | 68 +++++++- libs/tornado/autoreload.py | 15 +- libs/tornado/curl_httpclient.py | 11 ++ libs/tornado/gen.py | 23 ++- libs/tornado/httpclient.py | 30 +++- libs/tornado/httpserver.py | 26 +++- libs/tornado/httputil.py | 6 +- libs/tornado/ioloop.py | 3 +- libs/tornado/iostream.py | 2 +- libs/tornado/log.py | 2 +- libs/tornado/netutil.py | 78 +--------- libs/tornado/platform/asyncio.py | 134 ++++++++++++++++ libs/tornado/process.py | 3 +- libs/tornado/simple_httpclient.py | 67 ++++++-- libs/tornado/speedups.c | 49 ++++++ libs/tornado/tcpserver.py | 3 +- libs/tornado/web.py | 145 ++++++++++++++---- libs/tornado/websocket.py | 88 ++++++++--- libs/tornado/wsgi.py | 9 +- 22 files changed, 703 insertions(+), 164 deletions(-) create mode 100644 libs/backports/__init__.py create mode 100644 libs/backports/ssl_match_hostname/README.txt create mode 100644 libs/backports/ssl_match_hostname/__init__.py create mode 100644 libs/tornado/platform/asyncio.py create mode 100644 libs/tornado/speedups.c diff --git a/libs/backports/__init__.py b/libs/backports/__init__.py new file mode 100644 index 00000000..612d3283 --- /dev/null +++ b/libs/backports/__init__.py @@ -0,0 +1,3 @@ +# This is a Python "namespace package" http://www.python.org/dev/peps/pep-0382/ +from pkgutil import extend_path +__path__ = extend_path(__path__, __name__) diff --git a/libs/backports/ssl_match_hostname/README.txt b/libs/backports/ssl_match_hostname/README.txt new file mode 100644 index 00000000..f024fd7b --- /dev/null +++ b/libs/backports/ssl_match_hostname/README.txt @@ -0,0 +1,42 @@ + +The ssl.match_hostname() function from Python 3.2 +================================================= + +The Secure Sockets layer is only actually *secure* +if you check the hostname in the certificate returned +by the server to which you are connecting, +and verify that it matches to hostname +that you are trying to reach. + +But the matching logic, defined in `RFC2818`_, +can be a bit tricky to implement on your own. +So the ``ssl`` package in the Standard Library of Python 3.2 +now includes a ``match_hostname()`` function +for performing this check instead of requiring every application +to implement the check separately. + +This backport brings ``match_hostname()`` to users +of earlier versions of Python. +Simply make this distribution a dependency of your package, +and then use it like this:: + + from backports.ssl_match_hostname import match_hostname, CertificateError + ... + sslsock = ssl.wrap_socket(sock, ssl_version=ssl.PROTOCOL_SSLv3, + cert_reqs=ssl.CERT_REQUIRED, ca_certs=...) + try: + match_hostname(sslsock.getpeercert(), hostname) + except CertificateError, ce: + ... + +Note that the ``ssl`` module is only included in the Standard Library +for Python 2.6 and later; +users of Python 2.5 or earlier versions +will also need to install the ``ssl`` distribution +from the Python Package Index to use code like that shown above. + +Brandon Craig Rhodes is merely the packager of this distribution; +the actual code inside comes verbatim from Python 3.2. + +.. _RFC2818: http://tools.ietf.org/html/rfc2818.html + diff --git a/libs/backports/ssl_match_hostname/__init__.py b/libs/backports/ssl_match_hostname/__init__.py new file mode 100644 index 00000000..57076497 --- /dev/null +++ b/libs/backports/ssl_match_hostname/__init__.py @@ -0,0 +1,60 @@ +"""The match_hostname() function from Python 3.2, essential when using SSL.""" + +import re + +__version__ = '3.2a3' + +class CertificateError(ValueError): + pass + +def _dnsname_to_pat(dn): + pats = [] + for frag in dn.split(r'.'): + if frag == '*': + # When '*' is a fragment by itself, it matches a non-empty dotless + # fragment. + pats.append('[^.]+') + else: + # Otherwise, '*' matches any dotless fragment. + frag = re.escape(frag) + pats.append(frag.replace(r'\*', '[^.]*')) + return re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE) + +def match_hostname(cert, hostname): + """Verify that *cert* (in decoded format as returned by + SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 rules + are mostly followed, but IP addresses are not accepted for *hostname*. + + CertificateError is raised on failure. On success, the function + returns nothing. + """ + if not cert: + raise ValueError("empty or no certificate") + dnsnames = [] + san = cert.get('subjectAltName', ()) + for key, value in san: + if key == 'DNS': + if _dnsname_to_pat(value).match(hostname): + return + dnsnames.append(value) + if not san: + # The subject is only checked when subjectAltName is empty + for sub in cert.get('subject', ()): + for key, value in sub: + # XXX according to RFC 2818, the most specific Common Name + # must be used. + if key == 'commonName': + if _dnsname_to_pat(value).match(hostname): + return + dnsnames.append(value) + if len(dnsnames) > 1: + raise CertificateError("hostname %r " + "doesn't match either of %s" + % (hostname, ', '.join(map(repr, dnsnames)))) + elif len(dnsnames) == 1: + raise CertificateError("hostname %r " + "doesn't match %r" + % (hostname, dnsnames[0])) + else: + raise CertificateError("no appropriate commonName or " + "subjectAltName fields were found") diff --git a/libs/tornado/auth.py b/libs/tornado/auth.py index 0cbfa7c0..a2cef356 100755 --- a/libs/tornado/auth.py +++ b/libs/tornado/auth.py @@ -549,7 +549,7 @@ class OAuth2Mixin(object): @return_future def authorize_redirect(self, redirect_uri=None, client_id=None, client_secret=None, extra_params=None, - callback=None): + callback=None, scope=None, response_type="code"): """Redirects the user to obtain OAuth authorization for this service. Some providers require that you register a redirect URL with @@ -566,10 +566,13 @@ class OAuth2Mixin(object): """ args = { "redirect_uri": redirect_uri, - "client_id": client_id + "client_id": client_id, + "response_type": response_type } if extra_params: args.update(extra_params) + if scope: + args['scope'] = ' '.join(scope) self.redirect( url_concat(self._OAUTH_AUTHORIZE_URL, args)) callback() @@ -945,6 +948,67 @@ class GoogleMixin(OpenIdMixin, OAuthMixin): return OpenIdMixin.get_authenticated_user(self) +class GoogleOAuth2Mixin(OAuth2Mixin): + """Google authentication using OAuth2.""" + _OAUTH_AUTHORIZE_URL = "https://accounts.google.com/o/oauth2/auth" + _OAUTH_ACCESS_TOKEN_URL = "https://accounts.google.com/o/oauth2/token" + _OAUTH_NO_CALLBACKS = False + _OAUTH_SETTINGS_KEY = 'google_oauth' + + @_auth_return_future + def get_authenticated_user(self, redirect_uri, code, callback): + """Handles the login for the Google user, returning a user object. + + Example usage:: + + class GoogleOAuth2LoginHandler(LoginHandler, tornado.auth.GoogleOAuth2Mixin): + @tornado.web.asynchronous + @tornado.gen.coroutine + def get(self): + if self.get_argument("code", False): + user = yield self.get_authenticated_user( + redirect_uri='http://your.site.com/auth/google', + code=self.get_argument("code")) + # Save the user with e.g. set_secure_cookie + else: + yield self.authorize_redirect( + redirect_uri='http://your.site.com/auth/google', + client_id=self.settings["google_consumer_key"], + scope=['openid', 'email'], + response_type='code', + extra_params={"approval_prompt": "auto"}) + """ + http = self.get_auth_http_client() + body = urllib_parse.urlencode({ + "redirect_uri": redirect_uri, + "code": code, + "client_id": self.settings[self._OAUTH_SETTINGS_KEY]['key'], + "client_secret": self.settings[self._OAUTH_SETTINGS_KEY]['secret'], + "grant_type": "authorization_code", + }) + + http.fetch(self._OAUTH_ACCESS_TOKEN_URL, + self.async_callback(self._on_access_token, callback), + method="POST", headers={'Content-Type': 'application/x-www-form-urlencoded'}, body=body) + + def _on_access_token(self, future, response): + """Callback function for the exchange to the access token.""" + if response.error: + future.set_exception(AuthError('Google auth error: %s' % str(response))) + return + + args = escape.json_decode(response.body) + future.set_result(args) + + def get_auth_http_client(self): + """Returns the `.AsyncHTTPClient` instance to be used for auth requests. + + May be overridden by subclasses to use an HTTP client other than + the default. + """ + return httpclient.AsyncHTTPClient() + + class FacebookMixin(object): """Facebook Connect authentication. diff --git a/libs/tornado/autoreload.py b/libs/tornado/autoreload.py index 05754299..79cccb49 100755 --- a/libs/tornado/autoreload.py +++ b/libs/tornado/autoreload.py @@ -16,11 +16,15 @@ """xAutomatically restart the server when a source file is modified. -Most applications should not access this module directly. Instead, pass the -keyword argument ``debug=True`` to the `tornado.web.Application` constructor. -This will enable autoreload mode as well as checking for changes to templates -and static resources. Note that restarting is a destructive operation -and any requests in progress will be aborted when the process restarts. +Most applications should not access this module directly. Instead, +pass the keyword argument ``autoreload=True`` to the +`tornado.web.Application` constructor (or ``debug=True``, which +enables this setting and several others). This will enable autoreload +mode as well as checking for changes to templates and static +resources. Note that restarting is a destructive operation and any +requests in progress will be aborted when the process restarts. (If +you want to disable autoreload while using other debug-mode features, +pass both ``debug=True`` and ``autoreload=False``). This module can also be used as a command-line wrapper around scripts such as unit test runners. See the `main` method for details. @@ -38,6 +42,7 @@ Reloading loses any Python interpreter command-line arguments (e.g. ``-u``) because it re-executes Python using ``sys.executable`` and ``sys.argv``. Additionally, modifying these variables will cause reloading to behave incorrectly. + """ from __future__ import absolute_import, division, print_function, with_statement diff --git a/libs/tornado/curl_httpclient.py b/libs/tornado/curl_httpclient.py index e0900569..cb97710a 100755 --- a/libs/tornado/curl_httpclient.py +++ b/libs/tornado/curl_httpclient.py @@ -360,6 +360,7 @@ def _curl_setup_request(curl, request, buffer, headers): curl.setopt(pycurl.PROXYUSERPWD, credentials) else: curl.setopt(pycurl.PROXY, '') + curl.unsetopt(pycurl.PROXYUSERPWD) if request.validate_cert: curl.setopt(pycurl.SSL_VERIFYPEER, 1) curl.setopt(pycurl.SSL_VERIFYHOST, 2) @@ -382,6 +383,8 @@ def _curl_setup_request(curl, request, buffer, headers): # that we can't reach, so allow ipv6 unless the user asks to disable. # (but see version check in _process_queue above) curl.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_V4) + else: + curl.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_WHATEVER) # Set the request method through curl's irritating interface which makes # up names for almost every single method @@ -404,6 +407,11 @@ def _curl_setup_request(curl, request, buffer, headers): # Handle curl's cryptic options for every individual HTTP method if request.method in ("POST", "PUT"): + if request.body is None: + raise AssertionError( + 'Body must not be empty for "%s" request' + % request.method) + request_buffer = BytesIO(utf8(request.body)) curl.setopt(pycurl.READFUNCTION, request_buffer.read) if request.method == "POST": @@ -414,6 +422,9 @@ def _curl_setup_request(curl, request, buffer, headers): curl.setopt(pycurl.POSTFIELDSIZE, len(request.body)) else: curl.setopt(pycurl.INFILESIZE, len(request.body)) + elif request.method == "GET": + if request.body is not None: + raise AssertionError('Body must be empty for GET request') if request.auth_username is not None: userpwd = "%s:%s" % (request.auth_username, request.auth_password or '') diff --git a/libs/tornado/gen.py b/libs/tornado/gen.py index 92b7458e..217ebdf5 100755 --- a/libs/tornado/gen.py +++ b/libs/tornado/gen.py @@ -38,8 +38,8 @@ since it is both shorter and provides better exception handling):: def get(self): yield gen.Task(AsyncHTTPClient().fetch, "http://example.com") -You can also yield a list of ``Futures`` and/or ``Tasks``, which will be -started at the same time and run in parallel; a list of results will +You can also yield a list or dict of ``Futures`` and/or ``Tasks``, which will be +started at the same time and run in parallel; a list or dict of results will be returned when they are all finished:: @gen.coroutine @@ -47,6 +47,13 @@ be returned when they are all finished:: http_client = AsyncHTTPClient() response1, response2 = yield [http_client.fetch(url1), http_client.fetch(url2)] + response_dict = yield dict(response3=http_client.fetch(url3), + response4=http_client.fetch(url4)) + response3 = response_dict['response3'] + response4 = response_dict['response4'] + +.. versionchanged:: 3.2 + Dict support added. For more complicated interfaces, `Task` can be split into two parts: `Callback` and `Wait`:: @@ -404,6 +411,10 @@ class Multi(YieldPoint): a list of ``YieldPoints``. """ def __init__(self, children): + self.keys = None + if isinstance(children, dict): + self.keys = list(children.keys()) + children = children.values() self.children = [] for i in children: if isinstance(i, Future): @@ -423,7 +434,11 @@ class Multi(YieldPoint): return not self.unfinished_children def get_result(self): - return [i.get_result() for i in self.children] + result = (i.get_result() for i in self.children) + if self.keys is not None: + return dict(zip(self.keys, result)) + else: + return list(result) class _NullYieldPoint(YieldPoint): @@ -523,7 +538,7 @@ class Runner(object): self.finished = True self.yield_point = _null_yield_point raise - if isinstance(yielded, list): + if isinstance(yielded, (list, dict)): yielded = Multi(yielded) elif isinstance(yielded, Future): yielded = YieldFuture(yielded) diff --git a/libs/tornado/httpclient.py b/libs/tornado/httpclient.py index 67675894..b58a8348 100755 --- a/libs/tornado/httpclient.py +++ b/libs/tornado/httpclient.py @@ -282,7 +282,8 @@ class HTTPRequest(object): :arg int max_redirects: Limit for ``follow_redirects`` :arg string user_agent: String to send as ``User-Agent`` header :arg bool use_gzip: Request gzip encoding from the server - :arg string network_interface: Network interface to use for request + :arg string network_interface: Network interface to use for request. + ``curl_httpclient`` only; see note below. :arg callable streaming_callback: If set, ``streaming_callback`` will be run with each chunk of data as it is received, and ``HTTPResponse.body`` and ``HTTPResponse.buffer`` will be empty in @@ -310,14 +311,26 @@ class HTTPRequest(object): :arg bool validate_cert: For HTTPS requests, validate the server's certificate? :arg string ca_certs: filename of CA certificates in PEM format, - or None to use defaults. Note that in ``curl_httpclient``, if - any request uses a custom ``ca_certs`` file, they all must (they - don't have to all use the same ``ca_certs``, but it's not possible - to mix requests with ``ca_certs`` and requests that use the defaults. + or None to use defaults. See note below when used with + ``curl_httpclient``. :arg bool allow_ipv6: Use IPv6 when available? Default is false in ``simple_httpclient`` and true in ``curl_httpclient`` - :arg string client_key: Filename for client SSL key, if any - :arg string client_cert: Filename for client SSL certificate, if any + :arg string client_key: Filename for client SSL key, if any. See + note below when used with ``curl_httpclient``. + :arg string client_cert: Filename for client SSL certificate, if any. + See note below when used with ``curl_httpclient``. + + .. note:: + + When using ``curl_httpclient`` certain options may be + inherited by subsequent fetches because ``pycurl`` does + not allow them to be cleanly reset. This applies to the + ``ca_certs``, ``client_key``, ``client_cert``, and + ``network_interface`` arguments. If you use these + options, you should pass them on every request (you don't + have to always use the same values, but it's not possible + to mix requests that specify these options with ones that + use the defaults). .. versionadded:: 3.1 The ``auth_mode`` argument. @@ -372,6 +385,9 @@ class HTTPResponse(object): * headers: `tornado.httputil.HTTPHeaders` object + * effective_url: final location of the resource after following any + redirects + * buffer: ``cStringIO`` object for response body * body: response body as string (created on demand from ``self.buffer``) diff --git a/libs/tornado/httpserver.py b/libs/tornado/httpserver.py index d005545e..34e7b768 100755 --- a/libs/tornado/httpserver.py +++ b/libs/tornado/httpserver.py @@ -29,6 +29,7 @@ from __future__ import absolute_import, division, print_function, with_statement import socket import ssl import time +import copy from tornado.escape import native_str, parse_qs_bytes from tornado import httputil @@ -326,8 +327,8 @@ class HTTPConnection(object): self.request_callback(self._request) except _BadRequestException as e: - gen_log.info("Malformed HTTP request from %s: %s", - self.address[0], e) + gen_log.info("Malformed HTTP request from %r: %s", + self.address, e) self.close() return @@ -336,7 +337,10 @@ class HTTPConnection(object): if self._request.method in ("POST", "PATCH", "PUT"): httputil.parse_body_arguments( self._request.headers.get("Content-Type", ""), data, - self._request.arguments, self._request.files) + self._request.body_arguments, self._request.files) + + for k, v in self._request.body_arguments.items(): + self._request.arguments.setdefault(k, []).extend(v) self.request_callback(self._request) @@ -403,6 +407,20 @@ class HTTPRequest(object): `.RequestHandler.get_argument`, which returns argument values as unicode strings. + .. attribute:: query_arguments + + Same format as ``arguments``, but contains only arguments extracted + from the query string. + + .. versionadded:: 3.2 + + .. attribute:: body_arguments + + Same format as ``arguments``, but contains only arguments extracted + from the request body. + + .. versionadded:: 3.2 + .. attribute:: files File uploads are available in the files property, which maps file @@ -457,6 +475,8 @@ class HTTPRequest(object): self.path, sep, self.query = uri.partition('?') self.arguments = parse_qs_bytes(self.query, keep_blank_values=True) + self.query_arguments = copy.deepcopy(self.arguments) + self.body_arguments = {} def supports_http_1_1(self): """Returns True if this request supports HTTP/1.1 semantics""" diff --git a/libs/tornado/httputil.py b/libs/tornado/httputil.py index 3e7337d9..2575bc56 100755 --- a/libs/tornado/httputil.py +++ b/libs/tornado/httputil.py @@ -320,7 +320,11 @@ def parse_body_arguments(content_type, body, arguments, files): with the parsed contents. """ if content_type.startswith("application/x-www-form-urlencoded"): - uri_arguments = parse_qs_bytes(native_str(body), keep_blank_values=True) + try: + uri_arguments = parse_qs_bytes(native_str(body), keep_blank_values=True) + except Exception as e: + gen_log.warning('Invalid x-www-form-urlencoded body: %s', e) + uri_arguments = {} for name, values in uri_arguments.items(): if values: arguments.setdefault(name, []).extend(values) diff --git a/libs/tornado/ioloop.py b/libs/tornado/ioloop.py index 91ee2c5b..a36ab7a5 100755 --- a/libs/tornado/ioloop.py +++ b/libs/tornado/ioloop.py @@ -676,8 +676,7 @@ class PollIOLoop(IOLoop): while self._events: fd, events = self._events.popitem() try: - if self._handlers.has_key(fd): - self._handlers[fd](fd, events) + self._handlers[fd](fd, events) except (OSError, IOError) as e: if e.args[0] == errno.EPIPE: # Happens when the client closes the connection diff --git a/libs/tornado/iostream.py b/libs/tornado/iostream.py index 6bdc6397..08430cea 100755 --- a/libs/tornado/iostream.py +++ b/libs/tornado/iostream.py @@ -774,7 +774,7 @@ class IOStream(BaseIOStream): # Sometimes setsockopt will fail if the socket is closed # at the wrong time. This can happen with HTTPServer # resetting the value to false between requests. - if e.errno != errno.EINVAL: + if e.errno not in (errno.EINVAL, errno.ECONNRESET): raise diff --git a/libs/tornado/log.py b/libs/tornado/log.py index fa11f379..648db5c6 100755 --- a/libs/tornado/log.py +++ b/libs/tornado/log.py @@ -51,7 +51,7 @@ gen_log = logging.getLogger("tornado.general") def _stderr_supports_color(): color = False - if curses and sys.stderr.isatty(): + if curses and hasattr(sys.stderr, 'isatty') and sys.stderr.isatty(): try: curses.setupterm() if curses.tigetnum("colors") > 0: diff --git a/libs/tornado/netutil.py b/libs/tornado/netutil.py index 9dc8506e..21db4755 100755 --- a/libs/tornado/netutil.py +++ b/libs/tornado/netutil.py @@ -20,7 +20,6 @@ from __future__ import absolute_import, division, print_function, with_statement import errno import os -import re import socket import ssl import stat @@ -30,6 +29,13 @@ from tornado.ioloop import IOLoop from tornado.platform.auto import set_close_exec from tornado.util import Configurable +if hasattr(ssl, 'match_hostname') and hasattr(ssl, 'CertificateError'): # python 3.2+ + ssl_match_hostname = ssl.match_hostname + SSLCertificateError = ssl.CertificateError +else: + import backports.ssl_match_hostname + ssl_match_hostname = backports.ssl_match_hostname.match_hostname + SSLCertificateError = backports.ssl_match_hostname.CertificateError def bind_sockets(port, address=None, family=socket.AF_UNSPEC, backlog=128, flags=None): """Creates listening sockets bound to the given port and address. @@ -391,73 +397,3 @@ def ssl_wrap_socket(socket, ssl_options, server_hostname=None, **kwargs): return context.wrap_socket(socket, **kwargs) else: return ssl.wrap_socket(socket, **dict(context, **kwargs)) - -if hasattr(ssl, 'match_hostname') and hasattr(ssl, 'CertificateError'): # python 3.2+ - ssl_match_hostname = ssl.match_hostname - SSLCertificateError = ssl.CertificateError -else: - # match_hostname was added to the standard library ssl module in python 3.2. - # The following code was backported for older releases and copied from - # https://bitbucket.org/brandon/backports.ssl_match_hostname - class SSLCertificateError(ValueError): - pass - - def _dnsname_to_pat(dn, max_wildcards=1): - pats = [] - for frag in dn.split(r'.'): - if frag.count('*') > max_wildcards: - # Issue #17980: avoid denials of service by refusing more - # than one wildcard per fragment. A survery of established - # policy among SSL implementations showed it to be a - # reasonable choice. - raise SSLCertificateError( - "too many wildcards in certificate DNS name: " + repr(dn)) - if frag == '*': - # When '*' is a fragment by itself, it matches a non-empty dotless - # fragment. - pats.append('[^.]+') - else: - # Otherwise, '*' matches any dotless fragment. - frag = re.escape(frag) - pats.append(frag.replace(r'\*', '[^.]*')) - return re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE) - - def ssl_match_hostname(cert, hostname): - """Verify that *cert* (in decoded format as returned by - SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 rules - are mostly followed, but IP addresses are not accepted for *hostname*. - - CertificateError is raised on failure. On success, the function - returns nothing. - """ - if not cert: - raise ValueError("empty or no certificate") - dnsnames = [] - san = cert.get('subjectAltName', ()) - for key, value in san: - if key == 'DNS': - if _dnsname_to_pat(value).match(hostname): - return - dnsnames.append(value) - if not dnsnames: - # The subject is only checked when there is no dNSName entry - # in subjectAltName - for sub in cert.get('subject', ()): - for key, value in sub: - # XXX according to RFC 2818, the most specific Common Name - # must be used. - if key == 'commonName': - if _dnsname_to_pat(value).match(hostname): - return - dnsnames.append(value) - if len(dnsnames) > 1: - raise SSLCertificateError("hostname %r " - "doesn't match either of %s" - % (hostname, ', '.join(map(repr, dnsnames)))) - elif len(dnsnames) == 1: - raise SSLCertificateError("hostname %r " - "doesn't match %r" - % (hostname, dnsnames[0])) - else: - raise SSLCertificateError("no appropriate commonName or " - "subjectAltName fields were found") diff --git a/libs/tornado/platform/asyncio.py b/libs/tornado/platform/asyncio.py new file mode 100644 index 00000000..a8f5bad4 --- /dev/null +++ b/libs/tornado/platform/asyncio.py @@ -0,0 +1,134 @@ +"""Bridges between the `asyncio` module and Tornado IOLoop. + +This is a work in progress and interfaces are subject to change. + +To test: +python3.4 -m tornado.test.runtests --ioloop=tornado.platform.asyncio.AsyncIOLoop +python3.4 -m tornado.test.runtests --ioloop=tornado.platform.asyncio.AsyncIOMainLoop +(the tests log a few warnings with AsyncIOMainLoop because they leave some +unfinished callbacks on the event loop that fail when it resumes) +""" +import asyncio +import datetime +import functools +import os + +from tornado.ioloop import IOLoop +from tornado import stack_context + +class BaseAsyncIOLoop(IOLoop): + def initialize(self, asyncio_loop, close_loop=False): + self.asyncio_loop = asyncio_loop + self.close_loop = close_loop + self.asyncio_loop.call_soon(self.make_current) + # Maps fd to handler function (as in IOLoop.add_handler) + self.handlers = {} + # Set of fds listening for reads/writes + self.readers = set() + self.writers = set() + self.closing = False + + def close(self, all_fds=False): + self.closing = True + for fd in list(self.handlers): + self.remove_handler(fd) + if all_fds: + os.close(fd) + if self.close_loop: + self.asyncio_loop.close() + + def add_handler(self, fd, handler, events): + if fd in self.handlers: + raise ValueError("fd %d added twice" % fd) + self.handlers[fd] = stack_context.wrap(handler) + if events & IOLoop.READ: + self.asyncio_loop.add_reader( + fd, self._handle_events, fd, IOLoop.READ) + self.readers.add(fd) + if events & IOLoop.WRITE: + self.asyncio_loop.add_writer( + fd, self._handle_events, fd, IOLoop.WRITE) + self.writers.add(fd) + + def update_handler(self, fd, events): + if events & IOLoop.READ: + if fd not in self.readers: + self.asyncio_loop.add_reader( + fd, self._handle_events, fd, IOLoop.READ) + self.readers.add(fd) + else: + if fd in self.readers: + self.asyncio_loop.remove_reader(fd) + self.readers.remove(fd) + if events & IOLoop.WRITE: + if fd not in self.writers: + self.asyncio_loop.add_writer( + fd, self._handle_events, fd, IOLoop.WRITE) + self.writers.add(fd) + else: + if fd in self.writers: + self.asyncio_loop.remove_writer(fd) + self.writers.remove(fd) + + def remove_handler(self, fd): + if fd not in self.handlers: + return + if fd in self.readers: + self.asyncio_loop.remove_reader(fd) + self.readers.remove(fd) + if fd in self.writers: + self.asyncio_loop.remove_writer(fd) + self.writers.remove(fd) + del self.handlers[fd] + + def _handle_events(self, fd, events): + self.handlers[fd](fd, events) + + def start(self): + self.asyncio_loop.run_forever() + + def stop(self): + self.asyncio_loop.stop() + + def _run_callback(self, callback, *args, **kwargs): + try: + callback(*args, **kwargs) + except Exception: + self.handle_callback_exception(callback) + + def add_timeout(self, deadline, callback): + if isinstance(deadline, (int, float)): + delay = max(deadline - self.time(), 0) + elif isinstance(deadline, datetime.timedelta): + delay = deadline.total_seconds() + else: + raise TypeError("Unsupported deadline %r", deadline) + return self.asyncio_loop.call_later(delay, self._run_callback, + stack_context.wrap(callback)) + + def remove_timeout(self, timeout): + timeout.cancel() + + def add_callback(self, callback, *args, **kwargs): + if self.closing: + raise RuntimeError("IOLoop is closing") + if kwargs: + self.asyncio_loop.call_soon_threadsafe(functools.partial( + self._run_callback, stack_context.wrap(callback), + *args, **kwargs)) + else: + self.asyncio_loop.call_soon_threadsafe( + self._run_callback, stack_context.wrap(callback), *args) + + add_callback_from_signal = add_callback + + +class AsyncIOMainLoop(BaseAsyncIOLoop): + def initialize(self): + super(AsyncIOMainLoop, self).initialize(asyncio.get_event_loop(), + close_loop=False) + +class AsyncIOLoop(BaseAsyncIOLoop): + def initialize(self): + super(AsyncIOLoop, self).initialize(asyncio.new_event_loop(), + close_loop=True) diff --git a/libs/tornado/process.py b/libs/tornado/process.py index ffd2d29d..942c5c3f 100755 --- a/libs/tornado/process.py +++ b/libs/tornado/process.py @@ -92,7 +92,8 @@ def fork_processes(num_processes, max_restarts=100): between any server code. Note that multiple processes are not compatible with the autoreload - module (or the debug=True option to `tornado.web.Application`). + module (or the ``autoreload=True`` option to `tornado.web.Application` + which defaults to True when ``debug=True``). When using multiple processes, no IOLoops can be created or referenced until after the call to ``fork_processes``. diff --git a/libs/tornado/simple_httpclient.py b/libs/tornado/simple_httpclient.py index d8dbb271..2558ada8 100755 --- a/libs/tornado/simple_httpclient.py +++ b/libs/tornado/simple_httpclient.py @@ -72,6 +72,7 @@ class SimpleAsyncHTTPClient(AsyncHTTPClient): self.max_clients = max_clients self.queue = collections.deque() self.active = {} + self.waiting = {} self.max_buffer_size = max_buffer_size if resolver: self.resolver = resolver @@ -89,7 +90,16 @@ class SimpleAsyncHTTPClient(AsyncHTTPClient): self.resolver.close() def fetch_impl(self, request, callback): - self.queue.append((request, callback)) + key = object() + self.queue.append((key, request, callback)) + if not len(self.active) < self.max_clients: + timeout_handle = self.io_loop.add_timeout( + self.io_loop.time() + min(request.connect_timeout, + request.request_timeout), + functools.partial(self._on_timeout, key)) + else: + timeout_handle = None + self.waiting[key] = (request, callback, timeout_handle) self._process_queue() if self.queue: gen_log.debug("max_clients limit reached, request queued. " @@ -99,8 +109,10 @@ class SimpleAsyncHTTPClient(AsyncHTTPClient): def _process_queue(self): with stack_context.NullContext(): while self.queue and len(self.active) < self.max_clients: - request, callback = self.queue.popleft() - key = object() + key, request, callback = self.queue.popleft() + if key not in self.waiting: + continue + self._remove_timeout(key) self.active[key] = (request, callback) release_callback = functools.partial(self._release_fetch, key) self._handle_request(request, release_callback, callback) @@ -113,6 +125,22 @@ class SimpleAsyncHTTPClient(AsyncHTTPClient): del self.active[key] self._process_queue() + def _remove_timeout(self, key): + if key in self.waiting: + request, callback, timeout_handle = self.waiting[key] + if timeout_handle is not None: + self.io_loop.remove_timeout(timeout_handle) + del self.waiting[key] + + def _on_timeout(self, key): + request, callback, timeout_handle = self.waiting[key] + self.queue.remove((key, request, callback)) + timeout_response = HTTPResponse( + request, 599, error=HTTPError(599, "Timeout"), + request_time=self.io_loop.time() - request.start_time) + self.io_loop.add_callback(callback, timeout_response) + del self.waiting[key] + class _HTTPConnection(object): _SUPPORTED_METHODS = set(["GET", "HEAD", "POST", "PUT", "DELETE", "PATCH", "OPTIONS"]) @@ -162,15 +190,18 @@ class _HTTPConnection(object): # so restrict to ipv4 by default. af = socket.AF_INET + timeout = min(self.request.connect_timeout, self.request.request_timeout) + if timeout: + self._timeout = self.io_loop.add_timeout( + self.start_time + timeout, + stack_context.wrap(self._on_timeout)) self.resolver.resolve(host, port, af, callback=self._on_resolve) def _on_resolve(self, addrinfo): + if self.final_callback is None: + # final_callback is cleared if we've hit our timeout + return self.stream = self._create_stream(addrinfo) - timeout = min(self.request.connect_timeout, self.request.request_timeout) - if timeout: - self._timeout = self.io_loop.add_timeout( - self.start_time + timeout, - stack_context.wrap(self._on_timeout)) self.stream.set_close_callback(self._on_close) # ipv6 addresses are broken (in self.parsed.hostname) until # 2.7, here is correctly parsed value calculated in __init__ @@ -199,10 +230,10 @@ class _HTTPConnection(object): # the SSL_OP_NO_SSLv2, but that wasn't exposed to python # until 3.2. Python 2.7 adds the ciphers argument, which # can also be used to disable SSLv2. As a last resort - # on python 2.6, we set ssl_version to SSLv3. This is + # on python 2.6, we set ssl_version to TLSv1. This is # more narrow than we'd like since it also breaks - # compatibility with servers configured for TLSv1 only, - # but nearly all servers support SSLv3: + # compatibility with servers configured for SSLv3 only, + # but nearly all servers support both SSLv3 and TLSv1: # http://blog.ivanristic.com/2011/09/ssl-survey-protocol-support.html if sys.version_info >= (2, 7): ssl_options["ciphers"] = "DEFAULT:!SSLv2" @@ -210,7 +241,7 @@ class _HTTPConnection(object): # This is really only necessary for pre-1.0 versions # of openssl, but python 2.6 doesn't expose version # information. - ssl_options["ssl_version"] = ssl.PROTOCOL_SSLv3 + ssl_options["ssl_version"] = ssl.PROTOCOL_TLSv1 return SSLIOStream(socket.socket(af), io_loop=self.io_loop, @@ -233,6 +264,8 @@ class _HTTPConnection(object): def _on_connect(self): self._remove_timeout() + if self.final_callback is None: + return if self.request.request_timeout: self._timeout = self.io_loop.add_timeout( self.start_time + self.request.request_timeout, @@ -269,9 +302,15 @@ class _HTTPConnection(object): self.request.headers["User-Agent"] = self.request.user_agent if not self.request.allow_nonstandard_methods: if self.request.method in ("POST", "PATCH", "PUT"): - assert self.request.body is not None + if self.request.body is None: + raise AssertionError( + 'Body must not be empty for "%s" request' + % self.request.method) else: - assert self.request.body is None + if self.request.body is not None: + raise AssertionError( + 'Body must be empty for "%s" request' + % self.request.method) if self.request.body is not None: self.request.headers["Content-Length"] = str(len( self.request.body)) diff --git a/libs/tornado/speedups.c b/libs/tornado/speedups.c new file mode 100644 index 00000000..8a316c58 --- /dev/null +++ b/libs/tornado/speedups.c @@ -0,0 +1,49 @@ +#include + +static PyObject* websocket_mask(PyObject* self, PyObject* args) { + const char* mask; + int mask_len; + const char* data; + int data_len; + int i; + + if (!PyArg_ParseTuple(args, "s#s#", &mask, &mask_len, &data, &data_len)) { + return NULL; + } + + PyObject* result = PyBytes_FromStringAndSize(NULL, data_len); + if (!result) { + return NULL; + } + char* buf = PyBytes_AsString(result); + for (i = 0; i < data_len; i++) { + buf[i] = data[i] ^ mask[i % 4]; + } + + return result; +} + +static PyMethodDef methods[] = { + {"websocket_mask", websocket_mask, METH_VARARGS, ""}, + {NULL, NULL, 0, NULL} +}; + +#if PY_MAJOR_VERSION >= 3 +static struct PyModuleDef speedupsmodule = { + PyModuleDef_HEAD_INIT, + "speedups", + NULL, + -1, + methods +}; + +PyMODINIT_FUNC +PyInit_speedups() { + return PyModule_Create(&speedupsmodule); +} +#else // Python 2.x +PyMODINIT_FUNC +initspeedups() { + Py_InitModule("tornado.speedups", methods); +} +#endif diff --git a/libs/tornado/tcpserver.py b/libs/tornado/tcpserver.py index 8473a21a..c0773732 100755 --- a/libs/tornado/tcpserver.py +++ b/libs/tornado/tcpserver.py @@ -180,7 +180,8 @@ class TCPServer(object): between any server code. Note that multiple processes are not compatible with the autoreload - module (or the ``debug=True`` option to `tornado.web.Application`). + module (or the ``autoreload=True`` option to `tornado.web.Application` + which defaults to True when ``debug=True``). When using multiple processes, no IOLoops can be created or referenced until after the call to ``TCPServer.start(n)``. """ diff --git a/libs/tornado/web.py b/libs/tornado/web.py index 5f8d6091..b6d7e97e 100755 --- a/libs/tornado/web.py +++ b/libs/tornado/web.py @@ -250,7 +250,7 @@ class RequestHandler(object): not self.request.connection.no_keep_alive): conn_header = self.request.headers.get("Connection") if conn_header and (conn_header.lower() == "keep-alive"): - self.set_header("Connection", "Keep-Alive") + self._headers["Connection"] = "Keep-Alive" self._write_buffer = [] self._status_code = 200 self._reason = httputil.responses[200] @@ -348,12 +348,7 @@ class RequestHandler(object): The returned value is always unicode. """ - args = self.get_arguments(name, strip=strip) - if not args: - if default is self._ARG_DEFAULT: - raise MissingArgumentError(name) - return default - return args[-1] + return self._get_argument(name, default, self.request.arguments, strip) def get_arguments(self, name, strip=True): """Returns a list of the arguments with the given name. @@ -362,9 +357,73 @@ class RequestHandler(object): The returned values are always unicode. """ + return self._get_arguments(name, self.request.arguments, strip) + def get_body_argument(self, name, default=_ARG_DEFAULT, strip=True): + """Returns the value of the argument with the given name + from the request body. + + If default is not provided, the argument is considered to be + required, and we raise a `MissingArgumentError` if it is missing. + + If the argument appears in the url more than once, we return the + last value. + + The returned value is always unicode. + + .. versionadded:: 3.2 + """ + return self._get_argument(name, default, self.request.body_arguments, strip) + + def get_body_arguments(self, name, strip=True): + """Returns a list of the body arguments with the given name. + + If the argument is not present, returns an empty list. + + The returned values are always unicode. + + .. versionadded:: 3.2 + """ + return self._get_arguments(name, self.request.body_arguments, strip) + + def get_query_argument(self, name, default=_ARG_DEFAULT, strip=True): + """Returns the value of the argument with the given name + from the request query string. + + If default is not provided, the argument is considered to be + required, and we raise a `MissingArgumentError` if it is missing. + + If the argument appears in the url more than once, we return the + last value. + + The returned value is always unicode. + + .. versionadded:: 3.2 + """ + return self._get_argument(name, default, self.request.query_arguments, strip) + + def get_query_arguments(self, name, strip=True): + """Returns a list of the query arguments with the given name. + + If the argument is not present, returns an empty list. + + The returned values are always unicode. + + .. versionadded:: 3.2 + """ + return self._get_arguments(name, self.request.query_arguments, strip) + + def _get_argument(self, name, default, source, strip=True): + args = self._get_arguments(name, source, strip=strip) + if not args: + if default is self._ARG_DEFAULT: + raise MissingArgumentError(name) + return default + return args[-1] + + def _get_arguments(self, name, source, strip=True): values = [] - for v in self.request.arguments.get(name, []): + for v in source.get(name, []): v = self.decode_argument(v, name=name) if isinstance(v, unicode_type): # Get rid of any weird control chars (unless decoding gave @@ -838,7 +897,7 @@ class RequestHandler(object): else: self.finish(self.get_error_html(status_code, **kwargs)) return - if self.settings.get("debug") and "exc_info" in kwargs: + if self.settings.get("serve_traceback") and "exc_info" in kwargs: # in debug mode, try to send a traceback self.set_header('Content-Type', 'text/plain') for line in traceback.format_exception(*kwargs["exc_info"]): @@ -1318,6 +1377,12 @@ def asynchronous(method): if not self._finished: self.finish() IOLoop.current().add_future(result, future_complete) + # Once we have done this, hide the Future from our + # caller (i.e. RequestHandler._when_complete), which + # would otherwise set up its own callback and + # exception handler (resulting in exceptions being + # logged twice). + return None return result return wrapper @@ -1383,10 +1448,16 @@ class Application(object): or (regexp, request_class) tuples. When we receive requests, we iterate over the list in order and instantiate an instance of the first request class whose regexp matches the request path. + The request class can be specified as either a class object or a + (fully-qualified) name. - Each tuple can contain an optional third element, which should be - a dictionary if it is present. That dictionary is passed as - keyword arguments to the contructor of the handler. This pattern + Each tuple can contain additional elements, which correspond to the + arguments to the `URLSpec` constructor. (Prior to Tornado 3.2, this + only tuples of two or three elements were allowed). + + A dictionary may be passed as the third element of the tuple, + which will be used as keyword arguments to the handler's + constructor and `~RequestHandler.initialize` method. This pattern is used for the `StaticFileHandler` in this example (note that a `StaticFileHandler` can be installed automatically with the static_path setting described below):: @@ -1409,6 +1480,7 @@ class Application(object): and ``/robots.txt`` from the same directory. A custom subclass of `StaticFileHandler` can be specified with the ``static_handler_class`` setting. + """ def __init__(self, handlers=None, default_host="", transforms=None, wsgi=False, **settings): @@ -1447,8 +1519,14 @@ class Application(object): if handlers: self.add_handlers(".*$", handlers) + if self.settings.get('debug'): + self.settings.setdefault('autoreload', True) + self.settings.setdefault('compiled_template_cache', False) + self.settings.setdefault('static_hash_cache', False) + self.settings.setdefault('serve_traceback', True) + # Automatically reload modified modules - if self.settings.get("debug") and not wsgi: + if self.settings.get('autoreload') and not wsgi: from tornado import autoreload autoreload.start() @@ -1493,20 +1571,8 @@ class Application(object): for spec in host_handlers: if isinstance(spec, (tuple, list)): - assert len(spec) in (2, 3) - pattern = spec[0] - handler = spec[1] - - if isinstance(handler, str): - # import the Module and instantiate the class - # Must be a fully qualified name (module.ClassName) - handler = import_object(handler) - - if len(spec) == 3: - kwargs = spec[2] - else: - kwargs = {} - spec = URLSpec(pattern, handler, kwargs) + assert len(spec) in (2, 3, 4) + spec = URLSpec(*spec) handlers.append(spec) if spec.name: if spec.name in self.named_handlers: @@ -1597,14 +1663,23 @@ class Application(object): args = [unquote(s) for s in match.groups()] break if not handler: - handler = ErrorHandler(self, request, status_code=404) + if self.settings.get('default_handler_class'): + handler_class = self.settings['default_handler_class'] + handler_args = self.settings.get( + 'default_handler_args', {}) + else: + handler_class = ErrorHandler + handler_args = dict(status_code=404) + handler = handler_class(self, request, **handler_args) - # In debug mode, re-compile templates and reload static files on every + # If template cache is disabled (usually in the debug mode), + # re-compile templates and reload static files on every # request so you don't need to restart to see changes - if self.settings.get("debug"): + if not self.settings.get("compiled_template_cache", True): with RequestHandler._template_loader_lock: for loader in RequestHandler._template_loaders.values(): loader.reset() + if not self.settings.get('static_hash_cache', True): StaticFileHandler.reset() handler._execute(transforms, *args, **kwargs) @@ -2454,7 +2529,7 @@ class _UIModuleNamespace(object): class URLSpec(object): """Specifies mappings between URLs and handlers.""" - def __init__(self, pattern, handler_class, kwargs=None, name=None): + def __init__(self, pattern, handler, kwargs=None, name=None): """Parameters: * ``pattern``: Regular expression to be matched. Any groups @@ -2475,7 +2550,13 @@ class URLSpec(object): assert len(self.regex.groupindex) in (0, self.regex.groups), \ ("groups in url regexes must either be all named or all " "positional: %r" % self.regex.pattern) - self.handler_class = handler_class + + if isinstance(handler, str): + # import the Module and instantiate the class + # Must be a fully qualified name (module.ClassName) + handler = import_object(handler) + + self.handler_class = handler self.kwargs = kwargs or {} self.name = name self._path, self._group_count = self._find_groups() diff --git a/libs/tornado/websocket.py b/libs/tornado/websocket.py index 676d21bf..8c2f5a64 100755 --- a/libs/tornado/websocket.py +++ b/libs/tornado/websocket.py @@ -33,7 +33,7 @@ import tornado.web from tornado.concurrent import TracebackFuture from tornado.escape import utf8, native_str -from tornado import httpclient +from tornado import httpclient, httputil from tornado.ioloop import IOLoop from tornado.iostream import StreamClosedError from tornado.log import gen_log, app_log @@ -52,6 +52,10 @@ class WebSocketError(Exception): class WebSocketClosedError(WebSocketError): + """Raised by operations on a closed connection. + + .. versionadded:: 3.2 + """ pass @@ -163,6 +167,12 @@ class WebSocketHandler(tornado.web.RequestHandler): encoded as json). If the ``binary`` argument is false, the message will be sent as utf8; in binary mode any byte string is allowed. + + If the connection is already closed, raises `WebSocketClosedError`. + + .. versionchanged:: 3.2 + `WebSocketClosedError` was added (previously a closed connection + would raise an `AttributeError`) """ if self.ws_connection is None: raise WebSocketClosedError() @@ -586,7 +596,7 @@ class WebSocketProtocol13(WebSocketProtocol): frame += struct.pack("!BQ", 127 | mask_bit, l) if self.mask_outgoing: mask = os.urandom(4) - data = mask + self._apply_mask(mask, data) + data = mask + _websocket_mask(mask, data) frame += data self.stream.write(frame) @@ -671,21 +681,8 @@ class WebSocketProtocol13(WebSocketProtocol): except StreamClosedError: self._abort() - def _apply_mask(self, mask, data): - mask = array.array("B", mask) - unmasked = array.array("B", data) - for i in xrange(len(data)): - unmasked[i] = unmasked[i] ^ mask[i % 4] - if hasattr(unmasked, 'tobytes'): - # tostring was deprecated in py32. It hasn't been removed, - # but since we turn on deprecation warnings in our tests - # we need to use the right one. - return unmasked.tobytes() - else: - return unmasked.tostring() - def _on_masked_frame_data(self, data): - self._on_frame_data(self._apply_mask(self._frame_mask, data)) + self._on_frame_data(_websocket_mask(self._frame_mask, data)) def _on_frame_data(self, data): if self._frame_opcode_is_control: @@ -771,7 +768,11 @@ class WebSocketProtocol13(WebSocketProtocol): class WebSocketClientConnection(simple_httpclient._HTTPConnection): - """WebSocket client connection.""" + """WebSocket client connection. + + This class should not be instantiated directly; use the + `websocket_connect` function instead. + """ def __init__(self, io_loop, request): self.connect_future = TracebackFuture() self.read_future = None @@ -793,9 +794,19 @@ class WebSocketClientConnection(simple_httpclient._HTTPConnection): io_loop, None, request, lambda: None, self._on_http_response, 104857600, self.resolver) + def close(self): + """Closes the websocket connection. + + .. versionadded:: 3.2 + """ + if self.protocol is not None: + self.protocol.close() + self.protocol = None + def _on_close(self): self.on_message(None) self.resolver.close() + super(WebSocketClientConnection, self)._on_close() def _on_http_response(self, response): if not self.connect_future.done(): @@ -859,13 +870,54 @@ def websocket_connect(url, io_loop=None, callback=None, connect_timeout=None): Takes a url and returns a Future whose result is a `WebSocketClientConnection`. + + .. versionchanged:: 3.2 + Also accepts ``HTTPRequest`` objects in place of urls. """ if io_loop is None: io_loop = IOLoop.current() - request = httpclient.HTTPRequest(url, connect_timeout=connect_timeout) + if isinstance(url, httpclient.HTTPRequest): + assert connect_timeout is None + request = url + # Copy and convert the headers dict/object (see comments in + # AsyncHTTPClient.fetch) + request.headers = httputil.HTTPHeaders(request.headers) + else: + request = httpclient.HTTPRequest(url, connect_timeout=connect_timeout) request = httpclient._RequestProxy( request, httpclient.HTTPRequest._DEFAULTS) conn = WebSocketClientConnection(io_loop, request) if callback is not None: io_loop.add_future(conn.connect_future, callback) return conn.connect_future + +def _websocket_mask_python(mask, data): + """Websocket masking function. + + `mask` is a `bytes` object of length 4; `data` is a `bytes` object of any length. + Returns a `bytes` object of the same length as `data` with the mask applied + as specified in section 5.3 of RFC 6455. + + This pure-python implementation may be replaced by an optimized version when available. + """ + mask = array.array("B", mask) + unmasked = array.array("B", data) + for i in xrange(len(data)): + unmasked[i] = unmasked[i] ^ mask[i % 4] + if hasattr(unmasked, 'tobytes'): + # tostring was deprecated in py32. It hasn't been removed, + # but since we turn on deprecation warnings in our tests + # we need to use the right one. + return unmasked.tobytes() + else: + return unmasked.tostring() + +if os.environ.get('TORNADO_NO_EXTENSION'): + # This environment variable exists to make it easier to do performance comparisons; + # it's not guaranteed to remain supported in the future. + _websocket_mask = _websocket_mask_python +else: + try: + from tornado.speedups import websocket_mask as _websocket_mask + except ImportError: + _websocket_mask = _websocket_mask_python diff --git a/libs/tornado/wsgi.py b/libs/tornado/wsgi.py index 5e25a564..8e5ddedb 100755 --- a/libs/tornado/wsgi.py +++ b/libs/tornado/wsgi.py @@ -33,6 +33,7 @@ from __future__ import absolute_import, division, print_function, with_statement import sys import time +import copy import tornado from tornado import escape @@ -142,11 +143,14 @@ class HTTPRequest(object): self.path += urllib_parse.quote(from_wsgi_str(environ.get("PATH_INFO", ""))) self.uri = self.path self.arguments = {} + self.query_arguments = {} + self.body_arguments = {} self.query = environ.get("QUERY_STRING", "") if self.query: self.uri += "?" + self.query self.arguments = parse_qs_bytes(native_str(self.query), keep_blank_values=True) + self.query_arguments = copy.deepcopy(self.arguments) self.version = "HTTP/1.1" self.headers = httputil.HTTPHeaders() if environ.get("CONTENT_TYPE"): @@ -171,7 +175,10 @@ class HTTPRequest(object): # Parse request body self.files = {} httputil.parse_body_arguments(self.headers.get("Content-Type", ""), - self.body, self.arguments, self.files) + self.body, self.body_arguments, self.files) + + for k, v in self.body_arguments.items(): + self.arguments.setdefault(k, []).extend(v) self._start_time = time.time() self._finish_time = None From 88d614850058d529f7c8a87d94a381e4b6eabb77 Mon Sep 17 00:00:00 2001 From: Ruud Date: Fri, 22 Nov 2013 16:09:15 +0100 Subject: [PATCH 18/32] Update libs --- libs/bencode/LICENSE.txt | 143 - libs/bencode/__init__.py | 132 +- libs/bencode/bencode.py | 131 - libs/bs4/__init__.py | 165 +- libs/bs4/builder/__init__.py | 32 +- libs/bs4/builder/_html5lib.py | 103 +- libs/bs4/builder/_htmlparser.py | 24 +- libs/bs4/builder/_lxml.py | 154 +- libs/bs4/dammit.py | 377 +- libs/bs4/diagnose.py | 204 + libs/bs4/element.py | 530 +- libs/bs4/testing.py | 77 + libs/gntp/__init__.py | 509 -- libs/gntp/cli.py | 141 + libs/gntp/config.py | 77 + libs/gntp/core.py | 511 ++ libs/gntp/errors.py | 25 + libs/gntp/notifier.py | 126 +- libs/gntp/shim.py | 45 + libs/gntp/version.py | 4 + libs/guessit/__init__.py | 110 +- libs/guessit/__main__.py | 25 +- libs/guessit/fileutils.py | 6 +- libs/guessit/guess.py | 65 +- libs/guessit/language.py | 12 +- libs/guessit/matcher.py | 51 +- libs/guessit/patterns.py | 24 +- libs/guessit/slogging.py | 8 +- libs/guessit/textutils.py | 11 +- libs/guessit/transfo/__init__.py | 22 +- libs/guessit/transfo/guess_country.py | 2 +- libs/guessit/transfo/guess_episodes_rexps.py | 15 +- libs/guessit/transfo/guess_filetype.py | 27 +- libs/guessit/transfo/guess_language.py | 24 +- .../guess_movie_title_from_position.py | 3 +- libs/guessit/transfo/guess_video_rexps.py | 7 +- .../transfo/guess_weak_episodes_rexps.py | 4 +- libs/html5lib/__init__.py | 20 +- libs/html5lib/constants.py | 4991 ++++++------ libs/html5lib/filters/_base.py | 2 + .../filters/alphabeticalattributes.py | 20 + libs/html5lib/filters/formfiller.py | 127 - libs/html5lib/filters/inject_meta_charset.py | 55 +- libs/html5lib/filters/lint.py | 63 +- libs/html5lib/filters/optionaltags.py | 15 +- libs/html5lib/filters/sanitizer.py | 10 +- libs/html5lib/filters/whitespace.py | 21 +- libs/html5lib/html5parser.py | 956 ++- libs/html5lib/ihatexml.py | 184 +- libs/html5lib/inputstream.py | 675 +- libs/html5lib/sanitizer.py | 387 +- libs/html5lib/serializer/__init__.py | 15 +- libs/html5lib/serializer/htmlserializer.py | 160 +- libs/html5lib/serializer/xhtmlserializer.py | 9 - libs/html5lib/tokenizer.py | 899 +- libs/html5lib/treeadapters/__init__.py | 0 libs/html5lib/treeadapters/sax.py | 44 + libs/html5lib/treebuilders/__init__.py | 76 +- libs/html5lib/treebuilders/_base.py | 130 +- libs/html5lib/treebuilders/dom.py | 194 +- libs/html5lib/treebuilders/etree.py | 265 +- libs/html5lib/treebuilders/etree_lxml.py | 269 +- libs/html5lib/treebuilders/simpletree.py | 256 - libs/html5lib/treebuilders/soup.py | 236 - libs/html5lib/treewalkers/__init__.py | 29 +- libs/html5lib/treewalkers/_base.py | 216 +- libs/html5lib/treewalkers/dom.py | 15 +- libs/html5lib/treewalkers/etree.py | 77 +- libs/html5lib/treewalkers/genshistream.py | 59 +- libs/html5lib/treewalkers/lxmletree.py | 394 +- libs/html5lib/treewalkers/pulldom.py | 13 +- libs/html5lib/treewalkers/simpletree.py | 78 - libs/html5lib/treewalkers/soup.py | 60 - libs/html5lib/trie/__init__.py | 12 + libs/html5lib/trie/_base.py | 37 + libs/html5lib/trie/datrie.py | 44 + libs/html5lib/trie/py.py | 67 + libs/html5lib/utils.py | 177 +- libs/httplib2/__init__.py | 872 +- libs/httplib2/cacerts.txt | 739 ++ libs/httplib2/iri2uri.py | 58 +- libs/httplib2/socks.py | 438 + libs/pyasn1/__init__.py | 9 +- libs/pyasn1/codec/__init__.py | 1 + libs/pyasn1/codec/ber/__init__.py | 1 + libs/pyasn1/codec/ber/decoder.py | 414 +- libs/pyasn1/codec/ber/encoder.py | 53 +- libs/pyasn1/codec/cer/__init__.py | 1 + libs/pyasn1/codec/cer/decoder.py | 15 +- libs/pyasn1/codec/der/__init__.py | 1 + libs/pyasn1/codec/der/decoder.py | 6 +- libs/pyasn1/compat/__init__.py | 1 + libs/pyasn1/compat/octets.py | 2 + libs/pyasn1/debug.py | 65 + libs/pyasn1/type/__init__.py | 1 + libs/pyasn1/type/base.py | 7 +- libs/pyasn1/type/namedtype.py | 8 +- libs/pyasn1/type/namedval.py | 4 +- libs/pyasn1/type/tag.py | 2 +- libs/pyasn1/type/tagmap.py | 4 +- libs/pyasn1/type/univ.py | 21 +- libs/pyutil/_version.py | 2 +- libs/pyutil/benchutil.py | 80 +- libs/pyutil/benchutil.py~ | 35 +- libs/pyutil/data/wordlist.txt | 7248 +++++++++++++++++ libs/pyutil/fileutil.py~ | 271 - libs/pyutil/iputil.py | 155 +- libs/pyutil/iputil.py~ | 288 + libs/pyutil/mathutil.py | 2 +- libs/pyutil/mathutil.py~ | 106 + libs/pyutil/odict.py~ | 552 -- libs/pyutil/randutil.py | 3 +- libs/pyutil/randutil.py~ | 85 - libs/pyutil/scripts/passphrase.py | 71 + libs/pyutil/scripts/time_comparisons.py | 209 + libs/pyutil/test/current/test_mathutil.py | 7 + libs/pyutil/test/current/test_mathutil.py~ | 135 + libs/pyutil/time_comparisons.py | 44 - libs/pyutil/time_comparisons.py~ | 72 - 119 files changed, 18448 insertions(+), 8923 deletions(-) delete mode 100644 libs/bencode/LICENSE.txt delete mode 100644 libs/bencode/bencode.py create mode 100644 libs/bs4/diagnose.py create mode 100644 libs/gntp/cli.py create mode 100644 libs/gntp/config.py create mode 100644 libs/gntp/core.py create mode 100644 libs/gntp/errors.py create mode 100644 libs/gntp/shim.py create mode 100644 libs/gntp/version.py create mode 100644 libs/html5lib/filters/alphabeticalattributes.py delete mode 100644 libs/html5lib/filters/formfiller.py delete mode 100644 libs/html5lib/serializer/xhtmlserializer.py create mode 100644 libs/html5lib/treeadapters/__init__.py create mode 100644 libs/html5lib/treeadapters/sax.py delete mode 100755 libs/html5lib/treebuilders/simpletree.py delete mode 100644 libs/html5lib/treebuilders/soup.py delete mode 100644 libs/html5lib/treewalkers/simpletree.py delete mode 100644 libs/html5lib/treewalkers/soup.py create mode 100644 libs/html5lib/trie/__init__.py create mode 100644 libs/html5lib/trie/_base.py create mode 100644 libs/html5lib/trie/datrie.py create mode 100644 libs/html5lib/trie/py.py create mode 100644 libs/httplib2/cacerts.txt create mode 100644 libs/httplib2/socks.py create mode 100644 libs/pyasn1/debug.py create mode 100644 libs/pyutil/data/wordlist.txt delete mode 100644 libs/pyutil/fileutil.py~ create mode 100644 libs/pyutil/iputil.py~ create mode 100644 libs/pyutil/mathutil.py~ delete mode 100644 libs/pyutil/odict.py~ delete mode 100644 libs/pyutil/randutil.py~ create mode 100644 libs/pyutil/scripts/passphrase.py create mode 100644 libs/pyutil/scripts/time_comparisons.py create mode 100644 libs/pyutil/test/current/test_mathutil.py~ delete mode 100644 libs/pyutil/time_comparisons.py delete mode 100644 libs/pyutil/time_comparisons.py~ diff --git a/libs/bencode/LICENSE.txt b/libs/bencode/LICENSE.txt deleted file mode 100644 index 4b7a6747..00000000 --- a/libs/bencode/LICENSE.txt +++ /dev/null @@ -1,143 +0,0 @@ -BitTorrent Open Source License - -Version 1.1 - -This BitTorrent Open Source License (the "License") applies to the BitTorrent client and related software products as well as any updates or maintenance releases of that software ("BitTorrent Products") that are distributed by BitTorrent, Inc. ("Licensor"). Any BitTorrent Product licensed pursuant to this License is a Licensed Product. Licensed Product, in its entirety, is protected by U.S. copyright law. This License identifies the terms under which you may use, copy, distribute or modify Licensed Product. - -Preamble - -This Preamble is intended to describe, in plain English, the nature and scope of this License. However, this Preamble is not a part of this license. The legal effect of this License is dependent only upon the terms of the License and not this Preamble. - -This License complies with the Open Source Definition and is derived from the Jabber Open Source License 1.0 (the "JOSL"), which has been approved by Open Source Initiative. Sections 4(c) and 4(f)(iii) from the JOSL have been deleted. - -This License provides that: - -1. You may use or give away the Licensed Product, alone or as a component of an aggregate software distribution containing programs from several different sources. No royalty or other fee is required. - -2. Both Source Code and executable versions of the Licensed Product, including Modifications made by previous Contributors, are available for your use. (The terms "Licensed Product," "Modifications," "Contributors" and "Source Code" are defined in the License.) - -3. You are allowed to make Modifications to the Licensed Product, and you can create Derivative Works from it. (The term "Derivative Works" is defined in the License.) - -4. By accepting the Licensed Product under the provisions of this License, you agree that any Modifications you make to the Licensed Product and then distribute are governed by the provisions of this License. In particular, you must make the Source Code of your Modifications available to others free of charge and without a royalty. - -5. You may sell, accept donations or otherwise receive compensation for executable versions of a Licensed Product, without paying a royalty or other fee to the Licensor or any Contributor, provided that such executable versions contain your or another Contributor?s material Modifications. For the avoidance of doubt, to the extent your executable version of a Licensed Product does not contain your or another Contributor?s material Modifications, you may not sell, accept donations or otherwise receive compensation for such executable. - -You may use the Licensed Product for any purpose, but the Licensor is not providing you any warranty whatsoever, nor is the Licensor accepting any liability in the event that the Licensed Product doesn't work properly or causes you any injury or damages. - -6. If you sublicense the Licensed Product or Derivative Works, you may charge fees for warranty or support, or for accepting indemnity or liability obligations to your customers. You cannot charge for, sell, accept donations or otherwise receive compensation for the Source Code. - -7. If you assert any patent claims against the Licensor relating to the Licensed Product, or if you breach any terms of the License, your rights to the Licensed Product under this License automatically terminate. - -You may use this License to distribute your own Derivative Works, in which case the provisions of this License will apply to your Derivative Works just as they do to the original Licensed Product. - -Alternatively, you may distribute your Derivative Works under any other OSI-approved Open Source license, or under a proprietary license of your choice. If you use any license other than this License, however, you must continue to fulfill the requirements of this License (including the provisions relating to publishing the Source Code) for those portions of your Derivative Works that consist of the Licensed Product, including the files containing Modifications. - -New versions of this License may be published from time to time in connection with new versions of a Licensed Product or otherwise. You may choose to continue to use the license terms in this version of the License for the Licensed Product that was originally licensed hereunder, however, the new versions of this License will at all times apply to new versions of the Licensed Product released by Licensor after the release of the new version of this License. Only the Licensor has the right to change the License terms as they apply to the Licensed Product. - -This License relies on precise definitions for certain terms. Those terms are defined when they are first used, and the definitions are repeated for your convenience in a Glossary at the end of the License. - -License Terms - -1. Grant of License From Licensor. Subject to the terms and conditions of this License, Licensor hereby grants you a world-wide, royalty-free, non-exclusive license, subject to third party intellectual property claims, to do the following: - -a. Use, reproduce, modify, display, perform, sublicense and distribute any Modifications created by a Contributor or portions thereof, in both Source Code or as an executable program, either on an unmodified basis or as part of Derivative Works. - -b. Under claims of patents now or hereafter owned or controlled by Contributor, to make, use, sell, offer for sale, have made, and/or otherwise dispose of Modifications or portions thereof, but solely to the extent that any such claim is necessary to enable you to make, use, sell, offer for sale, have made, and/or otherwise dispose of Modifications or portions thereof or Derivative Works thereof. - -2. Grant of License to Modifications From Contributor. "Modifications" means any additions to or deletions from the substance or structure of (i) a file containing a Licensed Product, or (ii) any new file that contains any part of a Licensed Product. Hereinafter in this License, the term "Licensed Product" shall include all previous Modifications that you receive from any Contributor. Subject to the terms and conditions of this License, By application of the provisions in Section 4(a) below, each person or entity who created or contributed to the creation of, and distributed, a Modification (a "Contributor") hereby grants you a world-wide, royalty-free, non-exclusive license, subject to third party intellectual property claims, to do the following: - -a. Use, reproduce, modify, display, perform, sublicense and distribute any Modifications created by such Contributor or portions thereof, in both Source Code or as an executable program, either on an unmodified basis or as part of Derivative Works. - -b. Under claims of patents now or hereafter owned or controlled by Contributor, to make, use, sell, offer for sale, have made, and/or otherwise dispose of Modifications or portions thereof, but solely to the extent that any such claim is necessary to enable you to make, use, sell, offer for sale, have made, and/or otherwise dispose of Modifications or portions thereof or Derivative Works thereof. - -3. Exclusions From License Grant. Nothing in this License shall be deemed to grant any rights to trademarks, copyrights, patents, trade secrets or any other intellectual property of Licensor or any Contributor except as expressly stated herein. No patent license is granted separate from the Licensed Product, for code that you delete from the Licensed Product, or for combinations of the Licensed Product with other software or hardware. No right is granted to the trademarks of Licensor or any Contributor even if such marks are included in the Licensed Product. Nothing in this License shall be interpreted to prohibit Licensor from licensing under different terms from this License any code that Licensor otherwise would have a right to license. As an express condition for your use of the Licensed Product, you hereby agree that you will not, without the prior written consent of Licensor, use any trademarks, copyrights, patents, trade secrets or any other intellectual property of Licensor or any Contributor except as expressly stated herein. For the avoidance of doubt and without limiting the foregoing, you hereby agree that you will not use or display any trademark of Licensor or any Contributor in any domain name, directory filepath, advertisement, link or other reference to you in any manner or in any media. - -4. Your Obligations Regarding Distribution. - -a. Application of This License to Your Modifications. As an express condition for your use of the Licensed Product, you hereby agree that any Modifications that you create or to which you contribute, and which you distribute, are governed by the terms of this License including, without limitation, Section 2. Any Modifications that you create or to which you contribute may be distributed only under the terms of this License or a future version of this License released under Section 7. You must include a copy of this License with every copy of the Modifications you distribute. You agree not to offer or impose any terms on any Source Code or executable version of the Licensed Product or Modifications that alter or restrict the applicable version of this License or the recipients' rights hereunder. However, you may include an additional document offering the additional rights described in Section 4(d). - -b. Availability of Source Code. You must make available, without charge, under the terms of this License, the Source Code of the Licensed Product and any Modifications that you distribute, either on the same media as you distribute any executable or other form of the Licensed Product, or via a mechanism generally accepted in the software development community for the electronic transfer of data (an "Electronic Distribution Mechanism"). The Source Code for any version of Licensed Product or Modifications that you distribute must remain available for as long as any executable or other form of the Licensed Product is distributed by you. You are responsible for ensuring that the Source Code version remains available even if the Electronic Distribution Mechanism is maintained by a third party. - -c. Intellectual Property Matters. - - i. Third Party Claims. If you have knowledge that a license to a third party's intellectual property right is required to exercise the rights granted by this License, you must include a text file with the Source Code distribution titled "LEGAL" that describes the claim and the party making the claim in sufficient detail that a recipient will know whom to contact. If you obtain such knowledge after you make any Modifications available as described in Section 4(b), you shall promptly modify the LEGAL file in all copies you make available thereafter and shall take other steps (such as notifying appropriate mailing lists or newsgroups) reasonably calculated to inform those who received the Licensed Product from you that new knowledge has been obtained. - - ii. Contributor APIs. If your Modifications include an application programming interface ("API") and you have knowledge of patent licenses that are reasonably necessary to implement that API, you must also include this information in the LEGAL file. - - iii. Representations. You represent that, except as disclosed pursuant to 4(c)(i) above, you believe that any Modifications you distribute are your original creations and that you have sufficient rights to grant the rights conveyed by this License. - -d. Required Notices. You must duplicate this License in any documentation you provide along with the Source Code of any Modifications you create or to which you contribute, and which you distribute, wherever you describe recipients' rights relating to Licensed Product. You must duplicate the notice contained in Exhibit A (the "Notice") in each file of the Source Code of any copy you distribute of the Licensed Product. If you created a Modification, you may add your name as a Contributor to the Notice. If it is not possible to put the Notice in a particular Source Code file due to its structure, then you must include such Notice in a location (such as a relevant directory file) where a user would be likely to look for such a notice. You may choose to offer, and charge a fee for, warranty, support, indemnity or liability obligations to one or more recipients of Licensed Product. However, you may do so only on your own behalf, and not on behalf of the Licensor or any Contributor. You must make it clear that any such warranty, support, indemnity or liability obligation is offered by you alone, and you hereby agree to indemnify the Licensor and every Contributor for any liability incurred by the Licensor or such Contributor as a result of warranty, support, indemnity or liability terms you offer. - -e. Distribution of Executable Versions. You may distribute Licensed Product as an executable program under a license of your choice that may contain terms different from this License provided (i) you have satisfied the requirements of Sections 4(a) through 4(d) for that distribution, (ii) you include a conspicuous notice in the executable version, related documentation and collateral materials stating that the Source Code version of the -Licensed Product is available under the terms of this License, including a description of how and where you have fulfilled the obligations of Section 4(b), and (iii) you make it clear that any terms that differ from this License are offered by you alone, not by Licensor or any Contributor. You hereby agree to indemnify the Licensor and every Contributor for any liability incurred by Licensor or such Contributor as a result of any terms you offer. - -f. Distribution of Derivative Works. You may create Derivative Works (e.g., combinations of some or all of the Licensed Product with other code) and distribute the Derivative Works as products under any other license you select, with the proviso that the requirements of this License are fulfilled for those portions of the Derivative Works that consist of the Licensed Product or any Modifications thereto. - -g. Compensation for Distribution of Executable Versions of Licensed Products, Modifications or Derivative Works. Notwithstanding any provision of this License to the contrary, by distributing, selling, licensing, sublicensing or otherwise making available any Licensed Product, or Modification or Derivative Work thereof, you and Licensor hereby acknowledge and agree that you may sell, license or sublicense for a fee, accept donations or otherwise receive compensation for executable versions of a Licensed Product, without paying a royalty or other fee to the Licensor or any other Contributor, provided that such executable versions (i) contain your or another Contributor?s material Modifications, or (ii) are otherwise material Derivative Works. For purposes of this License, an executable version of the Licensed Product will be deemed to contain a material Modification, or will otherwise be deemed a material Derivative Work, if (a) the Licensed Product is modified with your own or a third party?s software programs or other code, and/or the Licensed Product is combined with a number of your own or a third party?s software programs or code, respectively, and (b) such software programs or code add or contribute material value, functionality or features to the License Product. For the avoidance of doubt, to the extent your executable version of a Licensed Product does not contain your or another Contributor?s material Modifications or is otherwise not a material Derivative Work, in each case as contemplated herein, you may not sell, license or sublicense for a fee, accept donations or otherwise receive compensation for such executable. Additionally, without limitation of the foregoing and notwithstanding any provision of this License to the contrary, you cannot charge for, sell, license or sublicense for a fee, accept donations or otherwise receive compensation for the Source Code. - -5. Inability to Comply Due to Statute or Regulation. If it is impossible for you to comply with any of the terms of this License with respect to some or all of the Licensed Product due to statute, judicial order, or regulation, then you must (i) comply with the terms of this License to the maximum extent possible, (ii) cite the statute or regulation that prohibits you from adhering to the License, and (iii) describe the limitations and the code they affect. Such description must be included in the LEGAL file described in Section 4(d), and must be included with all distributions of the Source Code. Except to the extent prohibited by statute or regulation, such description must be sufficiently detailed for a recipient of ordinary skill at computer programming to be able to understand it. - -6. Application of This License. This License applies to code to which Licensor or Contributor has attached the Notice in Exhibit A, which is incorporated herein by this reference. - -7. Versions of This License. - -a. New Versions. Licensor may publish from time to time revised and/or new versions of the License. - -b. Effect of New Versions. Once Licensed Product has been published under a particular version of the License, you may always continue to use it under the terms of that version, provided that any such license be in full force and effect at the time, and has not been revoked or otherwise terminated. You may also choose to use such Licensed Product under the terms of any subsequent version (but not any prior version) of the License published by Licensor. No one other than Licensor has the right to modify the terms applicable to Licensed Product created under this License. - -c. Derivative Works of this License. If you create or use a modified version of this License, which you may do only in order to apply it to software that is not already a Licensed Product under this License, you must rename your license so that it is not confusingly similar to this License, and must make it clear that your license contains terms that differ from this License. In so naming your license, you may not use any trademark of Licensor or any Contributor. - -8. Disclaimer of Warranty. LICENSED PRODUCT IS PROVIDED UNDER THIS LICENSE ON AN AS IS BASIS, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES THAT THE LICENSED PRODUCT IS FREE OF DEFECTS, MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE OR NON-INFRINGING. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE LICENSED PRODUCT IS WITH YOU. SHOULD LICENSED PRODUCT PROVE DEFECTIVE IN ANY RESPECT, YOU (AND NOT THE LICENSOR OR ANY OTHER CONTRIBUTOR) ASSUME THE COST OF ANY NECESSARY SERVICING, REPAIR OR CORRECTION. THIS -DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS LICENSE. NO USE OF LICENSED PRODUCT IS AUTHORIZED HEREUNDER EXCEPT UNDER THIS DISCLAIMER. - -9. Termination. - -a. Automatic Termination Upon Breach. This license and the rights granted hereunder will terminate automatically if you fail to comply with the terms herein and fail to cure such breach within ten (10) days of being notified of the breach by the Licensor. For purposes of this provision, proof of delivery via email to the address listed in the ?WHOIS? database of the registrar for any website through which you distribute or market any Licensed Product, or to any alternate email address which you designate in writing to the Licensor, shall constitute sufficient notification. All sublicenses to the Licensed Product that are properly granted shall survive any termination of this license so long as they continue to complye with the terms of this License. Provisions that, by their nature, must remain in effect beyond the termination of this License, shall survive. - -b. Termination Upon Assertion of Patent Infringement. If you initiate litigation by asserting a patent infringement claim (excluding declaratory judgment actions) against Licensor or a Contributor (Licensor or Contributor against whom you file such an action is referred to herein as Respondent) alleging that Licensed Product directly or indirectly infringes any patent, then any and all rights granted by such Respondent to you under Sections 1 or 2 of this License shall terminate prospectively upon sixty (60) days notice from Respondent (the "Notice Period") unless within that Notice Period you either agree in writing (i) to pay Respondent a mutually agreeable reasonably royalty for your past or future use of Licensed Product made by such Respondent, or (ii) withdraw your litigation claim with respect to Licensed Product against such Respondent. If within said Notice Period a reasonable royalty and payment arrangement are not mutually agreed upon in writing by the parties or the litigation claim is not withdrawn, the rights granted by Licensor to you under Sections 1 and 2 automatically terminate at the expiration of said Notice Period. - -c. Reasonable Value of This License. If you assert a patent infringement claim against Respondent alleging that Licensed Product directly or indirectly infringes any patent where such claim is resolved (such as by license or settlement) prior to the initiation of patent infringement litigation, then the reasonable value of the licenses granted by said Respondent under Sections 1 and 2 shall be taken into account in determining the amount or value of any payment or license. - -d. No Retroactive Effect of Termination. In the event of termination under Sections 9(a) or 9(b) above, all end user license agreements (excluding licenses to distributors and resellers) that have been validly granted by you or any distributor hereunder prior to termination shall survive termination. - -10. Limitation of Liability. UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, WHETHER TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE, SHALL THE LICENSOR, ANY CONTRIBUTOR, OR ANY DISTRIBUTOR OF LICENSED PRODUCT, OR ANY SUPPLIER OF ANY OF SUCH PARTIES, BE LIABLE TO ANY PERSON FOR ANY INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES OF ANY CHARACTER INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOSS OF GOODWILL, WORK STOPPAGE, COMPUTER FAILURE OR MALFUNCTION, OR ANY AND ALL OTHER COMMERCIAL DAMAGES OR LOSSES, EVEN IF SUCH PARTY SHALL HAVE BEEN INFORMED OF THE POSSIBILITY OF SUCH DAMAGES. THIS LIMITATION OF LIABILITY SHALL NOT APPLY TO LIABILITY FOR DEATH OR PERSONAL INJURY RESULTING FROM SUCH PARTYS NEGLIGENCE TO THE EXTENT APPLICABLE LAW PROHIBITS SUCH LIMITATION. SOME JURISDICTIONS DO NOT ALLOW THE EXCLUSION OR LIMITATION OF INCIDENTAL OR CONSEQUENTIAL DAMAGES, SO THIS EXCLUSION AND LIMITATION MAY NOT APPLY TO YOU. - -11. Responsibility for Claims. As between Licensor and Contributors, each party is responsible for claims and damages arising, directly or indirectly, out of its utilization of rights under this License. You agree to work with Licensor and Contributors to distribute such responsibility on an equitable basis. Nothing herein is intended or shall be deemed to constitute any admission of liability. - -12. U.S. Government End Users. The Licensed Product is a commercial item, as that term is defined in 48 C.F.R. 2.101 (Oct. 1995), consisting of commercial computer software and commercial computer software documentation, as such terms are used in 48 C.F.R. 12.212 (Sept. 1995). Consistent with 48 C.F.R. 12.212 and 48 C.F.R. 227.7202-1 through 227.7202-4 (June 1995), all U.S. Government End Users acquire Licensed Product with only those rights set forth herein. - -13. Miscellaneous. This License represents the complete agreement concerning the subject matter hereof. If any provision of this License is held to be unenforceable, such provision shall be reformed only to the extent necessary to make it enforceable. This License shall be governed by California law provisions (except to the extent applicable law, if any, provides otherwise), excluding its conflict-of-law provisions. You expressly agree that in any litigation relating to this license the losing party shall be responsible for costs including, without limitation, court costs and reasonable attorneys fees and expenses. The application of the United Nations Convention on Contracts for the International Sale of Goods is expressly excluded. Any law or regulation that provides that the language of a contract shall be construed against the drafter shall not apply to this License. - -14. Definition of You in This License. You throughout this License, whether in upper or lower case, means an individual or a legal entity exercising rights under, and complying with all of the terms of, this License or a future version of this License issued under Section 7. For legal entities, you includes any entity that controls, is controlled by, is under common control with, or affiliated with, you. For purposes of this definition, control means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. You are responsible for advising any affiliated entity of the terms of this License, and that any rights or privileges derived from or obtained by way of this License are subject to the restrictions outlined herein. - -15. Glossary. All defined terms in this License that are used in more than one Section of this License are repeated here, in alphabetical order, for the convenience of the reader. The Section of this License in which each defined term is first used is shown in parentheses. - -Contributor: Each person or entity who created or contributed to the creation of, and distributed, a Modification. (See Section 2) - -Derivative Works: That term as used in this License is defined under U.S. copyright law. (See Section 1(b)) - -License: This BitTorrent Open Source License. (See first paragraph of License) - -Licensed Product: Any BitTorrent Product licensed pursuant to this License. The term "Licensed Product" includes all previous Modifications from any Contributor that you receive. (See first paragraph of License and Section 2) - -Licensor: BitTorrent, Inc. (See first paragraph of License) - -Modifications: Any additions to or deletions from the substance or structure of (i) a file containing Licensed Product, or (ii) any new file that contains any part of Licensed Product. (See Section 2) - -Notice: The notice contained in Exhibit A. (See Section 4(e)) - -Source Code: The preferred form for making modifications to the Licensed Product, including all modules contained therein, plus any associated interface definition files, scripts used to control compilation and installation of an executable program, or a list of differential comparisons against the Source Code of the Licensed Product. (See Section 1(a)) - -You: This term is defined in Section 14 of this License. - - -EXHIBIT A - -The Notice below must appear in each file of the Source Code of any copy you distribute of the Licensed Product or any hereto. Contributors to any Modifications may add their own copyright notices to identify their own contributions. - -License: - -The contents of this file are subject to the BitTorrent Open Source License Version 1.0 (the License). You may not copy or use this file, in either source code or executable form, except in compliance with the License. You may obtain a copy of the License at http://www.bittorrent.com/license/. - -Software distributed under the License is distributed on an AS IS basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the specific language governing rights and limitations under the License. - diff --git a/libs/bencode/__init__.py b/libs/bencode/__init__.py index 4424fc7e..7a2af172 100644 --- a/libs/bencode/__init__.py +++ b/libs/bencode/__init__.py @@ -1 +1,131 @@ -from bencode import * \ No newline at end of file +# The contents of this file are subject to the BitTorrent Open Source License +# Version 1.1 (the License). You may not copy or use this file, in either +# source code or executable form, except in compliance with the License. You +# may obtain a copy of the License at http://www.bittorrent.com/license/. +# +# Software distributed under the License is distributed on an AS IS basis, +# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License +# for the specific language governing rights and limitations under the +# License. + +# Written by Petru Paler + +from BTL import BTFailure + + +def decode_int(x, f): + f += 1 + newf = x.index('e', f) + n = int(x[f:newf]) + if x[f] == '-': + if x[f + 1] == '0': + raise ValueError + elif x[f] == '0' and newf != f+1: + raise ValueError + return (n, newf+1) + +def decode_string(x, f): + colon = x.index(':', f) + n = int(x[f:colon]) + if x[f] == '0' and colon != f+1: + raise ValueError + colon += 1 + return (x[colon:colon+n], colon+n) + +def decode_list(x, f): + r, f = [], f+1 + while x[f] != 'e': + v, f = decode_func[x[f]](x, f) + r.append(v) + return (r, f + 1) + +def decode_dict(x, f): + r, f = {}, f+1 + while x[f] != 'e': + k, f = decode_string(x, f) + r[k], f = decode_func[x[f]](x, f) + return (r, f + 1) + +decode_func = {} +decode_func['l'] = decode_list +decode_func['d'] = decode_dict +decode_func['i'] = decode_int +decode_func['0'] = decode_string +decode_func['1'] = decode_string +decode_func['2'] = decode_string +decode_func['3'] = decode_string +decode_func['4'] = decode_string +decode_func['5'] = decode_string +decode_func['6'] = decode_string +decode_func['7'] = decode_string +decode_func['8'] = decode_string +decode_func['9'] = decode_string + +def bdecode(x): + try: + r, l = decode_func[x[0]](x, 0) + except (IndexError, KeyError, ValueError): + raise BTFailure("not a valid bencoded string") + if l != len(x): + raise BTFailure("invalid bencoded value (data after valid prefix)") + return r + +from types import StringType, IntType, LongType, DictType, ListType, TupleType + + +class Bencached(object): + + __slots__ = ['bencoded'] + + def __init__(self, s): + self.bencoded = s + +def encode_bencached(x,r): + r.append(x.bencoded) + +def encode_int(x, r): + r.extend(('i', str(x), 'e')) + +def encode_bool(x, r): + if x: + encode_int(1, r) + else: + encode_int(0, r) + +def encode_string(x, r): + r.extend((str(len(x)), ':', x)) + +def encode_list(x, r): + r.append('l') + for i in x: + encode_func[type(i)](i, r) + r.append('e') + +def encode_dict(x,r): + r.append('d') + ilist = x.items() + ilist.sort() + for k, v in ilist: + r.extend((str(len(k)), ':', k)) + encode_func[type(v)](v, r) + r.append('e') + +encode_func = {} +encode_func[Bencached] = encode_bencached +encode_func[IntType] = encode_int +encode_func[LongType] = encode_int +encode_func[StringType] = encode_string +encode_func[ListType] = encode_list +encode_func[TupleType] = encode_list +encode_func[DictType] = encode_dict + +try: + from types import BooleanType + encode_func[BooleanType] = encode_bool +except ImportError: + pass + +def bencode(x): + r = [] + encode_func[type(x)](x, r) + return ''.join(r) diff --git a/libs/bencode/bencode.py b/libs/bencode/bencode.py deleted file mode 100644 index 7a2af172..00000000 --- a/libs/bencode/bencode.py +++ /dev/null @@ -1,131 +0,0 @@ -# The contents of this file are subject to the BitTorrent Open Source License -# Version 1.1 (the License). You may not copy or use this file, in either -# source code or executable form, except in compliance with the License. You -# may obtain a copy of the License at http://www.bittorrent.com/license/. -# -# Software distributed under the License is distributed on an AS IS basis, -# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License -# for the specific language governing rights and limitations under the -# License. - -# Written by Petru Paler - -from BTL import BTFailure - - -def decode_int(x, f): - f += 1 - newf = x.index('e', f) - n = int(x[f:newf]) - if x[f] == '-': - if x[f + 1] == '0': - raise ValueError - elif x[f] == '0' and newf != f+1: - raise ValueError - return (n, newf+1) - -def decode_string(x, f): - colon = x.index(':', f) - n = int(x[f:colon]) - if x[f] == '0' and colon != f+1: - raise ValueError - colon += 1 - return (x[colon:colon+n], colon+n) - -def decode_list(x, f): - r, f = [], f+1 - while x[f] != 'e': - v, f = decode_func[x[f]](x, f) - r.append(v) - return (r, f + 1) - -def decode_dict(x, f): - r, f = {}, f+1 - while x[f] != 'e': - k, f = decode_string(x, f) - r[k], f = decode_func[x[f]](x, f) - return (r, f + 1) - -decode_func = {} -decode_func['l'] = decode_list -decode_func['d'] = decode_dict -decode_func['i'] = decode_int -decode_func['0'] = decode_string -decode_func['1'] = decode_string -decode_func['2'] = decode_string -decode_func['3'] = decode_string -decode_func['4'] = decode_string -decode_func['5'] = decode_string -decode_func['6'] = decode_string -decode_func['7'] = decode_string -decode_func['8'] = decode_string -decode_func['9'] = decode_string - -def bdecode(x): - try: - r, l = decode_func[x[0]](x, 0) - except (IndexError, KeyError, ValueError): - raise BTFailure("not a valid bencoded string") - if l != len(x): - raise BTFailure("invalid bencoded value (data after valid prefix)") - return r - -from types import StringType, IntType, LongType, DictType, ListType, TupleType - - -class Bencached(object): - - __slots__ = ['bencoded'] - - def __init__(self, s): - self.bencoded = s - -def encode_bencached(x,r): - r.append(x.bencoded) - -def encode_int(x, r): - r.extend(('i', str(x), 'e')) - -def encode_bool(x, r): - if x: - encode_int(1, r) - else: - encode_int(0, r) - -def encode_string(x, r): - r.extend((str(len(x)), ':', x)) - -def encode_list(x, r): - r.append('l') - for i in x: - encode_func[type(i)](i, r) - r.append('e') - -def encode_dict(x,r): - r.append('d') - ilist = x.items() - ilist.sort() - for k, v in ilist: - r.extend((str(len(k)), ':', k)) - encode_func[type(v)](v, r) - r.append('e') - -encode_func = {} -encode_func[Bencached] = encode_bencached -encode_func[IntType] = encode_int -encode_func[LongType] = encode_int -encode_func[StringType] = encode_string -encode_func[ListType] = encode_list -encode_func[TupleType] = encode_list -encode_func[DictType] = encode_dict - -try: - from types import BooleanType - encode_func[BooleanType] = encode_bool -except ImportError: - pass - -def bencode(x): - r = [] - encode_func[type(x)](x, r) - return ''.join(r) diff --git a/libs/bs4/__init__.py b/libs/bs4/__init__.py index af8c718d..7ba34269 100644 --- a/libs/bs4/__init__.py +++ b/libs/bs4/__init__.py @@ -17,16 +17,17 @@ http://www.crummy.com/software/BeautifulSoup/bs4/doc/ """ __author__ = "Leonard Richardson (leonardr@segfault.org)" -__version__ = "4.1.0" -__copyright__ = "Copyright (c) 2004-2012 Leonard Richardson" +__version__ = "4.3.2" +__copyright__ = "Copyright (c) 2004-2013 Leonard Richardson" __license__ = "MIT" __all__ = ['BeautifulSoup'] +import os import re import warnings -from .builder import builder_registry +from .builder import builder_registry, ParserRejectedMarkup from .dammit import UnicodeDammit from .element import ( CData, @@ -74,11 +75,7 @@ class BeautifulSoup(Tag): # want, look for one with these features. DEFAULT_BUILDER_FEATURES = ['html', 'fast'] - # Used when determining whether a text node is all whitespace and - # can be replaced with a single space. A text node that contains - # fancy Unicode spaces (usually non-breaking) should be left - # alone. - STRIP_ASCII_SPACES = {9: None, 10: None, 12: None, 13: None, 32: None, } + ASCII_SPACES = '\x20\x0a\x09\x0c\x0d' def __init__(self, markup="", features=None, builder=None, parse_only=None, from_encoding=None, **kwargs): @@ -149,7 +146,7 @@ class BeautifulSoup(Tag): features = self.DEFAULT_BUILDER_FEATURES builder_class = builder_registry.lookup(*features) if builder_class is None: - raise ValueError( + raise FeatureNotFound( "Couldn't find a tree builder with the features you " "requested: %s. Do you need to install a parser library?" % ",".join(features)) @@ -160,18 +157,46 @@ class BeautifulSoup(Tag): self.parse_only = parse_only - self.reset() - if hasattr(markup, 'read'): # It's a file-type object. markup = markup.read() - (self.markup, self.original_encoding, self.declared_html_encoding, - self.contains_replacement_characters) = ( - self.builder.prepare_markup(markup, from_encoding)) + elif len(markup) <= 256: + # Print out warnings for a couple beginner problems + # involving passing non-markup to Beautiful Soup. + # Beautiful Soup will still parse the input as markup, + # just in case that's what the user really wants. + if (isinstance(markup, unicode) + and not os.path.supports_unicode_filenames): + possible_filename = markup.encode("utf8") + else: + possible_filename = markup + is_file = False + try: + is_file = os.path.exists(possible_filename) + except Exception, e: + # This is almost certainly a problem involving + # characters not valid in filenames on this + # system. Just let it go. + pass + if is_file: + warnings.warn( + '"%s" looks like a filename, not markup. You should probably open this file and pass the filehandle into Beautiful Soup.' % markup) + if markup[:5] == "http:" or markup[:6] == "https:": + # TODO: This is ugly but I couldn't get it to work in + # Python 3 otherwise. + if ((isinstance(markup, bytes) and not b' ' in markup) + or (isinstance(markup, unicode) and not u' ' in markup)): + warnings.warn( + '"%s" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client to get the document behind the URL, and feed that document to Beautiful Soup.' % markup) - try: - self._feed() - except StopParsing: - pass + for (self.markup, self.original_encoding, self.declared_html_encoding, + self.contains_replacement_characters) in ( + self.builder.prepare_markup(markup, from_encoding)): + self.reset() + try: + self._feed() + break + except ParserRejectedMarkup: + pass # Clear out the markup and remove the builder's circular # reference to this object. @@ -192,29 +217,32 @@ class BeautifulSoup(Tag): Tag.__init__(self, self, self.builder, self.ROOT_TAG_NAME) self.hidden = 1 self.builder.reset() - self.currentData = [] + self.current_data = [] self.currentTag = None self.tagStack = [] + self.preserve_whitespace_tag_stack = [] self.pushTag(self) def new_tag(self, name, namespace=None, nsprefix=None, **attrs): """Create a new tag associated with this soup.""" return Tag(None, self.builder, name, namespace, nsprefix, attrs) - def new_string(self, s): + def new_string(self, s, subclass=NavigableString): """Create a new NavigableString associated with this soup.""" - navigable = NavigableString(s) + navigable = subclass(s) navigable.setup() return navigable def insert_before(self, successor): - raise ValueError("BeautifulSoup objects don't support insert_before().") + raise NotImplementedError("BeautifulSoup objects don't support insert_before().") def insert_after(self, successor): - raise ValueError("BeautifulSoup objects don't support insert_after().") + raise NotImplementedError("BeautifulSoup objects don't support insert_after().") def popTag(self): tag = self.tagStack.pop() + if self.preserve_whitespace_tag_stack and tag == self.preserve_whitespace_tag_stack[-1]: + self.preserve_whitespace_tag_stack.pop() #print "Pop", tag.name if self.tagStack: self.currentTag = self.tagStack[-1] @@ -226,32 +254,49 @@ class BeautifulSoup(Tag): self.currentTag.contents.append(tag) self.tagStack.append(tag) self.currentTag = self.tagStack[-1] + if tag.name in self.builder.preserve_whitespace_tags: + self.preserve_whitespace_tag_stack.append(tag) def endData(self, containerClass=NavigableString): - if self.currentData: - currentData = u''.join(self.currentData) - if (currentData.translate(self.STRIP_ASCII_SPACES) == '' and - not set([tag.name for tag in self.tagStack]).intersection( - self.builder.preserve_whitespace_tags)): - if '\n' in currentData: - currentData = '\n' - else: - currentData = ' ' - self.currentData = [] + if self.current_data: + current_data = u''.join(self.current_data) + # If whitespace is not preserved, and this string contains + # nothing but ASCII spaces, replace it with a single space + # or newline. + if not self.preserve_whitespace_tag_stack: + strippable = True + for i in current_data: + if i not in self.ASCII_SPACES: + strippable = False + break + if strippable: + if '\n' in current_data: + current_data = '\n' + else: + current_data = ' ' + + # Reset the data collector. + self.current_data = [] + + # Should we add this string to the tree at all? if self.parse_only and len(self.tagStack) <= 1 and \ (not self.parse_only.text or \ - not self.parse_only.search(currentData)): + not self.parse_only.search(current_data)): return - o = containerClass(currentData) + + o = containerClass(current_data) self.object_was_parsed(o) - def object_was_parsed(self, o): + def object_was_parsed(self, o, parent=None, most_recent_element=None): """Add an object to the parse tree.""" - o.setup(self.currentTag, self.previous_element) - if self.previous_element: - self.previous_element.next_element = o - self.previous_element = o - self.currentTag.contents.append(o) + parent = parent or self.currentTag + most_recent_element = most_recent_element or self._most_recent_element + o.setup(parent, most_recent_element) + + if most_recent_element is not None: + most_recent_element.next_element = o + self._most_recent_element = o + parent.contents.append(o) def _popToTag(self, name, nsprefix=None, inclusivePop=True): """Pops the tag stack up to and including the most recent @@ -260,22 +305,21 @@ class BeautifulSoup(Tag): the given tag.""" #print "Popping to %s" % name if name == self.ROOT_TAG_NAME: + # The BeautifulSoup object itself can never be popped. return - numPops = 0 - mostRecentTag = None + most_recently_popped = None - for i in range(len(self.tagStack) - 1, 0, -1): - if (name == self.tagStack[i].name - and nsprefix == self.tagStack[i].nsprefix == nsprefix): - numPops = len(self.tagStack) - i + stack_size = len(self.tagStack) + for i in range(stack_size - 1, 0, -1): + t = self.tagStack[i] + if (name == t.name and nsprefix == t.prefix): + if inclusivePop: + most_recently_popped = self.popTag() break - if not inclusivePop: - numPops = numPops - 1 + most_recently_popped = self.popTag() - for i in range(0, numPops): - mostRecentTag = self.popTag() - return mostRecentTag + return most_recently_popped def handle_starttag(self, name, namespace, nsprefix, attrs): """Push a start tag on to the stack. @@ -295,12 +339,12 @@ class BeautifulSoup(Tag): return None tag = Tag(self, self.builder, name, namespace, nsprefix, attrs, - self.currentTag, self.previous_element) + self.currentTag, self._most_recent_element) if tag is None: return tag - if self.previous_element: - self.previous_element.next_element = tag - self.previous_element = tag + if self._most_recent_element: + self._most_recent_element.next_element = tag + self._most_recent_element = tag self.pushTag(tag) return tag @@ -310,7 +354,7 @@ class BeautifulSoup(Tag): self._popToTag(name, nsprefix) def handle_data(self, data): - self.currentData.append(data) + self.current_data.append(data) def decode(self, pretty_print=False, eventual_encoding=DEFAULT_OUTPUT_ENCODING, @@ -333,6 +377,10 @@ class BeautifulSoup(Tag): return prefix + super(BeautifulSoup, self).decode( indent_level, eventual_encoding, formatter) +# Alias to make it easier to type import: 'from bs4 import _soup' +_s = BeautifulSoup +_soup = BeautifulSoup + class BeautifulStoneSoup(BeautifulSoup): """Deprecated interface to an XML parser.""" @@ -347,6 +395,9 @@ class BeautifulStoneSoup(BeautifulSoup): class StopParsing(Exception): pass +class FeatureNotFound(ValueError): + pass + #By default, act as an HTML pretty-printer. if __name__ == '__main__': diff --git a/libs/bs4/builder/__init__.py b/libs/bs4/builder/__init__.py index 4c22b864..740f5f29 100644 --- a/libs/bs4/builder/__init__.py +++ b/libs/bs4/builder/__init__.py @@ -147,18 +147,29 @@ class TreeBuilder(object): Modifies its input in place. """ + if not attrs: + return attrs if self.cdata_list_attributes: universal = self.cdata_list_attributes.get('*', []) tag_specific = self.cdata_list_attributes.get( - tag_name.lower(), []) - for cdata_list_attr in itertools.chain(universal, tag_specific): - if cdata_list_attr in dict(attrs): - # Basically, we have a "class" attribute whose - # value is a whitespace-separated list of CSS - # classes. Split it into a list. - value = attrs[cdata_list_attr] - values = whitespace_re.split(value) - attrs[cdata_list_attr] = values + tag_name.lower(), None) + for attr in attrs.keys(): + if attr in universal or (tag_specific and attr in tag_specific): + # We have a "class"-type attribute whose string + # value is a whitespace-separated list of + # values. Split it into a list. + value = attrs[attr] + if isinstance(value, basestring): + values = whitespace_re.split(value) + else: + # html5lib sometimes calls setAttributes twice + # for the same tag when rearranging the parse + # tree. On the second call the attribute value + # here is already a list. If this happens, + # leave the value alone rather than trying to + # split it again. + values = value + attrs[attr] = values return attrs class SAXTreeBuilder(TreeBuilder): @@ -287,6 +298,9 @@ def register_treebuilders_from(module): # Register the builder while we're at it. this_module.builder_registry.register(obj) +class ParserRejectedMarkup(Exception): + pass + # Builders are registered in reverse order of priority, so that custom # builder registrations will take precedence. In general, we want lxml # to take precedence over html5lib, because it's faster. And we only diff --git a/libs/bs4/builder/_html5lib.py b/libs/bs4/builder/_html5lib.py index 6001e386..7de36ae7 100644 --- a/libs/bs4/builder/_html5lib.py +++ b/libs/bs4/builder/_html5lib.py @@ -27,7 +27,7 @@ class HTML5TreeBuilder(HTMLTreeBuilder): def prepare_markup(self, markup, user_specified_encoding): # Store the user-specified encoding for use later on. self.user_specified_encoding = user_specified_encoding - return markup, None, None, False + yield (markup, None, None, False) # These methods are defined by Beautiful Soup. def feed(self, markup): @@ -123,17 +123,50 @@ class Element(html5lib.treebuilders._base.Node): self.namespace = namespace def appendChild(self, node): - if (node.element.__class__ == NavigableString and self.element.contents + string_child = child = None + if isinstance(node, basestring): + # Some other piece of code decided to pass in a string + # instead of creating a TextElement object to contain the + # string. + string_child = child = node + elif isinstance(node, Tag): + # Some other piece of code decided to pass in a Tag + # instead of creating an Element object to contain the + # Tag. + child = node + elif node.element.__class__ == NavigableString: + string_child = child = node.element + else: + child = node.element + + if not isinstance(child, basestring) and child.parent is not None: + node.element.extract() + + if (string_child and self.element.contents and self.element.contents[-1].__class__ == NavigableString): - # Concatenate new text onto old text node - # XXX This has O(n^2) performance, for input like + # We are appending a string onto another string. + # TODO This has O(n^2) performance, for input like # "aaa..." old_element = self.element.contents[-1] - new_element = self.soup.new_string(old_element + node.element) + new_element = self.soup.new_string(old_element + string_child) old_element.replace_with(new_element) + self.soup._most_recent_element = new_element else: - self.element.append(node.element) - node.parent = self + if isinstance(node, basestring): + # Create a brand new NavigableString from this string. + child = self.soup.new_string(node) + + # Tell Beautiful Soup to act as if it parsed this element + # immediately after the parent's last descendant. (Or + # immediately after the parent, if it has no children.) + if self.element.contents: + most_recent_element = self.element._last_descendant(False) + else: + most_recent_element = self.element + + self.soup.object_was_parsed( + child, parent=self.element, + most_recent_element=most_recent_element) def getAttributes(self): return AttrList(self.element) @@ -162,11 +195,11 @@ class Element(html5lib.treebuilders._base.Node): attributes = property(getAttributes, setAttributes) def insertText(self, data, insertBefore=None): - text = TextNode(self.soup.new_string(data), self.soup) if insertBefore: - self.insertBefore(text, insertBefore) + text = TextNode(self.soup.new_string(data), self.soup) + self.insertBefore(data, insertBefore) else: - self.appendChild(text) + self.appendChild(data) def insertBefore(self, node, refNode): index = self.element.index(refNode.element) @@ -183,16 +216,46 @@ class Element(html5lib.treebuilders._base.Node): def removeChild(self, node): node.element.extract() - def reparentChildren(self, newParent): - while self.element.contents: - child = self.element.contents[0] - child.extract() - if isinstance(child, Tag): - newParent.appendChild( - Element(child, self.soup, namespaces["html"])) - else: - newParent.appendChild( - TextNode(child, self.soup)) + def reparentChildren(self, new_parent): + """Move all of this tag's children into another tag.""" + element = self.element + new_parent_element = new_parent.element + # Determine what this tag's next_element will be once all the children + # are removed. + final_next_element = element.next_sibling + + new_parents_last_descendant = new_parent_element._last_descendant(False, False) + if len(new_parent_element.contents) > 0: + # The new parent already contains children. We will be + # appending this tag's children to the end. + new_parents_last_child = new_parent_element.contents[-1] + new_parents_last_descendant_next_element = new_parents_last_descendant.next_element + else: + # The new parent contains no children. + new_parents_last_child = None + new_parents_last_descendant_next_element = new_parent_element.next_element + + to_append = element.contents + append_after = new_parent.element.contents + if len(to_append) > 0: + # Set the first child's previous_element and previous_sibling + # to elements within the new parent + first_child = to_append[0] + first_child.previous_element = new_parents_last_descendant + first_child.previous_sibling = new_parents_last_child + + # Fix the last child's next_element and next_sibling + last_child = to_append[-1] + last_child.next_element = new_parents_last_descendant_next_element + last_child.next_sibling = None + + for child in to_append: + child.parent = new_parent_element + new_parent_element.contents.append(child) + + # Now that this element has no children, change its .next_element. + element.contents = [] + element.next_element = final_next_element def cloneNode(self): tag = self.soup.new_tag(self.element.name, self.namespace) diff --git a/libs/bs4/builder/_htmlparser.py b/libs/bs4/builder/_htmlparser.py index ede5cecb..ca8d8b89 100644 --- a/libs/bs4/builder/_htmlparser.py +++ b/libs/bs4/builder/_htmlparser.py @@ -45,7 +45,15 @@ HTMLPARSER = 'html.parser' class BeautifulSoupHTMLParser(HTMLParser): def handle_starttag(self, name, attrs): # XXX namespace - self.soup.handle_starttag(name, None, None, dict(attrs)) + attr_dict = {} + for key, value in attrs: + # Change None attribute values to the empty string + # for consistency with the other tree builders. + if value is None: + value = '' + attr_dict[key] = value + attrvalue = '""' + self.soup.handle_starttag(name, None, None, attr_dict) def handle_endtag(self, name): self.soup.handle_endtag(name) @@ -58,6 +66,8 @@ class BeautifulSoupHTMLParser(HTMLParser): # it's fixed. if name.startswith('x'): real_name = int(name.lstrip('x'), 16) + elif name.startswith('X'): + real_name = int(name.lstrip('X'), 16) else: real_name = int(name) @@ -85,6 +95,9 @@ class BeautifulSoupHTMLParser(HTMLParser): self.soup.endData() if data.startswith("DOCTYPE "): data = data[len("DOCTYPE "):] + elif data == 'DOCTYPE': + # i.e. "" + data = '' self.soup.handle_data(data) self.soup.endData(Doctype) @@ -130,13 +143,14 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder): replaced with REPLACEMENT CHARACTER). """ if isinstance(markup, unicode): - return markup, None, None, False + yield (markup, None, None, False) + return try_encodings = [user_specified_encoding, document_declared_encoding] dammit = UnicodeDammit(markup, try_encodings, is_html=True) - return (dammit.markup, dammit.original_encoding, - dammit.declared_html_encoding, - dammit.contains_replacement_characters) + yield (dammit.markup, dammit.original_encoding, + dammit.declared_html_encoding, + dammit.contains_replacement_characters) def feed(self, markup): args, kwargs = self.parser_args diff --git a/libs/bs4/builder/_lxml.py b/libs/bs4/builder/_lxml.py index c78fdff6..fa5d4987 100644 --- a/libs/bs4/builder/_lxml.py +++ b/libs/bs4/builder/_lxml.py @@ -3,6 +3,7 @@ __all__ = [ 'LXMLTreeBuilder', ] +from io import BytesIO from StringIO import StringIO import collections from lxml import etree @@ -12,9 +13,10 @@ from bs4.builder import ( HTML, HTMLTreeBuilder, PERMISSIVE, + ParserRejectedMarkup, TreeBuilder, XML) -from bs4.dammit import UnicodeDammit +from bs4.dammit import EncodingDetector LXML = 'lxml' @@ -28,24 +30,36 @@ class LXMLTreeBuilderForXML(TreeBuilder): CHUNK_SIZE = 512 - @property - def default_parser(self): + # This namespace mapping is specified in the XML Namespace + # standard. + DEFAULT_NSMAPS = {'http://www.w3.org/XML/1998/namespace' : "xml"} + + def default_parser(self, encoding): # This can either return a parser object or a class, which # will be instantiated with default arguments. - return etree.XMLParser(target=self, strip_cdata=False, recover=True) + if self._default_parser is not None: + return self._default_parser + return etree.XMLParser( + target=self, strip_cdata=False, recover=True, encoding=encoding) + + def parser_for(self, encoding): + # Use the default parser. + parser = self.default_parser(encoding) - def __init__(self, parser=None, empty_element_tags=None): - if empty_element_tags is not None: - self.empty_element_tags = set(empty_element_tags) - if parser is None: - # Use the default parser. - parser = self.default_parser if isinstance(parser, collections.Callable): # Instantiate the parser with default arguments - parser = parser(target=self, strip_cdata=False) - self.parser = parser + parser = parser(target=self, strip_cdata=False, encoding=encoding) + return parser + + def __init__(self, parser=None, empty_element_tags=None): + # TODO: Issue a warning if parser is present but not a + # callable, since that means there's no way to create new + # parsers for different encodings. + self._default_parser = parser + if empty_element_tags is not None: + self.empty_element_tags = set(empty_element_tags) self.soup = None - self.nsmaps = None + self.nsmaps = [self.DEFAULT_NSMAPS] def _getNsTag(self, tag): # Split the namespace URL out of a fully-qualified lxml tag @@ -58,50 +72,69 @@ class LXMLTreeBuilderForXML(TreeBuilder): def prepare_markup(self, markup, user_specified_encoding=None, document_declared_encoding=None): """ - :return: A 3-tuple (markup, original encoding, encoding - declared within markup). + :yield: A series of 4-tuples. + (markup, encoding, declared encoding, + has undergone character replacement) + + Each 4-tuple represents a strategy for parsing the document. """ if isinstance(markup, unicode): - return markup, None, None, False + # We were given Unicode. Maybe lxml can parse Unicode on + # this system? + yield markup, None, document_declared_encoding, False + if isinstance(markup, unicode): + # No, apparently not. Convert the Unicode to UTF-8 and + # tell lxml to parse it as UTF-8. + yield (markup.encode("utf8"), "utf8", + document_declared_encoding, False) + + # Instead of using UnicodeDammit to convert the bytestring to + # Unicode using different encodings, use EncodingDetector to + # iterate over the encodings, and tell lxml to try to parse + # the document as each one in turn. + is_html = not self.is_xml try_encodings = [user_specified_encoding, document_declared_encoding] - dammit = UnicodeDammit(markup, try_encodings, is_html=True) - return (dammit.markup, dammit.original_encoding, - dammit.declared_html_encoding, - dammit.contains_replacement_characters) + detector = EncodingDetector(markup, try_encodings, is_html) + for encoding in detector.encodings: + yield (detector.markup, encoding, document_declared_encoding, False) def feed(self, markup): - if isinstance(markup, basestring): + if isinstance(markup, bytes): + markup = BytesIO(markup) + elif isinstance(markup, unicode): markup = StringIO(markup) + # Call feed() at least once, even if the markup is empty, # or the parser won't be initialized. data = markup.read(self.CHUNK_SIZE) - self.parser.feed(data) - while data != '': - # Now call feed() on the rest of the data, chunk by chunk. - data = markup.read(self.CHUNK_SIZE) - if data != '': - self.parser.feed(data) - self.parser.close() + try: + self.parser = self.parser_for(self.soup.original_encoding) + self.parser.feed(data) + while len(data) != 0: + # Now call feed() on the rest of the data, chunk by chunk. + data = markup.read(self.CHUNK_SIZE) + if len(data) != 0: + self.parser.feed(data) + self.parser.close() + except (UnicodeDecodeError, LookupError, etree.ParserError), e: + raise ParserRejectedMarkup(str(e)) def close(self): - self.nsmaps = None + self.nsmaps = [self.DEFAULT_NSMAPS] def start(self, name, attrs, nsmap={}): # Make sure attrs is a mutable dict--lxml may send an immutable dictproxy. attrs = dict(attrs) - nsprefix = None # Invert each namespace map as it comes in. - if len(nsmap) == 0 and self.nsmaps != None: - # There are no new namespaces for this tag, but namespaces - # are in play, so we need a separate tag stack to know - # when they end. + if len(self.nsmaps) > 1: + # There are no new namespaces for this tag, but + # non-default namespaces are in play, so we need a + # separate tag stack to know when they end. self.nsmaps.append(None) elif len(nsmap) > 0: # A new namespace mapping has come into play. - if self.nsmaps is None: - self.nsmaps = [] inverted_nsmap = dict((value, key) for key, value in nsmap.items()) self.nsmaps.append(inverted_nsmap) # Also treat the namespace mapping as a set of attributes on the @@ -111,14 +144,34 @@ class LXMLTreeBuilderForXML(TreeBuilder): attribute = NamespacedAttribute( "xmlns", prefix, "http://www.w3.org/2000/xmlns/") attrs[attribute] = namespace + + # Namespaces are in play. Find any attributes that came in + # from lxml with namespaces attached to their names, and + # turn then into NamespacedAttribute objects. + new_attrs = {} + for attr, value in attrs.items(): + namespace, attr = self._getNsTag(attr) + if namespace is None: + new_attrs[attr] = value + else: + nsprefix = self._prefix_for_namespace(namespace) + attr = NamespacedAttribute(nsprefix, attr, namespace) + new_attrs[attr] = value + attrs = new_attrs + namespace, name = self._getNsTag(name) - if namespace is not None: - for inverted_nsmap in reversed(self.nsmaps): - if inverted_nsmap is not None and namespace in inverted_nsmap: - nsprefix = inverted_nsmap[namespace] - break + nsprefix = self._prefix_for_namespace(namespace) self.soup.handle_starttag(name, namespace, nsprefix, attrs) + def _prefix_for_namespace(self, namespace): + """Find the currently active prefix for the given namespace.""" + if namespace is None: + return None + for inverted_nsmap in reversed(self.nsmaps): + if inverted_nsmap is not None and namespace in inverted_nsmap: + return inverted_nsmap[namespace] + return None + def end(self, name): self.soup.endData() completed_tag = self.soup.tagStack[-1] @@ -130,14 +183,10 @@ class LXMLTreeBuilderForXML(TreeBuilder): nsprefix = inverted_nsmap[namespace] break self.soup.handle_endtag(name, nsprefix) - if self.nsmaps != None: + if len(self.nsmaps) > 1: # This tag, or one of its parents, introduced a namespace # mapping, so pop it off the stack. self.nsmaps.pop() - if len(self.nsmaps) == 0: - # Namespaces are no longer in play, so don't bother keeping - # track of the namespace stack. - self.nsmaps = None def pi(self, target, data): pass @@ -166,13 +215,18 @@ class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML): features = [LXML, HTML, FAST, PERMISSIVE] is_xml = False - @property - def default_parser(self): + def default_parser(self, encoding): return etree.HTMLParser def feed(self, markup): - self.parser.feed(markup) - self.parser.close() + encoding = self.soup.original_encoding + try: + self.parser = self.parser_for(encoding) + self.parser.feed(markup) + self.parser.close() + except (UnicodeDecodeError, LookupError, etree.ParserError), e: + raise ParserRejectedMarkup(str(e)) + def test_fragment_to_document(self, fragment): """See `TreeBuilder`.""" diff --git a/libs/bs4/dammit.py b/libs/bs4/dammit.py index 58cad9ba..59640b7c 100644 --- a/libs/bs4/dammit.py +++ b/libs/bs4/dammit.py @@ -1,27 +1,40 @@ # -*- coding: utf-8 -*- """Beautiful Soup bonus library: Unicode, Dammit -This class forces XML data into a standard format (usually to UTF-8 or -Unicode). It is heavily based on code from Mark Pilgrim's Universal -Feed Parser. It does not rewrite the XML or HTML to reflect a new -encoding; that's the tree builder's job. +This library converts a bytestream to Unicode through any means +necessary. It is heavily based on code from Mark Pilgrim's Universal +Feed Parser. It works best on XML and XML, but it does not rewrite the +XML or HTML to reflect a new encoding; that's the tree builder's job. """ import codecs from htmlentitydefs import codepoint2name import re -import warnings +import logging +import string -# Autodetects character encodings. Very useful. -# Download from http://chardet.feedparser.org/ -# or 'apt-get install python-chardet' -# or 'easy_install chardet' +# Import a library to autodetect character encodings. +chardet_type = None try: - import chardet - #import chardet.constants - #chardet.constants._debug = 1 + # First try the fast C implementation. + # PyPI package: cchardet + import cchardet + def chardet_dammit(s): + return cchardet.detect(s)['encoding'] except ImportError: - chardet = None + try: + # Fall back to the pure Python implementation + # Debian package: python-chardet + # PyPI package: chardet + import chardet + def chardet_dammit(s): + return chardet.detect(s)['encoding'] + #import chardet.constants + #chardet.constants._debug = 1 + except ImportError: + # No chardet available. + def chardet_dammit(s): + return None # Available from http://cjkpython.i18n.org/. try: @@ -69,6 +82,8 @@ class EntitySubstitution(object): "&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)" ")") + AMPERSAND_OR_BRACKET = re.compile("([<>&])") + @classmethod def _substitute_html_entity(cls, matchobj): entity = cls.CHARACTER_TO_HTML_ENTITY.get(matchobj.group(0)) @@ -122,6 +137,28 @@ class EntitySubstitution(object): def substitute_xml(cls, value, make_quoted_attribute=False): """Substitute XML entities for special XML characters. + :param value: A string to be substituted. The less-than sign + will become <, the greater-than sign will become >, + and any ampersands will become &. If you want ampersands + that appear to be part of an entity definition to be left + alone, use substitute_xml_containing_entities() instead. + + :param make_quoted_attribute: If True, then the string will be + quoted, as befits an attribute value. + """ + # Escape angle brackets and ampersands. + value = cls.AMPERSAND_OR_BRACKET.sub( + cls._substitute_xml_entity, value) + + if make_quoted_attribute: + value = cls.quoted_attribute_value(value) + return value + + @classmethod + def substitute_xml_containing_entities( + cls, value, make_quoted_attribute=False): + """Substitute XML entities for special XML characters. + :param value: A string to be substituted. The less-than sign will become <, the greater-than sign will become >, and any ampersands that are not part of an entity defition will @@ -155,6 +192,125 @@ class EntitySubstitution(object): cls._substitute_html_entity, s) +class EncodingDetector: + """Suggests a number of possible encodings for a bytestring. + + Order of precedence: + + 1. Encodings you specifically tell EncodingDetector to try first + (the override_encodings argument to the constructor). + + 2. An encoding declared within the bytestring itself, either in an + XML declaration (if the bytestring is to be interpreted as an XML + document), or in a tag (if the bytestring is to be + interpreted as an HTML document.) + + 3. An encoding detected through textual analysis by chardet, + cchardet, or a similar external library. + + 4. UTF-8. + + 5. Windows-1252. + """ + def __init__(self, markup, override_encodings=None, is_html=False): + self.override_encodings = override_encodings or [] + self.chardet_encoding = None + self.is_html = is_html + self.declared_encoding = None + + # First order of business: strip a byte-order mark. + self.markup, self.sniffed_encoding = self.strip_byte_order_mark(markup) + + def _usable(self, encoding, tried): + if encoding is not None: + encoding = encoding.lower() + if encoding not in tried: + tried.add(encoding) + return True + return False + + @property + def encodings(self): + """Yield a number of encodings that might work for this markup.""" + tried = set() + for e in self.override_encodings: + if self._usable(e, tried): + yield e + + # Did the document originally start with a byte-order mark + # that indicated its encoding? + if self._usable(self.sniffed_encoding, tried): + yield self.sniffed_encoding + + # Look within the document for an XML or HTML encoding + # declaration. + if self.declared_encoding is None: + self.declared_encoding = self.find_declared_encoding( + self.markup, self.is_html) + if self._usable(self.declared_encoding, tried): + yield self.declared_encoding + + # Use third-party character set detection to guess at the + # encoding. + if self.chardet_encoding is None: + self.chardet_encoding = chardet_dammit(self.markup) + if self._usable(self.chardet_encoding, tried): + yield self.chardet_encoding + + # As a last-ditch effort, try utf-8 and windows-1252. + for e in ('utf-8', 'windows-1252'): + if self._usable(e, tried): + yield e + + @classmethod + def strip_byte_order_mark(cls, data): + """If a byte-order mark is present, strip it and return the encoding it implies.""" + encoding = None + if (len(data) >= 4) and (data[:2] == b'\xfe\xff') \ + and (data[2:4] != '\x00\x00'): + encoding = 'utf-16be' + data = data[2:] + elif (len(data) >= 4) and (data[:2] == b'\xff\xfe') \ + and (data[2:4] != '\x00\x00'): + encoding = 'utf-16le' + data = data[2:] + elif data[:3] == b'\xef\xbb\xbf': + encoding = 'utf-8' + data = data[3:] + elif data[:4] == b'\x00\x00\xfe\xff': + encoding = 'utf-32be' + data = data[4:] + elif data[:4] == b'\xff\xfe\x00\x00': + encoding = 'utf-32le' + data = data[4:] + return data, encoding + + @classmethod + def find_declared_encoding(cls, markup, is_html=False, search_entire_document=False): + """Given a document, tries to find its declared encoding. + + An XML encoding is declared at the beginning of the document. + + An HTML encoding is declared in a tag, hopefully near the + beginning of the document. + """ + if search_entire_document: + xml_endpos = html_endpos = len(markup) + else: + xml_endpos = 1024 + html_endpos = max(2048, int(len(markup) * 0.05)) + + declared_encoding = None + declared_encoding_match = xml_encoding_re.search(markup, endpos=xml_endpos) + if not declared_encoding_match and is_html: + declared_encoding_match = html_meta_re.search(markup, endpos=html_endpos) + if declared_encoding_match is not None: + declared_encoding = declared_encoding_match.groups()[0].decode( + 'ascii') + if declared_encoding: + return declared_encoding.lower() + return None + class UnicodeDammit: """A class for detecting the encoding of a *ML document and converting it to a Unicode string. If the source encoding is @@ -176,65 +332,48 @@ class UnicodeDammit: def __init__(self, markup, override_encodings=[], smart_quotes_to=None, is_html=False): - self.declared_html_encoding = None self.smart_quotes_to = smart_quotes_to self.tried_encodings = [] self.contains_replacement_characters = False + self.is_html = is_html - if markup == '' or isinstance(markup, unicode): + self.detector = EncodingDetector(markup, override_encodings, is_html) + + # Short-circuit if the data is in Unicode to begin with. + if isinstance(markup, unicode) or markup == '': self.markup = markup self.unicode_markup = unicode(markup) self.original_encoding = None return - new_markup, document_encoding, sniffed_encoding = \ - self._detectEncoding(markup, is_html) - self.markup = new_markup + # The encoding detector may have stripped a byte-order mark. + # Use the stripped markup from this point on. + self.markup = self.detector.markup u = None - if new_markup != markup: - # _detectEncoding modified the markup, then converted it to - # Unicode and then to UTF-8. So convert it from UTF-8. - u = self._convert_from("utf8") - self.original_encoding = sniffed_encoding + for encoding in self.detector.encodings: + markup = self.detector.markup + u = self._convert_from(encoding) + if u is not None: + break if not u: - for proposed_encoding in ( - override_encodings + [document_encoding, sniffed_encoding]): - if proposed_encoding is not None: - u = self._convert_from(proposed_encoding) - if u: - break + # None of the encodings worked. As an absolute last resort, + # try them again with character replacement. - # If no luck and we have auto-detection library, try that: - if not u and chardet and not isinstance(self.markup, unicode): - u = self._convert_from(chardet.detect(self.markup)['encoding']) - - # As a last resort, try utf-8 and windows-1252: - if not u: - for proposed_encoding in ("utf-8", "windows-1252"): - u = self._convert_from(proposed_encoding) - if u: - break - - # As an absolute last resort, try the encodings again with - # character replacement. - if not u: - for proposed_encoding in ( - override_encodings + [ - document_encoding, sniffed_encoding, "utf-8", "windows-1252"]): - if proposed_encoding != "ascii": - u = self._convert_from(proposed_encoding, "replace") + for encoding in self.detector.encodings: + if encoding != "ascii": + u = self._convert_from(encoding, "replace") if u is not None: - warnings.warn( - UnicodeWarning( + logging.warning( "Some characters could not be decoded, and were " - "replaced with REPLACEMENT CHARACTER.")) + "replaced with REPLACEMENT CHARACTER.") self.contains_replacement_characters = True break - # We could at this point force it to ASCII, but that would - # destroy so much data that I think giving up is better + # If none of that worked, we could at this point force it to + # ASCII, but that would destroy so much data that I think + # giving up is better. self.unicode_markup = u if not u: self.original_encoding = None @@ -262,11 +401,10 @@ class UnicodeDammit: return None self.tried_encodings.append((proposed, errors)) markup = self.markup - # Convert smart quotes to HTML if coming from an encoding # that might have them. if (self.smart_quotes_to is not None - and proposed.lower() in self.ENCODINGS_WITH_SMART_QUOTES): + and proposed in self.ENCODINGS_WITH_SMART_QUOTES): smart_quotes_re = b"([\x80-\x9f])" smart_quotes_compiled = re.compile(smart_quotes_re) markup = smart_quotes_compiled.sub(self._sub_ms_char, markup) @@ -287,99 +425,24 @@ class UnicodeDammit: def _to_unicode(self, data, encoding, errors="strict"): '''Given a string and its encoding, decodes the string into Unicode. %encoding is a string recognized by encodings.aliases''' + return unicode(data, encoding, errors) - # strip Byte Order Mark (if present) - if (len(data) >= 4) and (data[:2] == '\xfe\xff') \ - and (data[2:4] != '\x00\x00'): - encoding = 'utf-16be' - data = data[2:] - elif (len(data) >= 4) and (data[:2] == '\xff\xfe') \ - and (data[2:4] != '\x00\x00'): - encoding = 'utf-16le' - data = data[2:] - elif data[:3] == '\xef\xbb\xbf': - encoding = 'utf-8' - data = data[3:] - elif data[:4] == '\x00\x00\xfe\xff': - encoding = 'utf-32be' - data = data[4:] - elif data[:4] == '\xff\xfe\x00\x00': - encoding = 'utf-32le' - data = data[4:] - newdata = unicode(data, encoding, errors) - return newdata - - def _detectEncoding(self, xml_data, is_html=False): - """Given a document, tries to detect its XML encoding.""" - xml_encoding = sniffed_xml_encoding = None - try: - if xml_data[:4] == b'\x4c\x6f\xa7\x94': - # EBCDIC - xml_data = self._ebcdic_to_ascii(xml_data) - elif xml_data[:4] == b'\x00\x3c\x00\x3f': - # UTF-16BE - sniffed_xml_encoding = 'utf-16be' - xml_data = unicode(xml_data, 'utf-16be').encode('utf-8') - elif (len(xml_data) >= 4) and (xml_data[:2] == b'\xfe\xff') \ - and (xml_data[2:4] != b'\x00\x00'): - # UTF-16BE with BOM - sniffed_xml_encoding = 'utf-16be' - xml_data = unicode(xml_data[2:], 'utf-16be').encode('utf-8') - elif xml_data[:4] == b'\x3c\x00\x3f\x00': - # UTF-16LE - sniffed_xml_encoding = 'utf-16le' - xml_data = unicode(xml_data, 'utf-16le').encode('utf-8') - elif (len(xml_data) >= 4) and (xml_data[:2] == b'\xff\xfe') and \ - (xml_data[2:4] != b'\x00\x00'): - # UTF-16LE with BOM - sniffed_xml_encoding = 'utf-16le' - xml_data = unicode(xml_data[2:], 'utf-16le').encode('utf-8') - elif xml_data[:4] == b'\x00\x00\x00\x3c': - # UTF-32BE - sniffed_xml_encoding = 'utf-32be' - xml_data = unicode(xml_data, 'utf-32be').encode('utf-8') - elif xml_data[:4] == b'\x3c\x00\x00\x00': - # UTF-32LE - sniffed_xml_encoding = 'utf-32le' - xml_data = unicode(xml_data, 'utf-32le').encode('utf-8') - elif xml_data[:4] == b'\x00\x00\xfe\xff': - # UTF-32BE with BOM - sniffed_xml_encoding = 'utf-32be' - xml_data = unicode(xml_data[4:], 'utf-32be').encode('utf-8') - elif xml_data[:4] == b'\xff\xfe\x00\x00': - # UTF-32LE with BOM - sniffed_xml_encoding = 'utf-32le' - xml_data = unicode(xml_data[4:], 'utf-32le').encode('utf-8') - elif xml_data[:3] == b'\xef\xbb\xbf': - # UTF-8 with BOM - sniffed_xml_encoding = 'utf-8' - xml_data = unicode(xml_data[3:], 'utf-8').encode('utf-8') - else: - sniffed_xml_encoding = 'ascii' - pass - except: - xml_encoding_match = None - xml_encoding_match = xml_encoding_re.match(xml_data) - if not xml_encoding_match and is_html: - xml_encoding_match = html_meta_re.search(xml_data) - if xml_encoding_match is not None: - xml_encoding = xml_encoding_match.groups()[0].decode( - 'ascii').lower() - if is_html: - self.declared_html_encoding = xml_encoding - if sniffed_xml_encoding and \ - (xml_encoding in ('iso-10646-ucs-2', 'ucs-2', 'csunicode', - 'iso-10646-ucs-4', 'ucs-4', 'csucs4', - 'utf-16', 'utf-32', 'utf_16', 'utf_32', - 'utf16', 'u16')): - xml_encoding = sniffed_xml_encoding - return xml_data, xml_encoding, sniffed_xml_encoding + @property + def declared_html_encoding(self): + if not self.is_html: + return None + return self.detector.declared_encoding def find_codec(self, charset): - return self._codec(self.CHARSET_ALIASES.get(charset, charset)) \ - or (charset and self._codec(charset.replace("-", ""))) \ - or (charset and self._codec(charset.replace("-", "_"))) \ + value = (self._codec(self.CHARSET_ALIASES.get(charset, charset)) + or (charset and self._codec(charset.replace("-", ""))) + or (charset and self._codec(charset.replace("-", "_"))) + or (charset and charset.lower()) or charset + ) + if value: + return value.lower() + return None def _codec(self, charset): if not charset: @@ -392,32 +455,6 @@ class UnicodeDammit: pass return codec - EBCDIC_TO_ASCII_MAP = None - - def _ebcdic_to_ascii(self, s): - c = self.__class__ - if not c.EBCDIC_TO_ASCII_MAP: - emap = (0,1,2,3,156,9,134,127,151,141,142,11,12,13,14,15, - 16,17,18,19,157,133,8,135,24,25,146,143,28,29,30,31, - 128,129,130,131,132,10,23,27,136,137,138,139,140,5,6,7, - 144,145,22,147,148,149,150,4,152,153,154,155,20,21,158,26, - 32,160,161,162,163,164,165,166,167,168,91,46,60,40,43,33, - 38,169,170,171,172,173,174,175,176,177,93,36,42,41,59,94, - 45,47,178,179,180,181,182,183,184,185,124,44,37,95,62,63, - 186,187,188,189,190,191,192,193,194,96,58,35,64,39,61,34, - 195,97,98,99,100,101,102,103,104,105,196,197,198,199,200, - 201,202,106,107,108,109,110,111,112,113,114,203,204,205, - 206,207,208,209,126,115,116,117,118,119,120,121,122,210, - 211,212,213,214,215,216,217,218,219,220,221,222,223,224, - 225,226,227,228,229,230,231,123,65,66,67,68,69,70,71,72, - 73,232,233,234,235,236,237,125,74,75,76,77,78,79,80,81, - 82,238,239,240,241,242,243,92,159,83,84,85,86,87,88,89, - 90,244,245,246,247,248,249,48,49,50,51,52,53,54,55,56,57, - 250,251,252,253,254,255) - import string - c.EBCDIC_TO_ASCII_MAP = string.maketrans( - ''.join(map(chr, list(range(256)))), ''.join(map(chr, emap))) - return s.translate(c.EBCDIC_TO_ASCII_MAP) # A partial mapping of ISO-Latin-1 to HTML entities/XML numeric entities. MS_CHARS = {b'\x80': ('euro', '20AC'), diff --git a/libs/bs4/diagnose.py b/libs/bs4/diagnose.py new file mode 100644 index 00000000..4d0b00af --- /dev/null +++ b/libs/bs4/diagnose.py @@ -0,0 +1,204 @@ +"""Diagnostic functions, mainly for use when doing tech support.""" +import cProfile +from StringIO import StringIO +from HTMLParser import HTMLParser +import bs4 +from bs4 import BeautifulSoup, __version__ +from bs4.builder import builder_registry + +import os +import pstats +import random +import tempfile +import time +import traceback +import sys +import cProfile + +def diagnose(data): + """Diagnostic suite for isolating common problems.""" + print "Diagnostic running on Beautiful Soup %s" % __version__ + print "Python version %s" % sys.version + + basic_parsers = ["html.parser", "html5lib", "lxml"] + for name in basic_parsers: + for builder in builder_registry.builders: + if name in builder.features: + break + else: + basic_parsers.remove(name) + print ( + "I noticed that %s is not installed. Installing it may help." % + name) + + if 'lxml' in basic_parsers: + basic_parsers.append(["lxml", "xml"]) + from lxml import etree + print "Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION)) + + if 'html5lib' in basic_parsers: + import html5lib + print "Found html5lib version %s" % html5lib.__version__ + + if hasattr(data, 'read'): + data = data.read() + elif os.path.exists(data): + print '"%s" looks like a filename. Reading data from the file.' % data + data = open(data).read() + elif data.startswith("http:") or data.startswith("https:"): + print '"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data + print "You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup." + return + print + + for parser in basic_parsers: + print "Trying to parse your markup with %s" % parser + success = False + try: + soup = BeautifulSoup(data, parser) + success = True + except Exception, e: + print "%s could not parse the markup." % parser + traceback.print_exc() + if success: + print "Here's what %s did with the markup:" % parser + print soup.prettify() + + print "-" * 80 + +def lxml_trace(data, html=True, **kwargs): + """Print out the lxml events that occur during parsing. + + This lets you see how lxml parses a document when no Beautiful + Soup code is running. + """ + from lxml import etree + for event, element in etree.iterparse(StringIO(data), html=html, **kwargs): + print("%s, %4s, %s" % (event, element.tag, element.text)) + +class AnnouncingParser(HTMLParser): + """Announces HTMLParser parse events, without doing anything else.""" + + def _p(self, s): + print(s) + + def handle_starttag(self, name, attrs): + self._p("%s START" % name) + + def handle_endtag(self, name): + self._p("%s END" % name) + + def handle_data(self, data): + self._p("%s DATA" % data) + + def handle_charref(self, name): + self._p("%s CHARREF" % name) + + def handle_entityref(self, name): + self._p("%s ENTITYREF" % name) + + def handle_comment(self, data): + self._p("%s COMMENT" % data) + + def handle_decl(self, data): + self._p("%s DECL" % data) + + def unknown_decl(self, data): + self._p("%s UNKNOWN-DECL" % data) + + def handle_pi(self, data): + self._p("%s PI" % data) + +def htmlparser_trace(data): + """Print out the HTMLParser events that occur during parsing. + + This lets you see how HTMLParser parses a document when no + Beautiful Soup code is running. + """ + parser = AnnouncingParser() + parser.feed(data) + +_vowels = "aeiou" +_consonants = "bcdfghjklmnpqrstvwxyz" + +def rword(length=5): + "Generate a random word-like string." + s = '' + for i in range(length): + if i % 2 == 0: + t = _consonants + else: + t = _vowels + s += random.choice(t) + return s + +def rsentence(length=4): + "Generate a random sentence-like string." + return " ".join(rword(random.randint(4,9)) for i in range(length)) + +def rdoc(num_elements=1000): + """Randomly generate an invalid HTML document.""" + tag_names = ['p', 'div', 'span', 'i', 'b', 'script', 'table'] + elements = [] + for i in range(num_elements): + choice = random.randint(0,3) + if choice == 0: + # New tag. + tag_name = random.choice(tag_names) + elements.append("<%s>" % tag_name) + elif choice == 1: + elements.append(rsentence(random.randint(1,4))) + elif choice == 2: + # Close a tag. + tag_name = random.choice(tag_names) + elements.append("" % tag_name) + return "" + "\n".join(elements) + "" + +def benchmark_parsers(num_elements=100000): + """Very basic head-to-head performance benchmark.""" + print "Comparative parser benchmark on Beautiful Soup %s" % __version__ + data = rdoc(num_elements) + print "Generated a large invalid HTML document (%d bytes)." % len(data) + + for parser in ["lxml", ["lxml", "html"], "html5lib", "html.parser"]: + success = False + try: + a = time.time() + soup = BeautifulSoup(data, parser) + b = time.time() + success = True + except Exception, e: + print "%s could not parse the markup." % parser + traceback.print_exc() + if success: + print "BS4+%s parsed the markup in %.2fs." % (parser, b-a) + + from lxml import etree + a = time.time() + etree.HTML(data) + b = time.time() + print "Raw lxml parsed the markup in %.2fs." % (b-a) + + import html5lib + parser = html5lib.HTMLParser() + a = time.time() + parser.parse(data) + b = time.time() + print "Raw html5lib parsed the markup in %.2fs." % (b-a) + +def profile(num_elements=100000, parser="lxml"): + + filehandle = tempfile.NamedTemporaryFile() + filename = filehandle.name + + data = rdoc(num_elements) + vars = dict(bs4=bs4, data=data, parser=parser) + cProfile.runctx('bs4.BeautifulSoup(data, parser)' , vars, vars, filename) + + stats = pstats.Stats(filename) + # stats.strip_dirs() + stats.sort_stats("cumulative") + stats.print_stats('_html5lib|bs4', 50) + +if __name__ == '__main__': + diagnose(sys.stdin.read()) diff --git a/libs/bs4/element.py b/libs/bs4/element.py index 91a40078..da9afdf4 100644 --- a/libs/bs4/element.py +++ b/libs/bs4/element.py @@ -26,6 +26,9 @@ class NamespacedAttribute(unicode): def __new__(cls, prefix, name, namespace=None): if name is None: obj = unicode.__new__(cls, prefix) + elif prefix is None: + # Not really namespaced. + obj = unicode.__new__(cls, name) else: obj = unicode.__new__(cls, prefix + ":" + name) obj.prefix = prefix @@ -78,6 +81,40 @@ class ContentMetaAttributeValue(AttributeValueWithCharsetSubstitution): return match.group(1) + encoding return self.CHARSET_RE.sub(rewrite, self.original_value) +class HTMLAwareEntitySubstitution(EntitySubstitution): + + """Entity substitution rules that are aware of some HTML quirks. + + Specifically, the contents of +""" + soup = BeautifulSoup(doc, "xml") + # lxml would have stripped this while parsing, but we can add + # it later. + soup.script.string = 'console.log("< < hey > > ");' + encoded = soup.encode() + self.assertTrue(b"< < hey > >" in encoded) + + def test_can_parse_unicode_document(self): + markup = u'Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!' + soup = self.soup(markup) + self.assertEqual(u'Sacr\xe9 bleu!', soup.root.string) + + def test_popping_namespaced_tag(self): + markup = 'b2012-07-02T20:33:42Zcd' + soup = self.soup(markup) + self.assertEqual( + unicode(soup.rss), markup) def test_docstring_includes_correct_encoding(self): soup = self.soup("") @@ -472,6 +529,20 @@ class XMLTreeBuilderSmokeTest(object): self.assertEqual("http://example.com/", root['xmlns:a']) self.assertEqual("http://example.net/", root['xmlns:b']) + def test_closing_namespaced_tag(self): + markup = '

20010504

' + soup = self.soup(markup) + self.assertEqual(unicode(soup.p), markup) + + def test_namespaced_attributes(self): + markup = '' + soup = self.soup(markup) + self.assertEqual(unicode(soup.foo), markup) + + def test_namespaced_attributes_xml_namespace(self): + markup = 'bar' + soup = self.soup(markup) + self.assertEqual(unicode(soup.foo), markup) class HTML5TreeBuilderSmokeTest(HTMLTreeBuilderSmokeTest): """Smoke test for a tree builder that supports HTML5.""" @@ -501,6 +572,12 @@ class HTML5TreeBuilderSmokeTest(HTMLTreeBuilderSmokeTest): self.assertEqual(namespace, soup.math.namespace) self.assertEqual(namespace, soup.msqrt.namespace) + def test_xml_declaration_becomes_comment(self): + markup = '' + soup = self.soup(markup) + self.assertTrue(isinstance(soup.contents[0], Comment)) + self.assertEqual(soup.contents[0], '?xml version="1.0" encoding="utf-8"?') + self.assertEqual("html", soup.contents[0].next_element.name) def skipIf(condition, reason): def nothing(test, *args, **kwargs): diff --git a/libs/gntp/__init__.py b/libs/gntp/__init__.py index eabbfa47..e69de29b 100755 --- a/libs/gntp/__init__.py +++ b/libs/gntp/__init__.py @@ -1,509 +0,0 @@ -import re -import hashlib -import time -import StringIO - -__version__ = '0.8' - -#GNTP/ [:][ :.] -GNTP_INFO_LINE = re.compile( - 'GNTP/(?P\d+\.\d+) (?PREGISTER|NOTIFY|SUBSCRIBE|\-OK|\-ERROR)' + - ' (?P[A-Z0-9]+(:(?P[A-F0-9]+))?) ?' + - '((?P[A-Z0-9]+):(?P[A-F0-9]+).(?P[A-F0-9]+))?\r\n', - re.IGNORECASE -) - -GNTP_INFO_LINE_SHORT = re.compile( - 'GNTP/(?P\d+\.\d+) (?PREGISTER|NOTIFY|SUBSCRIBE|\-OK|\-ERROR)', - re.IGNORECASE -) - -GNTP_HEADER = re.compile('([\w-]+):(.+)') - -GNTP_EOL = '\r\n' - - -class BaseError(Exception): - def gntp_error(self): - error = GNTPError(self.errorcode, self.errordesc) - return error.encode() - - -class ParseError(BaseError): - errorcode = 500 - errordesc = 'Error parsing the message' - - -class AuthError(BaseError): - errorcode = 400 - errordesc = 'Error with authorization' - - -class UnsupportedError(BaseError): - errorcode = 500 - errordesc = 'Currently unsupported by gntp.py' - - -class _GNTPBuffer(StringIO.StringIO): - """GNTP Buffer class""" - def writefmt(self, message = "", *args): - """Shortcut function for writing GNTP Headers""" - self.write((message % args).encode('utf8', 'replace')) - self.write(GNTP_EOL) - - -class _GNTPBase(object): - """Base initilization - - :param string messagetype: GNTP Message type - :param string version: GNTP Protocol version - :param string encription: Encryption protocol - """ - def __init__(self, messagetype = None, version = '1.0', encryption = None): - self.info = { - 'version': version, - 'messagetype': messagetype, - 'encryptionAlgorithmID': encryption - } - self.headers = {} - self.resources = {} - - def __str__(self): - return self.encode() - - def _parse_info(self, data): - """Parse the first line of a GNTP message to get security and other info values - - :param string data: GNTP Message - :return dict: Parsed GNTP Info line - """ - - match = GNTP_INFO_LINE.match(data) - - if not match: - raise ParseError('ERROR_PARSING_INFO_LINE') - - info = match.groupdict() - if info['encryptionAlgorithmID'] == 'NONE': - info['encryptionAlgorithmID'] = None - - return info - - def set_password(self, password, encryptAlgo = 'MD5'): - """Set a password for a GNTP Message - - :param string password: Null to clear password - :param string encryptAlgo: Supports MD5, SHA1, SHA256, SHA512 - """ - hash = { - 'MD5': hashlib.md5, - 'SHA1': hashlib.sha1, - 'SHA256': hashlib.sha256, - 'SHA512': hashlib.sha512, - } - - self.password = password - self.encryptAlgo = encryptAlgo.upper() - if not password: - self.info['encryptionAlgorithmID'] = None - self.info['keyHashAlgorithm'] = None - return - if not self.encryptAlgo in hash.keys(): - raise UnsupportedError('INVALID HASH "%s"' % self.encryptAlgo) - - hashfunction = hash.get(self.encryptAlgo) - - password = password.encode('utf8') - seed = time.ctime() - salt = hashfunction(seed).hexdigest() - saltHash = hashfunction(seed).digest() - keyBasis = password + saltHash - key = hashfunction(keyBasis).digest() - keyHash = hashfunction(key).hexdigest() - - self.info['keyHashAlgorithmID'] = self.encryptAlgo - self.info['keyHash'] = keyHash.upper() - self.info['salt'] = salt.upper() - - def _decode_hex(self, value): - """Helper function to decode hex string to `proper` hex string - - :param string value: Human readable hex string - :return string: Hex string - """ - result = '' - for i in range(0, len(value), 2): - tmp = int(value[i:i + 2], 16) - result += chr(tmp) - return result - - def _decode_binary(self, rawIdentifier, identifier): - rawIdentifier += '\r\n\r\n' - dataLength = int(identifier['Length']) - pointerStart = self.raw.find(rawIdentifier) + len(rawIdentifier) - pointerEnd = pointerStart + dataLength - data = self.raw[pointerStart:pointerEnd] - if not len(data) == dataLength: - raise ParseError('INVALID_DATA_LENGTH Expected: %s Recieved %s' % (dataLength, len(data))) - return data - - def _validate_password(self, password): - """Validate GNTP Message against stored password""" - self.password = password - if password == None: - raise AuthError('Missing password') - keyHash = self.info.get('keyHash', None) - if keyHash is None and self.password is None: - return True - if keyHash is None: - raise AuthError('Invalid keyHash') - if self.password is None: - raise AuthError('Missing password') - - password = self.password.encode('utf8') - saltHash = self._decode_hex(self.info['salt']) - - keyBasis = password + saltHash - key = hashlib.md5(keyBasis).digest() - keyHash = hashlib.md5(key).hexdigest() - - if not keyHash.upper() == self.info['keyHash'].upper(): - raise AuthError('Invalid Hash') - return True - - def validate(self): - """Verify required headers""" - for header in self._requiredHeaders: - if not self.headers.get(header, False): - raise ParseError('Missing Notification Header: ' + header) - - def _format_info(self): - """Generate info line for GNTP Message - - :return string: - """ - info = u'GNTP/%s %s' % ( - self.info.get('version'), - self.info.get('messagetype'), - ) - if self.info.get('encryptionAlgorithmID', None): - info += ' %s:%s' % ( - self.info.get('encryptionAlgorithmID'), - self.info.get('ivValue'), - ) - else: - info += ' NONE' - - if self.info.get('keyHashAlgorithmID', None): - info += ' %s:%s.%s' % ( - self.info.get('keyHashAlgorithmID'), - self.info.get('keyHash'), - self.info.get('salt') - ) - - return info - - def _parse_dict(self, data): - """Helper function to parse blocks of GNTP headers into a dictionary - - :param string data: - :return dict: - """ - dict = {} - for line in data.split('\r\n'): - match = GNTP_HEADER.match(line) - if not match: - continue - - key = unicode(match.group(1).strip(), 'utf8', 'replace') - val = unicode(match.group(2).strip(), 'utf8', 'replace') - dict[key] = val - return dict - - def add_header(self, key, value): - if isinstance(value, unicode): - self.headers[key] = value - else: - self.headers[key] = unicode('%s' % value, 'utf8', 'replace') - - def add_resource(self, data): - """Add binary resource - - :param string data: Binary Data - """ - identifier = hashlib.md5(data).hexdigest() - self.resources[identifier] = data - return 'x-growl-resource://%s' % identifier - - def decode(self, data, password = None): - """Decode GNTP Message - - :param string data: - """ - self.password = password - self.raw = data - parts = self.raw.split('\r\n\r\n') - self.info = self._parse_info(data) - self.headers = self._parse_dict(parts[0]) - - def encode(self): - """Encode a generic GNTP Message - - :return string: GNTP Message ready to be sent - """ - - buffer = _GNTPBuffer() - - buffer.writefmt(self._format_info()) - - #Headers - for k, v in self.headers.iteritems(): - buffer.writefmt('%s: %s', k, v) - buffer.writefmt() - - #Resources - for resource, data in self.resources.iteritems(): - buffer.writefmt('Identifier: %s', resource) - buffer.writefmt('Length: %d', len(data)) - buffer.writefmt() - buffer.write(data) - buffer.writefmt() - buffer.writefmt() - - return buffer.getvalue() - - -class GNTPRegister(_GNTPBase): - """Represents a GNTP Registration Command - - :param string data: (Optional) See decode() - :param string password: (Optional) Password to use while encoding/decoding messages - """ - _requiredHeaders = [ - 'Application-Name', - 'Notifications-Count' - ] - _requiredNotificationHeaders = ['Notification-Name'] - - def __init__(self, data = None, password = None): - _GNTPBase.__init__(self, 'REGISTER') - self.notifications = [] - - if data: - self.decode(data, password) - else: - self.set_password(password) - self.add_header('Application-Name', 'pygntp') - self.add_header('Notifications-Count', 0) - - def validate(self): - '''Validate required headers and validate notification headers''' - for header in self._requiredHeaders: - if not self.headers.get(header, False): - raise ParseError('Missing Registration Header: ' + header) - for notice in self.notifications: - for header in self._requiredNotificationHeaders: - if not notice.get(header, False): - raise ParseError('Missing Notification Header: ' + header) - - def decode(self, data, password): - """Decode existing GNTP Registration message - - :param string data: Message to decode - """ - self.raw = data - parts = self.raw.split('\r\n\r\n') - self.info = self._parse_info(data) - self._validate_password(password) - self.headers = self._parse_dict(parts[0]) - - for i, part in enumerate(parts): - if i == 0: - continue # Skip Header - if part.strip() == '': - continue - notice = self._parse_dict(part) - if notice.get('Notification-Name', False): - self.notifications.append(notice) - elif notice.get('Identifier', False): - notice['Data'] = self._decode_binary(part, notice) - #open('register.png','wblol').write(notice['Data']) - self.resources[notice.get('Identifier')] = notice - - def add_notification(self, name, enabled = True): - """Add new Notification to Registration message - - :param string name: Notification Name - :param boolean enabled: Enable this notification by default - """ - notice = {} - notice['Notification-Name'] = u'%s' % name - notice['Notification-Enabled'] = u'%s' % enabled - - self.notifications.append(notice) - self.add_header('Notifications-Count', len(self.notifications)) - - def encode(self): - """Encode a GNTP Registration Message - - :return string: Encoded GNTP Registration message - """ - - buffer = _GNTPBuffer() - - buffer.writefmt(self._format_info()) - - #Headers - for k, v in self.headers.iteritems(): - buffer.writefmt('%s: %s', k, v) - buffer.writefmt() - - #Notifications - if len(self.notifications) > 0: - for notice in self.notifications: - for k, v in notice.iteritems(): - buffer.writefmt('%s: %s', k, v) - buffer.writefmt() - - #Resources - for resource, data in self.resources.iteritems(): - buffer.writefmt('Identifier: %s', resource) - buffer.writefmt('Length: %d', len(data)) - buffer.writefmt() - buffer.write(data) - buffer.writefmt() - buffer.writefmt() - - return buffer.getvalue() - - -class GNTPNotice(_GNTPBase): - """Represents a GNTP Notification Command - - :param string data: (Optional) See decode() - :param string app: (Optional) Set Application-Name - :param string name: (Optional) Set Notification-Name - :param string title: (Optional) Set Notification Title - :param string password: (Optional) Password to use while encoding/decoding messages - """ - _requiredHeaders = [ - 'Application-Name', - 'Notification-Name', - 'Notification-Title' - ] - - def __init__(self, data = None, app = None, name = None, title = None, password = None): - _GNTPBase.__init__(self, 'NOTIFY') - - if data: - self.decode(data, password) - else: - self.set_password(password) - if app: - self.add_header('Application-Name', app) - if name: - self.add_header('Notification-Name', name) - if title: - self.add_header('Notification-Title', title) - - def decode(self, data, password): - """Decode existing GNTP Notification message - - :param string data: Message to decode. - """ - self.raw = data - parts = self.raw.split('\r\n\r\n') - self.info = self._parse_info(data) - self._validate_password(password) - self.headers = self._parse_dict(parts[0]) - - for i, part in enumerate(parts): - if i == 0: - continue # Skip Header - if part.strip() == '': - continue - notice = self._parse_dict(part) - if notice.get('Identifier', False): - notice['Data'] = self._decode_binary(part, notice) - #open('notice.png','wblol').write(notice['Data']) - self.resources[notice.get('Identifier')] = notice - - -class GNTPSubscribe(_GNTPBase): - """Represents a GNTP Subscribe Command - - :param string data: (Optional) See decode() - :param string password: (Optional) Password to use while encoding/decoding messages - """ - _requiredHeaders = [ - 'Subscriber-ID', - 'Subscriber-Name', - ] - - def __init__(self, data = None, password = None): - _GNTPBase.__init__(self, 'SUBSCRIBE') - if data: - self.decode(data, password) - else: - self.set_password(password) - - -class GNTPOK(_GNTPBase): - """Represents a GNTP OK Response - - :param string data: (Optional) See _GNTPResponse.decode() - :param string action: (Optional) Set type of action the OK Response is for - """ - _requiredHeaders = ['Response-Action'] - - def __init__(self, data = None, action = None): - _GNTPBase.__init__(self, '-OK') - if data: - self.decode(data) - if action: - self.add_header('Response-Action', action) - - -class GNTPError(_GNTPBase): - """Represents a GNTP Error response - - :param string data: (Optional) See _GNTPResponse.decode() - :param string errorcode: (Optional) Error code - :param string errordesc: (Optional) Error Description - """ - _requiredHeaders = ['Error-Code', 'Error-Description'] - - def __init__(self, data = None, errorcode = None, errordesc = None): - _GNTPBase.__init__(self, '-ERROR') - if data: - self.decode(data) - if errorcode: - self.add_header('Error-Code', errorcode) - self.add_header('Error-Description', errordesc) - - def error(self): - return (self.headers.get('Error-Code', None), - self.headers.get('Error-Description', None)) - - -def parse_gntp(data, password = None): - """Attempt to parse a message as a GNTP message - - :param string data: Message to be parsed - :param string password: Optional password to be used to verify the message - """ - match = GNTP_INFO_LINE_SHORT.match(data) - if not match: - raise ParseError('INVALID_GNTP_INFO') - info = match.groupdict() - if info['messagetype'] == 'REGISTER': - return GNTPRegister(data, password = password) - elif info['messagetype'] == 'NOTIFY': - return GNTPNotice(data, password = password) - elif info['messagetype'] == 'SUBSCRIBE': - return GNTPSubscribe(data, password = password) - elif info['messagetype'] == '-OK': - return GNTPOK(data) - elif info['messagetype'] == '-ERROR': - return GNTPError(data) - raise ParseError('INVALID_GNTP_MESSAGE') diff --git a/libs/gntp/cli.py b/libs/gntp/cli.py new file mode 100644 index 00000000..bc083062 --- /dev/null +++ b/libs/gntp/cli.py @@ -0,0 +1,141 @@ +# Copyright: 2013 Paul Traylor +# These sources are released under the terms of the MIT license: see LICENSE + +import logging +import os +import sys +from optparse import OptionParser, OptionGroup + +from gntp.notifier import GrowlNotifier +from gntp.shim import RawConfigParser +from gntp.version import __version__ + +DEFAULT_CONFIG = os.path.expanduser('~/.gntp') + +config = RawConfigParser({ + 'hostname': 'localhost', + 'password': None, + 'port': 23053, +}) +config.read([DEFAULT_CONFIG]) +if not config.has_section('gntp'): + config.add_section('gntp') + + +class ClientParser(OptionParser): + def __init__(self): + OptionParser.__init__(self, version="%%prog %s" % __version__) + + group = OptionGroup(self, "Network Options") + group.add_option("-H", "--host", + dest="host", default=config.get('gntp', 'hostname'), + help="Specify a hostname to which to send a remote notification. [%default]") + group.add_option("--port", + dest="port", default=config.getint('gntp', 'port'), type="int", + help="port to listen on [%default]") + group.add_option("-P", "--password", + dest='password', default=config.get('gntp', 'password'), + help="Network password") + self.add_option_group(group) + + group = OptionGroup(self, "Notification Options") + group.add_option("-n", "--name", + dest="app", default='Python GNTP Test Client', + help="Set the name of the application [%default]") + group.add_option("-s", "--sticky", + dest='sticky', default=False, action="store_true", + help="Make the notification sticky [%default]") + group.add_option("--image", + dest="icon", default=None, + help="Icon for notification (URL or /path/to/file)") + group.add_option("-m", "--message", + dest="message", default=None, + help="Sets the message instead of using stdin") + group.add_option("-p", "--priority", + dest="priority", default=0, type="int", + help="-2 to 2 [%default]") + group.add_option("-d", "--identifier", + dest="identifier", + help="Identifier for coalescing") + group.add_option("-t", "--title", + dest="title", default=None, + help="Set the title of the notification [%default]") + group.add_option("-N", "--notification", + dest="name", default='Notification', + help="Set the notification name [%default]") + group.add_option("--callback", + dest="callback", + help="URL callback") + self.add_option_group(group) + + # Extra Options + self.add_option('-v', '--verbose', + dest='verbose', default=0, action='count', + help="Verbosity levels") + + def parse_args(self, args=None, values=None): + values, args = OptionParser.parse_args(self, args, values) + + if values.message is None: + print('Enter a message followed by Ctrl-D') + try: + message = sys.stdin.read() + except KeyboardInterrupt: + exit() + else: + message = values.message + + if values.title is None: + values.title = ' '.join(args) + + # If we still have an empty title, use the + # first bit of the message as the title + if values.title == '': + values.title = message[:20] + + values.verbose = logging.WARNING - values.verbose * 10 + + return values, message + + +def main(): + (options, message) = ClientParser().parse_args() + logging.basicConfig(level=options.verbose) + if not os.path.exists(DEFAULT_CONFIG): + logging.info('No config read found at %s', DEFAULT_CONFIG) + + growl = GrowlNotifier( + applicationName=options.app, + notifications=[options.name], + defaultNotifications=[options.name], + hostname=options.host, + password=options.password, + port=options.port, + ) + result = growl.register() + if result is not True: + exit(result) + + # This would likely be better placed within the growl notifier + # class but until I make _checkIcon smarter this is "easier" + if options.icon is not None and not options.icon.startswith('http'): + logging.info('Loading image %s', options.icon) + f = open(options.icon) + options.icon = f.read() + f.close() + + result = growl.notify( + noteType=options.name, + title=options.title, + description=message, + icon=options.icon, + sticky=options.sticky, + priority=options.priority, + callback=options.callback, + identifier=options.identifier, + ) + if result is not True: + exit(result) + +if __name__ == "__main__": + main() diff --git a/libs/gntp/config.py b/libs/gntp/config.py new file mode 100644 index 00000000..7536bd14 --- /dev/null +++ b/libs/gntp/config.py @@ -0,0 +1,77 @@ +# Copyright: 2013 Paul Traylor +# These sources are released under the terms of the MIT license: see LICENSE + +""" +The gntp.config module is provided as an extended GrowlNotifier object that takes +advantage of the ConfigParser module to allow us to setup some default values +(such as hostname, password, and port) in a more global way to be shared among +programs using gntp +""" +import logging +import os + +import gntp.notifier +import gntp.shim + +__all__ = [ + 'mini', + 'GrowlNotifier' +] + +logger = logging.getLogger(__name__) + + +class GrowlNotifier(gntp.notifier.GrowlNotifier): + """ + ConfigParser enhanced GrowlNotifier object + + For right now, we are only interested in letting users overide certain + values from ~/.gntp + + :: + + [gntp] + hostname = ? + password = ? + port = ? + """ + def __init__(self, *args, **kwargs): + config = gntp.shim.RawConfigParser({ + 'hostname': kwargs.get('hostname', 'localhost'), + 'password': kwargs.get('password'), + 'port': kwargs.get('port', 23053), + }) + + config.read([os.path.expanduser('~/.gntp')]) + + # If the file does not exist, then there will be no gntp section defined + # and the config.get() lines below will get confused. Since we are not + # saving the config, it should be safe to just add it here so the + # code below doesn't complain + if not config.has_section('gntp'): + logger.info('Error reading ~/.gntp config file') + config.add_section('gntp') + + kwargs['password'] = config.get('gntp', 'password') + kwargs['hostname'] = config.get('gntp', 'hostname') + kwargs['port'] = config.getint('gntp', 'port') + + super(GrowlNotifier, self).__init__(*args, **kwargs) + + +def mini(description, **kwargs): + """Single notification function + + Simple notification function in one line. Has only one required parameter + and attempts to use reasonable defaults for everything else + :param string description: Notification message + """ + kwargs['notifierFactory'] = GrowlNotifier + gntp.notifier.mini(description, **kwargs) + + +if __name__ == '__main__': + # If we're running this module directly we're likely running it as a test + # so extra debugging is useful + logging.basicConfig(level=logging.INFO) + mini('Testing mini notification') diff --git a/libs/gntp/core.py b/libs/gntp/core.py new file mode 100644 index 00000000..ee544d3d --- /dev/null +++ b/libs/gntp/core.py @@ -0,0 +1,511 @@ +# Copyright: 2013 Paul Traylor +# These sources are released under the terms of the MIT license: see LICENSE + +import hashlib +import re +import time + +import gntp.shim +import gntp.errors as errors + +__all__ = [ + 'GNTPRegister', + 'GNTPNotice', + 'GNTPSubscribe', + 'GNTPOK', + 'GNTPError', + 'parse_gntp', +] + +#GNTP/ [:][ :.] +GNTP_INFO_LINE = re.compile( + 'GNTP/(?P\d+\.\d+) (?PREGISTER|NOTIFY|SUBSCRIBE|\-OK|\-ERROR)' + + ' (?P[A-Z0-9]+(:(?P[A-F0-9]+))?) ?' + + '((?P[A-Z0-9]+):(?P[A-F0-9]+).(?P[A-F0-9]+))?\r\n', + re.IGNORECASE +) + +GNTP_INFO_LINE_SHORT = re.compile( + 'GNTP/(?P\d+\.\d+) (?PREGISTER|NOTIFY|SUBSCRIBE|\-OK|\-ERROR)', + re.IGNORECASE +) + +GNTP_HEADER = re.compile('([\w-]+):(.+)') + +GNTP_EOL = gntp.shim.b('\r\n') +GNTP_SEP = gntp.shim.b(': ') + + +class _GNTPBuffer(gntp.shim.StringIO): + """GNTP Buffer class""" + def writeln(self, value=None): + if value: + self.write(gntp.shim.b(value)) + self.write(GNTP_EOL) + + def writeheader(self, key, value): + if not isinstance(value, str): + value = str(value) + self.write(gntp.shim.b(key)) + self.write(GNTP_SEP) + self.write(gntp.shim.b(value)) + self.write(GNTP_EOL) + + +class _GNTPBase(object): + """Base initilization + + :param string messagetype: GNTP Message type + :param string version: GNTP Protocol version + :param string encription: Encryption protocol + """ + def __init__(self, messagetype=None, version='1.0', encryption=None): + self.info = { + 'version': version, + 'messagetype': messagetype, + 'encryptionAlgorithmID': encryption + } + self.hash_algo = { + 'MD5': hashlib.md5, + 'SHA1': hashlib.sha1, + 'SHA256': hashlib.sha256, + 'SHA512': hashlib.sha512, + } + self.headers = {} + self.resources = {} + + def __str__(self): + return self.encode() + + def _parse_info(self, data): + """Parse the first line of a GNTP message to get security and other info values + + :param string data: GNTP Message + :return dict: Parsed GNTP Info line + """ + + match = GNTP_INFO_LINE.match(data) + + if not match: + raise errors.ParseError('ERROR_PARSING_INFO_LINE') + + info = match.groupdict() + if info['encryptionAlgorithmID'] == 'NONE': + info['encryptionAlgorithmID'] = None + + return info + + def set_password(self, password, encryptAlgo='MD5'): + """Set a password for a GNTP Message + + :param string password: Null to clear password + :param string encryptAlgo: Supports MD5, SHA1, SHA256, SHA512 + """ + if not password: + self.info['encryptionAlgorithmID'] = None + self.info['keyHashAlgorithm'] = None + return + + self.password = gntp.shim.b(password) + self.encryptAlgo = encryptAlgo.upper() + + if not self.encryptAlgo in self.hash_algo: + raise errors.UnsupportedError('INVALID HASH "%s"' % self.encryptAlgo) + + hashfunction = self.hash_algo.get(self.encryptAlgo) + + password = password.encode('utf8') + seed = time.ctime().encode('utf8') + salt = hashfunction(seed).hexdigest() + saltHash = hashfunction(seed).digest() + keyBasis = password + saltHash + key = hashfunction(keyBasis).digest() + keyHash = hashfunction(key).hexdigest() + + self.info['keyHashAlgorithmID'] = self.encryptAlgo + self.info['keyHash'] = keyHash.upper() + self.info['salt'] = salt.upper() + + def _decode_hex(self, value): + """Helper function to decode hex string to `proper` hex string + + :param string value: Human readable hex string + :return string: Hex string + """ + result = '' + for i in range(0, len(value), 2): + tmp = int(value[i:i + 2], 16) + result += chr(tmp) + return result + + def _decode_binary(self, rawIdentifier, identifier): + rawIdentifier += '\r\n\r\n' + dataLength = int(identifier['Length']) + pointerStart = self.raw.find(rawIdentifier) + len(rawIdentifier) + pointerEnd = pointerStart + dataLength + data = self.raw[pointerStart:pointerEnd] + if not len(data) == dataLength: + raise errors.ParseError('INVALID_DATA_LENGTH Expected: %s Recieved %s' % (dataLength, len(data))) + return data + + def _validate_password(self, password): + """Validate GNTP Message against stored password""" + self.password = password + if password is None: + raise errors.AuthError('Missing password') + keyHash = self.info.get('keyHash', None) + if keyHash is None and self.password is None: + return True + if keyHash is None: + raise errors.AuthError('Invalid keyHash') + if self.password is None: + raise errors.AuthError('Missing password') + + keyHashAlgorithmID = self.info.get('keyHashAlgorithmID','MD5') + + password = self.password.encode('utf8') + saltHash = self._decode_hex(self.info['salt']) + + keyBasis = password + saltHash + self.key = self.hash_algo[keyHashAlgorithmID](keyBasis).digest() + keyHash = self.hash_algo[keyHashAlgorithmID](self.key).hexdigest() + + if not keyHash.upper() == self.info['keyHash'].upper(): + raise errors.AuthError('Invalid Hash') + return True + + def validate(self): + """Verify required headers""" + for header in self._requiredHeaders: + if not self.headers.get(header, False): + raise errors.ParseError('Missing Notification Header: ' + header) + + def _format_info(self): + """Generate info line for GNTP Message + + :return string: + """ + info = 'GNTP/%s %s' % ( + self.info.get('version'), + self.info.get('messagetype'), + ) + if self.info.get('encryptionAlgorithmID', None): + info += ' %s:%s' % ( + self.info.get('encryptionAlgorithmID'), + self.info.get('ivValue'), + ) + else: + info += ' NONE' + + if self.info.get('keyHashAlgorithmID', None): + info += ' %s:%s.%s' % ( + self.info.get('keyHashAlgorithmID'), + self.info.get('keyHash'), + self.info.get('salt') + ) + + return info + + def _parse_dict(self, data): + """Helper function to parse blocks of GNTP headers into a dictionary + + :param string data: + :return dict: Dictionary of parsed GNTP Headers + """ + d = {} + for line in data.split('\r\n'): + match = GNTP_HEADER.match(line) + if not match: + continue + + key = match.group(1).strip() + val = match.group(2).strip() + d[key] = val + return d + + def add_header(self, key, value): + self.headers[key] = value + + def add_resource(self, data): + """Add binary resource + + :param string data: Binary Data + """ + data = gntp.shim.b(data) + identifier = hashlib.md5(data).hexdigest() + self.resources[identifier] = data + return 'x-growl-resource://%s' % identifier + + def decode(self, data, password=None): + """Decode GNTP Message + + :param string data: + """ + self.password = password + self.raw = gntp.shim.u(data) + parts = self.raw.split('\r\n\r\n') + self.info = self._parse_info(self.raw) + self.headers = self._parse_dict(parts[0]) + + def encode(self): + """Encode a generic GNTP Message + + :return string: GNTP Message ready to be sent. Returned as a byte string + """ + + buff = _GNTPBuffer() + + buff.writeln(self._format_info()) + + #Headers + for k, v in self.headers.items(): + buff.writeheader(k, v) + buff.writeln() + + #Resources + for resource, data in self.resources.items(): + buff.writeheader('Identifier', resource) + buff.writeheader('Length', len(data)) + buff.writeln() + buff.write(data) + buff.writeln() + buff.writeln() + + return buff.getvalue() + + +class GNTPRegister(_GNTPBase): + """Represents a GNTP Registration Command + + :param string data: (Optional) See decode() + :param string password: (Optional) Password to use while encoding/decoding messages + """ + _requiredHeaders = [ + 'Application-Name', + 'Notifications-Count' + ] + _requiredNotificationHeaders = ['Notification-Name'] + + def __init__(self, data=None, password=None): + _GNTPBase.__init__(self, 'REGISTER') + self.notifications = [] + + if data: + self.decode(data, password) + else: + self.set_password(password) + self.add_header('Application-Name', 'pygntp') + self.add_header('Notifications-Count', 0) + + def validate(self): + '''Validate required headers and validate notification headers''' + for header in self._requiredHeaders: + if not self.headers.get(header, False): + raise errors.ParseError('Missing Registration Header: ' + header) + for notice in self.notifications: + for header in self._requiredNotificationHeaders: + if not notice.get(header, False): + raise errors.ParseError('Missing Notification Header: ' + header) + + def decode(self, data, password): + """Decode existing GNTP Registration message + + :param string data: Message to decode + """ + self.raw = gntp.shim.u(data) + parts = self.raw.split('\r\n\r\n') + self.info = self._parse_info(self.raw) + self._validate_password(password) + self.headers = self._parse_dict(parts[0]) + + for i, part in enumerate(parts): + if i == 0: + continue # Skip Header + if part.strip() == '': + continue + notice = self._parse_dict(part) + if notice.get('Notification-Name', False): + self.notifications.append(notice) + elif notice.get('Identifier', False): + notice['Data'] = self._decode_binary(part, notice) + #open('register.png','wblol').write(notice['Data']) + self.resources[notice.get('Identifier')] = notice + + def add_notification(self, name, enabled=True): + """Add new Notification to Registration message + + :param string name: Notification Name + :param boolean enabled: Enable this notification by default + """ + notice = {} + notice['Notification-Name'] = name + notice['Notification-Enabled'] = enabled + + self.notifications.append(notice) + self.add_header('Notifications-Count', len(self.notifications)) + + def encode(self): + """Encode a GNTP Registration Message + + :return string: Encoded GNTP Registration message. Returned as a byte string + """ + + buff = _GNTPBuffer() + + buff.writeln(self._format_info()) + + #Headers + for k, v in self.headers.items(): + buff.writeheader(k, v) + buff.writeln() + + #Notifications + if len(self.notifications) > 0: + for notice in self.notifications: + for k, v in notice.items(): + buff.writeheader(k, v) + buff.writeln() + + #Resources + for resource, data in self.resources.items(): + buff.writeheader('Identifier', resource) + buff.writeheader('Length', len(data)) + buff.writeln() + buff.write(data) + buff.writeln() + buff.writeln() + + return buff.getvalue() + + +class GNTPNotice(_GNTPBase): + """Represents a GNTP Notification Command + + :param string data: (Optional) See decode() + :param string app: (Optional) Set Application-Name + :param string name: (Optional) Set Notification-Name + :param string title: (Optional) Set Notification Title + :param string password: (Optional) Password to use while encoding/decoding messages + """ + _requiredHeaders = [ + 'Application-Name', + 'Notification-Name', + 'Notification-Title' + ] + + def __init__(self, data=None, app=None, name=None, title=None, password=None): + _GNTPBase.__init__(self, 'NOTIFY') + + if data: + self.decode(data, password) + else: + self.set_password(password) + if app: + self.add_header('Application-Name', app) + if name: + self.add_header('Notification-Name', name) + if title: + self.add_header('Notification-Title', title) + + def decode(self, data, password): + """Decode existing GNTP Notification message + + :param string data: Message to decode. + """ + self.raw = gntp.shim.u(data) + parts = self.raw.split('\r\n\r\n') + self.info = self._parse_info(self.raw) + self._validate_password(password) + self.headers = self._parse_dict(parts[0]) + + for i, part in enumerate(parts): + if i == 0: + continue # Skip Header + if part.strip() == '': + continue + notice = self._parse_dict(part) + if notice.get('Identifier', False): + notice['Data'] = self._decode_binary(part, notice) + #open('notice.png','wblol').write(notice['Data']) + self.resources[notice.get('Identifier')] = notice + + +class GNTPSubscribe(_GNTPBase): + """Represents a GNTP Subscribe Command + + :param string data: (Optional) See decode() + :param string password: (Optional) Password to use while encoding/decoding messages + """ + _requiredHeaders = [ + 'Subscriber-ID', + 'Subscriber-Name', + ] + + def __init__(self, data=None, password=None): + _GNTPBase.__init__(self, 'SUBSCRIBE') + if data: + self.decode(data, password) + else: + self.set_password(password) + + +class GNTPOK(_GNTPBase): + """Represents a GNTP OK Response + + :param string data: (Optional) See _GNTPResponse.decode() + :param string action: (Optional) Set type of action the OK Response is for + """ + _requiredHeaders = ['Response-Action'] + + def __init__(self, data=None, action=None): + _GNTPBase.__init__(self, '-OK') + if data: + self.decode(data) + if action: + self.add_header('Response-Action', action) + + +class GNTPError(_GNTPBase): + """Represents a GNTP Error response + + :param string data: (Optional) See _GNTPResponse.decode() + :param string errorcode: (Optional) Error code + :param string errordesc: (Optional) Error Description + """ + _requiredHeaders = ['Error-Code', 'Error-Description'] + + def __init__(self, data=None, errorcode=None, errordesc=None): + _GNTPBase.__init__(self, '-ERROR') + if data: + self.decode(data) + if errorcode: + self.add_header('Error-Code', errorcode) + self.add_header('Error-Description', errordesc) + + def error(self): + return (self.headers.get('Error-Code', None), + self.headers.get('Error-Description', None)) + + +def parse_gntp(data, password=None): + """Attempt to parse a message as a GNTP message + + :param string data: Message to be parsed + :param string password: Optional password to be used to verify the message + """ + data = gntp.shim.u(data) + match = GNTP_INFO_LINE_SHORT.match(data) + if not match: + raise errors.ParseError('INVALID_GNTP_INFO') + info = match.groupdict() + if info['messagetype'] == 'REGISTER': + return GNTPRegister(data, password=password) + elif info['messagetype'] == 'NOTIFY': + return GNTPNotice(data, password=password) + elif info['messagetype'] == 'SUBSCRIBE': + return GNTPSubscribe(data, password=password) + elif info['messagetype'] == '-OK': + return GNTPOK(data) + elif info['messagetype'] == '-ERROR': + return GNTPError(data) + raise errors.ParseError('INVALID_GNTP_MESSAGE') diff --git a/libs/gntp/errors.py b/libs/gntp/errors.py new file mode 100644 index 00000000..c006fd68 --- /dev/null +++ b/libs/gntp/errors.py @@ -0,0 +1,25 @@ +# Copyright: 2013 Paul Traylor +# These sources are released under the terms of the MIT license: see LICENSE + +class BaseError(Exception): + pass + + +class ParseError(BaseError): + errorcode = 500 + errordesc = 'Error parsing the message' + + +class AuthError(BaseError): + errorcode = 400 + errordesc = 'Error with authorization' + + +class UnsupportedError(BaseError): + errorcode = 500 + errordesc = 'Currently unsupported by gntp.py' + + +class NetworkError(BaseError): + errorcode = 500 + errordesc = "Error connecting to growl server" diff --git a/libs/gntp/notifier.py b/libs/gntp/notifier.py index 539dae2a..1719ecdf 100755 --- a/libs/gntp/notifier.py +++ b/libs/gntp/notifier.py @@ -1,3 +1,6 @@ +# Copyright: 2013 Paul Traylor +# These sources are released under the terms of the MIT license: see LICENSE + """ The gntp.notifier module is provided as a simple way to send notifications using GNTP @@ -9,10 +12,15 @@ using GNTP `Original Python bindings `_ """ -import gntp -import socket import logging import platform +import socket +import sys + +from gntp.version import __version__ +import gntp.core +import gntp.errors as errors +import gntp.shim __all__ = [ 'mini', @@ -37,9 +45,9 @@ class GrowlNotifier(object): passwordHash = 'MD5' socketTimeout = 3 - def __init__(self, applicationName = 'Python GNTP', notifications = [], - defaultNotifications = None, applicationIcon = None, hostname = 'localhost', - password = None, port = 23053): + def __init__(self, applicationName='Python GNTP', notifications=[], + defaultNotifications=None, applicationIcon=None, hostname='localhost', + password=None, port=23053): self.applicationName = applicationName self.notifications = list(notifications) @@ -61,7 +69,7 @@ class GrowlNotifier(object): then we return False ''' logger.info('Checking icon') - return data.startswith('http') + return gntp.shim.u(data).startswith('http') def register(self): """Send GNTP Registration @@ -71,7 +79,7 @@ class GrowlNotifier(object): sent a registration message at least once """ logger.info('Sending registration to %s:%s', self.hostname, self.port) - register = gntp.GNTPRegister() + register = gntp.core.GNTPRegister() register.add_header('Application-Name', self.applicationName) for notification in self.notifications: enabled = notification in self.defaultNotifications @@ -80,16 +88,16 @@ class GrowlNotifier(object): if self._checkIcon(self.applicationIcon): register.add_header('Application-Icon', self.applicationIcon) else: - id = register.add_resource(self.applicationIcon) - register.add_header('Application-Icon', id) + resource = register.add_resource(self.applicationIcon) + register.add_header('Application-Icon', resource) if self.password: register.set_password(self.password, self.passwordHash) self.add_origin_info(register) self.register_hook(register) return self._send('register', register) - def notify(self, noteType, title, description, icon = None, sticky = False, - priority = None, callback = None, identifier = None): + def notify(self, noteType, title, description, icon=None, sticky=False, + priority=None, callback=None, identifier=None, custom={}): """Send a GNTP notifications .. warning:: @@ -102,6 +110,8 @@ class GrowlNotifier(object): :param boolean sticky: Sticky notification :param integer priority: Message priority level from -2 to 2 :param string callback: URL callback + :param dict custom: Custom attributes. Key names should be prefixed with X- + according to the spec but this is not enforced by this class .. warning:: For now, only URL callbacks are supported. In the future, the @@ -109,7 +119,7 @@ class GrowlNotifier(object): """ logger.info('Sending notification [%s] to %s:%s', noteType, self.hostname, self.port) assert noteType in self.notifications - notice = gntp.GNTPNotice() + notice = gntp.core.GNTPNotice() notice.add_header('Application-Name', self.applicationName) notice.add_header('Notification-Name', noteType) notice.add_header('Notification-Title', title) @@ -123,8 +133,8 @@ class GrowlNotifier(object): if self._checkIcon(icon): notice.add_header('Notification-Icon', icon) else: - id = notice.add_resource(icon) - notice.add_header('Notification-Icon', id) + resource = notice.add_resource(icon) + notice.add_header('Notification-Icon', resource) if description: notice.add_header('Notification-Text', description) @@ -133,6 +143,9 @@ class GrowlNotifier(object): if identifier: notice.add_header('Notification-Coalescing-ID', identifier) + for key in custom: + notice.add_header(key, custom[key]) + self.add_origin_info(notice) self.notify_hook(notice) @@ -140,7 +153,7 @@ class GrowlNotifier(object): def subscribe(self, id, name, port): """Send a Subscribe request to a remote machine""" - sub = gntp.GNTPSubscribe() + sub = gntp.core.GNTPSubscribe() sub.add_header('Subscriber-ID', id) sub.add_header('Subscriber-Name', name) sub.add_header('Subscriber-Port', port) @@ -156,7 +169,7 @@ class GrowlNotifier(object): """Add optional Origin headers to message""" packet.add_header('Origin-Machine-Name', platform.node()) packet.add_header('Origin-Software-Name', 'gntp.py') - packet.add_header('Origin-Software-Version', gntp.__version__) + packet.add_header('Origin-Software-Version', __version__) packet.add_header('Origin-Platform-Name', platform.system()) packet.add_header('Origin-Platform-Version', platform.platform()) @@ -179,27 +192,33 @@ class GrowlNotifier(object): s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) s.settimeout(self.socketTimeout) - s.connect((self.hostname, self.port)) - s.send(data) - recv_data = s.recv(1024) - while not recv_data.endswith("\r\n\r\n"): - recv_data += s.recv(1024) - response = gntp.parse_gntp(recv_data) + try: + s.connect((self.hostname, self.port)) + s.send(data) + recv_data = s.recv(1024) + while not recv_data.endswith(gntp.shim.b("\r\n\r\n")): + recv_data += s.recv(1024) + except socket.error: + # Python2.5 and Python3 compatibile exception + exc = sys.exc_info()[1] + raise errors.NetworkError(exc) + + response = gntp.core.parse_gntp(recv_data) s.close() logger.debug('From : %s:%s <%s>\n%s', self.hostname, self.port, response.__class__, response) - if type(response) == gntp.GNTPOK: + if type(response) == gntp.core.GNTPOK: return True logger.error('Invalid response: %s', response.error()) return response.error() -def mini(description, applicationName = 'PythonMini', noteType = "Message", - title = "Mini Message", applicationIcon = None, hostname = 'localhost', - password = None, port = 23053, sticky = False, priority = None, - callback = None, notificationIcon = None, identifier = None, - notifierFactory = GrowlNotifier): +def mini(description, applicationName='PythonMini', noteType="Message", + title="Mini Message", applicationIcon=None, hostname='localhost', + password=None, port=23053, sticky=False, priority=None, + callback=None, notificationIcon=None, identifier=None, + notifierFactory=GrowlNotifier): """Single notification function Simple notification function in one line. Has only one required parameter @@ -210,32 +229,37 @@ def mini(description, applicationName = 'PythonMini', noteType = "Message", For now, only URL callbacks are supported. In the future, the callback argument will also support a function """ - growl = notifierFactory( - applicationName = applicationName, - notifications = [noteType], - defaultNotifications = [noteType], - applicationIcon = applicationIcon, - hostname = hostname, - password = password, - port = port, - ) - result = growl.register() - if result is not True: - return result + try: + growl = notifierFactory( + applicationName=applicationName, + notifications=[noteType], + defaultNotifications=[noteType], + applicationIcon=applicationIcon, + hostname=hostname, + password=password, + port=port, + ) + result = growl.register() + if result is not True: + return result - return growl.notify( - noteType = noteType, - title = title, - description = description, - icon = notificationIcon, - sticky = sticky, - priority = priority, - callback = callback, - identifier = identifier, - ) + return growl.notify( + noteType=noteType, + title=title, + description=description, + icon=notificationIcon, + sticky=sticky, + priority=priority, + callback=callback, + identifier=identifier, + ) + except Exception: + # We want the "mini" function to be simple and swallow Exceptions + # in order to be less invasive + logger.exception("Growl error") if __name__ == '__main__': # If we're running this module directly we're likely running it as a test # so extra debugging is useful - logging.basicConfig(level = logging.INFO) + logging.basicConfig(level=logging.INFO) mini('Testing mini notification') diff --git a/libs/gntp/shim.py b/libs/gntp/shim.py new file mode 100644 index 00000000..3a387828 --- /dev/null +++ b/libs/gntp/shim.py @@ -0,0 +1,45 @@ +# Copyright: 2013 Paul Traylor +# These sources are released under the terms of the MIT license: see LICENSE + +""" +Python2.5 and Python3.3 compatibility shim + +Heavily inspirted by the "six" library. +https://pypi.python.org/pypi/six +""" + +import sys + +PY3 = sys.version_info[0] == 3 + +if PY3: + def b(s): + if isinstance(s, bytes): + return s + return s.encode('utf8', 'replace') + + def u(s): + if isinstance(s, bytes): + return s.decode('utf8', 'replace') + return s + + from io import BytesIO as StringIO + from configparser import RawConfigParser +else: + def b(s): + if isinstance(s, unicode): + return s.encode('utf8', 'replace') + return s + + def u(s): + if isinstance(s, unicode): + return s + if isinstance(s, int): + s = str(s) + return unicode(s, "utf8", "replace") + + from StringIO import StringIO + from ConfigParser import RawConfigParser + +b.__doc__ = "Ensure we have a byte string" +u.__doc__ = "Ensure we have a unicode string" diff --git a/libs/gntp/version.py b/libs/gntp/version.py new file mode 100644 index 00000000..2166aaca --- /dev/null +++ b/libs/gntp/version.py @@ -0,0 +1,4 @@ +# Copyright: 2013 Paul Traylor +# These sources are released under the terms of the MIT license: see LICENSE + +__version__ = '1.0.2' diff --git a/libs/guessit/__init__.py b/libs/guessit/__init__.py index ce140248..e6cfa276 100755 --- a/libs/guessit/__init__.py +++ b/libs/guessit/__init__.py @@ -20,7 +20,7 @@ from __future__ import unicode_literals -__version__ = '0.7-dev' +__version__ = '0.6.2' __all__ = ['Guess', 'Language', 'guess_file_info', 'guess_video_info', 'guess_movie_info', 'guess_episode_info'] @@ -76,6 +76,7 @@ from guessit.language import Language from guessit.matcher import IterativeMatcher from guessit.textutils import clean_string import logging +import json log = logging.getLogger(__name__) @@ -105,17 +106,74 @@ def _guess_filename(filename, filetype): mtree = IterativeMatcher(filename, filetype=filetype) + m = mtree.matched() + + second_pass_opts = [] + second_pass_transfo_opts = {} + # if there are multiple possible years found, we assume the first one is # part of the title, reparse the tree taking this into account years = set(n.value for n in find_nodes(mtree.match_tree, 'year')) if len(years) >= 2: - mtree = IterativeMatcher(filename, filetype=filetype, - opts=['skip_first_year']) + second_pass_opts.append('skip_first_year') + to_skip_language_nodes = [] + + title_nodes = set(n for n in find_nodes(mtree.match_tree, ['title', 'series'])) + title_spans = {} + for title_node in title_nodes: + title_spans[title_node.span[0]] = title_node + title_spans[title_node.span[1]] = title_node + + for lang_key in ('language', 'subtitleLanguage'): + langs = {} + lang_nodes = set(n for n in find_nodes(mtree.match_tree, lang_key)) + + for lang_node in lang_nodes: + lang = lang_node.guess.get(lang_key, None) + if len(lang_node.value) > 3 and (lang_node.span[0] in title_spans.keys() or lang_node.span[1] in title_spans.keys()): + # Language is next or before title, and is not a language code. Add to skip for 2nd pass. + + # if filetype is subtitle and the language appears last, just before + # the extension, then it is likely a subtitle language + parts = clean_string(lang_node.root.value).split() + if m['type'] in ['moviesubtitle', 'episodesubtitle'] and (parts.index(lang_node.value) == len(parts) - 2): + continue + + to_skip_language_nodes.append(lang_node) + elif not lang in langs: + langs[lang] = lang_node + else: + # The same language was found. Keep the more confident one, and add others to skip for 2nd pass. + existing_lang_node = langs[lang] + to_skip = None + if existing_lang_node.guess.confidence('language') >= lang_node.guess.confidence('language'): + # lang_node is to remove + to_skip = lang_node + else: + # existing_lang_node is to remove + langs[lang] = lang_node + to_skip = existing_lang_node + to_skip_language_nodes.append(to_skip) + + + if to_skip_language_nodes: + second_pass_transfo_opts['guess_language'] = ( + ((), { 'skip': [ { 'node_idx': node.parent.node_idx, + 'span': node.span } + for node in to_skip_language_nodes ] })) + + if second_pass_opts or second_pass_transfo_opts: + # 2nd pass is needed + log.info("Running 2nd pass with options: %s" % second_pass_opts) + log.info("Transfo options: %s" % second_pass_transfo_opts) + mtree = IterativeMatcher(filename, filetype=filetype, + opts=second_pass_opts, + transfo_opts=second_pass_transfo_opts) m = mtree.matched() - if 'language' not in m and 'subtitleLanguage' not in m: + if 'language' not in m and 'subtitleLanguage' not in m or 'title' not in m: return m # if we found some language, make sure we didn't cut a title or sth... @@ -123,51 +181,10 @@ def _guess_filename(filename, filetype): opts=['nolanguage', 'nocountry']) m2 = mtree2.matched() - - if m.get('title') is None: - return m - if m.get('title') != m2.get('title'): title = next(find_nodes(mtree.match_tree, 'title')) title2 = next(find_nodes(mtree2.match_tree, 'title')) - langs = list(find_nodes(mtree.match_tree, ['language', 'subtitleLanguage'])) - if not langs: - return warning('A weird error happened with language detection') - - # find the language that is likely more relevant - for lng in langs: - if lng.value in title2.value: - # if the language was detected as part of a potential title, - # look at this one in particular - lang = lng - break - else: - # pick the first one if we don't have a better choice - lang = langs[0] - - - # language code are rarely part of a title, and those - # should be handled by the Language exceptions anyway - if len(lang.value) <= 3: - return m - - - # if filetype is subtitle and the language appears last, just before - # the extension, then it is likely a subtitle language - parts = clean_string(title.root.value).split() - if (m['type'] in ['moviesubtitle', 'episodesubtitle'] and - parts.index(lang.value) == len(parts) - 2): - return m - - # if the language was in the middle of the other potential title, - # keep the other title (eg: The Italian Job), except if it is at the - # very beginning, in which case we consider it an error - if m2['title'].startswith(lang.value): - return m - elif lang.value in title2.value: - return m2 - # if a node is in an explicit group, then the correct title is probably # the other one if title.root.node_at(title.node_idx[:2]).is_explicit(): @@ -175,9 +192,6 @@ def _guess_filename(filename, filetype): elif title2.root.node_at(title2.node_idx[:2]).is_explicit(): return m - return warning('Not sure of the title because of the language position') - - return m diff --git a/libs/guessit/__main__.py b/libs/guessit/__main__.py index 957ec9da..ccfa3af6 100755 --- a/libs/guessit/__main__.py +++ b/libs/guessit/__main__.py @@ -24,16 +24,19 @@ from guessit import u from guessit import slogging, guess_file_info from optparse import OptionParser import logging +import sys +import os +import locale -def detect_filename(filename, filetype, info=['filename']): +def detect_filename(filename, filetype, info=['filename'], advanced = False): filename = u(filename) print('For:', filename) - print('GuessIt found:', guess_file_info(filename, filetype, info).nice_string()) + print('GuessIt found:', guess_file_info(filename, filetype, info).nice_string(advanced)) -def run_demo(episodes=True, movies=True): +def run_demo(episodes=True, movies=True, advanced=False): # NOTE: tests should not be added here but rather in the tests/ folder # this is just intended as a quick example if episodes: @@ -50,7 +53,7 @@ def run_demo(episodes=True, movies=True): for f in testeps: print('-'*80) - detect_filename(f, filetype='episode') + detect_filename(f, filetype='episode', advanced=advanced) if movies: @@ -77,12 +80,17 @@ def run_demo(episodes=True, movies=True): for f in testmovies: print('-'*80) - detect_filename(f, filetype = 'movie') + detect_filename(f, filetype = 'movie', advanced = advanced) def main(): slogging.setupLogging() + # see http://bugs.python.org/issue2128 + if sys.version_info.major < 3 and os.name == 'nt': + for i, a in enumerate(sys.argv): + sys.argv[i] = a.decode(locale.getpreferredencoding()) + parser = OptionParser(usage = 'usage: %prog [options] file1 [file2...]') parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False, help = 'display debug output') @@ -92,6 +100,8 @@ def main(): 'them, comma-separated') parser.add_option('-t', '--type', dest = 'filetype', default = 'autodetect', help = 'the suggested file type: movie, episode or autodetect') + parser.add_option('-a', '--advanced', dest = 'advanced', action='store_true', default = False, + help = 'display advanced information for filename guesses, as json output') parser.add_option('-d', '--demo', action='store_true', dest='demo', default=False, help = 'run a few builtin tests instead of analyzing a file') @@ -100,13 +110,14 @@ def main(): logging.getLogger('guessit').setLevel(logging.DEBUG) if options.demo: - run_demo(episodes=True, movies=True) + run_demo(episodes=True, movies=True, advanced=options.advanced) else: if args: for filename in args: detect_filename(filename, filetype = options.filetype, - info = options.info.split(',')) + info = options.info.split(','), + advanced = options.advanced) else: parser.print_help() diff --git a/libs/guessit/fileutils.py b/libs/guessit/fileutils.py index dc077e64..9531f82a 100755 --- a/libs/guessit/fileutils.py +++ b/libs/guessit/fileutils.py @@ -44,13 +44,14 @@ def split_path(path): result = [] while True: head, tail = os.path.split(path) + headlen = len(head) # on Unix systems, the root folder is '/' - if head == '/' and tail == '': + if head and head == '/'*headlen and tail == '': return ['/'] + result # on Windows, the root folder is a drive letter (eg: 'C:\') or for shares \\ - if ((len(head) == 3 and head[1:] == ':\\') or (len(head) == 2 and head == '\\\\')) and tail == '': + if ((headlen == 3 and head[1:] == ':\\') or (headlen == 2 and head == '\\\\')) and tail == '': return [head] + result if head == '' and tail == '': @@ -61,6 +62,7 @@ def split_path(path): path = head continue + # otherwise, add the last path fragment and keep splitting result = [tail] + result path = head diff --git a/libs/guessit/guess.py b/libs/guessit/guess.py index 33d36517..73babceb 100755 --- a/libs/guessit/guess.py +++ b/libs/guessit/guess.py @@ -41,15 +41,21 @@ class Guess(UnicodeMixin, dict): confidence = kwargs.pop('confidence') except KeyError: confidence = 0 + + try: + raw = kwargs.pop('raw') + except KeyError: + raw = None dict.__init__(self, *args, **kwargs) self._confidence = {} + self._raw = {} for prop in self: self._confidence[prop] = confidence - - - def to_dict(self): + self._raw[prop] = raw + + def to_dict(self, advanced=False): data = dict(self) for prop, value in data.items(): if isinstance(value, datetime.date): @@ -58,46 +64,65 @@ class Guess(UnicodeMixin, dict): data[prop] = u(value) elif isinstance(value, list): data[prop] = [u(x) for x in value] + if advanced: + data[prop] = {"value": data[prop], "raw": self.raw(prop), "confidence": self.confidence(prop)} return data - def nice_string(self): - data = self.to_dict() - - parts = json.dumps(data, indent=4).split('\n') - for i, p in enumerate(parts): - if p[:5] != ' "': - continue - - prop = p.split('"')[1] - parts[i] = (' [%.2f] "' % self.confidence(prop)) + p[5:] - - return '\n'.join(parts) + def nice_string(self, advanced=False): + if advanced: + data = self.to_dict(advanced) + return json.dumps(data, indent=4) + else: + data = self.to_dict() + + parts = json.dumps(data, indent=4).split('\n') + for i, p in enumerate(parts): + if p[:5] != ' "': + continue + + prop = p.split('"')[1] + parts[i] = (' [%.2f] "' % self.confidence(prop)) + p[5:] + + return '\n'.join(parts) def __unicode__(self): return u(self.to_dict()) def confidence(self, prop): return self._confidence.get(prop, -1) + + def raw(self, prop): + return self._raw.get(prop, None) - def set(self, prop, value, confidence=None): + def set(self, prop, value, confidence=None, raw=None): self[prop] = value if confidence is not None: self._confidence[prop] = confidence + if raw is not None: + self._raw[prop] = raw def set_confidence(self, prop, value): self._confidence[prop] = value + + def set_raw(self, prop, value): + self._raw[prop] = value - def update(self, other, confidence=None): + def update(self, other, confidence=None, raw=None): dict.update(self, other) if isinstance(other, Guess): for prop in other: self._confidence[prop] = other.confidence(prop) + self._raw[prop] = other.raw(prop) if confidence is not None: for prop in other: self._confidence[prop] = confidence + if raw is not None: + for prop in other: + self._raw[prop] = raw + def update_highest_confidence(self, other): """Update this guess with the values from the given one. In case there is property present in both, only the one with the highest one @@ -110,6 +135,7 @@ class Guess(UnicodeMixin, dict): continue self[prop] = other[prop] self._confidence[prop] = other.confidence(prop) + self._raw[prop] = other.raw(prop) def choose_int(g1, g2): @@ -181,7 +207,7 @@ def choose_string(g1, g2): elif v1l in v2l: return (v1, combined_prob) - # in case of conflict, return the one with highest priority + # in case of conflict, return the one with highest confidence else: if c1 > c2: return (v1, c1 - c2) @@ -288,7 +314,8 @@ def merge_all(guesses, append=None): result.set(prop, result.get(prop, []) + [g[prop]], # TODO: what to do with confidence here? maybe an # arithmetic mean... - confidence=g.confidence(prop)) + confidence=g.confidence(prop), + raw=g.raw(prop)) del g[prop] diff --git a/libs/guessit/language.py b/libs/guessit/language.py index 2714c6e0..4d22cf05 100755 --- a/libs/guessit/language.py +++ b/libs/guessit/language.py @@ -296,7 +296,7 @@ UNDETERMINED = Language('und') ALL_LANGUAGES = frozenset(Language(lng) for lng in lng_all_names) - frozenset([UNDETERMINED]) ALL_LANGUAGES_NAMES = lng_all_names -def search_language(string, lang_filter=None): +def search_language(string, lang_filter=None, skip=None): """Looks for language patterns, and if found return the language object, its group span and an associated confidence. @@ -345,6 +345,16 @@ def search_language(string, lang_filter=None): if pos != -1: end = pos + len(lang) + + # skip if span in in skip list + while skip and (pos - 1, end - 1) in skip: + pos = slow.find(lang, end) + if pos == -1: + continue + end = pos + len(lang) + if pos == -1: + continue + # make sure our word is always surrounded by separators if slow[pos - 1] not in sep or slow[end] not in sep: continue diff --git a/libs/guessit/matcher.py b/libs/guessit/matcher.py index 43378192..1984c01c 100755 --- a/libs/guessit/matcher.py +++ b/libs/guessit/matcher.py @@ -21,14 +21,14 @@ from __future__ import unicode_literals from guessit import PY3, u, base_text_type from guessit.matchtree import MatchTree -from guessit.textutils import normalize_unicode +from guessit.textutils import normalize_unicode, clean_string import logging log = logging.getLogger(__name__) class IterativeMatcher(object): - def __init__(self, filename, filetype='autodetect', opts=None): + def __init__(self, filename, filetype='autodetect', opts=None, transfo_opts=None): """An iterative matcher tries to match different patterns that appear in the filename. @@ -38,7 +38,8 @@ class IterativeMatcher(object): a movie. The recognized 'filetype' values are: - [ autodetect, subtitle, movie, moviesubtitle, episode, episodesubtitle ] + [ autodetect, subtitle, info, movie, moviesubtitle, movieinfo, episode, + episodesubtitle, episodeinfo ] The IterativeMatcher works mainly in 2 steps: @@ -61,15 +62,20 @@ class IterativeMatcher(object): it corresponds to a video codec, denoted by the letter'v' in the 4th line. (for more info, see guess.matchtree.to_string) + Second, it tries to merge all this information into a single object + containing all the found properties, and does some (basic) conflict + resolution when they arise. - Second, it tries to merge all this information into a single object - containing all the found properties, and does some (basic) conflict - resolution when they arise. + + When you create the Matcher, you can pass it: + - a list 'opts' of option names, that act as global flags + - a dict 'transfo_opts' of { transfo_name: (transfo_args, transfo_kwargs) } + with which to call the transfo.process() function. """ - valid_filetypes = ('autodetect', 'subtitle', 'video', - 'movie', 'moviesubtitle', - 'episode', 'episodesubtitle') + valid_filetypes = ('autodetect', 'subtitle', 'info', 'video', + 'movie', 'moviesubtitle', 'movieinfo', + 'episode', 'episodesubtitle', 'episodeinfo') if filetype not in valid_filetypes: raise ValueError("filetype needs to be one of %s" % valid_filetypes) if not PY3 and not isinstance(filename, unicode): @@ -80,10 +86,22 @@ class IterativeMatcher(object): if opts is None: opts = [] - elif isinstance(opts, base_text_type): - opts = opts.split() + if not isinstance(opts, list): + raise ValueError('opts must be a list of option names! Received: type=%s val=%s', + type(opts), opts) + + if transfo_opts is None: + transfo_opts = {} + if not isinstance(transfo_opts, dict): + raise ValueError('transfo_opts must be a dict of { transfo_name: (args, kwargs) }. '+ + 'Received: type=%s val=%s', type(transfo_opts), transfo_opts) self.match_tree = MatchTree(filename) + + # sanity check: make sure we don't process a (mostly) empty string + if clean_string(filename) == '': + return + mtree = self.match_tree mtree.guess.set('type', filetype, confidence=1.0) @@ -91,7 +109,11 @@ class IterativeMatcher(object): transfo = __import__('guessit.transfo.' + transfo_name, globals=globals(), locals=locals(), fromlist=['process'], level=0) - transfo.process(mtree, *args, **kwargs) + default_args, default_kwargs = transfo_opts.get(transfo_name, ((), {})) + all_args = args or default_args + all_kwargs = dict(default_kwargs) + all_kwargs.update(kwargs) # keep all kwargs merged together + transfo.process(mtree, *all_args, **all_kwargs) # 1- first split our path into dirs + basename + ext apply_transfo('split_path_components') @@ -111,7 +133,7 @@ class IterativeMatcher(object): # - language before episodes_rexps # - properties before language (eg: he-aac vs hebrew) # - release_group before properties (eg: XviD-?? vs xvid) - if mtree.guess['type'] in ('episode', 'episodesubtitle'): + if mtree.guess['type'] in ('episode', 'episodesubtitle', 'episodeinfo'): strategy = [ 'guess_date', 'guess_website', 'guess_release_group', 'guess_properties', 'guess_language', 'guess_video_rexps', @@ -124,6 +146,7 @@ class IterativeMatcher(object): if 'nolanguage' in opts: strategy.remove('guess_language') + for name in strategy: apply_transfo(name) @@ -143,7 +166,7 @@ class IterativeMatcher(object): # 5- try to identify the remaining unknown groups by looking at their # position relative to other known elements - if mtree.guess['type'] in ('episode', 'episodesubtitle'): + if mtree.guess['type'] in ('episode', 'episodesubtitle', 'episodeinfo'): apply_transfo('guess_episode_info_from_position') else: apply_transfo('guess_movie_title_from_position') diff --git a/libs/guessit/patterns.py b/libs/guessit/patterns.py index ed3982b9..f803a11c 100755 --- a/libs/guessit/patterns.py +++ b/libs/guessit/patterns.py @@ -25,6 +25,8 @@ import re subtitle_exts = [ 'srt', 'idx', 'sub', 'ssa' ] +info_exts = [ 'nfo' ] + video_exts = ['3g2', '3gp', '3gp2', 'asf', 'avi', 'divx', 'flv', 'm4v', 'mk2', 'mka', 'mkv', 'mov', 'mp4', 'mp4a', 'mpeg', 'mpg', 'ogg', 'ogm', 'ogv', 'qt', 'ra', 'ram', 'rm', 'ts', 'wav', 'webm', 'wma', 'wmv'] @@ -32,7 +34,7 @@ video_exts = ['3g2', '3gp', '3gp2', 'asf', 'avi', 'divx', 'flv', 'm4v', 'mk2', group_delimiters = [ '()', '[]', '{}' ] # separator character regexp -sep = r'[][)(}{+ /\._-]' # regexp art, hehe :D +sep = r'[][,)(}{+ /\._-]' # regexp art, hehe :D # character used to represent a deleted char (when matching groups) deleted = '_' @@ -49,7 +51,7 @@ episode_rexps = [ # ... Season 2 ... #(r'[Ss](?P[0-9]{1,3})[^0-9]?(?P(?:-?[xX-][0-9]{1,3})+)[^0-9]', 1.0, (0, -1)), # ... 2x13 ... - (r'[^0-9](?P[0-9]{1,2})[^0-9]?(?P(?:-?[xX][0-9]{1,3})+)[^0-9]', 1.0, (1, -1)), + (r'[^0-9](?P[0-9]{1,2})[^0-9 .-]?(?P(?:-?[xX][0-9]{1,3})+)[^0-9]', 1.0, (1, -1)), # ... s02 ... #(sep + r's(?P[0-9]{1,2})' + sep, 0.6, (1, -1)), @@ -122,9 +124,12 @@ prop_multi = { 'format': { 'DVD': [ 'DVD', 'DVD-Rip', 'VIDEO-TS', 'DVDivX' ], 'VHS': [ 'VHS' ], 'WEB-DL': [ 'WEB-DL' ] }, + 'is3D': { True: [ '3D' ] }, + 'screenSize': { '480p': [ '480[pi]?' ], '720p': [ '720[pi]?' ], - '1080p': [ '1080[pi]?' ] }, + '1080i': [ '1080i' ], + '1080p': [ '1080p', '1080[^i]' ] }, 'videoCodec': { 'XviD': [ 'Xvid' ], 'DivX': [ 'DVDivX', 'DivX' ], @@ -140,7 +145,7 @@ prop_multi = { 'format': { 'DVD': [ 'DVD', 'DVD-Rip', 'VIDEO-TS', 'DVDivX' ], 'DTS': [ 'DTS' ], 'AAC': [ 'He-AAC', 'AAC-He', 'AAC' ] }, - 'audioChannels': { '5.1': [ r'5\.1', 'DD5[\._ ]1', '5ch' ] }, + 'audioChannels': { '5.1': [ r'5\.1', 'DD5[._ ]1', '5ch' ] }, 'episodeFormat': { 'Minisode': [ 'Minisodes?' ] } @@ -170,7 +175,7 @@ prop_single = { 'releaseGroup': [ 'ESiR', 'WAF', 'SEPTiC', r'\[XCT\]', 'iNT', 'P } _dash = '-' -_psep = '[-\. _]?' +_psep = '[-. _]?' def _to_rexp(prop): return re.compile(prop.replace(_dash, _psep), re.IGNORECASE) @@ -237,8 +242,9 @@ def canonical_form(string): def compute_canonical_form(property_name, value): """Return the canonical form of a property given its type if it is a valid one, None otherwise.""" - for canonical_form, rexps in properties_rexps[property_name].items(): - for rexp in rexps: - if rexp.match(value): - return canonical_form + if isinstance(value, basestring): + for canonical_form, rexps in properties_rexps[property_name].items(): + for rexp in rexps: + if rexp.match(value): + return canonical_form return None diff --git a/libs/guessit/slogging.py b/libs/guessit/slogging.py index 75e261cf..39591a20 100755 --- a/libs/guessit/slogging.py +++ b/libs/guessit/slogging.py @@ -31,14 +31,15 @@ RED_FONT = "\x1B[0;31m" RESET_FONT = "\x1B[0m" -def setupLogging(colored=True, with_time=False, with_thread=False, filename=None): +def setupLogging(colored=True, with_time=False, with_thread=False, filename=None, with_lineno=False): """Set up a nice colored logger as the main application logger.""" class SimpleFormatter(logging.Formatter): def __init__(self, with_time, with_thread): self.fmt = (('%(asctime)s ' if with_time else '') + '%(levelname)-8s ' + - '[%(name)s:%(funcName)s]' + + '[%(name)s:%(funcName)s' + + (':%(lineno)s' if with_lineno else '') + ']' + ('[%(threadName)s]' if with_thread else '') + ' -- %(message)s') logging.Formatter.__init__(self, self.fmt) @@ -47,7 +48,8 @@ def setupLogging(colored=True, with_time=False, with_thread=False, filename=None def __init__(self, with_time, with_thread): self.fmt = (('%(asctime)s ' if with_time else '') + '-CC-%(levelname)-8s ' + - BLUE_FONT + '[%(name)s:%(funcName)s]' + + BLUE_FONT + '[%(name)s:%(funcName)s' + + (':%(lineno)s' if with_lineno else '') + ']' + RESET_FONT + ('[%(threadName)s]' if with_thread else '') + ' -- %(message)s') diff --git a/libs/guessit/textutils.py b/libs/guessit/textutils.py index f195e2b7..ae9d28c3 100755 --- a/libs/guessit/textutils.py +++ b/libs/guessit/textutils.py @@ -43,10 +43,13 @@ def strip_brackets(s): return s -def clean_string(s): - for c in sep[:-2]: # do not remove dashes ('-') - s = s.replace(c, ' ') - parts = s.split() +def clean_string(st): + for c in sep: + # do not remove certain chars + if c in ['-', ',']: + continue + st = st.replace(c, ' ') + parts = st.split() result = ' '.join(p for p in parts if p != '') # now also remove dashes on the outer part of the string diff --git a/libs/guessit/transfo/__init__.py b/libs/guessit/transfo/__init__.py index 820690a7..a28aa988 100755 --- a/libs/guessit/transfo/__init__.py +++ b/libs/guessit/transfo/__init__.py @@ -28,7 +28,7 @@ log = logging.getLogger(__name__) def found_property(node, name, confidence): - node.guess = Guess({name: node.clean_value}, confidence=confidence) + node.guess = Guess({name: node.clean_value}, confidence=confidence, raw=node.value) log.debug('Found with confidence %.2f: %s' % (confidence, node.guess)) @@ -52,11 +52,17 @@ def format_guess(guess): def find_and_split_node(node, strategy, logger): string = ' %s ' % node.value # add sentinels - for matcher, confidence in strategy: + for matcher, confidence, args, kwargs in strategy: + all_args = [string] if getattr(matcher, 'use_node', False): - result, span = matcher(string, node) + all_args.append(node) + if args: + all_args.append(args) + + if kwargs: + result, span = matcher(*all_args, **kwargs) else: - result, span = matcher(string) + result, span = matcher(*all_args) if result: # readjust span to compensate for sentinels @@ -69,7 +75,7 @@ def find_and_split_node(node, strategy, logger): if confidence is None: confidence = 1.0 - guess = format_guess(Guess(result, confidence=confidence)) + guess = format_guess(Guess(result, confidence=confidence, raw=string[span[0] + 1:span[1] + 1])) msg = 'Found with confidence %.2f: %s' % (confidence, guess) (logger or log).debug(msg) @@ -84,10 +90,12 @@ def find_and_split_node(node, strategy, logger): class SingleNodeGuesser(object): - def __init__(self, guess_func, confidence, logger=None): + def __init__(self, guess_func, confidence, logger, *args, **kwargs): self.guess_func = guess_func self.confidence = confidence self.logger = logger + self.args = args + self.kwargs = kwargs def process(self, mtree): # strategy is a list of pairs (guesser, confidence) @@ -95,7 +103,7 @@ class SingleNodeGuesser(object): # it will override it, otherwise it will leave the guess confidence # - if the guesser returns a simple dict as a guess and confidence is # specified, it will use it, or 1.0 otherwise - strategy = [ (self.guess_func, self.confidence) ] + strategy = [ (self.guess_func, self.confidence, self.args, self.kwargs) ] for node in mtree.unidentified_leaves(): find_and_split_node(node, strategy, self.logger) diff --git a/libs/guessit/transfo/guess_country.py b/libs/guessit/transfo/guess_country.py index 1d690698..aadb84f7 100755 --- a/libs/guessit/transfo/guess_country.py +++ b/libs/guessit/transfo/guess_country.py @@ -45,4 +45,4 @@ def process(mtree): except ValueError: continue - node.guess = Guess(country=country, confidence=1.0) + node.guess = Guess(country=country, confidence=1.0, raw=c) diff --git a/libs/guessit/transfo/guess_episodes_rexps.py b/libs/guessit/transfo/guess_episodes_rexps.py index 29562be2..30c2ca2f 100755 --- a/libs/guessit/transfo/guess_episodes_rexps.py +++ b/libs/guessit/transfo/guess_episodes_rexps.py @@ -40,27 +40,22 @@ def guess_episodes_rexps(string): for rexp, confidence, span_adjust in episode_rexps: match = re.search(rexp, string, re.IGNORECASE) if match: - guess = Guess(match.groupdict(), confidence=confidence) - span = (match.start() + span_adjust[0], + span = (match.start() + span_adjust[0], match.end() + span_adjust[1]) - - # episodes which have a season > 30 are most likely errors - # (Simpsons is at 24!) - if int(guess.get('season', 0)) > 30: - continue + guess = Guess(match.groupdict(), confidence=confidence, raw=string[span[0]:span[1]]) # decide whether we have only a single episode number or an # episode list if guess.get('episodeNumber'): eplist = number_list(guess['episodeNumber']) - guess.set('episodeNumber', eplist[0], confidence=confidence) + guess.set('episodeNumber', eplist[0], confidence=confidence, raw=string[span[0]:span[1]]) if len(eplist) > 1: - guess.set('episodeList', eplist, confidence=confidence) + guess.set('episodeList', eplist, confidence=confidence, raw=string[span[0]:span[1]]) if guess.get('bonusNumber'): eplist = number_list(guess['bonusNumber']) - guess.set('bonusNumber', eplist[0], confidence=confidence) + guess.set('bonusNumber', eplist[0], confidence=confidence, raw=string[span[0]:span[1]]) return guess, span diff --git a/libs/guessit/transfo/guess_filetype.py b/libs/guessit/transfo/guess_filetype.py index 4d98d016..4279c0b0 100755 --- a/libs/guessit/transfo/guess_filetype.py +++ b/libs/guessit/transfo/guess_filetype.py @@ -20,7 +20,7 @@ from __future__ import unicode_literals from guessit import Guess -from guessit.patterns import (subtitle_exts, video_exts, episode_rexps, +from guessit.patterns import (subtitle_exts, info_exts, video_exts, episode_rexps, find_properties, compute_canonical_form) from guessit.date import valid_year from guessit.textutils import clean_string @@ -53,12 +53,16 @@ def guess_filetype(mtree, filetype): filetype_container[0] = 'episode' elif filetype_container[0] == 'subtitle': filetype_container[0] = 'episodesubtitle' + elif filetype_container[0] == 'info': + filetype_container[0] = 'episodeinfo' def upgrade_movie(): if filetype_container[0] == 'video': filetype_container[0] = 'movie' elif filetype_container[0] == 'subtitle': filetype_container[0] = 'moviesubtitle' + elif filetype_container[0] == 'info': + filetype_container[0] = 'movieinfo' def upgrade_subtitle(): if 'movie' in filetype_container[0]: @@ -68,6 +72,14 @@ def guess_filetype(mtree, filetype): else: filetype_container[0] = 'subtitle' + def upgrade_info(): + if 'movie' in filetype_container[0]: + filetype_container[0] = 'movieinfo' + elif 'episode' in filetype_container[0]: + filetype_container[0] = 'episodeinfo' + else: + filetype_container[0] = 'info' + def upgrade(type='unknown'): if filetype_container[0] == 'autodetect': filetype_container[0] = type @@ -78,6 +90,9 @@ def guess_filetype(mtree, filetype): if fileext in subtitle_exts: upgrade_subtitle() other = { 'container': fileext } + elif fileext in info_exts: + upgrade_info() + other = { 'container': fileext } elif fileext in video_exts: upgrade(type='video') other = { 'container': fileext } @@ -104,17 +119,20 @@ def guess_filetype(mtree, filetype): fname = clean_string(filename).lower() for m in MOVIES: if m in fname: + log.debug('Found in exception list of movies -> type = movie') upgrade_movie() for s in SERIES: if s in fname: + log.debug('Found in exception list of series -> type = episode') upgrade_episode() # now look whether there are some specific hints for episode vs movie - if filetype_container[0] in ('video', 'subtitle'): + if filetype_container[0] in ('video', 'subtitle', 'info'): # if we have an episode_rexp (eg: s02e13), it is an episode for rexp, _, _ in episode_rexps: match = re.search(rexp, filename, re.IGNORECASE) if match: + log.debug('Found matching regexp: "%s" (string = "%s") -> type = episode', rexp, match.group()) upgrade_episode() break @@ -133,24 +151,29 @@ def guess_filetype(mtree, filetype): possible = False if possible: + log.debug('Found possible episode number: %s (from string "%s") -> type = episode', epnumber, match.group()) upgrade_episode() # if we have certain properties characteristic of episodes, it is an ep for prop, value, _, _ in find_properties(filename): log.debug('prop: %s = %s' % (prop, value)) if prop == 'episodeFormat': + log.debug('Found characteristic property of episodes: %s = "%s"', prop, value) upgrade_episode() break elif compute_canonical_form('format', value) == 'DVB': + log.debug('Found characteristic property of episodes: %s = "%s"', prop, value) upgrade_episode() break # origin-specific type if 'tvu.org.ru' in filename: + log.debug('Found characteristic property of episodes: %s = "%s"', prop, value) upgrade_episode() # if no episode info found, assume it's a movie + log.debug('Nothing characteristic found, assuming type = movie') upgrade_movie() filetype = filetype_container[0] diff --git a/libs/guessit/transfo/guess_language.py b/libs/guessit/transfo/guess_language.py index 86c1cf55..648a06b1 100755 --- a/libs/guessit/transfo/guess_language.py +++ b/libs/guessit/transfo/guess_language.py @@ -22,22 +22,34 @@ from __future__ import unicode_literals from guessit import Guess from guessit.transfo import SingleNodeGuesser from guessit.language import search_language -from guessit.textutils import clean_string, find_words import logging log = logging.getLogger(__name__) -def guess_language(string): - language, span, confidence = search_language(string) +def guess_language(string, node, skip=None): + if skip: + relative_skip = [] + for entry in skip: + node_idx = entry['node_idx'] + span = entry['span'] + if node_idx == node.node_idx[:len(node_idx)]: + relative_span = (span[0] - node.offset + 1, span[1] - node.offset + 1) + relative_skip.append(relative_span) + skip = relative_skip + + language, span, confidence = search_language(string, skip=skip) if language: return (Guess({'language': language}, - confidence=confidence), + confidence=confidence, + raw= string[span[0]:span[1]]), span) return None, None +guess_language.use_node = True -def process(mtree): - SingleNodeGuesser(guess_language, None, log).process(mtree) + +def process(mtree, *args, **kwargs): + SingleNodeGuesser(guess_language, None, log, *args, **kwargs).process(mtree) # Note: 'language' is promoted to 'subtitleLanguage' in the post_process transfo diff --git a/libs/guessit/transfo/guess_movie_title_from_position.py b/libs/guessit/transfo/guess_movie_title_from_position.py index d2e2deb2..bcb42b45 100755 --- a/libs/guessit/transfo/guess_movie_title_from_position.py +++ b/libs/guessit/transfo/guess_movie_title_from_position.py @@ -29,7 +29,8 @@ log = logging.getLogger(__name__) def process(mtree): def found_property(node, name, value, confidence): node.guess = Guess({ name: value }, - confidence=confidence) + confidence=confidence, + raw=value) log.debug('Found with confidence %.2f: %s' % (confidence, node.guess)) def found_title(node, confidence): diff --git a/libs/guessit/transfo/guess_video_rexps.py b/libs/guessit/transfo/guess_video_rexps.py index 8ae9e6c6..1b511f15 100755 --- a/libs/guessit/transfo/guess_video_rexps.py +++ b/libs/guessit/transfo/guess_video_rexps.py @@ -38,9 +38,10 @@ def guess_video_rexps(string): # the soonest that we can catch it) if metadata.get('cdNumberTotal', -1) is None: del metadata['cdNumberTotal'] - return (Guess(metadata, confidence=confidence), - (match.start() + span_adjust[0], - match.end() + span_adjust[1] - 2)) + span = (match.start() + span_adjust[0], + match.end() + span_adjust[1] - 2) + return (Guess(metadata, confidence=confidence, raw=string[span[0]:span[1]]), + span) return None, None diff --git a/libs/guessit/transfo/guess_weak_episodes_rexps.py b/libs/guessit/transfo/guess_weak_episodes_rexps.py index 8436ade8..18306b43 100755 --- a/libs/guessit/transfo/guess_weak_episodes_rexps.py +++ b/libs/guessit/transfo/guess_weak_episodes_rexps.py @@ -48,9 +48,9 @@ def guess_weak_episodes_rexps(string, node): continue return Guess({ 'season': season, 'episodeNumber': epnum }, - confidence=0.6), span + confidence=0.6, raw=string[span[0]:span[1]]), span else: - return Guess(metadata, confidence=0.3), span + return Guess(metadata, confidence=0.3, raw=string[span[0]:span[1]]), span return None, None diff --git a/libs/html5lib/__init__.py b/libs/html5lib/__init__.py index 16537aad..66c1a8eb 100644 --- a/libs/html5lib/__init__.py +++ b/libs/html5lib/__init__.py @@ -1,4 +1,4 @@ -""" +""" HTML parsing library based on the WHATWG "HTML5" specification. The parser is designed to be compatible with existing HTML found in the wild and implements well-defined error recovery that @@ -8,10 +8,16 @@ Example usage: import html5lib f = open("my_document.html") -tree = html5lib.parse(f) +tree = html5lib.parse(f) """ -__version__ = "0.95-dev" -from html5parser import HTMLParser, parse, parseFragment -from treebuilders import getTreeBuilder -from treewalkers import getTreeWalker -from serializer import serialize + +from __future__ import absolute_import, division, unicode_literals + +from .html5parser import HTMLParser, parse, parseFragment +from .treebuilders import getTreeBuilder +from .treewalkers import getTreeWalker +from .serializer import serialize + +__all__ = ["HTMLParser", "parse", "parseFragment", "getTreeBuilder", + "getTreeWalker", "serialize"] +__version__ = "0.99" diff --git a/libs/html5lib/constants.py b/libs/html5lib/constants.py index b533018e..e7089846 100644 --- a/libs/html5lib/constants.py +++ b/libs/html5lib/constants.py @@ -1,302 +1,301 @@ -import string, gettext -_ = gettext.gettext +from __future__ import absolute_import, division, unicode_literals -try: - frozenset -except NameError: - # Import from the sets module for python 2.3 - from sets import Set as set - from sets import ImmutableSet as frozenset +import string +import gettext +_ = gettext.gettext EOF = None E = { - "null-character": - _(u"Null character in input stream, replaced with U+FFFD."), - "invalid-codepoint": - _(u"Invalid codepoint in stream."), + "null-character": + _("Null character in input stream, replaced with U+FFFD."), + "invalid-codepoint": + _("Invalid codepoint in stream."), "incorrectly-placed-solidus": - _(u"Solidus (/) incorrectly placed in tag."), + _("Solidus (/) incorrectly placed in tag."), "incorrect-cr-newline-entity": - _(u"Incorrect CR newline entity, replaced with LF."), + _("Incorrect CR newline entity, replaced with LF."), "illegal-windows-1252-entity": - _(u"Entity used with illegal number (windows-1252 reference)."), + _("Entity used with illegal number (windows-1252 reference)."), "cant-convert-numeric-entity": - _(u"Numeric entity couldn't be converted to character " - u"(codepoint U+%(charAsInt)08x)."), + _("Numeric entity couldn't be converted to character " + "(codepoint U+%(charAsInt)08x)."), "illegal-codepoint-for-numeric-entity": - _(u"Numeric entity represents an illegal codepoint: " - u"U+%(charAsInt)08x."), + _("Numeric entity represents an illegal codepoint: " + "U+%(charAsInt)08x."), "numeric-entity-without-semicolon": - _(u"Numeric entity didn't end with ';'."), + _("Numeric entity didn't end with ';'."), "expected-numeric-entity-but-got-eof": - _(u"Numeric entity expected. Got end of file instead."), + _("Numeric entity expected. Got end of file instead."), "expected-numeric-entity": - _(u"Numeric entity expected but none found."), + _("Numeric entity expected but none found."), "named-entity-without-semicolon": - _(u"Named entity didn't end with ';'."), + _("Named entity didn't end with ';'."), "expected-named-entity": - _(u"Named entity expected. Got none."), + _("Named entity expected. Got none."), "attributes-in-end-tag": - _(u"End tag contains unexpected attributes."), + _("End tag contains unexpected attributes."), 'self-closing-flag-on-end-tag': - _(u"End tag contains unexpected self-closing flag."), + _("End tag contains unexpected self-closing flag."), "expected-tag-name-but-got-right-bracket": - _(u"Expected tag name. Got '>' instead."), + _("Expected tag name. Got '>' instead."), "expected-tag-name-but-got-question-mark": - _(u"Expected tag name. Got '?' instead. (HTML doesn't " - u"support processing instructions.)"), + _("Expected tag name. Got '?' instead. (HTML doesn't " + "support processing instructions.)"), "expected-tag-name": - _(u"Expected tag name. Got something else instead"), + _("Expected tag name. Got something else instead"), "expected-closing-tag-but-got-right-bracket": - _(u"Expected closing tag. Got '>' instead. Ignoring ''."), + _("Expected closing tag. Got '>' instead. Ignoring ''."), "expected-closing-tag-but-got-eof": - _(u"Expected closing tag. Unexpected end of file."), + _("Expected closing tag. Unexpected end of file."), "expected-closing-tag-but-got-char": - _(u"Expected closing tag. Unexpected character '%(data)s' found."), + _("Expected closing tag. Unexpected character '%(data)s' found."), "eof-in-tag-name": - _(u"Unexpected end of file in the tag name."), + _("Unexpected end of file in the tag name."), "expected-attribute-name-but-got-eof": - _(u"Unexpected end of file. Expected attribute name instead."), + _("Unexpected end of file. Expected attribute name instead."), "eof-in-attribute-name": - _(u"Unexpected end of file in attribute name."), + _("Unexpected end of file in attribute name."), "invalid-character-in-attribute-name": - _(u"Invalid chracter in attribute name"), + _("Invalid character in attribute name"), "duplicate-attribute": - _(u"Dropped duplicate attribute on tag."), + _("Dropped duplicate attribute on tag."), "expected-end-of-tag-name-but-got-eof": - _(u"Unexpected end of file. Expected = or end of tag."), + _("Unexpected end of file. Expected = or end of tag."), "expected-attribute-value-but-got-eof": - _(u"Unexpected end of file. Expected attribute value."), + _("Unexpected end of file. Expected attribute value."), "expected-attribute-value-but-got-right-bracket": - _(u"Expected attribute value. Got '>' instead."), + _("Expected attribute value. Got '>' instead."), 'equals-in-unquoted-attribute-value': - _(u"Unexpected = in unquoted attribute"), + _("Unexpected = in unquoted attribute"), 'unexpected-character-in-unquoted-attribute-value': - _(u"Unexpected character in unquoted attribute"), + _("Unexpected character in unquoted attribute"), "invalid-character-after-attribute-name": - _(u"Unexpected character after attribute name."), + _("Unexpected character after attribute name."), "unexpected-character-after-attribute-value": - _(u"Unexpected character after attribute value."), + _("Unexpected character after attribute value."), "eof-in-attribute-value-double-quote": - _(u"Unexpected end of file in attribute value (\")."), + _("Unexpected end of file in attribute value (\")."), "eof-in-attribute-value-single-quote": - _(u"Unexpected end of file in attribute value (')."), + _("Unexpected end of file in attribute value (')."), "eof-in-attribute-value-no-quotes": - _(u"Unexpected end of file in attribute value."), + _("Unexpected end of file in attribute value."), "unexpected-EOF-after-solidus-in-tag": - _(u"Unexpected end of file in tag. Expected >"), - "unexpected-character-after-soldius-in-tag": - _(u"Unexpected character after / in tag. Expected >"), + _("Unexpected end of file in tag. Expected >"), + "unexpected-character-after-solidus-in-tag": + _("Unexpected character after / in tag. Expected >"), "expected-dashes-or-doctype": - _(u"Expected '--' or 'DOCTYPE'. Not found."), + _("Expected '--' or 'DOCTYPE'. Not found."), "unexpected-bang-after-double-dash-in-comment": - _(u"Unexpected ! after -- in comment"), + _("Unexpected ! after -- in comment"), "unexpected-space-after-double-dash-in-comment": - _(u"Unexpected space after -- in comment"), + _("Unexpected space after -- in comment"), "incorrect-comment": - _(u"Incorrect comment."), + _("Incorrect comment."), "eof-in-comment": - _(u"Unexpected end of file in comment."), + _("Unexpected end of file in comment."), "eof-in-comment-end-dash": - _(u"Unexpected end of file in comment (-)"), + _("Unexpected end of file in comment (-)"), "unexpected-dash-after-double-dash-in-comment": - _(u"Unexpected '-' after '--' found in comment."), + _("Unexpected '-' after '--' found in comment."), "eof-in-comment-double-dash": - _(u"Unexpected end of file in comment (--)."), + _("Unexpected end of file in comment (--)."), "eof-in-comment-end-space-state": - _(u"Unexpected end of file in comment."), + _("Unexpected end of file in comment."), "eof-in-comment-end-bang-state": - _(u"Unexpected end of file in comment."), + _("Unexpected end of file in comment."), "unexpected-char-in-comment": - _(u"Unexpected character in comment found."), + _("Unexpected character in comment found."), "need-space-after-doctype": - _(u"No space after literal string 'DOCTYPE'."), + _("No space after literal string 'DOCTYPE'."), "expected-doctype-name-but-got-right-bracket": - _(u"Unexpected > character. Expected DOCTYPE name."), + _("Unexpected > character. Expected DOCTYPE name."), "expected-doctype-name-but-got-eof": - _(u"Unexpected end of file. Expected DOCTYPE name."), + _("Unexpected end of file. Expected DOCTYPE name."), "eof-in-doctype-name": - _(u"Unexpected end of file in DOCTYPE name."), + _("Unexpected end of file in DOCTYPE name."), "eof-in-doctype": - _(u"Unexpected end of file in DOCTYPE."), + _("Unexpected end of file in DOCTYPE."), "expected-space-or-right-bracket-in-doctype": - _(u"Expected space or '>'. Got '%(data)s'"), + _("Expected space or '>'. Got '%(data)s'"), "unexpected-end-of-doctype": - _(u"Unexpected end of DOCTYPE."), + _("Unexpected end of DOCTYPE."), "unexpected-char-in-doctype": - _(u"Unexpected character in DOCTYPE."), + _("Unexpected character in DOCTYPE."), "eof-in-innerhtml": - _(u"XXX innerHTML EOF"), + _("XXX innerHTML EOF"), "unexpected-doctype": - _(u"Unexpected DOCTYPE. Ignored."), + _("Unexpected DOCTYPE. Ignored."), "non-html-root": - _(u"html needs to be the first start tag."), + _("html needs to be the first start tag."), "expected-doctype-but-got-eof": - _(u"Unexpected End of file. Expected DOCTYPE."), + _("Unexpected End of file. Expected DOCTYPE."), "unknown-doctype": - _(u"Erroneous DOCTYPE."), + _("Erroneous DOCTYPE."), "expected-doctype-but-got-chars": - _(u"Unexpected non-space characters. Expected DOCTYPE."), + _("Unexpected non-space characters. Expected DOCTYPE."), "expected-doctype-but-got-start-tag": - _(u"Unexpected start tag (%(name)s). Expected DOCTYPE."), + _("Unexpected start tag (%(name)s). Expected DOCTYPE."), "expected-doctype-but-got-end-tag": - _(u"Unexpected end tag (%(name)s). Expected DOCTYPE."), + _("Unexpected end tag (%(name)s). Expected DOCTYPE."), "end-tag-after-implied-root": - _(u"Unexpected end tag (%(name)s) after the (implied) root element."), + _("Unexpected end tag (%(name)s) after the (implied) root element."), "expected-named-closing-tag-but-got-eof": - _(u"Unexpected end of file. Expected end tag (%(name)s)."), + _("Unexpected end of file. Expected end tag (%(name)s)."), "two-heads-are-not-better-than-one": - _(u"Unexpected start tag head in existing head. Ignored."), + _("Unexpected start tag head in existing head. Ignored."), "unexpected-end-tag": - _(u"Unexpected end tag (%(name)s). Ignored."), + _("Unexpected end tag (%(name)s). Ignored."), "unexpected-start-tag-out-of-my-head": - _(u"Unexpected start tag (%(name)s) that can be in head. Moved."), + _("Unexpected start tag (%(name)s) that can be in head. Moved."), "unexpected-start-tag": - _(u"Unexpected start tag (%(name)s)."), + _("Unexpected start tag (%(name)s)."), "missing-end-tag": - _(u"Missing end tag (%(name)s)."), + _("Missing end tag (%(name)s)."), "missing-end-tags": - _(u"Missing end tags (%(name)s)."), + _("Missing end tags (%(name)s)."), "unexpected-start-tag-implies-end-tag": - _(u"Unexpected start tag (%(startName)s) " - u"implies end tag (%(endName)s)."), + _("Unexpected start tag (%(startName)s) " + "implies end tag (%(endName)s)."), "unexpected-start-tag-treated-as": - _(u"Unexpected start tag (%(originalName)s). Treated as %(newName)s."), + _("Unexpected start tag (%(originalName)s). Treated as %(newName)s."), "deprecated-tag": - _(u"Unexpected start tag %(name)s. Don't use it!"), + _("Unexpected start tag %(name)s. Don't use it!"), "unexpected-start-tag-ignored": - _(u"Unexpected start tag %(name)s. Ignored."), + _("Unexpected start tag %(name)s. Ignored."), "expected-one-end-tag-but-got-another": - _(u"Unexpected end tag (%(gotName)s). " - u"Missing end tag (%(expectedName)s)."), + _("Unexpected end tag (%(gotName)s). " + "Missing end tag (%(expectedName)s)."), "end-tag-too-early": - _(u"End tag (%(name)s) seen too early. Expected other end tag."), + _("End tag (%(name)s) seen too early. Expected other end tag."), "end-tag-too-early-named": - _(u"Unexpected end tag (%(gotName)s). Expected end tag (%(expectedName)s)."), + _("Unexpected end tag (%(gotName)s). Expected end tag (%(expectedName)s)."), "end-tag-too-early-ignored": - _(u"End tag (%(name)s) seen too early. Ignored."), + _("End tag (%(name)s) seen too early. Ignored."), "adoption-agency-1.1": - _(u"End tag (%(name)s) violates step 1, " - u"paragraph 1 of the adoption agency algorithm."), + _("End tag (%(name)s) violates step 1, " + "paragraph 1 of the adoption agency algorithm."), "adoption-agency-1.2": - _(u"End tag (%(name)s) violates step 1, " - u"paragraph 2 of the adoption agency algorithm."), + _("End tag (%(name)s) violates step 1, " + "paragraph 2 of the adoption agency algorithm."), "adoption-agency-1.3": - _(u"End tag (%(name)s) violates step 1, " - u"paragraph 3 of the adoption agency algorithm."), + _("End tag (%(name)s) violates step 1, " + "paragraph 3 of the adoption agency algorithm."), + "adoption-agency-4.4": + _("End tag (%(name)s) violates step 4, " + "paragraph 4 of the adoption agency algorithm."), "unexpected-end-tag-treated-as": - _(u"Unexpected end tag (%(originalName)s). Treated as %(newName)s."), + _("Unexpected end tag (%(originalName)s). Treated as %(newName)s."), "no-end-tag": - _(u"This element (%(name)s) has no end tag."), + _("This element (%(name)s) has no end tag."), "unexpected-implied-end-tag-in-table": - _(u"Unexpected implied end tag (%(name)s) in the table phase."), + _("Unexpected implied end tag (%(name)s) in the table phase."), "unexpected-implied-end-tag-in-table-body": - _(u"Unexpected implied end tag (%(name)s) in the table body phase."), + _("Unexpected implied end tag (%(name)s) in the table body phase."), "unexpected-char-implies-table-voodoo": - _(u"Unexpected non-space characters in " - u"table context caused voodoo mode."), + _("Unexpected non-space characters in " + "table context caused voodoo mode."), "unexpected-hidden-input-in-table": - _(u"Unexpected input with type hidden in table context."), + _("Unexpected input with type hidden in table context."), "unexpected-form-in-table": - _(u"Unexpected form in table context."), + _("Unexpected form in table context."), "unexpected-start-tag-implies-table-voodoo": - _(u"Unexpected start tag (%(name)s) in " - u"table context caused voodoo mode."), + _("Unexpected start tag (%(name)s) in " + "table context caused voodoo mode."), "unexpected-end-tag-implies-table-voodoo": - _(u"Unexpected end tag (%(name)s) in " - u"table context caused voodoo mode."), + _("Unexpected end tag (%(name)s) in " + "table context caused voodoo mode."), "unexpected-cell-in-table-body": - _(u"Unexpected table cell start tag (%(name)s) " - u"in the table body phase."), + _("Unexpected table cell start tag (%(name)s) " + "in the table body phase."), "unexpected-cell-end-tag": - _(u"Got table cell end tag (%(name)s) " - u"while required end tags are missing."), + _("Got table cell end tag (%(name)s) " + "while required end tags are missing."), "unexpected-end-tag-in-table-body": - _(u"Unexpected end tag (%(name)s) in the table body phase. Ignored."), + _("Unexpected end tag (%(name)s) in the table body phase. Ignored."), "unexpected-implied-end-tag-in-table-row": - _(u"Unexpected implied end tag (%(name)s) in the table row phase."), + _("Unexpected implied end tag (%(name)s) in the table row phase."), "unexpected-end-tag-in-table-row": - _(u"Unexpected end tag (%(name)s) in the table row phase. Ignored."), + _("Unexpected end tag (%(name)s) in the table row phase. Ignored."), "unexpected-select-in-select": - _(u"Unexpected select start tag in the select phase " - u"treated as select end tag."), + _("Unexpected select start tag in the select phase " + "treated as select end tag."), "unexpected-input-in-select": - _(u"Unexpected input start tag in the select phase."), + _("Unexpected input start tag in the select phase."), "unexpected-start-tag-in-select": - _(u"Unexpected start tag token (%(name)s in the select phase. " - u"Ignored."), + _("Unexpected start tag token (%(name)s in the select phase. " + "Ignored."), "unexpected-end-tag-in-select": - _(u"Unexpected end tag (%(name)s) in the select phase. Ignored."), + _("Unexpected end tag (%(name)s) in the select phase. Ignored."), "unexpected-table-element-start-tag-in-select-in-table": - _(u"Unexpected table element start tag (%(name)s) in the select in table phase."), + _("Unexpected table element start tag (%(name)s) in the select in table phase."), "unexpected-table-element-end-tag-in-select-in-table": - _(u"Unexpected table element end tag (%(name)s) in the select in table phase."), + _("Unexpected table element end tag (%(name)s) in the select in table phase."), "unexpected-char-after-body": - _(u"Unexpected non-space characters in the after body phase."), + _("Unexpected non-space characters in the after body phase."), "unexpected-start-tag-after-body": - _(u"Unexpected start tag token (%(name)s)" - u" in the after body phase."), + _("Unexpected start tag token (%(name)s)" + " in the after body phase."), "unexpected-end-tag-after-body": - _(u"Unexpected end tag token (%(name)s)" - u" in the after body phase."), + _("Unexpected end tag token (%(name)s)" + " in the after body phase."), "unexpected-char-in-frameset": - _(u"Unepxected characters in the frameset phase. Characters ignored."), + _("Unexpected characters in the frameset phase. Characters ignored."), "unexpected-start-tag-in-frameset": - _(u"Unexpected start tag token (%(name)s)" - u" in the frameset phase. Ignored."), + _("Unexpected start tag token (%(name)s)" + " in the frameset phase. Ignored."), "unexpected-frameset-in-frameset-innerhtml": - _(u"Unexpected end tag token (frameset) " - u"in the frameset phase (innerHTML)."), + _("Unexpected end tag token (frameset) " + "in the frameset phase (innerHTML)."), "unexpected-end-tag-in-frameset": - _(u"Unexpected end tag token (%(name)s)" - u" in the frameset phase. Ignored."), + _("Unexpected end tag token (%(name)s)" + " in the frameset phase. Ignored."), "unexpected-char-after-frameset": - _(u"Unexpected non-space characters in the " - u"after frameset phase. Ignored."), + _("Unexpected non-space characters in the " + "after frameset phase. Ignored."), "unexpected-start-tag-after-frameset": - _(u"Unexpected start tag (%(name)s)" - u" in the after frameset phase. Ignored."), + _("Unexpected start tag (%(name)s)" + " in the after frameset phase. Ignored."), "unexpected-end-tag-after-frameset": - _(u"Unexpected end tag (%(name)s)" - u" in the after frameset phase. Ignored."), + _("Unexpected end tag (%(name)s)" + " in the after frameset phase. Ignored."), "unexpected-end-tag-after-body-innerhtml": - _(u"Unexpected end tag after body(innerHtml)"), + _("Unexpected end tag after body(innerHtml)"), "expected-eof-but-got-char": - _(u"Unexpected non-space characters. Expected end of file."), + _("Unexpected non-space characters. Expected end of file."), "expected-eof-but-got-start-tag": - _(u"Unexpected start tag (%(name)s)" - u". Expected end of file."), + _("Unexpected start tag (%(name)s)" + ". Expected end of file."), "expected-eof-but-got-end-tag": - _(u"Unexpected end tag (%(name)s)" - u". Expected end of file."), + _("Unexpected end tag (%(name)s)" + ". Expected end of file."), "eof-in-table": - _(u"Unexpected end of file. Expected table content."), + _("Unexpected end of file. Expected table content."), "eof-in-select": - _(u"Unexpected end of file. Expected select content."), + _("Unexpected end of file. Expected select content."), "eof-in-frameset": - _(u"Unexpected end of file. Expected frameset content."), + _("Unexpected end of file. Expected frameset content."), "eof-in-script-in-script": - _(u"Unexpected end of file. Expected script content."), + _("Unexpected end of file. Expected script content."), "eof-in-foreign-lands": - _(u"Unexpected end of file. Expected foreign content"), + _("Unexpected end of file. Expected foreign content"), "non-void-element-with-trailing-solidus": - _(u"Trailing solidus not allowed on element %(name)s"), + _("Trailing solidus not allowed on element %(name)s"), "unexpected-html-element-in-foreign-content": - _(u"Element %(name)s not allowed in a non-html context"), + _("Element %(name)s not allowed in a non-html context"), "unexpected-end-tag-before-html": - _(u"Unexpected end tag (%(name)s) before html."), + _("Unexpected end tag (%(name)s) before html."), "XXX-undefined-error": - (u"Undefined error (this sucks and should be fixed)"), + _("Undefined error (this sucks and should be fixed)"), } namespaces = { - "html":"http://www.w3.org/1999/xhtml", - "mathml":"http://www.w3.org/1998/Math/MathML", - "svg":"http://www.w3.org/2000/svg", - "xlink":"http://www.w3.org/1999/xlink", - "xml":"http://www.w3.org/XML/1998/namespace", - "xmlns":"http://www.w3.org/2000/xmlns/" + "html": "http://www.w3.org/1999/xhtml", + "mathml": "http://www.w3.org/1998/Math/MathML", + "svg": "http://www.w3.org/2000/svg", + "xlink": "http://www.w3.org/1999/xlink", + "xml": "http://www.w3.org/XML/1998/namespace", + "xmlns": "http://www.w3.org/2000/xmlns/" } scopingElements = frozenset(( @@ -380,7 +379,7 @@ specialElements = frozenset(( (namespaces["html"], "iframe"), # Note that image is commented out in the spec as "this isn't an # element that can end up on the stack, so it doesn't matter," - (namespaces["html"], "image"), + (namespaces["html"], "image"), (namespaces["html"], "img"), (namespaces["html"], "input"), (namespaces["html"], "isindex"), @@ -434,12 +433,30 @@ mathmlTextIntegrationPointElements = frozenset(( (namespaces["mathml"], "mtext") )) +adjustForeignAttributes = { + "xlink:actuate": ("xlink", "actuate", namespaces["xlink"]), + "xlink:arcrole": ("xlink", "arcrole", namespaces["xlink"]), + "xlink:href": ("xlink", "href", namespaces["xlink"]), + "xlink:role": ("xlink", "role", namespaces["xlink"]), + "xlink:show": ("xlink", "show", namespaces["xlink"]), + "xlink:title": ("xlink", "title", namespaces["xlink"]), + "xlink:type": ("xlink", "type", namespaces["xlink"]), + "xml:base": ("xml", "base", namespaces["xml"]), + "xml:lang": ("xml", "lang", namespaces["xml"]), + "xml:space": ("xml", "space", namespaces["xml"]), + "xmlns": (None, "xmlns", namespaces["xmlns"]), + "xmlns:xlink": ("xmlns", "xlink", namespaces["xmlns"]) +} + +unadjustForeignAttributes = dict([((ns, local), qname) for qname, (prefix, local, ns) in + adjustForeignAttributes.items()]) + spaceCharacters = frozenset(( - u"\t", - u"\n", - u"\u000C", - u" ", - u"\r" + "\t", + "\n", + "\u000C", + " ", + "\r" )) tableInsertModeElements = frozenset(( @@ -456,8 +473,8 @@ asciiLetters = frozenset(string.ascii_letters) digits = frozenset(string.digits) hexDigits = frozenset(string.hexdigits) -asciiUpper2Lower = dict([(ord(c),ord(c.lower())) - for c in string.ascii_uppercase]) +asciiUpper2Lower = dict([(ord(c), ord(c.lower())) + for c in string.ascii_uppercase]) # Heading elements need to be ordered headingElements = ( @@ -503,8 +520,8 @@ booleanAttributes = { "": frozenset(("irrelevant",)), "style": frozenset(("scoped",)), "img": frozenset(("ismap",)), - "audio": frozenset(("autoplay","controls")), - "video": frozenset(("autoplay","controls")), + "audio": frozenset(("autoplay", "controls")), + "video": frozenset(("autoplay", "controls")), "script": frozenset(("defer", "async")), "details": frozenset(("open",)), "datagrid": frozenset(("multiple", "disabled")), @@ -523,2312 +540,2312 @@ booleanAttributes = { # entitiesWindows1252 has to be _ordered_ and needs to have an index. It # therefore can't be a frozenset. entitiesWindows1252 = ( - 8364, # 0x80 0x20AC EURO SIGN - 65533, # 0x81 UNDEFINED - 8218, # 0x82 0x201A SINGLE LOW-9 QUOTATION MARK - 402, # 0x83 0x0192 LATIN SMALL LETTER F WITH HOOK - 8222, # 0x84 0x201E DOUBLE LOW-9 QUOTATION MARK - 8230, # 0x85 0x2026 HORIZONTAL ELLIPSIS - 8224, # 0x86 0x2020 DAGGER - 8225, # 0x87 0x2021 DOUBLE DAGGER - 710, # 0x88 0x02C6 MODIFIER LETTER CIRCUMFLEX ACCENT - 8240, # 0x89 0x2030 PER MILLE SIGN - 352, # 0x8A 0x0160 LATIN CAPITAL LETTER S WITH CARON - 8249, # 0x8B 0x2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 338, # 0x8C 0x0152 LATIN CAPITAL LIGATURE OE - 65533, # 0x8D UNDEFINED - 381, # 0x8E 0x017D LATIN CAPITAL LETTER Z WITH CARON - 65533, # 0x8F UNDEFINED - 65533, # 0x90 UNDEFINED - 8216, # 0x91 0x2018 LEFT SINGLE QUOTATION MARK - 8217, # 0x92 0x2019 RIGHT SINGLE QUOTATION MARK - 8220, # 0x93 0x201C LEFT DOUBLE QUOTATION MARK - 8221, # 0x94 0x201D RIGHT DOUBLE QUOTATION MARK - 8226, # 0x95 0x2022 BULLET - 8211, # 0x96 0x2013 EN DASH - 8212, # 0x97 0x2014 EM DASH - 732, # 0x98 0x02DC SMALL TILDE - 8482, # 0x99 0x2122 TRADE MARK SIGN - 353, # 0x9A 0x0161 LATIN SMALL LETTER S WITH CARON - 8250, # 0x9B 0x203A SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 339, # 0x9C 0x0153 LATIN SMALL LIGATURE OE - 65533, # 0x9D UNDEFINED - 382, # 0x9E 0x017E LATIN SMALL LETTER Z WITH CARON - 376 # 0x9F 0x0178 LATIN CAPITAL LETTER Y WITH DIAERESIS + 8364, # 0x80 0x20AC EURO SIGN + 65533, # 0x81 UNDEFINED + 8218, # 0x82 0x201A SINGLE LOW-9 QUOTATION MARK + 402, # 0x83 0x0192 LATIN SMALL LETTER F WITH HOOK + 8222, # 0x84 0x201E DOUBLE LOW-9 QUOTATION MARK + 8230, # 0x85 0x2026 HORIZONTAL ELLIPSIS + 8224, # 0x86 0x2020 DAGGER + 8225, # 0x87 0x2021 DOUBLE DAGGER + 710, # 0x88 0x02C6 MODIFIER LETTER CIRCUMFLEX ACCENT + 8240, # 0x89 0x2030 PER MILLE SIGN + 352, # 0x8A 0x0160 LATIN CAPITAL LETTER S WITH CARON + 8249, # 0x8B 0x2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 338, # 0x8C 0x0152 LATIN CAPITAL LIGATURE OE + 65533, # 0x8D UNDEFINED + 381, # 0x8E 0x017D LATIN CAPITAL LETTER Z WITH CARON + 65533, # 0x8F UNDEFINED + 65533, # 0x90 UNDEFINED + 8216, # 0x91 0x2018 LEFT SINGLE QUOTATION MARK + 8217, # 0x92 0x2019 RIGHT SINGLE QUOTATION MARK + 8220, # 0x93 0x201C LEFT DOUBLE QUOTATION MARK + 8221, # 0x94 0x201D RIGHT DOUBLE QUOTATION MARK + 8226, # 0x95 0x2022 BULLET + 8211, # 0x96 0x2013 EN DASH + 8212, # 0x97 0x2014 EM DASH + 732, # 0x98 0x02DC SMALL TILDE + 8482, # 0x99 0x2122 TRADE MARK SIGN + 353, # 0x9A 0x0161 LATIN SMALL LETTER S WITH CARON + 8250, # 0x9B 0x203A SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 339, # 0x9C 0x0153 LATIN SMALL LIGATURE OE + 65533, # 0x9D UNDEFINED + 382, # 0x9E 0x017E LATIN SMALL LETTER Z WITH CARON + 376 # 0x9F 0x0178 LATIN CAPITAL LETTER Y WITH DIAERESIS ) xmlEntities = frozenset(('lt;', 'gt;', 'amp;', 'apos;', 'quot;')) entities = { - "AElig": u"\xc6", - "AElig;": u"\xc6", - "AMP": u"&", - "AMP;": u"&", - "Aacute": u"\xc1", - "Aacute;": u"\xc1", - "Abreve;": u"\u0102", - "Acirc": u"\xc2", - "Acirc;": u"\xc2", - "Acy;": u"\u0410", - "Afr;": u"\U0001d504", - "Agrave": u"\xc0", - "Agrave;": u"\xc0", - "Alpha;": u"\u0391", - "Amacr;": u"\u0100", - "And;": u"\u2a53", - "Aogon;": u"\u0104", - "Aopf;": u"\U0001d538", - "ApplyFunction;": u"\u2061", - "Aring": u"\xc5", - "Aring;": u"\xc5", - "Ascr;": u"\U0001d49c", - "Assign;": u"\u2254", - "Atilde": u"\xc3", - "Atilde;": u"\xc3", - "Auml": u"\xc4", - "Auml;": u"\xc4", - "Backslash;": u"\u2216", - "Barv;": u"\u2ae7", - "Barwed;": u"\u2306", - "Bcy;": u"\u0411", - "Because;": u"\u2235", - "Bernoullis;": u"\u212c", - "Beta;": u"\u0392", - "Bfr;": u"\U0001d505", - "Bopf;": u"\U0001d539", - "Breve;": u"\u02d8", - "Bscr;": u"\u212c", - "Bumpeq;": u"\u224e", - "CHcy;": u"\u0427", - "COPY": u"\xa9", - "COPY;": u"\xa9", - "Cacute;": u"\u0106", - "Cap;": u"\u22d2", - "CapitalDifferentialD;": u"\u2145", - "Cayleys;": u"\u212d", - "Ccaron;": u"\u010c", - "Ccedil": u"\xc7", - "Ccedil;": u"\xc7", - "Ccirc;": u"\u0108", - "Cconint;": u"\u2230", - "Cdot;": u"\u010a", - "Cedilla;": u"\xb8", - "CenterDot;": u"\xb7", - "Cfr;": u"\u212d", - "Chi;": u"\u03a7", - "CircleDot;": u"\u2299", - "CircleMinus;": u"\u2296", - "CirclePlus;": u"\u2295", - "CircleTimes;": u"\u2297", - "ClockwiseContourIntegral;": u"\u2232", - "CloseCurlyDoubleQuote;": u"\u201d", - "CloseCurlyQuote;": u"\u2019", - "Colon;": u"\u2237", - "Colone;": u"\u2a74", - "Congruent;": u"\u2261", - "Conint;": u"\u222f", - "ContourIntegral;": u"\u222e", - "Copf;": u"\u2102", - "Coproduct;": u"\u2210", - "CounterClockwiseContourIntegral;": u"\u2233", - "Cross;": u"\u2a2f", - "Cscr;": u"\U0001d49e", - "Cup;": u"\u22d3", - "CupCap;": u"\u224d", - "DD;": u"\u2145", - "DDotrahd;": u"\u2911", - "DJcy;": u"\u0402", - "DScy;": u"\u0405", - "DZcy;": u"\u040f", - "Dagger;": u"\u2021", - "Darr;": u"\u21a1", - "Dashv;": u"\u2ae4", - "Dcaron;": u"\u010e", - "Dcy;": u"\u0414", - "Del;": u"\u2207", - "Delta;": u"\u0394", - "Dfr;": u"\U0001d507", - "DiacriticalAcute;": u"\xb4", - "DiacriticalDot;": u"\u02d9", - "DiacriticalDoubleAcute;": u"\u02dd", - "DiacriticalGrave;": u"`", - "DiacriticalTilde;": u"\u02dc", - "Diamond;": u"\u22c4", - "DifferentialD;": u"\u2146", - "Dopf;": u"\U0001d53b", - "Dot;": u"\xa8", - "DotDot;": u"\u20dc", - "DotEqual;": u"\u2250", - "DoubleContourIntegral;": u"\u222f", - "DoubleDot;": u"\xa8", - "DoubleDownArrow;": u"\u21d3", - "DoubleLeftArrow;": u"\u21d0", - "DoubleLeftRightArrow;": u"\u21d4", - "DoubleLeftTee;": u"\u2ae4", - "DoubleLongLeftArrow;": u"\u27f8", - "DoubleLongLeftRightArrow;": u"\u27fa", - "DoubleLongRightArrow;": u"\u27f9", - "DoubleRightArrow;": u"\u21d2", - "DoubleRightTee;": u"\u22a8", - "DoubleUpArrow;": u"\u21d1", - "DoubleUpDownArrow;": u"\u21d5", - "DoubleVerticalBar;": u"\u2225", - "DownArrow;": u"\u2193", - "DownArrowBar;": u"\u2913", - "DownArrowUpArrow;": u"\u21f5", - "DownBreve;": u"\u0311", - "DownLeftRightVector;": u"\u2950", - "DownLeftTeeVector;": u"\u295e", - "DownLeftVector;": u"\u21bd", - "DownLeftVectorBar;": u"\u2956", - "DownRightTeeVector;": u"\u295f", - "DownRightVector;": u"\u21c1", - "DownRightVectorBar;": u"\u2957", - "DownTee;": u"\u22a4", - "DownTeeArrow;": u"\u21a7", - "Downarrow;": u"\u21d3", - "Dscr;": u"\U0001d49f", - "Dstrok;": u"\u0110", - "ENG;": u"\u014a", - "ETH": u"\xd0", - "ETH;": u"\xd0", - "Eacute": u"\xc9", - "Eacute;": u"\xc9", - "Ecaron;": u"\u011a", - "Ecirc": u"\xca", - "Ecirc;": u"\xca", - "Ecy;": u"\u042d", - "Edot;": u"\u0116", - "Efr;": u"\U0001d508", - "Egrave": u"\xc8", - "Egrave;": u"\xc8", - "Element;": u"\u2208", - "Emacr;": u"\u0112", - "EmptySmallSquare;": u"\u25fb", - "EmptyVerySmallSquare;": u"\u25ab", - "Eogon;": u"\u0118", - "Eopf;": u"\U0001d53c", - "Epsilon;": u"\u0395", - "Equal;": u"\u2a75", - "EqualTilde;": u"\u2242", - "Equilibrium;": u"\u21cc", - "Escr;": u"\u2130", - "Esim;": u"\u2a73", - "Eta;": u"\u0397", - "Euml": u"\xcb", - "Euml;": u"\xcb", - "Exists;": u"\u2203", - "ExponentialE;": u"\u2147", - "Fcy;": u"\u0424", - "Ffr;": u"\U0001d509", - "FilledSmallSquare;": u"\u25fc", - "FilledVerySmallSquare;": u"\u25aa", - "Fopf;": u"\U0001d53d", - "ForAll;": u"\u2200", - "Fouriertrf;": u"\u2131", - "Fscr;": u"\u2131", - "GJcy;": u"\u0403", - "GT": u">", - "GT;": u">", - "Gamma;": u"\u0393", - "Gammad;": u"\u03dc", - "Gbreve;": u"\u011e", - "Gcedil;": u"\u0122", - "Gcirc;": u"\u011c", - "Gcy;": u"\u0413", - "Gdot;": u"\u0120", - "Gfr;": u"\U0001d50a", - "Gg;": u"\u22d9", - "Gopf;": u"\U0001d53e", - "GreaterEqual;": u"\u2265", - "GreaterEqualLess;": u"\u22db", - "GreaterFullEqual;": u"\u2267", - "GreaterGreater;": u"\u2aa2", - "GreaterLess;": u"\u2277", - "GreaterSlantEqual;": u"\u2a7e", - "GreaterTilde;": u"\u2273", - "Gscr;": u"\U0001d4a2", - "Gt;": u"\u226b", - "HARDcy;": u"\u042a", - "Hacek;": u"\u02c7", - "Hat;": u"^", - "Hcirc;": u"\u0124", - "Hfr;": u"\u210c", - "HilbertSpace;": u"\u210b", - "Hopf;": u"\u210d", - "HorizontalLine;": u"\u2500", - "Hscr;": u"\u210b", - "Hstrok;": u"\u0126", - "HumpDownHump;": u"\u224e", - "HumpEqual;": u"\u224f", - "IEcy;": u"\u0415", - "IJlig;": u"\u0132", - "IOcy;": u"\u0401", - "Iacute": u"\xcd", - "Iacute;": u"\xcd", - "Icirc": u"\xce", - "Icirc;": u"\xce", - "Icy;": u"\u0418", - "Idot;": u"\u0130", - "Ifr;": u"\u2111", - "Igrave": u"\xcc", - "Igrave;": u"\xcc", - "Im;": u"\u2111", - "Imacr;": u"\u012a", - "ImaginaryI;": u"\u2148", - "Implies;": u"\u21d2", - "Int;": u"\u222c", - "Integral;": u"\u222b", - "Intersection;": u"\u22c2", - "InvisibleComma;": u"\u2063", - "InvisibleTimes;": u"\u2062", - "Iogon;": u"\u012e", - "Iopf;": u"\U0001d540", - "Iota;": u"\u0399", - "Iscr;": u"\u2110", - "Itilde;": u"\u0128", - "Iukcy;": u"\u0406", - "Iuml": u"\xcf", - "Iuml;": u"\xcf", - "Jcirc;": u"\u0134", - "Jcy;": u"\u0419", - "Jfr;": u"\U0001d50d", - "Jopf;": u"\U0001d541", - "Jscr;": u"\U0001d4a5", - "Jsercy;": u"\u0408", - "Jukcy;": u"\u0404", - "KHcy;": u"\u0425", - "KJcy;": u"\u040c", - "Kappa;": u"\u039a", - "Kcedil;": u"\u0136", - "Kcy;": u"\u041a", - "Kfr;": u"\U0001d50e", - "Kopf;": u"\U0001d542", - "Kscr;": u"\U0001d4a6", - "LJcy;": u"\u0409", - "LT": u"<", - "LT;": u"<", - "Lacute;": u"\u0139", - "Lambda;": u"\u039b", - "Lang;": u"\u27ea", - "Laplacetrf;": u"\u2112", - "Larr;": u"\u219e", - "Lcaron;": u"\u013d", - "Lcedil;": u"\u013b", - "Lcy;": u"\u041b", - "LeftAngleBracket;": u"\u27e8", - "LeftArrow;": u"\u2190", - "LeftArrowBar;": u"\u21e4", - "LeftArrowRightArrow;": u"\u21c6", - "LeftCeiling;": u"\u2308", - "LeftDoubleBracket;": u"\u27e6", - "LeftDownTeeVector;": u"\u2961", - "LeftDownVector;": u"\u21c3", - "LeftDownVectorBar;": u"\u2959", - "LeftFloor;": u"\u230a", - "LeftRightArrow;": u"\u2194", - "LeftRightVector;": u"\u294e", - "LeftTee;": u"\u22a3", - "LeftTeeArrow;": u"\u21a4", - "LeftTeeVector;": u"\u295a", - "LeftTriangle;": u"\u22b2", - "LeftTriangleBar;": u"\u29cf", - "LeftTriangleEqual;": u"\u22b4", - "LeftUpDownVector;": u"\u2951", - "LeftUpTeeVector;": u"\u2960", - "LeftUpVector;": u"\u21bf", - "LeftUpVectorBar;": u"\u2958", - "LeftVector;": u"\u21bc", - "LeftVectorBar;": u"\u2952", - "Leftarrow;": u"\u21d0", - "Leftrightarrow;": u"\u21d4", - "LessEqualGreater;": u"\u22da", - "LessFullEqual;": u"\u2266", - "LessGreater;": u"\u2276", - "LessLess;": u"\u2aa1", - "LessSlantEqual;": u"\u2a7d", - "LessTilde;": u"\u2272", - "Lfr;": u"\U0001d50f", - "Ll;": u"\u22d8", - "Lleftarrow;": u"\u21da", - "Lmidot;": u"\u013f", - "LongLeftArrow;": u"\u27f5", - "LongLeftRightArrow;": u"\u27f7", - "LongRightArrow;": u"\u27f6", - "Longleftarrow;": u"\u27f8", - "Longleftrightarrow;": u"\u27fa", - "Longrightarrow;": u"\u27f9", - "Lopf;": u"\U0001d543", - "LowerLeftArrow;": u"\u2199", - "LowerRightArrow;": u"\u2198", - "Lscr;": u"\u2112", - "Lsh;": u"\u21b0", - "Lstrok;": u"\u0141", - "Lt;": u"\u226a", - "Map;": u"\u2905", - "Mcy;": u"\u041c", - "MediumSpace;": u"\u205f", - "Mellintrf;": u"\u2133", - "Mfr;": u"\U0001d510", - "MinusPlus;": u"\u2213", - "Mopf;": u"\U0001d544", - "Mscr;": u"\u2133", - "Mu;": u"\u039c", - "NJcy;": u"\u040a", - "Nacute;": u"\u0143", - "Ncaron;": u"\u0147", - "Ncedil;": u"\u0145", - "Ncy;": u"\u041d", - "NegativeMediumSpace;": u"\u200b", - "NegativeThickSpace;": u"\u200b", - "NegativeThinSpace;": u"\u200b", - "NegativeVeryThinSpace;": u"\u200b", - "NestedGreaterGreater;": u"\u226b", - "NestedLessLess;": u"\u226a", - "NewLine;": u"\n", - "Nfr;": u"\U0001d511", - "NoBreak;": u"\u2060", - "NonBreakingSpace;": u"\xa0", - "Nopf;": u"\u2115", - "Not;": u"\u2aec", - "NotCongruent;": u"\u2262", - "NotCupCap;": u"\u226d", - "NotDoubleVerticalBar;": u"\u2226", - "NotElement;": u"\u2209", - "NotEqual;": u"\u2260", - "NotEqualTilde;": u"\u2242\u0338", - "NotExists;": u"\u2204", - "NotGreater;": u"\u226f", - "NotGreaterEqual;": u"\u2271", - "NotGreaterFullEqual;": u"\u2267\u0338", - "NotGreaterGreater;": u"\u226b\u0338", - "NotGreaterLess;": u"\u2279", - "NotGreaterSlantEqual;": u"\u2a7e\u0338", - "NotGreaterTilde;": u"\u2275", - "NotHumpDownHump;": u"\u224e\u0338", - "NotHumpEqual;": u"\u224f\u0338", - "NotLeftTriangle;": u"\u22ea", - "NotLeftTriangleBar;": u"\u29cf\u0338", - "NotLeftTriangleEqual;": u"\u22ec", - "NotLess;": u"\u226e", - "NotLessEqual;": u"\u2270", - "NotLessGreater;": u"\u2278", - "NotLessLess;": u"\u226a\u0338", - "NotLessSlantEqual;": u"\u2a7d\u0338", - "NotLessTilde;": u"\u2274", - "NotNestedGreaterGreater;": u"\u2aa2\u0338", - "NotNestedLessLess;": u"\u2aa1\u0338", - "NotPrecedes;": u"\u2280", - "NotPrecedesEqual;": u"\u2aaf\u0338", - "NotPrecedesSlantEqual;": u"\u22e0", - "NotReverseElement;": u"\u220c", - "NotRightTriangle;": u"\u22eb", - "NotRightTriangleBar;": u"\u29d0\u0338", - "NotRightTriangleEqual;": u"\u22ed", - "NotSquareSubset;": u"\u228f\u0338", - "NotSquareSubsetEqual;": u"\u22e2", - "NotSquareSuperset;": u"\u2290\u0338", - "NotSquareSupersetEqual;": u"\u22e3", - "NotSubset;": u"\u2282\u20d2", - "NotSubsetEqual;": u"\u2288", - "NotSucceeds;": u"\u2281", - "NotSucceedsEqual;": u"\u2ab0\u0338", - "NotSucceedsSlantEqual;": u"\u22e1", - "NotSucceedsTilde;": u"\u227f\u0338", - "NotSuperset;": u"\u2283\u20d2", - "NotSupersetEqual;": u"\u2289", - "NotTilde;": u"\u2241", - "NotTildeEqual;": u"\u2244", - "NotTildeFullEqual;": u"\u2247", - "NotTildeTilde;": u"\u2249", - "NotVerticalBar;": u"\u2224", - "Nscr;": u"\U0001d4a9", - "Ntilde": u"\xd1", - "Ntilde;": u"\xd1", - "Nu;": u"\u039d", - "OElig;": u"\u0152", - "Oacute": u"\xd3", - "Oacute;": u"\xd3", - "Ocirc": u"\xd4", - "Ocirc;": u"\xd4", - "Ocy;": u"\u041e", - "Odblac;": u"\u0150", - "Ofr;": u"\U0001d512", - "Ograve": u"\xd2", - "Ograve;": u"\xd2", - "Omacr;": u"\u014c", - "Omega;": u"\u03a9", - "Omicron;": u"\u039f", - "Oopf;": u"\U0001d546", - "OpenCurlyDoubleQuote;": u"\u201c", - "OpenCurlyQuote;": u"\u2018", - "Or;": u"\u2a54", - "Oscr;": u"\U0001d4aa", - "Oslash": u"\xd8", - "Oslash;": u"\xd8", - "Otilde": u"\xd5", - "Otilde;": u"\xd5", - "Otimes;": u"\u2a37", - "Ouml": u"\xd6", - "Ouml;": u"\xd6", - "OverBar;": u"\u203e", - "OverBrace;": u"\u23de", - "OverBracket;": u"\u23b4", - "OverParenthesis;": u"\u23dc", - "PartialD;": u"\u2202", - "Pcy;": u"\u041f", - "Pfr;": u"\U0001d513", - "Phi;": u"\u03a6", - "Pi;": u"\u03a0", - "PlusMinus;": u"\xb1", - "Poincareplane;": u"\u210c", - "Popf;": u"\u2119", - "Pr;": u"\u2abb", - "Precedes;": u"\u227a", - "PrecedesEqual;": u"\u2aaf", - "PrecedesSlantEqual;": u"\u227c", - "PrecedesTilde;": u"\u227e", - "Prime;": u"\u2033", - "Product;": u"\u220f", - "Proportion;": u"\u2237", - "Proportional;": u"\u221d", - "Pscr;": u"\U0001d4ab", - "Psi;": u"\u03a8", - "QUOT": u"\"", - "QUOT;": u"\"", - "Qfr;": u"\U0001d514", - "Qopf;": u"\u211a", - "Qscr;": u"\U0001d4ac", - "RBarr;": u"\u2910", - "REG": u"\xae", - "REG;": u"\xae", - "Racute;": u"\u0154", - "Rang;": u"\u27eb", - "Rarr;": u"\u21a0", - "Rarrtl;": u"\u2916", - "Rcaron;": u"\u0158", - "Rcedil;": u"\u0156", - "Rcy;": u"\u0420", - "Re;": u"\u211c", - "ReverseElement;": u"\u220b", - "ReverseEquilibrium;": u"\u21cb", - "ReverseUpEquilibrium;": u"\u296f", - "Rfr;": u"\u211c", - "Rho;": u"\u03a1", - "RightAngleBracket;": u"\u27e9", - "RightArrow;": u"\u2192", - "RightArrowBar;": u"\u21e5", - "RightArrowLeftArrow;": u"\u21c4", - "RightCeiling;": u"\u2309", - "RightDoubleBracket;": u"\u27e7", - "RightDownTeeVector;": u"\u295d", - "RightDownVector;": u"\u21c2", - "RightDownVectorBar;": u"\u2955", - "RightFloor;": u"\u230b", - "RightTee;": u"\u22a2", - "RightTeeArrow;": u"\u21a6", - "RightTeeVector;": u"\u295b", - "RightTriangle;": u"\u22b3", - "RightTriangleBar;": u"\u29d0", - "RightTriangleEqual;": u"\u22b5", - "RightUpDownVector;": u"\u294f", - "RightUpTeeVector;": u"\u295c", - "RightUpVector;": u"\u21be", - "RightUpVectorBar;": u"\u2954", - "RightVector;": u"\u21c0", - "RightVectorBar;": u"\u2953", - "Rightarrow;": u"\u21d2", - "Ropf;": u"\u211d", - "RoundImplies;": u"\u2970", - "Rrightarrow;": u"\u21db", - "Rscr;": u"\u211b", - "Rsh;": u"\u21b1", - "RuleDelayed;": u"\u29f4", - "SHCHcy;": u"\u0429", - "SHcy;": u"\u0428", - "SOFTcy;": u"\u042c", - "Sacute;": u"\u015a", - "Sc;": u"\u2abc", - "Scaron;": u"\u0160", - "Scedil;": u"\u015e", - "Scirc;": u"\u015c", - "Scy;": u"\u0421", - "Sfr;": u"\U0001d516", - "ShortDownArrow;": u"\u2193", - "ShortLeftArrow;": u"\u2190", - "ShortRightArrow;": u"\u2192", - "ShortUpArrow;": u"\u2191", - "Sigma;": u"\u03a3", - "SmallCircle;": u"\u2218", - "Sopf;": u"\U0001d54a", - "Sqrt;": u"\u221a", - "Square;": u"\u25a1", - "SquareIntersection;": u"\u2293", - "SquareSubset;": u"\u228f", - "SquareSubsetEqual;": u"\u2291", - "SquareSuperset;": u"\u2290", - "SquareSupersetEqual;": u"\u2292", - "SquareUnion;": u"\u2294", - "Sscr;": u"\U0001d4ae", - "Star;": u"\u22c6", - "Sub;": u"\u22d0", - "Subset;": u"\u22d0", - "SubsetEqual;": u"\u2286", - "Succeeds;": u"\u227b", - "SucceedsEqual;": u"\u2ab0", - "SucceedsSlantEqual;": u"\u227d", - "SucceedsTilde;": u"\u227f", - "SuchThat;": u"\u220b", - "Sum;": u"\u2211", - "Sup;": u"\u22d1", - "Superset;": u"\u2283", - "SupersetEqual;": u"\u2287", - "Supset;": u"\u22d1", - "THORN": u"\xde", - "THORN;": u"\xde", - "TRADE;": u"\u2122", - "TSHcy;": u"\u040b", - "TScy;": u"\u0426", - "Tab;": u"\t", - "Tau;": u"\u03a4", - "Tcaron;": u"\u0164", - "Tcedil;": u"\u0162", - "Tcy;": u"\u0422", - "Tfr;": u"\U0001d517", - "Therefore;": u"\u2234", - "Theta;": u"\u0398", - "ThickSpace;": u"\u205f\u200a", - "ThinSpace;": u"\u2009", - "Tilde;": u"\u223c", - "TildeEqual;": u"\u2243", - "TildeFullEqual;": u"\u2245", - "TildeTilde;": u"\u2248", - "Topf;": u"\U0001d54b", - "TripleDot;": u"\u20db", - "Tscr;": u"\U0001d4af", - "Tstrok;": u"\u0166", - "Uacute": u"\xda", - "Uacute;": u"\xda", - "Uarr;": u"\u219f", - "Uarrocir;": u"\u2949", - "Ubrcy;": u"\u040e", - "Ubreve;": u"\u016c", - "Ucirc": u"\xdb", - "Ucirc;": u"\xdb", - "Ucy;": u"\u0423", - "Udblac;": u"\u0170", - "Ufr;": u"\U0001d518", - "Ugrave": u"\xd9", - "Ugrave;": u"\xd9", - "Umacr;": u"\u016a", - "UnderBar;": u"_", - "UnderBrace;": u"\u23df", - "UnderBracket;": u"\u23b5", - "UnderParenthesis;": u"\u23dd", - "Union;": u"\u22c3", - "UnionPlus;": u"\u228e", - "Uogon;": u"\u0172", - "Uopf;": u"\U0001d54c", - "UpArrow;": u"\u2191", - "UpArrowBar;": u"\u2912", - "UpArrowDownArrow;": u"\u21c5", - "UpDownArrow;": u"\u2195", - "UpEquilibrium;": u"\u296e", - "UpTee;": u"\u22a5", - "UpTeeArrow;": u"\u21a5", - "Uparrow;": u"\u21d1", - "Updownarrow;": u"\u21d5", - "UpperLeftArrow;": u"\u2196", - "UpperRightArrow;": u"\u2197", - "Upsi;": u"\u03d2", - "Upsilon;": u"\u03a5", - "Uring;": u"\u016e", - "Uscr;": u"\U0001d4b0", - "Utilde;": u"\u0168", - "Uuml": u"\xdc", - "Uuml;": u"\xdc", - "VDash;": u"\u22ab", - "Vbar;": u"\u2aeb", - "Vcy;": u"\u0412", - "Vdash;": u"\u22a9", - "Vdashl;": u"\u2ae6", - "Vee;": u"\u22c1", - "Verbar;": u"\u2016", - "Vert;": u"\u2016", - "VerticalBar;": u"\u2223", - "VerticalLine;": u"|", - "VerticalSeparator;": u"\u2758", - "VerticalTilde;": u"\u2240", - "VeryThinSpace;": u"\u200a", - "Vfr;": u"\U0001d519", - "Vopf;": u"\U0001d54d", - "Vscr;": u"\U0001d4b1", - "Vvdash;": u"\u22aa", - "Wcirc;": u"\u0174", - "Wedge;": u"\u22c0", - "Wfr;": u"\U0001d51a", - "Wopf;": u"\U0001d54e", - "Wscr;": u"\U0001d4b2", - "Xfr;": u"\U0001d51b", - "Xi;": u"\u039e", - "Xopf;": u"\U0001d54f", - "Xscr;": u"\U0001d4b3", - "YAcy;": u"\u042f", - "YIcy;": u"\u0407", - "YUcy;": u"\u042e", - "Yacute": u"\xdd", - "Yacute;": u"\xdd", - "Ycirc;": u"\u0176", - "Ycy;": u"\u042b", - "Yfr;": u"\U0001d51c", - "Yopf;": u"\U0001d550", - "Yscr;": u"\U0001d4b4", - "Yuml;": u"\u0178", - "ZHcy;": u"\u0416", - "Zacute;": u"\u0179", - "Zcaron;": u"\u017d", - "Zcy;": u"\u0417", - "Zdot;": u"\u017b", - "ZeroWidthSpace;": u"\u200b", - "Zeta;": u"\u0396", - "Zfr;": u"\u2128", - "Zopf;": u"\u2124", - "Zscr;": u"\U0001d4b5", - "aacute": u"\xe1", - "aacute;": u"\xe1", - "abreve;": u"\u0103", - "ac;": u"\u223e", - "acE;": u"\u223e\u0333", - "acd;": u"\u223f", - "acirc": u"\xe2", - "acirc;": u"\xe2", - "acute": u"\xb4", - "acute;": u"\xb4", - "acy;": u"\u0430", - "aelig": u"\xe6", - "aelig;": u"\xe6", - "af;": u"\u2061", - "afr;": u"\U0001d51e", - "agrave": u"\xe0", - "agrave;": u"\xe0", - "alefsym;": u"\u2135", - "aleph;": u"\u2135", - "alpha;": u"\u03b1", - "amacr;": u"\u0101", - "amalg;": u"\u2a3f", - "amp": u"&", - "amp;": u"&", - "and;": u"\u2227", - "andand;": u"\u2a55", - "andd;": u"\u2a5c", - "andslope;": u"\u2a58", - "andv;": u"\u2a5a", - "ang;": u"\u2220", - "ange;": u"\u29a4", - "angle;": u"\u2220", - "angmsd;": u"\u2221", - "angmsdaa;": u"\u29a8", - "angmsdab;": u"\u29a9", - "angmsdac;": u"\u29aa", - "angmsdad;": u"\u29ab", - "angmsdae;": u"\u29ac", - "angmsdaf;": u"\u29ad", - "angmsdag;": u"\u29ae", - "angmsdah;": u"\u29af", - "angrt;": u"\u221f", - "angrtvb;": u"\u22be", - "angrtvbd;": u"\u299d", - "angsph;": u"\u2222", - "angst;": u"\xc5", - "angzarr;": u"\u237c", - "aogon;": u"\u0105", - "aopf;": u"\U0001d552", - "ap;": u"\u2248", - "apE;": u"\u2a70", - "apacir;": u"\u2a6f", - "ape;": u"\u224a", - "apid;": u"\u224b", - "apos;": u"'", - "approx;": u"\u2248", - "approxeq;": u"\u224a", - "aring": u"\xe5", - "aring;": u"\xe5", - "ascr;": u"\U0001d4b6", - "ast;": u"*", - "asymp;": u"\u2248", - "asympeq;": u"\u224d", - "atilde": u"\xe3", - "atilde;": u"\xe3", - "auml": u"\xe4", - "auml;": u"\xe4", - "awconint;": u"\u2233", - "awint;": u"\u2a11", - "bNot;": u"\u2aed", - "backcong;": u"\u224c", - "backepsilon;": u"\u03f6", - "backprime;": u"\u2035", - "backsim;": u"\u223d", - "backsimeq;": u"\u22cd", - "barvee;": u"\u22bd", - "barwed;": u"\u2305", - "barwedge;": u"\u2305", - "bbrk;": u"\u23b5", - "bbrktbrk;": u"\u23b6", - "bcong;": u"\u224c", - "bcy;": u"\u0431", - "bdquo;": u"\u201e", - "becaus;": u"\u2235", - "because;": u"\u2235", - "bemptyv;": u"\u29b0", - "bepsi;": u"\u03f6", - "bernou;": u"\u212c", - "beta;": u"\u03b2", - "beth;": u"\u2136", - "between;": u"\u226c", - "bfr;": u"\U0001d51f", - "bigcap;": u"\u22c2", - "bigcirc;": u"\u25ef", - "bigcup;": u"\u22c3", - "bigodot;": u"\u2a00", - "bigoplus;": u"\u2a01", - "bigotimes;": u"\u2a02", - "bigsqcup;": u"\u2a06", - "bigstar;": u"\u2605", - "bigtriangledown;": u"\u25bd", - "bigtriangleup;": u"\u25b3", - "biguplus;": u"\u2a04", - "bigvee;": u"\u22c1", - "bigwedge;": u"\u22c0", - "bkarow;": u"\u290d", - "blacklozenge;": u"\u29eb", - "blacksquare;": u"\u25aa", - "blacktriangle;": u"\u25b4", - "blacktriangledown;": u"\u25be", - "blacktriangleleft;": u"\u25c2", - "blacktriangleright;": u"\u25b8", - "blank;": u"\u2423", - "blk12;": u"\u2592", - "blk14;": u"\u2591", - "blk34;": u"\u2593", - "block;": u"\u2588", - "bne;": u"=\u20e5", - "bnequiv;": u"\u2261\u20e5", - "bnot;": u"\u2310", - "bopf;": u"\U0001d553", - "bot;": u"\u22a5", - "bottom;": u"\u22a5", - "bowtie;": u"\u22c8", - "boxDL;": u"\u2557", - "boxDR;": u"\u2554", - "boxDl;": u"\u2556", - "boxDr;": u"\u2553", - "boxH;": u"\u2550", - "boxHD;": u"\u2566", - "boxHU;": u"\u2569", - "boxHd;": u"\u2564", - "boxHu;": u"\u2567", - "boxUL;": u"\u255d", - "boxUR;": u"\u255a", - "boxUl;": u"\u255c", - "boxUr;": u"\u2559", - "boxV;": u"\u2551", - "boxVH;": u"\u256c", - "boxVL;": u"\u2563", - "boxVR;": u"\u2560", - "boxVh;": u"\u256b", - "boxVl;": u"\u2562", - "boxVr;": u"\u255f", - "boxbox;": u"\u29c9", - "boxdL;": u"\u2555", - "boxdR;": u"\u2552", - "boxdl;": u"\u2510", - "boxdr;": u"\u250c", - "boxh;": u"\u2500", - "boxhD;": u"\u2565", - "boxhU;": u"\u2568", - "boxhd;": u"\u252c", - "boxhu;": u"\u2534", - "boxminus;": u"\u229f", - "boxplus;": u"\u229e", - "boxtimes;": u"\u22a0", - "boxuL;": u"\u255b", - "boxuR;": u"\u2558", - "boxul;": u"\u2518", - "boxur;": u"\u2514", - "boxv;": u"\u2502", - "boxvH;": u"\u256a", - "boxvL;": u"\u2561", - "boxvR;": u"\u255e", - "boxvh;": u"\u253c", - "boxvl;": u"\u2524", - "boxvr;": u"\u251c", - "bprime;": u"\u2035", - "breve;": u"\u02d8", - "brvbar": u"\xa6", - "brvbar;": u"\xa6", - "bscr;": u"\U0001d4b7", - "bsemi;": u"\u204f", - "bsim;": u"\u223d", - "bsime;": u"\u22cd", - "bsol;": u"\\", - "bsolb;": u"\u29c5", - "bsolhsub;": u"\u27c8", - "bull;": u"\u2022", - "bullet;": u"\u2022", - "bump;": u"\u224e", - "bumpE;": u"\u2aae", - "bumpe;": u"\u224f", - "bumpeq;": u"\u224f", - "cacute;": u"\u0107", - "cap;": u"\u2229", - "capand;": u"\u2a44", - "capbrcup;": u"\u2a49", - "capcap;": u"\u2a4b", - "capcup;": u"\u2a47", - "capdot;": u"\u2a40", - "caps;": u"\u2229\ufe00", - "caret;": u"\u2041", - "caron;": u"\u02c7", - "ccaps;": u"\u2a4d", - "ccaron;": u"\u010d", - "ccedil": u"\xe7", - "ccedil;": u"\xe7", - "ccirc;": u"\u0109", - "ccups;": u"\u2a4c", - "ccupssm;": u"\u2a50", - "cdot;": u"\u010b", - "cedil": u"\xb8", - "cedil;": u"\xb8", - "cemptyv;": u"\u29b2", - "cent": u"\xa2", - "cent;": u"\xa2", - "centerdot;": u"\xb7", - "cfr;": u"\U0001d520", - "chcy;": u"\u0447", - "check;": u"\u2713", - "checkmark;": u"\u2713", - "chi;": u"\u03c7", - "cir;": u"\u25cb", - "cirE;": u"\u29c3", - "circ;": u"\u02c6", - "circeq;": u"\u2257", - "circlearrowleft;": u"\u21ba", - "circlearrowright;": u"\u21bb", - "circledR;": u"\xae", - "circledS;": u"\u24c8", - "circledast;": u"\u229b", - "circledcirc;": u"\u229a", - "circleddash;": u"\u229d", - "cire;": u"\u2257", - "cirfnint;": u"\u2a10", - "cirmid;": u"\u2aef", - "cirscir;": u"\u29c2", - "clubs;": u"\u2663", - "clubsuit;": u"\u2663", - "colon;": u":", - "colone;": u"\u2254", - "coloneq;": u"\u2254", - "comma;": u",", - "commat;": u"@", - "comp;": u"\u2201", - "compfn;": u"\u2218", - "complement;": u"\u2201", - "complexes;": u"\u2102", - "cong;": u"\u2245", - "congdot;": u"\u2a6d", - "conint;": u"\u222e", - "copf;": u"\U0001d554", - "coprod;": u"\u2210", - "copy": u"\xa9", - "copy;": u"\xa9", - "copysr;": u"\u2117", - "crarr;": u"\u21b5", - "cross;": u"\u2717", - "cscr;": u"\U0001d4b8", - "csub;": u"\u2acf", - "csube;": u"\u2ad1", - "csup;": u"\u2ad0", - "csupe;": u"\u2ad2", - "ctdot;": u"\u22ef", - "cudarrl;": u"\u2938", - "cudarrr;": u"\u2935", - "cuepr;": u"\u22de", - "cuesc;": u"\u22df", - "cularr;": u"\u21b6", - "cularrp;": u"\u293d", - "cup;": u"\u222a", - "cupbrcap;": u"\u2a48", - "cupcap;": u"\u2a46", - "cupcup;": u"\u2a4a", - "cupdot;": u"\u228d", - "cupor;": u"\u2a45", - "cups;": u"\u222a\ufe00", - "curarr;": u"\u21b7", - "curarrm;": u"\u293c", - "curlyeqprec;": u"\u22de", - "curlyeqsucc;": u"\u22df", - "curlyvee;": u"\u22ce", - "curlywedge;": u"\u22cf", - "curren": u"\xa4", - "curren;": u"\xa4", - "curvearrowleft;": u"\u21b6", - "curvearrowright;": u"\u21b7", - "cuvee;": u"\u22ce", - "cuwed;": u"\u22cf", - "cwconint;": u"\u2232", - "cwint;": u"\u2231", - "cylcty;": u"\u232d", - "dArr;": u"\u21d3", - "dHar;": u"\u2965", - "dagger;": u"\u2020", - "daleth;": u"\u2138", - "darr;": u"\u2193", - "dash;": u"\u2010", - "dashv;": u"\u22a3", - "dbkarow;": u"\u290f", - "dblac;": u"\u02dd", - "dcaron;": u"\u010f", - "dcy;": u"\u0434", - "dd;": u"\u2146", - "ddagger;": u"\u2021", - "ddarr;": u"\u21ca", - "ddotseq;": u"\u2a77", - "deg": u"\xb0", - "deg;": u"\xb0", - "delta;": u"\u03b4", - "demptyv;": u"\u29b1", - "dfisht;": u"\u297f", - "dfr;": u"\U0001d521", - "dharl;": u"\u21c3", - "dharr;": u"\u21c2", - "diam;": u"\u22c4", - "diamond;": u"\u22c4", - "diamondsuit;": u"\u2666", - "diams;": u"\u2666", - "die;": u"\xa8", - "digamma;": u"\u03dd", - "disin;": u"\u22f2", - "div;": u"\xf7", - "divide": u"\xf7", - "divide;": u"\xf7", - "divideontimes;": u"\u22c7", - "divonx;": u"\u22c7", - "djcy;": u"\u0452", - "dlcorn;": u"\u231e", - "dlcrop;": u"\u230d", - "dollar;": u"$", - "dopf;": u"\U0001d555", - "dot;": u"\u02d9", - "doteq;": u"\u2250", - "doteqdot;": u"\u2251", - "dotminus;": u"\u2238", - "dotplus;": u"\u2214", - "dotsquare;": u"\u22a1", - "doublebarwedge;": u"\u2306", - "downarrow;": u"\u2193", - "downdownarrows;": u"\u21ca", - "downharpoonleft;": u"\u21c3", - "downharpoonright;": u"\u21c2", - "drbkarow;": u"\u2910", - "drcorn;": u"\u231f", - "drcrop;": u"\u230c", - "dscr;": u"\U0001d4b9", - "dscy;": u"\u0455", - "dsol;": u"\u29f6", - "dstrok;": u"\u0111", - "dtdot;": u"\u22f1", - "dtri;": u"\u25bf", - "dtrif;": u"\u25be", - "duarr;": u"\u21f5", - "duhar;": u"\u296f", - "dwangle;": u"\u29a6", - "dzcy;": u"\u045f", - "dzigrarr;": u"\u27ff", - "eDDot;": u"\u2a77", - "eDot;": u"\u2251", - "eacute": u"\xe9", - "eacute;": u"\xe9", - "easter;": u"\u2a6e", - "ecaron;": u"\u011b", - "ecir;": u"\u2256", - "ecirc": u"\xea", - "ecirc;": u"\xea", - "ecolon;": u"\u2255", - "ecy;": u"\u044d", - "edot;": u"\u0117", - "ee;": u"\u2147", - "efDot;": u"\u2252", - "efr;": u"\U0001d522", - "eg;": u"\u2a9a", - "egrave": u"\xe8", - "egrave;": u"\xe8", - "egs;": u"\u2a96", - "egsdot;": u"\u2a98", - "el;": u"\u2a99", - "elinters;": u"\u23e7", - "ell;": u"\u2113", - "els;": u"\u2a95", - "elsdot;": u"\u2a97", - "emacr;": u"\u0113", - "empty;": u"\u2205", - "emptyset;": u"\u2205", - "emptyv;": u"\u2205", - "emsp13;": u"\u2004", - "emsp14;": u"\u2005", - "emsp;": u"\u2003", - "eng;": u"\u014b", - "ensp;": u"\u2002", - "eogon;": u"\u0119", - "eopf;": u"\U0001d556", - "epar;": u"\u22d5", - "eparsl;": u"\u29e3", - "eplus;": u"\u2a71", - "epsi;": u"\u03b5", - "epsilon;": u"\u03b5", - "epsiv;": u"\u03f5", - "eqcirc;": u"\u2256", - "eqcolon;": u"\u2255", - "eqsim;": u"\u2242", - "eqslantgtr;": u"\u2a96", - "eqslantless;": u"\u2a95", - "equals;": u"=", - "equest;": u"\u225f", - "equiv;": u"\u2261", - "equivDD;": u"\u2a78", - "eqvparsl;": u"\u29e5", - "erDot;": u"\u2253", - "erarr;": u"\u2971", - "escr;": u"\u212f", - "esdot;": u"\u2250", - "esim;": u"\u2242", - "eta;": u"\u03b7", - "eth": u"\xf0", - "eth;": u"\xf0", - "euml": u"\xeb", - "euml;": u"\xeb", - "euro;": u"\u20ac", - "excl;": u"!", - "exist;": u"\u2203", - "expectation;": u"\u2130", - "exponentiale;": u"\u2147", - "fallingdotseq;": u"\u2252", - "fcy;": u"\u0444", - "female;": u"\u2640", - "ffilig;": u"\ufb03", - "fflig;": u"\ufb00", - "ffllig;": u"\ufb04", - "ffr;": u"\U0001d523", - "filig;": u"\ufb01", - "fjlig;": u"fj", - "flat;": u"\u266d", - "fllig;": u"\ufb02", - "fltns;": u"\u25b1", - "fnof;": u"\u0192", - "fopf;": u"\U0001d557", - "forall;": u"\u2200", - "fork;": u"\u22d4", - "forkv;": u"\u2ad9", - "fpartint;": u"\u2a0d", - "frac12": u"\xbd", - "frac12;": u"\xbd", - "frac13;": u"\u2153", - "frac14": u"\xbc", - "frac14;": u"\xbc", - "frac15;": u"\u2155", - "frac16;": u"\u2159", - "frac18;": u"\u215b", - "frac23;": u"\u2154", - "frac25;": u"\u2156", - "frac34": u"\xbe", - "frac34;": u"\xbe", - "frac35;": u"\u2157", - "frac38;": u"\u215c", - "frac45;": u"\u2158", - "frac56;": u"\u215a", - "frac58;": u"\u215d", - "frac78;": u"\u215e", - "frasl;": u"\u2044", - "frown;": u"\u2322", - "fscr;": u"\U0001d4bb", - "gE;": u"\u2267", - "gEl;": u"\u2a8c", - "gacute;": u"\u01f5", - "gamma;": u"\u03b3", - "gammad;": u"\u03dd", - "gap;": u"\u2a86", - "gbreve;": u"\u011f", - "gcirc;": u"\u011d", - "gcy;": u"\u0433", - "gdot;": u"\u0121", - "ge;": u"\u2265", - "gel;": u"\u22db", - "geq;": u"\u2265", - "geqq;": u"\u2267", - "geqslant;": u"\u2a7e", - "ges;": u"\u2a7e", - "gescc;": u"\u2aa9", - "gesdot;": u"\u2a80", - "gesdoto;": u"\u2a82", - "gesdotol;": u"\u2a84", - "gesl;": u"\u22db\ufe00", - "gesles;": u"\u2a94", - "gfr;": u"\U0001d524", - "gg;": u"\u226b", - "ggg;": u"\u22d9", - "gimel;": u"\u2137", - "gjcy;": u"\u0453", - "gl;": u"\u2277", - "glE;": u"\u2a92", - "gla;": u"\u2aa5", - "glj;": u"\u2aa4", - "gnE;": u"\u2269", - "gnap;": u"\u2a8a", - "gnapprox;": u"\u2a8a", - "gne;": u"\u2a88", - "gneq;": u"\u2a88", - "gneqq;": u"\u2269", - "gnsim;": u"\u22e7", - "gopf;": u"\U0001d558", - "grave;": u"`", - "gscr;": u"\u210a", - "gsim;": u"\u2273", - "gsime;": u"\u2a8e", - "gsiml;": u"\u2a90", - "gt": u">", - "gt;": u">", - "gtcc;": u"\u2aa7", - "gtcir;": u"\u2a7a", - "gtdot;": u"\u22d7", - "gtlPar;": u"\u2995", - "gtquest;": u"\u2a7c", - "gtrapprox;": u"\u2a86", - "gtrarr;": u"\u2978", - "gtrdot;": u"\u22d7", - "gtreqless;": u"\u22db", - "gtreqqless;": u"\u2a8c", - "gtrless;": u"\u2277", - "gtrsim;": u"\u2273", - "gvertneqq;": u"\u2269\ufe00", - "gvnE;": u"\u2269\ufe00", - "hArr;": u"\u21d4", - "hairsp;": u"\u200a", - "half;": u"\xbd", - "hamilt;": u"\u210b", - "hardcy;": u"\u044a", - "harr;": u"\u2194", - "harrcir;": u"\u2948", - "harrw;": u"\u21ad", - "hbar;": u"\u210f", - "hcirc;": u"\u0125", - "hearts;": u"\u2665", - "heartsuit;": u"\u2665", - "hellip;": u"\u2026", - "hercon;": u"\u22b9", - "hfr;": u"\U0001d525", - "hksearow;": u"\u2925", - "hkswarow;": u"\u2926", - "hoarr;": u"\u21ff", - "homtht;": u"\u223b", - "hookleftarrow;": u"\u21a9", - "hookrightarrow;": u"\u21aa", - "hopf;": u"\U0001d559", - "horbar;": u"\u2015", - "hscr;": u"\U0001d4bd", - "hslash;": u"\u210f", - "hstrok;": u"\u0127", - "hybull;": u"\u2043", - "hyphen;": u"\u2010", - "iacute": u"\xed", - "iacute;": u"\xed", - "ic;": u"\u2063", - "icirc": u"\xee", - "icirc;": u"\xee", - "icy;": u"\u0438", - "iecy;": u"\u0435", - "iexcl": u"\xa1", - "iexcl;": u"\xa1", - "iff;": u"\u21d4", - "ifr;": u"\U0001d526", - "igrave": u"\xec", - "igrave;": u"\xec", - "ii;": u"\u2148", - "iiiint;": u"\u2a0c", - "iiint;": u"\u222d", - "iinfin;": u"\u29dc", - "iiota;": u"\u2129", - "ijlig;": u"\u0133", - "imacr;": u"\u012b", - "image;": u"\u2111", - "imagline;": u"\u2110", - "imagpart;": u"\u2111", - "imath;": u"\u0131", - "imof;": u"\u22b7", - "imped;": u"\u01b5", - "in;": u"\u2208", - "incare;": u"\u2105", - "infin;": u"\u221e", - "infintie;": u"\u29dd", - "inodot;": u"\u0131", - "int;": u"\u222b", - "intcal;": u"\u22ba", - "integers;": u"\u2124", - "intercal;": u"\u22ba", - "intlarhk;": u"\u2a17", - "intprod;": u"\u2a3c", - "iocy;": u"\u0451", - "iogon;": u"\u012f", - "iopf;": u"\U0001d55a", - "iota;": u"\u03b9", - "iprod;": u"\u2a3c", - "iquest": u"\xbf", - "iquest;": u"\xbf", - "iscr;": u"\U0001d4be", - "isin;": u"\u2208", - "isinE;": u"\u22f9", - "isindot;": u"\u22f5", - "isins;": u"\u22f4", - "isinsv;": u"\u22f3", - "isinv;": u"\u2208", - "it;": u"\u2062", - "itilde;": u"\u0129", - "iukcy;": u"\u0456", - "iuml": u"\xef", - "iuml;": u"\xef", - "jcirc;": u"\u0135", - "jcy;": u"\u0439", - "jfr;": u"\U0001d527", - "jmath;": u"\u0237", - "jopf;": u"\U0001d55b", - "jscr;": u"\U0001d4bf", - "jsercy;": u"\u0458", - "jukcy;": u"\u0454", - "kappa;": u"\u03ba", - "kappav;": u"\u03f0", - "kcedil;": u"\u0137", - "kcy;": u"\u043a", - "kfr;": u"\U0001d528", - "kgreen;": u"\u0138", - "khcy;": u"\u0445", - "kjcy;": u"\u045c", - "kopf;": u"\U0001d55c", - "kscr;": u"\U0001d4c0", - "lAarr;": u"\u21da", - "lArr;": u"\u21d0", - "lAtail;": u"\u291b", - "lBarr;": u"\u290e", - "lE;": u"\u2266", - "lEg;": u"\u2a8b", - "lHar;": u"\u2962", - "lacute;": u"\u013a", - "laemptyv;": u"\u29b4", - "lagran;": u"\u2112", - "lambda;": u"\u03bb", - "lang;": u"\u27e8", - "langd;": u"\u2991", - "langle;": u"\u27e8", - "lap;": u"\u2a85", - "laquo": u"\xab", - "laquo;": u"\xab", - "larr;": u"\u2190", - "larrb;": u"\u21e4", - "larrbfs;": u"\u291f", - "larrfs;": u"\u291d", - "larrhk;": u"\u21a9", - "larrlp;": u"\u21ab", - "larrpl;": u"\u2939", - "larrsim;": u"\u2973", - "larrtl;": u"\u21a2", - "lat;": u"\u2aab", - "latail;": u"\u2919", - "late;": u"\u2aad", - "lates;": u"\u2aad\ufe00", - "lbarr;": u"\u290c", - "lbbrk;": u"\u2772", - "lbrace;": u"{", - "lbrack;": u"[", - "lbrke;": u"\u298b", - "lbrksld;": u"\u298f", - "lbrkslu;": u"\u298d", - "lcaron;": u"\u013e", - "lcedil;": u"\u013c", - "lceil;": u"\u2308", - "lcub;": u"{", - "lcy;": u"\u043b", - "ldca;": u"\u2936", - "ldquo;": u"\u201c", - "ldquor;": u"\u201e", - "ldrdhar;": u"\u2967", - "ldrushar;": u"\u294b", - "ldsh;": u"\u21b2", - "le;": u"\u2264", - "leftarrow;": u"\u2190", - "leftarrowtail;": u"\u21a2", - "leftharpoondown;": u"\u21bd", - "leftharpoonup;": u"\u21bc", - "leftleftarrows;": u"\u21c7", - "leftrightarrow;": u"\u2194", - "leftrightarrows;": u"\u21c6", - "leftrightharpoons;": u"\u21cb", - "leftrightsquigarrow;": u"\u21ad", - "leftthreetimes;": u"\u22cb", - "leg;": u"\u22da", - "leq;": u"\u2264", - "leqq;": u"\u2266", - "leqslant;": u"\u2a7d", - "les;": u"\u2a7d", - "lescc;": u"\u2aa8", - "lesdot;": u"\u2a7f", - "lesdoto;": u"\u2a81", - "lesdotor;": u"\u2a83", - "lesg;": u"\u22da\ufe00", - "lesges;": u"\u2a93", - "lessapprox;": u"\u2a85", - "lessdot;": u"\u22d6", - "lesseqgtr;": u"\u22da", - "lesseqqgtr;": u"\u2a8b", - "lessgtr;": u"\u2276", - "lesssim;": u"\u2272", - "lfisht;": u"\u297c", - "lfloor;": u"\u230a", - "lfr;": u"\U0001d529", - "lg;": u"\u2276", - "lgE;": u"\u2a91", - "lhard;": u"\u21bd", - "lharu;": u"\u21bc", - "lharul;": u"\u296a", - "lhblk;": u"\u2584", - "ljcy;": u"\u0459", - "ll;": u"\u226a", - "llarr;": u"\u21c7", - "llcorner;": u"\u231e", - "llhard;": u"\u296b", - "lltri;": u"\u25fa", - "lmidot;": u"\u0140", - "lmoust;": u"\u23b0", - "lmoustache;": u"\u23b0", - "lnE;": u"\u2268", - "lnap;": u"\u2a89", - "lnapprox;": u"\u2a89", - "lne;": u"\u2a87", - "lneq;": u"\u2a87", - "lneqq;": u"\u2268", - "lnsim;": u"\u22e6", - "loang;": u"\u27ec", - "loarr;": u"\u21fd", - "lobrk;": u"\u27e6", - "longleftarrow;": u"\u27f5", - "longleftrightarrow;": u"\u27f7", - "longmapsto;": u"\u27fc", - "longrightarrow;": u"\u27f6", - "looparrowleft;": u"\u21ab", - "looparrowright;": u"\u21ac", - "lopar;": u"\u2985", - "lopf;": u"\U0001d55d", - "loplus;": u"\u2a2d", - "lotimes;": u"\u2a34", - "lowast;": u"\u2217", - "lowbar;": u"_", - "loz;": u"\u25ca", - "lozenge;": u"\u25ca", - "lozf;": u"\u29eb", - "lpar;": u"(", - "lparlt;": u"\u2993", - "lrarr;": u"\u21c6", - "lrcorner;": u"\u231f", - "lrhar;": u"\u21cb", - "lrhard;": u"\u296d", - "lrm;": u"\u200e", - "lrtri;": u"\u22bf", - "lsaquo;": u"\u2039", - "lscr;": u"\U0001d4c1", - "lsh;": u"\u21b0", - "lsim;": u"\u2272", - "lsime;": u"\u2a8d", - "lsimg;": u"\u2a8f", - "lsqb;": u"[", - "lsquo;": u"\u2018", - "lsquor;": u"\u201a", - "lstrok;": u"\u0142", - "lt": u"<", - "lt;": u"<", - "ltcc;": u"\u2aa6", - "ltcir;": u"\u2a79", - "ltdot;": u"\u22d6", - "lthree;": u"\u22cb", - "ltimes;": u"\u22c9", - "ltlarr;": u"\u2976", - "ltquest;": u"\u2a7b", - "ltrPar;": u"\u2996", - "ltri;": u"\u25c3", - "ltrie;": u"\u22b4", - "ltrif;": u"\u25c2", - "lurdshar;": u"\u294a", - "luruhar;": u"\u2966", - "lvertneqq;": u"\u2268\ufe00", - "lvnE;": u"\u2268\ufe00", - "mDDot;": u"\u223a", - "macr": u"\xaf", - "macr;": u"\xaf", - "male;": u"\u2642", - "malt;": u"\u2720", - "maltese;": u"\u2720", - "map;": u"\u21a6", - "mapsto;": u"\u21a6", - "mapstodown;": u"\u21a7", - "mapstoleft;": u"\u21a4", - "mapstoup;": u"\u21a5", - "marker;": u"\u25ae", - "mcomma;": u"\u2a29", - "mcy;": u"\u043c", - "mdash;": u"\u2014", - "measuredangle;": u"\u2221", - "mfr;": u"\U0001d52a", - "mho;": u"\u2127", - "micro": u"\xb5", - "micro;": u"\xb5", - "mid;": u"\u2223", - "midast;": u"*", - "midcir;": u"\u2af0", - "middot": u"\xb7", - "middot;": u"\xb7", - "minus;": u"\u2212", - "minusb;": u"\u229f", - "minusd;": u"\u2238", - "minusdu;": u"\u2a2a", - "mlcp;": u"\u2adb", - "mldr;": u"\u2026", - "mnplus;": u"\u2213", - "models;": u"\u22a7", - "mopf;": u"\U0001d55e", - "mp;": u"\u2213", - "mscr;": u"\U0001d4c2", - "mstpos;": u"\u223e", - "mu;": u"\u03bc", - "multimap;": u"\u22b8", - "mumap;": u"\u22b8", - "nGg;": u"\u22d9\u0338", - "nGt;": u"\u226b\u20d2", - "nGtv;": u"\u226b\u0338", - "nLeftarrow;": u"\u21cd", - "nLeftrightarrow;": u"\u21ce", - "nLl;": u"\u22d8\u0338", - "nLt;": u"\u226a\u20d2", - "nLtv;": u"\u226a\u0338", - "nRightarrow;": u"\u21cf", - "nVDash;": u"\u22af", - "nVdash;": u"\u22ae", - "nabla;": u"\u2207", - "nacute;": u"\u0144", - "nang;": u"\u2220\u20d2", - "nap;": u"\u2249", - "napE;": u"\u2a70\u0338", - "napid;": u"\u224b\u0338", - "napos;": u"\u0149", - "napprox;": u"\u2249", - "natur;": u"\u266e", - "natural;": u"\u266e", - "naturals;": u"\u2115", - "nbsp": u"\xa0", - "nbsp;": u"\xa0", - "nbump;": u"\u224e\u0338", - "nbumpe;": u"\u224f\u0338", - "ncap;": u"\u2a43", - "ncaron;": u"\u0148", - "ncedil;": u"\u0146", - "ncong;": u"\u2247", - "ncongdot;": u"\u2a6d\u0338", - "ncup;": u"\u2a42", - "ncy;": u"\u043d", - "ndash;": u"\u2013", - "ne;": u"\u2260", - "neArr;": u"\u21d7", - "nearhk;": u"\u2924", - "nearr;": u"\u2197", - "nearrow;": u"\u2197", - "nedot;": u"\u2250\u0338", - "nequiv;": u"\u2262", - "nesear;": u"\u2928", - "nesim;": u"\u2242\u0338", - "nexist;": u"\u2204", - "nexists;": u"\u2204", - "nfr;": u"\U0001d52b", - "ngE;": u"\u2267\u0338", - "nge;": u"\u2271", - "ngeq;": u"\u2271", - "ngeqq;": u"\u2267\u0338", - "ngeqslant;": u"\u2a7e\u0338", - "nges;": u"\u2a7e\u0338", - "ngsim;": u"\u2275", - "ngt;": u"\u226f", - "ngtr;": u"\u226f", - "nhArr;": u"\u21ce", - "nharr;": u"\u21ae", - "nhpar;": u"\u2af2", - "ni;": u"\u220b", - "nis;": u"\u22fc", - "nisd;": u"\u22fa", - "niv;": u"\u220b", - "njcy;": u"\u045a", - "nlArr;": u"\u21cd", - "nlE;": u"\u2266\u0338", - "nlarr;": u"\u219a", - "nldr;": u"\u2025", - "nle;": u"\u2270", - "nleftarrow;": u"\u219a", - "nleftrightarrow;": u"\u21ae", - "nleq;": u"\u2270", - "nleqq;": u"\u2266\u0338", - "nleqslant;": u"\u2a7d\u0338", - "nles;": u"\u2a7d\u0338", - "nless;": u"\u226e", - "nlsim;": u"\u2274", - "nlt;": u"\u226e", - "nltri;": u"\u22ea", - "nltrie;": u"\u22ec", - "nmid;": u"\u2224", - "nopf;": u"\U0001d55f", - "not": u"\xac", - "not;": u"\xac", - "notin;": u"\u2209", - "notinE;": u"\u22f9\u0338", - "notindot;": u"\u22f5\u0338", - "notinva;": u"\u2209", - "notinvb;": u"\u22f7", - "notinvc;": u"\u22f6", - "notni;": u"\u220c", - "notniva;": u"\u220c", - "notnivb;": u"\u22fe", - "notnivc;": u"\u22fd", - "npar;": u"\u2226", - "nparallel;": u"\u2226", - "nparsl;": u"\u2afd\u20e5", - "npart;": u"\u2202\u0338", - "npolint;": u"\u2a14", - "npr;": u"\u2280", - "nprcue;": u"\u22e0", - "npre;": u"\u2aaf\u0338", - "nprec;": u"\u2280", - "npreceq;": u"\u2aaf\u0338", - "nrArr;": u"\u21cf", - "nrarr;": u"\u219b", - "nrarrc;": u"\u2933\u0338", - "nrarrw;": u"\u219d\u0338", - "nrightarrow;": u"\u219b", - "nrtri;": u"\u22eb", - "nrtrie;": u"\u22ed", - "nsc;": u"\u2281", - "nsccue;": u"\u22e1", - "nsce;": u"\u2ab0\u0338", - "nscr;": u"\U0001d4c3", - "nshortmid;": u"\u2224", - "nshortparallel;": u"\u2226", - "nsim;": u"\u2241", - "nsime;": u"\u2244", - "nsimeq;": u"\u2244", - "nsmid;": u"\u2224", - "nspar;": u"\u2226", - "nsqsube;": u"\u22e2", - "nsqsupe;": u"\u22e3", - "nsub;": u"\u2284", - "nsubE;": u"\u2ac5\u0338", - "nsube;": u"\u2288", - "nsubset;": u"\u2282\u20d2", - "nsubseteq;": u"\u2288", - "nsubseteqq;": u"\u2ac5\u0338", - "nsucc;": u"\u2281", - "nsucceq;": u"\u2ab0\u0338", - "nsup;": u"\u2285", - "nsupE;": u"\u2ac6\u0338", - "nsupe;": u"\u2289", - "nsupset;": u"\u2283\u20d2", - "nsupseteq;": u"\u2289", - "nsupseteqq;": u"\u2ac6\u0338", - "ntgl;": u"\u2279", - "ntilde": u"\xf1", - "ntilde;": u"\xf1", - "ntlg;": u"\u2278", - "ntriangleleft;": u"\u22ea", - "ntrianglelefteq;": u"\u22ec", - "ntriangleright;": u"\u22eb", - "ntrianglerighteq;": u"\u22ed", - "nu;": u"\u03bd", - "num;": u"#", - "numero;": u"\u2116", - "numsp;": u"\u2007", - "nvDash;": u"\u22ad", - "nvHarr;": u"\u2904", - "nvap;": u"\u224d\u20d2", - "nvdash;": u"\u22ac", - "nvge;": u"\u2265\u20d2", - "nvgt;": u">\u20d2", - "nvinfin;": u"\u29de", - "nvlArr;": u"\u2902", - "nvle;": u"\u2264\u20d2", - "nvlt;": u"<\u20d2", - "nvltrie;": u"\u22b4\u20d2", - "nvrArr;": u"\u2903", - "nvrtrie;": u"\u22b5\u20d2", - "nvsim;": u"\u223c\u20d2", - "nwArr;": u"\u21d6", - "nwarhk;": u"\u2923", - "nwarr;": u"\u2196", - "nwarrow;": u"\u2196", - "nwnear;": u"\u2927", - "oS;": u"\u24c8", - "oacute": u"\xf3", - "oacute;": u"\xf3", - "oast;": u"\u229b", - "ocir;": u"\u229a", - "ocirc": u"\xf4", - "ocirc;": u"\xf4", - "ocy;": u"\u043e", - "odash;": u"\u229d", - "odblac;": u"\u0151", - "odiv;": u"\u2a38", - "odot;": u"\u2299", - "odsold;": u"\u29bc", - "oelig;": u"\u0153", - "ofcir;": u"\u29bf", - "ofr;": u"\U0001d52c", - "ogon;": u"\u02db", - "ograve": u"\xf2", - "ograve;": u"\xf2", - "ogt;": u"\u29c1", - "ohbar;": u"\u29b5", - "ohm;": u"\u03a9", - "oint;": u"\u222e", - "olarr;": u"\u21ba", - "olcir;": u"\u29be", - "olcross;": u"\u29bb", - "oline;": u"\u203e", - "olt;": u"\u29c0", - "omacr;": u"\u014d", - "omega;": u"\u03c9", - "omicron;": u"\u03bf", - "omid;": u"\u29b6", - "ominus;": u"\u2296", - "oopf;": u"\U0001d560", - "opar;": u"\u29b7", - "operp;": u"\u29b9", - "oplus;": u"\u2295", - "or;": u"\u2228", - "orarr;": u"\u21bb", - "ord;": u"\u2a5d", - "order;": u"\u2134", - "orderof;": u"\u2134", - "ordf": u"\xaa", - "ordf;": u"\xaa", - "ordm": u"\xba", - "ordm;": u"\xba", - "origof;": u"\u22b6", - "oror;": u"\u2a56", - "orslope;": u"\u2a57", - "orv;": u"\u2a5b", - "oscr;": u"\u2134", - "oslash": u"\xf8", - "oslash;": u"\xf8", - "osol;": u"\u2298", - "otilde": u"\xf5", - "otilde;": u"\xf5", - "otimes;": u"\u2297", - "otimesas;": u"\u2a36", - "ouml": u"\xf6", - "ouml;": u"\xf6", - "ovbar;": u"\u233d", - "par;": u"\u2225", - "para": u"\xb6", - "para;": u"\xb6", - "parallel;": u"\u2225", - "parsim;": u"\u2af3", - "parsl;": u"\u2afd", - "part;": u"\u2202", - "pcy;": u"\u043f", - "percnt;": u"%", - "period;": u".", - "permil;": u"\u2030", - "perp;": u"\u22a5", - "pertenk;": u"\u2031", - "pfr;": u"\U0001d52d", - "phi;": u"\u03c6", - "phiv;": u"\u03d5", - "phmmat;": u"\u2133", - "phone;": u"\u260e", - "pi;": u"\u03c0", - "pitchfork;": u"\u22d4", - "piv;": u"\u03d6", - "planck;": u"\u210f", - "planckh;": u"\u210e", - "plankv;": u"\u210f", - "plus;": u"+", - "plusacir;": u"\u2a23", - "plusb;": u"\u229e", - "pluscir;": u"\u2a22", - "plusdo;": u"\u2214", - "plusdu;": u"\u2a25", - "pluse;": u"\u2a72", - "plusmn": u"\xb1", - "plusmn;": u"\xb1", - "plussim;": u"\u2a26", - "plustwo;": u"\u2a27", - "pm;": u"\xb1", - "pointint;": u"\u2a15", - "popf;": u"\U0001d561", - "pound": u"\xa3", - "pound;": u"\xa3", - "pr;": u"\u227a", - "prE;": u"\u2ab3", - "prap;": u"\u2ab7", - "prcue;": u"\u227c", - "pre;": u"\u2aaf", - "prec;": u"\u227a", - "precapprox;": u"\u2ab7", - "preccurlyeq;": u"\u227c", - "preceq;": u"\u2aaf", - "precnapprox;": u"\u2ab9", - "precneqq;": u"\u2ab5", - "precnsim;": u"\u22e8", - "precsim;": u"\u227e", - "prime;": u"\u2032", - "primes;": u"\u2119", - "prnE;": u"\u2ab5", - "prnap;": u"\u2ab9", - "prnsim;": u"\u22e8", - "prod;": u"\u220f", - "profalar;": u"\u232e", - "profline;": u"\u2312", - "profsurf;": u"\u2313", - "prop;": u"\u221d", - "propto;": u"\u221d", - "prsim;": u"\u227e", - "prurel;": u"\u22b0", - "pscr;": u"\U0001d4c5", - "psi;": u"\u03c8", - "puncsp;": u"\u2008", - "qfr;": u"\U0001d52e", - "qint;": u"\u2a0c", - "qopf;": u"\U0001d562", - "qprime;": u"\u2057", - "qscr;": u"\U0001d4c6", - "quaternions;": u"\u210d", - "quatint;": u"\u2a16", - "quest;": u"?", - "questeq;": u"\u225f", - "quot": u"\"", - "quot;": u"\"", - "rAarr;": u"\u21db", - "rArr;": u"\u21d2", - "rAtail;": u"\u291c", - "rBarr;": u"\u290f", - "rHar;": u"\u2964", - "race;": u"\u223d\u0331", - "racute;": u"\u0155", - "radic;": u"\u221a", - "raemptyv;": u"\u29b3", - "rang;": u"\u27e9", - "rangd;": u"\u2992", - "range;": u"\u29a5", - "rangle;": u"\u27e9", - "raquo": u"\xbb", - "raquo;": u"\xbb", - "rarr;": u"\u2192", - "rarrap;": u"\u2975", - "rarrb;": u"\u21e5", - "rarrbfs;": u"\u2920", - "rarrc;": u"\u2933", - "rarrfs;": u"\u291e", - "rarrhk;": u"\u21aa", - "rarrlp;": u"\u21ac", - "rarrpl;": u"\u2945", - "rarrsim;": u"\u2974", - "rarrtl;": u"\u21a3", - "rarrw;": u"\u219d", - "ratail;": u"\u291a", - "ratio;": u"\u2236", - "rationals;": u"\u211a", - "rbarr;": u"\u290d", - "rbbrk;": u"\u2773", - "rbrace;": u"}", - "rbrack;": u"]", - "rbrke;": u"\u298c", - "rbrksld;": u"\u298e", - "rbrkslu;": u"\u2990", - "rcaron;": u"\u0159", - "rcedil;": u"\u0157", - "rceil;": u"\u2309", - "rcub;": u"}", - "rcy;": u"\u0440", - "rdca;": u"\u2937", - "rdldhar;": u"\u2969", - "rdquo;": u"\u201d", - "rdquor;": u"\u201d", - "rdsh;": u"\u21b3", - "real;": u"\u211c", - "realine;": u"\u211b", - "realpart;": u"\u211c", - "reals;": u"\u211d", - "rect;": u"\u25ad", - "reg": u"\xae", - "reg;": u"\xae", - "rfisht;": u"\u297d", - "rfloor;": u"\u230b", - "rfr;": u"\U0001d52f", - "rhard;": u"\u21c1", - "rharu;": u"\u21c0", - "rharul;": u"\u296c", - "rho;": u"\u03c1", - "rhov;": u"\u03f1", - "rightarrow;": u"\u2192", - "rightarrowtail;": u"\u21a3", - "rightharpoondown;": u"\u21c1", - "rightharpoonup;": u"\u21c0", - "rightleftarrows;": u"\u21c4", - "rightleftharpoons;": u"\u21cc", - "rightrightarrows;": u"\u21c9", - "rightsquigarrow;": u"\u219d", - "rightthreetimes;": u"\u22cc", - "ring;": u"\u02da", - "risingdotseq;": u"\u2253", - "rlarr;": u"\u21c4", - "rlhar;": u"\u21cc", - "rlm;": u"\u200f", - "rmoust;": u"\u23b1", - "rmoustache;": u"\u23b1", - "rnmid;": u"\u2aee", - "roang;": u"\u27ed", - "roarr;": u"\u21fe", - "robrk;": u"\u27e7", - "ropar;": u"\u2986", - "ropf;": u"\U0001d563", - "roplus;": u"\u2a2e", - "rotimes;": u"\u2a35", - "rpar;": u")", - "rpargt;": u"\u2994", - "rppolint;": u"\u2a12", - "rrarr;": u"\u21c9", - "rsaquo;": u"\u203a", - "rscr;": u"\U0001d4c7", - "rsh;": u"\u21b1", - "rsqb;": u"]", - "rsquo;": u"\u2019", - "rsquor;": u"\u2019", - "rthree;": u"\u22cc", - "rtimes;": u"\u22ca", - "rtri;": u"\u25b9", - "rtrie;": u"\u22b5", - "rtrif;": u"\u25b8", - "rtriltri;": u"\u29ce", - "ruluhar;": u"\u2968", - "rx;": u"\u211e", - "sacute;": u"\u015b", - "sbquo;": u"\u201a", - "sc;": u"\u227b", - "scE;": u"\u2ab4", - "scap;": u"\u2ab8", - "scaron;": u"\u0161", - "sccue;": u"\u227d", - "sce;": u"\u2ab0", - "scedil;": u"\u015f", - "scirc;": u"\u015d", - "scnE;": u"\u2ab6", - "scnap;": u"\u2aba", - "scnsim;": u"\u22e9", - "scpolint;": u"\u2a13", - "scsim;": u"\u227f", - "scy;": u"\u0441", - "sdot;": u"\u22c5", - "sdotb;": u"\u22a1", - "sdote;": u"\u2a66", - "seArr;": u"\u21d8", - "searhk;": u"\u2925", - "searr;": u"\u2198", - "searrow;": u"\u2198", - "sect": u"\xa7", - "sect;": u"\xa7", - "semi;": u";", - "seswar;": u"\u2929", - "setminus;": u"\u2216", - "setmn;": u"\u2216", - "sext;": u"\u2736", - "sfr;": u"\U0001d530", - "sfrown;": u"\u2322", - "sharp;": u"\u266f", - "shchcy;": u"\u0449", - "shcy;": u"\u0448", - "shortmid;": u"\u2223", - "shortparallel;": u"\u2225", - "shy": u"\xad", - "shy;": u"\xad", - "sigma;": u"\u03c3", - "sigmaf;": u"\u03c2", - "sigmav;": u"\u03c2", - "sim;": u"\u223c", - "simdot;": u"\u2a6a", - "sime;": u"\u2243", - "simeq;": u"\u2243", - "simg;": u"\u2a9e", - "simgE;": u"\u2aa0", - "siml;": u"\u2a9d", - "simlE;": u"\u2a9f", - "simne;": u"\u2246", - "simplus;": u"\u2a24", - "simrarr;": u"\u2972", - "slarr;": u"\u2190", - "smallsetminus;": u"\u2216", - "smashp;": u"\u2a33", - "smeparsl;": u"\u29e4", - "smid;": u"\u2223", - "smile;": u"\u2323", - "smt;": u"\u2aaa", - "smte;": u"\u2aac", - "smtes;": u"\u2aac\ufe00", - "softcy;": u"\u044c", - "sol;": u"/", - "solb;": u"\u29c4", - "solbar;": u"\u233f", - "sopf;": u"\U0001d564", - "spades;": u"\u2660", - "spadesuit;": u"\u2660", - "spar;": u"\u2225", - "sqcap;": u"\u2293", - "sqcaps;": u"\u2293\ufe00", - "sqcup;": u"\u2294", - "sqcups;": u"\u2294\ufe00", - "sqsub;": u"\u228f", - "sqsube;": u"\u2291", - "sqsubset;": u"\u228f", - "sqsubseteq;": u"\u2291", - "sqsup;": u"\u2290", - "sqsupe;": u"\u2292", - "sqsupset;": u"\u2290", - "sqsupseteq;": u"\u2292", - "squ;": u"\u25a1", - "square;": u"\u25a1", - "squarf;": u"\u25aa", - "squf;": u"\u25aa", - "srarr;": u"\u2192", - "sscr;": u"\U0001d4c8", - "ssetmn;": u"\u2216", - "ssmile;": u"\u2323", - "sstarf;": u"\u22c6", - "star;": u"\u2606", - "starf;": u"\u2605", - "straightepsilon;": u"\u03f5", - "straightphi;": u"\u03d5", - "strns;": u"\xaf", - "sub;": u"\u2282", - "subE;": u"\u2ac5", - "subdot;": u"\u2abd", - "sube;": u"\u2286", - "subedot;": u"\u2ac3", - "submult;": u"\u2ac1", - "subnE;": u"\u2acb", - "subne;": u"\u228a", - "subplus;": u"\u2abf", - "subrarr;": u"\u2979", - "subset;": u"\u2282", - "subseteq;": u"\u2286", - "subseteqq;": u"\u2ac5", - "subsetneq;": u"\u228a", - "subsetneqq;": u"\u2acb", - "subsim;": u"\u2ac7", - "subsub;": u"\u2ad5", - "subsup;": u"\u2ad3", - "succ;": u"\u227b", - "succapprox;": u"\u2ab8", - "succcurlyeq;": u"\u227d", - "succeq;": u"\u2ab0", - "succnapprox;": u"\u2aba", - "succneqq;": u"\u2ab6", - "succnsim;": u"\u22e9", - "succsim;": u"\u227f", - "sum;": u"\u2211", - "sung;": u"\u266a", - "sup1": u"\xb9", - "sup1;": u"\xb9", - "sup2": u"\xb2", - "sup2;": u"\xb2", - "sup3": u"\xb3", - "sup3;": u"\xb3", - "sup;": u"\u2283", - "supE;": u"\u2ac6", - "supdot;": u"\u2abe", - "supdsub;": u"\u2ad8", - "supe;": u"\u2287", - "supedot;": u"\u2ac4", - "suphsol;": u"\u27c9", - "suphsub;": u"\u2ad7", - "suplarr;": u"\u297b", - "supmult;": u"\u2ac2", - "supnE;": u"\u2acc", - "supne;": u"\u228b", - "supplus;": u"\u2ac0", - "supset;": u"\u2283", - "supseteq;": u"\u2287", - "supseteqq;": u"\u2ac6", - "supsetneq;": u"\u228b", - "supsetneqq;": u"\u2acc", - "supsim;": u"\u2ac8", - "supsub;": u"\u2ad4", - "supsup;": u"\u2ad6", - "swArr;": u"\u21d9", - "swarhk;": u"\u2926", - "swarr;": u"\u2199", - "swarrow;": u"\u2199", - "swnwar;": u"\u292a", - "szlig": u"\xdf", - "szlig;": u"\xdf", - "target;": u"\u2316", - "tau;": u"\u03c4", - "tbrk;": u"\u23b4", - "tcaron;": u"\u0165", - "tcedil;": u"\u0163", - "tcy;": u"\u0442", - "tdot;": u"\u20db", - "telrec;": u"\u2315", - "tfr;": u"\U0001d531", - "there4;": u"\u2234", - "therefore;": u"\u2234", - "theta;": u"\u03b8", - "thetasym;": u"\u03d1", - "thetav;": u"\u03d1", - "thickapprox;": u"\u2248", - "thicksim;": u"\u223c", - "thinsp;": u"\u2009", - "thkap;": u"\u2248", - "thksim;": u"\u223c", - "thorn": u"\xfe", - "thorn;": u"\xfe", - "tilde;": u"\u02dc", - "times": u"\xd7", - "times;": u"\xd7", - "timesb;": u"\u22a0", - "timesbar;": u"\u2a31", - "timesd;": u"\u2a30", - "tint;": u"\u222d", - "toea;": u"\u2928", - "top;": u"\u22a4", - "topbot;": u"\u2336", - "topcir;": u"\u2af1", - "topf;": u"\U0001d565", - "topfork;": u"\u2ada", - "tosa;": u"\u2929", - "tprime;": u"\u2034", - "trade;": u"\u2122", - "triangle;": u"\u25b5", - "triangledown;": u"\u25bf", - "triangleleft;": u"\u25c3", - "trianglelefteq;": u"\u22b4", - "triangleq;": u"\u225c", - "triangleright;": u"\u25b9", - "trianglerighteq;": u"\u22b5", - "tridot;": u"\u25ec", - "trie;": u"\u225c", - "triminus;": u"\u2a3a", - "triplus;": u"\u2a39", - "trisb;": u"\u29cd", - "tritime;": u"\u2a3b", - "trpezium;": u"\u23e2", - "tscr;": u"\U0001d4c9", - "tscy;": u"\u0446", - "tshcy;": u"\u045b", - "tstrok;": u"\u0167", - "twixt;": u"\u226c", - "twoheadleftarrow;": u"\u219e", - "twoheadrightarrow;": u"\u21a0", - "uArr;": u"\u21d1", - "uHar;": u"\u2963", - "uacute": u"\xfa", - "uacute;": u"\xfa", - "uarr;": u"\u2191", - "ubrcy;": u"\u045e", - "ubreve;": u"\u016d", - "ucirc": u"\xfb", - "ucirc;": u"\xfb", - "ucy;": u"\u0443", - "udarr;": u"\u21c5", - "udblac;": u"\u0171", - "udhar;": u"\u296e", - "ufisht;": u"\u297e", - "ufr;": u"\U0001d532", - "ugrave": u"\xf9", - "ugrave;": u"\xf9", - "uharl;": u"\u21bf", - "uharr;": u"\u21be", - "uhblk;": u"\u2580", - "ulcorn;": u"\u231c", - "ulcorner;": u"\u231c", - "ulcrop;": u"\u230f", - "ultri;": u"\u25f8", - "umacr;": u"\u016b", - "uml": u"\xa8", - "uml;": u"\xa8", - "uogon;": u"\u0173", - "uopf;": u"\U0001d566", - "uparrow;": u"\u2191", - "updownarrow;": u"\u2195", - "upharpoonleft;": u"\u21bf", - "upharpoonright;": u"\u21be", - "uplus;": u"\u228e", - "upsi;": u"\u03c5", - "upsih;": u"\u03d2", - "upsilon;": u"\u03c5", - "upuparrows;": u"\u21c8", - "urcorn;": u"\u231d", - "urcorner;": u"\u231d", - "urcrop;": u"\u230e", - "uring;": u"\u016f", - "urtri;": u"\u25f9", - "uscr;": u"\U0001d4ca", - "utdot;": u"\u22f0", - "utilde;": u"\u0169", - "utri;": u"\u25b5", - "utrif;": u"\u25b4", - "uuarr;": u"\u21c8", - "uuml": u"\xfc", - "uuml;": u"\xfc", - "uwangle;": u"\u29a7", - "vArr;": u"\u21d5", - "vBar;": u"\u2ae8", - "vBarv;": u"\u2ae9", - "vDash;": u"\u22a8", - "vangrt;": u"\u299c", - "varepsilon;": u"\u03f5", - "varkappa;": u"\u03f0", - "varnothing;": u"\u2205", - "varphi;": u"\u03d5", - "varpi;": u"\u03d6", - "varpropto;": u"\u221d", - "varr;": u"\u2195", - "varrho;": u"\u03f1", - "varsigma;": u"\u03c2", - "varsubsetneq;": u"\u228a\ufe00", - "varsubsetneqq;": u"\u2acb\ufe00", - "varsupsetneq;": u"\u228b\ufe00", - "varsupsetneqq;": u"\u2acc\ufe00", - "vartheta;": u"\u03d1", - "vartriangleleft;": u"\u22b2", - "vartriangleright;": u"\u22b3", - "vcy;": u"\u0432", - "vdash;": u"\u22a2", - "vee;": u"\u2228", - "veebar;": u"\u22bb", - "veeeq;": u"\u225a", - "vellip;": u"\u22ee", - "verbar;": u"|", - "vert;": u"|", - "vfr;": u"\U0001d533", - "vltri;": u"\u22b2", - "vnsub;": u"\u2282\u20d2", - "vnsup;": u"\u2283\u20d2", - "vopf;": u"\U0001d567", - "vprop;": u"\u221d", - "vrtri;": u"\u22b3", - "vscr;": u"\U0001d4cb", - "vsubnE;": u"\u2acb\ufe00", - "vsubne;": u"\u228a\ufe00", - "vsupnE;": u"\u2acc\ufe00", - "vsupne;": u"\u228b\ufe00", - "vzigzag;": u"\u299a", - "wcirc;": u"\u0175", - "wedbar;": u"\u2a5f", - "wedge;": u"\u2227", - "wedgeq;": u"\u2259", - "weierp;": u"\u2118", - "wfr;": u"\U0001d534", - "wopf;": u"\U0001d568", - "wp;": u"\u2118", - "wr;": u"\u2240", - "wreath;": u"\u2240", - "wscr;": u"\U0001d4cc", - "xcap;": u"\u22c2", - "xcirc;": u"\u25ef", - "xcup;": u"\u22c3", - "xdtri;": u"\u25bd", - "xfr;": u"\U0001d535", - "xhArr;": u"\u27fa", - "xharr;": u"\u27f7", - "xi;": u"\u03be", - "xlArr;": u"\u27f8", - "xlarr;": u"\u27f5", - "xmap;": u"\u27fc", - "xnis;": u"\u22fb", - "xodot;": u"\u2a00", - "xopf;": u"\U0001d569", - "xoplus;": u"\u2a01", - "xotime;": u"\u2a02", - "xrArr;": u"\u27f9", - "xrarr;": u"\u27f6", - "xscr;": u"\U0001d4cd", - "xsqcup;": u"\u2a06", - "xuplus;": u"\u2a04", - "xutri;": u"\u25b3", - "xvee;": u"\u22c1", - "xwedge;": u"\u22c0", - "yacute": u"\xfd", - "yacute;": u"\xfd", - "yacy;": u"\u044f", - "ycirc;": u"\u0177", - "ycy;": u"\u044b", - "yen": u"\xa5", - "yen;": u"\xa5", - "yfr;": u"\U0001d536", - "yicy;": u"\u0457", - "yopf;": u"\U0001d56a", - "yscr;": u"\U0001d4ce", - "yucy;": u"\u044e", - "yuml": u"\xff", - "yuml;": u"\xff", - "zacute;": u"\u017a", - "zcaron;": u"\u017e", - "zcy;": u"\u0437", - "zdot;": u"\u017c", - "zeetrf;": u"\u2128", - "zeta;": u"\u03b6", - "zfr;": u"\U0001d537", - "zhcy;": u"\u0436", - "zigrarr;": u"\u21dd", - "zopf;": u"\U0001d56b", - "zscr;": u"\U0001d4cf", - "zwj;": u"\u200d", - "zwnj;": u"\u200c", + "AElig": "\xc6", + "AElig;": "\xc6", + "AMP": "&", + "AMP;": "&", + "Aacute": "\xc1", + "Aacute;": "\xc1", + "Abreve;": "\u0102", + "Acirc": "\xc2", + "Acirc;": "\xc2", + "Acy;": "\u0410", + "Afr;": "\U0001d504", + "Agrave": "\xc0", + "Agrave;": "\xc0", + "Alpha;": "\u0391", + "Amacr;": "\u0100", + "And;": "\u2a53", + "Aogon;": "\u0104", + "Aopf;": "\U0001d538", + "ApplyFunction;": "\u2061", + "Aring": "\xc5", + "Aring;": "\xc5", + "Ascr;": "\U0001d49c", + "Assign;": "\u2254", + "Atilde": "\xc3", + "Atilde;": "\xc3", + "Auml": "\xc4", + "Auml;": "\xc4", + "Backslash;": "\u2216", + "Barv;": "\u2ae7", + "Barwed;": "\u2306", + "Bcy;": "\u0411", + "Because;": "\u2235", + "Bernoullis;": "\u212c", + "Beta;": "\u0392", + "Bfr;": "\U0001d505", + "Bopf;": "\U0001d539", + "Breve;": "\u02d8", + "Bscr;": "\u212c", + "Bumpeq;": "\u224e", + "CHcy;": "\u0427", + "COPY": "\xa9", + "COPY;": "\xa9", + "Cacute;": "\u0106", + "Cap;": "\u22d2", + "CapitalDifferentialD;": "\u2145", + "Cayleys;": "\u212d", + "Ccaron;": "\u010c", + "Ccedil": "\xc7", + "Ccedil;": "\xc7", + "Ccirc;": "\u0108", + "Cconint;": "\u2230", + "Cdot;": "\u010a", + "Cedilla;": "\xb8", + "CenterDot;": "\xb7", + "Cfr;": "\u212d", + "Chi;": "\u03a7", + "CircleDot;": "\u2299", + "CircleMinus;": "\u2296", + "CirclePlus;": "\u2295", + "CircleTimes;": "\u2297", + "ClockwiseContourIntegral;": "\u2232", + "CloseCurlyDoubleQuote;": "\u201d", + "CloseCurlyQuote;": "\u2019", + "Colon;": "\u2237", + "Colone;": "\u2a74", + "Congruent;": "\u2261", + "Conint;": "\u222f", + "ContourIntegral;": "\u222e", + "Copf;": "\u2102", + "Coproduct;": "\u2210", + "CounterClockwiseContourIntegral;": "\u2233", + "Cross;": "\u2a2f", + "Cscr;": "\U0001d49e", + "Cup;": "\u22d3", + "CupCap;": "\u224d", + "DD;": "\u2145", + "DDotrahd;": "\u2911", + "DJcy;": "\u0402", + "DScy;": "\u0405", + "DZcy;": "\u040f", + "Dagger;": "\u2021", + "Darr;": "\u21a1", + "Dashv;": "\u2ae4", + "Dcaron;": "\u010e", + "Dcy;": "\u0414", + "Del;": "\u2207", + "Delta;": "\u0394", + "Dfr;": "\U0001d507", + "DiacriticalAcute;": "\xb4", + "DiacriticalDot;": "\u02d9", + "DiacriticalDoubleAcute;": "\u02dd", + "DiacriticalGrave;": "`", + "DiacriticalTilde;": "\u02dc", + "Diamond;": "\u22c4", + "DifferentialD;": "\u2146", + "Dopf;": "\U0001d53b", + "Dot;": "\xa8", + "DotDot;": "\u20dc", + "DotEqual;": "\u2250", + "DoubleContourIntegral;": "\u222f", + "DoubleDot;": "\xa8", + "DoubleDownArrow;": "\u21d3", + "DoubleLeftArrow;": "\u21d0", + "DoubleLeftRightArrow;": "\u21d4", + "DoubleLeftTee;": "\u2ae4", + "DoubleLongLeftArrow;": "\u27f8", + "DoubleLongLeftRightArrow;": "\u27fa", + "DoubleLongRightArrow;": "\u27f9", + "DoubleRightArrow;": "\u21d2", + "DoubleRightTee;": "\u22a8", + "DoubleUpArrow;": "\u21d1", + "DoubleUpDownArrow;": "\u21d5", + "DoubleVerticalBar;": "\u2225", + "DownArrow;": "\u2193", + "DownArrowBar;": "\u2913", + "DownArrowUpArrow;": "\u21f5", + "DownBreve;": "\u0311", + "DownLeftRightVector;": "\u2950", + "DownLeftTeeVector;": "\u295e", + "DownLeftVector;": "\u21bd", + "DownLeftVectorBar;": "\u2956", + "DownRightTeeVector;": "\u295f", + "DownRightVector;": "\u21c1", + "DownRightVectorBar;": "\u2957", + "DownTee;": "\u22a4", + "DownTeeArrow;": "\u21a7", + "Downarrow;": "\u21d3", + "Dscr;": "\U0001d49f", + "Dstrok;": "\u0110", + "ENG;": "\u014a", + "ETH": "\xd0", + "ETH;": "\xd0", + "Eacute": "\xc9", + "Eacute;": "\xc9", + "Ecaron;": "\u011a", + "Ecirc": "\xca", + "Ecirc;": "\xca", + "Ecy;": "\u042d", + "Edot;": "\u0116", + "Efr;": "\U0001d508", + "Egrave": "\xc8", + "Egrave;": "\xc8", + "Element;": "\u2208", + "Emacr;": "\u0112", + "EmptySmallSquare;": "\u25fb", + "EmptyVerySmallSquare;": "\u25ab", + "Eogon;": "\u0118", + "Eopf;": "\U0001d53c", + "Epsilon;": "\u0395", + "Equal;": "\u2a75", + "EqualTilde;": "\u2242", + "Equilibrium;": "\u21cc", + "Escr;": "\u2130", + "Esim;": "\u2a73", + "Eta;": "\u0397", + "Euml": "\xcb", + "Euml;": "\xcb", + "Exists;": "\u2203", + "ExponentialE;": "\u2147", + "Fcy;": "\u0424", + "Ffr;": "\U0001d509", + "FilledSmallSquare;": "\u25fc", + "FilledVerySmallSquare;": "\u25aa", + "Fopf;": "\U0001d53d", + "ForAll;": "\u2200", + "Fouriertrf;": "\u2131", + "Fscr;": "\u2131", + "GJcy;": "\u0403", + "GT": ">", + "GT;": ">", + "Gamma;": "\u0393", + "Gammad;": "\u03dc", + "Gbreve;": "\u011e", + "Gcedil;": "\u0122", + "Gcirc;": "\u011c", + "Gcy;": "\u0413", + "Gdot;": "\u0120", + "Gfr;": "\U0001d50a", + "Gg;": "\u22d9", + "Gopf;": "\U0001d53e", + "GreaterEqual;": "\u2265", + "GreaterEqualLess;": "\u22db", + "GreaterFullEqual;": "\u2267", + "GreaterGreater;": "\u2aa2", + "GreaterLess;": "\u2277", + "GreaterSlantEqual;": "\u2a7e", + "GreaterTilde;": "\u2273", + "Gscr;": "\U0001d4a2", + "Gt;": "\u226b", + "HARDcy;": "\u042a", + "Hacek;": "\u02c7", + "Hat;": "^", + "Hcirc;": "\u0124", + "Hfr;": "\u210c", + "HilbertSpace;": "\u210b", + "Hopf;": "\u210d", + "HorizontalLine;": "\u2500", + "Hscr;": "\u210b", + "Hstrok;": "\u0126", + "HumpDownHump;": "\u224e", + "HumpEqual;": "\u224f", + "IEcy;": "\u0415", + "IJlig;": "\u0132", + "IOcy;": "\u0401", + "Iacute": "\xcd", + "Iacute;": "\xcd", + "Icirc": "\xce", + "Icirc;": "\xce", + "Icy;": "\u0418", + "Idot;": "\u0130", + "Ifr;": "\u2111", + "Igrave": "\xcc", + "Igrave;": "\xcc", + "Im;": "\u2111", + "Imacr;": "\u012a", + "ImaginaryI;": "\u2148", + "Implies;": "\u21d2", + "Int;": "\u222c", + "Integral;": "\u222b", + "Intersection;": "\u22c2", + "InvisibleComma;": "\u2063", + "InvisibleTimes;": "\u2062", + "Iogon;": "\u012e", + "Iopf;": "\U0001d540", + "Iota;": "\u0399", + "Iscr;": "\u2110", + "Itilde;": "\u0128", + "Iukcy;": "\u0406", + "Iuml": "\xcf", + "Iuml;": "\xcf", + "Jcirc;": "\u0134", + "Jcy;": "\u0419", + "Jfr;": "\U0001d50d", + "Jopf;": "\U0001d541", + "Jscr;": "\U0001d4a5", + "Jsercy;": "\u0408", + "Jukcy;": "\u0404", + "KHcy;": "\u0425", + "KJcy;": "\u040c", + "Kappa;": "\u039a", + "Kcedil;": "\u0136", + "Kcy;": "\u041a", + "Kfr;": "\U0001d50e", + "Kopf;": "\U0001d542", + "Kscr;": "\U0001d4a6", + "LJcy;": "\u0409", + "LT": "<", + "LT;": "<", + "Lacute;": "\u0139", + "Lambda;": "\u039b", + "Lang;": "\u27ea", + "Laplacetrf;": "\u2112", + "Larr;": "\u219e", + "Lcaron;": "\u013d", + "Lcedil;": "\u013b", + "Lcy;": "\u041b", + "LeftAngleBracket;": "\u27e8", + "LeftArrow;": "\u2190", + "LeftArrowBar;": "\u21e4", + "LeftArrowRightArrow;": "\u21c6", + "LeftCeiling;": "\u2308", + "LeftDoubleBracket;": "\u27e6", + "LeftDownTeeVector;": "\u2961", + "LeftDownVector;": "\u21c3", + "LeftDownVectorBar;": "\u2959", + "LeftFloor;": "\u230a", + "LeftRightArrow;": "\u2194", + "LeftRightVector;": "\u294e", + "LeftTee;": "\u22a3", + "LeftTeeArrow;": "\u21a4", + "LeftTeeVector;": "\u295a", + "LeftTriangle;": "\u22b2", + "LeftTriangleBar;": "\u29cf", + "LeftTriangleEqual;": "\u22b4", + "LeftUpDownVector;": "\u2951", + "LeftUpTeeVector;": "\u2960", + "LeftUpVector;": "\u21bf", + "LeftUpVectorBar;": "\u2958", + "LeftVector;": "\u21bc", + "LeftVectorBar;": "\u2952", + "Leftarrow;": "\u21d0", + "Leftrightarrow;": "\u21d4", + "LessEqualGreater;": "\u22da", + "LessFullEqual;": "\u2266", + "LessGreater;": "\u2276", + "LessLess;": "\u2aa1", + "LessSlantEqual;": "\u2a7d", + "LessTilde;": "\u2272", + "Lfr;": "\U0001d50f", + "Ll;": "\u22d8", + "Lleftarrow;": "\u21da", + "Lmidot;": "\u013f", + "LongLeftArrow;": "\u27f5", + "LongLeftRightArrow;": "\u27f7", + "LongRightArrow;": "\u27f6", + "Longleftarrow;": "\u27f8", + "Longleftrightarrow;": "\u27fa", + "Longrightarrow;": "\u27f9", + "Lopf;": "\U0001d543", + "LowerLeftArrow;": "\u2199", + "LowerRightArrow;": "\u2198", + "Lscr;": "\u2112", + "Lsh;": "\u21b0", + "Lstrok;": "\u0141", + "Lt;": "\u226a", + "Map;": "\u2905", + "Mcy;": "\u041c", + "MediumSpace;": "\u205f", + "Mellintrf;": "\u2133", + "Mfr;": "\U0001d510", + "MinusPlus;": "\u2213", + "Mopf;": "\U0001d544", + "Mscr;": "\u2133", + "Mu;": "\u039c", + "NJcy;": "\u040a", + "Nacute;": "\u0143", + "Ncaron;": "\u0147", + "Ncedil;": "\u0145", + "Ncy;": "\u041d", + "NegativeMediumSpace;": "\u200b", + "NegativeThickSpace;": "\u200b", + "NegativeThinSpace;": "\u200b", + "NegativeVeryThinSpace;": "\u200b", + "NestedGreaterGreater;": "\u226b", + "NestedLessLess;": "\u226a", + "NewLine;": "\n", + "Nfr;": "\U0001d511", + "NoBreak;": "\u2060", + "NonBreakingSpace;": "\xa0", + "Nopf;": "\u2115", + "Not;": "\u2aec", + "NotCongruent;": "\u2262", + "NotCupCap;": "\u226d", + "NotDoubleVerticalBar;": "\u2226", + "NotElement;": "\u2209", + "NotEqual;": "\u2260", + "NotEqualTilde;": "\u2242\u0338", + "NotExists;": "\u2204", + "NotGreater;": "\u226f", + "NotGreaterEqual;": "\u2271", + "NotGreaterFullEqual;": "\u2267\u0338", + "NotGreaterGreater;": "\u226b\u0338", + "NotGreaterLess;": "\u2279", + "NotGreaterSlantEqual;": "\u2a7e\u0338", + "NotGreaterTilde;": "\u2275", + "NotHumpDownHump;": "\u224e\u0338", + "NotHumpEqual;": "\u224f\u0338", + "NotLeftTriangle;": "\u22ea", + "NotLeftTriangleBar;": "\u29cf\u0338", + "NotLeftTriangleEqual;": "\u22ec", + "NotLess;": "\u226e", + "NotLessEqual;": "\u2270", + "NotLessGreater;": "\u2278", + "NotLessLess;": "\u226a\u0338", + "NotLessSlantEqual;": "\u2a7d\u0338", + "NotLessTilde;": "\u2274", + "NotNestedGreaterGreater;": "\u2aa2\u0338", + "NotNestedLessLess;": "\u2aa1\u0338", + "NotPrecedes;": "\u2280", + "NotPrecedesEqual;": "\u2aaf\u0338", + "NotPrecedesSlantEqual;": "\u22e0", + "NotReverseElement;": "\u220c", + "NotRightTriangle;": "\u22eb", + "NotRightTriangleBar;": "\u29d0\u0338", + "NotRightTriangleEqual;": "\u22ed", + "NotSquareSubset;": "\u228f\u0338", + "NotSquareSubsetEqual;": "\u22e2", + "NotSquareSuperset;": "\u2290\u0338", + "NotSquareSupersetEqual;": "\u22e3", + "NotSubset;": "\u2282\u20d2", + "NotSubsetEqual;": "\u2288", + "NotSucceeds;": "\u2281", + "NotSucceedsEqual;": "\u2ab0\u0338", + "NotSucceedsSlantEqual;": "\u22e1", + "NotSucceedsTilde;": "\u227f\u0338", + "NotSuperset;": "\u2283\u20d2", + "NotSupersetEqual;": "\u2289", + "NotTilde;": "\u2241", + "NotTildeEqual;": "\u2244", + "NotTildeFullEqual;": "\u2247", + "NotTildeTilde;": "\u2249", + "NotVerticalBar;": "\u2224", + "Nscr;": "\U0001d4a9", + "Ntilde": "\xd1", + "Ntilde;": "\xd1", + "Nu;": "\u039d", + "OElig;": "\u0152", + "Oacute": "\xd3", + "Oacute;": "\xd3", + "Ocirc": "\xd4", + "Ocirc;": "\xd4", + "Ocy;": "\u041e", + "Odblac;": "\u0150", + "Ofr;": "\U0001d512", + "Ograve": "\xd2", + "Ograve;": "\xd2", + "Omacr;": "\u014c", + "Omega;": "\u03a9", + "Omicron;": "\u039f", + "Oopf;": "\U0001d546", + "OpenCurlyDoubleQuote;": "\u201c", + "OpenCurlyQuote;": "\u2018", + "Or;": "\u2a54", + "Oscr;": "\U0001d4aa", + "Oslash": "\xd8", + "Oslash;": "\xd8", + "Otilde": "\xd5", + "Otilde;": "\xd5", + "Otimes;": "\u2a37", + "Ouml": "\xd6", + "Ouml;": "\xd6", + "OverBar;": "\u203e", + "OverBrace;": "\u23de", + "OverBracket;": "\u23b4", + "OverParenthesis;": "\u23dc", + "PartialD;": "\u2202", + "Pcy;": "\u041f", + "Pfr;": "\U0001d513", + "Phi;": "\u03a6", + "Pi;": "\u03a0", + "PlusMinus;": "\xb1", + "Poincareplane;": "\u210c", + "Popf;": "\u2119", + "Pr;": "\u2abb", + "Precedes;": "\u227a", + "PrecedesEqual;": "\u2aaf", + "PrecedesSlantEqual;": "\u227c", + "PrecedesTilde;": "\u227e", + "Prime;": "\u2033", + "Product;": "\u220f", + "Proportion;": "\u2237", + "Proportional;": "\u221d", + "Pscr;": "\U0001d4ab", + "Psi;": "\u03a8", + "QUOT": "\"", + "QUOT;": "\"", + "Qfr;": "\U0001d514", + "Qopf;": "\u211a", + "Qscr;": "\U0001d4ac", + "RBarr;": "\u2910", + "REG": "\xae", + "REG;": "\xae", + "Racute;": "\u0154", + "Rang;": "\u27eb", + "Rarr;": "\u21a0", + "Rarrtl;": "\u2916", + "Rcaron;": "\u0158", + "Rcedil;": "\u0156", + "Rcy;": "\u0420", + "Re;": "\u211c", + "ReverseElement;": "\u220b", + "ReverseEquilibrium;": "\u21cb", + "ReverseUpEquilibrium;": "\u296f", + "Rfr;": "\u211c", + "Rho;": "\u03a1", + "RightAngleBracket;": "\u27e9", + "RightArrow;": "\u2192", + "RightArrowBar;": "\u21e5", + "RightArrowLeftArrow;": "\u21c4", + "RightCeiling;": "\u2309", + "RightDoubleBracket;": "\u27e7", + "RightDownTeeVector;": "\u295d", + "RightDownVector;": "\u21c2", + "RightDownVectorBar;": "\u2955", + "RightFloor;": "\u230b", + "RightTee;": "\u22a2", + "RightTeeArrow;": "\u21a6", + "RightTeeVector;": "\u295b", + "RightTriangle;": "\u22b3", + "RightTriangleBar;": "\u29d0", + "RightTriangleEqual;": "\u22b5", + "RightUpDownVector;": "\u294f", + "RightUpTeeVector;": "\u295c", + "RightUpVector;": "\u21be", + "RightUpVectorBar;": "\u2954", + "RightVector;": "\u21c0", + "RightVectorBar;": "\u2953", + "Rightarrow;": "\u21d2", + "Ropf;": "\u211d", + "RoundImplies;": "\u2970", + "Rrightarrow;": "\u21db", + "Rscr;": "\u211b", + "Rsh;": "\u21b1", + "RuleDelayed;": "\u29f4", + "SHCHcy;": "\u0429", + "SHcy;": "\u0428", + "SOFTcy;": "\u042c", + "Sacute;": "\u015a", + "Sc;": "\u2abc", + "Scaron;": "\u0160", + "Scedil;": "\u015e", + "Scirc;": "\u015c", + "Scy;": "\u0421", + "Sfr;": "\U0001d516", + "ShortDownArrow;": "\u2193", + "ShortLeftArrow;": "\u2190", + "ShortRightArrow;": "\u2192", + "ShortUpArrow;": "\u2191", + "Sigma;": "\u03a3", + "SmallCircle;": "\u2218", + "Sopf;": "\U0001d54a", + "Sqrt;": "\u221a", + "Square;": "\u25a1", + "SquareIntersection;": "\u2293", + "SquareSubset;": "\u228f", + "SquareSubsetEqual;": "\u2291", + "SquareSuperset;": "\u2290", + "SquareSupersetEqual;": "\u2292", + "SquareUnion;": "\u2294", + "Sscr;": "\U0001d4ae", + "Star;": "\u22c6", + "Sub;": "\u22d0", + "Subset;": "\u22d0", + "SubsetEqual;": "\u2286", + "Succeeds;": "\u227b", + "SucceedsEqual;": "\u2ab0", + "SucceedsSlantEqual;": "\u227d", + "SucceedsTilde;": "\u227f", + "SuchThat;": "\u220b", + "Sum;": "\u2211", + "Sup;": "\u22d1", + "Superset;": "\u2283", + "SupersetEqual;": "\u2287", + "Supset;": "\u22d1", + "THORN": "\xde", + "THORN;": "\xde", + "TRADE;": "\u2122", + "TSHcy;": "\u040b", + "TScy;": "\u0426", + "Tab;": "\t", + "Tau;": "\u03a4", + "Tcaron;": "\u0164", + "Tcedil;": "\u0162", + "Tcy;": "\u0422", + "Tfr;": "\U0001d517", + "Therefore;": "\u2234", + "Theta;": "\u0398", + "ThickSpace;": "\u205f\u200a", + "ThinSpace;": "\u2009", + "Tilde;": "\u223c", + "TildeEqual;": "\u2243", + "TildeFullEqual;": "\u2245", + "TildeTilde;": "\u2248", + "Topf;": "\U0001d54b", + "TripleDot;": "\u20db", + "Tscr;": "\U0001d4af", + "Tstrok;": "\u0166", + "Uacute": "\xda", + "Uacute;": "\xda", + "Uarr;": "\u219f", + "Uarrocir;": "\u2949", + "Ubrcy;": "\u040e", + "Ubreve;": "\u016c", + "Ucirc": "\xdb", + "Ucirc;": "\xdb", + "Ucy;": "\u0423", + "Udblac;": "\u0170", + "Ufr;": "\U0001d518", + "Ugrave": "\xd9", + "Ugrave;": "\xd9", + "Umacr;": "\u016a", + "UnderBar;": "_", + "UnderBrace;": "\u23df", + "UnderBracket;": "\u23b5", + "UnderParenthesis;": "\u23dd", + "Union;": "\u22c3", + "UnionPlus;": "\u228e", + "Uogon;": "\u0172", + "Uopf;": "\U0001d54c", + "UpArrow;": "\u2191", + "UpArrowBar;": "\u2912", + "UpArrowDownArrow;": "\u21c5", + "UpDownArrow;": "\u2195", + "UpEquilibrium;": "\u296e", + "UpTee;": "\u22a5", + "UpTeeArrow;": "\u21a5", + "Uparrow;": "\u21d1", + "Updownarrow;": "\u21d5", + "UpperLeftArrow;": "\u2196", + "UpperRightArrow;": "\u2197", + "Upsi;": "\u03d2", + "Upsilon;": "\u03a5", + "Uring;": "\u016e", + "Uscr;": "\U0001d4b0", + "Utilde;": "\u0168", + "Uuml": "\xdc", + "Uuml;": "\xdc", + "VDash;": "\u22ab", + "Vbar;": "\u2aeb", + "Vcy;": "\u0412", + "Vdash;": "\u22a9", + "Vdashl;": "\u2ae6", + "Vee;": "\u22c1", + "Verbar;": "\u2016", + "Vert;": "\u2016", + "VerticalBar;": "\u2223", + "VerticalLine;": "|", + "VerticalSeparator;": "\u2758", + "VerticalTilde;": "\u2240", + "VeryThinSpace;": "\u200a", + "Vfr;": "\U0001d519", + "Vopf;": "\U0001d54d", + "Vscr;": "\U0001d4b1", + "Vvdash;": "\u22aa", + "Wcirc;": "\u0174", + "Wedge;": "\u22c0", + "Wfr;": "\U0001d51a", + "Wopf;": "\U0001d54e", + "Wscr;": "\U0001d4b2", + "Xfr;": "\U0001d51b", + "Xi;": "\u039e", + "Xopf;": "\U0001d54f", + "Xscr;": "\U0001d4b3", + "YAcy;": "\u042f", + "YIcy;": "\u0407", + "YUcy;": "\u042e", + "Yacute": "\xdd", + "Yacute;": "\xdd", + "Ycirc;": "\u0176", + "Ycy;": "\u042b", + "Yfr;": "\U0001d51c", + "Yopf;": "\U0001d550", + "Yscr;": "\U0001d4b4", + "Yuml;": "\u0178", + "ZHcy;": "\u0416", + "Zacute;": "\u0179", + "Zcaron;": "\u017d", + "Zcy;": "\u0417", + "Zdot;": "\u017b", + "ZeroWidthSpace;": "\u200b", + "Zeta;": "\u0396", + "Zfr;": "\u2128", + "Zopf;": "\u2124", + "Zscr;": "\U0001d4b5", + "aacute": "\xe1", + "aacute;": "\xe1", + "abreve;": "\u0103", + "ac;": "\u223e", + "acE;": "\u223e\u0333", + "acd;": "\u223f", + "acirc": "\xe2", + "acirc;": "\xe2", + "acute": "\xb4", + "acute;": "\xb4", + "acy;": "\u0430", + "aelig": "\xe6", + "aelig;": "\xe6", + "af;": "\u2061", + "afr;": "\U0001d51e", + "agrave": "\xe0", + "agrave;": "\xe0", + "alefsym;": "\u2135", + "aleph;": "\u2135", + "alpha;": "\u03b1", + "amacr;": "\u0101", + "amalg;": "\u2a3f", + "amp": "&", + "amp;": "&", + "and;": "\u2227", + "andand;": "\u2a55", + "andd;": "\u2a5c", + "andslope;": "\u2a58", + "andv;": "\u2a5a", + "ang;": "\u2220", + "ange;": "\u29a4", + "angle;": "\u2220", + "angmsd;": "\u2221", + "angmsdaa;": "\u29a8", + "angmsdab;": "\u29a9", + "angmsdac;": "\u29aa", + "angmsdad;": "\u29ab", + "angmsdae;": "\u29ac", + "angmsdaf;": "\u29ad", + "angmsdag;": "\u29ae", + "angmsdah;": "\u29af", + "angrt;": "\u221f", + "angrtvb;": "\u22be", + "angrtvbd;": "\u299d", + "angsph;": "\u2222", + "angst;": "\xc5", + "angzarr;": "\u237c", + "aogon;": "\u0105", + "aopf;": "\U0001d552", + "ap;": "\u2248", + "apE;": "\u2a70", + "apacir;": "\u2a6f", + "ape;": "\u224a", + "apid;": "\u224b", + "apos;": "'", + "approx;": "\u2248", + "approxeq;": "\u224a", + "aring": "\xe5", + "aring;": "\xe5", + "ascr;": "\U0001d4b6", + "ast;": "*", + "asymp;": "\u2248", + "asympeq;": "\u224d", + "atilde": "\xe3", + "atilde;": "\xe3", + "auml": "\xe4", + "auml;": "\xe4", + "awconint;": "\u2233", + "awint;": "\u2a11", + "bNot;": "\u2aed", + "backcong;": "\u224c", + "backepsilon;": "\u03f6", + "backprime;": "\u2035", + "backsim;": "\u223d", + "backsimeq;": "\u22cd", + "barvee;": "\u22bd", + "barwed;": "\u2305", + "barwedge;": "\u2305", + "bbrk;": "\u23b5", + "bbrktbrk;": "\u23b6", + "bcong;": "\u224c", + "bcy;": "\u0431", + "bdquo;": "\u201e", + "becaus;": "\u2235", + "because;": "\u2235", + "bemptyv;": "\u29b0", + "bepsi;": "\u03f6", + "bernou;": "\u212c", + "beta;": "\u03b2", + "beth;": "\u2136", + "between;": "\u226c", + "bfr;": "\U0001d51f", + "bigcap;": "\u22c2", + "bigcirc;": "\u25ef", + "bigcup;": "\u22c3", + "bigodot;": "\u2a00", + "bigoplus;": "\u2a01", + "bigotimes;": "\u2a02", + "bigsqcup;": "\u2a06", + "bigstar;": "\u2605", + "bigtriangledown;": "\u25bd", + "bigtriangleup;": "\u25b3", + "biguplus;": "\u2a04", + "bigvee;": "\u22c1", + "bigwedge;": "\u22c0", + "bkarow;": "\u290d", + "blacklozenge;": "\u29eb", + "blacksquare;": "\u25aa", + "blacktriangle;": "\u25b4", + "blacktriangledown;": "\u25be", + "blacktriangleleft;": "\u25c2", + "blacktriangleright;": "\u25b8", + "blank;": "\u2423", + "blk12;": "\u2592", + "blk14;": "\u2591", + "blk34;": "\u2593", + "block;": "\u2588", + "bne;": "=\u20e5", + "bnequiv;": "\u2261\u20e5", + "bnot;": "\u2310", + "bopf;": "\U0001d553", + "bot;": "\u22a5", + "bottom;": "\u22a5", + "bowtie;": "\u22c8", + "boxDL;": "\u2557", + "boxDR;": "\u2554", + "boxDl;": "\u2556", + "boxDr;": "\u2553", + "boxH;": "\u2550", + "boxHD;": "\u2566", + "boxHU;": "\u2569", + "boxHd;": "\u2564", + "boxHu;": "\u2567", + "boxUL;": "\u255d", + "boxUR;": "\u255a", + "boxUl;": "\u255c", + "boxUr;": "\u2559", + "boxV;": "\u2551", + "boxVH;": "\u256c", + "boxVL;": "\u2563", + "boxVR;": "\u2560", + "boxVh;": "\u256b", + "boxVl;": "\u2562", + "boxVr;": "\u255f", + "boxbox;": "\u29c9", + "boxdL;": "\u2555", + "boxdR;": "\u2552", + "boxdl;": "\u2510", + "boxdr;": "\u250c", + "boxh;": "\u2500", + "boxhD;": "\u2565", + "boxhU;": "\u2568", + "boxhd;": "\u252c", + "boxhu;": "\u2534", + "boxminus;": "\u229f", + "boxplus;": "\u229e", + "boxtimes;": "\u22a0", + "boxuL;": "\u255b", + "boxuR;": "\u2558", + "boxul;": "\u2518", + "boxur;": "\u2514", + "boxv;": "\u2502", + "boxvH;": "\u256a", + "boxvL;": "\u2561", + "boxvR;": "\u255e", + "boxvh;": "\u253c", + "boxvl;": "\u2524", + "boxvr;": "\u251c", + "bprime;": "\u2035", + "breve;": "\u02d8", + "brvbar": "\xa6", + "brvbar;": "\xa6", + "bscr;": "\U0001d4b7", + "bsemi;": "\u204f", + "bsim;": "\u223d", + "bsime;": "\u22cd", + "bsol;": "\\", + "bsolb;": "\u29c5", + "bsolhsub;": "\u27c8", + "bull;": "\u2022", + "bullet;": "\u2022", + "bump;": "\u224e", + "bumpE;": "\u2aae", + "bumpe;": "\u224f", + "bumpeq;": "\u224f", + "cacute;": "\u0107", + "cap;": "\u2229", + "capand;": "\u2a44", + "capbrcup;": "\u2a49", + "capcap;": "\u2a4b", + "capcup;": "\u2a47", + "capdot;": "\u2a40", + "caps;": "\u2229\ufe00", + "caret;": "\u2041", + "caron;": "\u02c7", + "ccaps;": "\u2a4d", + "ccaron;": "\u010d", + "ccedil": "\xe7", + "ccedil;": "\xe7", + "ccirc;": "\u0109", + "ccups;": "\u2a4c", + "ccupssm;": "\u2a50", + "cdot;": "\u010b", + "cedil": "\xb8", + "cedil;": "\xb8", + "cemptyv;": "\u29b2", + "cent": "\xa2", + "cent;": "\xa2", + "centerdot;": "\xb7", + "cfr;": "\U0001d520", + "chcy;": "\u0447", + "check;": "\u2713", + "checkmark;": "\u2713", + "chi;": "\u03c7", + "cir;": "\u25cb", + "cirE;": "\u29c3", + "circ;": "\u02c6", + "circeq;": "\u2257", + "circlearrowleft;": "\u21ba", + "circlearrowright;": "\u21bb", + "circledR;": "\xae", + "circledS;": "\u24c8", + "circledast;": "\u229b", + "circledcirc;": "\u229a", + "circleddash;": "\u229d", + "cire;": "\u2257", + "cirfnint;": "\u2a10", + "cirmid;": "\u2aef", + "cirscir;": "\u29c2", + "clubs;": "\u2663", + "clubsuit;": "\u2663", + "colon;": ":", + "colone;": "\u2254", + "coloneq;": "\u2254", + "comma;": ",", + "commat;": "@", + "comp;": "\u2201", + "compfn;": "\u2218", + "complement;": "\u2201", + "complexes;": "\u2102", + "cong;": "\u2245", + "congdot;": "\u2a6d", + "conint;": "\u222e", + "copf;": "\U0001d554", + "coprod;": "\u2210", + "copy": "\xa9", + "copy;": "\xa9", + "copysr;": "\u2117", + "crarr;": "\u21b5", + "cross;": "\u2717", + "cscr;": "\U0001d4b8", + "csub;": "\u2acf", + "csube;": "\u2ad1", + "csup;": "\u2ad0", + "csupe;": "\u2ad2", + "ctdot;": "\u22ef", + "cudarrl;": "\u2938", + "cudarrr;": "\u2935", + "cuepr;": "\u22de", + "cuesc;": "\u22df", + "cularr;": "\u21b6", + "cularrp;": "\u293d", + "cup;": "\u222a", + "cupbrcap;": "\u2a48", + "cupcap;": "\u2a46", + "cupcup;": "\u2a4a", + "cupdot;": "\u228d", + "cupor;": "\u2a45", + "cups;": "\u222a\ufe00", + "curarr;": "\u21b7", + "curarrm;": "\u293c", + "curlyeqprec;": "\u22de", + "curlyeqsucc;": "\u22df", + "curlyvee;": "\u22ce", + "curlywedge;": "\u22cf", + "curren": "\xa4", + "curren;": "\xa4", + "curvearrowleft;": "\u21b6", + "curvearrowright;": "\u21b7", + "cuvee;": "\u22ce", + "cuwed;": "\u22cf", + "cwconint;": "\u2232", + "cwint;": "\u2231", + "cylcty;": "\u232d", + "dArr;": "\u21d3", + "dHar;": "\u2965", + "dagger;": "\u2020", + "daleth;": "\u2138", + "darr;": "\u2193", + "dash;": "\u2010", + "dashv;": "\u22a3", + "dbkarow;": "\u290f", + "dblac;": "\u02dd", + "dcaron;": "\u010f", + "dcy;": "\u0434", + "dd;": "\u2146", + "ddagger;": "\u2021", + "ddarr;": "\u21ca", + "ddotseq;": "\u2a77", + "deg": "\xb0", + "deg;": "\xb0", + "delta;": "\u03b4", + "demptyv;": "\u29b1", + "dfisht;": "\u297f", + "dfr;": "\U0001d521", + "dharl;": "\u21c3", + "dharr;": "\u21c2", + "diam;": "\u22c4", + "diamond;": "\u22c4", + "diamondsuit;": "\u2666", + "diams;": "\u2666", + "die;": "\xa8", + "digamma;": "\u03dd", + "disin;": "\u22f2", + "div;": "\xf7", + "divide": "\xf7", + "divide;": "\xf7", + "divideontimes;": "\u22c7", + "divonx;": "\u22c7", + "djcy;": "\u0452", + "dlcorn;": "\u231e", + "dlcrop;": "\u230d", + "dollar;": "$", + "dopf;": "\U0001d555", + "dot;": "\u02d9", + "doteq;": "\u2250", + "doteqdot;": "\u2251", + "dotminus;": "\u2238", + "dotplus;": "\u2214", + "dotsquare;": "\u22a1", + "doublebarwedge;": "\u2306", + "downarrow;": "\u2193", + "downdownarrows;": "\u21ca", + "downharpoonleft;": "\u21c3", + "downharpoonright;": "\u21c2", + "drbkarow;": "\u2910", + "drcorn;": "\u231f", + "drcrop;": "\u230c", + "dscr;": "\U0001d4b9", + "dscy;": "\u0455", + "dsol;": "\u29f6", + "dstrok;": "\u0111", + "dtdot;": "\u22f1", + "dtri;": "\u25bf", + "dtrif;": "\u25be", + "duarr;": "\u21f5", + "duhar;": "\u296f", + "dwangle;": "\u29a6", + "dzcy;": "\u045f", + "dzigrarr;": "\u27ff", + "eDDot;": "\u2a77", + "eDot;": "\u2251", + "eacute": "\xe9", + "eacute;": "\xe9", + "easter;": "\u2a6e", + "ecaron;": "\u011b", + "ecir;": "\u2256", + "ecirc": "\xea", + "ecirc;": "\xea", + "ecolon;": "\u2255", + "ecy;": "\u044d", + "edot;": "\u0117", + "ee;": "\u2147", + "efDot;": "\u2252", + "efr;": "\U0001d522", + "eg;": "\u2a9a", + "egrave": "\xe8", + "egrave;": "\xe8", + "egs;": "\u2a96", + "egsdot;": "\u2a98", + "el;": "\u2a99", + "elinters;": "\u23e7", + "ell;": "\u2113", + "els;": "\u2a95", + "elsdot;": "\u2a97", + "emacr;": "\u0113", + "empty;": "\u2205", + "emptyset;": "\u2205", + "emptyv;": "\u2205", + "emsp13;": "\u2004", + "emsp14;": "\u2005", + "emsp;": "\u2003", + "eng;": "\u014b", + "ensp;": "\u2002", + "eogon;": "\u0119", + "eopf;": "\U0001d556", + "epar;": "\u22d5", + "eparsl;": "\u29e3", + "eplus;": "\u2a71", + "epsi;": "\u03b5", + "epsilon;": "\u03b5", + "epsiv;": "\u03f5", + "eqcirc;": "\u2256", + "eqcolon;": "\u2255", + "eqsim;": "\u2242", + "eqslantgtr;": "\u2a96", + "eqslantless;": "\u2a95", + "equals;": "=", + "equest;": "\u225f", + "equiv;": "\u2261", + "equivDD;": "\u2a78", + "eqvparsl;": "\u29e5", + "erDot;": "\u2253", + "erarr;": "\u2971", + "escr;": "\u212f", + "esdot;": "\u2250", + "esim;": "\u2242", + "eta;": "\u03b7", + "eth": "\xf0", + "eth;": "\xf0", + "euml": "\xeb", + "euml;": "\xeb", + "euro;": "\u20ac", + "excl;": "!", + "exist;": "\u2203", + "expectation;": "\u2130", + "exponentiale;": "\u2147", + "fallingdotseq;": "\u2252", + "fcy;": "\u0444", + "female;": "\u2640", + "ffilig;": "\ufb03", + "fflig;": "\ufb00", + "ffllig;": "\ufb04", + "ffr;": "\U0001d523", + "filig;": "\ufb01", + "fjlig;": "fj", + "flat;": "\u266d", + "fllig;": "\ufb02", + "fltns;": "\u25b1", + "fnof;": "\u0192", + "fopf;": "\U0001d557", + "forall;": "\u2200", + "fork;": "\u22d4", + "forkv;": "\u2ad9", + "fpartint;": "\u2a0d", + "frac12": "\xbd", + "frac12;": "\xbd", + "frac13;": "\u2153", + "frac14": "\xbc", + "frac14;": "\xbc", + "frac15;": "\u2155", + "frac16;": "\u2159", + "frac18;": "\u215b", + "frac23;": "\u2154", + "frac25;": "\u2156", + "frac34": "\xbe", + "frac34;": "\xbe", + "frac35;": "\u2157", + "frac38;": "\u215c", + "frac45;": "\u2158", + "frac56;": "\u215a", + "frac58;": "\u215d", + "frac78;": "\u215e", + "frasl;": "\u2044", + "frown;": "\u2322", + "fscr;": "\U0001d4bb", + "gE;": "\u2267", + "gEl;": "\u2a8c", + "gacute;": "\u01f5", + "gamma;": "\u03b3", + "gammad;": "\u03dd", + "gap;": "\u2a86", + "gbreve;": "\u011f", + "gcirc;": "\u011d", + "gcy;": "\u0433", + "gdot;": "\u0121", + "ge;": "\u2265", + "gel;": "\u22db", + "geq;": "\u2265", + "geqq;": "\u2267", + "geqslant;": "\u2a7e", + "ges;": "\u2a7e", + "gescc;": "\u2aa9", + "gesdot;": "\u2a80", + "gesdoto;": "\u2a82", + "gesdotol;": "\u2a84", + "gesl;": "\u22db\ufe00", + "gesles;": "\u2a94", + "gfr;": "\U0001d524", + "gg;": "\u226b", + "ggg;": "\u22d9", + "gimel;": "\u2137", + "gjcy;": "\u0453", + "gl;": "\u2277", + "glE;": "\u2a92", + "gla;": "\u2aa5", + "glj;": "\u2aa4", + "gnE;": "\u2269", + "gnap;": "\u2a8a", + "gnapprox;": "\u2a8a", + "gne;": "\u2a88", + "gneq;": "\u2a88", + "gneqq;": "\u2269", + "gnsim;": "\u22e7", + "gopf;": "\U0001d558", + "grave;": "`", + "gscr;": "\u210a", + "gsim;": "\u2273", + "gsime;": "\u2a8e", + "gsiml;": "\u2a90", + "gt": ">", + "gt;": ">", + "gtcc;": "\u2aa7", + "gtcir;": "\u2a7a", + "gtdot;": "\u22d7", + "gtlPar;": "\u2995", + "gtquest;": "\u2a7c", + "gtrapprox;": "\u2a86", + "gtrarr;": "\u2978", + "gtrdot;": "\u22d7", + "gtreqless;": "\u22db", + "gtreqqless;": "\u2a8c", + "gtrless;": "\u2277", + "gtrsim;": "\u2273", + "gvertneqq;": "\u2269\ufe00", + "gvnE;": "\u2269\ufe00", + "hArr;": "\u21d4", + "hairsp;": "\u200a", + "half;": "\xbd", + "hamilt;": "\u210b", + "hardcy;": "\u044a", + "harr;": "\u2194", + "harrcir;": "\u2948", + "harrw;": "\u21ad", + "hbar;": "\u210f", + "hcirc;": "\u0125", + "hearts;": "\u2665", + "heartsuit;": "\u2665", + "hellip;": "\u2026", + "hercon;": "\u22b9", + "hfr;": "\U0001d525", + "hksearow;": "\u2925", + "hkswarow;": "\u2926", + "hoarr;": "\u21ff", + "homtht;": "\u223b", + "hookleftarrow;": "\u21a9", + "hookrightarrow;": "\u21aa", + "hopf;": "\U0001d559", + "horbar;": "\u2015", + "hscr;": "\U0001d4bd", + "hslash;": "\u210f", + "hstrok;": "\u0127", + "hybull;": "\u2043", + "hyphen;": "\u2010", + "iacute": "\xed", + "iacute;": "\xed", + "ic;": "\u2063", + "icirc": "\xee", + "icirc;": "\xee", + "icy;": "\u0438", + "iecy;": "\u0435", + "iexcl": "\xa1", + "iexcl;": "\xa1", + "iff;": "\u21d4", + "ifr;": "\U0001d526", + "igrave": "\xec", + "igrave;": "\xec", + "ii;": "\u2148", + "iiiint;": "\u2a0c", + "iiint;": "\u222d", + "iinfin;": "\u29dc", + "iiota;": "\u2129", + "ijlig;": "\u0133", + "imacr;": "\u012b", + "image;": "\u2111", + "imagline;": "\u2110", + "imagpart;": "\u2111", + "imath;": "\u0131", + "imof;": "\u22b7", + "imped;": "\u01b5", + "in;": "\u2208", + "incare;": "\u2105", + "infin;": "\u221e", + "infintie;": "\u29dd", + "inodot;": "\u0131", + "int;": "\u222b", + "intcal;": "\u22ba", + "integers;": "\u2124", + "intercal;": "\u22ba", + "intlarhk;": "\u2a17", + "intprod;": "\u2a3c", + "iocy;": "\u0451", + "iogon;": "\u012f", + "iopf;": "\U0001d55a", + "iota;": "\u03b9", + "iprod;": "\u2a3c", + "iquest": "\xbf", + "iquest;": "\xbf", + "iscr;": "\U0001d4be", + "isin;": "\u2208", + "isinE;": "\u22f9", + "isindot;": "\u22f5", + "isins;": "\u22f4", + "isinsv;": "\u22f3", + "isinv;": "\u2208", + "it;": "\u2062", + "itilde;": "\u0129", + "iukcy;": "\u0456", + "iuml": "\xef", + "iuml;": "\xef", + "jcirc;": "\u0135", + "jcy;": "\u0439", + "jfr;": "\U0001d527", + "jmath;": "\u0237", + "jopf;": "\U0001d55b", + "jscr;": "\U0001d4bf", + "jsercy;": "\u0458", + "jukcy;": "\u0454", + "kappa;": "\u03ba", + "kappav;": "\u03f0", + "kcedil;": "\u0137", + "kcy;": "\u043a", + "kfr;": "\U0001d528", + "kgreen;": "\u0138", + "khcy;": "\u0445", + "kjcy;": "\u045c", + "kopf;": "\U0001d55c", + "kscr;": "\U0001d4c0", + "lAarr;": "\u21da", + "lArr;": "\u21d0", + "lAtail;": "\u291b", + "lBarr;": "\u290e", + "lE;": "\u2266", + "lEg;": "\u2a8b", + "lHar;": "\u2962", + "lacute;": "\u013a", + "laemptyv;": "\u29b4", + "lagran;": "\u2112", + "lambda;": "\u03bb", + "lang;": "\u27e8", + "langd;": "\u2991", + "langle;": "\u27e8", + "lap;": "\u2a85", + "laquo": "\xab", + "laquo;": "\xab", + "larr;": "\u2190", + "larrb;": "\u21e4", + "larrbfs;": "\u291f", + "larrfs;": "\u291d", + "larrhk;": "\u21a9", + "larrlp;": "\u21ab", + "larrpl;": "\u2939", + "larrsim;": "\u2973", + "larrtl;": "\u21a2", + "lat;": "\u2aab", + "latail;": "\u2919", + "late;": "\u2aad", + "lates;": "\u2aad\ufe00", + "lbarr;": "\u290c", + "lbbrk;": "\u2772", + "lbrace;": "{", + "lbrack;": "[", + "lbrke;": "\u298b", + "lbrksld;": "\u298f", + "lbrkslu;": "\u298d", + "lcaron;": "\u013e", + "lcedil;": "\u013c", + "lceil;": "\u2308", + "lcub;": "{", + "lcy;": "\u043b", + "ldca;": "\u2936", + "ldquo;": "\u201c", + "ldquor;": "\u201e", + "ldrdhar;": "\u2967", + "ldrushar;": "\u294b", + "ldsh;": "\u21b2", + "le;": "\u2264", + "leftarrow;": "\u2190", + "leftarrowtail;": "\u21a2", + "leftharpoondown;": "\u21bd", + "leftharpoonup;": "\u21bc", + "leftleftarrows;": "\u21c7", + "leftrightarrow;": "\u2194", + "leftrightarrows;": "\u21c6", + "leftrightharpoons;": "\u21cb", + "leftrightsquigarrow;": "\u21ad", + "leftthreetimes;": "\u22cb", + "leg;": "\u22da", + "leq;": "\u2264", + "leqq;": "\u2266", + "leqslant;": "\u2a7d", + "les;": "\u2a7d", + "lescc;": "\u2aa8", + "lesdot;": "\u2a7f", + "lesdoto;": "\u2a81", + "lesdotor;": "\u2a83", + "lesg;": "\u22da\ufe00", + "lesges;": "\u2a93", + "lessapprox;": "\u2a85", + "lessdot;": "\u22d6", + "lesseqgtr;": "\u22da", + "lesseqqgtr;": "\u2a8b", + "lessgtr;": "\u2276", + "lesssim;": "\u2272", + "lfisht;": "\u297c", + "lfloor;": "\u230a", + "lfr;": "\U0001d529", + "lg;": "\u2276", + "lgE;": "\u2a91", + "lhard;": "\u21bd", + "lharu;": "\u21bc", + "lharul;": "\u296a", + "lhblk;": "\u2584", + "ljcy;": "\u0459", + "ll;": "\u226a", + "llarr;": "\u21c7", + "llcorner;": "\u231e", + "llhard;": "\u296b", + "lltri;": "\u25fa", + "lmidot;": "\u0140", + "lmoust;": "\u23b0", + "lmoustache;": "\u23b0", + "lnE;": "\u2268", + "lnap;": "\u2a89", + "lnapprox;": "\u2a89", + "lne;": "\u2a87", + "lneq;": "\u2a87", + "lneqq;": "\u2268", + "lnsim;": "\u22e6", + "loang;": "\u27ec", + "loarr;": "\u21fd", + "lobrk;": "\u27e6", + "longleftarrow;": "\u27f5", + "longleftrightarrow;": "\u27f7", + "longmapsto;": "\u27fc", + "longrightarrow;": "\u27f6", + "looparrowleft;": "\u21ab", + "looparrowright;": "\u21ac", + "lopar;": "\u2985", + "lopf;": "\U0001d55d", + "loplus;": "\u2a2d", + "lotimes;": "\u2a34", + "lowast;": "\u2217", + "lowbar;": "_", + "loz;": "\u25ca", + "lozenge;": "\u25ca", + "lozf;": "\u29eb", + "lpar;": "(", + "lparlt;": "\u2993", + "lrarr;": "\u21c6", + "lrcorner;": "\u231f", + "lrhar;": "\u21cb", + "lrhard;": "\u296d", + "lrm;": "\u200e", + "lrtri;": "\u22bf", + "lsaquo;": "\u2039", + "lscr;": "\U0001d4c1", + "lsh;": "\u21b0", + "lsim;": "\u2272", + "lsime;": "\u2a8d", + "lsimg;": "\u2a8f", + "lsqb;": "[", + "lsquo;": "\u2018", + "lsquor;": "\u201a", + "lstrok;": "\u0142", + "lt": "<", + "lt;": "<", + "ltcc;": "\u2aa6", + "ltcir;": "\u2a79", + "ltdot;": "\u22d6", + "lthree;": "\u22cb", + "ltimes;": "\u22c9", + "ltlarr;": "\u2976", + "ltquest;": "\u2a7b", + "ltrPar;": "\u2996", + "ltri;": "\u25c3", + "ltrie;": "\u22b4", + "ltrif;": "\u25c2", + "lurdshar;": "\u294a", + "luruhar;": "\u2966", + "lvertneqq;": "\u2268\ufe00", + "lvnE;": "\u2268\ufe00", + "mDDot;": "\u223a", + "macr": "\xaf", + "macr;": "\xaf", + "male;": "\u2642", + "malt;": "\u2720", + "maltese;": "\u2720", + "map;": "\u21a6", + "mapsto;": "\u21a6", + "mapstodown;": "\u21a7", + "mapstoleft;": "\u21a4", + "mapstoup;": "\u21a5", + "marker;": "\u25ae", + "mcomma;": "\u2a29", + "mcy;": "\u043c", + "mdash;": "\u2014", + "measuredangle;": "\u2221", + "mfr;": "\U0001d52a", + "mho;": "\u2127", + "micro": "\xb5", + "micro;": "\xb5", + "mid;": "\u2223", + "midast;": "*", + "midcir;": "\u2af0", + "middot": "\xb7", + "middot;": "\xb7", + "minus;": "\u2212", + "minusb;": "\u229f", + "minusd;": "\u2238", + "minusdu;": "\u2a2a", + "mlcp;": "\u2adb", + "mldr;": "\u2026", + "mnplus;": "\u2213", + "models;": "\u22a7", + "mopf;": "\U0001d55e", + "mp;": "\u2213", + "mscr;": "\U0001d4c2", + "mstpos;": "\u223e", + "mu;": "\u03bc", + "multimap;": "\u22b8", + "mumap;": "\u22b8", + "nGg;": "\u22d9\u0338", + "nGt;": "\u226b\u20d2", + "nGtv;": "\u226b\u0338", + "nLeftarrow;": "\u21cd", + "nLeftrightarrow;": "\u21ce", + "nLl;": "\u22d8\u0338", + "nLt;": "\u226a\u20d2", + "nLtv;": "\u226a\u0338", + "nRightarrow;": "\u21cf", + "nVDash;": "\u22af", + "nVdash;": "\u22ae", + "nabla;": "\u2207", + "nacute;": "\u0144", + "nang;": "\u2220\u20d2", + "nap;": "\u2249", + "napE;": "\u2a70\u0338", + "napid;": "\u224b\u0338", + "napos;": "\u0149", + "napprox;": "\u2249", + "natur;": "\u266e", + "natural;": "\u266e", + "naturals;": "\u2115", + "nbsp": "\xa0", + "nbsp;": "\xa0", + "nbump;": "\u224e\u0338", + "nbumpe;": "\u224f\u0338", + "ncap;": "\u2a43", + "ncaron;": "\u0148", + "ncedil;": "\u0146", + "ncong;": "\u2247", + "ncongdot;": "\u2a6d\u0338", + "ncup;": "\u2a42", + "ncy;": "\u043d", + "ndash;": "\u2013", + "ne;": "\u2260", + "neArr;": "\u21d7", + "nearhk;": "\u2924", + "nearr;": "\u2197", + "nearrow;": "\u2197", + "nedot;": "\u2250\u0338", + "nequiv;": "\u2262", + "nesear;": "\u2928", + "nesim;": "\u2242\u0338", + "nexist;": "\u2204", + "nexists;": "\u2204", + "nfr;": "\U0001d52b", + "ngE;": "\u2267\u0338", + "nge;": "\u2271", + "ngeq;": "\u2271", + "ngeqq;": "\u2267\u0338", + "ngeqslant;": "\u2a7e\u0338", + "nges;": "\u2a7e\u0338", + "ngsim;": "\u2275", + "ngt;": "\u226f", + "ngtr;": "\u226f", + "nhArr;": "\u21ce", + "nharr;": "\u21ae", + "nhpar;": "\u2af2", + "ni;": "\u220b", + "nis;": "\u22fc", + "nisd;": "\u22fa", + "niv;": "\u220b", + "njcy;": "\u045a", + "nlArr;": "\u21cd", + "nlE;": "\u2266\u0338", + "nlarr;": "\u219a", + "nldr;": "\u2025", + "nle;": "\u2270", + "nleftarrow;": "\u219a", + "nleftrightarrow;": "\u21ae", + "nleq;": "\u2270", + "nleqq;": "\u2266\u0338", + "nleqslant;": "\u2a7d\u0338", + "nles;": "\u2a7d\u0338", + "nless;": "\u226e", + "nlsim;": "\u2274", + "nlt;": "\u226e", + "nltri;": "\u22ea", + "nltrie;": "\u22ec", + "nmid;": "\u2224", + "nopf;": "\U0001d55f", + "not": "\xac", + "not;": "\xac", + "notin;": "\u2209", + "notinE;": "\u22f9\u0338", + "notindot;": "\u22f5\u0338", + "notinva;": "\u2209", + "notinvb;": "\u22f7", + "notinvc;": "\u22f6", + "notni;": "\u220c", + "notniva;": "\u220c", + "notnivb;": "\u22fe", + "notnivc;": "\u22fd", + "npar;": "\u2226", + "nparallel;": "\u2226", + "nparsl;": "\u2afd\u20e5", + "npart;": "\u2202\u0338", + "npolint;": "\u2a14", + "npr;": "\u2280", + "nprcue;": "\u22e0", + "npre;": "\u2aaf\u0338", + "nprec;": "\u2280", + "npreceq;": "\u2aaf\u0338", + "nrArr;": "\u21cf", + "nrarr;": "\u219b", + "nrarrc;": "\u2933\u0338", + "nrarrw;": "\u219d\u0338", + "nrightarrow;": "\u219b", + "nrtri;": "\u22eb", + "nrtrie;": "\u22ed", + "nsc;": "\u2281", + "nsccue;": "\u22e1", + "nsce;": "\u2ab0\u0338", + "nscr;": "\U0001d4c3", + "nshortmid;": "\u2224", + "nshortparallel;": "\u2226", + "nsim;": "\u2241", + "nsime;": "\u2244", + "nsimeq;": "\u2244", + "nsmid;": "\u2224", + "nspar;": "\u2226", + "nsqsube;": "\u22e2", + "nsqsupe;": "\u22e3", + "nsub;": "\u2284", + "nsubE;": "\u2ac5\u0338", + "nsube;": "\u2288", + "nsubset;": "\u2282\u20d2", + "nsubseteq;": "\u2288", + "nsubseteqq;": "\u2ac5\u0338", + "nsucc;": "\u2281", + "nsucceq;": "\u2ab0\u0338", + "nsup;": "\u2285", + "nsupE;": "\u2ac6\u0338", + "nsupe;": "\u2289", + "nsupset;": "\u2283\u20d2", + "nsupseteq;": "\u2289", + "nsupseteqq;": "\u2ac6\u0338", + "ntgl;": "\u2279", + "ntilde": "\xf1", + "ntilde;": "\xf1", + "ntlg;": "\u2278", + "ntriangleleft;": "\u22ea", + "ntrianglelefteq;": "\u22ec", + "ntriangleright;": "\u22eb", + "ntrianglerighteq;": "\u22ed", + "nu;": "\u03bd", + "num;": "#", + "numero;": "\u2116", + "numsp;": "\u2007", + "nvDash;": "\u22ad", + "nvHarr;": "\u2904", + "nvap;": "\u224d\u20d2", + "nvdash;": "\u22ac", + "nvge;": "\u2265\u20d2", + "nvgt;": ">\u20d2", + "nvinfin;": "\u29de", + "nvlArr;": "\u2902", + "nvle;": "\u2264\u20d2", + "nvlt;": "<\u20d2", + "nvltrie;": "\u22b4\u20d2", + "nvrArr;": "\u2903", + "nvrtrie;": "\u22b5\u20d2", + "nvsim;": "\u223c\u20d2", + "nwArr;": "\u21d6", + "nwarhk;": "\u2923", + "nwarr;": "\u2196", + "nwarrow;": "\u2196", + "nwnear;": "\u2927", + "oS;": "\u24c8", + "oacute": "\xf3", + "oacute;": "\xf3", + "oast;": "\u229b", + "ocir;": "\u229a", + "ocirc": "\xf4", + "ocirc;": "\xf4", + "ocy;": "\u043e", + "odash;": "\u229d", + "odblac;": "\u0151", + "odiv;": "\u2a38", + "odot;": "\u2299", + "odsold;": "\u29bc", + "oelig;": "\u0153", + "ofcir;": "\u29bf", + "ofr;": "\U0001d52c", + "ogon;": "\u02db", + "ograve": "\xf2", + "ograve;": "\xf2", + "ogt;": "\u29c1", + "ohbar;": "\u29b5", + "ohm;": "\u03a9", + "oint;": "\u222e", + "olarr;": "\u21ba", + "olcir;": "\u29be", + "olcross;": "\u29bb", + "oline;": "\u203e", + "olt;": "\u29c0", + "omacr;": "\u014d", + "omega;": "\u03c9", + "omicron;": "\u03bf", + "omid;": "\u29b6", + "ominus;": "\u2296", + "oopf;": "\U0001d560", + "opar;": "\u29b7", + "operp;": "\u29b9", + "oplus;": "\u2295", + "or;": "\u2228", + "orarr;": "\u21bb", + "ord;": "\u2a5d", + "order;": "\u2134", + "orderof;": "\u2134", + "ordf": "\xaa", + "ordf;": "\xaa", + "ordm": "\xba", + "ordm;": "\xba", + "origof;": "\u22b6", + "oror;": "\u2a56", + "orslope;": "\u2a57", + "orv;": "\u2a5b", + "oscr;": "\u2134", + "oslash": "\xf8", + "oslash;": "\xf8", + "osol;": "\u2298", + "otilde": "\xf5", + "otilde;": "\xf5", + "otimes;": "\u2297", + "otimesas;": "\u2a36", + "ouml": "\xf6", + "ouml;": "\xf6", + "ovbar;": "\u233d", + "par;": "\u2225", + "para": "\xb6", + "para;": "\xb6", + "parallel;": "\u2225", + "parsim;": "\u2af3", + "parsl;": "\u2afd", + "part;": "\u2202", + "pcy;": "\u043f", + "percnt;": "%", + "period;": ".", + "permil;": "\u2030", + "perp;": "\u22a5", + "pertenk;": "\u2031", + "pfr;": "\U0001d52d", + "phi;": "\u03c6", + "phiv;": "\u03d5", + "phmmat;": "\u2133", + "phone;": "\u260e", + "pi;": "\u03c0", + "pitchfork;": "\u22d4", + "piv;": "\u03d6", + "planck;": "\u210f", + "planckh;": "\u210e", + "plankv;": "\u210f", + "plus;": "+", + "plusacir;": "\u2a23", + "plusb;": "\u229e", + "pluscir;": "\u2a22", + "plusdo;": "\u2214", + "plusdu;": "\u2a25", + "pluse;": "\u2a72", + "plusmn": "\xb1", + "plusmn;": "\xb1", + "plussim;": "\u2a26", + "plustwo;": "\u2a27", + "pm;": "\xb1", + "pointint;": "\u2a15", + "popf;": "\U0001d561", + "pound": "\xa3", + "pound;": "\xa3", + "pr;": "\u227a", + "prE;": "\u2ab3", + "prap;": "\u2ab7", + "prcue;": "\u227c", + "pre;": "\u2aaf", + "prec;": "\u227a", + "precapprox;": "\u2ab7", + "preccurlyeq;": "\u227c", + "preceq;": "\u2aaf", + "precnapprox;": "\u2ab9", + "precneqq;": "\u2ab5", + "precnsim;": "\u22e8", + "precsim;": "\u227e", + "prime;": "\u2032", + "primes;": "\u2119", + "prnE;": "\u2ab5", + "prnap;": "\u2ab9", + "prnsim;": "\u22e8", + "prod;": "\u220f", + "profalar;": "\u232e", + "profline;": "\u2312", + "profsurf;": "\u2313", + "prop;": "\u221d", + "propto;": "\u221d", + "prsim;": "\u227e", + "prurel;": "\u22b0", + "pscr;": "\U0001d4c5", + "psi;": "\u03c8", + "puncsp;": "\u2008", + "qfr;": "\U0001d52e", + "qint;": "\u2a0c", + "qopf;": "\U0001d562", + "qprime;": "\u2057", + "qscr;": "\U0001d4c6", + "quaternions;": "\u210d", + "quatint;": "\u2a16", + "quest;": "?", + "questeq;": "\u225f", + "quot": "\"", + "quot;": "\"", + "rAarr;": "\u21db", + "rArr;": "\u21d2", + "rAtail;": "\u291c", + "rBarr;": "\u290f", + "rHar;": "\u2964", + "race;": "\u223d\u0331", + "racute;": "\u0155", + "radic;": "\u221a", + "raemptyv;": "\u29b3", + "rang;": "\u27e9", + "rangd;": "\u2992", + "range;": "\u29a5", + "rangle;": "\u27e9", + "raquo": "\xbb", + "raquo;": "\xbb", + "rarr;": "\u2192", + "rarrap;": "\u2975", + "rarrb;": "\u21e5", + "rarrbfs;": "\u2920", + "rarrc;": "\u2933", + "rarrfs;": "\u291e", + "rarrhk;": "\u21aa", + "rarrlp;": "\u21ac", + "rarrpl;": "\u2945", + "rarrsim;": "\u2974", + "rarrtl;": "\u21a3", + "rarrw;": "\u219d", + "ratail;": "\u291a", + "ratio;": "\u2236", + "rationals;": "\u211a", + "rbarr;": "\u290d", + "rbbrk;": "\u2773", + "rbrace;": "}", + "rbrack;": "]", + "rbrke;": "\u298c", + "rbrksld;": "\u298e", + "rbrkslu;": "\u2990", + "rcaron;": "\u0159", + "rcedil;": "\u0157", + "rceil;": "\u2309", + "rcub;": "}", + "rcy;": "\u0440", + "rdca;": "\u2937", + "rdldhar;": "\u2969", + "rdquo;": "\u201d", + "rdquor;": "\u201d", + "rdsh;": "\u21b3", + "real;": "\u211c", + "realine;": "\u211b", + "realpart;": "\u211c", + "reals;": "\u211d", + "rect;": "\u25ad", + "reg": "\xae", + "reg;": "\xae", + "rfisht;": "\u297d", + "rfloor;": "\u230b", + "rfr;": "\U0001d52f", + "rhard;": "\u21c1", + "rharu;": "\u21c0", + "rharul;": "\u296c", + "rho;": "\u03c1", + "rhov;": "\u03f1", + "rightarrow;": "\u2192", + "rightarrowtail;": "\u21a3", + "rightharpoondown;": "\u21c1", + "rightharpoonup;": "\u21c0", + "rightleftarrows;": "\u21c4", + "rightleftharpoons;": "\u21cc", + "rightrightarrows;": "\u21c9", + "rightsquigarrow;": "\u219d", + "rightthreetimes;": "\u22cc", + "ring;": "\u02da", + "risingdotseq;": "\u2253", + "rlarr;": "\u21c4", + "rlhar;": "\u21cc", + "rlm;": "\u200f", + "rmoust;": "\u23b1", + "rmoustache;": "\u23b1", + "rnmid;": "\u2aee", + "roang;": "\u27ed", + "roarr;": "\u21fe", + "robrk;": "\u27e7", + "ropar;": "\u2986", + "ropf;": "\U0001d563", + "roplus;": "\u2a2e", + "rotimes;": "\u2a35", + "rpar;": ")", + "rpargt;": "\u2994", + "rppolint;": "\u2a12", + "rrarr;": "\u21c9", + "rsaquo;": "\u203a", + "rscr;": "\U0001d4c7", + "rsh;": "\u21b1", + "rsqb;": "]", + "rsquo;": "\u2019", + "rsquor;": "\u2019", + "rthree;": "\u22cc", + "rtimes;": "\u22ca", + "rtri;": "\u25b9", + "rtrie;": "\u22b5", + "rtrif;": "\u25b8", + "rtriltri;": "\u29ce", + "ruluhar;": "\u2968", + "rx;": "\u211e", + "sacute;": "\u015b", + "sbquo;": "\u201a", + "sc;": "\u227b", + "scE;": "\u2ab4", + "scap;": "\u2ab8", + "scaron;": "\u0161", + "sccue;": "\u227d", + "sce;": "\u2ab0", + "scedil;": "\u015f", + "scirc;": "\u015d", + "scnE;": "\u2ab6", + "scnap;": "\u2aba", + "scnsim;": "\u22e9", + "scpolint;": "\u2a13", + "scsim;": "\u227f", + "scy;": "\u0441", + "sdot;": "\u22c5", + "sdotb;": "\u22a1", + "sdote;": "\u2a66", + "seArr;": "\u21d8", + "searhk;": "\u2925", + "searr;": "\u2198", + "searrow;": "\u2198", + "sect": "\xa7", + "sect;": "\xa7", + "semi;": ";", + "seswar;": "\u2929", + "setminus;": "\u2216", + "setmn;": "\u2216", + "sext;": "\u2736", + "sfr;": "\U0001d530", + "sfrown;": "\u2322", + "sharp;": "\u266f", + "shchcy;": "\u0449", + "shcy;": "\u0448", + "shortmid;": "\u2223", + "shortparallel;": "\u2225", + "shy": "\xad", + "shy;": "\xad", + "sigma;": "\u03c3", + "sigmaf;": "\u03c2", + "sigmav;": "\u03c2", + "sim;": "\u223c", + "simdot;": "\u2a6a", + "sime;": "\u2243", + "simeq;": "\u2243", + "simg;": "\u2a9e", + "simgE;": "\u2aa0", + "siml;": "\u2a9d", + "simlE;": "\u2a9f", + "simne;": "\u2246", + "simplus;": "\u2a24", + "simrarr;": "\u2972", + "slarr;": "\u2190", + "smallsetminus;": "\u2216", + "smashp;": "\u2a33", + "smeparsl;": "\u29e4", + "smid;": "\u2223", + "smile;": "\u2323", + "smt;": "\u2aaa", + "smte;": "\u2aac", + "smtes;": "\u2aac\ufe00", + "softcy;": "\u044c", + "sol;": "/", + "solb;": "\u29c4", + "solbar;": "\u233f", + "sopf;": "\U0001d564", + "spades;": "\u2660", + "spadesuit;": "\u2660", + "spar;": "\u2225", + "sqcap;": "\u2293", + "sqcaps;": "\u2293\ufe00", + "sqcup;": "\u2294", + "sqcups;": "\u2294\ufe00", + "sqsub;": "\u228f", + "sqsube;": "\u2291", + "sqsubset;": "\u228f", + "sqsubseteq;": "\u2291", + "sqsup;": "\u2290", + "sqsupe;": "\u2292", + "sqsupset;": "\u2290", + "sqsupseteq;": "\u2292", + "squ;": "\u25a1", + "square;": "\u25a1", + "squarf;": "\u25aa", + "squf;": "\u25aa", + "srarr;": "\u2192", + "sscr;": "\U0001d4c8", + "ssetmn;": "\u2216", + "ssmile;": "\u2323", + "sstarf;": "\u22c6", + "star;": "\u2606", + "starf;": "\u2605", + "straightepsilon;": "\u03f5", + "straightphi;": "\u03d5", + "strns;": "\xaf", + "sub;": "\u2282", + "subE;": "\u2ac5", + "subdot;": "\u2abd", + "sube;": "\u2286", + "subedot;": "\u2ac3", + "submult;": "\u2ac1", + "subnE;": "\u2acb", + "subne;": "\u228a", + "subplus;": "\u2abf", + "subrarr;": "\u2979", + "subset;": "\u2282", + "subseteq;": "\u2286", + "subseteqq;": "\u2ac5", + "subsetneq;": "\u228a", + "subsetneqq;": "\u2acb", + "subsim;": "\u2ac7", + "subsub;": "\u2ad5", + "subsup;": "\u2ad3", + "succ;": "\u227b", + "succapprox;": "\u2ab8", + "succcurlyeq;": "\u227d", + "succeq;": "\u2ab0", + "succnapprox;": "\u2aba", + "succneqq;": "\u2ab6", + "succnsim;": "\u22e9", + "succsim;": "\u227f", + "sum;": "\u2211", + "sung;": "\u266a", + "sup1": "\xb9", + "sup1;": "\xb9", + "sup2": "\xb2", + "sup2;": "\xb2", + "sup3": "\xb3", + "sup3;": "\xb3", + "sup;": "\u2283", + "supE;": "\u2ac6", + "supdot;": "\u2abe", + "supdsub;": "\u2ad8", + "supe;": "\u2287", + "supedot;": "\u2ac4", + "suphsol;": "\u27c9", + "suphsub;": "\u2ad7", + "suplarr;": "\u297b", + "supmult;": "\u2ac2", + "supnE;": "\u2acc", + "supne;": "\u228b", + "supplus;": "\u2ac0", + "supset;": "\u2283", + "supseteq;": "\u2287", + "supseteqq;": "\u2ac6", + "supsetneq;": "\u228b", + "supsetneqq;": "\u2acc", + "supsim;": "\u2ac8", + "supsub;": "\u2ad4", + "supsup;": "\u2ad6", + "swArr;": "\u21d9", + "swarhk;": "\u2926", + "swarr;": "\u2199", + "swarrow;": "\u2199", + "swnwar;": "\u292a", + "szlig": "\xdf", + "szlig;": "\xdf", + "target;": "\u2316", + "tau;": "\u03c4", + "tbrk;": "\u23b4", + "tcaron;": "\u0165", + "tcedil;": "\u0163", + "tcy;": "\u0442", + "tdot;": "\u20db", + "telrec;": "\u2315", + "tfr;": "\U0001d531", + "there4;": "\u2234", + "therefore;": "\u2234", + "theta;": "\u03b8", + "thetasym;": "\u03d1", + "thetav;": "\u03d1", + "thickapprox;": "\u2248", + "thicksim;": "\u223c", + "thinsp;": "\u2009", + "thkap;": "\u2248", + "thksim;": "\u223c", + "thorn": "\xfe", + "thorn;": "\xfe", + "tilde;": "\u02dc", + "times": "\xd7", + "times;": "\xd7", + "timesb;": "\u22a0", + "timesbar;": "\u2a31", + "timesd;": "\u2a30", + "tint;": "\u222d", + "toea;": "\u2928", + "top;": "\u22a4", + "topbot;": "\u2336", + "topcir;": "\u2af1", + "topf;": "\U0001d565", + "topfork;": "\u2ada", + "tosa;": "\u2929", + "tprime;": "\u2034", + "trade;": "\u2122", + "triangle;": "\u25b5", + "triangledown;": "\u25bf", + "triangleleft;": "\u25c3", + "trianglelefteq;": "\u22b4", + "triangleq;": "\u225c", + "triangleright;": "\u25b9", + "trianglerighteq;": "\u22b5", + "tridot;": "\u25ec", + "trie;": "\u225c", + "triminus;": "\u2a3a", + "triplus;": "\u2a39", + "trisb;": "\u29cd", + "tritime;": "\u2a3b", + "trpezium;": "\u23e2", + "tscr;": "\U0001d4c9", + "tscy;": "\u0446", + "tshcy;": "\u045b", + "tstrok;": "\u0167", + "twixt;": "\u226c", + "twoheadleftarrow;": "\u219e", + "twoheadrightarrow;": "\u21a0", + "uArr;": "\u21d1", + "uHar;": "\u2963", + "uacute": "\xfa", + "uacute;": "\xfa", + "uarr;": "\u2191", + "ubrcy;": "\u045e", + "ubreve;": "\u016d", + "ucirc": "\xfb", + "ucirc;": "\xfb", + "ucy;": "\u0443", + "udarr;": "\u21c5", + "udblac;": "\u0171", + "udhar;": "\u296e", + "ufisht;": "\u297e", + "ufr;": "\U0001d532", + "ugrave": "\xf9", + "ugrave;": "\xf9", + "uharl;": "\u21bf", + "uharr;": "\u21be", + "uhblk;": "\u2580", + "ulcorn;": "\u231c", + "ulcorner;": "\u231c", + "ulcrop;": "\u230f", + "ultri;": "\u25f8", + "umacr;": "\u016b", + "uml": "\xa8", + "uml;": "\xa8", + "uogon;": "\u0173", + "uopf;": "\U0001d566", + "uparrow;": "\u2191", + "updownarrow;": "\u2195", + "upharpoonleft;": "\u21bf", + "upharpoonright;": "\u21be", + "uplus;": "\u228e", + "upsi;": "\u03c5", + "upsih;": "\u03d2", + "upsilon;": "\u03c5", + "upuparrows;": "\u21c8", + "urcorn;": "\u231d", + "urcorner;": "\u231d", + "urcrop;": "\u230e", + "uring;": "\u016f", + "urtri;": "\u25f9", + "uscr;": "\U0001d4ca", + "utdot;": "\u22f0", + "utilde;": "\u0169", + "utri;": "\u25b5", + "utrif;": "\u25b4", + "uuarr;": "\u21c8", + "uuml": "\xfc", + "uuml;": "\xfc", + "uwangle;": "\u29a7", + "vArr;": "\u21d5", + "vBar;": "\u2ae8", + "vBarv;": "\u2ae9", + "vDash;": "\u22a8", + "vangrt;": "\u299c", + "varepsilon;": "\u03f5", + "varkappa;": "\u03f0", + "varnothing;": "\u2205", + "varphi;": "\u03d5", + "varpi;": "\u03d6", + "varpropto;": "\u221d", + "varr;": "\u2195", + "varrho;": "\u03f1", + "varsigma;": "\u03c2", + "varsubsetneq;": "\u228a\ufe00", + "varsubsetneqq;": "\u2acb\ufe00", + "varsupsetneq;": "\u228b\ufe00", + "varsupsetneqq;": "\u2acc\ufe00", + "vartheta;": "\u03d1", + "vartriangleleft;": "\u22b2", + "vartriangleright;": "\u22b3", + "vcy;": "\u0432", + "vdash;": "\u22a2", + "vee;": "\u2228", + "veebar;": "\u22bb", + "veeeq;": "\u225a", + "vellip;": "\u22ee", + "verbar;": "|", + "vert;": "|", + "vfr;": "\U0001d533", + "vltri;": "\u22b2", + "vnsub;": "\u2282\u20d2", + "vnsup;": "\u2283\u20d2", + "vopf;": "\U0001d567", + "vprop;": "\u221d", + "vrtri;": "\u22b3", + "vscr;": "\U0001d4cb", + "vsubnE;": "\u2acb\ufe00", + "vsubne;": "\u228a\ufe00", + "vsupnE;": "\u2acc\ufe00", + "vsupne;": "\u228b\ufe00", + "vzigzag;": "\u299a", + "wcirc;": "\u0175", + "wedbar;": "\u2a5f", + "wedge;": "\u2227", + "wedgeq;": "\u2259", + "weierp;": "\u2118", + "wfr;": "\U0001d534", + "wopf;": "\U0001d568", + "wp;": "\u2118", + "wr;": "\u2240", + "wreath;": "\u2240", + "wscr;": "\U0001d4cc", + "xcap;": "\u22c2", + "xcirc;": "\u25ef", + "xcup;": "\u22c3", + "xdtri;": "\u25bd", + "xfr;": "\U0001d535", + "xhArr;": "\u27fa", + "xharr;": "\u27f7", + "xi;": "\u03be", + "xlArr;": "\u27f8", + "xlarr;": "\u27f5", + "xmap;": "\u27fc", + "xnis;": "\u22fb", + "xodot;": "\u2a00", + "xopf;": "\U0001d569", + "xoplus;": "\u2a01", + "xotime;": "\u2a02", + "xrArr;": "\u27f9", + "xrarr;": "\u27f6", + "xscr;": "\U0001d4cd", + "xsqcup;": "\u2a06", + "xuplus;": "\u2a04", + "xutri;": "\u25b3", + "xvee;": "\u22c1", + "xwedge;": "\u22c0", + "yacute": "\xfd", + "yacute;": "\xfd", + "yacy;": "\u044f", + "ycirc;": "\u0177", + "ycy;": "\u044b", + "yen": "\xa5", + "yen;": "\xa5", + "yfr;": "\U0001d536", + "yicy;": "\u0457", + "yopf;": "\U0001d56a", + "yscr;": "\U0001d4ce", + "yucy;": "\u044e", + "yuml": "\xff", + "yuml;": "\xff", + "zacute;": "\u017a", + "zcaron;": "\u017e", + "zcy;": "\u0437", + "zdot;": "\u017c", + "zeetrf;": "\u2128", + "zeta;": "\u03b6", + "zfr;": "\U0001d537", + "zhcy;": "\u0436", + "zigrarr;": "\u21dd", + "zopf;": "\U0001d56b", + "zscr;": "\U0001d4cf", + "zwj;": "\u200d", + "zwnj;": "\u200c", } replacementCharacters = { - 0x0:u"\uFFFD", - 0x0d:u"\u000D", - 0x80:u"\u20AC", - 0x81:u"\u0081", - 0x81:u"\u0081", - 0x82:u"\u201A", - 0x83:u"\u0192", - 0x84:u"\u201E", - 0x85:u"\u2026", - 0x86:u"\u2020", - 0x87:u"\u2021", - 0x88:u"\u02C6", - 0x89:u"\u2030", - 0x8A:u"\u0160", - 0x8B:u"\u2039", - 0x8C:u"\u0152", - 0x8D:u"\u008D", - 0x8E:u"\u017D", - 0x8F:u"\u008F", - 0x90:u"\u0090", - 0x91:u"\u2018", - 0x92:u"\u2019", - 0x93:u"\u201C", - 0x94:u"\u201D", - 0x95:u"\u2022", - 0x96:u"\u2013", - 0x97:u"\u2014", - 0x98:u"\u02DC", - 0x99:u"\u2122", - 0x9A:u"\u0161", - 0x9B:u"\u203A", - 0x9C:u"\u0153", - 0x9D:u"\u009D", - 0x9E:u"\u017E", - 0x9F:u"\u0178", + 0x0: "\uFFFD", + 0x0d: "\u000D", + 0x80: "\u20AC", + 0x81: "\u0081", + 0x81: "\u0081", + 0x82: "\u201A", + 0x83: "\u0192", + 0x84: "\u201E", + 0x85: "\u2026", + 0x86: "\u2020", + 0x87: "\u2021", + 0x88: "\u02C6", + 0x89: "\u2030", + 0x8A: "\u0160", + 0x8B: "\u2039", + 0x8C: "\u0152", + 0x8D: "\u008D", + 0x8E: "\u017D", + 0x8F: "\u008F", + 0x90: "\u0090", + 0x91: "\u2018", + 0x92: "\u2019", + 0x93: "\u201C", + 0x94: "\u201D", + 0x95: "\u2022", + 0x96: "\u2013", + 0x97: "\u2014", + 0x98: "\u02DC", + 0x99: "\u2122", + 0x9A: "\u0161", + 0x9B: "\u203A", + 0x9C: "\u0153", + 0x9D: "\u009D", + 0x9E: "\u017E", + 0x9F: "\u0178", } encodings = { @@ -3061,25 +3078,27 @@ encodings = { 'x-x-big5': 'big5'} tokenTypes = { - "Doctype":0, - "Characters":1, - "SpaceCharacters":2, - "StartTag":3, - "EndTag":4, - "EmptyTag":5, - "Comment":6, - "ParseError":7 + "Doctype": 0, + "Characters": 1, + "SpaceCharacters": 2, + "StartTag": 3, + "EndTag": 4, + "EmptyTag": 5, + "Comment": 6, + "ParseError": 7 } -tagTokenTypes = frozenset((tokenTypes["StartTag"], tokenTypes["EndTag"], +tagTokenTypes = frozenset((tokenTypes["StartTag"], tokenTypes["EndTag"], tokenTypes["EmptyTag"])) -prefixes = dict([(v,k) for k,v in namespaces.iteritems()]) +prefixes = dict([(v, k) for k, v in namespaces.items()]) prefixes["http://www.w3.org/1998/Math/MathML"] = "math" + class DataLossWarning(UserWarning): pass + class ReparseException(Exception): pass diff --git a/libs/html5lib/filters/_base.py b/libs/html5lib/filters/_base.py index bca94ada..c7dbaed0 100644 --- a/libs/html5lib/filters/_base.py +++ b/libs/html5lib/filters/_base.py @@ -1,3 +1,5 @@ +from __future__ import absolute_import, division, unicode_literals + class Filter(object): def __init__(self, source): diff --git a/libs/html5lib/filters/alphabeticalattributes.py b/libs/html5lib/filters/alphabeticalattributes.py new file mode 100644 index 00000000..fed6996c --- /dev/null +++ b/libs/html5lib/filters/alphabeticalattributes.py @@ -0,0 +1,20 @@ +from __future__ import absolute_import, division, unicode_literals + +from . import _base + +try: + from collections import OrderedDict +except ImportError: + from ordereddict import OrderedDict + + +class Filter(_base.Filter): + def __iter__(self): + for token in _base.Filter.__iter__(self): + if token["type"] in ("StartTag", "EmptyTag"): + attrs = OrderedDict() + for name, value in sorted(token["data"].items(), + key=lambda x: x[0]): + attrs[name] = value + token["data"] = attrs + yield token diff --git a/libs/html5lib/filters/formfiller.py b/libs/html5lib/filters/formfiller.py deleted file mode 100644 index 94001714..00000000 --- a/libs/html5lib/filters/formfiller.py +++ /dev/null @@ -1,127 +0,0 @@ -# -# The goal is to finally have a form filler where you pass data for -# each form, using the algorithm for "Seeding a form with initial values" -# See http://www.whatwg.org/specs/web-forms/current-work/#seeding -# - -import _base - -from html5lib.constants import spaceCharacters -spaceCharacters = u"".join(spaceCharacters) - -class SimpleFilter(_base.Filter): - def __init__(self, source, fieldStorage): - _base.Filter.__init__(self, source) - self.fieldStorage = fieldStorage - - def __iter__(self): - field_indices = {} - state = None - field_name = None - for token in _base.Filter.__iter__(self): - type = token["type"] - if type in ("StartTag", "EmptyTag"): - name = token["name"].lower() - if name == "input": - field_name = None - field_type = None - input_value_index = -1 - input_checked_index = -1 - for i,(n,v) in enumerate(token["data"]): - n = n.lower() - if n == u"name": - field_name = v.strip(spaceCharacters) - elif n == u"type": - field_type = v.strip(spaceCharacters) - elif n == u"checked": - input_checked_index = i - elif n == u"value": - input_value_index = i - - value_list = self.fieldStorage.getlist(field_name) - field_index = field_indices.setdefault(field_name, 0) - if field_index < len(value_list): - value = value_list[field_index] - else: - value = "" - - if field_type in (u"checkbox", u"radio"): - if value_list: - if token["data"][input_value_index][1] == value: - if input_checked_index < 0: - token["data"].append((u"checked", u"")) - field_indices[field_name] = field_index + 1 - elif input_checked_index >= 0: - del token["data"][input_checked_index] - - elif field_type not in (u"button", u"submit", u"reset"): - if input_value_index >= 0: - token["data"][input_value_index] = (u"value", value) - else: - token["data"].append((u"value", value)) - field_indices[field_name] = field_index + 1 - - field_type = None - field_name = None - - elif name == "textarea": - field_type = "textarea" - field_name = dict((token["data"])[::-1])["name"] - - elif name == "select": - field_type = "select" - attributes = dict(token["data"][::-1]) - field_name = attributes.get("name") - is_select_multiple = "multiple" in attributes - is_selected_option_found = False - - elif field_type == "select" and field_name and name == "option": - option_selected_index = -1 - option_value = None - for i,(n,v) in enumerate(token["data"]): - n = n.lower() - if n == "selected": - option_selected_index = i - elif n == "value": - option_value = v.strip(spaceCharacters) - if option_value is None: - raise NotImplementedError("