Compare commits

...

21 Commits
master ... py3

Author SHA1 Message Date
Ruud
ac8dbe03b2 Py3 fixes 2014-10-06 23:02:30 +02:00
Ruud
6c2aef7a6d Six import 2014-10-06 23:01:47 +02:00
Ruud
e1eb68e226 CodernityDB updates 2014-10-06 23:00:50 +02:00
Ruud
4ae02a8764 Don't write binary to configparser 2014-10-06 17:32:36 +02:00
Ruud
cc59342e80 NotSupported Exception 2014-10-06 17:31:12 +02:00
Ruud
97099f4d69 Use six module 2014-10-06 17:30:58 +02:00
Ruud
c50c290c3e Database helper 2014-10-06 17:28:10 +02:00
Ruud
4775e4a36a CodernityDB 2to3 2014-10-06 17:26:14 +02:00
Ruud
08cb834b4d Don't load unsupported modules 2014-10-06 16:47:38 +02:00
Ruud
46cff26d92 Don't load Twitter notifier on Python 3 2014-10-06 16:47:04 +02:00
Ruud
b668e39296 Don't load xmpp on Python 3 2014-10-06 16:46:22 +02:00
Ruud
97ee16eb4e Use six on axel 2014-10-06 16:45:40 +02:00
Ruud
41fd190d38 Update cache lib 2014-10-06 16:45:26 +02:00
Ruud
2b0facb24c Update pytwitter 2014-10-06 16:44:38 +02:00
Ruud
94a29efea5 Remove tmdb3 lib 2014-10-06 16:44:22 +02:00
Ruud
827156485c Remove tmdb3 dependency 2014-10-06 16:43:46 +02:00
Ruud
6b4e6857de BeautifulSoup4 python 3 2014-10-06 11:12:35 +02:00
Ruud
5b7e814166 CodernityDB python 3 2014-10-06 10:59:22 +02:00
Ruud
f99b40c2f3 Runner fs encoding 2014-10-06 08:53:17 +02:00
Ruud
ae00e83c9d Path helpers 2014-10-06 08:52:48 +02:00
Ruud
d4f2f12924 Force logging utf8 2014-10-06 08:16:40 +02:00
93 changed files with 12828 additions and 7710 deletions

View File

@@ -61,7 +61,7 @@ class Loader(object):
self.log = CPLog(__name__)
formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s', '%H:%M:%S')
hdlr = handlers.RotatingFileHandler(os.path.join(self.log_dir, 'error.log'), 'a', 500000, 10)
hdlr = handlers.RotatingFileHandler(os.path.join(self.log_dir, 'error.log'), 'a', 500000, 10, encoding = 'utf-8')
hdlr.setLevel(logging.CRITICAL)
hdlr.setFormatter(formatter)
self.log.logger.addHandler(hdlr)

View File

@@ -3,7 +3,7 @@ from threading import Thread
import json
import threading
import traceback
import urllib
from six.moves import urllib
from couchpotato.core.helpers.request import getParams
from couchpotato.core.logger import CPLog
@@ -102,7 +102,7 @@ class ApiHandler(RequestHandler):
kwargs = {}
for x in self.request.arguments:
kwargs[x] = urllib.unquote(self.get_argument(x))
kwargs[x] = urllib.parse.unquote(self.get_argument(x))
# Split array arguments
kwargs = getParams(kwargs)

View File

@@ -3,12 +3,12 @@ import os
import time
import traceback
from sqlite3 import OperationalError
from CodernityDB3.index import Index
from CodernityDB.database import RecordNotFound
from CodernityDB.index import IndexException, IndexNotFoundException, IndexConflict
from couchpotato import CPLog
from couchpotato.api import addApiView
from couchpotato.core.event import addEvent, fireEvent, fireEventAsync
from couchpotato.core.helpers.database import IndexException, IndexNotFoundException, IndexConflict, RecordNotFound
from couchpotato.core.helpers.encoding import toUnicode, sp
from couchpotato.core.helpers.variable import getImdb, tryInt, randomString

View File

@@ -1,7 +1,7 @@
from base64 import b16encode, b32decode
from datetime import timedelta
from hashlib import sha1
from urlparse import urlparse
from six.moves import urllib
import os
from couchpotato.core._base.downloader.main import DownloaderBase, ReleaseDownloadList
@@ -62,7 +62,7 @@ class rTorrent(DownloaderBase):
if self.conf('ssl') and url.startswith('httprpc://'):
url = url.replace('httprpc://', 'httprpc+https://')
parsed = urlparse(url)
parsed = urllib.urlparse(url)
# rpc_url is only used on http/https scgi pass-through
if parsed.scheme in ['http', 'https']:

View File

@@ -102,7 +102,7 @@ def fireEvent(name, *args, **kwargs):
# Fire
result = e(*args, **kwargs)
result_keys = result.keys()
result_keys = list(result.keys())
result_keys.sort(key = natsortKey)
if options['single'] and not options['merge']:

View File

@@ -0,0 +1,26 @@
# Compatibility shim: callers import the CodernityDB names from this module
# instead of picking a CodernityDB package themselves.
from six import PY2
if PY2:
    # Python 2: take the names from the CodernityDB package.
    from CodernityDB.database_super_thread_safe import SuperThreadSafeDatabase
    from CodernityDB.index import IndexException, IndexConflict, IndexNotFoundException
    from CodernityDB.database import RecordNotFound, RecordDeleted
    from CodernityDB.hash_index import HashIndex
    from CodernityDB.tree_index import MultiTreeBasedIndex, TreeBasedIndex
else:
    # Not Python 2: take the same names from the CodernityDB3 package.
    from CodernityDB3.database_super_thread_safe import SuperThreadSafeDatabase
    from CodernityDB3.index import IndexException, IndexConflict, IndexNotFoundException
    from CodernityDB3.database import RecordNotFound, RecordDeleted
    from CodernityDB3.hash_index import HashIndex
    from CodernityDB3.tree_index import MultiTreeBasedIndex, TreeBasedIndex
# The self-assignments below do nothing at runtime.
# NOTE(review): presumably they mark the imported names as deliberately
# re-exported so tooling does not flag them as unused -- confirm intent.
SuperThreadSafeDatabase = SuperThreadSafeDatabase
IndexException = IndexException
IndexNotFoundException = IndexNotFoundException
IndexConflict = IndexConflict
RecordNotFound = RecordNotFound
HashIndex = HashIndex
MultiTreeBasedIndex = MultiTreeBasedIndex
TreeBasedIndex = TreeBasedIndex
RecordDeleted = RecordDeleted

View File

@@ -1,11 +1,11 @@
from string import ascii_letters, digits
from urllib import quote_plus
import os
import re
import traceback
import unicodedata
from chardet import detect
from six.moves import urllib
from couchpotato.core.logger import CPLog
import six
@@ -16,7 +16,7 @@ log = CPLog(__name__)
def toSafeString(original):
valid_chars = "-_.() %s%s" % (ascii_letters, digits)
cleaned_filename = unicodedata.normalize('NFKD', toUnicode(original)).encode('ASCII', 'ignore')
valid_string = ''.join(c for c in cleaned_filename if c in valid_chars)
valid_string = ''.join(list(six.unichr(c) for c in cleaned_filename if six.unichr(c) in valid_chars))
return ' '.join(valid_string.split())
@@ -29,7 +29,7 @@ def simplifyString(original):
def toUnicode(original, *args):
try:
if isinstance(original, unicode):
if isinstance(original, six.text_type):
return original
else:
try:
@@ -47,16 +47,32 @@ def toUnicode(original, *args):
ascii_text = str(original).encode('string_escape')
return toUnicode(ascii_text)
def toUTF8(original):
    """
    Re-encode a non-empty byte string as UTF-8, guessing its current encoding
    :param original: value to convert
    :return: UTF-8 encoded bytes; any value that is not a non-empty byte
        string is returned unchanged
    """
    if not isinstance(original, six.binary_type) or len(original) == 0:
        return original

    # chardet reports an encoding of None when it cannot make a guess, and
    # decode(None) raises a TypeError, so fall back to UTF-8 in that case.
    encoding = detect(original).get('encoding') or 'utf-8'

    # Decoding errors still propagate to the caller, exactly as the former
    # try/except that only re-raised did.
    return original.decode(encoding).encode('utf-8')
def ss(original, *args):
u_original = toUnicode(original, *args)
try:
from couchpotato.environment import Env
return u_original.encode(Env.get('encoding'))
if isinstance(u_original, six.text_type):
u_original = u_original.encode('unicode_escape')
else:
u_original = u_original
return six.u(u_original)
except Exception as e:
log.debug('Failed ss encoding char, force UTF8: %s', e)
try:
from couchpotato.environment import Env
return u_original.encode(Env.get('encoding'), 'replace')
except:
return u_original.encode('utf-8', 'replace')
@@ -72,7 +88,7 @@ def sp(path, *args):
if os.path.sep == '/' and '\\' in path:
path = '/' + path.replace(':', '').replace('\\', '/')
path = os.path.normpath(ss(path, *args))
path = os.path.normpath(path)
# Remove any trailing path separators
if path != os.path.sep:
@@ -114,14 +130,15 @@ def stripAccents(s):
def tryUrlencode(s):
new = six.u('')
if isinstance(s, dict):
for key, value in s.items():
for key, value in list(s.items()):
new += six.u('&%s=%s') % (key, tryUrlencode(value))
return new[1:]
else:
for letter in ss(s):
letter = six.unichr(letter)
try:
new += quote_plus(letter)
new += urllib.parse.quote_plus(letter)
except:
new += letter

View File

@@ -0,0 +1,51 @@
import os
from chardet import detect
from couchpotato import Env
fs_enc = Env.get('fs_encoding')
def list_dir(path, full_path = True):
    """
    Yield the entries of a directory, yield nothing when it is not a directory
    :param path: directory to list, passed through unicode_path() first
    :param full_path: when true yield join(path, entry), otherwise the bare entry name
    """
    directory = unicode_path(path)

    # Guard clause: a missing or non-directory path simply ends the generator
    if not os.path.isdir(directory):
        return

    for entry in os.listdir(directory):
        yield join(directory, entry) if full_path else entry
def join(*args):
    """
    Join path segments, running every segment through safe() beforehand
    :param args: path segments
    :return: os.path.join() of the converted segments
    """
    segments = [safe(part) for part in args]
    return os.path.join(*segments)
def unicode_path(path):
    """
    Convert back to unicode
    :param path: path string
    :return: text for byte-string input, any other input unchanged
    """
    # ``bytes`` is the same type as ``str`` on Python 2, so the same inputs
    # are converted there, and the check keeps working on Python 3.
    if not isinstance(path, bytes):
        return path

    # chardet reports an encoding of None when it cannot make a guess
    encoding = detect(path).get('encoding') or 'utf-8'
    text = path.decode(encoding)

    # Undo the 'unicode_escape' encoding that safe() applies. Text has no
    # decode() on Python 3, so go through ASCII bytes first; backslashreplace
    # turns non-ASCII characters into escapes that the decode restores,
    # instead of failing on them.
    return text.encode('ascii', 'backslashreplace').decode('unicode_escape')
def safe(path):
    """
    Encode text with the 'unicode_escape' codec
    :param path: path string
    :return: escaped byte string for text input, any other input unchanged
    """
    # type(u'') is ``unicode`` on Python 2 and ``str`` on Python 3; the bare
    # name ``unicode`` does not exist on Python 3 and raised a NameError.
    if isinstance(path, type(u'')):
        return path.encode('unicode_escape')
    return path

View File

@@ -0,0 +1,2 @@
class NotSupported(Exception):
    """Raised by a module that cannot be used on the running Python version."""

View File

@@ -1,7 +1,7 @@
from urllib import unquote
import re
from couchpotato.core.helpers.encoding import toUnicode
from six.moves import urllib
from couchpotato.core.helpers.variable import natsortKey
@@ -10,7 +10,7 @@ def getParams(params):
reg = re.compile('^[a-z0-9_\.]+$')
# Sort keys
param_keys = params.keys()
param_keys = list(params.keys())
param_keys.sort(key = natsortKey)
temp = {}
@@ -28,7 +28,7 @@ def getParams(params):
for item in nested:
if item is nested[-1]:
current[item] = toUnicode(unquote(value))
current[item] = toUnicode(urllib.parse.unquote(value))
else:
try:
current[item]
@@ -37,7 +37,7 @@ def getParams(params):
current = current[item]
else:
temp[param] = toUnicode(unquote(value))
temp[param] = toUnicode(urllib.parse.unquote(value))
if temp[param].lower() in ['true', 'false']:
temp[param] = temp[param].lower() != 'false'

View File

@@ -3,6 +3,7 @@ import sys
import traceback
from couchpotato.core.event import fireEvent
from couchpotato.core.helpers.py3 import NotSupported
from couchpotato.core.logger import CPLog
from importhelper import import_module
import six
@@ -131,7 +132,7 @@ class Loader(object):
return False
try:
# Load single file plugin
if isinstance(module.autoload, (str, unicode)):
if isinstance(module.autoload, (six.string_types, six.text_type)):
getattr(module, module.autoload)()
# Load folder plugin
else:
@@ -162,6 +163,8 @@ class Loader(object):
def loadModule(self, name):
try:
return import_module(name)
except NotSupported:
log.error('Module "%s" is not supported in Python 3', name)
except ImportError:
log.debug('Skip loading module plugin %s: %s', (name, traceback.format_exc()))
return None

View File

@@ -1,5 +1,6 @@
import logging
import re
import traceback
class CPLog(object):
@@ -54,19 +55,19 @@ class CPLog(object):
def safeMessage(self, msg, replace_tuple = ()):
from couchpotato.core.helpers.encoding import ss, toUnicode
from couchpotato.core.helpers.encoding import ss, toUTF8
msg = ss(msg)
msg = toUTF8(msg)
try:
if isinstance(replace_tuple, tuple):
msg = msg % tuple([ss(x) if not isinstance(x, (int, float)) else x for x in list(replace_tuple)])
msg = msg % tuple([toUTF8(x) for x in list(replace_tuple)])
elif isinstance(replace_tuple, dict):
msg = msg % dict((k, ss(v)) for k, v in replace_tuple.iteritems())
msg = msg % dict((k, toUTF8(v)) for k, v in replace_tuple.iteritems())
else:
msg = msg % ss(replace_tuple)
except Exception as e:
self.logger.error('Failed encoding stuff to log "%s": %s' % (msg, e))
msg = msg % toUTF8(replace_tuple)
except:
self.logger.error('Failed encoding stuff to log "%s": %s' % (msg, traceback.format_exc()))
self.setup()
if not self.is_develop:
@@ -83,4 +84,4 @@ class CPLog(object):
except:
pass
return toUnicode(msg)
return toUTF8(msg)

View File

@@ -89,7 +89,7 @@ class MediaBase(Plugin):
# Loop over type
for image in image_urls.get(image_type, []):
if not isinstance(image, (str, unicode)):
if not isinstance(image, six.string_types):
continue
if file_type not in existing_files or len(existing_files.get(file_type, [])) == 0:

View File

@@ -1,14 +1,14 @@
from string import ascii_letters
from hashlib import md5
from couchpotato.core.helpers.database import MultiTreeBasedIndex, TreeBasedIndex
from CodernityDB.tree_index import MultiTreeBasedIndex, TreeBasedIndex
from couchpotato.core.helpers.encoding import toUnicode, simplifyString
class MediaIndex(MultiTreeBasedIndex):
_version = 3
custom_header = """from CodernityDB.tree_index import MultiTreeBasedIndex"""
custom_header = """from couchpotato.core.helpers.database import MultiTreeBasedIndex"""
def __init__(self, *args, **kwargs):
kwargs['key_format'] = '32s'
@@ -62,10 +62,11 @@ class MediaTypeIndex(TreeBasedIndex):
class TitleSearchIndex(MultiTreeBasedIndex):
_version = 1
_version = 2
custom_header = """from CodernityDB.tree_index import MultiTreeBasedIndex
from itertools import izip
custom_header = """from couchpotato.core.helpers.database import MultiTreeBasedIndex
try: from itertools import izip
except: izip = zip
from couchpotato.core.helpers.encoding import simplifyString"""
def __init__(self, *args, **kwargs):
@@ -101,7 +102,7 @@ from couchpotato.core.helpers.encoding import simplifyString"""
class TitleIndex(TreeBasedIndex):
_version = 4
custom_header = """from CodernityDB.tree_index import TreeBasedIndex
custom_header = """from couchpotato.core.helpers.database import TreeBasedIndex
from string import ascii_letters
from couchpotato.core.helpers.encoding import toUnicode, simplifyString"""
@@ -134,7 +135,7 @@ from couchpotato.core.helpers.encoding import toUnicode, simplifyString"""
class StartsWithIndex(TreeBasedIndex):
_version = 3
custom_header = """from CodernityDB.tree_index import TreeBasedIndex
custom_header = """from couchpotato.core.helpers.database import TreeBasedIndex
from string import ascii_letters
from couchpotato.core.helpers.encoding import toUnicode, simplifyString"""
@@ -180,7 +181,7 @@ class MediaChildrenIndex(TreeBasedIndex):
class MediaTagIndex(MultiTreeBasedIndex):
_version = 2
custom_header = """from CodernityDB.tree_index import MultiTreeBasedIndex"""
custom_header = """from couchpotato.core.helpers.database import MultiTreeBasedIndex"""
def __init__(self, *args, **kwargs):
kwargs['key_format'] = '32s'

View File

@@ -3,15 +3,16 @@ import time
import traceback
from string import ascii_lowercase
from CodernityDB.database import RecordNotFound, RecordDeleted
from couchpotato import tryInt, get_db
from couchpotato.api import addApiView
from couchpotato.core.event import fireEvent, fireEventAsync, addEvent
from couchpotato.core.helpers.database import RecordNotFound, RecordDeleted
from couchpotato.core.helpers.encoding import toUnicode
from couchpotato.core.helpers.variable import splitString, getImdb, getTitle
from couchpotato.core.logger import CPLog
from couchpotato.core.media import MediaBase
from .index import MediaIndex, MediaStatusIndex, MediaTypeIndex, TitleSearchIndex, TitleIndex, StartsWithIndex, MediaChildrenIndex, MediaTagIndex
import six
log = CPLog(__name__)
@@ -280,7 +281,7 @@ class MediaPlugin(MediaBase):
offset = 0
limit = -1
if limit_offset:
splt = splitString(limit_offset) if isinstance(limit_offset, (str, unicode)) else limit_offset
splt = splitString(limit_offset) if isinstance(limit_offset, six.string_types) else limit_offset
limit = tryInt(splt[0])
offset = tryInt(0 if len(splt) is 1 else splt[1])

View File

@@ -1,4 +1,4 @@
from urlparse import urlparse
from six.moves import urllib
import json
import re
import time
@@ -50,7 +50,7 @@ class Provider(Plugin):
if Env.get('dev'): return True
now = time.time()
host = urlparse(test_url).hostname
host = urllib.urlparse(test_url).hostname
if self.last_available_check.get(host) < now - 900:
self.last_available_check[host] = now
@@ -219,7 +219,7 @@ class YarrProvider(Provider):
if provider and provider == self.getName():
return self
hostname = urlparse(url).hostname
hostname = urllib.urlparse(url).hostname
if host and hostname in host:
return self
else:

View File

@@ -1,4 +1,4 @@
from urlparse import urlparse
from six.moves import urllib
import time
import traceback
import re
@@ -97,7 +97,7 @@ class Base(NZBProvider, RSS):
results.append({
'id': nzb_id,
'provider_extra': urlparse(host['host']).hostname or host['host'],
'provider_extra': urllib.urlparse(host['host']).hostname or host['host'],
'name': toUnicode(name),
'name_extra': name_extra,
'age': self.calculateAge(int(time.mktime(parse(date).timetuple()))),
@@ -175,7 +175,7 @@ class Base(NZBProvider, RSS):
return '&apikey=%s' % host['api_key']
def download(self, url = '', nzb_id = ''):
host = urlparse(url).hostname
host = urllib.urlparse(url).hostname
if self.limits_reached.get(host):
# Try again in 3 hours

View File

@@ -1,4 +1,4 @@
from urlparse import urlparse, parse_qs
from six.moves import urllib
import time
from couchpotato.core.event import fireEvent
@@ -52,7 +52,7 @@ class Base(NZBProvider, RSS):
for nzb in nzbs:
enclosure = self.getElement(nzb, 'enclosure').attrib
nzb_id = parse_qs(urlparse(self.getTextElement(nzb, 'link')).query).get('id')[0]
nzb_id = urllib.parse_qs(urllib.urlparse(self.getTextElement(nzb, 'link')).query).get('id')[0]
results.append({
'id': nzb_id,

View File

@@ -1,4 +1,4 @@
import htmlentitydefs
from six.moves import html_entities
import json
import re
import time
@@ -145,15 +145,15 @@ class Base(TorrentProvider):
# character reference
try:
if txt[:3] == "&#x":
return unichr(int(txt[3:-1], 16))
return six.unichr(int(txt[3:-1], 16))
else:
return unichr(int(txt[2:-1]))
return six.unichr(int(txt[2:-1]))
except ValueError:
pass
else:
# named entity
try:
txt = unichr(htmlentitydefs.name2codepoint[txt[1:-1]])
txt = six.unichr(html_entities.name2codepoint[txt[1:-1]])
except KeyError:
pass
return txt # leave as is

View File

@@ -1,4 +1,4 @@
from urlparse import urlparse
from six.moves import urllib
import re
import traceback
@@ -45,7 +45,7 @@ class Base(TorrentProvider):
results.append({
'id': torrent.get('torrent_id'),
'protocol': 'torrent' if re.match('^(http|https|ftp)://.*$', torrent.get('download_url')) else 'torrent_magnet',
'provider_extra': urlparse(host['host']).hostname or host['host'],
'provider_extra': urllib.urlparse(host['host']).hostname or host['host'],
'name': toUnicode(torrent.get('release_name')),
'url': torrent.get('download_url'),
'detail_url': torrent.get('details_url'),

View File

@@ -1,4 +1,4 @@
from urlparse import urlparse
from six.moves import urllib
from couchpotato.core.event import addEvent, fireEvent
from couchpotato.core.helpers.encoding import simplifyString
@@ -34,7 +34,7 @@ class UserscriptBase(Plugin):
def belongsTo(self, url):
host = urlparse(url).hostname
host = urllib.urlparse(url).hostname
host_split = host.split('.')
if len(host_split) > 2:
host = host[len(host_split[0]):]

View File

@@ -3,6 +3,7 @@ from couchpotato.core.event import fireEvent, addEvent
from couchpotato.core.helpers.variable import mergeDicts, getImdb
from couchpotato.core.logger import CPLog
from couchpotato.core.plugins.base import Plugin
import six
log = CPLog(__name__)
@@ -30,7 +31,7 @@ class Search(Plugin):
def search(self, q = '', types = None, **kwargs):
# Make sure types is the correct instance
if isinstance(types, (str, unicode)):
if isinstance(types, six.string_types):
types = [types]
elif isinstance(types, (list, tuple, set)):
types = list(types)

View File

@@ -1,10 +1,10 @@
import traceback
import time
from CodernityDB.database import RecordNotFound
from couchpotato import get_db
from couchpotato.api import addApiView
from couchpotato.core.event import fireEvent, fireEventAsync, addEvent
from couchpotato.core.helpers.database import RecordNotFound
from couchpotato.core.helpers.encoding import toUnicode
from couchpotato.core.helpers.variable import splitString, getTitle, getImdb, getIdentifier
from couchpotato.core.logger import CPLog

View File

@@ -1,9 +1,9 @@
import copy
import traceback
from CodernityDB.database import RecordNotFound
from couchpotato import get_db
from couchpotato.core.event import addEvent, fireEvent
from couchpotato.core.helpers.database import RecordNotFound
from couchpotato.core.helpers.variable import mergeDicts, randomString
from couchpotato.core.logger import CPLog
from couchpotato.core.plugins.base import Plugin

View File

@@ -7,6 +7,7 @@ from couchpotato.core.helpers.encoding import tryUrlencode
from couchpotato.core.helpers.variable import tryInt, tryFloat, splitString
from couchpotato.core.logger import CPLog
from couchpotato.core.media.movie.providers.base import MovieProvider
import six
log = CPLog(__name__)
@@ -72,7 +73,7 @@ class OMDBAPI(MovieProvider):
try:
try:
if isinstance(movie, (str, unicode)):
if isinstance(movie, six.string_types):
movie = json.loads(movie)
except ValueError:
log.info('No proper json to decode')

View File

@@ -1,11 +1,11 @@
import traceback
import time
from couchpotato.core.event import addEvent
from couchpotato.core.helpers.encoding import simplifyString, toUnicode, ss
from couchpotato.core.event import addEvent, fireEvent
from couchpotato.core.helpers.encoding import simplifyString, toUnicode, ss, tryUrlencode
from couchpotato.core.helpers.variable import tryInt
from couchpotato.core.logger import CPLog
from couchpotato.core.media.movie.providers.base import MovieProvider
import tmdb3
log = CPLog(__name__)
@@ -13,15 +13,24 @@ autoload = 'TheMovieDb'
class TheMovieDb(MovieProvider):
MAX_EXTRATHUMBS = 4
http_time_between_calls = .3
configuration = {
'images': {
'secure_base_url': 'https://image.tmdb.org/t/p/',
},
}
def __init__(self):
addEvent('movie.info', self.getInfo, priority = 3)
addEvent('movie.info_by_tmdb', self.getInfo)
addEvent('app.load', self.config)
# Configure TMDB settings
tmdb3.set_key(self.conf('api_key'))
tmdb3.set_cache('null')
def config(self):
    """ Store the 'configuration' API response, keep the current value when the response is empty """
    fetched = self.request('configuration')
    if not fetched:
        return
    self.configuration = fetched
def search(self, q, limit = 12):
""" Find movie by name """
@@ -31,14 +40,19 @@ class TheMovieDb(MovieProvider):
search_string = simplifyString(q)
cache_key = 'tmdb.cache.%s.%s' % (search_string, limit)
results = self.getCache(cache_key)
results = None #self.getCache(cache_key)
if not results:
log.debug('Searching for movie: %s', q)
raw = None
try:
raw = tmdb3.searchMovie(search_string)
#name_year = fireEvent('scanner.name_year', q, single = True)
raw = self.request('search/movie', {
'query': q
}, return_key = 'results')
except:
log.error('Failed searching TMDB for "%s": %s', (search_string, traceback.format_exc()))
@@ -69,39 +83,27 @@ class TheMovieDb(MovieProvider):
if not identifier:
return {}
cache_key = 'tmdb.cache.%s%s' % (identifier, '.ex' if extended else '')
result = self.getCache(cache_key)
if not result:
try:
log.debug('Getting info: %s', cache_key)
# noinspection PyArgumentList
movie = tmdb3.Movie(identifier)
try: exists = movie.title is not None
except: exists = False
if exists:
result = self.parseMovie(movie, extended = extended)
self.setCache(cache_key, result)
else:
result = {}
except:
log.error('Failed getting info for %s: %s', (identifier, traceback.format_exc()))
result = self.parseMovie({
'id': identifier
}, extended = extended)
return result
def parseMovie(self, movie, extended = True):
cache_key = 'tmdb.cache.%s%s' % (movie.id, '.ex' if extended else '')
movie_data = self.getCache(cache_key)
cache_key = 'tmdb.cache.%s%s' % (movie.get('id'), '.ex' if extended else '')
movie_data = None #self.getCache(cache_key)
if not movie_data:
# Full data
movie = self.request('movie/%s' % movie.get('id'))
# Images
poster = self.getImage(movie, type = 'poster', size = 'w154')
poster_original = self.getImage(movie, type = 'poster', size = 'original')
backdrop_original = self.getImage(movie, type = 'backdrop', size = 'original')
extra_thumbs = self.getMultImages(movie, type = 'backdrops', size = 'original', n = self.MAX_EXTRATHUMBS, skipfirst = True)
extra_thumbs = self.getMultImages(movie, type = 'backdrops', size = 'original')
images = {
'poster': [poster] if poster else [],
@@ -114,39 +116,43 @@ class TheMovieDb(MovieProvider):
# Genres
try:
genres = [genre.name for genre in movie.genres]
genres = [genre.get('name') for genre in movie.get('genres', [])]
except:
genres = []
# 1900 is the same as None
year = str(movie.releasedate or '')[:4]
if not movie.releasedate or year == '1900' or year.lower() == 'none':
year = str(movie.get('release_date') or '')[:4]
if not movie.get('release_date') or year == '1900' or year.lower() == 'none':
year = None
# Gather actors data
actors = {}
if extended:
for cast_item in movie.cast:
# Full data
cast = self.request('movie/%s/casts' % movie.get('id'), return_key = 'cast')
for cast_item in cast:
try:
actors[toUnicode(cast_item.name)] = toUnicode(cast_item.character)
images['actors'][toUnicode(cast_item.name)] = self.getImage(cast_item, type = 'profile', size = 'original')
actors[toUnicode(cast_item.get('name'))] = toUnicode(cast_item.get('character'))
images['actors'][toUnicode(cast_item.get('name'))] = self.getImage(cast_item, type = 'profile', size = 'original')
except:
log.debug('Error getting cast info for %s: %s', (cast_item, traceback.format_exc()))
movie_data = {
'type': 'movie',
'via_tmdb': True,
'tmdb_id': movie.id,
'titles': [toUnicode(movie.title)],
'original_title': movie.originaltitle,
'tmdb_id': movie.get('id'),
'titles': [toUnicode(movie.get('title'))],
'original_title': movie.get('original_title'),
'images': images,
'imdb': movie.imdb,
'runtime': movie.runtime,
'released': str(movie.releasedate),
'imdb': movie.get('imdb_id'),
'runtime': movie.get('runtime'),
'released': str(movie.get('release_date')),
'year': tryInt(year, None),
'plot': movie.overview,
'plot': movie.get('overview'),
'genres': genres,
'collection': getattr(movie.collection, 'name', None),
'collection': getattr(movie.get('belongs_to_collection'), 'name', None),
'actor_roles': actors
}
@@ -157,8 +163,12 @@ class TheMovieDb(MovieProvider):
movie_data['titles'].append(movie_data['original_title'])
if extended:
for alt in movie.alternate_titles:
alt_name = alt.title
# Full data
alternate_titles = self.request('movie/%s/alternative_titles' % movie.get('id'), return_key = 'titles')
for alt in alternate_titles:
alt_name = alt.get('title')
if alt_name and alt_name not in movie_data['titles'] and alt_name.lower() != 'none' and alt_name is not None:
movie_data['titles'].append(alt_name)
@@ -171,36 +181,38 @@ class TheMovieDb(MovieProvider):
image_url = ''
try:
image_url = getattr(movie, type).geturl(size = size)
path = movie.get('%s_path' % type)
image_url = '%s%s%s' % (self.configuration['images']['secure_base_url'], size, path)
except:
log.debug('Failed getting %s.%s for "%s"', (type, size, ss(str(movie))))
return image_url
def getMultImages(self, movie, type = 'backdrops', size = 'original', n = -1, skipfirst = False):
"""
If n < 0, return all images. Otherwise return n images.
If n > len(getattr(movie, type)), then return all images.
If skipfirst is True, then it will skip getattr(movie, type)[0]. This
is because backdrops[0] is typically backdrop.
"""
def getMultImages(self, movie, type = 'backdrops', size = 'original'):
image_urls = []
try:
images = getattr(movie, type)
if n < 0 or n > len(images):
num_images = len(images)
else:
num_images = n
for i in range(int(skipfirst), num_images + int(skipfirst)):
image_urls.append(images[i].geturl(size = size))
# Full data
images = self.request('movie/%s/images' % movie.get('id'), return_key = type)
for image in images[1:5]:
image_urls.append(self.getImage(image, 'file', size))
except:
log.debug('Failed getting %i %s.%s for "%s"', (n, type, size, ss(str(movie))))
log.debug('Failed getting %s.%s for "%s"', (type, size, ss(str(movie))))
return image_urls
def request(self, call = '', params = None, return_key = None):
    """
    Fetch a TMDB API v3 call and return the result of getJsonData()
    :param call: path appended to the API base url
    :param params: optional dict of query parameters
    :param return_key: when given and present in the response, return only that entry
    :return: the response, or its ``return_key`` entry
    """
    # None replaces the former shared mutable ``{}`` default
    query = tryUrlencode(params or {})

    url = 'http://api.themoviedb.org/3/%s?api_key=%s%s' % (call, self.conf('api_key'), '&%s' % query if query else '')
    data = self.getJsonData(url, cache_timeout = 0)

    # Test for the key itself, not the truthiness of its value: an empty
    # list has to be returned as an empty list. Previously the whole response
    # dict was returned instead and callers ended up looping over its keys.
    if data and return_key and return_key in data:
        data = data[return_key]

    return data
def isDisabled(self):
if self.conf('api_key') == '':
log.error('No API key provided.')

View File

@@ -1,10 +1,10 @@
from CodernityDB.tree_index import TreeBasedIndex
from couchpotato.core.helpers.database import TreeBasedIndex
class NotificationIndex(TreeBasedIndex):
_version = 1
custom_header = """from CodernityDB.tree_index import TreeBasedIndex
custom_header = """from couchpotato.core.helpers.database import TreeBasedIndex
import time"""
def __init__(self, *args, **kwargs):
@@ -22,7 +22,7 @@ import time"""
class NotificationUnreadIndex(TreeBasedIndex):
_version = 1
custom_header = """from CodernityDB.tree_index import TreeBasedIndex
custom_header = """from couchpotato.core.helpers.database import TreeBasedIndex
import time"""
def __init__(self, *args, **kwargs):

View File

@@ -3,11 +3,11 @@ import threading
import time
import traceback
import uuid
from CodernityDB.database import RecordDeleted
from couchpotato import get_db
from couchpotato.api import addApiView, addNonBlockApiView
from couchpotato.core.event import addEvent, fireEvent
from couchpotato.core.helpers.database import RecordDeleted
from couchpotato.core.helpers.encoding import toUnicode
from couchpotato.core.helpers.variable import tryInt, splitString
from couchpotato.core.logger import CPLog

View File

@@ -1,5 +1,5 @@
from datetime import timedelta, datetime
from urlparse import urlparse
from six.moves import urllib
import traceback
from couchpotato.core.helpers.variable import cleanHost
@@ -106,7 +106,7 @@ class PlexServer(object):
def createHost(self, host, port = None):
h = cleanHost(host)
p = urlparse(h)
p = urllib.urlparse(h)
h = h.rstrip('/')
if port and not p.port:

View File

@@ -1,8 +1,16 @@
from .main import Twitter
from six import PY3
try:
from .main import Twitter
def autoload():
return Twitter()
def autoload():
return Twitter()
except:
if PY3:
from couchpotato.core.helpers.py3 import NotSupported
raise NotSupported
else:
raise
config = [{
'name': 'twitter',

View File

@@ -3,12 +3,20 @@ import traceback
from couchpotato.core.logger import CPLog
from couchpotato.core.notifications.base import Notification
import xmpp
from six import PY3
log = CPLog(__name__)
autoload = 'Xmpp'
try:
import xmpp
autoload = 'Xmpp'
except:
if PY3:
from couchpotato.core.helpers.py3 import NotSupported
raise NotSupported
else:
raise
class Xmpp(Notification):

View File

@@ -1,6 +1,5 @@
import threading
from urllib import quote
from urlparse import urlparse
from six.moves import urllib
import glob
import inspect
import os.path
@@ -183,13 +182,13 @@ class Plugin(object):
# http request
def urlopen(self, url, timeout = 30, data = None, headers = None, files = None, show_error = True, stream = False):
url = quote(ss(url), safe = "%/:=&?~#+!$,;'@()*[]")
url = urllib.parse.quote(ss(url), safe = "%/:=&?~#+!$,;'@()*[]")
if not headers: headers = {}
if not data: data = {}
# Fill in some headers
parsed_url = urlparse(url)
parsed_url = urllib.parse.urlparse(url)
host = '%s%s' % (parsed_url.hostname, (':' + str(parsed_url.port) if parsed_url.port else ''))
headers['Referer'] = headers.get('Referer', '%s://%s' % (parsed_url.scheme, host))
@@ -232,7 +231,11 @@ class Plugin(object):
status_code = response.status_code
if response.status_code == requests.codes.ok:
data = response if stream else response.content
if stream:
data = response
else:
data = response.content
data = data.decode(response.encoding)
else:
response.raise_for_status()

View File

@@ -1,4 +1,4 @@
from CodernityDB.tree_index import TreeBasedIndex
from couchpotato.core.helpers.database import TreeBasedIndex
class CategoryIndex(TreeBasedIndex):

View File

@@ -1,13 +1,14 @@
import random as rndm
import time
from CodernityDB.database import RecordDeleted
from couchpotato import get_db
from couchpotato.api import addApiView
from couchpotato.core.event import fireEvent
from couchpotato.core.helpers.database import RecordDeleted
from couchpotato.core.helpers.variable import splitString, tryInt
from couchpotato.core.logger import CPLog
from couchpotato.core.plugins.base import Plugin
import six
log = CPLog(__name__)
@@ -41,7 +42,7 @@ class Dashboard(Plugin):
# Add limit
limit = 12
if limit_offset:
splt = splitString(limit_offset) if isinstance(limit_offset, (str, unicode)) else limit_offset
splt = splitString(limit_offset) if isinstance(limit_offset, six.string_types) else limit_offset
limit = tryInt(splt[0])
# Get all active medias

View File

@@ -1,9 +1,9 @@
import codecs
import os
import re
import traceback
from couchpotato.api import addApiView
from couchpotato.core.helpers.encoding import toUnicode
from couchpotato.core.helpers.variable import tryInt, splitString
from couchpotato.core.logger import CPLog
from couchpotato.core.plugins.base import Plugin
@@ -103,9 +103,8 @@ class Logging(Plugin):
if not os.path.isfile(path):
break
f = open(path, 'r')
log_content = toUnicode(f.read())
raw_lines = self.toList(log_content)
f = codecs.open(path, 'r', 'utf-8')
raw_lines = self.toList(f.read())
raw_lines.reverse()
brk = False
@@ -131,7 +130,7 @@ class Logging(Plugin):
def toList(self, log_content = ''):
logs_raw = toUnicode(log_content).split('[0m\n')
logs_raw = log_content.split('[0m\n')
logs = []
for log_line in logs_raw:

View File

@@ -1,4 +1,4 @@
from CodernityDB.tree_index import TreeBasedIndex
from couchpotato.core.helpers.database import TreeBasedIndex
class ProfileIndex(TreeBasedIndex):

View File

@@ -1,6 +1,5 @@
from hashlib import md5
from CodernityDB.hash_index import HashIndex
from couchpotato.core.helpers.database import HashIndex
class QualityIndex(HashIndex):

View File

@@ -2,10 +2,10 @@ from math import fabs, ceil
import traceback
import re
from CodernityDB.database import RecordNotFound
from couchpotato import get_db
from couchpotato.api import addApiView
from couchpotato.core.event import addEvent, fireEvent
from couchpotato.core.helpers.database import RecordNotFound
from couchpotato.core.helpers.encoding import toUnicode, ss
from couchpotato.core.helpers.variable import mergeDicts, getExt, tryInt, splitString, tryFloat
from couchpotato.core.logger import CPLog

View File

@@ -1,7 +1,5 @@
from hashlib import md5
from CodernityDB.hash_index import HashIndex
from CodernityDB.tree_index import TreeBasedIndex
from couchpotato.core.helpers.database import TreeBasedIndex, HashIndex
class ReleaseIndex(TreeBasedIndex):

View File

@@ -3,10 +3,10 @@ import os
import time
import traceback
from CodernityDB.database import RecordDeleted, RecordNotFound
from couchpotato import md5, get_db
from couchpotato.api import addApiView
from couchpotato.core.event import fireEvent, addEvent
from couchpotato.core.helpers.database import RecordDeleted, RecordNotFound
from couchpotato.core.helpers.encoding import toUnicode, sp
from couchpotato.core.helpers.variable import getTitle, tryInt
from couchpotato.core.logger import CPLog

View File

@@ -1,12 +1,14 @@
from __future__ import with_statement
import ConfigParser
from hashlib import md5
from CodernityDB.hash_index import HashIndex
from couchpotato.api import addApiView
from couchpotato.core.event import addEvent, fireEvent
from couchpotato.core.helpers.database import HashIndex
from couchpotato.core.helpers.encoding import toUnicode
from six.moves import configparser
from couchpotato.core.helpers.variable import mergeDicts, tryInt, tryFloat
import six
class Settings(object):
@@ -62,7 +64,7 @@ class Settings(object):
def setFile(self, config_file):
self.file = config_file
self.p = ConfigParser.RawConfigParser()
self.p = configparser.RawConfigParser()
self.p.read(config_file)
from couchpotato.core.logger import CPLog
@@ -148,7 +150,10 @@ class Settings(object):
return tryFloat(self.p.get(section, option))
def getUnicode(self, section, option):
value = self.p.get(section, option).decode('unicode_escape')
value = self.p.get(section, option)
if six.PY2:
value = value.decode('unicode_escape')
return toUnicode(value).strip()
def getValues(self):
@@ -161,7 +166,7 @@ class Settings(object):
return values
def save(self):
with open(self.file, 'wb') as configfile:
with open(self.file, 'w') as configfile:
self.p.write(configfile)
self.log.debug('Saved settings')

View File

@@ -14,6 +14,7 @@ class Env(object):
''' Environment variables '''
_app = None
_encoding = 'UTF-8'
_fs_encoding = 'UTF-8'
_debug = False
_dev = False
_settings = Settings()

View File

@@ -11,12 +11,12 @@ import re
import tarfile
import shutil
from CodernityDB.database_super_thread_safe import SuperThreadSafeDatabase
from argparse import ArgumentParser
from cache import FileSystemCache
from couchpotato import KeyHandler, LoginHandler, LogoutHandler
from couchpotato.api import NonBlockHandler, ApiHandler
from couchpotato.core.event import fireEventAsync, fireEvent
from couchpotato.core.helpers.database import SuperThreadSafeDatabase
from couchpotato.core.helpers.encoding import sp
from couchpotato.core.helpers.variable import getDataDir, tryInt, getFreeSpace
import requests
@@ -86,6 +86,7 @@ def runCouchPotato(options, base_path, args, data_dir = None, log_dir = None, En
encoding = 'UTF-8'
Env.set('encoding', encoding)
Env.set('fs_encoding', sys.getfilesystemencoding())
# Do db stuff
db_path = sp(os.path.join(data_dir, 'database'))
@@ -204,7 +205,7 @@ def runCouchPotato(options, base_path, args, data_dir = None, log_dir = None, En
logger.addHandler(hdlr)
# To file
hdlr2 = handlers.RotatingFileHandler(Env.get('log_path'), 'a', 500000, 10, encoding = Env.get('encoding'))
hdlr2 = handlers.RotatingFileHandler(Env.get('log_path'), 'a', 500000, 10, encoding = 'utf-8')
hdlr2.setFormatter(formatter)
logger.addHandler(hdlr2)

View File

@@ -0,0 +1,20 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2011-2013 Codernity (http://codernity.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
__version__ = '0.4.2'
__license__ = "Apache 2.0"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,31 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2011-2013 Codernity (http://codernity.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from gevent.lock import RLock
from CodernityDB3.env import cdb_environment
cdb_environment['mode'] = "gevent"
cdb_environment['rlock_obj'] = RLock
# from CodernityDB3.database import Database
from CodernityDB3.database_safe_shared import SafeDatabase
class GeventDatabase(SafeDatabase):
    """SafeDatabase exposed under a gevent-specific name.

    Adds no behaviour of its own; this module sets ``cdb_environment``'s
    ``mode`` to ``"gevent"`` and ``rlock_obj`` to gevent's ``RLock`` before
    importing ``SafeDatabase``.
    """
    pass

View File

@@ -0,0 +1,229 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2011-2013 Codernity (http://codernity.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from CodernityDB3.env import cdb_environment
from CodernityDB3.database import PreconditionsException, RevConflict, Database
# from database import Database
from collections import defaultdict
from functools import wraps
from types import MethodType
class th_safe_gen:
    """Iterator adapter that advances a wrapped generator while holding a lock.

    :param name: label stored on the adapter (``<index>_<method>`` when built
        through :py:meth:`wrapper`)
    :param gen: the iterator/generator to advance
    :param l: lock object used as a context manager around every step
    """

    def __init__(self, name, gen, l=None):
        self.name = name
        self.__gen = gen
        self.lock = l

    def __iter__(self):
        return self

    def __next__(self):
        # The lock is held only for one step, not for the whole iteration.
        with self.lock:
            item = next(self.__gen)
        return item

    @staticmethod
    def wrapper(method, index_name, meth_name, l=None):
        """Return *method* decorated so its result is wrapped in th_safe_gen."""
        def _guarded(*args, **kwargs):
            produced = method(*args, **kwargs)
            label = index_name + "_" + meth_name
            return th_safe_gen(label, produced, l)
        return wraps(method)(_guarded)
def safe_wrapper(method, lock):
    """Return a callable that runs *method* while holding *lock*.

    :param method: the callable to guard
    :param lock: object usable as a context manager (acquired per call)
    :returns: wrapper carrying *method*'s metadata via ``functools.wraps``
    """
    def _locked_call(*args, **kwargs):
        with lock:
            outcome = method(*args, **kwargs)
        return outcome
    return wraps(method)(_locked_call)
class SafeDatabase(Database):
    """Database variant that guards its operations with lock objects.

    Locks are created by calling ``cdb_environment['rlock_obj']``, so the
    module that imports this class decides which lock implementation is used.
    Three kinds of locks are kept:

    * ``close_open_lock`` - around initialize/open/create/close/destroy
    * ``main_lock`` - around index (re)definition, flush/fsync and ``id_revs``
    * ``indexes_locks[name]`` - one per index, around that index's updates
    """

    def __init__(self, path, *args, **kwargs):
        super(SafeDatabase, self).__init__(path, *args, **kwargs)
        self.indexes_locks = defaultdict(
            lambda: cdb_environment['rlock_obj']())
        self.close_open_lock = cdb_environment['rlock_obj']()
        self.main_lock = cdb_environment['rlock_obj']()
        # Maps a document id to the revision currently being indexed.
        self.id_revs = {}

    def __patch_index_gens(self, name):
        """Wrap the generator-returning methods of index *name* in th_safe_gen."""
        ind = self.indexes_names[name]
        for c in ('all', 'get_many'):
            m = getattr(ind, c)
            if getattr(ind, c + "_orig", None):
                # Already patched; nothing more to do for this index.
                return
            m_fixed = th_safe_gen.wrapper(m, name, c, self.indexes_locks[name])
            setattr(ind, c, m_fixed)
            setattr(ind, c + '_orig', m)

    def __patch_index_methods(self, name):
        """Wrap public bound methods of index *name* and its storage in its lock."""
        ind = self.indexes_names[name]
        lock = self.indexes_locks[name]
        for curr in dir(ind):
            meth = getattr(ind, curr)
            if not curr.startswith('_') and isinstance(meth, MethodType):
                setattr(ind, curr, safe_wrapper(meth, lock))
        stor = ind.storage
        for curr in dir(stor):
            meth = getattr(stor, curr)
            if not curr.startswith('_') and isinstance(meth, MethodType):
                setattr(stor, curr, safe_wrapper(meth, lock))

    def __patch_index(self, name):
        self.__patch_index_methods(name)
        self.__patch_index_gens(name)

    def initialize(self, *args, **kwargs):
        # The ``with`` block is the only acquisition: an additional explicit
        # ``acquire()`` here would never be released and would leave the lock
        # held after this method returns.
        with self.close_open_lock:
            res = super(SafeDatabase, self).initialize(*args, **kwargs)
            for name in list(self.indexes_names.keys()):
                self.indexes_locks[name] = cdb_environment['rlock_obj']()
            return res

    def open(self, *args, **kwargs):
        with self.close_open_lock:
            res = super(SafeDatabase, self).open(*args, **kwargs)
            for name in list(self.indexes_names.keys()):
                self.indexes_locks[name] = cdb_environment['rlock_obj']()
                self.__patch_index(name)
            return res

    def create(self, *args, **kwargs):
        with self.close_open_lock:
            res = super(SafeDatabase, self).create(*args, **kwargs)
            for name in list(self.indexes_names.keys()):
                self.indexes_locks[name] = cdb_environment['rlock_obj']()
                self.__patch_index(name)
            return res

    def close(self):
        with self.close_open_lock:
            return super(SafeDatabase, self).close()

    def destroy(self):
        with self.close_open_lock:
            return super(SafeDatabase, self).destroy()

    def add_index(self, *args, **kwargs):
        with self.main_lock:
            res = super(SafeDatabase, self).add_index(*args, **kwargs)
            if self.opened:
                self.indexes_locks[res] = cdb_environment['rlock_obj']()
                self.__patch_index(res)
            return res

    def _single_update_index(self, index, data, db_data, doc_id):
        with self.indexes_locks[index.name]:
            super(SafeDatabase, self)._single_update_index(
                index, data, db_data, doc_id)

    def _single_delete_index(self, index, data, doc_id, old_data):
        with self.indexes_locks[index.name]:
            super(SafeDatabase, self)._single_delete_index(
                index, data, doc_id, old_data)

    def edit_index(self, *args, **kwargs):
        with self.main_lock:
            res = super(SafeDatabase, self).edit_index(*args, **kwargs)
            if self.opened:
                self.indexes_locks[res] = cdb_environment['rlock_obj']()
                self.__patch_index(res)
            return res

    def set_indexes(self, *args, **kwargs):
        with self.main_lock:
            super(SafeDatabase, self).set_indexes(*args, **kwargs)

    def reindex_index(self, index, *args, **kwargs):
        """Reindex *index* (name or index object) under a dedicated lock.

        :raises PreconditionsException: when *index* is a name that is unknown
        """
        if isinstance(index, str):
            if index not in self.indexes_names:
                raise PreconditionsException("No index named %s" % index)
            index = self.indexes_names[index]
        key = index.name + "reind"
        # main_lock protects the lookup/creation of the per-index reindex lock.
        with self.main_lock:
            if key in self.indexes_locks:
                lock = self.indexes_locks[key]
            else:
                lock = cdb_environment['rlock_obj']()
                self.indexes_locks[key] = lock
        with lock:
            super(SafeDatabase, self).reindex_index(
                index, *args, **kwargs)

    def flush(self):
        with self.main_lock:
            super(SafeDatabase, self).flush()

    def fsync(self):
        with self.main_lock:
            super(SafeDatabase, self).fsync()

    def _update_id_index(self, _rev, data):
        with self.indexes_locks['id']:
            return super(SafeDatabase, self)._update_id_index(_rev, data)

    def _delete_id_index(self, _id, _rev, data):
        with self.indexes_locks['id']:
            return super(SafeDatabase, self)._delete_id_index(_id, _rev, data)

    def _update_indexes(self, _rev, data):
        _id, new_rev, db_data = self._update_id_index(_rev, data)
        with self.main_lock:
            self.id_revs[_id] = new_rev
        for index in self.indexes[1:]:
            with self.main_lock:
                curr_rev = self.id_revs.get(_id)  # get last _id, _rev
                if curr_rev != new_rev:
                    break  # new update on the way stop current
            self._single_update_index(index, data, db_data, _id)
        with self.main_lock:
            # .get(): a concurrent update that superseded this one may have
            # already removed the entry.
            if self.id_revs.get(_id) == new_rev:
                del self.id_revs[_id]
        return _id, new_rev

    def _delete_indexes(self, _id, _rev, data):
        old_data = self.get('id', _id)
        if old_data['_rev'] != _rev:
            raise RevConflict()
        with self.main_lock:
            self.id_revs[_id] = _rev
        for index in self.indexes[1:]:
            self._single_delete_index(index, data, _id, old_data)
        self._delete_id_index(_id, _rev, data)
        with self.main_lock:
            # .get(): the entry may already have been removed concurrently.
            if self.id_revs.get(_id) == _rev:
                del self.id_revs[_id]

View File

@@ -0,0 +1,110 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2011-2013 Codernity (http://codernity.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from threading import RLock
from CodernityDB3.env import cdb_environment
cdb_environment['mode'] = "threads"
cdb_environment['rlock_obj'] = RLock
from .database import Database
from functools import wraps
from types import FunctionType, MethodType
from CodernityDB3.database_safe_shared import th_safe_gen
class SuperLock(type):
    """Metaclass that makes public methods run while holding ``super_lock``.

    Every class created with this metaclass gets a class attribute
    ``super_lock`` (an ``RLock``).  Public methods defined on the class and
    public methods inherited from its bases are replaced by wrappers that
    acquire that lock; ``flush`` and ``flush_indexes`` inherited from a base
    are left unwrapped.
    """

    @staticmethod
    def wrapper(f):
        """Return *f* wrapped so it executes inside ``args[0].super_lock``."""
        @wraps(f)
        def _inner(*args, **kwargs):
            db = args[0]
            with db.super_lock:
                return f(*args, **kwargs)
        return _inner

    def __new__(cls, classname, bases, attr):
        new_attr = {}
        for base in bases:
            for b_attr in dir(base):
                if b_attr.startswith('_') or b_attr in ('flush', 'flush_indexes'):
                    continue
                a = getattr(base, b_attr, None)
                # On Python 3 a method looked up on a class is a plain
                # function (there are no unbound methods), so FunctionType
                # must be accepted or inherited methods are never wrapped.
                # NOTE(review): a staticmethod on a base would also match
                # FunctionType and be wrapped; confirm the bases define none.
                if isinstance(a, (FunctionType, MethodType)):
                    new_attr[b_attr] = SuperLock.wrapper(a)
        for attr_name, attr_value in list(attr.items()):
            if isinstance(attr_value, FunctionType) and not attr_name.startswith('_'):
                attr_value = SuperLock.wrapper(attr_value)
            new_attr[attr_name] = attr_value
        new_attr['super_lock'] = RLock()
        return type.__new__(cls, classname, bases, new_attr)
class SuperThreadSafeDatabase(Database, metaclass=SuperLock):
    """
    Thread safe database that lets a single thread use the db at a time.
    The same lock (``super_lock``, installed by the ``SuperLock`` metaclass)
    guards all methods, so only one operation can be performed at a given
    time. It is a completely different implementation than the ThreadSafe
    version (the one without the "super" word).
    """

    def __init__(self, *args, **kwargs):
        super(SuperThreadSafeDatabase, self).__init__(*args, **kwargs)

    def __patch_index_gens(self, name):
        """Make ``all``/``get_many`` of index *name* iterate under super_lock."""
        index = self.indexes_names[name]
        for method_name in ('all', 'get_many'):
            original = getattr(index, method_name)
            backup_name = method_name + "_orig"
            if getattr(index, backup_name, None):
                # A saved original means the index was patched before.
                return
            guarded = th_safe_gen.wrapper(
                original, name, method_name, self.super_lock)
            setattr(index, method_name, guarded)
            setattr(index, backup_name, original)

    def __patch_all_index_gens(self):
        """Apply the generator patch to every currently known index."""
        for index_name in list(self.indexes_names.keys()):
            self.__patch_index_gens(index_name)

    def open(self, *args, **kwargs):
        result = super(SuperThreadSafeDatabase, self).open(*args, **kwargs)
        self.__patch_all_index_gens()
        return result

    def create(self, *args, **kwargs):
        result = super(SuperThreadSafeDatabase, self).create(*args, **kwargs)
        self.__patch_all_index_gens()
        return result

    def add_index(self, *args, **kwargs):
        index_name = super(SuperThreadSafeDatabase, self).add_index(*args, **kwargs)
        self.__patch_index_gens(index_name)
        return index_name

    def edit_index(self, *args, **kwargs):
        index_name = super(SuperThreadSafeDatabase, self).edit_index(*args, **kwargs)
        self.__patch_index_gens(index_name)
        return index_name

View File

@@ -0,0 +1,35 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2011-2013 Codernity (http://codernity.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from threading import RLock
from CodernityDB3.env import cdb_environment
cdb_environment['mode'] = "threads"
cdb_environment['rlock_obj'] = RLock
from .database_safe_shared import SafeDatabase
class ThreadSafeDatabase(SafeDatabase):
    """
    Thread safe version of CodernityDB that uses several lock objects
    for different methods, different indexes, etc. Its locking is a
    completely different implementation than the SuperThreadSafe one.
    """
    pass

View File

@@ -0,0 +1,211 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2011-2013 Codernity (http://codernity.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from CodernityDB3.tree_index import TreeBasedIndex
import struct
import os
import inspect
from functools import wraps
import json
class DebugTreeBasedIndex(TreeBasedIndex):
    """TreeBasedIndex with helpers that print the raw tree level by level."""

    def __init__(self, *args, **kwargs):
        super(DebugTreeBasedIndex, self).__init__(*args, **kwargs)

    def print_tree(self):
        """Print the root, then every node level, then the leaf level."""
        print('-----CURRENT TREE-----')
        print(self.root_flag)
        print_root = self._print_leaf_data if self.root_flag == 'l' else self._print_node_data
        print('---ROOT---')
        print_root(self.data_start)
        if self.root_flag == 'l':
            # The root itself is a leaf: there are no further levels.
            return

        count, children_flag = self._read_node_nr_of_elements_and_children_flag(
            self.data_start)
        nodes = []
        for position in range(count):
            left, _key, right = self._read_single_node_key(
                self.data_start, position)
            nodes.extend((left, right))
        print('ROOT NODES', nodes)

        # Descend while the children of the current level are nodes ('n').
        while children_flag == 'n':
            self._print_level(nodes, 'n')
            next_level = []
            for node in nodes:
                count, children_flag = \
                    self._read_node_nr_of_elements_and_children_flag(node)
                for position in range(count):
                    left, _key, right = self._read_single_node_key(
                        node, position)
                    next_level.extend((left, right))
            nodes = next_level
        self._print_level(nodes, 'l')

    def _print_level(self, nodes, flag):
        """Print every entry of *nodes* as nodes ('n') or leaves ('l')."""
        print('---NEXT LVL---')
        if flag == 'n':
            printer = self._print_node_data
        elif flag == 'l':
            printer = self._print_leaf_data
        else:
            return
        for node in nodes:
            printer(node)

    def _print_leaf_data(self, leaf_start_position):
        """Read the leaf at *leaf_start_position* and print its unpacked fields."""
        print('printing data of leaf at', leaf_start_position)
        nr_of_elements = self._read_leaf_nr_of_elements(leaf_start_position)
        self.buckets.seek(leaf_start_position)
        byte_count = (self.leaf_heading_size
                      + nr_of_elements * self.single_leaf_record_size)
        data = self.buckets.read(byte_count)
        layout = ('<' + self.leaf_heading_format
                  + nr_of_elements * self.single_leaf_record_format)
        leaf = struct.unpack(layout, data)
        print(leaf)
        print()

    def _print_node_data(self, node_start_position):
        """Read the node at *node_start_position* and print its unpacked fields."""
        print('printing data of node at', node_start_position)
        nr_of_elements = self._read_node_nr_of_elements_and_children_flag(
            node_start_position)[0]
        self.buckets.seek(node_start_position)
        byte_count = (self.node_heading_size + self.pointer_size
                      + nr_of_elements * (self.key_size + self.pointer_size))
        data = self.buckets.read(byte_count)
        layout = ('<' + self.node_heading_format + self.pointer_format
                  + nr_of_elements * (self.key_format + self.pointer_format))
        node = struct.unpack(layout, data)
        print(node)
        print()
# ------------------>
def database_step_by_step(db_obj, path=None):
    """Patch *db_obj* so every public method call is appended to a log file.

    Each call is written as one JSON line: ``[name, args, kwargs]`` before the
    call for most methods, and ``[name, args, kwargs, result]`` after the call
    for ``insert`` (``result`` is ``null`` when the insert raised).  The open
    log file is stored on the object as ``operation_logger``.

    :param db_obj: database object whose public bound methods get wrapped
    :param path: log file path; when empty it is derived from ``db_obj.path``
    """
    if not path:
        # ugly for multiplatform support....
        p = db_obj.path
        p1 = os.path.split(p)
        p2 = os.path.split(p1[0])
        p3 = '_'.join([p2[1], 'operation_logger.log'])
        path = os.path.join(os.path.split(p2[0])[0], p3)
    f_obj = open(path, 'wb')

    __stack = []  # inspect.stack() is not working on pytest etc

    def remove_from_stack(name):
        # NOTE(review): indexes with -i while popping; with three or more
        # entries equal to `name` this can index past the shrunken list.
        # Left as is - confirm the intended removal semantics.
        for i in range(len(__stack)):
            if __stack[-i] == name:
                __stack.pop(-i)

    def write_line(packed):
        # The log is opened in binary mode, so the text must be encoded;
        # writing str to it raises TypeError on Python 3.
        f_obj.write(('%s\n' % packed).encode('utf-8'))
        f_obj.flush()

    def __dumper(f):
        @wraps(f)
        def __inner(*args, **kwargs):
            funct_name = f.__name__
            if funct_name == 'count':
                name = args[0].__name__
                meth_args = (name,) + args[1:]
            elif funct_name in ('reindex_index', 'compact_index'):
                name = args[0].name
                meth_args = (name,) + args[1:]
            else:
                meth_args = args
            kwargs_copy = kwargs.copy()
            res = None
            __stack.append(funct_name)
            if funct_name == 'insert':
                try:
                    res = f(*args, **kwargs)
                except:
                    # Log the failed insert, then let the error propagate.
                    packed = json.dumps((funct_name,
                                         meth_args, kwargs_copy, None))
                    write_line(packed)
                    raise
                else:
                    packed = json.dumps((funct_name,
                                         meth_args, kwargs_copy, res))
                    write_line(packed)
            else:
                if funct_name == 'get':
                    # A `get` issued while a delete/update is on the stack is
                    # executed without being logged.
                    for curr in __stack:
                        if ('delete' in curr or 'update' in curr) and not curr.startswith('test'):
                            remove_from_stack(funct_name)
                            return f(*args, **kwargs)
                packed = json.dumps((funct_name, meth_args, kwargs_copy))
                write_line(packed)
                res = f(*args, **kwargs)
            remove_from_stack(funct_name)
            return res
        return __inner

    for meth_name, meth_f in inspect.getmembers(db_obj, predicate=inspect.ismethod):
        if not meth_name.startswith('_'):
            setattr(db_obj, meth_name, __dumper(meth_f))

    setattr(db_obj, 'operation_logger', f_obj)
def database_from_steps(db_obj, path):
    """Replay on *db_obj* the operations stored in the JSON-lines file *path*.

    Every line is a JSON list ``[name, args, kwargs, ...]``; the method
    *name* of *db_obj* is called with ``*args, **kwargs``.

    :raises Exception: when a logged method does not exist on *db_obj*
    """
    # db_obj.insert=lambda data : insert_for_debug(db_obj, data)
    with open(path, 'rb') as f_obj:
        for current in f_obj:
            # strip() instead of [:-1]: the final line may lack a newline,
            # and slicing would then cut off the closing bracket.
            current = current.strip()
            if not current:
                continue
            # Decode explicitly; json.loads only accepts bytes on newer
            # Python 3 releases.
            line = json.loads(current.decode('utf-8'))
            if line[0] == 'count':
                obj = getattr(db_obj, line[1][0])
                line[1] = [obj] + line[1][1:]
            name = line[0]
            if name == 'insert':
                # Best effort: drop a logged revision so insert assigns one.
                try:
                    line[1][0].pop('_rev')
                except (KeyError, IndexError, AttributeError, TypeError):
                    pass
            elif name in ('delete', 'update'):
                # Use the revision currently stored, not the logged one.
                el = db_obj.get('id', line[1][0]['_id'])
                line[1][0]['_rev'] = el['_rev']
            meth = getattr(db_obj, line[0], None)
            if not meth:
                raise Exception("Method = `%s` not found" % line[0])
            meth(*line[1], **line[2])
# def insert_for_debug(self, data):
#
# _rev = data['_rev']
#
# if not '_id' in data:
# _id = uuid4().hex
# else:
# _id = data['_id']
# data['_id'] = _id
# try:
# _id = bytes(_id)
# except:
# raise DatabaseException("`_id` must be valid bytes object")
# self._insert_indexes(_id, _rev, data)
# ret = {'_id': _id, '_rev': _rev}
# data.update(ret)
# return ret

25
libs/CodernityDB3/env.py Normal file
View File

@@ -0,0 +1,25 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2011-2013 Codernity (http://codernity.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
CodernityDB environment.
Holds internal information.
"""
cdb_environment = {
'mode': 'normal'
}

View File

@@ -0,0 +1,995 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2011-2013 Codernity (http://codernity.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from CodernityDB3.index import (Index,
IndexException,
DocIdNotFound,
ElemNotFound,
TryReindexException,
IndexPreconditionsException)
import os
import marshal
import io
import struct
import shutil
from CodernityDB3.storage import IU_Storage, DummyStorage
from CodernityDB3.env import cdb_environment
if cdb_environment.get('rlock_obj'):
from CodernityDB3 import patch
patch.patch_cache_rr(cdb_environment['rlock_obj'])
from CodernityDB3.rr_cache import cache1lvl
from CodernityDB3.misc import random_hex_32
try:
from CodernityDB3 import __version__
except ImportError:
from .__init__ import __version__
class IU_HashIndex(Index):
"""
That class is for Internal Use only, if you want to use HashIndex just subclass the :py:class:`HashIndex` instead this one.
That design is because main index logic should be always in database not in custom user indexes.
"""
def __init__(self, db_path, name, entry_line_format='<32s{key}IIcI', hash_lim=0xfffff, storage_class=None, key_format='c'):
    """
    The index resolves hash conflicts by `Separate chaining`.

    :param db_path: database path
    :type db_path: string
    :param name: index name
    :type name: ascii string
    :param entry_line_format: struct format of one entry; the `key_format` parameter value replaces `{key}` if present.
    :type entry_line_format: string (<32s{key}IIcI by default) {doc_id}{hash_key}{start}{size}{status}{next}
    :param hash_lim: maximum hash function result (remember about the birthday problem), counted from 0
    :type hash_lim: integer
    :param storage_class: storage class; by default the standard :py:class:`CodernityDB3.storage.IU_Storage` is used (if given as a string it has to be accessible through globals()[storage_class])
    :type storage_class: class (its name is stored), class name string, or None
    :param key_format: struct format of the index key
    """
    # Fix types
    # NOTE(review): db_path and name become bytes here, while open_index /
    # create_index concatenate self.name with the str '_buck' - confirm that
    # Index.__init__ normalises them.
    if isinstance(db_path, str):
        db_path = db_path.encode()
    if isinstance(name, str):
        name = name.encode()
    if key_format and '{key}' in entry_line_format:
        entry_line_format = entry_line_format.replace('{key}', key_format)
    super(IU_HashIndex, self).__init__(db_path, name)
    self.hash_lim = hash_lim
    if not storage_class:
        storage_class = IU_Storage
    # Only the class name is kept; it is resolved again through globals()
    # when the storage is opened or created.
    if storage_class and not isinstance(storage_class, str):
        storage_class = storage_class.__name__
    self.storage_class = storage_class
    self.storage = None
    self.bucket_line_format = "<I"
    self.bucket_line_size = struct.calcsize(self.bucket_line_format)
    self.entry_line_format = entry_line_format
    self.entry_line_size = struct.calcsize(self.entry_line_format)
    # Per-instance caches around the two lookup helpers.
    cache = cache1lvl(100)
    self._find_key = cache(self._find_key)
    self._locate_doc_id = cache(self._locate_doc_id)
    self.bucket_struct = struct.Struct(self.bucket_line_format)
    self.entry_struct = struct.Struct(self.entry_line_format)
    # Entries start right after the (hash_lim + 1) bucket slots.
    self.data_start = (
        self.hash_lim + 1) * self.bucket_line_size + self._start_ind + 2
def _fix_params(self):
    """Recompute the sizes, Struct objects and data offset from the formats."""
    super(IU_HashIndex, self)._fix_params()
    bucket_format = self.bucket_line_format
    entry_format = self.entry_line_format
    self.bucket_line_size = struct.calcsize(bucket_format)
    self.entry_line_size = struct.calcsize(entry_format)
    self.bucket_struct = struct.Struct(bucket_format)
    self.entry_struct = struct.Struct(entry_format)
    # Entries begin after the (hash_lim + 1) bucket slots.
    bucket_table_size = (self.hash_lim + 1) * self.bucket_line_size
    self.data_start = bucket_table_size + self._start_ind + 2
def open_index(self):
    """Open the existing bucket file unbuffered and attach the storage.

    :raises IndexException: when the ``<name>_buck`` file is missing
    """
    bucket_path = os.path.join(self.db_path, self.name + '_buck')
    if not os.path.isfile(bucket_path):
        raise IndexException("Doesn't exists")
    self.buckets = io.open(bucket_path, 'r+b', buffering=0)
    self._fix_params()
    self._open_storage()
def create_index(self):
    """Create the bucket file with the marshalled properties, then the storage.

    :raises IndexException: when the ``<name>_buck`` file already exists
    """
    bucket_path = os.path.join(self.db_path, self.name + '_buck')
    if os.path.isfile(bucket_path):
        raise IndexException('Already exists')
    with io.open(bucket_path, 'w+b') as header_file:
        properties = {
            'name': self.name,
            'bucket_line_format': self.bucket_line_format,
            'entry_line_format': self.entry_line_format,
            'hash_lim': self.hash_lim,
            'version': self.__version__,
            'storage_class': self.storage_class,
        }
        header_file.write(marshal.dumps(properties))
    # Reopen unbuffered for subsequent reads and writes.
    self.buckets = io.open(bucket_path, 'r+b', buffering=0)
    self._create_storage()
def destroy(self):
    """Run the parent class's destroy, then clear this index's caches."""
    super(IU_HashIndex, self).destroy()
    self._clear_cache()
def _open_storage(self):
s = globals()[self.storage_class]
if not self.storage:
self.storage = s(self.db_path, self.name)
self.storage.open()
def _create_storage(self):
s = globals()[self.storage_class]
if not self.storage:
self.storage = s(self.db_path, self.name)
self.storage.create()
# def close_index(self):
# self.buckets.flush()
# self.buckets.close()
# self.storage.close()
# @lfu_cache(100)
def _find_key(self, key):
"""
Find the key position
:param key: the key to find
"""
# Fix types
if isinstance(key, str):
key = key.encode()
start_position = self._calculate_position(key)
self.buckets.seek(start_position)
curr_data = self.buckets.read(self.bucket_line_size)
if curr_data:
location = self.bucket_struct.unpack(curr_data)[0]
if not location:
return None, None, 0, 0, 'u'
found_at, doc_id, l_key, start, size, status, _next = self._locate_key(
key, location)
if status == 'd': # when first record from many is deleted
while True:
found_at, doc_id, l_key, start, size, status, _next = self._locate_key(
key, _next)
if status != 'd':
break
return doc_id, l_key, start, size, status
else:
return None, None, 0, 0, 'u'
def _find_key_many(self, key, limit=1, offset=0):
# Fix types
if isinstance(key, str):
key = key.encode()
location = None
start_position = self._calculate_position(key)
self.buckets.seek(start_position)
curr_data = self.buckets.read(self.bucket_line_size)
if curr_data:
location = self.bucket_struct.unpack(curr_data)[0]
while offset:
if not location:
break
try:
found_at, doc_id, l_key, start, size, status, _next = self._locate_key(
key, location)
except IndexException:
break
else:
if status != 'd':
if l_key == key: # in case of hash function conflicts
offset -= 1
location = _next
while limit:
if not location:
break
try:
found_at, doc_id, l_key, start, size, status, _next = self._locate_key(
key, location)
except IndexException:
break
else:
if status != 'd':
if l_key == key: # in case of hash function conflicts
yield doc_id, start, size, status
limit -= 1
location = _next
def _calculate_position(self, key):
# Fix types
if isinstance(key, str):
key = key.encode()
return abs(hash(key) & self.hash_lim) * self.bucket_line_size + self._start_ind
# TODO add cache!
def _locate_key(self, key, start):
"""
Locate position of the key, it will iterate using `next` field in record
until required key will be find.
:param key: the key to locate
:param start: position to start from
"""
# Fix types
if isinstance(key, str):
key = key.encode()
location = start
while True:
self.buckets.seek(location)
data = self.buckets.read(self.entry_line_size)
# todo, maybe partial read there...
try:
doc_id, l_key, start, size, status, _next = self.entry_struct.unpack(data)
except struct.error:
raise ElemNotFound(
"Not found") # not found but might be also broken
if l_key == key:
break
else:
if not _next:
# not found
raise ElemNotFound("Not found")
else:
location = _next # go to next record
return location, doc_id, l_key, start, size, status, _next
# @lfu_cache(100)
def _locate_doc_id(self, doc_id, key, start):
    """
    Locate position of the doc_id, it will iterate using `next` field in record
    until the record matching both `doc_id` and `key` is found.

    :param doc_id: the doc_id to locate (``str`` is encoded to bytes)
    :param key: key value (``str`` is encoded to bytes)
    :param start: position to start from
    :returns: ``(location, doc_id, l_key, start, size, status, _next)``
    :raises DocIdNotFound: when the chain ends, or a record cannot be
        unpacked, before a matching record is found
    """
    # Fix types
    if isinstance(doc_id, str):
        doc_id = doc_id.encode()
    if isinstance(key, str):
        key = key.encode()
    location = start
    while True:
        self.buckets.seek(location)
        data = self.buckets.read(self.entry_line_size)
        try:
            l_doc_id, l_key, start, size, status, _next = self.entry_struct.unpack(data)
        except struct.error:
            # Only a short/garbled record means "not found"; anything else
            # (including KeyboardInterrupt) must propagate unchanged.
            raise DocIdNotFound(
                "Doc_id '%s' for '%s' not found" % (doc_id, key))
        if l_doc_id == doc_id and l_key == key:  # added for consistency
            break
        if not _next:
            # not found
            raise DocIdNotFound(
                "Doc_id '%s' for '%s' not found" % (doc_id, key))
        location = _next  # go to next record
    return location, doc_id, l_key, start, size, status, _next
def _find_place(self, start):
    """
    Find a place to where put the key. It will iterate using `next` field in record, until
    an empty `next` or a record marked as deleted is found.

    :param start: position to start from
    :returns: ``(position, doc_id, l_key, start, size, status, _next)`` of
        the record at which the walk stopped
    """
    location = start
    while True:
        self.buckets.seek(location)
        data = self.buckets.read(self.entry_line_size)
        # todo, maybe partial read there...
        doc_id, l_key, start, size, status, _next = self.entry_struct.unpack(data)
        # The status field unpacks to bytes on Python 3 and to str on
        # Python 2; accept both spellings of the deleted marker.
        if not _next or status in (b'd', 'd'):
            return self.buckets.tell() - self.entry_line_size, doc_id, l_key, start, size, status, _next
        location = _next  # go to next record
def update(self, doc_id, key, u_start=0, u_size=0, u_status='o'):
    """
    Overwrite the record stored for (`doc_id`, `key`) with new values.

    :param doc_id: document id of the record (``str`` is encoded to bytes)
    :param key: key of the record (``str`` is encoded to bytes)
    :param u_start: new start value
    :param u_size: new size value
    :param u_status: new status (``str`` is encoded to bytes)
    :returns: ``True``
    :raises ElemNotFound: when nothing can be read at the bucket position
    """
    # Fix types
    if isinstance(doc_id, str):
        doc_id = doc_id.encode()
    if isinstance(key, str):
        key = key.encode()
    if isinstance(u_status, str):
        u_status = u_status.encode()
    self.buckets.seek(self._calculate_position(key))
    bucket_raw = self.buckets.read(self.bucket_line_size)
    # test if it's unique or not really unique hash
    if not bucket_raw:
        raise ElemNotFound("Location '%s' not found" % doc_id)
    location = self.bucket_struct.unpack(bucket_raw)[0]
    located = self._locate_doc_id(doc_id, key, location)
    found_at, _next = located[0], located[6]
    self.buckets.seek(found_at)
    # Only start/size/status change; the chain pointer is preserved.
    new_entry = self.entry_struct.pack(doc_id, key, u_start, u_size, u_status, _next)
    self.buckets.write(new_entry)
    self.flush()
    # Drop cached lookups for this record.
    self._find_key.delete(key)
    self._locate_doc_id.delete(doc_id)
    return True
def insert(self, doc_id, key, start, size, status='o'):
    """
    Store a record for (`doc_id`, `key`).

    When the bucket for `key` is empty the record is appended and the
    bucket is pointed at it. Otherwise an existing record for the same
    doc_id/key is overwritten in place, or the record is appended and
    linked from the place returned by `_find_place`.

    :param doc_id: document id (``str`` is encoded to bytes)
    :param key: key (``str`` is encoded to bytes)
    :param start: start value to store
    :param size: size value to store
    :param status: status marker (``str`` is encoded to bytes)
    :returns: ``True``
    """
    # Fix types
    if isinstance(doc_id, str):
        doc_id = doc_id.encode()
    if isinstance(key, str):
        key = key.encode()
    if isinstance(status, str):
        status = status.encode()
    pack = self.entry_struct.pack
    start_position = self._calculate_position(key)
    self.buckets.seek(start_position)
    curr_data = self.buckets.read(self.bucket_line_size)
    # conflict occurs?
    location = self.bucket_struct.unpack(curr_data)[0] if curr_data else 0

    if not location:
        # Empty bucket: append the record and point the bucket at it.
        self.buckets.seek(0, 2)
        wrote_at = self.buckets.tell()
        # check if position is bigger than all hash entries...
        if wrote_at < self.data_start:
            self.buckets.seek(self.data_start)
            wrote_at = self.buckets.tell()
        self.buckets.write(pack(doc_id, key, start, size, status, 0))
        self._find_key.delete(key)
        self.buckets.seek(start_position)
        self.buckets.write(self.bucket_struct.pack(wrote_at))
        self.flush()
        return True

    # last key with that hash
    try:
        found_at, _doc_id, _key, _start, _size, _status, _next = self._locate_doc_id(doc_id, key, location)
    except DocIdNotFound:
        found_at, _doc_id, _key, _start, _size, _status, _next = self._find_place(location)
        # Append the new record, inheriting the chain pointer of the place
        # found, then re-point that place at the new record.
        self.buckets.seek(0, 2)
        wrote_at = self.buckets.tell()
        self.buckets.write(pack(doc_id, key, start, size, status, _next))
        self.buckets.seek(found_at)
        self.buckets.write(pack(_doc_id, _key, _start, _size, _status, wrote_at))
    else:
        # Same doc_id/key already stored: overwrite it in place.
        self.buckets.seek(found_at)
        self.buckets.write(pack(doc_id, key, start, size, status, _next))
    self.flush()
    self._locate_doc_id.delete(doc_id)
    self._find_key.delete(_key)
    return True
def get(self, key):
    """
    Look up `key` through `make_key` and `_find_key`.

    :param key: the key to find (``str`` keys are encoded to bytes first)
    :returns: whatever `_find_key` returns for the transformed key
    """
    # Fix types
    raw_key = key.encode() if isinstance(key, str) else key
    lookup_key = self.make_key(raw_key)
    return self._find_key(lookup_key)
def get_many(self, key, limit=1, offset=0):
    """
    Look up several records for `key` through `make_key` and
    `_find_key_many`.

    :param key: the key to find
    :param limit: passed through to `_find_key_many`
    :param offset: passed through to `_find_key_many`
    :returns: whatever `_find_key_many` returns
    """
    lookup_key = self.make_key(key)
    return self._find_key_many(lookup_key, limit, offset)
def all(self, limit=-1, offset=0):
    """
    Yield every live record, reading entries sequentially from
    ``data_start``.

    :param limit: maximum number of records to yield; the default ``-1``
        never reaches zero, so iteration runs until the data ends
    :param offset: number of live records to skip first
    :returns: generator of ``(doc_id, key, start, size, status)`` tuples
    """
    # The status field unpacks to bytes on Python 3 and to str on Python 2;
    # accept both spellings of the deleted marker.
    deleted = (b'd', 'd')
    self.buckets.seek(self.data_start)
    while offset:
        curr_data = self.buckets.read(self.entry_line_size)
        if not curr_data:
            break
        try:
            doc_id, key, start, size, status, _next = self.entry_struct.unpack(curr_data)
        except (IndexException, struct.error):
            # unpack() signals a short trailing record with struct.error
            break
        else:
            if status not in deleted:
                offset -= 1
    while limit:
        curr_data = self.buckets.read(self.entry_line_size)
        if not curr_data:
            break
        try:
            doc_id, key, start, size, status, _next = self.entry_struct.unpack(curr_data)
        except (IndexException, struct.error):
            break
        else:
            if status not in deleted:
                yield doc_id, key, start, size, status
                limit -= 1
def _fix_link(self, key, pos_prev, pos_next):
    """
    Rewrite the chain pointer of the record at `pos_prev` so that it
    points to `pos_next`, then rewrite the record at `pos_next` unchanged.

    :param key: key of the affected record (only type-normalised here)
    :param pos_prev: position of the previous record
    :param pos_next: position of the next record (falsy when there is none)
    """
    # Fix types
    if isinstance(key, str):
        key = key.encode()
    unpack = self.entry_struct.unpack
    pack = self.entry_struct.pack
    # CHECKIT why I need that hack
    if pos_prev >= self.data_start:
        self.buckets.seek(pos_prev)
        prev_raw = self.buckets.read(self.entry_line_size)
        if prev_raw:
            fields = unpack(prev_raw)
            self.buckets.seek(pos_prev)
            # Same record, but linked to pos_next.
            self.buckets.write(pack(*(fields[:5] + (pos_next,))))
            self.flush()
    if pos_next:
        self.buckets.seek(pos_next)
        next_raw = self.buckets.read(self.entry_line_size)
        if next_raw:
            fields = unpack(next_raw)
            self.buckets.seek(pos_next)
            # Written back with all six fields untouched.
            self.buckets.write(pack(*fields))
            self.flush()
    return
def delete(self, doc_id, key, start=0, size=0):
    """
    Mark the record stored for (`doc_id`, `key`) as deleted.

    The record keeps its stored start, size and chain pointer; only the
    status field is replaced by the deleted marker.

    :param doc_id: document id (``str`` is encoded to bytes)
    :param key: key (``str`` is encoded to bytes)
    :param start: overwritten by the value read from the located record
    :param size: overwritten by the value read from the located record
    :returns: ``True``
    :raises TryReindexException: when nothing can be read at the bucket
        position for `key`
    """
    # Fix types
    if isinstance(doc_id, str):
        doc_id = doc_id.encode()
    if isinstance(key, str):
        key = key.encode()
    start_position = self._calculate_position(key)
    self.buckets.seek(start_position)
    curr_data = self.buckets.read(self.bucket_line_size)
    if curr_data:
        location = self.bucket_struct.unpack(curr_data)[0]
    else:
        # case happens when trying to delete element with new index key in data
        # after adding new index to database without reindex
        raise TryReindexException()
    found_at, _doc_id, _key, start, size, status, _next = self._locate_doc_id(doc_id, key, location)
    self.buckets.seek(found_at)
    # struct's 'c' format needs a length-1 bytes object on Python 3;
    # b'd' is a plain str on Python 2, so this works on both.
    self.buckets.write(self.entry_struct.pack(doc_id,
                                              key,
                                              start,
                                              size,
                                              b'd',
                                              _next))
    self.flush()
    # self._fix_link(_key, _prev, _next)
    self._find_key.delete(key)
    self._locate_doc_id.delete(doc_id)
    return True
def compact(self, hash_lim=None):
    """
    Rebuild the index into a fresh ``<name>_compact`` index containing
    only what `all()` yields, then move its files over this index's files
    and reopen.

    :param hash_lim: hash limit for the rebuilt index; defaults to the
        current ``self.hash_lim``
    :returns: ``True``
    """
    if not hash_lim:
        hash_lim = self.hash_lim
    compact_ind = self.__class__(
        self.db_path, self.name + '_compact', hash_lim=hash_lim)
    compact_ind.create_index()
    for doc_id, key, start, size, status in self.all():
        # Copy the stored value across and index it at its new offset.
        self.storage._f.seek(start)
        value = self.storage._f.read(size)
        start_ = compact_ind.storage._f.tell()
        compact_ind.storage._f.write(value)
        compact_ind.insert(doc_id, key, start_, size, status)
    compact_ind.close_index()
    original_name = self.name
    # os.unlink(os.path.join(self.db_path, self.name + "_buck"))
    self.close_index()
    shutil.move(os.path.join(compact_ind.db_path, compact_ind.
                             name + "_buck"), os.path.join(self.db_path, self.name + "_buck"))
    shutil.move(os.path.join(compact_ind.db_path, compact_ind.
                             name + "_stor"), os.path.join(self.db_path, self.name + "_stor"))
    # self.name = original_name
    self.open_index()  # reload...
    # The name may already be text (Python 3 str has no .decode()); only
    # decode when it is actually a bytes object.
    if isinstance(original_name, bytes):
        self.name = original_name.decode()
    else:
        self.name = original_name
    self._save_params(dict(name=original_name))
    self._fix_params()
    self._clear_cache()
    return True
def make_key(self, key):
    """Return the index key for `key`; this implementation returns it unchanged."""
    index_key = key
    return index_key
def make_key_value(self, data):
    """Return the ``(key, value)`` pair for `data`: the constant key ``'1'`` and `data` itself."""
    pair = ('1', data)
    return pair
def _clear_cache(self):
    """Clear the lookup caches attached to `_find_key` and `_locate_doc_id`."""
    for cached in (self._find_key, self._locate_doc_id):
        cached.clear()
def close_index(self):
    """Close the index through the parent implementation, then clear the lookup caches."""
    parent = super(IU_HashIndex, self)
    parent.close_index()
    self._clear_cache()
class IU_UniqueHashIndex(IU_HashIndex):
    """
    Index for *unique* keys! Designed to be an **id** index.

    That class is for Internal Use only, if you want to use UniqueHashIndex just subclass the :py:class:`UniqueHashIndex` instead of this one.

    That design is because main index logic should be always in database not in custom user indexes.
    """

    def __init__(self, db_path, name, entry_line_format="<32s8sIIcI", *args, **kwargs):
        """
        :param db_path: database path, forwarded to the parent constructor
        :param name: index name, forwarded to the parent constructor
        :param entry_line_format: struct format of one index entry
        :raises IndexPreconditionsException: when a ``key`` keyword is given
        """
        # Fix types
        if isinstance(db_path, str):
            db_path = db_path.encode()
        if isinstance(name, str):
            name = name.encode()
        if 'key' in kwargs:
            raise IndexPreconditionsException(
                "UniqueHashIndex doesn't accept key parameter'")
        super(IU_UniqueHashIndex, self).__init__(db_path, name,
                                                 entry_line_format, *args, **kwargs)
        self.create_key = random_hex_32  # : set the function to create random key when no _id given
        # self.entry_struct=struct.Struct(entry_line_format)

    # @lfu_cache(100)
    def _find_key(self, key):
        """
        Find the key position

        :param key: the key to find
        :returns: ``(l_key, rev, start, size, status)`` with bytes fields
            decoded to text, or ``(None, None, 0, 0, 'u')`` when nothing
            can be read at the bucket position
        """
        # Fix types
        if isinstance(key, str):
            key = key.encode()
        start_position = self._calculate_position(key)
        # The original stored seek()'s return value on ``self.seek``; that
        # stray attribute is not read anywhere in this class and is dropped.
        self.buckets.seek(start_position)
        curr_data = self.buckets.read(self.bucket_line_size)
        if not curr_data:
            return None, None, 0, 0, 'u'
        location = self.bucket_struct.unpack(curr_data)[0]
        found_at, l_key, rev, start, size, status, _next = self._locate_key(
            key, location)
        # Fix types
        if isinstance(l_key, bytes):
            l_key = l_key.decode()
        if isinstance(rev, bytes):
            rev = rev.decode()
        if isinstance(status, bytes):
            status = status.decode()
        return l_key, rev, start, size, status

    def _find_key_many(self, *args, **kwargs):
        """Not supported by a unique index."""
        raise NotImplementedError()

    def _find_place(self, start, key):
        """
        Find a place to where put the key. It will iterate using `next` field in record, until
        an empty `next` or a record marked as deleted is found.

        :param start: position to start from
        :param key: the key about to be inserted
        :raises IndexException: when a record with `key` already exists
        """
        # Fix types
        if isinstance(key, str):
            key = key.encode()
        location = start
        while True:
            self.buckets.seek(location)
            data = self.buckets.read(self.entry_line_size)
            # todo, maybe partial read there...
            l_key, rev, start, size, status, _next = self.entry_struct.unpack(
                data)
            if l_key == key:
                raise IndexException("The '%s' key already exists" % key)
            # status is bytes on Python 3 and str on Python 2
            if not _next or status in (b'd', 'd'):
                return self.buckets.tell() - self.entry_line_size, l_key, rev, start, size, status, _next
            location = _next  # go to next record

    # @lfu_cache(100)
    def _locate_key(self, key, start):
        """
        Locate position of the key, it will iterate using `next` field in record
        until the required key is found.

        :param key: the key to locate
        :param start: position to start from
        :raises ElemNotFound: when the key is not in the chain or a record
            cannot be unpacked
        """
        # Fix types
        if isinstance(key, str):
            key = key.encode()
        location = start
        while True:
            self.buckets.seek(location)
            data = self.buckets.read(self.entry_line_size)
            # todo, maybe partial read there...
            try:
                l_key, rev, start, size, status, _next = self.entry_struct.unpack(data)
            except struct.error:
                raise ElemNotFound("Location '%s' not found" % key)
            if l_key == key:
                break
            if not _next:
                # not found
                raise ElemNotFound("Location '%s' not found" % key)
            location = _next  # go to next record
        return self.buckets.tell() - self.entry_line_size, l_key, rev, start, size, status, _next

    def update(self, key, rev, u_start=0, u_size=0, u_status='o'):
        """
        Overwrite the record stored under `key`.

        A zero `u_start` / `u_size` keeps the value already stored.

        :returns: ``True``
        :raises ElemNotFound: when nothing can be read at the bucket position
        """
        # Fix types
        if isinstance(key, str):
            key = key.encode()
        if isinstance(rev, str):
            rev = rev.encode()
        if isinstance(u_status, str):
            u_status = u_status.encode()
        start_position = self._calculate_position(key)
        self.buckets.seek(start_position)
        curr_data = self.buckets.read(self.bucket_line_size)
        # test if it's unique or not really unique hash
        if curr_data:
            location = self.bucket_struct.unpack(curr_data)[0]
        else:
            raise ElemNotFound("Location '%s' not found" % key)
        found_at, _key, _rev, start, size, status, _next = self._locate_key(
            key, location)
        if u_start == 0:
            u_start = start
        if u_size == 0:
            u_size = size
        self.buckets.seek(found_at)
        self.buckets.write(self.entry_struct.pack(key,
                                                  rev,
                                                  u_start,
                                                  u_size,
                                                  u_status,
                                                  _next))
        self.flush()
        self._find_key.delete(key)
        return True

    def insert(self, key, rev, start, size, status='o'):
        """
        Store a new record under `key`.

        :returns: ``True``
        :raises IndexException: from `_find_place` when `key` already exists
        """
        # Fix types
        if isinstance(key, str):
            key = key.encode()
        if isinstance(rev, str):
            rev = rev.encode()
        if isinstance(status, str):
            status = status.encode()
        start_position = self._calculate_position(key)
        self.buckets.seek(start_position)
        curr_data = self.buckets.read(self.bucket_line_size)
        # conflict occurs?
        if curr_data:
            location = self.bucket_struct.unpack(curr_data)[0]
        else:
            location = 0
        if location:
            # last key with that hash
            found_at, _key, _rev, _start, _size, _status, _next = self._find_place(
                location, key)
            self.buckets.seek(0, 2)
            wrote_at = self.buckets.tell()
            # check if position is bigger than all hash entries...
            if wrote_at < self.data_start:
                self.buckets.seek(self.data_start)
                wrote_at = self.buckets.tell()
            self.buckets.write(self.entry_struct.pack(key,
                                                      rev,
                                                      start,
                                                      size,
                                                      status,
                                                      _next))
            # self.flush()
            # Re-point the place found at the freshly written record.
            self.buckets.seek(found_at)
            self.buckets.write(self.entry_struct.pack(_key,
                                                      _rev,
                                                      _start,
                                                      _size,
                                                      _status,
                                                      wrote_at))
            self.flush()
            self._find_key.delete(_key)
            # self._locate_key.delete(_key)
            return True
            # raise NotImplementedError
        else:
            self.buckets.seek(0, 2)
            wrote_at = self.buckets.tell()
            # check if position is bigger than all hash entries...
            if wrote_at < self.data_start:
                self.buckets.seek(self.data_start)
                wrote_at = self.buckets.tell()
            self.buckets.write(self.entry_struct.pack(key,
                                                      rev,
                                                      start,
                                                      size,
                                                      status,
                                                      0))
            self.buckets.seek(start_position)
            self.buckets.write(self.bucket_struct.pack(wrote_at))
            self.flush()
            self._find_key.delete(key)
            return True

    def all(self, limit=-1, offset=0):
        """
        Yield every live record, reading entries sequentially from
        ``data_start``.

        :param limit: maximum number of records to yield (``-1`` never
            reaches zero, so iteration runs until the data ends)
        :param offset: number of live records to skip first
        :returns: generator of ``(doc_id, rev, start, size, status)`` tuples
        """
        # status is bytes on Python 3 and str on Python 2
        deleted = (b'd', 'd')
        self.buckets.seek(self.data_start)
        while offset:
            curr_data = self.buckets.read(self.entry_line_size)
            if not curr_data:
                break
            try:
                doc_id, rev, start, size, status, _next = self.entry_struct.unpack(curr_data)
            except (IndexException, struct.error):
                # unpack() signals a short trailing record with struct.error
                break
            else:
                if status not in deleted:
                    offset -= 1
        while limit:
            curr_data = self.buckets.read(self.entry_line_size)
            if not curr_data:
                break
            try:
                doc_id, rev, start, size, status, _next = self.entry_struct.unpack(curr_data)
            except (IndexException, struct.error):
                break
            else:
                if status not in deleted:
                    yield doc_id, rev, start, size, status
                    limit -= 1

    def get_many(self, *args, **kwargs):
        """Not supported by a unique index."""
        raise NotImplementedError()

    def delete(self, key, start=0, size=0):
        """Mark the record stored under `key` as deleted via `update`."""
        # Fix types
        if isinstance(key, str):
            key = key.encode()
        self.update(key, '00000000', start, size, 'd')

    def make_key_value(self, data):
        """
        Extract the ``_id`` of `data` as the index key.

        ``_id`` and ``_rev`` are removed from `data` in place.

        :returns: ``(_id, data)`` with ``_id`` as bytes
        :raises IndexPreconditionsException: when ``_id`` is not a
            string/bytes object or is not 32 bytes long
        """
        _id = data['_id']
        # A bytes _id is already in stored form; Python 3 bytes has no
        # .encode(), so calling it unconditionally rejected valid ids.
        if not isinstance(_id, bytes):
            try:
                _id = _id.encode()
            except Exception:
                raise IndexPreconditionsException(
                    "_id must be valid string/bytes object")
        if len(_id) != 32:
            raise IndexPreconditionsException("Invalid _id lenght")
        del data['_id']
        del data['_rev']
        return _id, data

    def destroy(self):
        """Destroy the index through `Index.destroy` and clear the cache."""
        Index.destroy(self)
        self._clear_cache()

    def _clear_cache(self):
        """Clear the lookup cache attached to `_find_key`."""
        self._find_key.clear()

    def insert_with_storage(self, _id, _rev, value):
        """Store `value` (when truthy) in the storage and insert its index record."""
        # Fix types
        if isinstance(_id, str):
            _id = _id.encode()
        if isinstance(_rev, str):
            _rev = _rev.encode()
        if value:
            start, size = self.storage.insert(value)
        else:
            start = 1
            size = 0
        return self.insert(_id, _rev, start, size)

    def update_with_storage(self, _id, _rev, value):
        """Store `value` (when truthy) in the storage and update its index record."""
        # Fix types
        if isinstance(_id, str):
            _id = _id.encode()
        if isinstance(_rev, str):
            _rev = _rev.encode()
        if value:
            start, size = self.storage.insert(value)
        else:
            start = 1
            size = 0
        return self.update(_id, _rev, start, size)
class DummyHashIndex(IU_HashIndex):
    """
    Hash index that stores nothing: writes report success, reads find
    nothing, and the storage is a `DummyStorage`.
    """

    def __init__(self, db_path, name, entry_line_format="<32s4sIIcI", *args, **kwargs):
        super(DummyHashIndex, self).__init__(db_path, name,
                                             entry_line_format, *args, **kwargs)
        self.create_key = random_hex_32  # : set the function to create random key when no _id given
        # self.entry_struct=struct.Struct(entry_line_format)

    def update(self, *args, **kwargs):
        """Accept and ignore the update."""
        return True

    def insert(self, *args, **kwargs):
        """Accept and ignore the insert."""
        return True

    def all(self, *args, **kwargs):
        """
        Return an empty iterator.

        Raising ``StopIteration`` directly from a plain function turns into
        ``RuntimeError`` when the call happens inside a generator (PEP 479);
        an exhausted iterator gives iterating callers the same "no records"
        outcome safely.
        """
        return iter(())

    def get(self, *args, **kwargs):
        """Nothing is ever stored, so every lookup fails."""
        raise ElemNotFound

    def get_many(self, *args, **kwargs):
        """Return an empty iterator (see `all`)."""
        return iter(())

    def delete(self, *args, **kwargs):
        pass

    def make_key_value(self, data):
        """Return a constant key/value pair regardless of `data`."""
        return '1', {'_': 1}

    def destroy(self):
        pass

    def _clear_cache(self):
        pass

    def _open_storage(self):
        if not self.storage:
            self.storage = DummyStorage()
        self.storage.open()

    def _create_storage(self):
        if not self.storage:
            self.storage = DummyStorage()
        self.storage.create()
class IU_MultiHashIndex(IU_HashIndex):
    """
    Class that allows to index more than one key per database record.

    It operates very well on GET/INSERT. It's not optimized for
    UPDATE operations (will always readd everything)
    """

    def __init__(self, *args, **kwargs):
        super(IU_MultiHashIndex, self).__init__(*args, **kwargs)

    def insert(self, doc_id, key, start, size, status='o'):
        """Insert one record per distinct key in `key`; returns ``True``."""
        # A list/tuple is de-duplicated; any other non-set value is
        # treated as a single key.
        if isinstance(key, (list, tuple)):
            key = set(key)
        elif not isinstance(key, set):
            key = {key}
        insert_one = super(IU_MultiHashIndex, self).insert
        for single_key in key:
            insert_one(doc_id, single_key, start, size, status)
        return True

    def update(self, doc_id, key, u_start, u_size, u_status='o'):
        """Update the record of every distinct key in `key`."""
        if isinstance(key, (list, tuple)):
            key = set(key)
        elif not isinstance(key, set):
            key = {key}
        update_one = super(IU_MultiHashIndex, self).update
        for single_key in key:
            update_one(doc_id, single_key, u_start, u_size, u_status)

    def delete(self, doc_id, key, start=0, size=0):
        """Delete the record of every distinct key in `key`."""
        if isinstance(key, (list, tuple)):
            key = set(key)
        elif not isinstance(key, set):
            key = {key}
        delete_one = super(IU_MultiHashIndex, self).delete
        for single_key in key:
            delete_one(doc_id, single_key, start, size)

    def get(self, key):
        """Look up a single key through the parent implementation."""
        return super(IU_MultiHashIndex, self).get(key)

    def make_key_value(self, data):
        """Must be provided by the concrete index."""
        raise NotImplementedError()
# classes for public use, done in this way because of
# generation static files with indexes (_index directory)
class HashIndex(IU_HashIndex):
    """
    Public hash index class, designed to be subclassed by custom indexes.
    """
class UniqueHashIndex(IU_UniqueHashIndex):
    """
    Public unique hash index class, designed to be subclassed by custom
    indexes. It's designed to be an **id** index.
    """
class MultiHashIndex(IU_MultiHashIndex):
    """
    Public multi-key hash index class, designed to be subclassed by custom
    indexes.
    """

200
libs/CodernityDB3/index.py Normal file
View File

@@ -0,0 +1,200 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2011-2013 Codernity (http://codernity.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import marshal
import struct
import shutil
from CodernityDB3.storage import IU_Storage, DummyStorage
try:
from CodernityDB3 import __version__
except ImportError:
from .__init__ import __version__
import io
class IndexException(Exception):
    """Base class of the index errors defined in this module."""


class IndexNotFoundException(IndexException):
    """Index-not-found error."""


class ReindexException(IndexException):
    """Base class of the reindex errors."""


class TryReindexException(ReindexException):
    """Reindex error; by its name, a hint that reindexing may help."""


class ElemNotFound(IndexException):
    """Element-not-found error."""


class DocIdNotFound(ElemNotFound):
    """Element-not-found error for a document id."""


class IndexConflict(IndexException):
    """Index conflict error."""


class IndexPreconditionsException(IndexException):
    """Index preconditions error."""
class Index(object):
    """
    Base class of the indexes.

    Handles the ``<name>_buck`` bucket file, whose first ``_start_ind``
    bytes hold the marshalled index properties, and declares the
    operations concrete indexes have to implement.
    """

    __version__ = __version__

    custom_header = ""  # : use it for imports required by your index

    def __init__(self,
                 db_path,
                 name):
        # Names and paths are kept as text; bytes input is decoded.
        if isinstance(name, bytes):
            name = name.decode()
        if isinstance(db_path, bytes):
            db_path = db_path.decode()
        self.name = name
        self._start_ind = 500
        self.db_path = db_path

    def open_index(self):
        """
        Open the bucket file, load the stored properties and open storage.

        :raises IndexException: when the bucket file does not exist
        """
        if not os.path.isfile(os.path.join(self.db_path, self.name + '_buck')):
            raise IndexException("Doesn't exists")
        self.buckets = io.open(
            os.path.join(self.db_path, self.name + "_buck"), 'r+b', buffering=0)
        self._fix_params()
        self._open_storage()

    def _close(self):
        self.buckets.close()
        self.storage.close()

    def close_index(self):
        """Flush, fsync and close the bucket file and the storage."""
        self.flush()
        self.fsync()
        self._close()

    def create_index(self):
        raise NotImplementedError()

    def _fix_params(self):
        """Load the marshalled properties from the file header into ``self``."""
        self.buckets.seek(0)
        props = marshal.loads(self.buckets.read(self._start_ind))
        self.__dict__.update(props)
        self.buckets.seek(0, 2)

    def _save_params(self, in_params=None):
        """
        Merge `in_params` into the stored properties and write them back.

        :param in_params: mapping of properties to add or overwrite;
            ``None`` (the default) writes the stored properties unchanged
        :raises IndexException: when the marshalled properties do not fit
            into the header area
        """
        self.buckets.seek(0)
        props = marshal.loads(self.buckets.read(self._start_ind))
        if in_params:
            props.update(in_params)
        self.buckets.seek(0)
        data = marshal.dumps(props)
        if len(data) > self._start_ind:
            raise IndexException("To big props")
        self.buckets.write(data)
        self.flush()
        self.buckets.seek(0, 2)
        self.__dict__.update(props)

    def _open_storage(self, *args, **kwargs):
        pass

    def _create_storage(self, *args, **kwargs):
        pass

    def _destroy_storage(self, *args, **kwargs):
        self.storage.destroy()

    def _find_key(self, key):
        raise NotImplementedError()

    def update(self, doc_id, key, start, size):
        raise NotImplementedError()

    def insert(self, doc_id, key, start, size):
        raise NotImplementedError()

    def get(self, key):
        raise NotImplementedError()

    def get_many(self, key, start_from=None, limit=0):
        raise NotImplementedError()

    def all(self, start_pos):
        raise NotImplementedError()

    def delete(self, key, start, size):
        raise NotImplementedError()

    def make_key_value(self, data):
        raise NotImplementedError()

    def make_key(self, data):
        raise NotImplementedError()

    def compact(self, *args, **kwargs):
        raise NotImplementedError()

    def destroy(self, *args, **kwargs):
        """Close the index, delete the bucket file and destroy the storage."""
        self._close()
        bucket_file = os.path.join(self.db_path, self.name + '_buck')
        os.unlink(bucket_file)
        self._destroy_storage()
        self._find_key.clear()

    def flush(self):
        """Best-effort flush of the bucket file and the storage."""
        try:
            self.buckets.flush()
            self.storage.flush()
        except Exception:
            # Deliberately best-effort, but KeyboardInterrupt/SystemExit
            # are no longer swallowed.
            pass

    def fsync(self):
        """Best-effort fsync of the bucket file and the storage."""
        try:
            os.fsync(self.buckets.fileno())
            self.storage.fsync()
        except Exception:
            pass

    def update_with_storage(self, doc_id, key, value):
        """Store `value` (when truthy) and update the index record."""
        if value:
            start, size = self.storage.insert(value)
        else:
            start = 1
            size = 0
        return self.update(doc_id, key, start, size)

    def insert_with_storage(self, doc_id, key, value):
        """Store `value` (unless it is ``None``) and insert the index record."""
        if value is not None:
            start, size = self.storage.insert(value)
        else:
            start = 1
            size = 0
        return self.insert(doc_id, key, start, size)

View File

@@ -0,0 +1,645 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2011-2013 Codernity (http://codernity.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re
import tokenize
import token
import uuid
class IndexCreatorException(Exception):
    """
    Base error of the index creator.

    :param ex: the error message
    :param line: line number the error refers to, when known
    """

    def __init__(self, ex, line=None):
        self.ex = ex
        self.line = line

    def __str__(self):
        message = self.ex
        if self.line:
            message = message + "(in line: %d)" % self.line
        return repr(message)


class IndexCreatorFunctionException(IndexCreatorException):
    """Index creator error of the function kind."""


class IndexCreatorValueException(IndexCreatorException):
    """Index creator error of the value kind."""
class Parser(object):
def __init__(self):
    """Create a parser; all state is set up later by `parse`."""
    return None
def parse(self, data, name=None):
"""
Split a simplified index definition into its properties section, its
make_key_value body and its make_key body, pre-process them and hand
over to `parse_ex`.

:param data: text of the index definition
:param name: name to use; when falsy a random ``_<hex>`` name is made
:returns: the result of `parse_ex`
:raises IndexCreatorFunctionException: when ``make_key_value:`` is
missing or a function body is empty
:raises IndexCreatorValueException: when the properties section is empty
"""
# Use the supplied name, or fall back to a random one.
if not name:
self.name = "_" + uuid.uuid4().hex
else:
self.name = name
self.ind = 0
self.stage = 0
# Lookup tables used while checking and translating the definition.
self.logic = ['and', 'or', 'in']
self.logic2 = ['&', '|']
self.allowed_props = {'TreeBasedIndex': ['type', 'name', 'key_format', 'node_capacity', 'pointer_format', 'meta_format'],
'HashIndex': ['type', 'name', 'key_format', 'hash_lim', 'entry_line_format'],
'MultiHashIndex': ['type', 'name', 'key_format', 'hash_lim', 'entry_line_format'],
'MultiTreeBasedIndex': ['type', 'name', 'key_format', 'node_capacity', 'pointer_format', 'meta_format']
}
self.funcs = {'md5': (['md5'], ['.digest()']),
'len': (['len'], []),
'str': (['str'], []),
'fix_r': (['self.fix_r'], []),
'prefix': (['self.prefix'], []),
'infix': (['self.infix'], []),
'suffix': (['self.suffix'], [])
}
# NOTE(review): izip (here) and xrange (in the infix template below) are
# Python 2 names; presumably the infix helper fails on Python 3 -- confirm.
self.handle_int_imports = {'infix': "from itertools import izip\n"}
self.funcs_with_body = {'fix_r':
(""" def fix_r(self,s,l):
e = len(s)
if e == l:
return s
elif e > l:
return s[:l]
else:
return s.rjust(l,'_')\n""", False),
'prefix':
(""" def prefix(self,s,m,l,f):
t = len(s)
if m < 1:
m = 1
o = set()
if t > l:
s = s[:l]
t = l
while m <= t:
o.add(s.rjust(f,'_'))
s = s[:-1]
t -= 1
return o\n""", False),
'suffix':
(""" def suffix(self,s,m,l,f):
t = len(s)
if m < 1:
m = 1
o = set()
if t > l:
s = s[t-l:]
t = len(s)
while m <= t:
o.add(s.rjust(f,'_'))
s = s[1:]
t -= 1
return o\n""", False),
'infix':
(""" def infix(self,s,m,l,f):
t = len(s)
o = set()
for x in xrange(m - 1, l):
t = (s, )
for y in xrange(0, x):
t += (s[y + 1:],)
o.update(set(''.join(x).rjust(f, '_').lower() for x in izip(*t)))
return o\n""", False)}
self.none = ['None', 'none', 'null']
self.props_assign = ['=', ':']
# Tables of which token kinds may stand on either side of an operator;
# consulted through self.allowed_adjacent.
self.all_adj_num_comp = {token.NUMBER: (
token.NUMBER, token.NAME, '-', '('),
token.NAME: (token.NUMBER, token.NAME, '-', '('),
')': (token.NUMBER, token.NAME, '-', '(')
}
self.all_adj_num_op = {token.NUMBER: (token.NUMBER, token.NAME, '('),
token.NAME: (token.NUMBER, token.NAME, '('),
')': (token.NUMBER, token.NAME, '(')
}
self.allowed_adjacent = {
"<=": self.all_adj_num_comp,
">=": self.all_adj_num_comp,
">": self.all_adj_num_comp,
"<": self.all_adj_num_comp,
"==": {token.NUMBER: (token.NUMBER, token.NAME, '('),
token.NAME: (token.NUMBER, token.NAME, token.STRING, '('),
token.STRING: (token.NAME, token.STRING, '('),
')': (token.NUMBER, token.NAME, token.STRING, '('),
']': (token.NUMBER, token.NAME, token.STRING, '(')
},
"+": {token.NUMBER: (token.NUMBER, token.NAME, '('),
token.NAME: (token.NUMBER, token.NAME, token.STRING, '('),
token.STRING: (token.NAME, token.STRING, '('),
')': (token.NUMBER, token.NAME, token.STRING, '('),
']': (token.NUMBER, token.NAME, token.STRING, '(')
},
"-": {token.NUMBER: (token.NUMBER, token.NAME, '('),
token.NAME: (token.NUMBER, token.NAME, '('),
')': (token.NUMBER, token.NAME, '('),
'<': (token.NUMBER, token.NAME, '('),
'>': (token.NUMBER, token.NAME, '('),
'<=': (token.NUMBER, token.NAME, '('),
'>=': (token.NUMBER, token.NAME, '('),
'==': (token.NUMBER, token.NAME, '('),
']': (token.NUMBER, token.NAME, '(')
},
"*": self.all_adj_num_op,
"/": self.all_adj_num_op,
"%": self.all_adj_num_op,
",": {token.NUMBER: (token.NUMBER, token.NAME, token.STRING, '{', '[', '('),
token.NAME: (token.NUMBER, token.NAME, token.STRING, '(', '{', '['),
token.STRING: (token.NAME, token.STRING, token.NUMBER, '(', '{', '['),
')': (token.NUMBER, token.NAME, token.STRING, '(', '{', '['),
']': (token.NUMBER, token.NAME, token.STRING, '(', '{', '['),
'}': (token.NUMBER, token.NAME, token.STRING, '(', '{', '[')
}
}
# True when s contains only digits, arithmetic characters and whitespace.
def is_num(s):
m = re.search('[^0-9*()+\-\s/]+', s)
return not m
# Returns a match object when s contains a quoted string, else None.
def is_string(s):
m = re.search('\s*(?P<a>[\'\"]+).*?(?P=a)\s*', s)
return m
# Everything before "make_key_value:" is data[0]; the rest is data[1].
data = re.split('make_key_value\:', data)
if len(data) < 2:
raise IndexCreatorFunctionException(
"Couldn't find a definition of make_key_value function!\n")
# "make_key:" may appear before or after "make_key_value:"; either way data
# is rearranged to [properties, make_key_value body, make_key body].
spl1 = re.split('make_key\:', data[0])
spl2 = re.split('make_key\:', data[1])
self.funcs_rev = False
if len(spl1) > 1:
data = [spl1[0]] + [data[1]] + [spl1[1]]
self.funcs_rev = True
elif len(spl2) > 1:
data = [data[0]] + spl2
else:
# No make_key section given: its body defaults to "key".
data.append("key")
# A section consisting only of whitespace counts as empty.
if data[1] == re.search('\s*', data[1], re.S | re.M).group(0):
raise IndexCreatorFunctionException("Empty function body ",
len(re.split('\n', data[0])) + (len(re.split('\n', data[2])) if self.funcs_rev else 1) - 1)
if data[2] == re.search('\s*', data[2], re.S | re.M).group(0):
raise IndexCreatorFunctionException("Empty function body ",
len(re.split('\n', data[0])) + (1 if self.funcs_rev else len(re.split('\n', data[1]))) - 1)
if data[0] == re.search('\s*', data[0], re.S | re.M).group(0):
raise IndexCreatorValueException("You didn't set any properity or you set them not at the begining of the code\n")
# From here on every section is a list of lines.
data = [re.split(
'\n', data[0]), re.split('\n', data[1]), re.split('\n', data[2])]
self.cnt_lines = (len(data[0]), len(data[1]), len(data[2]))
ind = 0
self.predata = data
self.data = [[], [], []]
# In the properties section, wrap a bare (non-numeric, unquoted,
# whitespace-free) value in quotes, except on lines starting with
# 'type' or 'name'.
for i, v in enumerate(self.predata[0]):
for k, w in enumerate(self.predata[0][i]):
if self.predata[0][i][k] in self.props_assign:
if not is_num(self.predata[0][i][k + 1:]) and self.predata[0][i].strip()[:4] != 'type' and self.predata[0][i].strip()[:4] != 'name':
s = self.predata[0][i][k + 1:]
self.predata[0][i] = self.predata[0][i][:k + 1]
m = re.search('\s+', s.strip())
if not is_string(s) and not m:
s = "'" + s.strip() + "'"
self.predata[0][i] += s
break
# Keep the non-blank, stripped lines of each section and check their
# enclosures before tokenizing.
for n, i in enumerate(self.predata):
for k in i:
k = k.strip()
if k:
self.data[ind].append(k)
self.check_enclosures(k, n)
ind += 1
return self.parse_ex()
def readline(self, stage):
    """
    Build a readline-style callable over the lines of ``self.data[stage]``.

    Each call returns the next line and advances ``self.ind``; once the
    lines are exhausted it resets ``self.ind`` to 0 and returns ``""``.

    :param stage: index of the section in ``self.data``
    :returns: a zero-argument callable
    """
    def next_line():
        lines = self.data[stage]
        if self.ind >= len(lines):
            self.ind = 0
            return ""
        current = lines[self.ind]
        self.ind += 1
        return current
    return next_line
def add(self, l, i):
    """
    Build a token collector that appends each token to ``l[i]``, grouped
    by the line currently indicated by ``self.ind``.

    :param l: sequence of per-section lists
    :param i: index of the section to collect into
    :returns: a callable taking the token fields as positional arguments
    """
    def collect(*fields):
        section = l[i]
        # Open a new per-line list the first time a line produces a token.
        if self.ind > len(section):
            section.append([])
        section[self.ind - 1].append(fields)
    return collect
def parse_ex(self):
    """
    Tokenize the three pre-processed sections, validate them and assemble
    the source of the index class.

    :returns: ``(custom_header, code)`` -- the joined custom header and the
        generated class source
    :raises IndexCreatorValueException: when a make_key_value line lacks a
        second value, the index type or name is missing, or a property is
        not allowed for the index type
    """
    self.index_name = ""
    self.index_type = ""
    self.curLine = -1
    self.con = -1
    self.brackets = -1
    self.curFunc = None
    self.colons = 0
    self.line_cons = ([], [], [])
    self.pre_tokens = ([], [], [])
    self.known_dicts_in_mkv = []
    self.prop_name = True
    self.prop_assign = False
    self.is_one_arg_enough = False
    self.funcs_stack = []
    self.last_line = [-1, -1, -1]
    self.props_set = []
    self.custom_header = set()
    self.tokens = []
    # NOTE(review): whitespace inside the code-template strings of this
    # method is significant for the generated class -- confirm it matches
    # the intended indentation.
    self.tokens_head = ['# %s\n' % self.name, 'class %s(' % self.name, '):\n', ' def __init__(self, *args, **kwargs): ']
    for i in range(3):
        # tokenize.tokenize(readline, tokeneater) exists on Python 2 only;
        # feeding generate_tokens() output to the collector is what that
        # call did, and it works on Python 3 as well.
        collect = self.add(self.pre_tokens, i)
        for tok in tokenize.generate_tokens(self.readline(i)):
            collect(*tok)
        # tokenize treats some keyword not in the right way, thats why we
        # have to change some of them
        for nk, k in enumerate(self.pre_tokens[i]):
            for na, a in enumerate(k):
                if a[0] == token.NAME and a[1] in self.logic:
                    self.pre_tokens[i][nk][
                        na] = (token.OP, a[1], a[2], a[3], a[4])
    for i in self.pre_tokens[1]:
        self.line_cons[1].append(self.check_colons(i, 1))
        self.check_adjacents(i, 1)
        if self.check_for_2nd_arg(i) == -1 and not self.is_one_arg_enough:
            raise IndexCreatorValueException("No 2nd value to return (did u forget about ',None'?", self.cnt_line_nr(i[0][4], 1))
        self.is_one_arg_enough = False
    for i in self.pre_tokens[2]:
        self.line_cons[2].append(self.check_colons(i, 2))
        self.check_adjacents(i, 2)
    for i in self.pre_tokens[0]:
        self.handle_prop_line(i)
    self.cur_brackets = 0
    self.tokens += ['\n super(%s, self).__init__(*args, **kwargs)\n def make_key_value(self, data): ' % self.name]
    for i in self.pre_tokens[1]:
        for k in i:
            self.handle_make_value(*k)
    self.curLine = -1
    self.con = -1
    self.cur_brackets = 0
    self.tokens += ['\n def make_key(self, key):']
    for i in self.pre_tokens[2]:
        for k in i:
            self.handle_make_key(*k)
    if self.index_type == "":
        raise IndexCreatorValueException("Missing index type definition\n")
    if self.index_name == "":
        raise IndexCreatorValueException("Missing index name\n")
    self.tokens_head[0] = "# " + self.index_name + "\n" + \
        self.tokens_head[0]
    # Emit the body of every helper function flagged as used.
    for i in self.funcs_with_body:
        if self.funcs_with_body[i][1]:
            self.tokens_head.insert(4, self.funcs_with_body[i][0])
    if None in self.custom_header:
        self.custom_header.remove(None)
    if self.custom_header:
        s = ' custom_header = """'
        for i in self.custom_header:
            s += i
        s += '"""\n'
        self.tokens_head.insert(4, s)
    if self.index_type in self.allowed_props:
        for i in self.props_set:
            if i not in self.allowed_props[self.index_type]:
                raise IndexCreatorValueException("Properity %s is not allowed for index type: %s" % (i, self.index_type))
    # print "".join(self.tokens_head)
    # print "----------"
    # print (" ".join(self.tokens))
    return "".join(self.custom_header), "".join(self.tokens_head) + (" ".join(self.tokens))
# has to be run BEFORE tokenize
def check_enclosures(self, d, st):
    """
    Check that brackets and quotes in the line `d` are balanced.

    Inside a quoted string only the matching quote character is
    significant; everything else is skipped.

    :param d: the line to check
    :param st: section number, used for error line reporting
    :raises IndexCreatorValueException: on an unmatched closing or
        opening enclosure
    """
    closer_of = {'(': ')', '{': '}', '[': ']', "'": "'", '"': '"'}
    closers = (')', '}', ']', "'", '"')
    quotes = ('"', "'")
    open_stack = []
    for char in d:
        if open_stack and open_stack[-1] in quotes:
            # Inside a string: only its own quote character closes it.
            if char == open_stack[-1]:
                open_stack.pop()
            continue
        if char in closer_of:
            open_stack.append(char)
            continue
        if char in closers:
            if not open_stack or closer_of[open_stack[-1]] != char:
                raise IndexCreatorValueException("Missing opening enclosure for \'%s\'" % char, self.cnt_line_nr(d, st))
            open_stack.pop()
    if open_stack:
        raise IndexCreatorValueException("Missing closing enclosure for \'%s\'" % open_stack[0], self.cnt_line_nr(d, st))
def check_adjacents(self, d, st):
    """
    Check every token of the tokenized line `d` against its neighbours.

    Two adjacent NAME/STRING tokens are rejected, and each operator listed
    in ``self.allowed_adjacent`` must have an allowed left and right
    neighbour.

    :param d: list of 5-field tokens of one line
    :param st: section number, used for error line reporting
    :raises IndexCreatorValueException: on a disallowed neighbour
    """
    def kind(tok):
        # Operators are identified by their text, other tokens by type.
        return tok[1] if tok[0] == token.OP else tok[0]

    def check_operator(pos, line):
        left = -1 if pos == 0 else kind(d[pos - 1])
        current = kind(d[pos])
        # there always is an endmarker at the end, but this is a precaution
        right = -1 if pos + 2 > len(d) else kind(d[pos + 1])
        allowed = self.allowed_adjacent[current]
        if left not in allowed:
            raise IndexCreatorValueException("Wrong left value of the %s" % current, self.cnt_line_nr(line, st))
        # there is an assumption that whole data always ends with 0 marker, the idea prolly needs a rewritting to allow more whitespaces
        # between tokens, so it will be handled anyway
        if right not in allowed[left]:
            raise IndexCreatorValueException("Wrong right value of the %s" % current, self.cnt_line_nr(line, st))

    operands = [token.NAME, token.STRING]
    for pos, (tok_type, text, _, _, line) in enumerate(d):
        if tok_type in operands:
            if pos + 1 < len(d) and d[pos + 1][0] in operands:
                raise IndexCreatorValueException("Did you forget about an operator in between?", self.cnt_line_nr(line, st))
        elif text in self.allowed_adjacent:
            check_operator(pos, line)
def check_colons(self, d, st):
    """
    Validate the use of colons and commas in one tokenized statement.

    For stage 0 the whole of ``d`` is checked and ``None`` is returned.
    For the other stages ``d`` is scanned for the first ``:`` that is not
    inside curly brackets; the tokens after it are checked as the returned
    value.

    :param d: sequence of 5-item tokens; index 1 is the token text and
        index 4 the source line
    :param st: stage number (0, 1 or 2), it selects the comma/colon rules
        applied by ``check_ret_args_nr``
    :returns: ``None`` for stage 0; otherwise the number of colons that were
        seen inside curly brackets before the separating colon, or ``-1``
        when no separating colon was found
    :raises IndexCreatorValueException: empty return value, or a comma/colon
        in a stage 0 value
    :raises IndexCreatorFunctionException: too many returned values or a
        second top-level colon (stages 1 and 2)
    """
    cnt = 0  # colons met inside curly brackets so far
    br = 0  # current curly bracket depth

    def check_ret_args_nr(a, s):
        # Counts commas that are outside of every bracket pair:
        # s == 1 accepts at most one, s == 2 and s == 0 accept none.
        # A colon outside of every bracket pair is always an error.
        c_b_cnt = 0
        s_b_cnt = 0
        n_b_cnt = 0
        comas_cnt = 0
        for _, i, _, _, line in a:
            if c_b_cnt == n_b_cnt == s_b_cnt == 0:
                if i == ',':
                    comas_cnt += 1
                    if (s == 1 and comas_cnt > 1) or (s == 2 and comas_cnt > 0):
                        raise IndexCreatorFunctionException("Too much arguments to return", self.cnt_line_nr(line, st))
                    if s == 0 and comas_cnt > 0:
                        raise IndexCreatorValueException("A coma here doesn't make any sense", self.cnt_line_nr(line, st))
                elif i == ':':
                    if s == 0:
                        raise IndexCreatorValueException("A colon here doesn't make any sense", self.cnt_line_nr(line, st))
                    raise IndexCreatorFunctionException("Two colons don't make any sense", self.cnt_line_nr(line, st))
            # bracket depth is updated after the check above, so the opening
            # bracket itself is still evaluated at the outer depth
            if i == '{':
                c_b_cnt += 1
            elif i == '}':
                c_b_cnt -= 1
            elif i == '(':
                n_b_cnt += 1
            elif i == ')':
                n_b_cnt -= 1
            elif i == '[':
                s_b_cnt += 1
            elif i == ']':
                s_b_cnt -= 1

    def check_if_empty(a):
        # NOTE(review): every element of ``a`` is compared with token type
        # constants. The caller passes a slice of ``d`` whose items are
        # indexed like tuples, so this looks like it is only True for an
        # empty slice - confirm whether ``i[0]`` was intended.
        for i in a:
            if i not in [token.NEWLINE, token.INDENT, token.ENDMARKER]:
                return False
        return True
    if st == 0:
        check_ret_args_nr(d, st)
        return
    for n, i in enumerate(d):
        if i[1] == ':':
            if br == 0:
                # ``n`` is an index into ``d``, so ``len(d) < n`` cannot hold
                # and the emptiness test is what decides here
                if len(d) < n or check_if_empty(d[n + 1:]):
                    raise IndexCreatorValueException(
                        "Empty return value", self.cnt_line_nr(i[4], st))
                elif len(d) >= n:
                    check_ret_args_nr(d[n + 1:], st)
                return cnt
            else:
                cnt += 1
        elif i[1] == '{':
            br += 1
        elif i[1] == '}':
            br -= 1
    # no separating colon: the whole statement is the returned value
    check_ret_args_nr(d, st)
    return -1
def check_for_2nd_arg(self, d):
    """
    Inspect what follows the first comma found outside of every bracket pair.

    While scanning, ``self.is_one_arg_enough`` is set to ``True`` when a
    ``{`` is met, or when a NAME token that is not a key of ``self.funcs``
    is met outside of every bracket pair.

    :param d: sequence of 5-item tokens (type, text, (row, col), end, line)
    :returns: the result of ``check_2nd_arg`` when such a comma exists
        (``0``, or ``None`` if no deciding token follows), otherwise ``-1``
    :raises IndexCreatorValueException: when the first deciding token after
        the comma is a STRING or NUMBER
    """
    c_b_cnt = 0 # curly brackets counter '{}'
    s_b_cnt = 0 # square brackets counter '[]'
    n_b_cnt = 0 # normal brackets counter '()'

    def check_2nd_arg(d, ind):
        # ``ind`` is an ``(index, token)`` pair from enumerate(); the scan
        # starts at the comma itself, which matches none of the branches
        d = d[ind[0]:]
        for t, i, (n, r), _, line in d:
            if i == '{' or i is None:
                return 0
            elif t == token.NAME:
                # remember the name together with its (row, col) position
                self.known_dicts_in_mkv.append((i, (n, r)))
                return 0
            elif t == token.STRING or t == token.NUMBER:
                raise IndexCreatorValueException("Second return value of make_key_value function has to be a dictionary!", self.cnt_line_nr(line, 1))
    for ind in enumerate(d):
        t, i, _, _, _ = ind[1]
        if s_b_cnt == n_b_cnt == c_b_cnt == 0:
            if i == ',':
                return check_2nd_arg(d, ind)
            elif (t == token.NAME and i not in self.funcs) or i == '{':
                self.is_one_arg_enough = True
        if i == '{':
            c_b_cnt += 1
            self.is_one_arg_enough = True
        elif i == '}':
            c_b_cnt -= 1
        elif i == '(':
            n_b_cnt += 1
        elif i == ')':
            n_b_cnt -= 1
        elif i == '[':
            s_b_cnt += 1
        elif i == ']':
            s_b_cnt -= 1
    return -1
def cnt_line_nr(self, l, stage):
    """
    Translate a source line into a line number for error messages.

    ``l`` is compared with the stripped lines of ``self.predata[stage]``;
    the last matching line wins. The index found is then shifted by an
    offset that depends on the stage.

    :param l: the line text to look for
    :param stage: 0, 1 or 2
    :returns: the computed line number, or ``-1`` when the line is not
        found or the stage is not one of 0, 1, 2
    """
    matches = [pos for pos, text in enumerate(self.predata[stage])
               if l == text.strip()]
    if not matches:
        return -1
    nr = matches[-1]
    if stage == 0:
        return nr + 1
    if stage == 1:
        base = nr + self.cnt_lines[0]
        extra = self.cnt_lines[2] - 1 if self.funcs_rev else 0
        return base + extra
    if stage == 2:
        base = nr + self.cnt_lines[0]
        extra = self.cnt_lines[1] - 1 if not self.funcs_rev else 0
        return base + extra
    return -1
def handle_prop_line(self, d):
    """
    Process the tokens of one property assignment line.

    The properties ``type`` and ``name`` are stored on the parser
    (``index_type`` / ``index_name``); every other property is emitted to
    ``self.tokens`` as a ``kwargs["<property>"] = <value>`` assignment.

    :param d: sequence of 5-item tokens (type, text, start, end, line);
        ``d[0]`` is the property, ``d[1]`` the assignment sign and ``d[2:]``
        the value
    :raises IndexCreatorValueException: malformed line, unknown assignment
        sign, property set twice, or a wrong value for ``type``/``name``
    """
    d_len = len(d)
    # a trailing ENDMARKER does not count as part of the assignment
    if d[d_len - 1][0] == token.ENDMARKER:
        d_len -= 1
    if d_len < 3:
        raise IndexCreatorValueException("Can't handle properity assingment ", self.cnt_line_nr(d[0][4], 0))
    if not d[1][1] in self.props_assign:
        raise IndexCreatorValueException(
            "Did you forget : or =?", self.cnt_line_nr(d[0][4], 0))
    if d[0][0] == token.NAME or d[0][0] == token.STRING:
        if d[0][1] in self.props_set:
            raise IndexCreatorValueException("Properity %s is set more than once" % d[0][1], self.cnt_line_nr(d[0][4], 0))
        self.props_set += [d[0][1]]
        if d[0][1] == "type" or d[0][1] == "name":
            t, tk, _, _, line = d[2]
            # type and name accept exactly one value token
            if d_len > 3:
                raise IndexCreatorValueException(
                    "Wrong value to assign", self.cnt_line_nr(line, 0))
            if t == token.STRING:
                # strip the surrounding quotes from a string value
                m = re.search('\s*(?P<a>[\'\"]+)(.*?)(?P=a)\s*', tk)
                if m:
                    tk = m.groups()[1]
            elif t != token.NAME:
                raise IndexCreatorValueException(
                    "Wrong value to assign", self.cnt_line_nr(line, 0))
            if d[0][1] == "type":
                # NOTE(review): the raw token text d[2][1] is compared here,
                # not the unquoted ``tk`` - a quoted type name would not get
                # its import line; confirm this is intended.
                if d[2][1] == "TreeBasedIndex":
                    self.custom_header.add("from CodernityDB3.tree_index import TreeBasedIndex\n")
                elif d[2][1] == "MultiTreeBasedIndex":
                    self.custom_header.add("from CodernityDB3.tree_index import MultiTreeBasedIndex\n")
                elif d[2][1] == "MultiHashIndex":
                    self.custom_header.add("from CodernityDB3.hash_index import MultiHashIndex\n")
                self.tokens_head.insert(2, tk)
                self.index_type = tk
            else:
                self.index_name = tk
            return
        else:
            self.tokens += ['\n kwargs["' + d[0][1] + '"]']
    else:
        raise IndexCreatorValueException("Can't handle properity assingment ", self.cnt_line_nr(d[0][4], 0))
    # generic property: validate the value tokens, then copy them verbatim
    self.tokens += ['=']
    self.check_adjacents(d[2:], 0)
    self.check_colons(d[2:], 0)
    for i in d[2:]:
        self.tokens += [i[1]]
def generate_func(self, t, tk, pos_start, pos_end, line, hdata, stage):
    """
    Translate one token of a function definition into output tokens.

    The output is appended to ``self.tokens``. Several pieces of parser
    state are read and updated on the way (``last_line``, ``line_cons``,
    ``cur_brackets``, ``funcs_stack``, ``funcs_with_body``,
    ``custom_header``).

    :param t: token type
    :param tk: token text
    :param pos_start: ``(row, column)`` where the token starts
    :param pos_end: not used by this method
    :param line: the source line the token comes from
    :param hdata: name of the variable that plain names are looked up in;
        the callers in this file pass ``'data'`` or ``'key'``
    :param stage: index into the per-stage state lists; the callers in this
        file pass ``1`` or ``2``
    :raises IndexCreatorFunctionException: a non-empty line follows a line
        that was already turned into an unconditional ``return``
    :raises IndexCreatorValueException: a name recorded in
        ``known_dicts_in_mkv`` is used inside ``md5``
    """
    if self.last_line[stage] != -1 and pos_start[0] > self.last_line[stage] and line != '':
        raise IndexCreatorFunctionException("This line will never be executed!", self.cnt_line_nr(line, stage))
    # token type 0 produces no output
    if t == 0:
        return
    if pos_start[1] == 0:
        # first token of a line: a line marked with -1 becomes an
        # unconditional return, any other line starts an ``if``
        if self.line_cons[stage][pos_start[0] - 1] == -1:
            self.tokens += ['\n return']
            self.last_line[stage] = pos_start[0]
        else:
            self.tokens += ['\n if']
    elif tk == ':' and self.line_cons[stage][pos_start[0] - 1] > -1:
        # the counter says how many colons still belong to the condition;
        # at zero this colon separates the condition from the returned value
        if self.line_cons[stage][pos_start[0] - 1] == 0:
            self.tokens += [':\n return']
            return
        self.line_cons[stage][pos_start[0] - 1] -= 1
    if tk in self.logic2:
        # print tk
        # a single operator is copied as it is
        if line[pos_start[1] - 1] != tk and line[pos_start[1] + 1] != tk:
            self.tokens += [tk]
        # the first character of a doubled operator becomes and/or; the
        # second character matches neither test and emits nothing
        if line[pos_start[1] - 1] != tk and line[pos_start[1] + 1] == tk:
            if tk == '&':
                self.tokens += ['and']
            else:
                self.tokens += ['or']
        return
    if self.brackets != 0:
        def search_through_known_dicts(a):
            # ``a`` is not used; the lookup relies on ``tk``, ``pos_start``
            # and ``hdata`` of the enclosing call
            for i, (n, r) in self.known_dicts_in_mkv:
                if i == tk and r > pos_start[1] and n == pos_start[0] and hdata == 'data':
                    return True
            return False
        if t == token.NAME and len(self.funcs_stack) > 0 and self.funcs_stack[-1][0] == 'md5' and search_through_known_dicts(tk):
            raise IndexCreatorValueException("Second value returned by make_key_value for sure isn't a dictionary ", self.cnt_line_nr(line, 1))
    if tk == ')':
        self.cur_brackets -= 1
        # this bracket closes the call of the function on top of the stack:
        # emit it followed by that function's closing tokens
        if len(self.funcs_stack) > 0 and self.cur_brackets == self.funcs_stack[-1][1]:
            self.tokens += [tk]
            self.tokens += self.funcs[self.funcs_stack[-1][0]][1]
            del self.funcs_stack[-1]
            return
    if tk == '(':
        self.cur_brackets += 1
    if tk in self.none:
        self.tokens += ['None']
        return
    if t == token.NAME and tk not in self.logic and tk != hdata:
        if tk not in self.funcs:
            # a plain name is read from the ``hdata`` mapping
            self.tokens += [hdata + '["' + tk + '"]']
        else:
            # a known function: emit its opening tokens and remember the
            # bracket depth at which its call started
            self.tokens += self.funcs[tk][0]
            if tk in self.funcs_with_body:
                self.funcs_with_body[tk] = (
                    self.funcs_with_body[tk][0], True)
            # .get() yields None for functions without an entry
            self.custom_header.add(self.handle_int_imports.get(tk))
            self.funcs_stack += [(tk, self.cur_brackets)]
    else:
        self.tokens += [tk]
def handle_make_value(self, t, tk, pos_start, pos_end, line):
    """
    Translate one token of the ``make_key_value`` definition.

    Delegates to ``generate_func`` with ``'data'`` as the looked-up variable
    and stage ``1``.
    """
    self.generate_func(t, tk, pos_start, pos_end, line, hdata='data', stage=1)
def handle_make_key(self, t, tk, pos_start, pos_end, line):
    """
    Translate one token of the ``make_key`` definition.

    Delegates to ``generate_func`` with ``'key'`` as the looked-up variable
    and stage ``2``.
    """
    self.generate_func(t, tk, pos_start, pos_end, line, hdata='key', stage=2)

View File

@@ -0,0 +1,150 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2011-2013 Codernity (http://codernity.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import functools
from heapq import nsmallest
from operator import itemgetter
from collections import defaultdict
# Use the standard library Counter when it is available and fall back to a
# minimal replacement that only provides the "missing keys count as zero"
# behaviour otherwise.
try:
    from collections import Counter
except ImportError:
    class Counter(dict):
        'Mapping where default values are zero'

        def __missing__(self, key):
            # unknown keys read as zero; nothing is stored by the lookup
            return 0
def cache1lvl(maxsize=100):
    """
    Least-frequently-used cache decorator keyed on the first argument.

    Modified version of http://code.activestate.com/recipes/498245/

    The decorated function gets three extra attributes: ``cache`` (the
    underlying dict), ``clear()`` and ``delete(key)``.

    :param maxsize: number of cached entries that triggers an eviction; one
        eviction round drops the ``maxsize // 10`` (at least one) least
        used entries
    :returns: the decorator
    """
    def decorating_function(user_function):
        cache = {}
        use_count = Counter()

        @functools.wraps(user_function)
        def wrapper(key, *args, **kwargs):
            try:
                result = cache[key]
            except KeyError:
                if len(cache) >= maxsize:
                    for k, _ in nsmallest(maxsize // 10 or 1,
                                          list(use_count.items()),
                                          key=itemgetter(1)):
                        cache.pop(k, None)
                        del use_count[k]
                result = user_function(key, *args, **kwargs)
                cache[key] = result
            # Count the use only once a value exists. Counting in a
            # ``finally`` also registered calls that raised, and the next
            # eviction then failed with KeyError on keys never cached.
            use_count[key] += 1
            return result

        def clear():
            """Drop every cached value and every usage counter."""
            cache.clear()
            use_count.clear()

        def delete(key):
            """Remove ``key``; return True when it was cached."""
            try:
                del cache[key]
            except KeyError:
                return False
            use_count.pop(key, None)
            return True

        wrapper.clear = clear
        wrapper.cache = cache
        wrapper.delete = delete
        return wrapper
    return decorating_function
def twolvl_iterator(dict):
    """
    Flatten a mapping of mappings.

    Yields ``(outer_key, inner_key, value)`` triples. Both levels are
    iterated over snapshots, so the mappings may be modified while the
    generator is being consumed.
    """
    outer_items = list(dict.items())
    for outer_key, inner in outer_items:
        inner_items = list(inner.items())
        for inner_key, value in inner_items:
            yield outer_key, inner_key, value
def cache2lvl(maxsize=100):
    """
    Least-frequently-used cache decorator keyed on the first two arguments.

    Modified version of http://code.activestate.com/recipes/498245/

    Values are stored as ``cache[args[0]][args[1]]``. The decorated function
    gets the extra attributes ``cache``, ``cache_size`` (number of cached
    values), ``clear()`` and ``delete(key, inner_key=None)``.

    :param maxsize: number of cached values that triggers an eviction; one
        eviction round drops the ``maxsize // 10`` (at least one) least
        used values
    :returns: the decorator
    """
    def decorating_function(user_function):
        cache = {}
        use_count = defaultdict(Counter)

        @functools.wraps(user_function)
        def wrapper(*args, **kwargs):
            outer, inner = args[0], args[1]
            try:
                result = cache[outer][inner]
            except KeyError:
                if wrapper.cache_size >= maxsize:
                    victims = nsmallest(
                        maxsize // 10 or 1,
                        [(k1, k2, hits)
                         for k1, counts in use_count.items()
                         for k2, hits in counts.items()],
                        key=itemgetter(2))
                    for k1, k2, _ in victims:
                        del use_count[k1][k2]
                        if not use_count[k1]:
                            del use_count[k1]
                        bucket = cache.get(k1)
                        if bucket is not None and k2 in bucket:
                            del bucket[k2]
                            # only values really removed shrink the size
                            wrapper.cache_size -= 1
                            if not bucket:
                                del cache[k1]
                result = user_function(*args, **kwargs)
                cache.setdefault(outer, {})[inner] = result
                wrapper.cache_size += 1
            # Count the use only once a value exists. Counting in a
            # ``finally`` also registered calls that raised, which later
            # broke the eviction with KeyError.
            use_count[outer][inner] += 1
            return result

        def clear():
            """Drop every cached value and reset the size accounting."""
            cache.clear()
            use_count.clear()
            # without the reset the next eviction would run on stale sizes
            wrapper.cache_size = 0

        def delete(key, inner_key=None):
            """
            Remove one value (``inner_key`` given) or a whole first-level
            key; return True when something was removed.
            """
            if inner_key is not None:
                try:
                    del cache[key][inner_key]
                except KeyError:
                    return False
                wrapper.cache_size -= 1
                if key in use_count:
                    use_count[key].pop(inner_key, None)
                if not cache[key]:
                    del cache[key]
                    use_count.pop(key, None)
                return True
            try:
                removed = len(cache[key])
                del cache[key]
            except KeyError:
                return False
            wrapper.cache_size -= removed
            use_count.pop(key, None)
            return True

        wrapper.clear = clear
        wrapper.cache = cache
        wrapper.delete = delete
        wrapper.cache_size = 0
        return wrapper
    return decorating_function

View File

@@ -0,0 +1,158 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2011-2013 Codernity (http://codernity.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import functools
from heapq import nsmallest
from operator import itemgetter
from collections import defaultdict
# Use the standard library Counter when it is available and fall back to a
# minimal replacement that only provides the "missing keys count as zero"
# behaviour otherwise.
try:
    from collections import Counter
except ImportError:
    class Counter(dict):
        'Mapping where default values are zero'

        def __missing__(self, key):
            # unknown keys read as zero; nothing is stored by the lookup
            return 0
def twolvl_iterator(dict):
    """
    Flatten a mapping of mappings.

    Yields ``(outer_key, inner_key, value)`` triples. Both levels are
    iterated over snapshots, so the mappings may be modified while the
    generator is being consumed.
    """
    outer_items = list(dict.items())
    for outer_key, inner in outer_items:
        inner_items = list(inner.items())
        for inner_key, value in inner_items:
            yield outer_key, inner_key, value
def create_cache1lvl(lock_obj):
    """
    Build the lock-protected variant of the one-level LFU cache decorator.

    :param lock_obj: callable without arguments returning a lock usable in a
        ``with`` statement; it is called once per decorated function
    :returns: the ``cache1lvl(maxsize=100)`` decorator factory
    """
    def cache1lvl(maxsize=100):
        """
        modified version of http://code.activestate.com/recipes/498245/
        """
        def decorating_function(user_function):
            store = {}
            hits = Counter()
            guard = lock_obj()

            def evict_least_used():
                # drop a tenth of the capacity (at least one entry)
                victims = nsmallest(maxsize // 10 or 1,
                                    iter(list(hits.items())),
                                    key=itemgetter(1))
                for stale_key, _ in victims:
                    del store[stale_key], hits[stale_key]

            @functools.wraps(user_function)
            def wrapper(key, *args, **kwargs):
                try:
                    result = store[key]
                except KeyError:
                    with guard:
                        if len(store) == maxsize:
                            evict_least_used()
                        store[key] = user_function(key, *args, **kwargs)
                        result = store[key]
                        hits[key] += 1
                else:
                    with guard:
                        hits[key] += 1
                return result

            def clear():
                store.clear()
                hits.clear()

            def delete(key):
                try:
                    del store[key]
                    del hits[key]
                except KeyError:
                    return False
                return True

            wrapper.clear = clear
            wrapper.cache = store
            wrapper.delete = delete
            return wrapper
        return decorating_function
    return cache1lvl
def create_cache2lvl(lock_obj):
    """
    Build the lock-protected variant of the two-level LFU cache decorator.

    :param lock_obj: callable without arguments returning a lock usable in a
        ``with`` statement; it is called once per decorated function
    :returns: the ``cache2lvl(maxsize=100)`` decorator factory
    """
    def cache2lvl(maxsize=100):
        """
        modified version of http://code.activestate.com/recipes/498245/

        Values are stored as ``cache[args[0]][args[1]]``; ``cache_size``
        holds the number of cached values.
        """
        def decorating_function(user_function):
            cache = {}
            use_count = defaultdict(Counter)
            lock = lock_obj()

            @functools.wraps(user_function)
            def wrapper(*args, **kwargs):
                try:
                    result = cache[args[0]][args[1]]
                except KeyError:
                    with lock:
                        if wrapper.cache_size == maxsize:
                            # floor division: nsmallest() needs an integer
                            # and cache_size has to stay an integer too
                            to_delete = maxsize // 10 or 1
                            victims = nsmallest(
                                to_delete,
                                [(k1, k2, hits)
                                 for k1, counts in use_count.items()
                                 for k2, hits in counts.items()],
                                key=itemgetter(2))
                            for k1, k2, _ in victims:
                                del cache[k1][k2], use_count[k1][k2]
                                if not cache[k1]:
                                    del cache[k1]
                                    del use_count[k1]
                            # subtract what was really evicted
                            wrapper.cache_size -= len(victims)
                        result = user_function(*args, **kwargs)
                        try:
                            cache[args[0]][args[1]] = result
                        except KeyError:
                            cache[args[0]] = {args[1]: result}
                        use_count[args[0]][args[1]] += 1
                        wrapper.cache_size += 1
                else:
                    use_count[args[0]][args[1]] += 1
                return result

            def clear():
                """Drop every cached value and reset the size accounting."""
                cache.clear()
                use_count.clear()
                # without the reset the next eviction would run on stale sizes
                wrapper.cache_size = 0

            def delete(key, *args):
                """
                Remove one value (inner key given as second argument) or a
                whole first-level key; return True when something was removed.
                """
                if args:
                    try:
                        del cache[key][args[0]]
                        del use_count[key][args[0]]
                        if not cache[key]:
                            del cache[key]
                            del use_count[key]
                        wrapper.cache_size -= 1
                        return True
                    except KeyError:
                        return False
                else:
                    try:
                        wrapper.cache_size -= len(cache[key])
                        del cache[key]
                        del use_count[key]
                        return True
                    except KeyError:
                        return False

            wrapper.clear = clear
            wrapper.cache = cache
            wrapper.delete = delete
            wrapper.cache_size = 0
            return wrapper
        return decorating_function
    return cache2lvl

View File

@@ -0,0 +1,45 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2011-2013 Codernity (http://codernity.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from CodernityDB3.database import Database
import shutil
import os
def migrate(source, destination):
    """
    Copy all documents and index definitions into a new database.

    Very basic for now: the target database is created, every file from the
    source ``_indexes`` directory except ``00id.py`` is copied over, and all
    documents of the source ``id`` index are inserted without their ``_rev``.

    Both databases are closed before returning, also when an error occurs,
    so no open database object is left behind.

    :param source: passed to ``Database`` for the database to read from
    :param destination: passed to ``Database`` for the database to create
    :returns: ``True``
    """
    dbs = Database(source)
    dbt = Database(destination)
    dbs.open()
    try:
        dbt.create()
        # the target stays closed while index files are copied into it
        dbt.close()
        src_indexes = os.path.join(dbs.path, '_indexes')
        dst_indexes = os.path.join(dbt.path, '_indexes')
        for curr in os.listdir(src_indexes):
            if curr != '00id.py':
                shutil.copyfile(os.path.join(src_indexes, curr),
                                os.path.join(dst_indexes, curr))
        dbt.open()
        try:
            for c in dbs.all('id'):
                del c['_rev']
                dbt.insert(c)
        finally:
            dbt.close()
    finally:
        dbs.close()
    return True
# Script entry point: the first two command line arguments are handed to
# migrate() as source and destination.
if __name__ == '__main__':
    import sys
    migrate(sys.argv[1], sys.argv[2])

35
libs/CodernityDB3/misc.py Normal file
View File

@@ -0,0 +1,35 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2011-2013 Codernity (http://codernity.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from random import getrandbits, randrange
import uuid
class NONE:
    """
    Marker type that is intended to act like ``None`` while being
    distinguishable from it. For internal use only!
    """
def random_hex_32():
    """Return the 32 hex characters of a random version 4 UUID."""
    random_uuid = uuid.UUID(int=getrandbits(128), version=4)
    return random_uuid.hex
def random_hex_4(*args, **kwargs):
    """
    Return 4 random hex characters (zero padded).

    All arguments are accepted and ignored.
    """
    number = randrange(256 ** 2)
    return format(number, '04x')

View File

@@ -0,0 +1,99 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2011-2013 Codernity (http://codernity.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from CodernityDB3.misc import NONE
def __patch(obj, name, new):
    """
    Replace the attribute ``name`` of ``obj`` with ``new``.

    Nothing is done when ``obj`` has no such attribute.

    :raises Exception: when the current value already equals ``new``
    """
    # a fresh sentinel tells "attribute missing" apart from any real value
    n = NONE()
    orig = getattr(obj, name, n)
    if orig is not n:
        if orig == new:
            raise Exception("Shouldn't happen, new and orig are the same")
        setattr(obj, name, new)
    return
def patch_cache_lfu(lock_obj):
    """
    Patches the LFU cache mechanism to be thread safe (gevent ones also)

    .. note::

       It's an internal CodernityDB mechanism, it will be called when needed

    :param lock_obj: lock factory handed to the ``create_cache*`` builders
    """
    from . import lfu_cache
    from . import lfu_cache_with_lock as locked

    # build both replacements first, then install them
    replacements = (
        ('cache1lvl', locked.create_cache1lvl(lock_obj)),
        ('cache2lvl', locked.create_cache2lvl(lock_obj)),
    )
    for attr_name, decorator in replacements:
        __patch(lfu_cache, attr_name, decorator)
def patch_cache_rr(lock_obj):
    """
    Patches the RR cache mechanism to be thread safe (gevent ones also)

    .. note::

       It's an internal CodernityDB mechanism, it will be called when needed

    :param lock_obj: lock factory handed to the ``create_cache*`` builders
    """
    from . import rr_cache
    from . import rr_cache_with_lock as locked

    # build both replacements first, then install them
    replacements = (
        ('cache1lvl', locked.create_cache1lvl(lock_obj)),
        ('cache2lvl', locked.create_cache2lvl(lock_obj)),
    )
    for attr_name, decorator in replacements:
        __patch(rr_cache, attr_name, decorator)
def patch_flush_fsync(db_obj):
    """
    Will always execute ``fsync`` after ``flush``, for every index of the
    database and for the database object itself.

    The original ``flush`` of each patched object stays available as
    ``orig_flush``.

    .. note::

        It's for advanced users, use when you understand the difference
        between `flush` and `fsync`, and when you definitely need that.

        It's important to call it **AFTER** the database has all indexes etc
        (after db.create or db.open)

    Example usage::

        ...
        db = Database('/tmp/patch_demo')
        db.create()
        patch_flush_fsync(db)
        ...

    """
    def always_fsync(target):
        def _flush_then_fsync():
            target.orig_flush()
            target.fsync()
        return _flush_then_fsync

    def install(target):
        # keep the original around, the wrapper calls it by that name
        target.orig_flush = target.flush
        target.flush = always_fsync(target)

    for index in db_obj.indexes:
        install(index)
    install(db_obj)
    return

View File

@@ -0,0 +1,127 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2011-2013 Codernity (http://codernity.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import functools
from random import choice
def cache1lvl(maxsize=100):
    """
    Random-replacement cache decorator keyed on the first argument.

    Bytes keys are decoded to ``str`` before they are used, and bytes
    results are returned decoded. The decorated function gets the extra
    attributes ``cache`` (the underlying dict), ``clear()`` and
    ``delete(key)``.

    :param maxsize: number of cached entries that triggers an eviction; one
        eviction round drops ``maxsize // 10`` (at least one) randomly
        chosen entries
    :returns: the decorator
    """
    def decorating_function(user_function):
        cache = {}

        @functools.wraps(user_function)
        def wrapper(key, *args, **kwargs):
            if isinstance(key, bytes):
                key = key.decode()
            try:
                result = cache[key]
            except KeyError:
                if len(cache) == maxsize:
                    for _ in range(maxsize // 10 or 1):
                        del cache[choice(list(cache.keys()))]
                cache[key] = user_function(key, *args, **kwargs)
                result = cache[key]
            if isinstance(result, bytes):
                # decode the result itself; ``key`` is already text here,
                # so decoding it raised AttributeError for bytes results
                result = result.decode()
            return result

        def clear():
            """Drop every cached value."""
            cache.clear()

        def delete(key):
            """Remove ``key``; return True when it was cached."""
            if isinstance(key, bytes):
                key = key.decode()
            try:
                del cache[key]
                return True
            except KeyError:
                return False

        wrapper.clear = clear
        wrapper.cache = cache
        wrapper.delete = delete
        return wrapper
    return decorating_function
def cache2lvl(maxsize=100):
    """
    Random-replacement cache decorator keyed on the first two arguments.

    Values are stored as ``cache[args[0]][args[1]]``. The decorated function
    gets the extra attributes ``cache``, ``cache_size`` (number of cached
    values), ``clear()`` and ``delete(key, inner_key=None)``.

    :param maxsize: number of cached values that triggers an eviction; one
        eviction round drops ``maxsize // 10`` (at least one) randomly
        chosen values
    :returns: the decorator
    """
    def decorating_function(user_function):
        cache = {}

        @functools.wraps(user_function)
        def wrapper(*args, **kwargs):
            try:
                result = cache[args[0]][args[1]]
            except KeyError:
                if wrapper.cache_size == maxsize:
                    to_delete = maxsize // 10 or 1
                    for _ in range(to_delete):
                        key1 = choice(list(cache.keys()))
                        key2 = choice(list(cache[key1].keys()))
                        del cache[key1][key2]
                        if not cache[key1]:
                            del cache[key1]
                    wrapper.cache_size -= to_delete
                result = user_function(*args, **kwargs)
                try:
                    cache[args[0]][args[1]] = result
                except KeyError:
                    cache[args[0]] = {args[1]: result}
                wrapper.cache_size += 1
            return result

        def clear():
            """Drop every cached value and reset the size accounting."""
            cache.clear()
            wrapper.cache_size = 0

        def delete(key, inner_key=None):
            """
            Remove one value (``inner_key`` given) or a whole first-level
            key; return True when something was removed.
            """
            # ``is not None`` so that falsy inner keys such as 0 or ''
            # remove one value instead of the whole first-level key
            if inner_key is not None:
                try:
                    del cache[key][inner_key]
                    if not cache[key]:
                        del cache[key]
                    wrapper.cache_size -= 1
                    return True
                except KeyError:
                    return False
            else:
                try:
                    wrapper.cache_size -= len(cache[key])
                    del cache[key]
                    return True
                except KeyError:
                    return False

        wrapper.clear = clear
        wrapper.cache = cache
        wrapper.delete = delete
        wrapper.cache_size = 0
        return wrapper
    return decorating_function

View File

@@ -0,0 +1,117 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2011-2013 Codernity (http://codernity.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import functools
from random import choice
def create_cache1lvl(lock_obj):
    """
    Build the lock-protected variant of the one-level RR cache decorator.

    :param lock_obj: callable without arguments returning a lock usable in a
        ``with`` statement; it is called once per decorated function
    :returns: the ``cache1lvl(maxsize=100)`` decorator factory
    """
    def cache1lvl(maxsize=100):
        def decorating_function(user_function):
            store = {}
            guard = lock_obj()

            def evict_random_entries():
                # drop a tenth of the capacity (at least one entry)
                for _ in range(maxsize // 10 or 1):
                    victim = choice(list(store))
                    del store[victim]

            @functools.wraps(user_function)
            def wrapper(key, *args, **kwargs):
                try:
                    return store[key]
                except KeyError:
                    with guard:
                        if len(store) == maxsize:
                            evict_random_entries()
                        store[key] = user_function(key, *args, **kwargs)
                        return store[key]

            def clear():
                store.clear()

            def delete(key):
                try:
                    del store[key]
                except KeyError:
                    return False
                return True

            wrapper.clear = clear
            wrapper.cache = store
            wrapper.delete = delete
            return wrapper
        return decorating_function
    return cache1lvl
def create_cache2lvl(lock_obj):
    """
    Build the lock-protected variant of the two-level RR cache decorator.

    :param lock_obj: callable without arguments returning a lock usable in a
        ``with`` statement; it is called once per decorated function
    :returns: the ``cache2lvl(maxsize=100)`` decorator factory
    """
    def cache2lvl(maxsize=100):
        def decorating_function(user_function):
            store = {}
            guard = lock_obj()

            def evict_random_values(how_many):
                for _ in range(how_many):
                    outer = choice(list(store))
                    inner = choice(list(store[outer]))
                    del store[outer][inner]
                    if not store[outer]:
                        del store[outer]

            @functools.wraps(user_function)
            def wrapper(*args, **kwargs):
                try:
                    result = store[args[0]][args[1]]
                except KeyError:
                    with guard:
                        if wrapper.cache_size == maxsize:
                            to_delete = maxsize // 10 or 1
                            evict_random_values(to_delete)
                            wrapper.cache_size -= to_delete
                        result = user_function(*args, **kwargs)
                        store.setdefault(args[0], {})[args[1]] = result
                        wrapper.cache_size += 1
                return result

            def clear():
                store.clear()
                wrapper.cache_size = 0

            def delete(key, *args):
                # an inner key given as second argument removes one value,
                # otherwise the whole first-level key goes
                try:
                    if args:
                        del store[key][args[0]]
                        if not store[key]:
                            del store[key]
                        wrapper.cache_size -= 1
                    else:
                        wrapper.cache_size -= len(store[key])
                        del store[key]
                except KeyError:
                    return False
                return True

            wrapper.clear = clear
            wrapper.cache = store
            wrapper.delete = delete
            wrapper.cache_size = 0
            return wrapper
        return decorating_function
    return cache2lvl

View File

@@ -0,0 +1,146 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2011-2013 Codernity (http://codernity.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from CodernityDB3.hash_index import UniqueHashIndex, HashIndex
from CodernityDB3.sharded_index import ShardedIndex
from CodernityDB3.index import IndexPreconditionsException
from random import getrandbits
import uuid
class IU_ShardedUniqueHashIndex(ShardedIndex):
    """
    Sharded index whose shards are ``UniqueHashIndex`` instances.

    The first two characters of a key are used as the name of the target
    shard; ``create_key`` writes the shard number there as two hex digits.
    """

    custom_header = """import uuid
from random import getrandbits
from CodernityDB3.sharded_index import ShardedIndex
"""

    def __init__(self, db_path, name, *args, **kwargs):
        requested_shards = kwargs.get('sh_nums', 0)
        if requested_shards > 255:
            raise IndexPreconditionsException("Too many shards")
        kwargs['ind_class'] = UniqueHashIndex
        parent = super(IU_ShardedUniqueHashIndex, self)
        parent.__init__(db_path, name, *args, **kwargs)
        self.patchers.append(self.wrap_insert_id_index)

    @staticmethod
    def wrap_insert_id_index(db_obj, clean=False):
        """
        Install (or with ``clean=True`` remove) a replacement for
        ``db_obj._insert_id_index`` that stores the value in the storage of
        the shard the new id belongs to.

        :raises IndexPreconditionsException: when the replacement is already
            installed
        """
        def _insert_id_index(_rev, data):
            """
            Performs insert on **id** index.
            """
            _id, value = db_obj.id_ind.make_key_value(data)  # may be improved
            shard_storage = db_obj.id_ind.shards_r[_id[:2]].storage
            start, size = shard_storage.insert(value)
            db_obj.id_ind.insert(_id, _rev, start, size)
            return _id

        if clean:
            # restore the original method and forget the backup
            db_obj._insert_id_index = db_obj._insert_id_index_orig
            del db_obj._insert_id_index_orig
            return
        if hasattr(db_obj, '_insert_id_index_orig'):
            raise IndexPreconditionsException(
                "Already patched, something went wrong")
        db_obj._insert_id_index_orig = db_obj._insert_id_index
        db_obj._insert_id_index = _insert_id_index

    def create_key(self):
        """Create a random key for the shard following the last used one."""
        random_hex = uuid.UUID(int=getrandbits(128), version=4).hex
        next_shard = self.last_used + 1
        if next_shard >= self.sh_nums:
            next_shard = 0
        self.last_used = next_shard
        return '%02x%30s' % (next_shard, random_hex[2:])

    def delete(self, key, *args, **kwargs):
        shard = self.shards_r[key[:2]]
        return shard.delete(key, *args, **kwargs)

    def update(self, key, *args, **kwargs):
        prefix = key[:2]
        self.last_used = int(prefix, 16)
        shard = self.shards_r[prefix]
        return shard.update(key, *args, **kwargs)

    def insert(self, key, *args, **kwargs):
        # in most cases the prefix comes from create_key BUT not always
        prefix = key[:2]
        self.last_used = int(key[:2], 16)
        shard = self.shards_r[prefix]
        return shard.insert(key, *args, **kwargs)

    def get(self, key, *args, **kwargs):
        prefix = key[:2]
        self.last_used = int(prefix, 16)
        shard = self.shards_r[prefix]
        return shard.get(key, *args, **kwargs)
class ShardedUniqueHashIndex(IU_ShardedUniqueHashIndex):
    """Allows the sharded unique hash index to be used directly."""

    custom_header = 'from CodernityDB3.sharded_hash import IU_ShardedUniqueHashIndex'
class IU_ShardedHashIndex(ShardedIndex):
    """
    Sharded index whose shards are ``HashIndex`` instances.

    Subclasses have to implement ``calculate_shard``; every operation is
    forwarded to the shard it returns for the given key.
    """

    custom_header = """from CodernityDB3.sharded_index import ShardedIndex"""

    def __init__(self, db_path, name, *args, **kwargs):
        kwargs['ind_class'] = HashIndex
        super(IU_ShardedHashIndex, self).__init__(db_path, name,
                                                  *args, **kwargs)

    def calculate_shard(self, key):
        """
        Must be implemented. It has to return shard to be used by key

        :param key: key
        :returns: target shard
        :rtype: int
        """
        raise NotImplementedError()

    def delete(self, doc_id, key, *args, **kwargs):
        trg_shard = self.calculate_shard(key)
        op = self.shards_r[trg_shard]
        return op.delete(doc_id, key, *args, **kwargs)

    def insert(self, doc_id, key, *args, **kwargs):
        trg_shard = self.calculate_shard(key)
        op = self.shards_r[trg_shard]
        return op.insert(doc_id, key, *args, **kwargs)

    def update(self, doc_id, key, *args, **kwargs):
        trg_shard = self.calculate_shard(key)
        op = self.shards_r[trg_shard]
        # forward to the shard's update(); this used to call insert()
        return op.update(doc_id, key, *args, **kwargs)

    def get(self, key, *args, **kwargs):
        trg_shard = self.calculate_shard(key)
        op = self.shards_r[trg_shard]
        return op.get(key, *args, **kwargs)
class ShardedHashIndex(IU_ShardedHashIndex):
    """Sharded hash index; adds nothing to ``IU_ShardedHashIndex``."""

View File

@@ -0,0 +1,112 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2011-2013 Codernity (http://codernity.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from CodernityDB3.index import Index
# from CodernityDB3.env import cdb_environment
# import warnings
class ShardedIndex(Index):
def __init__(self, db_path, name, *args, **kwargs):
"""
There are 3 additional parameters. You have to hardcode them in your custom class. **NEVER** use directly
:param int sh_nums: how many shards should be
:param class ind_class: Index class to use (HashIndex or your custom one)
:param bool use_make_keys: if True, `make_key`, and `make_key_value` will be overriden with those from first shard
The rest parameters are passed straight to `ind_class` shards.
"""
super(ShardedIndex, self).__init__(db_path, name)
try:
self.sh_nums = kwargs.pop('sh_nums')
except KeyError:
self.sh_nums = 5
try:
ind_class = kwargs.pop('ind_class')
except KeyError:
raise Exception("ind_class must be given")
else:
# if not isinstance(ind_class, basestring):
# ind_class = ind_class.__name__
self.ind_class = ind_class
if 'use_make_keys' in kwargs:
self.use_make_keys = kwargs.pop('use_make_keys')
else:
self.use_make_keys = False
self._set_shard_datas(*args, **kwargs)
self.patchers = [] # database object patchers
def _set_shard_datas(self, *args, **kwargs):
self.shards = {}
self.shards_r = {}
# ind_class = globals()[self.ind_class]
ind_class = self.ind_class
i = 0
for sh_name in [self.name + str(x) for x in range(self.sh_nums)]:
# dict is better than list in that case
self.shards[i] = ind_class(self.db_path, sh_name, *args, **kwargs)
self.shards_r['%02x' % i] = self.shards[i]
self.shards_r[i] = self.shards[i]
i += 1
if not self.use_make_keys:
self.make_key = self.shards[0].make_key
self.make_key_value = self.shards[0].make_key_value
self.last_used = 0
@property
def storage(self):
st = self.shards[self.last_used].storage
return st
def __getattr__(self, name):
return getattr(self.shards[self.last_used], name)
def open_index(self):
for curr in list(self.shards.values()):
curr.open_index()
def create_index(self):
for curr in list(self.shards.values()):
curr.create_index()
def destroy(self):
for curr in list(self.shards.values()):
curr.destroy()
def compact(self):
for curr in list(self.shards.values()):
curr.compact()
def reindex(self):
for curr in list(self.shards.values()):
curr.reindex()
def all(self, *args, **kwargs):
for curr in list(self.shards.values()):
for now in curr.all(*args, **kwargs):
yield now
def get_many(self, *args, **kwargs):
    """
    Yield the items of ``get_many(*args, **kwargs)`` from every shard.

    Shards are visited in the iteration order of ``self.shards``; a
    shard's ``get_many`` is called only once the previous shard is
    exhausted.
    """
    per_shard = (shard.get_many(*args, **kwargs) for shard in self.shards.values())
    for records in per_shard:
        for record in records:
            yield record

View File

@@ -0,0 +1,162 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2011-2013 Codernity (http://codernity.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import struct
import shutil
import marshal
import io
try:
from CodernityDB3 import __version__
except ImportError:
from .__init__ import __version__
class StorageException(Exception):
    """Storage-specific exception type (not raised inside this module itself)."""
class DummyStorage(object):
    """
    No-op stand-in used to fake a real storage.

    Every method accepts and ignores arbitrary positional and keyword
    arguments.  ``save``, ``insert`` and ``update`` report ``(0, 0)``;
    all other methods return ``None``.
    """

    def create(self, *args, **kwargs):
        """Do nothing."""

    def open(self, *args, **kwargs):
        """Do nothing."""

    def close(self, *args, **kwargs):
        """Do nothing."""

    def data_from(self, *args, **kwargs):
        """Do nothing; returns ``None``."""

    def data_to(self, *args, **kwargs):
        """Do nothing; returns ``None``."""

    def save(self, *args, **kwargs):
        """Store nothing and report ``(0, 0)``."""
        return (0, 0)

    def insert(self, *args, **kwargs):
        """Forward to :meth:`save` and return its result."""
        return self.save(*args, **kwargs)

    def update(self, *args, **kwargs):
        """Store nothing and report ``(0, 0)``."""
        return (0, 0)

    def get(self, *args, **kwargs):
        """Always return ``None``."""
        return None

    # NOTE: no compact() is defined on this class (an earlier stub is disabled).

    def fsync(self, *args, **kwargs):
        """Do nothing."""

    def flush(self, *args, **kwargs):
        """Do nothing."""
class IU_Storage(object):
    """
    Append-only record storage kept in the file ``<db_path>/<name>_stor``.

    Records are serialized with :mod:`marshal` and always written at the
    end of the file; callers address a record by the ``(start, size)``
    pair returned from :meth:`save`.
    """

    __version__ = __version__

    def __init__(self, db_path, name='main'):
        """
        Remember where the storage file lives; no file is touched here.

        :param db_path: directory holding the storage file (``bytes`` values
            are decoded to ``str``)
        :param name: storage name; the file is called ``name + "_stor"``
            (``bytes`` values are decoded to ``str``)
        """
        if isinstance(db_path, bytes):
            db_path = db_path.decode()
        if isinstance(name, bytes):
            name = name.decode()
        self.db_path = db_path
        self.name = name
        # Same size as the '10s90s' header packed in create() (10 + 90 bytes).
        self._header_size = 100

    def _stor_path(self):
        """Return the full path of the storage file."""
        return os.path.join(self.db_path, self.name + "_stor")

    def _open_file(self):
        """Open the storage file unbuffered for read/write and seek to its end."""
        self._f = io.open(self._stor_path(), 'r+b', buffering=0)
        self.flush()
        self._f.seek(0, 2)

    def create(self):
        """
        Create the storage file, write its header and leave it open.

        The header is the version string (10 bytes) followed by a 90-byte
        field starting with ``|||||``.

        :raises IOError: if the storage file already exists
        """
        path = self._stor_path()
        if os.path.exists(path):
            raise IOError("Storage already exists!")
        version = self.__version__
        if isinstance(version, str):
            version = version.encode()
        # Pack the header before the file is created so that a packing
        # failure does not leave an empty storage file behind.
        header = struct.pack(b'10s90s', version, b'|||||')
        # The `with` block closes the file; no explicit close() is needed.
        with io.open(path, 'wb') as f:
            f.write(header)
        self._open_file()

    def open(self):
        """
        Open an existing storage file and position at its end.

        :raises IOError: if the storage file does not exist
        """
        if not os.path.exists(self._stor_path()):
            raise IOError("Storage doesn't exists!")
        self._open_file()

    def destroy(self):
        """Delete the storage file from disk."""
        os.unlink(self._stor_path())

    def close(self):
        """Close the underlying file object."""
        self._f.close()

    def data_from(self, data):
        """Deserialize ``data`` with ``marshal.loads`` and return the result."""
        return marshal.loads(data)

    def data_to(self, data):
        """Serialize ``data`` with ``marshal.dumps`` and return the bytes."""
        return marshal.dumps(data)

    def save(self, data):
        """
        Append the serialized ``data`` to the end of the file.

        :returns: ``(start, size)``: the offset at which the record begins
            and the length of its serialized form in bytes
        """
        s_data = self.data_to(data)
        self._f.seek(0, 2)
        start = self._f.tell()
        size = len(s_data)
        self._f.write(s_data)
        self.flush()
        return start, size

    def insert(self, data):
        """Same as :meth:`save`."""
        return self.save(data)

    def update(self, data):
        """Same as :meth:`save`: the new version is appended, nothing is overwritten."""
        return self.save(data)

    def get(self, start, size, status='c'):
        """
        Read and deserialize ``size`` bytes starting at offset ``start``.

        :param status: when it equals ``'d'`` nothing is read and ``None``
            is returned
        :returns: the deserialized record, or ``None`` for status ``'d'``
        """
        if status == 'd':
            return None
        self._f.seek(start)
        return self.data_from(self._f.read(size))

    def flush(self):
        """Flush the underlying file object."""
        self._f.flush()

    def fsync(self):
        """Call ``os.fsync`` on the underlying file descriptor."""
        os.fsync(self._f.fileno())
# Classes for public use, done in this way because of the
# generation of static files with indexes (_index directory).
class Storage(IU_Storage):
    """Public storage class; adds nothing to ``IU_Storage``."""

File diff suppressed because it is too large Load Diff

View File

@@ -12,7 +12,8 @@
# Source: http://pypi.python.org/pypi/axel
# Docs: http://packages.python.org/axel
from Queue import Empty, Queue
from six.moves.queue import Queue, Empty
import hashlib
import sys
import threading
@@ -109,7 +110,7 @@ class Event(object):
self.memoize = {}
def hash(self, handler):
return hashlib.md5(str(handler)).hexdigest()
return hashlib.md5(repr(handler).encode('utf-8')).hexdigest()
def handle(self, handler, priority = 0):
""" Registers a handler. The handler can be transmitted together
@@ -161,7 +162,7 @@ class Event(object):
t.daemon = True
t.start()
handler_keys = self.handlers.keys()
handler_keys = list(self.handlers.keys())
handler_keys.sort(key = natsortKey)
for handler in handler_keys:

View File

@@ -45,7 +45,7 @@ from .element import (
# The very first thing we do is give a useful error if someone is
# running this code under Python 3 without converting it.
syntax_error = u'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work. You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).'
syntax_error = 'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work. You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).'
class BeautifulSoup(Tag):
"""
@@ -69,7 +69,7 @@ class BeautifulSoup(Tag):
like HTML's <br> tag), call handle_starttag and then
handle_endtag.
"""
ROOT_TAG_NAME = u'[document]'
ROOT_TAG_NAME = '[document]'
# If the end-user gives no indication which tree builder they
# want, look for one with these features.
@@ -135,12 +135,12 @@ class BeautifulSoup(Tag):
"fromEncoding", "from_encoding")
if len(kwargs) > 0:
arg = kwargs.keys().pop()
arg = list(kwargs.keys()).pop()
raise TypeError(
"__init__() got an unexpected keyword argument '%s'" % arg)
if builder is None:
if isinstance(features, basestring):
if isinstance(features, str):
features = [features]
if features is None or len(features) == 0:
features = self.DEFAULT_BUILDER_FEATURES
@@ -164,7 +164,7 @@ class BeautifulSoup(Tag):
# involving passing non-markup to Beautiful Soup.
# Beautiful Soup will still parse the input as markup,
# just in case that's what the user really wants.
if (isinstance(markup, unicode)
if (isinstance(markup, str)
and not os.path.supports_unicode_filenames):
possible_filename = markup.encode("utf8")
else:
@@ -172,7 +172,7 @@ class BeautifulSoup(Tag):
is_file = False
try:
is_file = os.path.exists(possible_filename)
except Exception, e:
except Exception as e:
# This is almost certainly a problem involving
# characters not valid in filenames on this
# system. Just let it go.
@@ -184,7 +184,7 @@ class BeautifulSoup(Tag):
# TODO: This is ugly but I couldn't get it to work in
# Python 3 otherwise.
if ((isinstance(markup, bytes) and not b' ' in markup)
or (isinstance(markup, unicode) and not u' ' in markup)):
or (isinstance(markup, str) and not ' ' in markup)):
warnings.warn(
'"%s" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client to get the document behind the URL, and feed that document to Beautiful Soup.' % markup)
@@ -259,7 +259,7 @@ class BeautifulSoup(Tag):
def endData(self, containerClass=NavigableString):
if self.current_data:
current_data = u''.join(self.current_data)
current_data = ''.join(self.current_data)
# If whitespace is not preserved, and this string contains
# nothing but ASCII spaces, replace it with a single space
# or newline.
@@ -367,9 +367,9 @@ class BeautifulSoup(Tag):
encoding_part = ''
if eventual_encoding != None:
encoding_part = ' encoding="%s"' % eventual_encoding
prefix = u'<?xml version="1.0"%s?>\n' % encoding_part
prefix = '<?xml version="1.0"%s?>\n' % encoding_part
else:
prefix = u''
prefix = ''
if not pretty_print:
indent_level = None
else:
@@ -403,4 +403,4 @@ class FeatureNotFound(ValueError):
if __name__ == '__main__':
import sys
soup = BeautifulSoup(sys.stdin)
print soup.prettify()
print(soup.prettify())

View File

@@ -153,13 +153,13 @@ class TreeBuilder(object):
universal = self.cdata_list_attributes.get('*', [])
tag_specific = self.cdata_list_attributes.get(
tag_name.lower(), None)
for attr in attrs.keys():
for attr in list(attrs.keys()):
if attr in universal or (tag_specific and attr in tag_specific):
# We have a "class"-type attribute whose string
# value is a whitespace-separated list of
# values. Split it into a list.
value = attrs[attr]
if isinstance(value, basestring):
if isinstance(value, str):
values = whitespace_re.split(value)
else:
# html5lib sometimes calls setAttributes twice

View File

@@ -37,7 +37,7 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
doc = parser.parse(markup, encoding=self.user_specified_encoding)
# Set the character encoding detected by the tokenizer.
if isinstance(markup, unicode):
if isinstance(markup, str):
# We need to special-case this because html5lib sets
# charEncoding to UTF-8 if it gets Unicode input.
doc.original_encoding = None
@@ -51,7 +51,7 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
def test_fragment_to_document(self, fragment):
"""See `TreeBuilder`."""
return u'<html><head></head><body>%s</body></html>' % fragment
return '<html><head></head><body>%s</body></html>' % fragment
class TreeBuilderForHtml5lib(html5lib.treebuilders._base.TreeBuilder):
@@ -124,7 +124,7 @@ class Element(html5lib.treebuilders._base.Node):
def appendChild(self, node):
string_child = child = None
if isinstance(node, basestring):
if isinstance(node, str):
# Some other piece of code decided to pass in a string
# instead of creating a TextElement object to contain the
# string.
@@ -139,7 +139,7 @@ class Element(html5lib.treebuilders._base.Node):
else:
child = node.element
if not isinstance(child, basestring) and child.parent is not None:
if not isinstance(child, str) and child.parent is not None:
node.element.extract()
if (string_child and self.element.contents
@@ -152,7 +152,7 @@ class Element(html5lib.treebuilders._base.Node):
old_element.replace_with(new_element)
self.soup._most_recent_element = new_element
else:
if isinstance(node, basestring):
if isinstance(node, str):
# Create a brand new NavigableString from this string.
child = self.soup.new_string(node)
@@ -183,7 +183,7 @@ class Element(html5lib.treebuilders._base.Node):
self.soup.builder._replace_cdata_list_attribute_values(
self.name, attributes)
for name, value in attributes.items():
for name, value in list(attributes.items()):
self.element[name] = value
# The attributes may contain variables that need substitution.

View File

@@ -4,7 +4,7 @@ __all__ = [
'HTMLParserTreeBuilder',
]
from HTMLParser import (
from html.parser import (
HTMLParser,
HTMLParseError,
)
@@ -72,9 +72,9 @@ class BeautifulSoupHTMLParser(HTMLParser):
real_name = int(name)
try:
data = unichr(real_name)
except (ValueError, OverflowError), e:
data = u"\N{REPLACEMENT CHARACTER}"
data = chr(real_name)
except (ValueError, OverflowError) as e:
data = "\N{REPLACEMENT CHARACTER}"
self.handle_data(data)
@@ -142,7 +142,7 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
declared within markup, whether any characters had to be
replaced with REPLACEMENT CHARACTER).
"""
if isinstance(markup, unicode):
if isinstance(markup, str):
yield (markup, None, None, False)
return
@@ -158,7 +158,7 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
parser.soup = self.soup
try:
parser.feed(markup)
except HTMLParseError, e:
except HTMLParseError as e:
warnings.warn(RuntimeWarning(
"Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help."))
raise e

View File

@@ -4,7 +4,7 @@ __all__ = [
]
from io import BytesIO
from StringIO import StringIO
from io import StringIO
import collections
from lxml import etree
from bs4.element import Comment, Doctype, NamespacedAttribute
@@ -78,12 +78,12 @@ class LXMLTreeBuilderForXML(TreeBuilder):
Each 4-tuple represents a strategy for parsing the document.
"""
if isinstance(markup, unicode):
if isinstance(markup, str):
# We were given Unicode. Maybe lxml can parse Unicode on
# this system?
yield markup, None, document_declared_encoding, False
if isinstance(markup, unicode):
if isinstance(markup, str):
# No, apparently not. Convert the Unicode to UTF-8 and
# tell lxml to parse it as UTF-8.
yield (markup.encode("utf8"), "utf8",
@@ -102,7 +102,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
def feed(self, markup):
if isinstance(markup, bytes):
markup = BytesIO(markup)
elif isinstance(markup, unicode):
elif isinstance(markup, str):
markup = StringIO(markup)
# Call feed() at least once, even if the markup is empty,
@@ -117,7 +117,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
if len(data) != 0:
self.parser.feed(data)
self.parser.close()
except (UnicodeDecodeError, LookupError, etree.ParserError), e:
except (UnicodeDecodeError, LookupError, etree.ParserError) as e:
raise ParserRejectedMarkup(str(e))
def close(self):
@@ -135,12 +135,12 @@ class LXMLTreeBuilderForXML(TreeBuilder):
self.nsmaps.append(None)
elif len(nsmap) > 0:
# A new namespace mapping has come into play.
inverted_nsmap = dict((value, key) for key, value in nsmap.items())
inverted_nsmap = dict((value, key) for key, value in list(nsmap.items()))
self.nsmaps.append(inverted_nsmap)
# Also treat the namespace mapping as a set of attributes on the
# tag, so we can recreate it later.
attrs = attrs.copy()
for prefix, namespace in nsmap.items():
for prefix, namespace in list(nsmap.items()):
attribute = NamespacedAttribute(
"xmlns", prefix, "http://www.w3.org/2000/xmlns/")
attrs[attribute] = namespace
@@ -149,7 +149,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
# from lxml with namespaces attached to their names, and
# turn them into NamespacedAttribute objects.
new_attrs = {}
for attr, value in attrs.items():
for attr, value in list(attrs.items()):
namespace, attr = self._getNsTag(attr)
if namespace is None:
new_attrs[attr] = value
@@ -207,7 +207,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
def test_fragment_to_document(self, fragment):
"""See `TreeBuilder`."""
return u'<?xml version="1.0" encoding="utf-8"?>\n%s' % fragment
return '<?xml version="1.0" encoding="utf-8"?>\n%s' % fragment
class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML):
@@ -224,10 +224,10 @@ class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML):
self.parser = self.parser_for(encoding)
self.parser.feed(markup)
self.parser.close()
except (UnicodeDecodeError, LookupError, etree.ParserError), e:
except (UnicodeDecodeError, LookupError, etree.ParserError) as e:
raise ParserRejectedMarkup(str(e))
def test_fragment_to_document(self, fragment):
"""See `TreeBuilder`."""
return u'<html><body>%s</body></html>' % fragment
return '<html><body>%s</body></html>' % fragment

View File

@@ -8,7 +8,7 @@ XML or HTML to reflect a new encoding; that's the tree builder's job.
"""
import codecs
from htmlentitydefs import codepoint2name
from html.entities import codepoint2name
import re
import logging
import string
@@ -56,7 +56,7 @@ class EntitySubstitution(object):
reverse_lookup = {}
characters_for_re = []
for codepoint, name in list(codepoint2name.items()):
character = unichr(codepoint)
character = chr(codepoint)
if codepoint != 34:
# There's no point in turning the quotation mark into
# &quot;, unless it happens within an attribute value, which
@@ -340,9 +340,9 @@ class UnicodeDammit:
self.detector = EncodingDetector(markup, override_encodings, is_html)
# Short-circuit if the data is in Unicode to begin with.
if isinstance(markup, unicode) or markup == '':
if isinstance(markup, str) or markup == '':
self.markup = markup
self.unicode_markup = unicode(markup)
self.unicode_markup = str(markup)
self.original_encoding = None
return
@@ -425,7 +425,7 @@ class UnicodeDammit:
def _to_unicode(self, data, encoding, errors="strict"):
'''Given a string and its encoding, decodes the string into Unicode.
%encoding is a string recognized by encodings.aliases'''
return unicode(data, encoding, errors)
return str(data, encoding, errors)
@property
def declared_html_encoding(self):

View File

@@ -1,7 +1,7 @@
"""Diagnostic functions, mainly for use when doing tech support."""
import cProfile
from StringIO import StringIO
from HTMLParser import HTMLParser
from io import StringIO
from html.parser import HTMLParser
import bs4
from bs4 import BeautifulSoup, __version__
from bs4.builder import builder_registry
@@ -17,8 +17,8 @@ import cProfile
def diagnose(data):
"""Diagnostic suite for isolating common problems."""
print "Diagnostic running on Beautiful Soup %s" % __version__
print "Python version %s" % sys.version
print("Diagnostic running on Beautiful Soup %s" % __version__)
print("Python version %s" % sys.version)
basic_parsers = ["html.parser", "html5lib", "lxml"]
for name in basic_parsers:
@@ -27,44 +27,44 @@ def diagnose(data):
break
else:
basic_parsers.remove(name)
print (
print((
"I noticed that %s is not installed. Installing it may help." %
name)
name))
if 'lxml' in basic_parsers:
basic_parsers.append(["lxml", "xml"])
from lxml import etree
print "Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION))
print("Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION)))
if 'html5lib' in basic_parsers:
import html5lib
print "Found html5lib version %s" % html5lib.__version__
print("Found html5lib version %s" % html5lib.__version__)
if hasattr(data, 'read'):
data = data.read()
elif os.path.exists(data):
print '"%s" looks like a filename. Reading data from the file.' % data
print('"%s" looks like a filename. Reading data from the file.' % data)
data = open(data).read()
elif data.startswith("http:") or data.startswith("https:"):
print '"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data
print "You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup."
print('"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data)
print("You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup.")
return
print
print()
for parser in basic_parsers:
print "Trying to parse your markup with %s" % parser
print("Trying to parse your markup with %s" % parser)
success = False
try:
soup = BeautifulSoup(data, parser)
success = True
except Exception, e:
print "%s could not parse the markup." % parser
except Exception as e:
print("%s could not parse the markup." % parser)
traceback.print_exc()
if success:
print "Here's what %s did with the markup:" % parser
print soup.prettify()
print("Here's what %s did with the markup:" % parser)
print(soup.prettify())
print "-" * 80
print("-" * 80)
def lxml_trace(data, html=True, **kwargs):
"""Print out the lxml events that occur during parsing.
@@ -74,7 +74,7 @@ def lxml_trace(data, html=True, **kwargs):
"""
from lxml import etree
for event, element in etree.iterparse(StringIO(data), html=html, **kwargs):
print("%s, %4s, %s" % (event, element.tag, element.text))
print(("%s, %4s, %s" % (event, element.tag, element.text)))
class AnnouncingParser(HTMLParser):
"""Announces HTMLParser parse events, without doing anything else."""
@@ -156,9 +156,9 @@ def rdoc(num_elements=1000):
def benchmark_parsers(num_elements=100000):
"""Very basic head-to-head performance benchmark."""
print "Comparative parser benchmark on Beautiful Soup %s" % __version__
print("Comparative parser benchmark on Beautiful Soup %s" % __version__)
data = rdoc(num_elements)
print "Generated a large invalid HTML document (%d bytes)." % len(data)
print("Generated a large invalid HTML document (%d bytes)." % len(data))
for parser in ["lxml", ["lxml", "html"], "html5lib", "html.parser"]:
success = False
@@ -167,24 +167,24 @@ def benchmark_parsers(num_elements=100000):
soup = BeautifulSoup(data, parser)
b = time.time()
success = True
except Exception, e:
print "%s could not parse the markup." % parser
except Exception as e:
print("%s could not parse the markup." % parser)
traceback.print_exc()
if success:
print "BS4+%s parsed the markup in %.2fs." % (parser, b-a)
print("BS4+%s parsed the markup in %.2fs." % (parser, b-a))
from lxml import etree
a = time.time()
etree.HTML(data)
b = time.time()
print "Raw lxml parsed the markup in %.2fs." % (b-a)
print("Raw lxml parsed the markup in %.2fs." % (b-a))
import html5lib
parser = html5lib.HTMLParser()
a = time.time()
parser.parse(data)
b = time.time()
print "Raw html5lib parsed the markup in %.2fs." % (b-a)
print("Raw html5lib parsed the markup in %.2fs." % (b-a))
def profile(num_elements=100000, parser="lxml"):

View File

@@ -21,22 +21,22 @@ def _alias(attr):
return alias
class NamespacedAttribute(unicode):
class NamespacedAttribute(str):
def __new__(cls, prefix, name, namespace=None):
if name is None:
obj = unicode.__new__(cls, prefix)
obj = str.__new__(cls, prefix)
elif prefix is None:
# Not really namespaced.
obj = unicode.__new__(cls, name)
obj = str.__new__(cls, name)
else:
obj = unicode.__new__(cls, prefix + ":" + name)
obj = str.__new__(cls, prefix + ":" + name)
obj.prefix = prefix
obj.name = name
obj.namespace = namespace
return obj
class AttributeValueWithCharsetSubstitution(unicode):
class AttributeValueWithCharsetSubstitution(str):
"""A stand-in object for a character encoding specified in HTML."""
class CharsetMetaAttributeValue(AttributeValueWithCharsetSubstitution):
@@ -47,7 +47,7 @@ class CharsetMetaAttributeValue(AttributeValueWithCharsetSubstitution):
"""
def __new__(cls, original_value):
obj = unicode.__new__(cls, original_value)
obj = str.__new__(cls, original_value)
obj.original_value = original_value
return obj
@@ -70,9 +70,9 @@ class ContentMetaAttributeValue(AttributeValueWithCharsetSubstitution):
match = cls.CHARSET_RE.search(original_value)
if match is None:
# No substitution necessary.
return unicode.__new__(unicode, original_value)
return str.__new__(str, original_value)
obj = unicode.__new__(cls, original_value)
obj = str.__new__(cls, original_value)
obj.original_value = original_value
return obj
@@ -152,7 +152,7 @@ class PageElement(object):
def format_string(self, s, formatter='minimal'):
"""Format the given string using the given formatter."""
if not callable(formatter):
if not isinstance(formatter, collections.Callable):
formatter = self._formatter_for_name(formatter)
if formatter is None:
output = s
@@ -272,7 +272,7 @@ class PageElement(object):
def insert(self, position, new_child):
if new_child is self:
raise ValueError("Cannot insert a tag into itself.")
if (isinstance(new_child, basestring)
if (isinstance(new_child, str)
and not isinstance(new_child, NavigableString)):
new_child = NavigableString(new_child)
@@ -489,7 +489,7 @@ class PageElement(object):
result = (element for element in generator
if isinstance(element, Tag))
return ResultSet(strainer, result)
elif isinstance(name, basestring):
elif isinstance(name, str):
# Optimization to find all tags with a given name.
result = (element for element in generator
if isinstance(element, Tag)
@@ -640,7 +640,7 @@ class PageElement(object):
return self.parents
class NavigableString(unicode, PageElement):
class NavigableString(str, PageElement):
PREFIX = ''
SUFFIX = ''
@@ -653,15 +653,15 @@ class NavigableString(unicode, PageElement):
passed in to the superclass's __new__ or the superclass won't know
how to handle non-ASCII characters.
"""
if isinstance(value, unicode):
return unicode.__new__(cls, value)
return unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING)
if isinstance(value, str):
return str.__new__(cls, value)
return str.__new__(cls, value, DEFAULT_OUTPUT_ENCODING)
def __copy__(self):
return self
def __getnewargs__(self):
return (unicode(self),)
return (str(self),)
def __getattr__(self, attr):
"""text.string gives you text. This is for backwards
@@ -701,23 +701,23 @@ class PreformattedString(NavigableString):
class CData(PreformattedString):
PREFIX = u'<![CDATA['
SUFFIX = u']]>'
PREFIX = '<![CDATA['
SUFFIX = ']]>'
class ProcessingInstruction(PreformattedString):
PREFIX = u'<?'
SUFFIX = u'?>'
PREFIX = '<?'
SUFFIX = '?>'
class Comment(PreformattedString):
PREFIX = u'<!--'
SUFFIX = u'-->'
PREFIX = '<!--'
SUFFIX = '-->'
class Declaration(PreformattedString):
PREFIX = u'<!'
SUFFIX = u'!>'
PREFIX = '<!'
SUFFIX = '!>'
class Doctype(PreformattedString):
@@ -734,8 +734,8 @@ class Doctype(PreformattedString):
return Doctype(value)
PREFIX = u'<!DOCTYPE '
SUFFIX = u'>\n'
PREFIX = '<!DOCTYPE '
SUFFIX = '>\n'
class Tag(PageElement):
@@ -843,7 +843,7 @@ class Tag(PageElement):
for string in self._all_strings(True):
yield string
def get_text(self, separator=u"", strip=False,
def get_text(self, separator="", strip=False,
types=(NavigableString, CData)):
"""
Get all child strings, concatenated using the given separator.
@@ -915,7 +915,7 @@ class Tag(PageElement):
def __contains__(self, x):
return x in self.contents
def __nonzero__(self):
def __bool__(self):
"A tag is non-None even if it has no contents."
return True
@@ -1014,7 +1014,7 @@ class Tag(PageElement):
# First off, turn a string formatter into a function. This
# will stop the lookup from happening over and over again.
if not callable(formatter):
if not isinstance(formatter, collections.Callable):
formatter = self._formatter_for_name(formatter)
attrs = []
@@ -1025,8 +1025,8 @@ class Tag(PageElement):
else:
if isinstance(val, list) or isinstance(val, tuple):
val = ' '.join(val)
elif not isinstance(val, basestring):
val = unicode(val)
elif not isinstance(val, str):
val = str(val)
elif (
isinstance(val, AttributeValueWithCharsetSubstitution)
and eventual_encoding is not None):
@@ -1034,7 +1034,7 @@ class Tag(PageElement):
text = self.format_string(val, formatter)
decoded = (
unicode(key) + '='
str(key) + '='
+ EntitySubstitution.quoted_attribute_value(text))
attrs.append(decoded)
close = ''
@@ -1112,7 +1112,7 @@ class Tag(PageElement):
"""
# First off, turn a string formatter into a function. This
# will stop the lookup from happening over and over again.
if not callable(formatter):
if not isinstance(formatter, collections.Callable):
formatter = self._formatter_for_name(formatter)
pretty_print = (indent_level is not None)
@@ -1210,16 +1210,16 @@ class Tag(PageElement):
raise ValueError(
'Final combinator "%s" is missing an argument.' % tokens[-1])
if self._select_debug:
print 'Running CSS selector "%s"' % selector
print('Running CSS selector "%s"' % selector)
for index, token in enumerate(tokens):
if self._select_debug:
print ' Considering token "%s"' % token
print(' Considering token "%s"' % token)
recursive_candidate_generator = None
tag_name = None
if tokens[index-1] in self._selector_combinators:
# This token was consumed by the previous combinator. Skip it.
if self._select_debug:
print ' Token was consumed by the previous combinator.'
print(' Token was consumed by the previous combinator.')
continue
# Each operation corresponds to a checker function, a rule
# for determining whether a candidate matches the
@@ -1325,14 +1325,14 @@ class Tag(PageElement):
next_token = tokens[index+1]
def recursive_select(tag):
if self._select_debug:
print ' Calling select("%s") recursively on %s %s' % (next_token, tag.name, tag.attrs)
print '-' * 40
print(' Calling select("%s") recursively on %s %s' % (next_token, tag.name, tag.attrs))
print('-' * 40)
for i in tag.select(next_token, recursive_candidate_generator):
if self._select_debug:
print '(Recursive select picked up candidate %s %s)' % (i.name, i.attrs)
print('(Recursive select picked up candidate %s %s)' % (i.name, i.attrs))
yield i
if self._select_debug:
print '-' * 40
print('-' * 40)
_use_candidate_generator = recursive_select
elif _candidate_generator is None:
# By default, a tag's candidates are all of its
@@ -1343,7 +1343,7 @@ class Tag(PageElement):
check = "[any]"
else:
check = tag_name
print ' Default candidate generator, tag name="%s"' % check
print(' Default candidate generator, tag name="%s"' % check)
if self._select_debug:
# This is redundant with later code, but it stops
# a bunch of bogus tags from cluttering up the
@@ -1365,8 +1365,8 @@ class Tag(PageElement):
new_context_ids = set([])
for tag in current_context:
if self._select_debug:
print " Running candidate generator on %s %s" % (
tag.name, repr(tag.attrs))
print(" Running candidate generator on %s %s" % (
tag.name, repr(tag.attrs)))
for candidate in _use_candidate_generator(tag):
if not isinstance(candidate, Tag):
continue
@@ -1381,21 +1381,21 @@ class Tag(PageElement):
break
if checker is None or result:
if self._select_debug:
print " SUCCESS %s %s" % (candidate.name, repr(candidate.attrs))
print(" SUCCESS %s %s" % (candidate.name, repr(candidate.attrs)))
if id(candidate) not in new_context_ids:
# If a tag matches a selector more than once,
# don't include it in the context more than once.
new_context.append(candidate)
new_context_ids.add(id(candidate))
elif self._select_debug:
print " FAILURE %s %s" % (candidate.name, repr(candidate.attrs))
print(" FAILURE %s %s" % (candidate.name, repr(candidate.attrs)))
current_context = new_context
if self._select_debug:
print "Final verdict:"
print("Final verdict:")
for i in current_context:
print " %s %s" % (i.name, i.attrs)
print(" %s %s" % (i.name, i.attrs))
return current_context
# Old names for backwards compatibility
@@ -1439,7 +1439,7 @@ class SoupStrainer(object):
else:
attrs = kwargs
normalized_attrs = {}
for key, value in attrs.items():
for key, value in list(attrs.items()):
normalized_attrs[key] = self._normalize_search_value(value)
self.attrs = normalized_attrs
@@ -1448,7 +1448,7 @@ class SoupStrainer(object):
def _normalize_search_value(self, value):
# Leave it alone if it's a Unicode string, a callable, a
# regular expression, a boolean, or None.
if (isinstance(value, unicode) or callable(value) or hasattr(value, 'match')
if (isinstance(value, str) or isinstance(value, collections.Callable) or hasattr(value, 'match')
or isinstance(value, bool) or value is None):
return value
@@ -1461,7 +1461,7 @@ class SoupStrainer(object):
new_value = []
for v in value:
if (hasattr(v, '__iter__') and not isinstance(v, bytes)
and not isinstance(v, unicode)):
and not isinstance(v, str)):
# This is almost certainly the user's mistake. In the
# interests of avoiding infinite loops, we'll let
# it through as-is rather than doing a recursive call.
@@ -1473,7 +1473,7 @@ class SoupStrainer(object):
# Otherwise, convert it into a Unicode string.
# The unicode(str()) thing is so this will do the same thing on Python 2
# and Python 3.
return unicode(str(value))
return str(str(value))
def __str__(self):
if self.text:
@@ -1527,7 +1527,7 @@ class SoupStrainer(object):
found = None
# If given a list of items, scan it for a text element that
# matches.
if hasattr(markup, '__iter__') and not isinstance(markup, (Tag, basestring)):
if hasattr(markup, '__iter__') and not isinstance(markup, (Tag, str)):
for element in markup:
if isinstance(element, NavigableString) \
and self.search(element):
@@ -1540,7 +1540,7 @@ class SoupStrainer(object):
found = self.search_tag(markup)
# If it's text, make sure the text matches.
elif isinstance(markup, NavigableString) or \
isinstance(markup, basestring):
isinstance(markup, str):
if not self.name and not self.attrs and self._matches(markup, self.text):
found = markup
else:
@@ -1554,7 +1554,7 @@ class SoupStrainer(object):
if isinstance(markup, list) or isinstance(markup, tuple):
# This should only happen when searching a multi-valued attribute
# like 'class'.
if (isinstance(match_against, unicode)
if (isinstance(match_against, str)
and ' ' in match_against):
# A bit of a special case. If they try to match "foo
# bar" on a multivalue attribute's value, only accept
@@ -1589,7 +1589,7 @@ class SoupStrainer(object):
# None matches None, False, an empty string, an empty list, and so on.
return not match_against
if isinstance(match_against, unicode):
if isinstance(match_against, str):
# Exact string match
return markup == match_against

View File

@@ -225,14 +225,14 @@ class HTMLTreeBuilderSmokeTest(object):
self.assertSoupEquals('<a b="<a>"></a>', '<a b="&lt;a&gt;"></a>')
def test_entities_in_attributes_converted_to_unicode(self):
expect = u'<p id="pi\N{LATIN SMALL LETTER N WITH TILDE}ata"></p>'
expect = '<p id="pi\N{LATIN SMALL LETTER N WITH TILDE}ata"></p>'
self.assertSoupEquals('<p id="pi&#241;ata"></p>', expect)
self.assertSoupEquals('<p id="pi&#xf1;ata"></p>', expect)
self.assertSoupEquals('<p id="pi&#Xf1;ata"></p>', expect)
self.assertSoupEquals('<p id="pi&ntilde;ata"></p>', expect)
def test_entities_in_text_converted_to_unicode(self):
expect = u'<p>pi\N{LATIN SMALL LETTER N WITH TILDE}ata</p>'
expect = '<p>pi\N{LATIN SMALL LETTER N WITH TILDE}ata</p>'
self.assertSoupEquals("<p>pi&#241;ata</p>", expect)
self.assertSoupEquals("<p>pi&#xf1;ata</p>", expect)
self.assertSoupEquals("<p>pi&#Xf1;ata</p>", expect)
@@ -243,7 +243,7 @@ class HTMLTreeBuilderSmokeTest(object):
'<p>I said "good day!"</p>')
def test_out_of_range_entity(self):
expect = u"\N{REPLACEMENT CHARACTER}"
expect = "\N{REPLACEMENT CHARACTER}"
self.assertSoupEquals("&#10000000000000;", expect)
self.assertSoupEquals("&#x10000000000000;", expect)
self.assertSoupEquals("&#1000000000;", expect)
@@ -285,9 +285,9 @@ class HTMLTreeBuilderSmokeTest(object):
# A seemingly innocuous document... but it's in Unicode! And
# it contains characters that can't be represented in the
# encoding found in the declaration! The horror!
markup = u'<html><head><meta encoding="euc-jp"></head><body>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</body>'
markup = '<html><head><meta encoding="euc-jp"></head><body>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</body>'
soup = self.soup(markup)
self.assertEqual(u'Sacr\xe9 bleu!', soup.body.string)
self.assertEqual('Sacr\xe9 bleu!', soup.body.string)
def test_soupstrainer(self):
"""Parsers should be able to work with SoupStrainers."""
@@ -327,7 +327,7 @@ class HTMLTreeBuilderSmokeTest(object):
# Both XML and HTML entities are converted to Unicode characters
# during parsing.
text = "<p>&lt;&lt;sacr&eacute;&#32;bleu!&gt;&gt;</p>"
expected = u"<p>&lt;&lt;sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</p>"
expected = "<p>&lt;&lt;sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</p>"
self.assertSoupEquals(text, expected)
def test_smart_quotes_converted_on_the_way_in(self):
@@ -337,15 +337,15 @@ class HTMLTreeBuilderSmokeTest(object):
soup = self.soup(quote)
self.assertEqual(
soup.p.string,
u"\N{LEFT SINGLE QUOTATION MARK}Foo\N{RIGHT SINGLE QUOTATION MARK}")
"\N{LEFT SINGLE QUOTATION MARK}Foo\N{RIGHT SINGLE QUOTATION MARK}")
def test_non_breaking_spaces_converted_on_the_way_in(self):
soup = self.soup("<a>&nbsp;&nbsp;</a>")
self.assertEqual(soup.a.string, u"\N{NO-BREAK SPACE}" * 2)
self.assertEqual(soup.a.string, "\N{NO-BREAK SPACE}" * 2)
def test_entities_converted_on_the_way_out(self):
text = "<p>&lt;&lt;sacr&eacute;&#32;bleu!&gt;&gt;</p>"
expected = u"<p>&lt;&lt;sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</p>".encode("utf-8")
expected = "<p>&lt;&lt;sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</p>".encode("utf-8")
soup = self.soup(text)
self.assertEqual(soup.p.encode("utf-8"), expected)
@@ -354,7 +354,7 @@ class HTMLTreeBuilderSmokeTest(object):
# easy-to-understand document.
# Here it is in Unicode. Note that it claims to be in ISO-Latin-1.
unicode_html = u'<html><head><meta content="text/html; charset=ISO-Latin-1" http-equiv="Content-type"/></head><body><p>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</p></body></html>'
unicode_html = '<html><head><meta content="text/html; charset=ISO-Latin-1" http-equiv="Content-type"/></head><body><p>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</p></body></html>'
# That's because we're going to encode it into ISO-Latin-1, and use
# that to test.
@@ -493,15 +493,15 @@ class XMLTreeBuilderSmokeTest(object):
self.assertTrue(b"&lt; &lt; hey &gt; &gt;" in encoded)
def test_can_parse_unicode_document(self):
markup = u'<?xml version="1.0" encoding="euc-jp"><root>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</root>'
markup = '<?xml version="1.0" encoding="euc-jp"><root>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</root>'
soup = self.soup(markup)
self.assertEqual(u'Sacr\xe9 bleu!', soup.root.string)
self.assertEqual('Sacr\xe9 bleu!', soup.root.string)
def test_popping_namespaced_tag(self):
markup = '<rss xmlns:dc="foo"><dc:creator>b</dc:creator><dc:date>2012-07-02T20:33:42Z</dc:date><dc:rights>c</dc:rights><image>d</image></rss>'
soup = self.soup(markup)
self.assertEqual(
unicode(soup.rss), markup)
str(soup.rss), markup)
def test_docstring_includes_correct_encoding(self):
soup = self.soup("<root/>")
@@ -532,17 +532,17 @@ class XMLTreeBuilderSmokeTest(object):
def test_closing_namespaced_tag(self):
markup = '<p xmlns:dc="http://purl.org/dc/elements/1.1/"><dc:date>20010504</dc:date></p>'
soup = self.soup(markup)
self.assertEqual(unicode(soup.p), markup)
self.assertEqual(str(soup.p), markup)
def test_namespaced_attributes(self):
markup = '<foo xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"><bar xsi:schemaLocation="http://www.example.com"/></foo>'
soup = self.soup(markup)
self.assertEqual(unicode(soup.foo), markup)
self.assertEqual(str(soup.foo), markup)
def test_namespaced_attributes_xml_namespace(self):
markup = '<foo xml:lang="fr">bar</foo>'
soup = self.soup(markup)
self.assertEqual(unicode(soup.foo), markup)
self.assertEqual(str(soup.foo), markup)
class HTML5TreeBuilderSmokeTest(HTMLTreeBuilderSmokeTest):
"""Smoke test for a tree builder that supports HTML5."""

465
libs/cache/__init__.py vendored
View File

@@ -1,27 +1,75 @@
# -*- coding: utf-8 -*-
"""
copied from
werkzeug.contrib.cache
~~~~~~~~~~~~~~~~~~~~~~
:copyright: (c) 2011 by the Werkzeug Team, see AUTHORS for more details.
The main problem with dynamic Web sites is, well, they're dynamic. Each
time a user requests a page, the webserver executes a lot of code, queries
the database, renders templates until the visitor gets the page he sees.
This is a lot more expensive than just loading a file from the file system
and sending it to the visitor.
For most Web applications, this overhead isn't a big deal but once it
becomes, you will be glad to have a cache system in place.
How Caching Works
=================
Caching is pretty simple. Basically you have a cache object lurking around
somewhere that is connected to a remote cache or the file system or
something else. When the request comes in you check if the current page
is already in the cache and if so, you're returning it from the cache.
Otherwise you generate the page and put it into the cache. (Or a fragment
of the page, you don't have to cache the full thing)
Here is a simple example of how to cache a sidebar for a template::
def get_sidebar(user):
identifier = 'sidebar_for/user%d' % user.id
value = cache.get(identifier)
if value is not None:
return value
value = generate_sidebar_for(user=user)
cache.set(identifier, value, timeout=60 * 5)
return value
Creating a Cache Object
=======================
To create a cache object you just import the cache system of your choice
from the cache module and instantiate it. Then you can start working
with that object:
>>> from werkzeug.contrib.cache import SimpleCache
>>> c = SimpleCache()
>>> c.set("foo", "value")
>>> c.get("foo")
'value'
>>> c.get("missing") is None
True
Please keep in mind that you have to create the cache and put it somewhere
you have access to it (either as a module global you can import or you just
put it into your WSGI application).
:copyright: (c) 2014 by the Werkzeug Team, see AUTHORS for more details.
:license: BSD, see LICENSE for more details.
"""
from cache.posixemulation import rename
from itertools import izip
from time import time
import os
import re
import tempfile
try:
from hashlib import md5
except ImportError:
from md5 import new as md5
from hashlib import md5
from time import time
try:
import cPickle as pickle
except ImportError:
import pickle
from ._compat import iteritems, string_types, text_type, \
integer_types, to_bytes
from .posixemulation import rename
def _items(mappingorseq):
"""Wrapper for efficient iteration over mappings represented by dicts
@@ -34,8 +82,11 @@ def _items(mappingorseq):
... assert k*k == v
"""
return mappingorseq.iteritems() if hasattr(mappingorseq, 'iteritems') \
else mappingorseq
if hasattr(mappingorseq, "iteritems"):
return mappingorseq.iteritems()
elif hasattr(mappingorseq, "items"):
return mappingorseq.items()
return mappingorseq
class BaseCache(object):
@@ -46,9 +97,17 @@ class BaseCache(object):
specified on :meth:`set`.
"""
def __init__(self, default_timeout = 300):
def __init__(self, default_timeout=300):
self.default_timeout = default_timeout
def get(self, key):
"""Looks up key in the cache and returns the value for it.
If the key does not exist `None` is returned instead.
:param key: the key to be looked up.
"""
return None
def delete(self, key):
"""Deletes `key` from the cache. If it does not exist in the cache
nothing happens.
@@ -81,9 +140,9 @@ class BaseCache(object):
:param keys: The function accepts multiple keys as positional
arguments.
"""
return dict(izip(keys, self.get_many(*keys)))
return dict(zip(keys, self.get_many(*keys)))
def set(self, key, value, timeout = None):
def set(self, key, value, timeout=None):
"""Adds a new key/value to the cache (overwrites value, if key already
exists in the cache).
@@ -94,7 +153,7 @@ class BaseCache(object):
"""
pass
def add(self, key, value, timeout = None):
def add(self, key, value, timeout=None):
"""Works like :meth:`set` but does not overwrite the values of already
existing keys.
@@ -105,7 +164,7 @@ class BaseCache(object):
"""
pass
def set_many(self, mapping, timeout = None):
def set_many(self, mapping, timeout=None):
"""Sets multiple keys and values from a mapping.
:param mapping: a mapping with the keys/values to set.
@@ -130,7 +189,7 @@ class BaseCache(object):
"""
pass
def inc(self, key, delta = 1):
def inc(self, key, delta=1):
"""Increments the value of a key by `delta`. If the key does
not yet exist it is initialized with `delta`.
@@ -141,7 +200,7 @@ class BaseCache(object):
"""
self.set(key, (self.get(key) or 0) + delta)
def dec(self, key, delta = 1):
def dec(self, key, delta=1):
"""Decrements the value of a key by `delta`. If the key does
not yet exist it is initialized with `-delta`.
@@ -153,6 +212,362 @@ class BaseCache(object):
self.set(key, (self.get(key) or 0) - delta)
class NullCache(BaseCache):
    """Cache backend that never stores anything.

    The class adds nothing on top of :class:`BaseCache`; it is meant as a
    stand-in for a real cache, for example in unit tests.

    :param default_timeout: a dummy parameter that is ignored but exists
                            for API compatibility with other caches.
    """
class SimpleCache(BaseCache):
    """Simple memory cache for single process environments.

    Values are pickled when stored and unpickled when read. No locks are
    used, so the class is not fully thread safe: under heavy load a key may
    be added more than once.

    :param threshold: the maximum number of items the cache stores before
                      it starts deleting some.
    :param default_timeout: the default timeout that is used if no timeout is
                            specified on :meth:`~BaseCache.set`.
    """

    def __init__(self, threshold=500, default_timeout=300):
        BaseCache.__init__(self, default_timeout)
        self._cache = {}
        # ``clear`` is the backing dict's own bound ``clear`` method.
        self.clear = self._cache.clear
        self._threshold = threshold

    def _prune(self):
        """Evict entries once more than ``threshold`` items are stored.

        Every expired entry is removed, and so is every third entry
        regardless of its expiry time.
        """
        if len(self._cache) > self._threshold:
            now = time()
            # Iterate over a snapshot: popping from a dict while walking its
            # live ``items()`` view raises RuntimeError on Python 3.
            snapshot = list(self._cache.items())
            for idx, (key, (expires, _)) in enumerate(snapshot):
                if expires <= now or idx % 3 == 0:
                    self._cache.pop(key, None)

    def get(self, key):
        """Return the cached value for `key`, or `None` if the key is
        missing or its entry has expired.
        """
        expires, value = self._cache.get(key, (0, None))
        if expires > time():
            return pickle.loads(value)
        return None

    def set(self, key, value, timeout=None):
        """Store `value` under `key`, overwriting any existing entry.

        :param timeout: lifetime in seconds; ``default_timeout`` if `None`.
        """
        if timeout is None:
            timeout = self.default_timeout
        self._prune()
        self._cache[key] = (time() + timeout,
                            pickle.dumps(value, pickle.HIGHEST_PROTOCOL))

    def add(self, key, value, timeout=None):
        """Store `value` under `key` only if `key` is not already present.

        :param timeout: lifetime in seconds; ``default_timeout`` if `None`.
        """
        if timeout is None:
            timeout = self.default_timeout
        self._prune()
        item = (time() + timeout,
                pickle.dumps(value, pickle.HIGHEST_PROTOCOL))
        self._cache.setdefault(key, item)

    def delete(self, key):
        """Remove `key` from the cache; a missing key is ignored."""
        self._cache.pop(key, None)
_test_memcached_key = re.compile(br'[^\x00-\x21\xff]{1,250}$').match
class MemcachedCache(BaseCache):
    """A cache that uses memcached as backend.

    The first argument can either be an object that resembles the API of a
    :class:`memcache.Client` or a tuple/list of server addresses. In the
    event that a tuple/list is passed, Werkzeug tries to import the best
    available memcache library.

    Implementation notes: This cache backend works around some limitations in
    memcached to simplify the interface. For example unicode keys are encoded
    to utf-8 on the fly. Methods such as :meth:`~BaseCache.get_dict` return
    the keys in the same format as passed. Furthermore all get methods
    silently ignore key errors to not cause problems when untrusted user data
    is passed to the get methods which is often the case in web applications.

    :param servers: a list or tuple of server addresses or alternatively
                    a :class:`memcache.Client` or a compatible client.
    :param default_timeout: the default timeout that is used if no timeout is
                            specified on :meth:`~BaseCache.set`.
    :param key_prefix: a prefix that is added before all keys. This makes it
                       possible to use the same memcached server for different
                       applications. Keep in mind that
                       :meth:`~BaseCache.clear` will also clear keys with a
                       different prefix.
    """

    def __init__(self, servers=None, default_timeout=300, key_prefix=None):
        BaseCache.__init__(self, default_timeout)
        if servers is None or isinstance(servers, (list, tuple)):
            if servers is None:
                servers = ['127.0.0.1:11211']
            self._client = self.import_preferred_memcache_lib(servers)
            if self._client is None:
                raise RuntimeError('no memcache module found')
        else:
            # NOTE: servers is actually an already initialized memcache
            # client.
            self._client = servers

        self.key_prefix = to_bytes(key_prefix)

    def _normalize_key(self, key):
        """Return `key` utf-8 encoded (if it is text) with the prefix applied."""
        if isinstance(key, text_type):
            key = key.encode('utf-8')
        if self.key_prefix:
            key = self.key_prefix + key
        return key

    def get(self, key):
        """Return the value stored for `key`, or `None`.

        Keys that memcached cannot accept are silently treated as missing,
        because lookups are often driven by user submitted data.
        """
        key = self._normalize_key(key)
        if _test_memcached_key(key):
            return self._client.get(key)
        return None

    def get_dict(self, *keys):
        """Return a dict mapping each of `keys`, as passed, to its cached
        value; keys without a value map to `None`.
        """
        key_mapping = {}
        have_encoded_keys = False
        for key in keys:
            # ``text_type`` rather than ``unicode``: the latter does not
            # exist on Python 3.
            if isinstance(key, text_type):
                encoded_key = key.encode('utf-8')
                have_encoded_keys = True
            else:
                encoded_key = key
            if self.key_prefix:
                encoded_key = self.key_prefix + encoded_key
            # Validate the key that is actually sent to the server, exactly
            # as ``get`` does.
            if _test_memcached_key(encoded_key):
                key_mapping[encoded_key] = key
        d = rv = self._client.get_multi(list(key_mapping))
        if have_encoded_keys or self.key_prefix:
            # Translate the server-side keys back to the caller's keys.
            rv = {}
            for key, value in iteritems(d):
                rv[key_mapping[key]] = value
        if len(rv) < len(keys):
            for key in keys:
                if key not in rv:
                    rv[key] = None
        return rv

    def add(self, key, value, timeout=None):
        """Store `value` under `key` via the client's ``add`` call.

        :param timeout: lifetime in seconds; ``default_timeout`` if `None`.
        """
        if timeout is None:
            timeout = self.default_timeout
        self._client.add(self._normalize_key(key), value, timeout)

    def set(self, key, value, timeout=None):
        """Store `value` under `key` via the client's ``set`` call.

        :param timeout: lifetime in seconds; ``default_timeout`` if `None`.
        """
        if timeout is None:
            timeout = self.default_timeout
        self._client.set(self._normalize_key(key), value, timeout)

    def get_many(self, *keys):
        """Return the values for `keys` as a list, in the order given."""
        d = self.get_dict(*keys)
        return [d[key] for key in keys]

    def set_many(self, mapping, timeout=None):
        """Store every key/value pair of `mapping` with one ``set_multi``.

        :param timeout: lifetime in seconds; ``default_timeout`` if `None`.
        """
        if timeout is None:
            timeout = self.default_timeout
        new_mapping = {}
        for key, value in _items(mapping):
            new_mapping[self._normalize_key(key)] = value
        self._client.set_multi(new_mapping, timeout)

    def delete(self, key):
        """Delete `key`; keys memcached cannot accept are ignored."""
        key = self._normalize_key(key)
        if _test_memcached_key(key):
            self._client.delete(key)

    def delete_many(self, *keys):
        """Delete all acceptable `keys` with one ``delete_multi`` call."""
        new_keys = []
        for key in keys:
            key = self._normalize_key(key)
            if _test_memcached_key(key):
                new_keys.append(key)
        self._client.delete_multi(new_keys)

    def clear(self):
        """Flush the whole memcached server, whatever the key prefix."""
        self._client.flush_all()

    def inc(self, key, delta=1):
        """Increment the value of `key` by `delta` on the server."""
        self._client.incr(self._normalize_key(key), delta)

    def dec(self, key, delta=1):
        """Decrement the value of `key` by `delta` on the server."""
        self._client.decr(self._normalize_key(key), delta)

    def import_preferred_memcache_lib(self, servers):
        """Returns an initialized memcache client.  Used by the constructor.

        Tries ``pylibmc``, then the App Engine memcache API, then
        ``memcache``; returns `None` when none of them can be imported.
        """
        try:
            import pylibmc
        except ImportError:
            pass
        else:
            return pylibmc.Client(servers)

        try:
            from google.appengine.api import memcache
        except ImportError:
            pass
        else:
            return memcache.Client()

        try:
            import memcache
        except ImportError:
            pass
        else:
            return memcache.Client(servers)

        return None
# backwards compatibility
GAEMemcachedCache = MemcachedCache
class RedisCache(BaseCache):
    """Cache backend built on the Redis key-value store.

    `host` is either the address of a Redis server (a string) or an object
    that behaves like a ``redis.Redis`` instance.

    Note: Python Redis API already takes care of encoding unicode strings on
    the fly.

    .. versionadded:: 0.7

    .. versionadded:: 0.8
       `key_prefix` was added.

    .. versionchanged:: 0.8
       This cache backend now properly serializes objects.

    .. versionchanged:: 0.8.3
       This cache backend now supports password authentication.

    :param host: address of the Redis server or an object which API is
                 compatible with the official Python Redis client (redis-py).
    :param port: port number on which Redis server listens for connections.
    :param password: password authentication for the Redis server.
    :param db: db (zero-based numeric index) on Redis Server to connect.
    :param default_timeout: the default timeout that is used if no timeout is
                            specified on :meth:`~BaseCache.set`.
    :param key_prefix: A prefix that should be added to all keys.
    """

    def __init__(self, host='localhost', port=6379, password=None,
                 db=0, default_timeout=300, key_prefix=None):
        BaseCache.__init__(self, default_timeout)
        if not isinstance(host, string_types):
            # Anything that is not a string is used as the client itself.
            self._client = host
        else:
            try:
                import redis
            except ImportError:
                raise RuntimeError('no redis module found')
            self._client = redis.Redis(host=host, port=port,
                                       password=password, db=db)
        self.key_prefix = key_prefix or ''

    def dump_object(self, value):
        """Serialize `value` for storage in redis.

        Integers become their ASCII decimal form; every other value is
        pickled and prefixed with ``!``.
        """
        if type(value) in integer_types:
            return str(value).encode('ascii')
        return b'!' + pickle.dumps(value)

    def load_object(self, value):
        """Undo :meth:`dump_object`.  `value` may be `None`."""
        if value is None:
            return None
        if value.startswith(b'!'):
            pickled = value[1:]
            return pickle.loads(pickled)
        try:
            return int(value)
        except ValueError:
            # Values written before serialization existed (pre 0.8) are
            # handed back untouched.
            return value

    def get(self, key):
        """Return the deserialized value stored for `key`."""
        raw = self._client.get(self.key_prefix + key)
        return self.load_object(raw)

    def get_many(self, *keys):
        """Return the deserialized values for `keys` as a list."""
        prefix = self.key_prefix
        if prefix:
            keys = [prefix + key for key in keys]
        raw_values = self._client.mget(keys)
        return [self.load_object(raw) for raw in raw_values]

    def set(self, key, value, timeout=None):
        """Store `value` under `key` with an expiry of `timeout` seconds
        (``default_timeout`` if `None`).
        """
        if timeout is None:
            timeout = self.default_timeout
        payload = self.dump_object(value)
        self._client.setex(self.key_prefix + key, payload, timeout)

    def add(self, key, value, timeout=None):
        """Store `value` under `key` only if the key does not exist yet;
        the expiry is set only when the value was actually added.
        """
        if timeout is None:
            timeout = self.default_timeout
        payload = self.dump_object(value)
        if self._client.setnx(self.key_prefix + key, payload):
            self._client.expire(self.key_prefix + key, timeout)

    def set_many(self, mapping, timeout=None):
        """Store every key/value pair of `mapping` through one pipeline."""
        if timeout is None:
            timeout = self.default_timeout
        pipeline = self._client.pipeline()
        for key, value in _items(mapping):
            payload = self.dump_object(value)
            pipeline.setex(self.key_prefix + key, payload, timeout)
        pipeline.execute()

    def delete(self, key):
        """Delete `key` from redis."""
        self._client.delete(self.key_prefix + key)

    def delete_many(self, *keys):
        """Delete all `keys` in one call; a no-op when no key is given."""
        if keys:
            if self.key_prefix:
                keys = [self.key_prefix + key for key in keys]
            self._client.delete(*keys)

    def clear(self):
        """Delete the keys carrying this cache's prefix, or flush the whole
        database when no prefix is configured.
        """
        if not self.key_prefix:
            self._client.flushdb()
            return
        matching = self._client.keys(self.key_prefix + '*')
        if matching:
            self._client.delete(*matching)

    def inc(self, key, delta=1):
        """Increment `key` by `delta` and return the client's result."""
        return self._client.incr(self.key_prefix + key, delta)

    def dec(self, key, delta=1):
        """Decrement `key` by `delta` and return the client's result."""
        return self._client.decr(self.key_prefix + key, delta)
class FileSystemCache(BaseCache):
"""A cache that stores the items on the file system. This cache depends
on being the only user of the `cache_dir`. Make absolutely sure that
@@ -170,7 +585,7 @@ class FileSystemCache(BaseCache):
#: used for temporary files by the FileSystemCache
_fs_transaction_suffix = '.__wz_cache'
def __init__(self, cache_dir, threshold = 500, default_timeout = 300, mode = 0600):
def __init__(self, cache_dir, threshold=500, default_timeout=300, mode=0o600):
BaseCache.__init__(self, default_timeout)
self._path = cache_dir
self._threshold = threshold
@@ -215,6 +630,8 @@ class FileSystemCache(BaseCache):
pass
def _get_filename(self, key):
if isinstance(key, text_type):
key = key.encode('utf-8') #XXX unicode review
hash = md5(key).hexdigest()
return os.path.join(self._path, hash)
@@ -231,19 +648,19 @@ class FileSystemCache(BaseCache):
except Exception:
return None
def add(self, key, value, timeout = None):
def add(self, key, value, timeout=None):
filename = self._get_filename(key)
if not os.path.exists(filename):
self.set(key, value, timeout)
def set(self, key, value, timeout = None):
def set(self, key, value, timeout=None):
if timeout is None:
timeout = self.default_timeout
filename = self._get_filename(key)
self._prune()
try:
fd, tmp = tempfile.mkstemp(suffix = self._fs_transaction_suffix,
dir = self._path)
fd, tmp = tempfile.mkstemp(suffix=self._fs_transaction_suffix,
dir=self._path)
f = os.fdopen(fd, 'wb')
try:
pickle.dump(int(time() + timeout), f, 1)

202
libs/cache/_compat.py vendored Normal file
View File

@@ -0,0 +1,202 @@
import sys
import operator
import functools
try:
import builtins
except ImportError:
import __builtin__ as builtins
# True when the running interpreter's major version is 2.
PY2 = sys.version_info[0] == 2

# No-op helper, used below wherever a branch needs no adaptation.
_identity = lambda x: x

# Both branches bind the same set of names, so importers of this module do
# not have to test the interpreter version themselves.
if PY2:
    unichr = unichr
    text_type = unicode
    string_types = (str, unicode)
    integer_types = (int, long)
    int_to_byte = chr

    # Thin wrappers around the iterator-returning dict/multidict methods.
    iterkeys = lambda d, *args, **kwargs: d.iterkeys(*args, **kwargs)
    itervalues = lambda d, *args, **kwargs: d.itervalues(*args, **kwargs)
    iteritems = lambda d, *args, **kwargs: d.iteritems(*args, **kwargs)

    iterlists = lambda d, *args, **kwargs: d.iterlists(*args, **kwargs)
    iterlistvalues = lambda d, *args, **kwargs: d.iterlistvalues(*args, **kwargs)

    iter_bytes = lambda x: iter(x)

    # The three-argument ``raise`` statement is not valid Python 3 syntax,
    # so it is kept in a string and only compiled when this branch runs.
    exec('def reraise(tp, value, tb=None):\n raise tp, value, tb')

    def fix_tuple_repr(obj):
        # Class decorator: installs a ``__repr__`` that renders the instance
        # as ``ClassName(field=value, ...)`` using ``cls._fields``.
        def __repr__(self):
            cls = self.__class__
            return '%s(%s)' % (cls.__name__, ', '.join(
                '%s=%r' % (field, self[index])
                for index, field in enumerate(cls._fields)
            ))
        obj.__repr__ = __repr__
        return obj

    def implements_iterator(cls):
        # Class decorator: renames ``__next__`` to ``next``.
        cls.next = cls.__next__
        del cls.__next__
        return cls

    def implements_to_string(cls):
        # Class decorator: moves ``__str__`` to ``__unicode__`` and makes
        # ``__str__`` return that text encoded as utf-8.
        cls.__unicode__ = cls.__str__
        cls.__str__ = lambda x: x.__unicode__().encode('utf-8')
        return cls

    def native_string_result(func):
        # Decorator: utf-8 encodes the wrapped function's return value.
        def wrapper(*args, **kwargs):
            return func(*args, **kwargs).encode('utf-8')
        return functools.update_wrapper(wrapper, func)

    def implements_bool(cls):
        # Class decorator: renames ``__bool__`` to ``__nonzero__``.
        cls.__nonzero__ = cls.__bool__
        del cls.__bool__
        return cls

    from itertools import imap, izip, ifilter
    range_type = xrange

    from StringIO import StringIO
    from cStringIO import StringIO as BytesIO
    NativeStringIO = BytesIO

    def make_literal_wrapper(reference):
        # In this branch literals are returned unchanged whatever
        # ``reference`` is.
        return lambda x: x

    def normalize_string_tuple(tup):
        """Normalizes a string tuple to a common type. Following Python 2
        rules, upgrades to unicode are implicit.
        """
        if any(isinstance(x, text_type) for x in tup):
            return tuple(to_unicode(x) for x in tup)
        return tup

    def try_coerce_native(s):
        """Try to coerce a unicode string to native if possible. Otherwise,
        leave it as unicode.
        """
        try:
            return str(s)
        except UnicodeError:
            return s

    wsgi_get_bytes = _identity

    def wsgi_decoding_dance(s, charset='utf-8', errors='replace'):
        # Decodes ``s`` with the given charset and error handling.
        return s.decode(charset, errors)

    def wsgi_encoding_dance(s, charset='utf-8', errors='replace'):
        # ``bytes`` pass through untouched; anything else is encoded.
        if isinstance(s, bytes):
            return s
        return s.encode(charset, errors)

    def to_bytes(x, charset=sys.getdefaultencoding(), errors='strict'):
        # ``None`` stays ``None``; byte-like objects are copied into
        # ``bytes``; unicode is encoded; any other type raises TypeError.
        if x is None:
            return None
        if isinstance(x, (bytes, bytearray, buffer)):
            return bytes(x)
        if isinstance(x, unicode):
            return x.encode(charset, errors)
        raise TypeError('Expected bytes')

    def to_native(x, charset=sys.getdefaultencoding(), errors='strict'):
        # ``None`` and ``str`` are returned as is; everything else is
        # encoded with ``charset``.
        if x is None or isinstance(x, str):
            return x
        return x.encode(charset, errors)

else:
    unichr = chr
    text_type = str
    string_types = (str, )
    integer_types = (int, )

    # Same names as in the other branch, built on keys()/values()/items().
    iterkeys = lambda d, *args, **kwargs: iter(d.keys(*args, **kwargs))
    itervalues = lambda d, *args, **kwargs: iter(d.values(*args, **kwargs))
    iteritems = lambda d, *args, **kwargs: iter(d.items(*args, **kwargs))

    iterlists = lambda d, *args, **kwargs: iter(d.lists(*args, **kwargs))
    iterlistvalues = lambda d, *args, **kwargs: iter(d.listvalues(*args, **kwargs))

    # Calls ``value.to_bytes(1, 'big')`` on its argument.
    int_to_byte = operator.methodcaller('to_bytes', 1, 'big')

    def iter_bytes(b):
        # Maps ``int_to_byte`` over the items of ``b``.
        return map(int_to_byte, b)

    def reraise(tp, value, tb=None):
        # Re-raises ``value``, attaching ``tb`` unless it is already the
        # exception's traceback; ``tp`` is not used in this branch.
        if value.__traceback__ is not tb:
            raise value.with_traceback(tb)
        raise value

    # The class/function decorators are no-ops in this branch.
    fix_tuple_repr = _identity
    implements_iterator = _identity
    implements_to_string = _identity
    implements_bool = _identity
    native_string_result = _identity
    imap = map
    izip = zip
    ifilter = filter
    range_type = range

    from io import StringIO, BytesIO
    NativeStringIO = StringIO

    def make_literal_wrapper(reference):
        # Literals stay text when ``reference`` is text; otherwise they are
        # encoded as latin1.
        if isinstance(reference, text_type):
            return lambda x: x
        return lambda x: x.encode('latin1')

    def normalize_string_tuple(tup):
        """Ensures that all types in the tuple are either strings
        or bytes.
        """
        tupiter = iter(tup)
        # The first element decides which kind the rest must match.
        is_text = isinstance(next(tupiter, None), text_type)
        for arg in tupiter:
            if isinstance(arg, text_type) != is_text:
                raise TypeError('Cannot mix str and bytes arguments (got %s)'
                                % repr(tup))
        return tup

    try_coerce_native = _identity

    def wsgi_get_bytes(s):
        # Encodes ``s`` as latin1.
        return s.encode('latin1')

    def wsgi_decoding_dance(s, charset='utf-8', errors='replace'):
        # Round-trips through latin1 bytes before decoding with ``charset``.
        return s.encode('latin1').decode(charset, errors)

    def wsgi_encoding_dance(s, charset='utf-8', errors='replace'):
        # Always returns text obtained by decoding bytes as latin1; text
        # input is first encoded with ``charset``.
        if isinstance(s, bytes):
            return s.decode('latin1', errors)
        return s.encode(charset).decode('latin1', errors)

    def to_bytes(x, charset=sys.getdefaultencoding(), errors='strict'):
        # ``None`` stays ``None``; byte-like objects are copied into
        # ``bytes``; ``str`` is encoded; any other type raises TypeError.
        if x is None:
            return None
        if isinstance(x, (bytes, bytearray, memoryview)):
            return bytes(x)
        if isinstance(x, str):
            return x.encode(charset, errors)
        raise TypeError('Expected bytes')

    def to_native(x, charset=sys.getdefaultencoding(), errors='strict'):
        # ``None`` and ``str`` are returned as is; everything else is
        # decoded with ``charset``.
        if x is None or isinstance(x, str):
            return x
        return x.decode(charset, errors)
def to_unicode(x, charset=sys.getdefaultencoding(), errors='strict',
               allow_none_charset=False):
    """Convert `x` to text.

    `None` is returned as `None`.  Objects that are not ``bytes`` are passed
    to ``text_type``.  ``bytes`` are decoded with `charset` and `errors`,
    except that they are returned unchanged when `charset` is `None` and
    `allow_none_charset` is true.
    """
    if x is None:
        return None
    if isinstance(x, bytes):
        if charset is None and allow_none_charset:
            return x
        return x.decode(charset, errors)
    return text_type(x)

View File

@@ -14,7 +14,7 @@ r"""
This module was introduced in 0.6.1 and is not a public interface.
It might become one in later versions of Werkzeug.
:copyright: (c) 2011 by the Werkzeug Team, see AUTHORS for more details.
:copyright: (c) 2014 by the Werkzeug Team, see AUTHORS for more details.
:license: BSD, see LICENSE for more details.
"""
import sys
@@ -90,7 +90,7 @@ if os.name == 'nt': # pragma: no cover
# Fall back to "move away and replace"
try:
os.rename(src, dst)
except OSError, e:
except OSError as e:
if e.errno != errno.EEXIST:
raise
old = "%s-%08x" % (dst, random.randint(0, sys.maxint))

View File

@@ -4,7 +4,6 @@
# `cssmin.py` - A Python port of the YUI CSS compressor.
from StringIO import StringIO # The pure-Python StringIO supports unicode.
import re
@@ -52,7 +51,7 @@ def remove_unnecessary_whitespace(css):
"""
Prevents 'p :link' from becoming 'p:link'.
Translates 'p :link' into 'p ___PSEUDOCLASSCOLON___link'; this is
translated back again later.
"""

View File

@@ -30,7 +30,7 @@
# SOFTWARE.
# */
from StringIO import StringIO
from six import StringIO
def jsmin(js):
ins = StringIO(js)

File diff suppressed because it is too large Load Diff

View File

@@ -1,12 +0,0 @@
#!/usr/bin/env python
from tmdb_api import Configuration, searchMovie, searchMovieWithYear, \
searchPerson, searchStudio, searchList, searchCollection, \
searchSeries, Person, Movie, Collection, Genre, List, \
Series, Studio, Network, Episode, Season, __version__
from request import set_key, set_cache
from locales import get_locale, set_locale
from tmdb_auth import get_session, set_session
from cache_engine import CacheEngine
from tmdb_exceptions import *

View File

@@ -1,130 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#-----------------------
# Name: cache.py
# Python Library
# Author: Raymond Wagner
# Purpose: Caching framework to store TMDb API results
#-----------------------
import time
import os
from tmdb_exceptions import *
from cache_engine import Engines
import cache_null
import cache_file
class Cache(object):
    """
    This class implements a cache framework, allowing selecting of a
    pluggable engine. The framework stores data in a key/value manner,
    along with a lifetime, after which data will be expired and
    pulled fresh next time it is requested from the cache.

    This class defines a wrapper to be used with query functions. The
    wrapper will automatically cache the inputs and outputs of the
    wrapped function, pulling the output from local storage for
    subsequent calls with those inputs.
    """

    def __init__(self, engine=None, *args, **kwargs):
        self._engine = None
        self._data = {}
        self._age = 0
        self.configure(engine, *args, **kwargs)

    def _import(self, data=None):
        """Merge cache objects into the in-memory store.

        When `data` is None the objects are fetched from the engine, passing
        it the newest creation time seen so far.  Expired objects are
        skipped.
        """
        if data is None:
            data = self._engine.get(self._age)
        for obj in sorted(data, key=lambda x: x.creation):
            if not obj.expired:
                self._data[obj.key] = obj
                self._age = max(self._age, obj.creation)

    def _expire(self):
        """Drop every expired object from the in-memory store."""
        # Iterate over a snapshot: deleting from a dict while walking its
        # live ``items()`` view raises RuntimeError on Python 3.
        for k, v in list(self._data.items()):
            if v.expired:
                del self._data[k]

    def configure(self, engine, *args, **kwargs):
        """Select and configure the storage engine.

        :param engine: engine name; None selects ``'file'``.
        :raises TMDBCacheError: if `engine` is not a registered engine.
        """
        if engine is None:
            engine = 'file'
        elif engine not in Engines:
            raise TMDBCacheError(
                "Invalid cache engine specified: " + str(engine))
        self._engine = Engines[engine](self)
        self._engine.configure(*args, **kwargs)

    def put(self, key, data, lifetime=60*60*12):
        """Store `data` under `key` for `lifetime` seconds.

        :raises TMDBCacheError: if no engine is configured.
        """
        # pull existing data, so cache will be fresh when written back out
        if self._engine is None:
            raise TMDBCacheError("No cache engine configured")
        self._expire()
        self._import(self._engine.put(key, data, lifetime))

    def get(self, key):
        """Return the data cached under `key`, or None when there is none.

        :raises TMDBCacheError: if no engine is configured.
        """
        if self._engine is None:
            raise TMDBCacheError("No cache engine configured")
        self._expire()
        if key not in self._data:
            self._import()
        # Only a missing key means "not cached"; other failures are no
        # longer hidden behind a bare ``except``.
        entry = self._data.get(key)
        if entry is None:
            return None
        return entry.data

    def cached(self, callback):
        """
        Returns a decorator that uses a callback to specify the key to use
        for caching the responses from the decorated function.
        """
        return self.Cached(self, callback)

    class Cached(object):
        """Decorator/descriptor that caches the wrapped function's result
        under the key returned by `callback`.
        """

        def __init__(self, cache, callback, func=None, inst=None):
            self.cache = cache
            self.callback = callback
            self.func = func
            self.inst = inst

            if func:
                self.__module__ = func.__module__
                self.__name__ = func.__name__
                self.__doc__ = func.__doc__

        def __call__(self, *args, **kwargs):
            if self.func is None:
                # decorator is waiting to be given a function
                if len(kwargs) or (len(args) != 1):
                    raise TMDBCacheError(
                        'Cache.Cached decorator must be called a single ' +
                        'callable argument before it be used.')
                elif args[0] is None:
                    raise TMDBCacheError(
                        'Cache.Cached decorator called before being given ' +
                        'a function to wrap.')
                elif not callable(args[0]):
                    raise TMDBCacheError(
                        'Cache.Cached must be provided a callable object.')
                return self.__class__(self.cache, self.callback, args[0])
            elif getattr(self.inst, 'lifetime', None) == 0:
                # lifetime of zero means never cache.  ``getattr`` is used
                # because ``inst`` is None for unbound use and may have no
                # ``lifetime`` attribute at all.
                return self.func(*args, **kwargs)
            else:
                key = self.callback()
                data = self.cache.get(key)
                if data is None:
                    data = self.func(*args, **kwargs)
                    if hasattr(self.inst, 'lifetime'):
                        self.cache.put(key, data, self.inst.lifetime)
                    else:
                        self.cache.put(key, data)
                return data

        def __get__(self, inst, owner):
            # Descriptor access: bind both the function and the key
            # callback to the instance they were looked up on.
            if inst is None:
                return self
            func = self.func.__get__(inst, owner)
            callback = self.callback.__get__(inst, owner)
            return self.__class__(self.cache, callback, func, inst)

View File

@@ -1,84 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#-----------------------
# Name: cache_engine.py
# Python Library
# Author: Raymond Wagner
# Purpose: Base cache engine class for collecting registered engines
#-----------------------
import time
from weakref import ref
class Engines(object):
    """
    Static collector for engines to register against.

    A registered engine can be looked up either by its class name or by
    the value of its `name` attribute.
    """
    def __init__(self):
        self._engines = {}

    def register(self, engine):
        """Store `engine` under both of its lookup names."""
        for alias in (engine.__name__, engine.name):
            self._engines[alias] = engine

    def __getitem__(self, key):
        return self._engines[key]

    def __contains__(self, key):
        return key in self._engines

# the class name is rebound to a single shared instance
Engines = Engines()
class CacheEngineType(type):
    """
    Metaclass for cache engines.

    Every class created through it, other than one named 'CacheEngine',
    is registered with the module-level Engines collector so it can be
    selected by name.
    """
    def __init__(cls, name, bases, attrs):
        super(CacheEngineType, cls).__init__(name, bases, attrs)
        if name == 'CacheEngine':
            # the base class itself is not a usable engine
            return
        Engines.register(cls)
class CacheEngine(object):
    """
    Base class for cache engines.

    Every operation raises RuntimeError here; concrete engines override
    configure, get, put and expire.
    """
    __metaclass__ = CacheEngineType
    name = 'unspecified'

    def __init__(self, parent):
        # the parent is stored as a weak reference; call self.parent()
        # to obtain the object
        self.parent = ref(parent)

    def configure(self):
        """Set up the engine. Must be overridden."""
        raise RuntimeError()

    def get(self, date):
        """Fetch stored records for `date`. Must be overridden."""
        raise RuntimeError()

    def put(self, key, value, lifetime):
        """Store `value` under `key` with `lifetime`. Must be overridden."""
        raise RuntimeError()

    def expire(self, key):
        """Expire the record stored under `key`. Must be overridden."""
        raise RuntimeError()
class CacheObject(object):
    """
    One stored cache record: a key, its data, a lifetime and the time
    the record was created.
    """
    def __init__(self, key, data, lifetime=0, creation=None):
        self.key = key
        self.data = data
        self.lifetime = lifetime
        if creation is None:
            # no timestamp supplied, the record is created now
            self.creation = time.time()
        else:
            self.creation = creation

    def __len__(self):
        return len(self.data)

    @property
    def expired(self):
        """True once no lifetime remains."""
        seconds_left = self.remaining
        return seconds_left == 0

    @property
    def remaining(self):
        """Time left until creation + lifetime, never below zero."""
        deadline = self.creation + self.lifetime
        return max(deadline - time.time(), 0)

View File

@@ -1,400 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#-----------------------
# Name: cache_file.py
# Python Library
# Author: Raymond Wagner
# Purpose: Persistent file-backed cache using /tmp/ to share data
# using flock or msvcrt.locking to allow safe concurrent
# access.
#-----------------------
import struct
import errno
import json
import time
import os
import io
from cStringIO import StringIO
from tmdb_exceptions import *
from cache_engine import CacheEngine, CacheObject
####################
# Cache File Format
#------------------
# cache version (2) unsigned short
# slot count (2) unsigned short
# slot 0: timestamp (8) double
# slot 0: lifetime (4) unsigned int
# slot 0: seek point (4) unsigned int
# slot 1: timestamp
# slot 1: lifetime index slots are IDd by their query date and
# slot 1: seek point are filled incrementally forwards. lifetime
# .... is how long after query date before the item
# .... expires, and seek point is the location of the
# slot N-2: timestamp start of data for that entry. 256 empty slots
# slot N-2: lifetime are pre-allocated, allowing fast updates.
# slot N-2: seek point when all slots are filled, the cache file is
# slot N-1: timestamp rewritten from scratch to add more slots.
# slot N-1: lifetime
# slot N-1: seek point
# block 1 (?) ASCII
# block 2
# .... blocks are just simple ASCII text, generated
# .... as independent objects by the JSON encoder
# block N-2
# block N-1
#
####################
def _donothing(*args, **kwargs):
    """Accept any arguments, do nothing and return None."""
    return None
def _is_windows_drive_path(filename):
    """
    Return True when `filename` begins with an ASCII drive letter followed
    by a colon and a backslash (an absolute Windows drive path).
    """
    drive = filename[0:1]  # slicing keeps an empty filename from raising
    is_letter = ('A' <= drive <= 'Z') or ('a' <= drive <= 'z')
    return is_letter and (filename[1:3] == ':\\')


try:
    import fcntl

    class Flock(object):
        """
        Context manager to flock file for the duration the object
        exists. Referenced file will be automatically unflocked as the
        interpreter exits the context.
        Supports an optional callback to process the error and optionally
        suppress it.
        """
        LOCK_EX = fcntl.LOCK_EX
        LOCK_SH = fcntl.LOCK_SH

        def __init__(self, fileobj, operation, callback=None):
            self.fileobj = fileobj
            self.operation = operation
            self.callback = callback

        def __enter__(self):
            fcntl.flock(self.fileobj, self.operation)

        def __exit__(self, exc_type, exc_value, exc_tb):
            suppress = False
            try:
                if callable(self.callback):
                    suppress = self.callback(exc_type, exc_value, exc_tb)
            finally:
                # release the lock even if the callback itself raises
                fcntl.flock(self.fileobj, fcntl.LOCK_UN)
            return suppress

    def parse_filename(filename):
        """
        Resolve a cache filename to a path: environment variables and a
        leading '~' are expanded, absolute paths are returned unchanged,
        and anything else is placed under /tmp/.
        """
        if '$' in filename:
            # replace any environmental variables
            filename = os.path.expandvars(filename)
        if filename.startswith('~'):
            # check for home directory
            return os.path.expanduser(filename)
        elif filename.startswith('/'):
            # check for absolute path
            return filename
        # return path with temp directory prepended
        return '/tmp/' + filename

except ImportError:
    import msvcrt

    class Flock(object):
        """
        Context manager locking a file through msvcrt.locking for the
        duration of the context. Both lock modes map to msvcrt.LK_LOCK.
        Supports an optional callback to process the error and optionally
        suppress it.
        """
        LOCK_EX = msvcrt.LK_LOCK
        LOCK_SH = msvcrt.LK_LOCK

        def __init__(self, fileobj, operation, callback=None):
            self.fileobj = fileobj
            self.operation = operation
            self.callback = callback

        def __enter__(self):
            # remember the locked length so the same range is unlocked
            self.size = os.path.getsize(self.fileobj.name)
            msvcrt.locking(self.fileobj.fileno(), self.operation, self.size)

        def __exit__(self, exc_type, exc_value, exc_tb):
            suppress = False
            try:
                if callable(self.callback):
                    suppress = self.callback(exc_type, exc_value, exc_tb)
            finally:
                # release the lock even if the callback itself raises
                msvcrt.locking(self.fileobj.fileno(), msvcrt.LK_UNLCK,
                               self.size)
            return suppress

    def parse_filename(filename):
        """
        Resolve a cache filename to a path: environment variables and a
        leading '~' are expanded, absolute drive and UNC paths are
        returned unchanged, and anything else is placed under %TEMP%.
        """
        if '%' in filename:
            # replace any environmental variables
            filename = os.path.expandvars(filename)
        if filename.startswith('~'):
            # check for home directory
            return os.path.expanduser(filename)
        elif _is_windows_drive_path(filename):
            # check for absolute drive path (e.g. C:\...)
            return filename
        elif (filename.count('\\') >= 3) and (filename.startswith('\\\\')):
            # check for absolute UNC path (e.g. \\server\...)
            return filename
        # return path with temp directory prepended
        return os.path.expandvars(os.path.join('%TEMP%', filename))
class FileCacheObject(CacheObject):
    """
    Cache record that can be read from and written to a cache file.

    The key and data are kept JSON-encoded in an internal buffer and are
    decoded on first access. `position` is the offset of that JSON block
    within the file; it is assigned by fromFile or by the code writing
    the file.
    """
    _struct = struct.Struct('dII')  # double and two ints
                                    # timestamp, lifetime, position

    @classmethod
    def fromFile(cls, fd):
        """Build a record from the index slot at fd's current position."""
        dat = cls._struct.unpack(fd.read(cls._struct.size))
        obj = cls(None, None, dat[1], dat[0])
        obj.position = dat[2]
        return obj

    def __init__(self, *args, **kwargs):
        # these must exist before the base initializer runs, because it
        # assigns key and data through the properties below
        self._key = None
        self._data = None
        self._size = None
        self._buff = StringIO()
        super(FileCacheObject, self).__init__(*args, **kwargs)

    @property
    def size(self):
        """
        Length of the buffered JSON block.

        If nothing has been buffered yet, the key and data are serialized
        first; RuntimeError is raised when either of them is missing.
        """
        if self._size is None:
            self._buff.seek(0, 2)
            size = self._buff.tell()
            if size == 0:
                if (self._key is None) or (self._data is None):
                    raise RuntimeError
                json.dump([self.key, self.data], self._buff)
                # measure what was just written; keeping the earlier
                # value would record a size of zero
                size = self._buff.tell()
            self._size = size
        return self._size

    @size.setter
    def size(self, value):
        self._size = value

    @property
    def key(self):
        if self._key is None:
            try:
                self._key, self._data = json.loads(self._buff.getvalue())
            except:
                # buffer is empty or unreadable, the key stays None
                pass
        return self._key

    @key.setter
    def key(self, value):
        self._key = value

    @property
    def data(self):
        if self._data is None:
            self._key, self._data = json.loads(self._buff.getvalue())
        return self._data

    @data.setter
    def data(self, value):
        self._data = value

    def load(self, fd):
        """Read this record's JSON block from fd into the buffer."""
        fd.seek(self.position)
        self._buff.seek(0)
        self._buff.write(fd.read(self.size))

    def dumpslot(self, fd):
        """Write the index slot (creation, lifetime, position) to fd."""
        fd.write(self._struct.pack(self.creation, self.lifetime,
                                   self.position))

    def dumpdata(self, fd):
        """Write the JSON block to fd at this record's position."""
        self.size  # evaluated only to make sure the buffer is populated
        fd.seek(self.position)
        fd.write(self._buff.getvalue())
class FileEngine(CacheEngine):
    """
    Simple file-backed engine.

    The cache file starts with a header (version, slot count), followed
    by fixed-size index slots and then the JSON data blocks. Empty slots
    are pre-allocated so new entries can be appended without rewriting
    the whole file.
    """
    name = 'file'
    _struct = struct.Struct('HH')  # two shorts for version and count
    _version = 2

    def __init__(self, parent):
        super(FileEngine, self).__init__(parent)
        self.configure(None)

    def configure(self, filename, preallocate=256):
        """
        Set the cache filename and the number of empty index slots to
        reserve on each full rewrite, and reset the size/free/age state.
        """
        self.preallocate = preallocate
        self.cachefile = filename
        self.size = 0
        self.free = 0
        self.age = 0

    def _init_cache(self):
        """
        Open the cache file, creating it if it does not exist.

        Raises TMDBCacheError when no filename is configured, and
        TMDBCacheReadError, TMDBCacheWriteError or TMDBCacheDirectoryError
        for the corresponding access failures.
        """
        # only run this once
        self._init_cache = _donothing

        if self.cachefile is None:
            raise TMDBCacheError("No cache filename given.")
        self.cachefile = parse_filename(self.cachefile)

        try:
            # attempt to read existing cache at filename
            # handle any errors that occur
            self._open('r+b')
            # seems to have read fine, make sure we have write access
            if not os.access(self.cachefile, os.W_OK):
                raise TMDBCacheWriteError(self.cachefile)

        except IOError as e:
            if e.errno == errno.ENOENT:
                # file does not exist, create a new one
                try:
                    self._open('w+b')
                    self._write([])
                except IOError as e:
                    if e.errno == errno.ENOENT:
                        # directory does not exist
                        raise TMDBCacheDirectoryError(self.cachefile)
                    elif e.errno == errno.EACCES:
                        # user does not have rights to create new file
                        raise TMDBCacheWriteError(self.cachefile)
                    else:
                        # let the unhandled error continue through
                        raise
            elif e.errno == errno.EACCES:
                # file exists, but we do not have permission to access it
                raise TMDBCacheReadError(self.cachefile)
            else:
                # let the unhandled error continue through
                raise

    def get(self, date):
        """Return the cached objects created after `date`."""
        self._init_cache()
        self._open('r+b')

        with Flock(self.cachefd, Flock.LOCK_SH):
            # return any new objects in the cache
            return self._read(date)

    def put(self, key, value, lifetime):
        """
        Store a new record and return the list handed to _write: the
        objects newer than self.age plus the new record (a full rewrite
        also extends that list in place).
        """
        self._init_cache()
        self._open('r+b')

        with Flock(self.cachefd, Flock.LOCK_EX):
            newobjs = self._read(self.age)
            newobjs.append(FileCacheObject(key, value, lifetime))

            # this will cause a new file object to be opened with the proper
            # access mode, however the Flock should keep the old object open
            # and properly locked
            self._open('r+b')
            self._write(newobjs)
            return newobjs

    def _open(self, mode='r+b'):
        # enforce binary operation
        try:
            if self.cachefd.mode == mode:
                # already opened in requested mode, nothing to do
                self.cachefd.seek(0)
                return
        except:
            pass  # catch issue of no cachefile yet opened
        self.cachefd = io.open(self.cachefile, mode)

    def _read(self, date):
        """
        Parse the index and load every unexpired object created after
        `date`. Updates self.size, self.free and self.age; returns an
        empty list when the file cannot be parsed.
        """
        try:
            self.cachefd.seek(0)
            version, count = self._struct.unpack(
                self.cachefd.read(self._struct.size))
            if version != self._version:
                # old version, break out and we'll rewrite when finished
                raise Exception

            self.size = count
            cache = []
            while count:
                # loop through storage definitions
                obj = FileCacheObject.fromFile(self.cachefd)
                cache.append(obj)
                count -= 1

        except:
            # failed to read information, so just discard it and return empty
            self.size = 0
            self.free = 0
            return []

        # get end of file
        self.cachefd.seek(0, 2)
        position = self.cachefd.tell()
        newobjs = []
        emptycount = 0

        # walk backward through all, collecting new content and populating size
        while len(cache):
            obj = cache.pop()
            if obj.creation == 0:
                # unused slot, skip
                emptycount += 1
            elif obj.expired:
                # object has passed expiration date, no sense processing,
                # but its block still bounds the one stored before it, so
                # the end marker must move or the previous object's size
                # would include this data
                position = obj.position
                continue
            elif obj.creation > date:
                # used slot with new data, process
                obj.size, position = position - obj.position, obj.position
                newobjs.append(obj)
                # update age
                self.age = max(self.age, obj.creation)
            elif len(newobjs):
                # end of new data, break
                break

        # walk forward and load new content
        for obj in newobjs:
            obj.load(self.cachefd)

        self.free = emptycount
        return newobjs

    def _write(self, data):
        """
        Persist `data`: append the last object into a free index slot
        when one is available, otherwise rewrite the whole file.
        """
        if self.free and (self.size != self.free):
            # we only care about the last data point, since the rest are
            # already stored in the file
            data = data[-1]

            # determine write position of data in cache
            self.cachefd.seek(0, 2)
            end = self.cachefd.tell()
            data.position = end

            # write incremental update to free slot
            # (4 byte header, 16 bytes per index slot)
            self.cachefd.seek(4 + 16*(self.size-self.free))
            data.dumpslot(self.cachefd)
            data.dumpdata(self.cachefd)

        else:
            # rewrite cache file from scratch
            # pull data from parent cache
            data.extend(self.parent()._data.values())
            data.sort(key=lambda x: x.creation)
            # write header
            size = len(data) + self.preallocate
            self.cachefd.seek(0)
            self.cachefd.truncate()
            self.cachefd.write(self._struct.pack(self._version, size))
            # write storage slot definitions
            prev = None
            for d in data:
                if prev is None:
                    d.position = 4 + 16*size
                else:
                    d.position = prev.position + prev.size
                d.dumpslot(self.cachefd)
                prev = d
            # fill in allocated slots; the count must match the header
            # and the data offsets computed above
            for i in range(self.preallocate):
                self.cachefd.write(FileCacheObject._struct.pack(0, 0, 0))
            # write stored data
            for d in data:
                d.dumpdata(self.cachefd)

        self.cachefd.flush()

    def expire(self, key):
        pass

View File

@@ -1,27 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#-----------------------
# Name: cache_null.py
# Python Library
# Author: Raymond Wagner
# Purpose: Null caching engine for debugging purposes
#-----------------------
from cache_engine import CacheEngine
class NullEngine(CacheEngine):
    """Non-caching engine for debugging: it stores and returns nothing."""
    name = 'null'

    def configure(self):
        """Nothing to configure."""
        return None

    def get(self, date):
        """Always report that there are no records."""
        return list()

    def put(self, key, value, lifetime):
        """Discard the value and report no records."""
        return list()

    def expire(self, key):
        """Nothing is stored, so there is nothing to expire."""
        return None

View File

@@ -1,642 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#-----------------------
# Name: locales.py Stores locale information for filtering results
# Python Library
# Author: Raymond Wagner
#-----------------------
from tmdb_exceptions import *
import locale
syslocale = None
class LocaleBase(object):
    """
    Base for locale records that register themselves by code.

    Instances passed through __init__ are stored in the class-level
    `_stored` mapping under each lowercased key and are then marked
    immutable: further attribute assignment or deletion raises
    NotImplementedError.

    NOTE(review): __lt__ compares with '>' and __gt__ with '<', which
    reverses the natural string ordering -- confirm this is intended.
    """
    __slots__ = ['__immutable']
    _stored = {}
    fallthrough = False

    def __init__(self, *keys):
        for key in keys:
            self._stored[key.lower()] = self
        self.__immutable = True

    def __setattr__(self, key, value):
        # the slot is name-mangled to _LocaleBase__immutable, so a lookup
        # by string has to use the mangled name to ever find it
        if getattr(self, '_LocaleBase__immutable', False):
            raise NotImplementedError(self.__class__.__name__ +
                                      ' does not support modification.')
        super(LocaleBase, self).__setattr__(key, value)

    def __delattr__(self, key):
        if getattr(self, '_LocaleBase__immutable', False):
            raise NotImplementedError(self.__class__.__name__ +
                                      ' does not support modification.')
        super(LocaleBase, self).__delattr__(key)

    def __lt__(self, other):
        return (id(self) != id(other)) and (str(self) > str(other))

    def __gt__(self, other):
        return (id(self) != id(other)) and (str(self) < str(other))

    def __eq__(self, other):
        return (id(self) == id(other)) or (str(self) == str(other))

    @classmethod
    def getstored(cls, key):
        """
        Return the instance registered under `key` (case-insensitive),
        or None when `key` is None.

        Raises TMDBLocaleError when the key is unknown or is not a string.
        """
        if key is None:
            return None
        try:
            return cls._stored[key.lower()]
        except (KeyError, AttributeError):
            raise TMDBLocaleError("'{0}' is not a known valid {1} code."\
                                  .format(key, cls.__name__))
class Language(LocaleBase):
    """
    A language, registered under its ISO 639-1 and ISO 639-2 codes.

    The ISO639_2B and nativename slots are declared but are not
    populated by the constructor.
    """
    __slots__ = ['ISO639_1', 'ISO639_2', 'ISO639_2B', 'englishname',
                 'nativename']
    _stored = {}

    def __init__(self, iso1, iso2, ename):
        self.ISO639_1, self.ISO639_2 = iso1, iso2
        self.englishname = ename
        # registration comes last, after every attribute is assigned
        super(Language, self).__init__(iso1, iso2)

    def __str__(self):
        return self.ISO639_1

    def __repr__(self):
        template = u"<Language '{0.englishname}' ({0.ISO639_1})>"
        return template.format(self)
class Country(LocaleBase):
    """A country, registered under its two-letter alpha2 code."""
    __slots__ = ['alpha2', 'name']
    _stored = {}

    def __init__(self, alpha2, name):
        self.alpha2, self.name = alpha2, name
        # registration comes last, after every attribute is assigned
        super(Country, self).__init__(alpha2)

    def __str__(self):
        return self.alpha2

    def __repr__(self):
        template = u"<Country '{0.name}' ({0.alpha2})>"
        return template.format(self)
class Locale(LocaleBase):
    """
    Pairing of a stored Language and Country with a text encoding.

    The language and country codes are resolved through
    Language.getstored and Country.getstored; an empty encoding falls
    back to 'latin-1'.
    """
    __slots__ = ['language', 'country', 'encoding']

    def __init__(self, language, country, encoding):
        self.language = Language.getstored(language)
        self.country = Country.getstored(country)
        self.encoding = encoding or 'latin-1'

    def __str__(self):
        return u"{0}_{1}".format(self.language, self.country)

    def __repr__(self):
        return u"<Locale {0.language}_{0.country}>".format(self)

    def encode(self, dat):
        """Encode using system default encoding for network/file output."""
        try:
            encoded = dat.encode(self.encoding)
        except (AttributeError, UnicodeDecodeError):
            # either not a string type, or it could not be converted:
            # return unmodified and hope for the best
            return dat
        return encoded

    def decode(self, dat):
        """Decode to system default encoding for internal use."""
        try:
            decoded = dat.decode(self.encoding)
        except (AttributeError, UnicodeEncodeError):
            # either not a string type, or it could not be converted:
            # return unmodified and hope for the best
            return dat
        return decoded
def set_locale(language=None, country=None, fallthrough=False):
    """
    Replace the module-level `syslocale` and set LocaleBase.fallthrough.

    A language or country left as None is filled in from the current
    `syslocale` if one is set, otherwise from the system default locale,
    otherwise from ('en', 'US').
    """
    global syslocale
    LocaleBase.fallthrough = fallthrough

    sysloc, sysenc = locale.getdefaultlocale()

    if not (language and country):
        if syslocale is not None:
            fallback = (str(syslocale.language), str(syslocale.country))
        elif (sysloc is not None) and ('_' in sysloc):
            fallback = sysloc.split('_')
        else:
            fallback = ('en', 'US')

        if language is None:
            language = fallback[0]
        if country is None:
            country = fallback[1]

    syslocale = Locale(language, country, sysenc)
def get_locale(language=-1, country=-1):
    """
    Output locale using provided attributes, or return system locale.

    -1 means "not supplied". When only one of the two is supplied, the
    other is taken from the stored locale and a new Locale is returned
    with the stored encoding.
    """
    global syslocale
    # pull existing stored values
    if syslocale is None:
        loc = Locale(None, None, locale.getdefaultlocale()[1])
    else:
        loc = syslocale

    # both options are default, return stored values
    if language == country == -1:
        return loc

    # supplement default option with stored values
    # Locale() resolves codes, not Language/Country objects, so the stored
    # object is converted back to its code (None is passed through as is)
    if language == -1:
        stored = loc.language
        language = None if stored is None else str(stored)
    elif country == -1:
        stored = loc.country
        country = None if stored is None else str(stored)
    return Locale(language, country, loc.encoding)
######## AUTOGENERATED LANGUAGE AND COUNTRY DATA BELOW HERE #########
# Each Language(...) call registers the new instance in Language._stored
# under its lowercased first and second arguments; when a code appears more
# than once, the later entry replaces the earlier one in the lookup table.
Language("ab", "abk", u"Abkhazian")
Language("aa", "aar", u"Afar")
Language("af", "afr", u"Afrikaans")
Language("ak", "aka", u"Akan")
Language("sq", "alb/sqi", u"Albanian")
Language("am", "amh", u"Amharic")
Language("ar", "ara", u"Arabic")
Language("an", "arg", u"Aragonese")
Language("hy", "arm/hye", u"Armenian")
Language("as", "asm", u"Assamese")
Language("av", "ava", u"Avaric")
Language("ae", "ave", u"Avestan")
Language("ay", "aym", u"Aymara")
Language("az", "aze", u"Azerbaijani")
Language("bm", "bam", u"Bambara")
Language("ba", "bak", u"Bashkir")
Language("eu", "baq/eus", u"Basque")
Language("be", "bel", u"Belarusian")
Language("bn", "ben", u"Bengali")
Language("bh", "bih", u"Bihari languages")
Language("bi", "bis", u"Bislama")
Language("nb", "nob", u"Bokmål, Norwegian")
Language("bs", "bos", u"Bosnian")
Language("br", "bre", u"Breton")
Language("bg", "bul", u"Bulgarian")
Language("my", "bur/mya", u"Burmese")
Language("es", "spa", u"Castilian")
Language("ca", "cat", u"Catalan")
Language("km", "khm", u"Central Khmer")
Language("ch", "cha", u"Chamorro")
Language("ce", "che", u"Chechen")
Language("ny", "nya", u"Chewa")
Language("ny", "nya", u"Chichewa")
Language("zh", "chi/zho", u"Chinese")
Language("za", "zha", u"Chuang")
Language("cu", "chu", u"Church Slavic")
Language("cu", "chu", u"Church Slavonic")
Language("cv", "chv", u"Chuvash")
Language("kw", "cor", u"Cornish")
Language("co", "cos", u"Corsican")
Language("cr", "cre", u"Cree")
Language("hr", "hrv", u"Croatian")
Language("cs", "cze/ces", u"Czech")
Language("da", "dan", u"Danish")
Language("dv", "div", u"Dhivehi")
Language("dv", "div", u"Divehi")
Language("nl", "dut/nld", u"Dutch")
Language("dz", "dzo", u"Dzongkha")
Language("en", "eng", u"English")
Language("eo", "epo", u"Esperanto")
Language("et", "est", u"Estonian")
Language("ee", "ewe", u"Ewe")
Language("fo", "fao", u"Faroese")
Language("fj", "fij", u"Fijian")
Language("fi", "fin", u"Finnish")
Language("nl", "dut/nld", u"Flemish")
Language("fr", "fre/fra", u"French")
Language("ff", "ful", u"Fulah")
Language("gd", "gla", u"Gaelic")
Language("gl", "glg", u"Galician")
Language("lg", "lug", u"Ganda")
Language("ka", "geo/kat", u"Georgian")
Language("de", "ger/deu", u"German")
Language("ki", "kik", u"Gikuyu")
Language("el", "gre/ell", u"Greek, Modern (1453-)")
Language("kl", "kal", u"Greenlandic")
Language("gn", "grn", u"Guarani")
Language("gu", "guj", u"Gujarati")
Language("ht", "hat", u"Haitian")
Language("ht", "hat", u"Haitian Creole")
Language("ha", "hau", u"Hausa")
Language("he", "heb", u"Hebrew")
Language("hz", "her", u"Herero")
Language("hi", "hin", u"Hindi")
Language("ho", "hmo", u"Hiri Motu")
Language("hu", "hun", u"Hungarian")
Language("is", "ice/isl", u"Icelandic")
Language("io", "ido", u"Ido")
Language("ig", "ibo", u"Igbo")
Language("id", "ind", u"Indonesian")
Language("ia", "ina", u"Interlingua (International Auxiliary Language Association)")
Language("ie", "ile", u"Interlingue")
Language("iu", "iku", u"Inuktitut")
Language("ik", "ipk", u"Inupiaq")
Language("ga", "gle", u"Irish")
Language("it", "ita", u"Italian")
Language("ja", "jpn", u"Japanese")
Language("jv", "jav", u"Javanese")
Language("kl", "kal", u"Kalaallisut")
Language("kn", "kan", u"Kannada")
Language("kr", "kau", u"Kanuri")
Language("ks", "kas", u"Kashmiri")
Language("kk", "kaz", u"Kazakh")
Language("ki", "kik", u"Kikuyu")
Language("rw", "kin", u"Kinyarwanda")
Language("ky", "kir", u"Kirghiz")
Language("kv", "kom", u"Komi")
Language("kg", "kon", u"Kongo")
Language("ko", "kor", u"Korean")
Language("kj", "kua", u"Kuanyama")
Language("ku", "kur", u"Kurdish")
Language("kj", "kua", u"Kwanyama")
Language("ky", "kir", u"Kyrgyz")
Language("lo", "lao", u"Lao")
Language("la", "lat", u"Latin")
Language("lv", "lav", u"Latvian")
Language("lb", "ltz", u"Letzeburgesch")
Language("li", "lim", u"Limburgan")
Language("li", "lim", u"Limburger")
Language("li", "lim", u"Limburgish")
Language("ln", "lin", u"Lingala")
Language("lt", "lit", u"Lithuanian")
Language("lu", "lub", u"Luba-Katanga")
Language("lb", "ltz", u"Luxembourgish")
Language("mk", "mac/mkd", u"Macedonian")
Language("mg", "mlg", u"Malagasy")
Language("ms", "may/msa", u"Malay")
Language("ml", "mal", u"Malayalam")
Language("dv", "div", u"Maldivian")
Language("mt", "mlt", u"Maltese")
Language("gv", "glv", u"Manx")
Language("mi", "mao/mri", u"Maori")
Language("mr", "mar", u"Marathi")
Language("mh", "mah", u"Marshallese")
Language("ro", "rum/ron", u"Moldavian")
Language("ro", "rum/ron", u"Moldovan")
Language("mn", "mon", u"Mongolian")
Language("na", "nau", u"Nauru")
Language("nv", "nav", u"Navaho")
Language("nv", "nav", u"Navajo")
Language("nd", "nde", u"Ndebele, North")
Language("nr", "nbl", u"Ndebele, South")
Language("ng", "ndo", u"Ndonga")
Language("ne", "nep", u"Nepali")
Language("nd", "nde", u"North Ndebele")
Language("se", "sme", u"Northern Sami")
Language("no", "nor", u"Norwegian")
Language("nb", "nob", u"Norwegian Bokmål")
Language("nn", "nno", u"Norwegian Nynorsk")
Language("ii", "iii", u"Nuosu")
Language("ny", "nya", u"Nyanja")
Language("nn", "nno", u"Nynorsk, Norwegian")
Language("ie", "ile", u"Occidental")
Language("oc", "oci", u"Occitan (post 1500)")
Language("oj", "oji", u"Ojibwa")
Language("cu", "chu", u"Old Bulgarian")
Language("cu", "chu", u"Old Church Slavonic")
Language("cu", "chu", u"Old Slavonic")
Language("or", "ori", u"Oriya")
Language("om", "orm", u"Oromo")
Language("os", "oss", u"Ossetian")
Language("os", "oss", u"Ossetic")
Language("pi", "pli", u"Pali")
Language("pa", "pan", u"Panjabi")
Language("ps", "pus", u"Pashto")
Language("fa", "per/fas", u"Persian")
Language("pl", "pol", u"Polish")
Language("pt", "por", u"Portuguese")
Language("pa", "pan", u"Punjabi")
Language("ps", "pus", u"Pushto")
Language("qu", "que", u"Quechua")
Language("ro", "rum/ron", u"Romanian")
Language("rm", "roh", u"Romansh")
Language("rn", "run", u"Rundi")
Language("ru", "rus", u"Russian")
Language("sm", "smo", u"Samoan")
Language("sg", "sag", u"Sango")
Language("sa", "san", u"Sanskrit")
Language("sc", "srd", u"Sardinian")
Language("gd", "gla", u"Scottish Gaelic")
Language("sr", "srp", u"Serbian")
Language("sn", "sna", u"Shona")
Language("ii", "iii", u"Sichuan Yi")
Language("sd", "snd", u"Sindhi")
Language("si", "sin", u"Sinhala")
Language("si", "sin", u"Sinhalese")
Language("sk", "slo/slk", u"Slovak")
Language("sl", "slv", u"Slovenian")
Language("so", "som", u"Somali")
Language("st", "sot", u"Sotho, Southern")
Language("nr", "nbl", u"South Ndebele")
Language("es", "spa", u"Spanish")
Language("su", "sun", u"Sundanese")
Language("sw", "swa", u"Swahili")
Language("ss", "ssw", u"Swati")
Language("sv", "swe", u"Swedish")
Language("tl", "tgl", u"Tagalog")
Language("ty", "tah", u"Tahitian")
Language("tg", "tgk", u"Tajik")
Language("ta", "tam", u"Tamil")
Language("tt", "tat", u"Tatar")
Language("te", "tel", u"Telugu")
Language("th", "tha", u"Thai")
Language("bo", "tib/bod", u"Tibetan")
Language("ti", "tir", u"Tigrinya")
Language("to", "ton", u"Tonga (Tonga Islands)")
Language("ts", "tso", u"Tsonga")
Language("tn", "tsn", u"Tswana")
Language("tr", "tur", u"Turkish")
Language("tk", "tuk", u"Turkmen")
Language("tw", "twi", u"Twi")
Language("ug", "uig", u"Uighur")
Language("uk", "ukr", u"Ukrainian")
Language("ur", "urd", u"Urdu")
Language("ug", "uig", u"Uyghur")
Language("uz", "uzb", u"Uzbek")
Language("ca", "cat", u"Valencian")
Language("ve", "ven", u"Venda")
Language("vi", "vie", u"Vietnamese")
Language("vo", "vol", u"Volapük")
Language("wa", "wln", u"Walloon")
Language("cy", "wel/cym", u"Welsh")
Language("fy", "fry", u"Western Frisian")
Language("wo", "wol", u"Wolof")
Language("xh", "xho", u"Xhosa")
Language("yi", "yid", u"Yiddish")
Language("yo", "yor", u"Yoruba")
Language("za", "zha", u"Zhuang")
Language("zu", "zul", u"Zulu")
# Each Country(...) call registers the new instance in Country._stored under
# its lowercased alpha2 code.
Country("AF", u"AFGHANISTAN")
Country("AX", u"ÅLAND ISLANDS")
Country("AL", u"ALBANIA")
Country("DZ", u"ALGERIA")
Country("AS", u"AMERICAN SAMOA")
Country("AD", u"ANDORRA")
Country("AO", u"ANGOLA")
Country("AI", u"ANGUILLA")
Country("AQ", u"ANTARCTICA")
Country("AG", u"ANTIGUA AND BARBUDA")
Country("AR", u"ARGENTINA")
Country("AM", u"ARMENIA")
Country("AW", u"ARUBA")
Country("AU", u"AUSTRALIA")
Country("AT", u"AUSTRIA")
Country("AZ", u"AZERBAIJAN")
Country("BS", u"BAHAMAS")
Country("BH", u"BAHRAIN")
Country("BD", u"BANGLADESH")
Country("BB", u"BARBADOS")
Country("BY", u"BELARUS")
Country("BE", u"BELGIUM")
Country("BZ", u"BELIZE")
Country("BJ", u"BENIN")
Country("BM", u"BERMUDA")
Country("BT", u"BHUTAN")
Country("BO", u"BOLIVIA, PLURINATIONAL STATE OF")
Country("BQ", u"BONAIRE, SINT EUSTATIUS AND SABA")
Country("BA", u"BOSNIA AND HERZEGOVINA")
Country("BW", u"BOTSWANA")
Country("BV", u"BOUVET ISLAND")
Country("BR", u"BRAZIL")
Country("IO", u"BRITISH INDIAN OCEAN TERRITORY")
Country("BN", u"BRUNEI DARUSSALAM")
Country("BG", u"BULGARIA")
Country("BF", u"BURKINA FASO")
Country("BI", u"BURUNDI")
Country("KH", u"CAMBODIA")
Country("CM", u"CAMEROON")
Country("CA", u"CANADA")
Country("CV", u"CAPE VERDE")
Country("KY", u"CAYMAN ISLANDS")
Country("CF", u"CENTRAL AFRICAN REPUBLIC")
Country("TD", u"CHAD")
Country("CL", u"CHILE")
Country("CN", u"CHINA")
Country("CX", u"CHRISTMAS ISLAND")
Country("CC", u"COCOS (KEELING) ISLANDS")
Country("CO", u"COLOMBIA")
Country("KM", u"COMOROS")
Country("CG", u"CONGO")
Country("CD", u"CONGO, THE DEMOCRATIC REPUBLIC OF THE")
Country("CK", u"COOK ISLANDS")
Country("CR", u"COSTA RICA")
Country("CI", u"CÔTE D'IVOIRE")
Country("HR", u"CROATIA")
Country("CU", u"CUBA")
Country("CW", u"CURAÇAO")
Country("CY", u"CYPRUS")
Country("CZ", u"CZECH REPUBLIC")
Country("DK", u"DENMARK")
Country("DJ", u"DJIBOUTI")
Country("DM", u"DOMINICA")
Country("DO", u"DOMINICAN REPUBLIC")
Country("EC", u"ECUADOR")
Country("EG", u"EGYPT")
Country("SV", u"EL SALVADOR")
Country("GQ", u"EQUATORIAL GUINEA")
Country("ER", u"ERITREA")
Country("EE", u"ESTONIA")
Country("ET", u"ETHIOPIA")
Country("FK", u"FALKLAND ISLANDS (MALVINAS)")
Country("FO", u"FAROE ISLANDS")
Country("FJ", u"FIJI")
Country("FI", u"FINLAND")
Country("FR", u"FRANCE")
Country("GF", u"FRENCH GUIANA")
Country("PF", u"FRENCH POLYNESIA")
Country("TF", u"FRENCH SOUTHERN TERRITORIES")
Country("GA", u"GABON")
Country("GM", u"GAMBIA")
Country("GE", u"GEORGIA")
Country("DE", u"GERMANY")
Country("GH", u"GHANA")
Country("GI", u"GIBRALTAR")
Country("GR", u"GREECE")
Country("GL", u"GREENLAND")
Country("GD", u"GRENADA")
Country("GP", u"GUADELOUPE")
Country("GU", u"GUAM")
Country("GT", u"GUATEMALA")
Country("GG", u"GUERNSEY")
Country("GN", u"GUINEA")
Country("GW", u"GUINEA-BISSAU")
Country("GY", u"GUYANA")
Country("HT", u"HAITI")
Country("HM", u"HEARD ISLAND AND MCDONALD ISLANDS")
Country("VA", u"HOLY SEE (VATICAN CITY STATE)")
Country("HN", u"HONDURAS")
Country("HK", u"HONG KONG")
Country("HU", u"HUNGARY")
Country("IS", u"ICELAND")
Country("IN", u"INDIA")
Country("ID", u"INDONESIA")
Country("IR", u"IRAN, ISLAMIC REPUBLIC OF")
Country("IQ", u"IRAQ")
Country("IE", u"IRELAND")
Country("IM", u"ISLE OF MAN")
Country("IL", u"ISRAEL")
Country("IT", u"ITALY")
Country("JM", u"JAMAICA")
Country("JP", u"JAPAN")
Country("JE", u"JERSEY")
Country("JO", u"JORDAN")
Country("KZ", u"KAZAKHSTAN")
Country("KE", u"KENYA")
Country("KI", u"KIRIBATI")
Country("KP", u"KOREA, DEMOCRATIC PEOPLE'S REPUBLIC OF")
Country("KR", u"KOREA, REPUBLIC OF")
Country("KW", u"KUWAIT")
Country("KG", u"KYRGYZSTAN")
Country("LA", u"LAO PEOPLE'S DEMOCRATIC REPUBLIC")
Country("LV", u"LATVIA")
Country("LB", u"LEBANON")
Country("LS", u"LESOTHO")
Country("LR", u"LIBERIA")
Country("LY", u"LIBYA")
Country("LI", u"LIECHTENSTEIN")
Country("LT", u"LITHUANIA")
Country("LU", u"LUXEMBOURG")
Country("MO", u"MACAO")
Country("MK", u"MACEDONIA, THE FORMER YUGOSLAV REPUBLIC OF")
Country("MG", u"MADAGASCAR")
Country("MW", u"MALAWI")
Country("MY", u"MALAYSIA")
Country("MV", u"MALDIVES")
Country("ML", u"MALI")
Country("MT", u"MALTA")
Country("MH", u"MARSHALL ISLANDS")
Country("MQ", u"MARTINIQUE")
Country("MR", u"MAURITANIA")
Country("MU", u"MAURITIUS")
Country("YT", u"MAYOTTE")
Country("MX", u"MEXICO")
Country("FM", u"MICRONESIA, FEDERATED STATES OF")
Country("MD", u"MOLDOVA, REPUBLIC OF")
Country("MC", u"MONACO")
Country("MN", u"MONGOLIA")
Country("ME", u"MONTENEGRO")
Country("MS", u"MONTSERRAT")
Country("MA", u"MOROCCO")
Country("MZ", u"MOZAMBIQUE")
Country("MM", u"MYANMAR")
Country("NA", u"NAMIBIA")
Country("NR", u"NAURU")
Country("NP", u"NEPAL")
Country("NL", u"NETHERLANDS")
Country("NC", u"NEW CALEDONIA")
Country("NZ", u"NEW ZEALAND")
Country("NI", u"NICARAGUA")
Country("NE", u"NIGER")
Country("NG", u"NIGERIA")
Country("NU", u"NIUE")
Country("NF", u"NORFOLK ISLAND")
Country("MP", u"NORTHERN MARIANA ISLANDS")
Country("NO", u"NORWAY")
Country("OM", u"OMAN")
Country("PK", u"PAKISTAN")
Country("PW", u"PALAU")
Country("PS", u"PALESTINIAN TERRITORY, OCCUPIED")
Country("PA", u"PANAMA")
Country("PG", u"PAPUA NEW GUINEA")
Country("PY", u"PARAGUAY")
Country("PE", u"PERU")
Country("PH", u"PHILIPPINES")
Country("PN", u"PITCAIRN")
Country("PL", u"POLAND")
Country("PT", u"PORTUGAL")
Country("PR", u"PUERTO RICO")
Country("QA", u"QATAR")
Country("RE", u"RÉUNION")
Country("RO", u"ROMANIA")
Country("RU", u"RUSSIAN FEDERATION")
Country("RW", u"RWANDA")
Country("BL", u"SAINT BARTHÉLEMY")
Country("SH", u"SAINT HELENA, ASCENSION AND TRISTAN DA CUNHA")
Country("KN", u"SAINT KITTS AND NEVIS")
Country("LC", u"SAINT LUCIA")
Country("MF", u"SAINT MARTIN (FRENCH PART)")
Country("PM", u"SAINT PIERRE AND MIQUELON")
Country("VC", u"SAINT VINCENT AND THE GRENADINES")
Country("WS", u"SAMOA")
Country("SM", u"SAN MARINO")
Country("ST", u"SAO TOME AND PRINCIPE")
Country("SA", u"SAUDI ARABIA")
Country("SN", u"SENEGAL")
Country("RS", u"SERBIA")
Country("SC", u"SEYCHELLES")
Country("SL", u"SIERRA LEONE")
Country("SG", u"SINGAPORE")
Country("SX", u"SINT MAARTEN (DUTCH PART)")
Country("SK", u"SLOVAKIA")
Country("SI", u"SLOVENIA")
Country("SB", u"SOLOMON ISLANDS")
Country("SO", u"SOMALIA")
Country("ZA", u"SOUTH AFRICA")
Country("GS", u"SOUTH GEORGIA AND THE SOUTH SANDWICH ISLANDS")
Country("SS", u"SOUTH SUDAN")
Country("ES", u"SPAIN")
Country("LK", u"SRI LANKA")
Country("SD", u"SUDAN")
Country("SR", u"SURINAME")
Country("SJ", u"SVALBARD AND JAN MAYEN")
Country("SZ", u"SWAZILAND")
Country("SE", u"SWEDEN")
Country("CH", u"SWITZERLAND")
Country("SY", u"SYRIAN ARAB REPUBLIC")
Country("TW", u"TAIWAN, PROVINCE OF CHINA")
Country("TJ", u"TAJIKISTAN")
Country("TZ", u"TANZANIA, UNITED REPUBLIC OF")
Country("TH", u"THAILAND")
Country("TL", u"TIMOR-LESTE")
Country("TG", u"TOGO")
Country("TK", u"TOKELAU")
Country("TO", u"TONGA")
Country("TT", u"TRINIDAD AND TOBAGO")
Country("TN", u"TUNISIA")
Country("TR", u"TURKEY")
Country("TM", u"TURKMENISTAN")
Country("TC", u"TURKS AND CAICOS ISLANDS")
Country("TV", u"TUVALU")
Country("UG", u"UGANDA")
Country("UA", u"UKRAINE")
Country("AE", u"UNITED ARAB EMIRATES")
Country("GB", u"UNITED KINGDOM")
Country("US", u"UNITED STATES")
Country("UM", u"UNITED STATES MINOR OUTLYING ISLANDS")
Country("UY", u"URUGUAY")
Country("UZ", u"UZBEKISTAN")
Country("VU", u"VANUATU")
Country("VE", u"VENEZUELA, BOLIVARIAN REPUBLIC OF")
Country("VN", u"VIET NAM")
Country("VG", u"VIRGIN ISLANDS, BRITISH")
Country("VI", u"VIRGIN ISLANDS, U.S.")
Country("WF", u"WALLIS AND FUTUNA")
Country("EH", u"WESTERN SAHARA")
Country("YE", u"YEMEN")
Country("ZM", u"ZAMBIA")
Country("ZW", u"ZIMBABWE")

View File

@@ -1,116 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#-----------------------
# Name: pager.py List-like structure designed for handling paged results
# Python Library
# Author: Raymond Wagner
#-----------------------
from collections import Sequence, Iterator
class PagedIterator(Iterator):
    """
    Iterator over an indexable parent, walking indices 0..len(parent)-1.

    The length is read once at construction. Once exhausted, the
    iterator keeps raising StopIteration.
    """
    def __init__(self, parent):
        self._parent = parent
        self._index = -1
        self._len = len(parent)

    def __iter__(self):
        return self

    def next(self):
        # check before advancing so that repeated calls after exhaustion
        # never index past the end of the parent
        if self._index + 1 >= self._len:
            raise StopIteration
        self._index += 1
        return self._parent[self._index]

    # Python 3 spelling of the iterator protocol
    __next__ = next
class UnpagedData(object):
    """
    Placeholder object. Multiplying it by an integer (from either side)
    yields a generator producing that many fresh copies.
    """
    def copy(self):
        return self.__class__()

    def __mul__(self, other):
        return (self.copy() for _ in range(other))

    # multiplication behaves the same with the operands swapped
    __rmul__ = __mul__
class PagedList(Sequence):
    """
    List-like object, with support for automatically grabbing
    additional pages from a data source.

    Subclasses provide ``_getpage(page)``, returning the items of the
    given 1-based page. Items are stored in ``self._data``; slots for
    pages that were skipped are filled with ``UnpagedData`` placeholders
    until they are actually requested.
    """
    _iter_class = None

    def __iter__(self):
        if self._iter_class is None:
            # Build a dedicated iterator type named after the concrete class.
            self._iter_class = type(self.__class__.__name__ + 'Iterator',
                                    (PagedIterator,), {})
        return self._iter_class(self)

    def __len__(self):
        # ``_len`` is only set by subclasses that know the total number of
        # results; fall back to what has been loaded so far.
        try:
            return self._len
        except AttributeError:
            return len(self._data)

    def __init__(self, iterable, pagesize=20):
        self._data = list(iterable)
        self._pagesize = pagesize

    def __getitem__(self, index):
        """Return the item (or list of items for a slice) at ``index``.

        Raises IndexError when ``index`` lies outside ``len(self)``.
        """
        if isinstance(index, slice):
            return [self[x] for x in range(*index.indices(len(self)))]
        if index < 0:
            # Resolve negative indices against the full length, not against
            # the part of the data that happens to be loaded already.
            index += len(self)
            if index < 0:
                raise IndexError("list index outside range")
        if index >= len(self):
            raise IndexError("list index outside range")
        if (index >= len(self._data)) \
                or isinstance(self._data[index], UnpagedData):
            # Floor division keeps the page number an integer on Python 3.
            self._populatepage(index // self._pagesize + 1)
        return self._data[index]

    def __setitem__(self, index, value):
        raise NotImplementedError

    def __delitem__(self, index):
        raise NotImplementedError

    def __contains__(self, item):
        raise NotImplementedError

    def _populatepage(self, page):
        """Load 1-based ``page`` into ``self._data``."""
        pagestart = (page - 1) * self._pagesize
        if len(self._data) < pagestart:
            # Pad the gap up to the requested page with placeholders.
            self._data.extend(UnpagedData() * (pagestart - len(self._data)))
        if len(self._data) == pagestart:
            self._data.extend(self._getpage(page))
        else:
            # The page's slots already exist; overwrite them in place.
            for data in self._getpage(page):
                self._data[pagestart] = data
                pagestart += 1

    def _getpage(self, page):
        raise NotImplementedError("PagedList._getpage() must be provided " +
                                  "by subclass")
class PagedRequest(PagedList):
    """
    Derived PagedList that provides a list-like object with automatic
    paging intended for use with search requests.
    """

    def __init__(self, request, handler=None):
        """Remember the request and optional item handler, then load page 1
        with a fixed page size of 20."""
        self._request = request
        if handler:
            self._handler = handler
        first_page = self._getpage(1)
        super(PagedRequest, self).__init__(first_page, 20)

    def _getpage(self, page):
        """Yield the handled results of ``page``; ``None`` entries pass through.

        Also records the response's 'total_results' as the list length.
        """
        response = self._request.new(page=page).readJSON()
        self._len = response['total_results']
        for entry in response['results']:
            yield None if entry is None else self._handler(entry)

View File

@@ -1,167 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#-----------------------
# Name: tmdb_request.py
# Python Library
# Author: Raymond Wagner
# Purpose: Wrapped urllib2.Request class pre-configured for accessing the
# TMDb v3 API
#-----------------------
from tmdb_exceptions import *
from locales import get_locale
from cache import Cache
from urllib import urlencode
import urllib2
import json
import os
# When True, Request.open() prints each URL it loads and Request.readJSON()
# pretty-prints the decoded response.
DEBUG = False
# Module-wide cache object; it decorates Request.readJSON and is reconfigured
# through set_cache().
cache = Cache(filename='pytmdb3.cache')
# Alternative development settings (debug output on, 'null' cache engine --
# presumably disables caching; confirm against the cache module):
#DEBUG = True
#cache = Cache(engine='null')
def set_key(key):
    """
    Specify the API key to use retrieving data from themoviedb.org.
    This key must be set before any calls will function.

    Raises TMDBKeyInvalid when the key is not 32 characters long or
    cannot be parsed as a hexadecimal number.
    """
    message = "Specified API key must be 128-bit hex"
    if len(key) != 32:
        raise TMDBKeyInvalid(message)
    try:
        int(key, 16)
    except (TypeError, ValueError):
        # Only conversion failures mean "invalid key"; anything else
        # (e.g. KeyboardInterrupt) must not be masked.
        raise TMDBKeyInvalid(message)
    # Stored on the class, so every Request instance shares the key.
    Request._api_key = key
def set_cache(engine=None, *args, **kwargs):
    """Reconfigure the module-level cache with the given engine and options.

    All arguments are forwarded unchanged to ``cache.configure``.
    """
    configure = cache.configure
    configure(engine, *args, **kwargs)
class Request(urllib2.Request):
    """urllib2 request pre-configured for the TMDb v3 API.

    The API key is shared by all instances through the ``_api_key`` class
    attribute (see ``set_key``). ``lifetime`` is set to 3600 on every new
    request; it is not read inside this class.
    """
    _api_key = None
    _base_url = "http://api.themoviedb.org/3/"

    @property
    def api_key(self):
        """The configured API key; raises TMDBKeyMissing when unset."""
        if self._api_key is None:
            raise TMDBKeyMissing("API key must be specified before " +
                                 "requests can be made")
        return self._api_key

    def __init__(self, url, **kwargs):
        """
        Return a request object, using specified API path and
        arguments.

        Keyword arguments whose value is None are dropped; the rest are
        encoded through the current locale and sent as the query string.
        """
        kwargs['api_key'] = self.api_key
        self._url = url.lstrip('/')
        # Keep the un-encoded arguments so new() can derive requests.
        self._kwargs = dict((kwa, kwv) for kwa, kwv in kwargs.items()
                            if kwv is not None)

        locale = get_locale()
        kwargs = {}
        for k, v in self._kwargs.items():
            kwargs[k] = locale.encode(v)
        url = '{0}{1}?{2}'\
            .format(self._base_url, self._url, urlencode(kwargs))

        urllib2.Request.__init__(self, url)
        self.add_header('Accept', 'application/json')
        self.lifetime = 3600  # 1hr

    def new(self, **kwargs):
        """
        Create a new instance of the request, with tweaked arguments.

        A keyword set to None removes that argument from the copy.
        """
        args = dict(self._kwargs)
        for k, v in kwargs.items():
            if v is None:
                if k in args:
                    del args[k]
            else:
                args[k] = v
        obj = self.__class__(self._url, **args)
        obj.lifetime = self.lifetime
        return obj

    def add_data(self, data):
        """Provide data to be sent with POST."""
        urllib2.Request.add_data(self, urlencode(data))

    def open(self):
        """Open a file object to the specified URL.

        Raises TMDBHTTPError wrapping any urllib2.HTTPError.
        """
        try:
            if DEBUG:
                # Single-argument print() behaves identically on Python 2
                # and Python 3.
                print('loading ' + self.get_full_url())
                if self.has_data():
                    print(' ' + self.get_data())
            return urllib2.urlopen(self)
        except urllib2.HTTPError as e:
            raise TMDBHTTPError(e)

    def read(self):
        """Return result from specified URL as a string."""
        return self.open().read()

    @cache.cached(urllib2.Request.get_full_url)
    def readJSON(self):
        """Parse result from specified URL as JSON data.

        On an HTTP error the error body is parsed as well, so that a
        TMDB-specific error can be raised by handle_status(); otherwise
        the original TMDBHTTPError is re-raised.
        """
        url = self.get_full_url()
        try:
            # catch HTTP error from open()
            data = json.load(self.open())
        except TMDBHTTPError as e:
            try:
                # try to load whatever was returned
                data = json.loads(e.response)
            except Exception:
                # cannot parse json, just raise existing error
                raise e
            else:
                # response parsed, try to raise error from TMDB
                handle_status(data, url)
            # no error from TMDB, just raise existing error
            raise e
        handle_status(data, url)
        if DEBUG:
            import pprint
            pprint.PrettyPrinter().pprint(data)
        return data
# Maps the 'status_code' field of a TMDb response to the exception that
# handle_status() raises for it; None means "no error".
status_handlers = {
    1: None,
    2: TMDBRequestInvalid('Invalid service - This service does not exist.'),
    3: TMDBRequestError('Authentication Failed - You do not have ' +
                        'permissions to access this service.'),
    4: TMDBRequestInvalid("Invalid format - This service doesn't exist " +
                          'in that format.'),
    5: TMDBRequestInvalid('Invalid parameters - Your request parameters ' +
                          'are incorrect.'),
    6: TMDBRequestInvalid('Invalid id - The pre-requisite id is invalid ' +
                          'or not found.'),
    7: TMDBKeyInvalid('Invalid API key - You must be granted a valid key.'),
    8: TMDBRequestError('Duplicate entry - The data you tried to submit ' +
                        'already exists.'),
    # message typo fixed: "tempirarily" -> "temporarily"
    9: TMDBOffline('This service is temporarily offline. Try again later.'),
    10: TMDBKeyRevoked('Suspended API key - Access to your account has been ' +
                       'suspended, contact TMDB.'),
    11: TMDBError('Internal error - Something went wrong. Contact TMDb.'),
    12: None,
    13: None,
    14: TMDBRequestError('Authentication Failed.'),
    15: TMDBError('Failed'),
    16: TMDBError('Device Denied'),
    17: TMDBError('Session Denied')}
def handle_status(data, query):
    """Raise the exception registered for the response's status code.

    data  -- decoded JSON response (a mapping); a missing 'status_code'
             is treated as code 1, which is registered as "no error".
    query -- the request URL, attached to the raised exception.

    The raised exception carries ``tmdberrno`` (the status code) and
    ``query``. Codes without an entry in ``status_handlers`` raise a
    generic TMDBError instead of leaking a KeyError to the caller.
    """
    code = data.get('status_code', 1)
    try:
        status = status_handlers[code]
    except KeyError:
        status = TMDBError(data.get(
            'status_message',
            'Unrecognized TMDb status code {0}'.format(code)))
    if status is not None:
        status.tmdberrno = code
        status.query = query
        raise status

View File

@@ -1,910 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#-----------------------
# Name: tmdb_api.py Simple-to-use Python interface to TMDB's API v3
# Python Library
# Author: Raymond Wagner
# Purpose: This Python library is intended to provide a series of classes
# and methods for search and retrieval of text metadata and image
# URLs from TMDB.
# Preliminary API specifications can be found at
# http://help.themoviedb.org/kb/api/about-3
# License: Creative Commons GNU GPL v2
# (http://creativecommons.org/licenses/GPL/2.0/)
#-----------------------
__title__ = ("tmdb_api - Simple-to-use Python interface to TMDB's API v3 " +
"(www.themoviedb.org)")
__author__ = "Raymond Wagner"
__purpose__ = """
This Python library is intended to provide a series of classes and methods
for search and retrieval of text metadata and image URLs from TMDB.
Preliminary API specifications can be found at
http://help.themoviedb.org/kb/api/about-3"""
__version__ = "v0.7.0"
# 0.1.0 Initial development
# 0.2.0 Add caching mechanism for API queries
# 0.2.1 Temporary work around for broken search paging
# 0.3.0 Rework backend machinery for managing OO interface to results
# 0.3.1 Add collection support
# 0.3.2 Remove MythTV key from results.py
# 0.3.3 Add functional language support
# 0.3.4 Re-enable search paging
# 0.3.5 Add methods for grabbing current, popular, and top rated movies
# 0.3.6 Rework paging mechanism
# 0.3.7 Generalize caching mechanism, and allow controllability
# 0.4.0 Add full locale support (language and country) and optional fall through
# 0.4.1 Add custom classmethod for dealing with IMDB movie IDs
# 0.4.2 Improve cache file selection for Windows systems
# 0.4.3 Add a few missed Person properties
# 0.4.4 Add support for additional Studio information
# 0.4.5 Add locale fallthrough for images and alternate titles
# 0.4.6 Add slice support for search results
# 0.5.0 Rework cache framework and improve file cache performance
# 0.6.0 Add user authentication support
# 0.6.1 Add adult filtering for people searches
# 0.6.2 Add similar movie search for Movie objects
# 0.6.3 Add Studio search
# 0.6.4 Add Genre list and associated Movie search
# 0.6.5 Prevent data from being blanked out by subsequent queries
# 0.6.6 Turn date processing errors into mutable warnings
# 0.6.7 Add support for searching by year
# 0.6.8 Add support for collection images
# 0.6.9 Correct Movie image language filtering
# 0.6.10 Add upcoming movie classmethod
# 0.6.11 Fix URL for top rated Movie query
# 0.6.12 Add support for Movie watchlist query and editing
# 0.6.13 Fix URL for rating Movies
# 0.6.14 Add support for Lists
# 0.6.15 Add ability to search Collections
# 0.6.16 Make absent primary images return None (previously u'')
# 0.6.17 Add userrating/votes to Image, add overview to Collection, remove
# releasedate sorting from Collection Movies
# 0.7.0 Add support for television series data
from request import set_key, Request
from util import Datapoint, Datalist, Datadict, Element, NameRepr, SearchRepr
from pager import PagedRequest
from locales import get_locale, set_locale
from tmdb_auth import get_session, set_session
from tmdb_exceptions import *
import json
import urllib
import urllib2
import datetime
DEBUG = False
def process_date(datestr):
    """Convert a 'YYYY-MM-DD' string into a ``datetime.date``.

    Returns None and emits a warning (rather than raising) when the
    value cannot be converted, including non-string input such as None.
    """
    try:
        return datetime.date(*[int(x) for x in datestr.split('-')])
    except (AttributeError, TypeError, ValueError):
        # AttributeError covers values without .split() (e.g. None), so
        # they are reported like any other bad date.
        import sys
        import warnings
        import traceback
        _, _, tb = sys.exc_info()
        # Attribute the warning to the file/line where the conversion failed.
        filename, lineno, _, _ = traceback.extract_tb(tb)[-1]
        warnings.warn_explicit(('"{0}" is not a supported date format. ' +
                                'Please fix upstream data at ' +
                                'http://www.themoviedb.org.'
                                ).format(datestr), Warning, filename, lineno)
        return None
class Configuration(Element):
    """API configuration element, populated from the 'configuration' call."""
    images = Datapoint('images')

    def _populate(self):
        request = Request('configuration')
        return request


# The class name is rebound to a single shared instance; the rest of the
# module reads e.g. ``Configuration.images`` from it.
Configuration = Configuration()
class Account(NameRepr, Element):
    """Account details fetched with the element's session id."""

    def _populate(self):
        session_id = self._session.sessionid
        return Request('account', session_id=session_id)

    id = Datapoint('id')
    adult = Datapoint('include_adult')
    country = Datapoint('iso_3166_1')
    language = Datapoint('iso_639_1')
    name = Datapoint('name')
    username = Datapoint('username')

    @property
    def locale(self):
        """Locale looked up from the account's language and country."""
        language, country = self.language, self.country
        return get_locale(language, country)
def searchMovie(query, locale=None, adult=False, year=None):
    """Search movies by title and return a MovieSearchResult.

    ``year`` may be a plain year value or any object with a ``year``
    attribute (in which case that attribute is sent).
    """
    params = {'query': query, 'include_adult': adult}
    if year is not None:
        # Use the object's .year when it has one, else the value itself.
        params['year'] = getattr(year, 'year', year)
    request = Request('search/movie', **params)
    return MovieSearchResult(request, locale=locale)
def searchMovieWithYear(query, locale=None, adult=False):
    """Search movies, honouring a trailing "(YYYY)" in the query.

    When the query ends in a parenthesised year between 1886 and 2049,
    the year is removed from the query text and passed to searchMovie()
    as the ``year`` filter; otherwise the query is searched unchanged.
    """
    year = None
    # simple syntax check, no need for regular expression
    if (len(query) > 6) and (query[-1] == ')') and (query[-6] == '('):
        try:
            candidate = int(query[-5:-1])
        except ValueError:
            candidate = None
        # sanity check on the resolved year; pass through when it fails
        if candidate is not None and 1885 < candidate < 2050:
            year = candidate
            # Strip exactly the "(YYYY)" suffix plus any whitespace before
            # it, so "Title(2010)" no longer loses its last character.
            query = query[:-6].rstrip()
    return searchMovie(query, locale, adult, year)
class MovieSearchResult(SearchRepr, PagedRequest):
    """Paged list of movie search matches, each wrapped as a Movie."""
    _name = None

    def __init__(self, request, locale=None):
        if locale is None:
            locale = get_locale()

        def build(raw):
            return Movie(raw=raw, locale=locale)

        localized = request.new(language=locale.language)
        super(MovieSearchResult, self).__init__(localized, build)
def searchSeries(query, first_air_date_year=None, search_type=None, locale=None):
    """Search television series and return a SeriesSearchResult."""
    request = Request('search/tv',
                      query=query,
                      first_air_date_year=first_air_date_year,
                      search_type=search_type)
    return SeriesSearchResult(request, locale=locale)
class SeriesSearchResult(SearchRepr, PagedRequest):
    """Paged list of series search matches, each wrapped as a Series."""
    _name = None

    def __init__(self, request, locale=None):
        if locale is None:
            locale = get_locale()

        def build(raw):
            return Series(raw=raw, locale=locale)

        localized = request.new(language=locale.language)
        super(SeriesSearchResult, self).__init__(localized, build)
def searchPerson(query, adult=False):
    """Search people by name and return a PeopleSearchResult."""
    request = Request('search/person', query=query, include_adult=adult)
    return PeopleSearchResult(request)
class PeopleSearchResult(SearchRepr, PagedRequest):
    """Paged list of people search matches, each wrapped as a Person."""
    _name = None

    def __init__(self, request):
        def build(raw):
            return Person(raw=raw)

        super(PeopleSearchResult, self).__init__(request, build)
def searchStudio(query):
    """Search production companies and return a StudioSearchResult."""
    request = Request('search/company', query=query)
    return StudioSearchResult(request)
class StudioSearchResult(SearchRepr, PagedRequest):
    """Paged list of company search matches, each wrapped as a Studio."""
    _name = None

    def __init__(self, request):
        def build(raw):
            return Studio(raw=raw)

        super(StudioSearchResult, self).__init__(request, build)
def searchList(query, adult=False):
    """Search lists and return a ListSearchResult.

    The result object used to be built and discarded (missing
    ``return``), so callers always received None.
    """
    return ListSearchResult(
        Request('search/list', query=query, include_adult=adult))
class ListSearchResult(SearchRepr, PagedRequest):
    """Paged list of list search matches, each wrapped as a List."""
    _name = None

    def __init__(self, request):
        def build(raw):
            return List(raw=raw)

        super(ListSearchResult, self).__init__(request, build)
def searchCollection(query, locale=None):
    """Search collections and return a CollectionSearchResult."""
    request = Request('search/collection', query=query)
    return CollectionSearchResult(request, locale=locale)
class CollectionSearchResult(SearchRepr, PagedRequest):
    """Paged list of collection search matches, each wrapped as a Collection."""
    _name = None

    def __init__(self, request, locale=None):
        if locale is None:
            locale = get_locale()

        def build(raw):
            return Collection(raw=raw, locale=locale)

        localized = request.new(language=locale.language)
        super(CollectionSearchResult, self).__init__(localized, build)
class Image(Element):
    """Base class for TMDb images; subclasses supply the available sizes."""
    filename = Datapoint('file_path', initarg=1,
                         handler=lambda x: x.lstrip('/'))
    aspectratio = Datapoint('aspect_ratio')
    height = Datapoint('height')
    width = Datapoint('width')
    language = Datapoint('iso_639_1')
    userrating = Datapoint('vote_average')
    votes = Datapoint('vote_count')

    def sizes(self):
        """Return the size names accepted by geturl()."""
        return ['original']

    def geturl(self, size='original'):
        """Return the image URL for ``size``.

        Raises TMDBImageSizeError when ``size`` is not listed by sizes().
        """
        if size not in self.sizes():
            raise TMDBImageSizeError
        url = Configuration.images['secure_base_url'].rstrip('/')
        return url + '/{0}/{1}'.format(size, self.filename)

    # sort preferring locale's language, but keep remaining ordering consistent
    def __lt__(self, other):
        if not isinstance(other, Image):
            return False
        return (self.language == self._locale.language) \
            and (self.language != other.language)

    def __gt__(self, other):
        if not isinstance(other, Image):
            return True
        return (self.language != other.language) \
            and (other.language == self._locale.language)

    # direct match for comparison
    def __eq__(self, other):
        if not isinstance(other, Image):
            return False
        return self.filename == other.filename

    # special handling for boolean to see if exists
    def __nonzero__(self):
        if len(self.filename) == 0:
            return False
        return True

    # Python 3 consults __bool__ instead of __nonzero__; without this alias
    # an image with an empty filename would be truthy there.
    __bool__ = __nonzero__

    def __repr__(self):
        # BASE62 encoded filename, no need to worry about unicode
        return u"<{0.__class__.__name__} '{0.filename}'>".format(self)
class Backdrop(Image):
    """Image whose sizes come from the configuration's 'backdrop_sizes'."""

    def sizes(self):
        image_config = Configuration.images
        return image_config['backdrop_sizes']
class Poster(Image):
    """Image whose sizes come from the configuration's 'poster_sizes'."""

    def sizes(self):
        image_config = Configuration.images
        return image_config['poster_sizes']
class Profile(Image):
    """Image whose sizes come from the configuration's 'profile_sizes'."""

    def sizes(self):
        image_config = Configuration.images
        return image_config['profile_sizes']
class Logo(Image):
    """Image whose sizes come from the configuration's 'logo_sizes'."""

    def sizes(self):
        image_config = Configuration.images
        return image_config['logo_sizes']
class AlternateTitle(Element):
    """A movie title as used in one particular country."""
    country = Datapoint('iso_3166_1')
    title = Datapoint('title')

    # sort preferring locale's country, but keep remaining ordering consistent
    def __lt__(self, other):
        return (self.country == self._locale.country) \
            and (self.country != other.country)

    def __gt__(self, other):
        return (self.country != other.country) \
            and (other.country == self._locale.country)

    def __eq__(self, other):
        return self.country == other.country

    def __repr__(self):
        text = u"<{0.__class__.__name__} '{0.title}' ({0.country})>"\
            .format(self)
        # __repr__ must return the native str type: UTF-8 bytes on
        # Python 2 (where str is bytes), text on Python 3.
        return text.encode('utf-8') if str is bytes else text
class Person(Element):
    """A person, with biography, credits and profile images."""
    id = Datapoint('id', initarg=1)
    name = Datapoint('name')
    biography = Datapoint('biography')
    dayofbirth = Datapoint('birthday', default=None, handler=process_date)
    dayofdeath = Datapoint('deathday', default=None, handler=process_date)
    homepage = Datapoint('homepage')
    birthplace = Datapoint('place_of_birth')
    profile = Datapoint('profile_path', handler=Profile,
                        raw=False, default=None)
    adult = Datapoint('adult')
    aliases = Datalist('also_known_as')

    def __repr__(self):
        text = u"<{0.__class__.__name__} '{0.name}'>".format(self)
        # __repr__ must return the native str type: UTF-8 bytes on
        # Python 2 (where str is bytes), text on Python 3.
        return text.encode('utf-8') if str is bytes else text

    def _populate(self):
        return Request('person/{0}'.format(self.id))

    def _populate_credits(self):
        return Request('person/{0}/credits'.format(self.id),
                       language=self._locale.language)

    def _populate_images(self):
        return Request('person/{0}/images'.format(self.id))

    # These fields name the request builders above as their pollers, so
    # they have to be declared after those methods.
    roles = Datalist('cast', handler=lambda x: ReverseCast(raw=x),
                     poller=_populate_credits)
    crew = Datalist('crew', handler=lambda x: ReverseCrew(raw=x),
                    poller=_populate_credits)
    profiles = Datalist('profiles', handler=Profile, poller=_populate_images)
class Cast(Person):
    """A person in an acting role, with character name and billing order."""
    character = Datapoint('character')
    order = Datapoint('order')

    def __repr__(self):
        text = u"<{0.__class__.__name__} '{0.name}' as '{0.character}'>"\
            .format(self)
        # __repr__ must return the native str type: UTF-8 bytes on
        # Python 2 (where str is bytes), text on Python 3.
        return text.encode('utf-8') if str is bytes else text
class Crew(Person):
    """A person in a crew role, with job and department."""
    job = Datapoint('job')
    department = Datapoint('department')

    def __repr__(self):
        text = u"<{0.__class__.__name__} '{0.name}','{0.job}'>".format(self)
        # __repr__ must return the native str type: UTF-8 bytes on
        # Python 2 (where str is bytes), text on Python 3.
        return text.encode('utf-8') if str is bytes else text
class Keyword(Element):
    """A keyword, identified by id and name."""
    id = Datapoint('id')
    name = Datapoint('name')

    def __repr__(self):
        text = u"<{0.__class__.__name__} {0.name}>".format(self)
        # __repr__ must return the native str type: UTF-8 bytes on
        # Python 2 (where str is bytes), text on Python 3.
        return text.encode('utf-8') if str is bytes else text
class Release(Element):
    """Release information for one country: certification and date."""
    certification = Datapoint('certification')
    country = Datapoint('iso_3166_1')
    releasedate = Datapoint('release_date', handler=process_date)

    def __repr__(self):
        text = u"<{0.__class__.__name__} {0.country}, {0.releasedate}>"\
            .format(self)
        # __repr__ must return the native str type: UTF-8 bytes on
        # Python 2 (where str is bytes), text on Python 3.
        return text.encode('utf-8') if str is bytes else text
class Trailer(Element):
    """Common trailer fields: name, size and source."""
    name = Datapoint('name')
    size = Datapoint('size')
    source = Datapoint('source')
class YoutubeTrailer(Trailer):
    """Trailer whose ``source`` is used as a YouTube video id."""

    def geturl(self):
        """Return the YouTube watch URL built from ``source``."""
        video_id = self.source
        return "http://www.youtube.com/watch?v={0}".format(video_id)

    def __repr__(self):
        # modified BASE64 encoding, no need to worry about unicode
        template = u"<{0.__class__.__name__} '{0.name}'>"
        return template.format(self)
class AppleTrailer(Element):
    """Trailer offered in several sizes, stored in ``sources`` keyed by size."""
    name = Datapoint('name')
    sources = Datadict('sources', handler=Trailer, attr='size')

    def sizes(self):
        """Return the available size keys."""
        return self.sources.keys()

    def geturl(self, size=None):
        """Return the source for ``size``; defaults to the largest one."""
        if size is None:
            # sort assuming ###p format for now, take largest resolution
            heights = sorted([int(key[:-1]) for key in self.sources])
            size = str(heights[-1]) + 'p'
        return self.sources[size].source

    def __repr__(self):
        template = u"<{0.__class__.__name__} '{0.name}'>"
        return template.format(self)
class Translation(Element):
    """An available translation: native name, language code, English name."""
    name = Datapoint('name')
    language = Datapoint('iso_639_1')
    englishname = Datapoint('english_name')

    def __repr__(self):
        text = u"<{0.__class__.__name__} '{0.name}' ({0.language})>"\
            .format(self)
        # __repr__ must return the native str type: UTF-8 bytes on
        # Python 2 (where str is bytes), text on Python 3.
        return text.encode('utf-8') if str is bytes else text
class Genre(NameRepr, Element):
    """A genre, with access to the movies filed under it."""
    id = Datapoint('id')
    name = Datapoint('name')

    def _populate_movies(self):
        path = 'genre/{0}/movies'.format(self.id)
        return Request(path, language=self._locale.language)

    @property
    def movies(self):
        """Movie search result for this genre, built once and kept in _data."""
        if 'movies' in self._data:
            return self._data['movies']
        result = MovieSearchResult(self._populate_movies(),
                                   locale=self._locale)
        result._name = "{0.name} Movies".format(self)
        self._data['movies'] = result
        return self._data['movies']

    @classmethod
    def getAll(cls, locale=None):
        """Return the genres listed by the 'genre/list' call."""
        class GenreList(Element):
            genres = Datalist('genres', handler=Genre)

            def _populate(self):
                language = self._locale.language
                return Request('genre/list', language=language)

        listing = GenreList(locale=locale)
        return listing.genres
class Studio(NameRepr, Element):
    """A production company, with access to its movies."""
    id = Datapoint('id', initarg=1)
    name = Datapoint('name')
    description = Datapoint('description')
    headquarters = Datapoint('headquarters')
    logo = Datapoint('logo_path', handler=Logo, raw=False, default=None)
    # FIXME: manage not-yet-defined handlers in a way that will propagate
    #        locale information properly
    parent = Datapoint('parent_company', handler=lambda x: Studio(raw=x))

    def _populate(self):
        path = 'company/{0}'.format(self.id)
        return Request(path)

    def _populate_movies(self):
        path = 'company/{0}/movies'.format(self.id)
        return Request(path, language=self._locale.language)

    # FIXME: add a cleaner way of adding types with no additional processing
    @property
    def movies(self):
        """Movie search result for this studio, built once and kept in _data."""
        if 'movies' in self._data:
            return self._data['movies']
        result = MovieSearchResult(self._populate_movies(),
                                   locale=self._locale)
        result._name = "{0.name} Movies".format(self)
        self._data['movies'] = result
        return self._data['movies']
class Country(NameRepr, Element):
    """A country, read from the 'iso_3166_1' and 'name' fields."""
    code = Datapoint('iso_3166_1')
    name = Datapoint('name')
class Language(NameRepr, Element):
    """A language, read from the 'iso_639_1' and 'name' fields."""
    code = Datapoint('iso_639_1')
    name = Datapoint('name')
class Movie(Element):
    """A movie, with its metadata, related listings and account actions.

    The classmethods build ready-made listings (now playing, popular, ...)
    or per-account listings that need a session. The ``_populate*``
    methods return the Request used to fetch each group of fields.
    """

    @classmethod
    def latest(cls):
        """Return the movie from 'latest/movie' (request lifetime 600)."""
        req = Request('latest/movie')
        req.lifetime = 600
        return cls(raw=req.readJSON())

    @classmethod
    def nowplaying(cls, locale=None):
        res = MovieSearchResult(Request('movie/now-playing'), locale=locale)
        res._name = 'Now Playing'
        return res

    @classmethod
    def mostpopular(cls, locale=None):
        res = MovieSearchResult(Request('movie/popular'), locale=locale)
        res._name = 'Popular'
        return res

    @classmethod
    def toprated(cls, locale=None):
        res = MovieSearchResult(Request('movie/top_rated'), locale=locale)
        res._name = 'Top Rated'
        return res

    @classmethod
    def upcoming(cls, locale=None):
        res = MovieSearchResult(Request('movie/upcoming'), locale=locale)
        res._name = 'Upcoming'
        return res

    @classmethod
    def favorites(cls, session=None):
        """Return the favorite movies of the session's account."""
        if session is None:
            session = get_session()
        account = Account(session=session)
        res = MovieSearchResult(
            Request('account/{0}/favorite_movies'.format(account.id),
                    session_id=session.sessionid))
        res._name = "Favorites"
        return res

    @classmethod
    def ratedmovies(cls, session=None):
        """Return the movies rated by the session's account."""
        if session is None:
            session = get_session()
        account = Account(session=session)
        res = MovieSearchResult(
            Request('account/{0}/rated_movies'.format(account.id),
                    session_id=session.sessionid))
        res._name = "Movies You Rated"
        return res

    @classmethod
    def watchlist(cls, session=None):
        """Return the movie watchlist of the session's account."""
        if session is None:
            session = get_session()
        account = Account(session=session)
        res = MovieSearchResult(
            Request('account/{0}/movie_watchlist'.format(account.id),
                    session_id=session.sessionid))
        res._name = "Movies You're Watching"
        return res

    @classmethod
    def fromIMDB(cls, imdbid, locale=None):
        """Build a Movie from an IMDB id given as 'tt…' string or number."""
        try:
            # assume string
            if not imdbid.startswith('tt'):
                imdbid = "tt{0:0>7}".format(imdbid)
        except AttributeError:
            # assume integer
            imdbid = "tt{0:0>7}".format(imdbid)
        if locale is None:
            locale = get_locale()
        movie = cls(imdbid, locale=locale)
        movie._populate()
        return movie

    id = Datapoint('id', initarg=1)
    title = Datapoint('title')
    originaltitle = Datapoint('original_title')
    tagline = Datapoint('tagline')
    overview = Datapoint('overview')
    runtime = Datapoint('runtime')
    budget = Datapoint('budget')
    revenue = Datapoint('revenue')
    releasedate = Datapoint('release_date', handler=process_date)
    homepage = Datapoint('homepage')
    imdb = Datapoint('imdb_id')

    backdrop = Datapoint('backdrop_path', handler=Backdrop,
                         raw=False, default=None)
    poster = Datapoint('poster_path', handler=Poster,
                       raw=False, default=None)

    popularity = Datapoint('popularity')
    userrating = Datapoint('vote_average')
    votes = Datapoint('vote_count')

    adult = Datapoint('adult')
    collection = Datapoint('belongs_to_collection', handler=lambda x: \
                           Collection(raw=x))
    genres = Datalist('genres', handler=Genre)
    studios = Datalist('production_companies', handler=Studio)
    countries = Datalist('production_countries', handler=Country)
    languages = Datalist('spoken_languages', handler=Language)

    def _populate(self):
        return Request('movie/{0}'.format(self.id), \
                       language=self._locale.language)

    def _populate_titles(self):
        kwargs = {}
        if not self._locale.fallthrough:
            kwargs['country'] = self._locale.country
        return Request('movie/{0}/alternative_titles'.format(self.id),
                       **kwargs)

    def _populate_cast(self):
        return Request('movie/{0}/casts'.format(self.id))

    def _populate_images(self):
        kwargs = {}
        if not self._locale.fallthrough:
            kwargs['language'] = self._locale.language
        return Request('movie/{0}/images'.format(self.id), **kwargs)

    def _populate_keywords(self):
        return Request('movie/{0}/keywords'.format(self.id))

    def _populate_releases(self):
        return Request('movie/{0}/releases'.format(self.id))

    def _populate_trailers(self):
        return Request('movie/{0}/trailers'.format(self.id),
                       language=self._locale.language)

    def _populate_translations(self):
        return Request('movie/{0}/translations'.format(self.id))

    # The fields below name the request builders above as their pollers,
    # so they have to be declared after those methods.
    alternate_titles = Datalist('titles', handler=AlternateTitle, \
                                poller=_populate_titles, sort=True)

    # FIXME: this data point will need to be changed to 'credits' at some point
    cast = Datalist('cast', handler=Cast,
                    poller=_populate_cast, sort='order')

    crew = Datalist('crew', handler=Crew, poller=_populate_cast)
    backdrops = Datalist('backdrops', handler=Backdrop,
                         poller=_populate_images, sort=True)
    posters = Datalist('posters', handler=Poster,
                       poller=_populate_images, sort=True)
    keywords = Datalist('keywords', handler=Keyword,
                        poller=_populate_keywords)
    releases = Datadict('countries', handler=Release,
                        poller=_populate_releases, attr='country')
    youtube_trailers = Datalist('youtube', handler=YoutubeTrailer,
                                poller=_populate_trailers)
    apple_trailers = Datalist('quicktime', handler=AppleTrailer,
                              poller=_populate_trailers)
    translations = Datalist('translations', handler=Translation,
                            poller=_populate_translations)

    def setFavorite(self, value):
        """Mark or unmark this movie as a favorite of the session's account."""
        req = Request('account/{0}/favorite'.format(
                          Account(session=self._session).id),
                      session_id=self._session.sessionid)
        req.add_data({'movie_id': self.id,
                      'favorite': str(bool(value)).lower()})
        req.lifetime = 0
        req.readJSON()

    def setRating(self, value):
        """Rate this movie; raises TMDBError unless 0 <= value <= 10."""
        if not (0 <= value <= 10):
            raise TMDBError("Ratings must be between '0' and '10'.")
        req = Request('movie/{0}/rating'.format(self.id),
                      session_id=self._session.sessionid)
        req.lifetime = 0
        req.add_data({'value':value})
        req.readJSON()

    def setWatchlist(self, value):
        """Add this movie to, or remove it from, the account's watchlist."""
        req = Request('account/{0}/movie_watchlist'.format(
                          Account(session=self._session).id),
                      session_id=self._session.sessionid)
        req.lifetime = 0
        req.add_data({'movie_id': self.id,
                      'movie_watchlist': str(bool(value)).lower()})
        req.readJSON()

    def getSimilar(self):
        return self.similar

    @property
    def similar(self):
        res = MovieSearchResult(Request(
                                    'movie/{0}/similar_movies'.format(self.id)),
                                locale=self._locale)
        res._name = 'Similar to {0}'.format(self._printable_name())
        return res

    @property
    def lists(self):
        res = ListSearchResult(Request('movie/{0}/lists'.format(self.id)))
        res._name = "Lists containing {0}".format(self._printable_name())
        return res

    def _printable_name(self):
        """Return the quoted title (or original title), plus year if known."""
        if self.title is not None:
            s = u"'{0}'".format(self.title)
        elif self.originaltitle is not None:
            s = u"'{0}'".format(self.originaltitle)
        else:
            s = u"'No Title'"
        if self.releasedate:
            s = u"{0} ({1})".format(s, self.releasedate.year)
        return s

    def __repr__(self):
        text = u"<{0} {1}>".format(self.__class__.__name__,
                                   self._printable_name())
        # __repr__ must return the native str type: UTF-8 bytes on
        # Python 2 (where str is bytes), text on Python 3.
        return text.encode('utf-8') if str is bytes else text
class ReverseCast( Movie ):
    """A movie as listed in a person's acting credits."""
    character = Datapoint('character')

    def __repr__(self):
        text = (u"<{0.__class__.__name__} '{0.character}' on {1}>"
                .format(self, self._printable_name()))
        # __repr__ must return the native str type: UTF-8 bytes on
        # Python 2 (where str is bytes), text on Python 3.
        return text.encode('utf-8') if str is bytes else text
class ReverseCrew( Movie ):
    """A movie as listed in a person's crew credits."""
    department = Datapoint('department')
    job = Datapoint('job')

    def __repr__(self):
        text = (u"<{0.__class__.__name__} '{0.job}' for {1}>"
                .format(self, self._printable_name()))
        # __repr__ must return the native str type: UTF-8 bytes on
        # Python 2 (where str is bytes), text on Python 3.
        return text.encode('utf-8') if str is bytes else text
class Collection(NameRepr, Element):
    """A movie collection with its member movies and artwork."""
    id = Datapoint('id', initarg=1)
    name = Datapoint('name')
    backdrop = Datapoint('backdrop_path', handler=Backdrop,
                         raw=False, default=None)
    poster = Datapoint('poster_path', handler=Poster, raw=False, default=None)
    members = Datalist('parts', handler=Movie)
    overview = Datapoint('overview')

    def _populate(self):
        path = 'collection/{0}'.format(self.id)
        return Request(path, language=self._locale.language)

    def _populate_images(self):
        # Only restrict images by language when locale fall-through is off.
        params = {}
        if not self._locale.fallthrough:
            params['language'] = self._locale.language
        path = 'collection/{0}/images'.format(self.id)
        return Request(path, **params)

    backdrops = Datalist('backdrops', handler=Backdrop,
                         poller=_populate_images, sort=True)
    posters = Datalist('posters', handler=Poster,
                       poller=_populate_images, sort=True)
class List(NameRepr, Element):
    """A user-created list of movies."""
    id = Datapoint('id', initarg=1)
    name = Datapoint('name')
    author = Datapoint('created_by')
    description = Datapoint('description')
    favorites = Datapoint('favorite_count')
    language = Datapoint('iso_639_1')
    count = Datapoint('item_count')
    poster = Datapoint('poster_path', handler=Poster, raw=False, default=None)
    members = Datalist('items', handler=Movie)

    def _populate(self):
        path = 'list/{0}'.format(self.id)
        return Request(path)
class Network(NameRepr, Element):
    """A television network, identified by id and name."""
    id = Datapoint('id', initarg=1)
    name = Datapoint('name')
class Episode(NameRepr, Element):
    """One episode, addressed by series id, season number and episode number."""
    episode_number = Datapoint('episode_number', initarg=3)
    season_number = Datapoint('season_number', initarg=2)
    series_id = Datapoint('series_id', initarg=1)
    air_date = Datapoint('air_date', handler=process_date)
    overview = Datapoint('overview')
    name = Datapoint('name')
    userrating = Datapoint('vote_average')
    votes = Datapoint('vote_count')
    id = Datapoint('id')
    production_code = Datapoint('production_code')
    still = Datapoint('still_path', handler=Backdrop, raw=False, default=None)

    def _populate(self):
        path = 'tv/{0}/season/{1}/episode/{2}'.format(
            self.series_id, self.season_number, self.episode_number)
        return Request(path, language=self._locale.language)

    def _populate_cast(self):
        path = 'tv/{0}/season/{1}/episode/{2}/credits'.format(
            self.series_id, self.season_number, self.episode_number)
        return Request(path, language=self._locale.language)

    def _populate_external_ids(self):
        path = 'tv/{0}/season/{1}/episode/{2}/external_ids'.format(
            self.series_id, self.season_number, self.episode_number)
        return Request(path)

    def _populate_images(self):
        # Only restrict images by language when locale fall-through is off.
        params = {}
        if not self._locale.fallthrough:
            params['language'] = self._locale.language
        path = 'tv/{0}/season/{1}/episode/{2}/images'.format(
            self.series_id, self.season_number, self.episode_number)
        return Request(path, **params)

    cast = Datalist('cast', handler=Cast,
                    poller=_populate_cast, sort='order')
    guest_stars = Datalist('guest_stars', handler=Cast,
                           poller=_populate_cast, sort='order')
    crew = Datalist('crew', handler=Crew, poller=_populate_cast)
    imdb_id = Datapoint('imdb_id', poller=_populate_external_ids)
    freebase_id = Datapoint('freebase_id', poller=_populate_external_ids)
    freebase_mid = Datapoint('freebase_mid', poller=_populate_external_ids)
    tvdb_id = Datapoint('tvdb_id', poller=_populate_external_ids)
    tvrage_id = Datapoint('tvrage_id', poller=_populate_external_ids)
    stills = Datalist('stills', handler=Backdrop,
                      poller=_populate_images, sort=True)
class Season(NameRepr, Element):
    """One season of a series, addressed by series id and season number."""
    season_number = Datapoint('season_number', initarg=2)
    series_id = Datapoint('series_id', initarg=1)
    id = Datapoint('id')
    air_date = Datapoint('air_date', handler=process_date)
    poster = Datapoint('poster_path', handler=Poster, raw=False, default=None)
    overview = Datapoint('overview')
    name = Datapoint('name')
    episodes = Datadict('episodes', attr='episode_number', handler=Episode,
                        passthrough={'series_id': 'series_id',
                                     'season_number': 'season_number'})

    def _populate(self):
        path = 'tv/{0}/season/{1}'.format(self.series_id, self.season_number)
        return Request(path, language=self._locale.language)

    def _populate_images(self):
        # Only restrict images by language when locale fall-through is off.
        params = {}
        if not self._locale.fallthrough:
            params['language'] = self._locale.language
        path = 'tv/{0}/season/{1}/images'.format(
            self.series_id, self.season_number)
        return Request(path, **params)

    def _populate_external_ids(self):
        path = 'tv/{0}/season/{1}/external_ids'.format(
            self.series_id, self.season_number)
        return Request(path)

    posters = Datalist('posters', handler=Poster,
                       poller=_populate_images, sort=True)

    freebase_id = Datapoint('freebase_id', poller=_populate_external_ids)
    freebase_mid = Datapoint('freebase_mid', poller=_populate_external_ids)
    tvdb_id = Datapoint('tvdb_id', poller=_populate_external_ids)
    tvrage_id = Datapoint('tvrage_id', poller=_populate_external_ids)
class Series(NameRepr, Element):
    """
    A TV series.

    Constructed positionally as ``Series(id)``; the remaining fields are
    filled lazily from the ``tv/<id>`` family of requests defined below.
    """

    # --- fields served by the main series request (_populate) ---
    id = Datapoint('id', initarg=1)
    backdrop = Datapoint(
        'backdrop_path', handler=Backdrop, raw=False, default=None)
    authors = Datalist('created_by', handler=Person)
    episode_run_times = Datalist('episode_run_time')
    first_air_date = Datapoint('first_air_date', handler=process_date)
    last_air_date = Datapoint('last_air_date', handler=process_date)
    genres = Datalist('genres', handler=Genre)
    homepage = Datapoint('homepage')
    in_production = Datapoint('in_production')
    languages = Datalist('languages')
    origin_countries = Datalist('origin_country')
    name = Datapoint('name')
    original_name = Datapoint('original_name')
    number_of_episodes = Datapoint('number_of_episodes')
    number_of_seasons = Datapoint('number_of_seasons')
    overview = Datapoint('overview')
    popularity = Datapoint('popularity')
    status = Datapoint('status')
    userrating = Datapoint('vote_average')
    votes = Datapoint('vote_count')
    poster = Datapoint(
        'poster_path', handler=Poster, raw=False, default=None)
    networks = Datalist('networks', handler=Network)
    # seasons are stored keyed by season number; every Season receives
    # this series' id as its series_id
    seasons = Datadict(
        'seasons',
        attr='season_number',
        handler=Season,
        passthrough={'id': 'series_id'})

    def _populate(self):
        """Return the request for the series' core data."""
        path = 'tv/{0}'.format(self.id)
        return Request(path, language=self._locale.language)

    def _populate_cast(self):
        """Return the request for the series' credits."""
        path = 'tv/{0}/credits'.format(self.id)
        return Request(path)

    def _populate_images(self):
        """Return the request for the series' images.

        The language filter is only sent when the locale does not allow
        fallthrough.
        """
        path = 'tv/{0}/images'.format(self.id)
        if self._locale.fallthrough:
            return Request(path)
        return Request(path, language=self._locale.language)

    def _populate_external_ids(self):
        """Return the request for the series' external identifiers."""
        path = 'tv/{0}/external_ids'.format(self.id)
        return Request(path)

    # --- fields served by the credits request ---
    cast = Datalist(
        'cast', handler=Cast, poller=_populate_cast, sort='order')
    crew = Datalist('crew', handler=Crew, poller=_populate_cast)

    # --- fields served by the images request ---
    backdrops = Datalist(
        'backdrops', handler=Backdrop, poller=_populate_images, sort=True)
    posters = Datalist(
        'posters', handler=Poster, poller=_populate_images, sort=True)

    # --- fields served by the external ids request ---
    imdb_id = Datapoint('imdb_id', poller=_populate_external_ids)
    freebase_id = Datapoint('freebase_id', poller=_populate_external_ids)
    freebase_mid = Datapoint('freebase_mid', poller=_populate_external_ids)
    tvdb_id = Datapoint('tvdb_id', poller=_populate_external_ids)
    tvrage_id = Datapoint('tvrage_id', poller=_populate_external_ids)

View File

@@ -1,138 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#-----------------------
# Name: tmdb_auth.py
# Python Library
# Author: Raymond Wagner
# Purpose: Provide authentication and session services for
# calls against the TMDB v3 API
#-----------------------
from datetime import datetime as _pydatetime, \
                     timedelta, \
                     tzinfo as _pytzinfo
import re
class datetime(_pydatetime):
    """Customized datetime class with ISO format parsing."""

    # Date, any single separator character, time with optional seconds,
    # then an optional zone: either 'Z' or a signed hour offset with
    # optional minutes (with or without a colon).
    _reiso = re.compile('(?P<year>[0-9]{4})'
                        '-(?P<month>[0-9]{1,2})'
                        '-(?P<day>[0-9]{1,2})'
                        '.'
                        '(?P<hour>[0-9]{2})'
                        ':(?P<min>[0-9]{2})'
                        '(:(?P<sec>[0-9]{2}))?'
                        '(?P<tz>Z|'
                        '(?P<tzdirec>[-+])'
                        '(?P<tzhour>[0-9]{1,2})'
                        '(:)?'
                        '(?P<tzmin>[0-9]{2})?'
                        ')?')

    class _tzinfo(_pytzinfo):
        """Fixed-offset time zone built from the parsed offset fields."""

        def __init__(self, direc='+', hr=0, min=0):
            """
            direc -- '+' or '-', the sign of the offset
            hr    -- hours of offset (int or numeric string)
            min   -- minutes of offset (int or numeric string)
            """
            offset = timedelta(hours=int(hr), minutes=int(min))
            # the sign applies to the whole offset, not just the hours:
            # '-05:30' is five and a half hours behind UTC
            self._offset = -offset if direc == '-' else offset

        def utcoffset(self, dt):
            return self._offset

        def tzname(self, dt):
            return ''

        def dst(self, dt):
            return timedelta(0)

    @classmethod
    def fromIso(cls, isotime, sep='T'):
        """
        Parse an ISO 8601 style timestamp into an instance of this class.

        isotime -- the string to parse
        sep     -- unused; the pattern accepts any single character
                   between the date and the time. kept so existing
                   callers passing it continue to work

        Returns a naive datetime when the string carries no zone, and a
        timezone-aware one otherwise.
        Raises TypeError when the string does not match the pattern.
        """
        match = cls._reiso.match(isotime)
        if match is None:
            raise TypeError("time data '%s' does not match ISO 8601 format"
                            % isotime)

        dt = [int(match.group(name))
              for name in ('year', 'month', 'day', 'hour', 'min')]
        # seconds are optional in the pattern
        dt.append(int(match.group('sec') or 0))

        if match.group('tz'):
            if match.group('tz') == 'Z':
                tz = cls._tzinfo()
            elif match.group('tzmin'):
                tz = cls._tzinfo(*match.group('tzdirec', 'tzhour', 'tzmin'))
            else:
                tz = cls._tzinfo(*match.group('tzdirec', 'tzhour'))
            # microseconds slot must be filled before tzinfo can be given
            # positionally
            dt.append(0)
            dt.append(tz)
        return cls(*dt)
from request import Request
from tmdb_exceptions import *
# module-wide default session; stays None until set_session() is called
syssession = None


def set_session(sessionid):
    """Make a Session for `sessionid` the module-wide default session."""
    global syssession
    session = Session(sessionid)
    syssession = session
def get_session(sessionid=None):
    """
    Return the Session to use.

    A truthy `sessionid` always yields a fresh Session wrapping it.
    Otherwise the module-wide default is returned when one has been set,
    and a new, unauthenticated Session when not.
    """
    if sessionid:
        return Session(sessionid)
    if syssession is None:
        return Session.new()
    return syssession
class Session(object):
    """
    Holder for a TMDB session id and the auth token used to obtain one.

    A Session created with a session id is considered authenticated. One
    created without (see `new`) can request an auth token, and later
    exchange it for a session id on first access of `sessionid`.
    """

    @classmethod
    def new(cls):
        """Return a Session that has no session id yet."""
        return cls(None)

    def __init__(self, sessionid):
        self.sessionid = sessionid

    @property
    def sessionid(self):
        """
        The session id, requested from the API on first access if needed.

        Raises TMDBError when there is no auth token to exchange, or when
        the API reports the exchange as unsuccessful.
        """
        if self._sessionid is None:
            if self._authtoken is None:
                raise TMDBError("No Auth Token to produce Session for")
            # TODO: check authtoken expiration against current time
            req = Request('authentication/session/new',
                          request_token=self._authtoken)
            # NOTE(review): presumably disables caching of the response --
            # confirm against Request
            req.lifetime = 0
            dat = req.readJSON()
            if not dat['success']:
                raise TMDBError("Session generation failed")
            self._sessionid = dat['session_id']
        return self._sessionid

    @sessionid.setter
    def sessionid(self, value):
        # assigning a session id discards any pending auth token
        self._sessionid = value
        self._authtoken = None
        self._authtokenexpiration = None
        self.authenticated = value is not None

    @property
    def authtoken(self):
        """
        The auth token, requested from the API on first access.

        Raises TMDBError when the session is already authenticated, or
        when the API reports the token request as unsuccessful.
        """
        if self.authenticated:
            raise TMDBError("Session is already authenticated")
        if self._authtoken is None:
            req = Request('authentication/token/new')
            req.lifetime = 0
            dat = req.readJSON()
            if not dat['success']:
                raise TMDBError("Auth Token request failed")
            self._authtoken = dat['request_token']
            self._authtokenexpiration = datetime.fromIso(dat['expires_at'])
        return self._authtoken

    @property
    def callbackurl(self):
        """
        URL built from the auth token.

        Goes through the `authtoken` property so a token is requested when
        none is held yet, instead of failing on concatenation with None.
        """
        return "http://www.themoviedb.org/authenticate/" + self.authtoken

View File

@@ -1,107 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#-----------------------
# Name: tmdb_exceptions.py Common exceptions used in tmdbv3 API library
# Python Library
# Author: Raymond Wagner
#-----------------------
class TMDBError(Exception):
    """
    Base class for all errors raised by the library.

    The class attributes below are the numeric codes stored in `errno`.
    Each one is named after an exception class with the 'TMDB' prefix
    removed.
    """
    Error = 0
    KeyError = 10
    KeyMissing = 20
    KeyInvalid = 30
    KeyRevoked = 40
    RequestError = 50
    RequestInvalid = 51
    PagingIssue = 60
    CacheError = 70
    CacheReadError = 71
    CacheWriteError = 72
    CacheDirectoryError = 73
    ImageSizeError = 80
    HTTPError = 90
    Offline = 100
    LocaleError = 110

    def __init__(self, msg=None, errno=0):
        """
        msg   -- message stored as the sole element of `args`
        errno -- explicit error code; when left at 0 the code is looked up
                 from the name of the raising class
        """
        self.errno = errno
        if errno == 0:
            # the code constants carry no 'TMDB' prefix, so drop it from
            # the class name before looking the code up
            name = self.__class__.__name__
            if name.startswith('TMDB'):
                name = name[len('TMDB'):]
            self.errno = getattr(self, name, errno)
        self.args = (msg,)


class TMDBKeyError(TMDBError):
    """Base class for API key errors."""


class TMDBKeyMissing(TMDBKeyError):
    """Error code 20 (KeyMissing)."""


class TMDBKeyInvalid(TMDBKeyError):
    """Error code 30 (KeyInvalid)."""


class TMDBKeyRevoked(TMDBKeyInvalid):
    """Error code 40 (KeyRevoked)."""


class TMDBRequestError(TMDBError):
    """Base class for request errors."""


class TMDBRequestInvalid(TMDBRequestError):
    """Error code 51 (RequestInvalid)."""


class TMDBPagingIssue(TMDBRequestError):
    """Error code 60 (PagingIssue)."""


class TMDBCacheError(TMDBRequestError):
    """Base class for cache errors."""


class TMDBCacheReadError(TMDBCacheError):
    """The cache file could not be read; `filename` holds its path."""

    def __init__(self, filename):
        super(TMDBCacheReadError, self).__init__(
            "User does not have permission to access cache file: {0}."
            .format(filename))
        self.filename = filename


class TMDBCacheWriteError(TMDBCacheError):
    """The cache file could not be written; `filename` holds its path."""

    def __init__(self, filename):
        super(TMDBCacheWriteError, self).__init__(
            "User does not have permission to write cache file: {0}."
            .format(filename))
        self.filename = filename


class TMDBCacheDirectoryError(TMDBCacheError):
    """The cache file's directory is missing; `filename` holds the path."""

    def __init__(self, filename):
        super(TMDBCacheDirectoryError, self).__init__(
            "Directory containing cache file does not exist: {0}."
            .format(filename))
        self.filename = filename


class TMDBImageSizeError(TMDBError):
    """Error code 80 (ImageSizeError)."""


class TMDBHTTPError(TMDBError):
    """
    Wraps an HTTP error object.

    `httperrno` is taken from the wrapped error's `code`, and `response`
    from reading its `fp`.
    """

    def __init__(self, err):
        self.httperrno = err.code
        self.response = err.fp.read()
        super(TMDBHTTPError, self).__init__(str(err))


class TMDBOffline(TMDBError):
    """Error code 100 (Offline)."""


class TMDBLocaleError(TMDBError):
    """Error code 110 (LocaleError)."""

View File

@@ -1,403 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#-----------------------
# Name: util.py Assorted utilities used in tmdb_api
# Python Library
# Author: Raymond Wagner
#-----------------------
from copy import copy
from locales import get_locale
from tmdb_auth import get_session
class NameRepr(object):
    """Mixin for __repr__ methods using 'name' attribute."""

    def __repr__(self):
        text = u"<{0.__class__.__name__} '{0.name}'>".format(self)
        # __repr__ must return the native str type: that is a UTF-8 byte
        # string on Python 2, but text on Python 3, where returning bytes
        # raises TypeError
        if str is bytes:
            return text.encode('utf-8')
        return text
class SearchRepr(object):
    """
    Mixin for __repr__ methods for classes with '_name' and
    '_request' attributes.
    """

    def __repr__(self):
        # fall back on the search query when no explicit name is set
        name = self._name if self._name else self._request._kwargs['query']
        text = u"<Search Results: {0}>".format(name)
        # __repr__ must return the native str type: that is a UTF-8 byte
        # string on Python 2, but text on Python 3, where returning bytes
        # raises TypeError
        if str is bytes:
            return text.encode('utf-8')
        return text
class Poller(object):
    """
    Wrapper for an optional callable to populate an Element derived
    class with raw data, or data from a Request.
    """

    def __init__(self, func, lookup, inst=None):
        """
        func   -- callable returning a request object, or None for a dummy
                  poller that is only used to apply raw data
        lookup -- dictionary mapping field names in the received data to
                  attribute names on the instance
        inst   -- instance the data is applied to; None while the poller
                  is only attached to a class
        """
        self.func = func
        self.lookup = lookup
        self.inst = inst
        if func:
            # with function, this allows polling data from the API
            self.__doc__ = func.__doc__
            self.__name__ = func.__name__
            self.__module__ = func.__module__
        else:
            # without function, this is just a dummy poller used for applying
            # raw data to a new Element class with the lookup table
            self.__name__ = '_populate'

    def __get__(self, inst, owner):
        # normal decorator stuff
        # return self for a class
        # return instantiated copy of self for an object
        if inst is None:
            return self
        func = None
        if self.func:
            func = self.func.__get__(inst, owner)
        return self.__class__(func, self.lookup, inst)

    def __call__(self):
        """
        Retrieve data through the source function and apply it.

        Raises RuntimeError when the poller has no source function.
        """
        if not callable(self.func):
            raise RuntimeError('Poller object called without a source function')
        req = self.func()
        # parenthesised on purpose: `and` binds tighter than `or`, and the
        # fallthrough passes must only run when fallthrough is enabled
        has_filter = ('language' in req._kwargs) or ('country' in req._kwargs)
        if has_filter and self.inst._locale.fallthrough:
            # request specifies a locale filter, and fallthrough is enabled
            # run a first pass with specified filter
            if not self.apply(req.readJSON(), False):
                return
            # if first pass results in missed data, run a second pass to
            # fill in the gaps
            self.apply(req.new(language=None, country=None).readJSON())
            # re-apply the filtered first pass data over top the second
            # unfiltered set. this is to work around the issue that the
            # properties have no way of knowing when they should or
            # should not overwrite existing data. the cache engine will
            # take care of the duplicate query
        self.apply(req.readJSON())

    def apply(self, data, set_nones=True):
        """
        Apply data directly, bypassing the callable function.

        data      -- dictionary of received values, keyed by field name
        set_nones -- when True, attributes that received nothing and hold
                     no value yet are set to None

        Returns True when at least one attribute was left unfilled.
        """
        unfilled = False
        # None values only count as "no data" when they came from a request
        from_request = callable(self.func)
        for k, v in self.lookup.items():
            if (k in data) and not (from_request and data[k] is None):
                # argument received data, populate it
                setattr(self.inst, v, data[k])
            elif k in self.inst._data:
                # argument did not receive data, but Element already contains
                # some value, so skip this. stored values are keyed by field
                # name, so the field and not the attribute name is tested
                continue
            elif set_nones:
                # argument did not receive data, so fill it with None
                # to indicate such and prevent a repeat scan
                setattr(self.inst, v, None)
            else:
                # argument does not need data, so ignore it allowing it to
                # trigger a later poll. this is intended for use when
                # initializing a class with raw data, or when performing a
                # first pass through when performing locale fall through
                unfilled = True
        return unfilled
class Data(object):
    """
    Basic response definition class
    This maps to a single key in a JSON dictionary received from the API
    """

    def __init__(self, field, initarg=None, handler=None, poller=None,
                 raw=True, default=u'', lang=None, passthrough=None):
        """
        This defines how the dictionary value is to be processed by the
        poller
            field   -- defines the dictionary key that filters what data
                       this uses
            initarg -- (optional) specifies that this field must be
                       supplied when creating a new instance of the Element
                       class this definition is mapped to. Takes an integer
                       for the order it should be used in the input
                       arguments
            handler -- (optional) callable used to process the received
                       value before being stored in the Element object.
            poller  -- (optional) callable to be used if data is requested
                       and this value has not yet been defined. the
                       callable should return a dictionary of data from a
                       JSON query. many definitions may share a single
                       poller, which will be called once and the data used
                       to populate all referenced definitions based off
                       their defined field
            raw     -- (optional) if the specified handler is an Element
                       class, the data will be passed into it using the
                       'raw' keyword attribute. setting this to false
                       will force the data to instead be passed in as the
                       first argument
            default -- (optional) value stored when None or an empty
                       string is assigned
            lang    -- (optional) accepted but not used by this class
            passthrough -- (optional) dictionary mapping attribute names
                       on the owning instance to attribute names on the
                       stored Element; each is copied across when the
                       value is set
        """
        self.field = field
        self.initarg = initarg
        self.poller = poller
        self.raw = raw
        self.default = default
        self.sethandler(handler)
        # a fresh dictionary per definition, never a shared default
        self.passthrough = {} if passthrough is None else passthrough

    def __get__(self, inst, owner):
        if inst is None:
            return self
        if self.field not in inst._data:
            if self.poller is None:
                return None
            # value not loaded yet, have the poller fetch and apply it
            self.poller.__get__(inst, owner)()
        return inst._data[self.field]

    def __set__(self, inst, value):
        if (value is not None) and (value != ''):
            value = self.handler(value)
        else:
            value = self.default
        if isinstance(value, Element):
            value._locale = inst._locale
            value._session = inst._session
            # iterate the (source, dest) pairs; iterating the dictionary
            # itself would yield only its keys
            for source, dest in self.passthrough.items():
                setattr(value, dest, getattr(inst, source))
        inst._data[self.field] = value

    def sethandler(self, handler):
        # ensure handler is always callable, even for passthrough data
        if handler is None:
            self.handler = lambda x: x
        elif isinstance(handler, ElementType) and self.raw:
            self.handler = lambda x: handler(raw=x)
        else:
            self.handler = lambda x: handler(x)
class Datapoint(Data):
    """Response definition for a single value; adds nothing to Data."""
class Datalist(Data):
    """
    Response definition class for list data
    This maps to a key in a JSON dictionary storing a list of data
    """

    def __init__(self, field, handler=None, poller=None, sort=None, raw=True, passthrough={}):
        """
        Describe how a list value is processed by the poller.
            field   -- dictionary key whose value this definition consumes
            handler -- (optional) callable applied to every received item
                       before it is stored in the Element object
            poller  -- (optional) callable used when the value is requested
                       but not yet defined. it should return a dictionary
                       of data from a JSON query, and may be shared by
                       several definitions, each picking out its own field
            sort    -- (optional) True to sort the processed items
                       directly, or the name of an attribute of the
                       processed items to sort by. the latter effectively
                       requires a handler producing objects with
                       attributes
            raw     -- (optional) when the handler is an Element class the
                       data is passed to it through the 'raw' keyword;
                       set to false to pass it as the first argument
                       instead
            passthrough -- (optional) dictionary mapping attribute names
                       on the owning instance to attribute names set on
                       every stored Element
        """
        super(Datalist, self).__init__(
            field, initarg=None, handler=handler, poller=poller, raw=raw,
            passthrough=passthrough)
        self.sort = sort

    def __set__(self, inst, value):
        items = []
        for raw_item in (value or ()):
            item = self.handler(raw_item)
            if isinstance(item, Element):
                # hand the owner's context down to the child element
                item._locale = inst._locale
                item._session = inst._session
                for src, dst in self.passthrough.items():
                    setattr(item, dst, getattr(inst, src))
            items.append(item)

        if self.sort is True:
            items.sort()
        elif self.sort:
            items.sort(key=lambda entry: getattr(entry, self.sort))
        inst._data[self.field] = items
class Datadict(Data):
    """
    Response definition class for dictionary data
    This maps to a key in a JSON dictionary storing a dictionary of data
    """

    def __init__(self, field, handler=None, poller=None, raw=True,
                 key=None, attr=None, passthrough={}):
        """
        Describe how a collection is processed by the poller and stored
        as a dictionary.
            field   -- dictionary key whose value this definition consumes
            handler -- (optional) callable applied to every received item
                       before it is stored in the Element object
            poller  -- (optional) callable used when the value is requested
                       but not yet defined. it should return a dictionary
                       of data from a JSON query, and may be shared by
                       several definitions, each picking out its own field
            key     -- name of the key in each processed item whose value
                       becomes the key in the stored dictionary
            attr    -- name of the attribute of each processed item whose
                       value becomes the key in the stored dictionary
            raw     -- (optional) when the handler is an Element class the
                       data is passed to it through the 'raw' keyword;
                       set to false to pass it as the first argument
                       instead
            passthrough -- (optional) dictionary mapping attribute names
                       on the owning instance to attribute names set on
                       every stored Element

        Exactly one of `key` and `attr` must be given; TypeError is
        raised otherwise.
        """
        if key and attr:
            raise TypeError("`key` and `attr` cannot both be defined")
        super(Datadict, self).__init__(
            field, initarg=None, handler=handler, poller=poller, raw=raw,
            passthrough=passthrough)

        if not (key or attr):
            raise TypeError("Datadict requires `key` or `attr` be defined "
                            "for populating the dictionary")
        if key:
            self.getkey = lambda item: item[key]
        else:
            self.getkey = lambda item: getattr(item, attr)

    def __set__(self, inst, value):
        mapping = {}
        for raw_item in (value or ()):
            item = self.handler(raw_item)
            if isinstance(item, Element):
                # hand the owner's context down to the child element
                item._locale = inst._locale
                item._session = inst._session
                for src, dst in self.passthrough.items():
                    setattr(item, dst, getattr(inst, src))
            mapping[self.getkey(item)] = item
        inst._data[self.field] = mapping
class ElementType(type):
    """
    MetaClass used to pre-process Element-derived classes and set up the
    Data definitions
    """

    def __new__(mcs, name, bases, attrs):
        # Data and Poller objects defined on parent classes have to be
        # cloned into the class being built, otherwise they would stay
        # bound to the parent's pollers. bases are walked in reverse so
        # that higher priority bases overwrite lower priority ones
        definitions = {}
        sources = {'_populate': None}
        for parent in reversed(bases):
            if not isinstance(parent, mcs):
                continue
            for key, member in parent.__dict__.items():
                if isinstance(member, Data):
                    # private copy, pointing back at the raw poller function
                    clone = copy(member)
                    clone.poller = clone.poller.func
                    definitions[key] = clone
                elif isinstance(member, Poller):
                    sources[key] = member.func

        # definitions made directly on the new class take precedence
        for key, member in attrs.items():
            if isinstance(member, Data):
                definitions[key] = member
        if '_populate' in attrs:
            sources['_populate'] = attrs['_populate']

        # sort every definition under the poller that fills it, and note
        # which ones are supplied as constructor arguments
        grouped = dict((key, []) for key in sources)
        positional = []
        for key, definition in definitions.items():
            definition.name = key
            if definition.initarg:
                positional.append(definition)
            if definition.poller:
                source_name = definition.poller.__name__
                grouped.setdefault(source_name, [])
                sources.setdefault(source_name, definition.poller)
                grouped[source_name].append(definition)
            else:
                grouped['_populate'].append(definition)

        # wrap each poller function that is actually used in a Poller,
        # and publish it and its definitions as attributes of the class
        for source_name, members in grouped.items():
            if not members:
                continue
            lookup = dict((member.field, member.name) for member in members)
            wrapped = Poller(sources[source_name], lookup)
            attrs[source_name] = wrapped
            for member in members:
                member.poller = wrapped
                attrs[member.name] = member

        # attribute names of the constructor arguments, in initarg order
        positional.sort(key=lambda definition: definition.initarg)
        attrs['_InitArgs'] = tuple(
            [definition.name for definition in positional])
        return type.__new__(mcs, name, bases, attrs)

    def __call__(cls, *args, **kwargs):
        obj = cls.__new__(cls)

        # an explicit, non-None locale wins over the default one
        locale = kwargs.get('locale')
        obj._locale = locale if locale is not None else get_locale()

        # a session keyword is used as given, even when it is None
        if 'session' not in kwargs:
            obj._session = get_session()
        else:
            obj._session = kwargs['session']

        obj._data = {}
        if 'raw' not in kwargs:
            # the positional arguments must match the initargs declared by
            # the Data definitions exactly
            expected = len(cls._InitArgs)
            if len(args) != expected:
                raise TypeError(
                    '__init__() takes exactly {0} arguments ({1} given)'
                    .format(expected + 1, len(args) + 1))
            for attr_name, supplied in zip(cls._InitArgs, args):
                setattr(obj, attr_name, supplied)
        else:
            # raw data is applied directly, and excludes positional
            # arguments
            if args:
                raise TypeError(
                    '__init__() takes exactly 2 arguments (1 given)')
            obj._populate.apply(kwargs['raw'], False)

        obj.__init__()
        return obj
class Element( object ):
__metaclass__ = ElementType
_lang = 'en'