Compare commits

..

21 Commits

Author SHA1 Message Date
Ruud ac8dbe03b2 Py3 fixes 2014-10-06 23:02:30 +02:00
Ruud 6c2aef7a6d Six import 2014-10-06 23:01:47 +02:00
Ruud e1eb68e226 CodernityDB updates 2014-10-06 23:00:50 +02:00
Ruud 4ae02a8764 Don't write binary to configparser 2014-10-06 17:32:36 +02:00
Ruud cc59342e80 NotSupported Exception 2014-10-06 17:31:12 +02:00
Ruud 97099f4d69 Use six module 2014-10-06 17:30:58 +02:00
Ruud c50c290c3e Database helper 2014-10-06 17:28:10 +02:00
Ruud 4775e4a36a CodernityDB 2to3 2014-10-06 17:26:14 +02:00
Ruud 08cb834b4d Don't load unsupported modules 2014-10-06 16:47:38 +02:00
Ruud 46cff26d92 Don't load Twitter notifier on Python 3 2014-10-06 16:47:04 +02:00
Ruud b668e39296 Don't load xmpp on Python 3 2014-10-06 16:46:22 +02:00
Ruud 97ee16eb4e Use six on axel 2014-10-06 16:45:40 +02:00
Ruud 41fd190d38 Update cache lib 2014-10-06 16:45:26 +02:00
Ruud 2b0facb24c Update pytwitter 2014-10-06 16:44:38 +02:00
Ruud 94a29efea5 Remove tmdb3 lib 2014-10-06 16:44:22 +02:00
Ruud 827156485c Remove tmdb3 dependency 2014-10-06 16:43:46 +02:00
Ruud 6b4e6857de BeautifulSoup4 python 3 2014-10-06 11:12:35 +02:00
Ruud 5b7e814166 CodernityDB python 3 2014-10-06 10:59:22 +02:00
Ruud f99b40c2f3 Runner fs encoding 2014-10-06 08:53:17 +02:00
Ruud ae00e83c9d Path helpers 2014-10-06 08:52:48 +02:00
Ruud d4f2f12924 Force logging utf8 2014-10-06 08:16:40 +02:00
144 changed files with 20632 additions and 5127 deletions
+1 -1
View File
@@ -61,7 +61,7 @@ class Loader(object):
self.log = CPLog(__name__)
formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s', '%H:%M:%S')
hdlr = handlers.RotatingFileHandler(os.path.join(self.log_dir, 'error.log'), 'a', 500000, 10)
hdlr = handlers.RotatingFileHandler(os.path.join(self.log_dir, 'error.log'), 'a', 500000, 10, encoding = 'utf-8')
hdlr.setLevel(logging.CRITICAL)
hdlr.setFormatter(formatter)
self.log.logger.addHandler(hdlr)
-241
View File
@@ -1,241 +0,0 @@
from esky.util import appdir_from_executable #@UnresolvedImport
from threading import Thread
from version import VERSION
from wx.lib.softwareupdate import SoftwareUpdate
import os
import sys
import time
import webbrowser
import wx
# Include proper dirs
if hasattr(sys, 'frozen'):
import libs
base_path = os.path.dirname(os.path.dirname(os.path.abspath(libs.__file__)))
else:
base_path = os.path.dirname(os.path.abspath(__file__))
def icon():
icon = 'icon_windows.png'
if os.path.isfile('icon_mac.png'):
icon = 'icon_mac.png'
return wx.Icon(icon, wx.BITMAP_TYPE_PNG)
lib_dir = os.path.join(base_path, 'libs')
sys.path.insert(0, base_path)
sys.path.insert(0, lib_dir)
from couchpotato.environment import Env
class TaskBarIcon(wx.TaskBarIcon):
TBMENU_OPEN = wx.NewId()
TBMENU_SETTINGS = wx.NewId()
TBMENU_EXIT = wx.ID_EXIT
closed = False
menu = False
enabled = False
def __init__(self, frame):
wx.TaskBarIcon.__init__(self)
self.frame = frame
self.SetIcon(icon())
self.Bind(wx.EVT_TASKBAR_LEFT_UP, self.OnTaskBarClick)
self.Bind(wx.EVT_TASKBAR_RIGHT_UP, self.OnTaskBarClick)
self.Bind(wx.EVT_MENU, self.onOpen, id = self.TBMENU_OPEN)
self.Bind(wx.EVT_MENU, self.onSettings, id = self.TBMENU_SETTINGS)
self.Bind(wx.EVT_MENU, self.onTaskBarClose, id = self.TBMENU_EXIT)
def OnTaskBarClick(self, evt):
menu = self.CreatePopupMenu()
self.PopupMenu(menu)
menu.Destroy()
def enable(self):
self.enabled = True
if self.menu:
self.open_menu.Enable(True)
self.setting_menu.Enable(True)
self.open_menu.SetText('Open')
def CreatePopupMenu(self):
if not self.menu:
self.menu = wx.Menu()
self.open_menu = self.menu.Append(self.TBMENU_OPEN, 'Open')
self.setting_menu = self.menu.Append(self.TBMENU_SETTINGS, 'About')
self.exit_menu = self.menu.Append(self.TBMENU_EXIT, 'Quit')
if not self.enabled:
self.open_menu.Enable(False)
self.setting_menu.Enable(False)
self.open_menu.SetText('Loading...')
return self.menu
def onOpen(self, event):
url = self.frame.parent.getSetting('base_url')
webbrowser.open(url)
def onSettings(self, event):
url = self.frame.parent.getSetting('base_url') + 'settings/about/'
webbrowser.open(url)
def onTaskBarClose(self, evt):
if self.closed:
return
self.closed = True
self.RemoveIcon()
wx.CallAfter(self.frame.Close)
def makeIcon(self, img):
if "wxMSW" in wx.PlatformInfo:
img = img.Scale(16, 16)
elif "wxGTK" in wx.PlatformInfo:
img = img.Scale(22, 22)
icon = wx.IconFromBitmap(img.CopyFromBitmap())
return icon
class MainFrame(wx.Frame):
def __init__(self, parent):
wx.Frame.__init__(self, None, style = wx.FRAME_NO_TASKBAR)
self.parent = parent
self.tbicon = TaskBarIcon(self)
class WorkerThread(Thread):
def __init__(self, desktop):
Thread.__init__(self)
self.daemon = True
self._desktop = desktop
self.start()
def run(self):
# Get options via arg
from couchpotato.runner import getOptions
args = ['--quiet']
self.options = getOptions(args)
# Load settings
settings = Env.get('settings')
settings.setFile(self.options.config_file)
# Create data dir if needed
self.data_dir = os.path.expanduser(Env.setting('data_dir'))
if self.data_dir == '':
from couchpotato.core.helpers.variable import getDataDir
self.data_dir = getDataDir()
if not os.path.isdir(self.data_dir):
os.makedirs(self.data_dir)
# Create logging dir
self.log_dir = os.path.join(self.data_dir, 'logs');
if not os.path.isdir(self.log_dir):
os.mkdir(self.log_dir)
try:
from couchpotato.runner import runCouchPotato
runCouchPotato(self.options, base_path, args, data_dir = self.data_dir, log_dir = self.log_dir, Env = Env, desktop = self._desktop)
except:
pass
self._desktop.frame.Close()
self._desktop.ExitMainLoop()
class CouchPotatoApp(wx.App, SoftwareUpdate):
settings = {}
events = {}
restart = False
closing = False
triggered_onClose = False
def OnInit(self):
# Updater
base_url = 'https://api.couchpota.to/updates/%s'
self.InitUpdates(base_url % VERSION + '/', 'https://couchpota.to/updates/%s' % 'changelog.html',
icon = icon())
self.frame = MainFrame(self)
self.frame.Bind(wx.EVT_CLOSE, self.onClose)
# CouchPotato thread
self.worker = WorkerThread(self)
return True
def onAppLoad(self):
self.frame.tbicon.enable()
def setSettings(self, settings = {}):
self.settings = settings
def getSetting(self, name):
return self.settings.get(name)
def addEvents(self, events = {}):
for name in events.iterkeys():
self.events[name] = events[name]
def onClose(self, event):
if not self.closing:
self.closing = True
self.frame.tbicon.onTaskBarClose(event)
onClose = self.events.get('onClose')
if onClose and not self.triggered_onClose:
self.triggered_onClose = True
onClose(event)
def afterShutdown(self, restart = False):
self.frame.Destroy()
self.restart = restart
self.ExitMainLoop()
if __name__ == '__main__':
app = CouchPotatoApp(redirect = False)
app.MainLoop()
time.sleep(1)
if app.restart:
def appexe_from_executable(exepath):
appdir = appdir_from_executable(exepath)
exename = os.path.basename(exepath)
if sys.platform == "darwin":
if os.path.isdir(os.path.join(appdir, "Contents", "MacOS")):
return os.path.join(appdir, "Contents", "MacOS", exename)
return os.path.join(appdir, exename)
exe = appexe_from_executable(sys.executable)
os.chdir(os.path.dirname(exe))
os.execv(exe, [exe] + sys.argv[1:])
+2 -2
View File
@@ -3,7 +3,7 @@ from threading import Thread
import json
import threading
import traceback
import urllib
from six.moves import urllib
from couchpotato.core.helpers.request import getParams
from couchpotato.core.logger import CPLog
@@ -102,7 +102,7 @@ class ApiHandler(RequestHandler):
kwargs = {}
for x in self.request.arguments:
kwargs[x] = urllib.unquote(self.get_argument(x))
kwargs[x] = urllib.parse.unquote(self.get_argument(x))
# Split array arguments
kwargs = getParams(kwargs)
+3 -3
View File
@@ -181,13 +181,13 @@ class Core(Plugin):
return '%sapi/%s' % (self.createBaseUrl(), Env.setting('api_key'))
def version(self):
ver = fireEvent('updater.info', single = True) or {'version': {}}
ver = fireEvent('updater.info', single = True)
if os.name == 'nt': platf = 'windows'
elif 'Darwin' in platform.platform(): platf = 'osx'
else: platf = 'linux'
return '%s - %s-%s - v2' % (platf, ver.get('version').get('type') or 'unknown', ver.get('version').get('hash') or 'unknown')
return '%s - %s-%s - v2' % (platf, ver.get('version')['type'], ver.get('version')['hash'])
def versionView(self, **kwargs):
return {
@@ -290,7 +290,7 @@ config = [{
},
{
'name': 'permission_file',
'default': '0644',
'default': '0755',
'label': 'File CHMOD',
'description': 'See Folder CHMOD description, but for files',
},
+8 -17
View File
@@ -205,28 +205,19 @@ class GitUpdater(BaseUpdater):
def getVersion(self):
if not self.version:
hash = None
date = None
branch = self.branch
try:
output = self.repo.getHead() # Yes, please
log.debug('Git version output: %s', output.hash)
hash = output.hash[:8]
date = output.getDate()
branch = self.repo.getCurrentBranch().name
self.version = {
'repr': 'git:(%s:%s % s) %s (%s)' % (self.repo_user, self.repo_name, self.repo.getCurrentBranch().name or self.branch, output.hash[:8], datetime.fromtimestamp(output.getDate())),
'hash': output.hash[:8],
'date': output.getDate(),
'type': 'git',
'branch': self.repo.getCurrentBranch().name
}
except Exception as e:
log.error('Failed using GIT updater, running from source, you need to have GIT installed. %s', e)
self.version = {
'repr': 'git:(%s:%s % s) %s (%s)' % (self.repo_user, self.repo_name, branch, hash or 'unknown_hash', datetime.fromtimestamp(date) if date else 'unknown_date'),
'hash': hash,
'date': date,
'type': 'git',
'branch': branch
}
return 'No GIT'
return self.version
+2 -2
View File
@@ -3,12 +3,12 @@ import os
import time
import traceback
from sqlite3 import OperationalError
from CodernityDB3.index import Index
from CodernityDB.database import RecordNotFound
from CodernityDB.index import IndexException, IndexNotFoundException, IndexConflict
from couchpotato import CPLog
from couchpotato.api import addApiView
from couchpotato.core.event import addEvent, fireEvent, fireEventAsync
from couchpotato.core.helpers.database import IndexException, IndexNotFoundException, IndexConflict, RecordNotFound
from couchpotato.core.helpers.encoding import toUnicode, sp
from couchpotato.core.helpers.variable import getImdb, tryInt, randomString
+2 -2
View File
@@ -1,7 +1,7 @@
from base64 import b16encode, b32decode
from datetime import timedelta
from hashlib import sha1
from urlparse import urlparse
from six.moves import urllib
import os
from couchpotato.core._base.downloader.main import DownloaderBase, ReleaseDownloadList
@@ -62,7 +62,7 @@ class rTorrent(DownloaderBase):
if self.conf('ssl') and url.startswith('httprpc://'):
url = url.replace('httprpc://', 'httprpc+https://')
parsed = urlparse(url)
parsed = urllib.urlparse(url)
# rpc_url is only used on http/https scgi pass-through
if parsed.scheme in ['http', 'https']:
+2 -4
View File
@@ -78,14 +78,12 @@ class Transmission(DownloaderBase):
log.error('Failed sending torrent to Transmission')
return False
data = remote_torrent.get('torrent-added') or remote_torrent.get('torrent-duplicate')
# Change settings of added torrents
if torrent_params:
self.trpc.set_torrent(data['hashString'], torrent_params)
self.trpc.set_torrent(remote_torrent['torrent-added']['hashString'], torrent_params)
log.info('Torrent sent to Transmission successfully.')
return self.downloadReturnId(data['hashString'])
return self.downloadReturnId(remote_torrent['torrent-added']['hashString'])
def test(self):
if self.connect() and self.trpc.get_session():
+1 -1
View File
@@ -102,7 +102,7 @@ def fireEvent(name, *args, **kwargs):
# Fire
result = e(*args, **kwargs)
result_keys = result.keys()
result_keys = list(result.keys())
result_keys.sort(key = natsortKey)
if options['single'] and not options['merge']:
+26
View File
@@ -0,0 +1,26 @@
from six import PY2
if PY2:
from CodernityDB.database_super_thread_safe import SuperThreadSafeDatabase
from CodernityDB.index import IndexException, IndexConflict, IndexNotFoundException
from CodernityDB.database import RecordNotFound, RecordDeleted
from CodernityDB.hash_index import HashIndex
from CodernityDB.tree_index import MultiTreeBasedIndex, TreeBasedIndex
else:
from CodernityDB3.database_super_thread_safe import SuperThreadSafeDatabase
from CodernityDB3.index import IndexException, IndexConflict, IndexNotFoundException
from CodernityDB3.database import RecordNotFound, RecordDeleted
from CodernityDB3.hash_index import HashIndex
from CodernityDB3.tree_index import MultiTreeBasedIndex, TreeBasedIndex
SuperThreadSafeDatabase = SuperThreadSafeDatabase
IndexException = IndexException
IndexNotFoundException = IndexNotFoundException
IndexConflict = IndexConflict
RecordNotFound = RecordNotFound
HashIndex = HashIndex
MultiTreeBasedIndex = MultiTreeBasedIndex
TreeBasedIndex = TreeBasedIndex
RecordDeleted = RecordDeleted
+25 -8
View File
@@ -1,11 +1,11 @@
from string import ascii_letters, digits
from urllib import quote_plus
import os
import re
import traceback
import unicodedata
from chardet import detect
from six.moves import urllib
from couchpotato.core.logger import CPLog
import six
@@ -16,7 +16,7 @@ log = CPLog(__name__)
def toSafeString(original):
valid_chars = "-_.() %s%s" % (ascii_letters, digits)
cleaned_filename = unicodedata.normalize('NFKD', toUnicode(original)).encode('ASCII', 'ignore')
valid_string = ''.join(c for c in cleaned_filename if c in valid_chars)
valid_string = ''.join(list(six.unichr(c) for c in cleaned_filename if six.unichr(c) in valid_chars))
return ' '.join(valid_string.split())
@@ -29,7 +29,7 @@ def simplifyString(original):
def toUnicode(original, *args):
try:
if isinstance(original, unicode):
if isinstance(original, six.text_type):
return original
else:
try:
@@ -47,16 +47,32 @@ def toUnicode(original, *args):
ascii_text = str(original).encode('string_escape')
return toUnicode(ascii_text)
def toUTF8(original):
try:
if isinstance(original, six.binary_type) and len(original) > 0:
# Try to detect
detected = detect(original)
return original.decode(detected.get('encoding')).encode('utf-8')
else:
return original
except:
#log.error('Failed encoding to UTF8: %s', traceback.format_exc())
raise
def ss(original, *args):
u_original = toUnicode(original, *args)
try:
from couchpotato.environment import Env
return u_original.encode(Env.get('encoding'))
if isinstance(u_original, six.text_type):
u_original = u_original.encode('unicode_escape')
else:
u_original = u_original
return six.u(u_original)
except Exception as e:
log.debug('Failed ss encoding char, force UTF8: %s', e)
try:
from couchpotato.environment import Env
return u_original.encode(Env.get('encoding'), 'replace')
except:
return u_original.encode('utf-8', 'replace')
@@ -72,7 +88,7 @@ def sp(path, *args):
if os.path.sep == '/' and '\\' in path:
path = '/' + path.replace(':', '').replace('\\', '/')
path = os.path.normpath(ss(path, *args))
path = os.path.normpath(path)
# Remove any trailing path separators
if path != os.path.sep:
@@ -114,14 +130,15 @@ def stripAccents(s):
def tryUrlencode(s):
new = six.u('')
if isinstance(s, dict):
for key, value in s.items():
for key, value in list(s.items()):
new += six.u('&%s=%s') % (key, tryUrlencode(value))
return new[1:]
else:
for letter in ss(s):
letter = six.unichr(letter)
try:
new += quote_plus(letter)
new += urllib.parse.quote_plus(letter)
except:
new += letter
+51
View File
@@ -0,0 +1,51 @@
import os
from chardet import detect
from couchpotato import Env
fs_enc = Env.get('fs_encoding')
def list_dir(path, full_path = True):
"""
List directory don't error when it doesn't exist
"""
path = unicode_path(path)
if os.path.isdir(path):
for f in os.listdir(path):
if full_path:
yield join(path, f)
else:
yield f
def join(*args):
"""
Join path, encode properly before joining
"""
return os.path.join(*[safe(x) for x in args])
def unicode_path(path):
"""
Convert back to unicode
:param path: path string
"""
if isinstance(path, str):
detected = detect(path)
print detected
path = path.decode(detected.get('encoding'))
path = path.decode('unicode_escape')
return path
def safe(path):
if isinstance(path, unicode):
return path.encode('unicode_escape')
return path
+2
View File
@@ -0,0 +1,2 @@
class NotSupported(Exception):
pass
+4 -4
View File
@@ -1,7 +1,7 @@
from urllib import unquote
import re
from couchpotato.core.helpers.encoding import toUnicode
from six.moves import urllib
from couchpotato.core.helpers.variable import natsortKey
@@ -10,7 +10,7 @@ def getParams(params):
reg = re.compile('^[a-z0-9_\.]+$')
# Sort keys
param_keys = params.keys()
param_keys = list(params.keys())
param_keys.sort(key = natsortKey)
temp = {}
@@ -28,7 +28,7 @@ def getParams(params):
for item in nested:
if item is nested[-1]:
current[item] = toUnicode(unquote(value))
current[item] = toUnicode(urllib.parse.unquote(value))
else:
try:
current[item]
@@ -37,7 +37,7 @@ def getParams(params):
current = current[item]
else:
temp[param] = toUnicode(unquote(value))
temp[param] = toUnicode(urllib.parse.unquote(value))
if temp[param].lower() in ['true', 'false']:
temp[param] = temp[param].lower() != 'false'
+4 -1
View File
@@ -3,6 +3,7 @@ import sys
import traceback
from couchpotato.core.event import fireEvent
from couchpotato.core.helpers.py3 import NotSupported
from couchpotato.core.logger import CPLog
from importhelper import import_module
import six
@@ -131,7 +132,7 @@ class Loader(object):
return False
try:
# Load single file plugin
if isinstance(module.autoload, (str, unicode)):
if isinstance(module.autoload, (six.string_types, six.text_type)):
getattr(module, module.autoload)()
# Load folder plugin
else:
@@ -162,6 +163,8 @@ class Loader(object):
def loadModule(self, name):
try:
return import_module(name)
except NotSupported:
log.error('Module "%s" is not supported in Python 3', name)
except ImportError:
log.debug('Skip loading module plugin %s: %s', (name, traceback.format_exc()))
return None
+9 -8
View File
@@ -1,5 +1,6 @@
import logging
import re
import traceback
class CPLog(object):
@@ -54,19 +55,19 @@ class CPLog(object):
def safeMessage(self, msg, replace_tuple = ()):
from couchpotato.core.helpers.encoding import ss, toUnicode
from couchpotato.core.helpers.encoding import ss, toUTF8
msg = ss(msg)
msg = toUTF8(msg)
try:
if isinstance(replace_tuple, tuple):
msg = msg % tuple([ss(x) if not isinstance(x, (int, float)) else x for x in list(replace_tuple)])
msg = msg % tuple([toUTF8(x) for x in list(replace_tuple)])
elif isinstance(replace_tuple, dict):
msg = msg % dict((k, ss(v)) for k, v in replace_tuple.iteritems())
msg = msg % dict((k, toUTF8(v)) for k, v in replace_tuple.iteritems())
else:
msg = msg % ss(replace_tuple)
except Exception as e:
self.logger.error('Failed encoding stuff to log "%s": %s' % (msg, e))
msg = msg % toUTF8(replace_tuple)
except:
self.logger.error('Failed encoding stuff to log "%s": %s' % (msg, traceback.format_exc()))
self.setup()
if not self.is_develop:
@@ -83,4 +84,4 @@ class CPLog(object):
except:
pass
return toUnicode(msg)
return toUTF8(msg)
+1 -1
View File
@@ -89,7 +89,7 @@ class MediaBase(Plugin):
# Loop over type
for image in image_urls.get(image_type, []):
if not isinstance(image, (str, unicode)):
if not isinstance(image, six.string_types):
continue
if file_type not in existing_files or len(existing_files.get(file_type, [])) == 0:
+9 -8
View File
@@ -1,14 +1,14 @@
from string import ascii_letters
from hashlib import md5
from couchpotato.core.helpers.database import MultiTreeBasedIndex, TreeBasedIndex
from CodernityDB.tree_index import MultiTreeBasedIndex, TreeBasedIndex
from couchpotato.core.helpers.encoding import toUnicode, simplifyString
class MediaIndex(MultiTreeBasedIndex):
_version = 3
custom_header = """from CodernityDB.tree_index import MultiTreeBasedIndex"""
custom_header = """from couchpotato.core.helpers.database import MultiTreeBasedIndex"""
def __init__(self, *args, **kwargs):
kwargs['key_format'] = '32s'
@@ -62,10 +62,11 @@ class MediaTypeIndex(TreeBasedIndex):
class TitleSearchIndex(MultiTreeBasedIndex):
_version = 1
_version = 2
custom_header = """from CodernityDB.tree_index import MultiTreeBasedIndex
from itertools import izip
custom_header = """from couchpotato.core.helpers.database import MultiTreeBasedIndex
try: from itertools import izip
except: izip = zip
from couchpotato.core.helpers.encoding import simplifyString"""
def __init__(self, *args, **kwargs):
@@ -101,7 +102,7 @@ from couchpotato.core.helpers.encoding import simplifyString"""
class TitleIndex(TreeBasedIndex):
_version = 4
custom_header = """from CodernityDB.tree_index import TreeBasedIndex
custom_header = """from couchpotato.core.helpers.database import TreeBasedIndex
from string import ascii_letters
from couchpotato.core.helpers.encoding import toUnicode, simplifyString"""
@@ -134,7 +135,7 @@ from couchpotato.core.helpers.encoding import toUnicode, simplifyString"""
class StartsWithIndex(TreeBasedIndex):
_version = 3
custom_header = """from CodernityDB.tree_index import TreeBasedIndex
custom_header = """from couchpotato.core.helpers.database import TreeBasedIndex
from string import ascii_letters
from couchpotato.core.helpers.encoding import toUnicode, simplifyString"""
@@ -180,7 +181,7 @@ class MediaChildrenIndex(TreeBasedIndex):
class MediaTagIndex(MultiTreeBasedIndex):
_version = 2
custom_header = """from CodernityDB.tree_index import MultiTreeBasedIndex"""
custom_header = """from couchpotato.core.helpers.database import MultiTreeBasedIndex"""
def __init__(self, *args, **kwargs):
kwargs['key_format'] = '32s'
+5 -9
View File
@@ -3,15 +3,16 @@ import time
import traceback
from string import ascii_lowercase
from CodernityDB.database import RecordNotFound, RecordDeleted
from couchpotato import tryInt, get_db
from couchpotato.api import addApiView
from couchpotato.core.event import fireEvent, fireEventAsync, addEvent
from couchpotato.core.helpers.database import RecordNotFound, RecordDeleted
from couchpotato.core.helpers.encoding import toUnicode
from couchpotato.core.helpers.variable import splitString, getImdb, getTitle
from couchpotato.core.logger import CPLog
from couchpotato.core.media import MediaBase
from .index import MediaIndex, MediaStatusIndex, MediaTypeIndex, TitleSearchIndex, TitleIndex, StartsWithIndex, MediaChildrenIndex, MediaTagIndex
import six
log = CPLog(__name__)
@@ -280,7 +281,7 @@ class MediaPlugin(MediaBase):
offset = 0
limit = -1
if limit_offset:
splt = splitString(limit_offset) if isinstance(limit_offset, (str, unicode)) else limit_offset
splt = splitString(limit_offset) if isinstance(limit_offset, six.string_types) else limit_offset
limit = tryInt(splt[0])
offset = tryInt(0 if len(splt) is 1 else splt[1])
@@ -456,11 +457,6 @@ class MediaPlugin(MediaBase):
deleted = True
elif new_media_status:
media['status'] = new_media_status
# Remove profile (no use for in manage)
if new_media_status == 'done':
media['profile_id'] = None
db.update(media)
fireEvent('media.untag', media['_id'], 'recent', single = True)
@@ -496,7 +492,7 @@ class MediaPlugin(MediaBase):
}
})
def restatus(self, media_id, tag_recent = True, allowed_restatus = None):
def restatus(self, media_id, tag_recent = True):
try:
db = get_db()
@@ -531,7 +527,7 @@ class MediaPlugin(MediaBase):
m['status'] = previous_status
# Only update when status has changed
if previous_status != m['status'] and (not allowed_restatus or m['status'] in allowed_restatus):
if previous_status != m['status']:
db.update(m)
# Tag media as recent
@@ -1,4 +1,4 @@
from urlparse import urlparse
from six.moves import urllib
import json
import re
import time
@@ -50,7 +50,7 @@ class Provider(Plugin):
if Env.get('dev'): return True
now = time.time()
host = urlparse(test_url).hostname
host = urllib.urlparse(test_url).hostname
if self.last_available_check.get(host) < now - 900:
self.last_available_check[host] = now
@@ -219,7 +219,7 @@ class YarrProvider(Provider):
if provider and provider == self.getName():
return self
hostname = urlparse(url).hostname
hostname = urllib.urlparse(url).hostname
if host and hostname in host:
return self
else:
@@ -1,4 +1,4 @@
from urlparse import urlparse
from six.moves import urllib
import time
import traceback
import re
@@ -97,7 +97,7 @@ class Base(NZBProvider, RSS):
results.append({
'id': nzb_id,
'provider_extra': urlparse(host['host']).hostname or host['host'],
'provider_extra': urllib.urlparse(host['host']).hostname or host['host'],
'name': toUnicode(name),
'name_extra': name_extra,
'age': self.calculateAge(int(time.mktime(parse(date).timetuple()))),
@@ -175,7 +175,7 @@ class Base(NZBProvider, RSS):
return '&apikey=%s' % host['api_key']
def download(self, url = '', nzb_id = ''):
host = urlparse(url).hostname
host = urllib.urlparse(url).hostname
if self.limits_reached.get(host):
# Try again in 3 hours
@@ -1,4 +1,4 @@
from urlparse import urlparse, parse_qs
from six.moves import urllib
import time
from couchpotato.core.event import fireEvent
@@ -52,7 +52,7 @@ class Base(NZBProvider, RSS):
for nzb in nzbs:
enclosure = self.getElement(nzb, 'enclosure').attrib
nzb_id = parse_qs(urlparse(self.getTextElement(nzb, 'link')).query).get('id')[0]
nzb_id = urllib.parse_qs(urllib.urlparse(self.getTextElement(nzb, 'link')).query).get('id')[0]
results.append({
'id': nzb_id,
@@ -1,4 +1,4 @@
import htmlentitydefs
from six.moves import html_entities
import json
import re
import time
@@ -145,15 +145,15 @@ class Base(TorrentProvider):
# character reference
try:
if txt[:3] == "&#x":
return unichr(int(txt[3:-1], 16))
return six.unichr(int(txt[3:-1], 16))
else:
return unichr(int(txt[2:-1]))
return six.unichr(int(txt[2:-1]))
except ValueError:
pass
else:
# named entity
try:
txt = unichr(htmlentitydefs.name2codepoint[txt[1:-1]])
txt = six.unichr(html_entities.name2codepoint[txt[1:-1]])
except KeyError:
pass
return txt # leave as is
@@ -1,4 +1,4 @@
from urlparse import urlparse
from six.moves import urllib
import re
import traceback
@@ -45,7 +45,7 @@ class Base(TorrentProvider):
results.append({
'id': torrent.get('torrent_id'),
'protocol': 'torrent' if re.match('^(http|https|ftp)://.*$', torrent.get('download_url')) else 'torrent_magnet',
'provider_extra': urlparse(host['host']).hostname or host['host'],
'provider_extra': urllib.urlparse(host['host']).hostname or host['host'],
'name': toUnicode(torrent.get('release_name')),
'url': torrent.get('download_url'),
'detail_url': torrent.get('details_url'),
@@ -1,4 +1,4 @@
from urlparse import urlparse
from six.moves import urllib
from couchpotato.core.event import addEvent, fireEvent
from couchpotato.core.helpers.encoding import simplifyString
@@ -34,7 +34,7 @@ class UserscriptBase(Plugin):
def belongsTo(self, url):
host = urlparse(url).hostname
host = urllib.urlparse(url).hostname
host_split = host.split('.')
if len(host_split) > 2:
host = host[len(host_split[0]):]
+2 -1
View File
@@ -3,6 +3,7 @@ from couchpotato.core.event import fireEvent, addEvent
from couchpotato.core.helpers.variable import mergeDicts, getImdb
from couchpotato.core.logger import CPLog
from couchpotato.core.plugins.base import Plugin
import six
log = CPLog(__name__)
@@ -30,7 +31,7 @@ class Search(Plugin):
def search(self, q = '', types = None, **kwargs):
# Make sure types is the correct instance
if isinstance(types, (str, unicode)):
if isinstance(types, six.string_types):
types = [types]
elif isinstance(types, (list, tuple, set)):
types = list(types)
+1 -1
View File
@@ -1,10 +1,10 @@
import traceback
import time
from CodernityDB.database import RecordNotFound
from couchpotato import get_db
from couchpotato.api import addApiView
from couchpotato.core.event import fireEvent, fireEventAsync, addEvent
from couchpotato.core.helpers.database import RecordNotFound
from couchpotato.core.helpers.encoding import toUnicode
from couchpotato.core.helpers.variable import splitString, getTitle, getImdb, getIdentifier
from couchpotato.core.logger import CPLog
@@ -1,9 +1,9 @@
import copy
import traceback
from CodernityDB.database import RecordNotFound
from couchpotato import get_db
from couchpotato.core.event import addEvent, fireEvent
from couchpotato.core.helpers.database import RecordNotFound
from couchpotato.core.helpers.variable import mergeDicts, randomString
from couchpotato.core.logger import CPLog
from couchpotato.core.plugins.base import Plugin
@@ -7,6 +7,7 @@ from couchpotato.core.helpers.encoding import tryUrlencode
from couchpotato.core.helpers.variable import tryInt, tryFloat, splitString
from couchpotato.core.logger import CPLog
from couchpotato.core.media.movie.providers.base import MovieProvider
import six
log = CPLog(__name__)
@@ -72,7 +73,7 @@ class OMDBAPI(MovieProvider):
try:
try:
if isinstance(movie, (str, unicode)):
if isinstance(movie, six.string_types):
movie = json.loads(movie)
except ValueError:
log.info('No proper json to decode')
@@ -1,7 +1,8 @@
import traceback
import time
from couchpotato.core.event import addEvent, fireEvent
from couchpotato.core.helpers.encoding import toUnicode, ss, tryUrlencode
from couchpotato.core.helpers.encoding import simplifyString, toUnicode, ss, tryUrlencode
from couchpotato.core.helpers.variable import tryInt
from couchpotato.core.logger import CPLog
from couchpotato.core.media.movie.providers.base import MovieProvider
@@ -13,7 +14,7 @@ autoload = 'TheMovieDb'
class TheMovieDb(MovieProvider):
http_time_between_calls = .35
http_time_between_calls = .3
configuration = {
'images': {
@@ -22,8 +23,6 @@ class TheMovieDb(MovieProvider):
}
def __init__(self):
addEvent('info.search', self.search, priority = 3)
addEvent('movie.search', self.search, priority = 3)
addEvent('movie.info', self.getInfo, priority = 3)
addEvent('movie.info_by_tmdb', self.getInfo)
addEvent('app.load', self.config)
@@ -33,44 +32,49 @@ class TheMovieDb(MovieProvider):
if configuration:
self.configuration = configuration
def search(self, q, limit = 3):
def search(self, q, limit = 12):
""" Find movie by name """
if self.isDisabled():
return False
log.debug('Searching for movie: %s', q)
search_string = simplifyString(q)
cache_key = 'tmdb.cache.%s.%s' % (search_string, limit)
results = None #self.getCache(cache_key)
raw = None
try:
name_year = fireEvent('scanner.name_year', q, single = True)
raw = self.request('search/movie', {
'query': name_year.get('name', q),
'year': name_year.get('year'),
'search_type': 'ngram' if limit > 1 else 'phrase'
}, return_key = 'results')
except:
log.error('Failed searching TMDB for "%s": %s', (q, traceback.format_exc()))
if not results:
log.debug('Searching for movie: %s', q)
results = []
if raw:
raw = None
try:
nr = 0
for movie in raw:
parsed_movie = self.parseMovie(movie, extended = False)
results.append(parsed_movie)
#name_year = fireEvent('scanner.name_year', q, single = True)
nr += 1
if nr == limit:
break
raw = self.request('search/movie', {
'query': q
}, return_key = 'results')
except:
log.error('Failed searching TMDB for "%s": %s', (search_string, traceback.format_exc()))
log.info('Found: %s', [result['titles'][0] + ' (' + str(result.get('year', 0)) + ')' for result in results])
results = []
if raw:
try:
nr = 0
return results
except SyntaxError as e:
log.error('Failed to parse XML response: %s', e)
return False
for movie in raw:
results.append(self.parseMovie(movie, extended = False))
nr += 1
if nr == limit:
break
log.info('Found: %s', [result['titles'][0] + ' (' + str(result.get('year', 0)) + ')' for result in results])
self.setCache(cache_key, results)
return results
except SyntaxError as e:
log.error('Failed to parse XML response: %s', e)
return False
return results
@@ -87,81 +91,89 @@ class TheMovieDb(MovieProvider):
def parseMovie(self, movie, extended = True):
# Do request, append other items
movie = self.request('movie/%s' % movie.get('id'), {
'append_to_response': 'alternative_titles' + (',images,casts' if extended else '')
})
cache_key = 'tmdb.cache.%s%s' % (movie.get('id'), '.ex' if extended else '')
movie_data = None #self.getCache(cache_key)
# Images
poster = self.getImage(movie, type = 'poster', size = 'w154')
poster_original = self.getImage(movie, type = 'poster', size = 'original')
backdrop_original = self.getImage(movie, type = 'backdrop', size = 'original')
extra_thumbs = self.getMultImages(movie, type = 'backdrops', size = 'original') if extended else []
images = {
'poster': [poster] if poster else [],
#'backdrop': [backdrop] if backdrop else [],
'poster_original': [poster_original] if poster_original else [],
'backdrop_original': [backdrop_original] if backdrop_original else [],
'actors': {},
'extra_thumbs': extra_thumbs
}
# Genres
try:
genres = [genre.get('name') for genre in movie.get('genres', [])]
except:
genres = []
# 1900 is the same as None
year = str(movie.get('release_date') or '')[:4]
if not movie.get('release_date') or year == '1900' or year.lower() == 'none':
year = None
# Gather actors data
actors = {}
if extended:
if not movie_data:
# Full data
cast = movie.get('casts', {}).get('cast', [])
movie = self.request('movie/%s' % movie.get('id'))
for cast_item in cast:
try:
actors[toUnicode(cast_item.get('name'))] = toUnicode(cast_item.get('character'))
images['actors'][toUnicode(cast_item.get('name'))] = self.getImage(cast_item, type = 'profile', size = 'original')
except:
log.debug('Error getting cast info for %s: %s', (cast_item, traceback.format_exc()))
# Images
poster = self.getImage(movie, type = 'poster', size = 'w154')
poster_original = self.getImage(movie, type = 'poster', size = 'original')
backdrop_original = self.getImage(movie, type = 'backdrop', size = 'original')
extra_thumbs = self.getMultImages(movie, type = 'backdrops', size = 'original')
movie_data = {
'type': 'movie',
'via_tmdb': True,
'tmdb_id': movie.get('id'),
'titles': [toUnicode(movie.get('title'))],
'original_title': movie.get('original_title'),
'images': images,
'imdb': movie.get('imdb_id'),
'runtime': movie.get('runtime'),
'released': str(movie.get('release_date')),
'year': tryInt(year, None),
'plot': movie.get('overview'),
'genres': genres,
'collection': getattr(movie.get('belongs_to_collection'), 'name', None),
'actor_roles': actors
}
images = {
'poster': [poster] if poster else [],
#'backdrop': [backdrop] if backdrop else [],
'poster_original': [poster_original] if poster_original else [],
'backdrop_original': [backdrop_original] if backdrop_original else [],
'actors': {},
'extra_thumbs': extra_thumbs
}
movie_data = dict((k, v) for k, v in movie_data.items() if v)
# Genres
try:
genres = [genre.get('name') for genre in movie.get('genres', [])]
except:
genres = []
# Add alternative names
if movie_data['original_title'] and movie_data['original_title'] not in movie_data['titles']:
movie_data['titles'].append(movie_data['original_title'])
# 1900 is the same as None
year = str(movie.get('release_date') or '')[:4]
if not movie.get('release_date') or year == '1900' or year.lower() == 'none':
year = None
# Add alternative titles
alternate_titles = movie.get('alternative_titles', {}).get('titles', [])
# Gather actors data
actors = {}
if extended:
for alt in alternate_titles:
alt_name = alt.get('title')
if alt_name and alt_name not in movie_data['titles'] and alt_name.lower() != 'none' and alt_name is not None:
movie_data['titles'].append(alt_name)
# Full data
cast = self.request('movie/%s/casts' % movie.get('id'), return_key = 'cast')
for cast_item in cast:
try:
actors[toUnicode(cast_item.get('name'))] = toUnicode(cast_item.get('character'))
images['actors'][toUnicode(cast_item.get('name'))] = self.getImage(cast_item, type = 'profile', size = 'original')
except:
log.debug('Error getting cast info for %s: %s', (cast_item, traceback.format_exc()))
movie_data = {
'type': 'movie',
'via_tmdb': True,
'tmdb_id': movie.get('id'),
'titles': [toUnicode(movie.get('title'))],
'original_title': movie.get('original_title'),
'images': images,
'imdb': movie.get('imdb_id'),
'runtime': movie.get('runtime'),
'released': str(movie.get('release_date')),
'year': tryInt(year, None),
'plot': movie.get('overview'),
'genres': genres,
'collection': getattr(movie.get('belongs_to_collection'), 'name', None),
'actor_roles': actors
}
movie_data = dict((k, v) for k, v in movie_data.items() if v)
# Add alternative names
if movie_data['original_title'] and movie_data['original_title'] not in movie_data['titles']:
movie_data['titles'].append(movie_data['original_title'])
if extended:
# Full data
alternate_titles = self.request('movie/%s/alternative_titles' % movie.get('id'), return_key = 'titles')
for alt in alternate_titles:
alt_name = alt.get('title')
if alt_name and alt_name not in movie_data['titles'] and alt_name.lower() != 'none' and alt_name is not None:
movie_data['titles'].append(alt_name)
# Cache movie parsed
self.setCache(cache_key, movie_data)
return movie_data
@@ -180,22 +192,23 @@ class TheMovieDb(MovieProvider):
image_urls = []
try:
for image in movie.get('images', {}).get(type, [])[1:5]:
# Full data
images = self.request('movie/%s/images' % movie.get('id'), return_key = type)
for image in images[1:5]:
image_urls.append(self.getImage(image, 'file', size))
except:
log.debug('Failed getting %s.%s for "%s"', (type, size, ss(str(movie))))
return image_urls
def request(self, call = '', params = {}, return_key = None):
params = dict((k, v) for k, v in params.items() if v)
params = tryUrlencode(params)
url = 'http://api.themoviedb.org/3/%s?api_key=%s%s' % (call, self.conf('api_key'), '&%s' % params if params else '')
data = self.getJsonData(url)
data = self.getJsonData(url, cache_timeout = 0)
if data and return_key and return_key in data:
if data and return_key and data.get(return_key):
data = data.get(return_key)
return data
+1 -2
View File
@@ -166,8 +166,7 @@ class MovieSearcher(SearcherBase, MovieTypeBase):
'quality': q_identifier,
'finish': profile['finish'][index],
'wait_for': tryInt(profile['wait_for'][index]),
'3d': profile['3d'][index] if profile.get('3d') else False,
'minimum_score': profile.get('minimum_score', 1),
'3d': profile['3d'][index] if profile.get('3d') else False
}
could_not_be_released = not self.couldBeReleased(q_identifier in pre_releases, release_dates, movie['info']['year'])
+3 -3
View File
@@ -1,10 +1,10 @@
from CodernityDB.tree_index import TreeBasedIndex
from couchpotato.core.helpers.database import TreeBasedIndex
class NotificationIndex(TreeBasedIndex):
_version = 1
custom_header = """from CodernityDB.tree_index import TreeBasedIndex
custom_header = """from couchpotato.core.helpers.database import TreeBasedIndex
import time"""
def __init__(self, *args, **kwargs):
@@ -22,7 +22,7 @@ import time"""
class NotificationUnreadIndex(TreeBasedIndex):
_version = 1
custom_header = """from CodernityDB.tree_index import TreeBasedIndex
custom_header = """from couchpotato.core.helpers.database import TreeBasedIndex
import time"""
def __init__(self, *args, **kwargs):
+1 -1
View File
@@ -3,11 +3,11 @@ import threading
import time
import traceback
import uuid
from CodernityDB.database import RecordDeleted
from couchpotato import get_db
from couchpotato.api import addApiView, addNonBlockApiView
from couchpotato.core.event import addEvent, fireEvent
from couchpotato.core.helpers.database import RecordDeleted
from couchpotato.core.helpers.encoding import toUnicode
from couchpotato.core.helpers.variable import tryInt, splitString
from couchpotato.core.logger import CPLog
@@ -0,0 +1,68 @@
from couchpotato.core.helpers.variable import splitString
from couchpotato.core.logger import CPLog
from couchpotato.core.notifications.base import Notification
from pynmwp import PyNMWP
import six
log = CPLog(__name__)
autoload = 'NotifyMyWP'
class NotifyMyWP(Notification):
def notify(self, message = '', data = None, listener = None):
if not data: data = {}
keys = splitString(self.conf('api_key'))
p = PyNMWP(keys, self.conf('dev_key'))
response = p.push(application = self.default_title, event = message, description = message, priority = self.conf('priority'), batch_mode = len(keys) > 1)
for key in keys:
if not response[key]['Code'] == six.u('200'):
log.error('Could not send notification to NotifyMyWindowsPhone (%s). %s', (key, response[key]['message']))
return False
return response
config = [{
'name': 'notifymywp',
'groups': [
{
'tab': 'notifications',
'list': 'notification_providers',
'name': 'notifymywp',
'label': 'Windows Phone',
'options': [
{
'name': 'enabled',
'default': 0,
'type': 'enabler',
},
{
'name': 'api_key',
'description': 'Multiple keys seperated by a comma. Maximum of 5.'
},
{
'name': 'dev_key',
'advanced': True,
},
{
'name': 'priority',
'default': 0,
'type': 'dropdown',
'values': [('Very Low', -2), ('Moderate', -1), ('Normal', 0), ('High', 1), ('Emergency', 2)],
},
{
'name': 'on_snatch',
'default': 0,
'type': 'bool',
'advanced': True,
'description': 'Also send message when movie is snatched.',
},
],
}
],
}]
@@ -1,5 +1,5 @@
from datetime import timedelta, datetime
from urlparse import urlparse
from six.moves import urllib
import traceback
from couchpotato.core.helpers.variable import cleanHost
@@ -106,7 +106,7 @@ class PlexServer(object):
def createHost(self, host, port = None):
h = cleanHost(host)
p = urlparse(h)
p = urllib.urlparse(h)
h = h.rstrip('/')
if port and not p.port:
@@ -1,8 +1,16 @@
from .main import Twitter
from six import PY3
try:
from .main import Twitter
def autoload():
return Twitter()
def autoload():
return Twitter()
except:
if PY3:
from couchpotato.core.helpers.py3 import NotSupported
raise NotSupported
else:
raise
config = [{
'name': 'twitter',
+10 -2
View File
@@ -3,12 +3,20 @@ import traceback
from couchpotato.core.logger import CPLog
from couchpotato.core.notifications.base import Notification
import xmpp
from six import PY3
log = CPLog(__name__)
autoload = 'Xmpp'
try:
import xmpp
autoload = 'Xmpp'
except:
if PY3:
from couchpotato.core.helpers.py3 import NotSupported
raise NotSupported
else:
raise
class Xmpp(Notification):
+10 -7
View File
@@ -1,6 +1,5 @@
import threading
from urllib import quote
from urlparse import urlparse
from six.moves import urllib
import glob
import inspect
import os.path
@@ -183,20 +182,20 @@ class Plugin(object):
# http request
def urlopen(self, url, timeout = 30, data = None, headers = None, files = None, show_error = True, stream = False):
url = quote(ss(url), safe = "%/:=&?~#+!$,;'@()*[]")
url = urllib.parse.quote(ss(url), safe = "%/:=&?~#+!$,;'@()*[]")
if not headers: headers = {}
if not data: data = {}
# Fill in some headers
parsed_url = urlparse(url)
parsed_url = urllib.parse.urlparse(url)
host = '%s%s' % (parsed_url.hostname, (':' + str(parsed_url.port) if parsed_url.port else ''))
headers['Referer'] = headers.get('Referer', '%s://%s' % (parsed_url.scheme, host))
headers['Host'] = headers.get('Host', None)
headers['User-Agent'] = headers.get('User-Agent', self.user_agent)
headers['Accept-encoding'] = headers.get('Accept-encoding', 'gzip')
headers['Connection'] = headers.get('Connection', 'close')
headers['Connection'] = headers.get('Connection', 'keep-alive')
headers['Cache-Control'] = headers.get('Cache-Control', 'max-age=0')
r = Env.get('http_opener')
@@ -232,7 +231,11 @@ class Plugin(object):
status_code = response.status_code
if response.status_code == requests.codes.ok:
data = response if stream else response.content
if stream:
data = response
else:
data = response.content
data = data.decode(response.encoding)
else:
response.raise_for_status()
@@ -279,7 +282,7 @@ class Plugin(object):
wait = (last_use - now) + self.http_time_between_calls
if wait > 0:
log.debug('Waiting for %s, %d seconds', (self.getName(), max(1, wait)))
log.debug('Waiting for %s, %d seconds', (self.getName(), wait))
time.sleep(min(wait, 30))
def beforeCall(self, handler):
+1 -1
View File
@@ -1,4 +1,4 @@
from CodernityDB.tree_index import TreeBasedIndex
from couchpotato.core.helpers.database import TreeBasedIndex
class CategoryIndex(TreeBasedIndex):
+3 -2
View File
@@ -1,13 +1,14 @@
import random as rndm
import time
from CodernityDB.database import RecordDeleted
from couchpotato import get_db
from couchpotato.api import addApiView
from couchpotato.core.event import fireEvent
from couchpotato.core.helpers.database import RecordDeleted
from couchpotato.core.helpers.variable import splitString, tryInt
from couchpotato.core.logger import CPLog
from couchpotato.core.plugins.base import Plugin
import six
log = CPLog(__name__)
@@ -41,7 +42,7 @@ class Dashboard(Plugin):
# Add limit
limit = 12
if limit_offset:
splt = splitString(limit_offset) if isinstance(limit_offset, (str, unicode)) else limit_offset
splt = splitString(limit_offset) if isinstance(limit_offset, six.string_types) else limit_offset
limit = tryInt(splt[0])
# Get all active medias
+4 -5
View File
@@ -1,9 +1,9 @@
import codecs
import os
import re
import traceback
from couchpotato.api import addApiView
from couchpotato.core.helpers.encoding import toUnicode
from couchpotato.core.helpers.variable import tryInt, splitString
from couchpotato.core.logger import CPLog
from couchpotato.core.plugins.base import Plugin
@@ -103,9 +103,8 @@ class Logging(Plugin):
if not os.path.isfile(path):
break
f = open(path, 'r')
log_content = toUnicode(f.read())
raw_lines = self.toList(log_content)
f = codecs.open(path, 'r', 'utf-8')
raw_lines = self.toList(f.read())
raw_lines.reverse()
brk = False
@@ -131,7 +130,7 @@ class Logging(Plugin):
def toList(self, log_content = ''):
logs_raw = toUnicode(log_content).split('[0m\n')
logs_raw = log_content.split('[0m\n')
logs = []
for log_line in logs_raw:
+1 -1
View File
@@ -123,7 +123,7 @@ class Manage(Plugin):
fireEvent('notify.frontend', type = 'manage.update', data = True, message = 'Scanning for movies in "%s"' % folder)
onFound = self.createAddToLibrary(folder, added_identifiers)
fireEvent('scanner.scan', folder = folder, simple = True, newer_than = last_update if not full else 0, check_file_date = False, on_found = onFound, single = True)
fireEvent('scanner.scan', folder = folder, simple = True, newer_than = last_update if not full else 0, on_found = onFound, single = True)
# Break if CP wants to shut down
if self.shuttingDown():
+1 -1
View File
@@ -1,4 +1,4 @@
from CodernityDB.tree_index import TreeBasedIndex
from couchpotato.core.helpers.database import TreeBasedIndex
class ProfileIndex(TreeBasedIndex):
-2
View File
@@ -86,7 +86,6 @@ class ProfilePlugin(Plugin):
'label': toUnicode(kwargs.get('label')),
'order': tryInt(kwargs.get('order', 999)),
'core': kwargs.get('core', False),
'minimum_score': tryInt(kwargs.get('minimum_score', 1)),
'qualities': [],
'wait_for': [],
'stop_after': [],
@@ -218,7 +217,6 @@ class ProfilePlugin(Plugin):
'label': toUnicode(profile.get('label')),
'order': order,
'qualities': profile.get('qualities'),
'minimum_score': 1,
'finish': [],
'wait_for': [],
'stop_after': [],
@@ -51,11 +51,6 @@
margin: 0 5px !important;
}
.profile .wait_for .minimum_score_input {
width: 40px !important;
text-align: left;
}
.profile .types {
padding: 0;
margin: 0 20px 0 -4px;
@@ -53,21 +53,12 @@ var Profile = new Class({
}),
new Element('span', {'text':'day(s) for a better quality '}),
new Element('span.advanced', {'text':'and keep searching'}),
// "After a checked quality is found and downloaded, continue searching for even better quality releases for the entered number of days."
new Element('input.inlay.xsmall.stop_after_input.advanced', {
'type':'text',
'value': data.stop_after && data.stop_after.length > 0 ? data.stop_after[0] : 0
}),
new Element('span.advanced', {'text':'day(s) for a better (checked) quality.'}),
// Minimum score of
new Element('span.advanced', {'html':'<br/>Releases need a minimum score of'}),
new Element('input.advanced.inlay.xsmall.minimum_score_input', {
'size': 4,
'type':'text',
'value': data.minimum_score || 1
})
new Element('span.advanced', {'text':'day(s) for a better (checked) quality.'})
)
);
@@ -135,7 +126,6 @@ var Profile = new Class({
'label' : self.el.getElement('.quality_label input').get('value'),
'wait_for' : self.el.getElement('.wait_for_input').get('value'),
'stop_after' : self.el.getElement('.stop_after_input').get('value'),
'minimum_score' : self.el.getElement('.minimum_score_input').get('value'),
'types': []
};
+1 -2
View File
@@ -1,6 +1,5 @@
from hashlib import md5
from CodernityDB.hash_index import HashIndex
from couchpotato.core.helpers.database import HashIndex
class QualityIndex(HashIndex):
+6 -12
View File
@@ -2,10 +2,10 @@ from math import fabs, ceil
import traceback
import re
from CodernityDB.database import RecordNotFound
from couchpotato import get_db
from couchpotato.api import addApiView
from couchpotato.core.event import addEvent, fireEvent
from couchpotato.core.helpers.database import RecordNotFound
from couchpotato.core.helpers.encoding import toUnicode, ss
from couchpotato.core.helpers.variable import mergeDicts, getExt, tryInt, splitString, tryFloat
from couchpotato.core.logger import CPLog
@@ -30,10 +30,10 @@ class QualityPlugin(Plugin):
{'identifier': 'dvdr', 'size': (3000, 10000), 'median_size': 4500, 'label': 'DVD-R', 'alternative': ['br2dvd', ('dvd', 'r')], 'allow': [], 'ext':['iso', 'img', 'vob'], 'tags': ['pal', 'ntsc', 'video_ts', 'audio_ts', ('dvd', 'r'), 'dvd9']},
{'identifier': 'dvdrip', 'size': (600, 2400), 'median_size': 1500, 'label': 'DVD-Rip', 'width': 720, 'alternative': [('dvd', 'rip')], 'allow': [], 'ext':['avi'], 'tags': [('dvd', 'rip'), ('dvd', 'xvid'), ('dvd', 'divx')]},
{'identifier': 'scr', 'size': (600, 1600), 'median_size': 700, 'label': 'Screener', 'alternative': ['screener', 'dvdscr', 'ppvrip', 'dvdscreener', 'hdscr', 'webrip', ('web', 'rip')], 'allow': ['dvdr', 'dvdrip', '720p', '1080p'], 'ext':[], 'tags': []},
{'identifier': 'r5', 'size': (600, 1000), 'median_size': 700, 'label': 'R5', 'alternative': ['r6'], 'allow': ['dvdr', '720p', '1080p'], 'ext':[]},
{'identifier': 'tc', 'size': (600, 1000), 'median_size': 700, 'label': 'TeleCine', 'alternative': ['telecine'], 'allow': ['720p', '1080p'], 'ext':[]},
{'identifier': 'ts', 'size': (600, 1000), 'median_size': 700, 'label': 'TeleSync', 'alternative': ['telesync', 'hdts'], 'allow': ['720p', '1080p'], 'ext':[]},
{'identifier': 'cam', 'size': (600, 1000), 'median_size': 700, 'label': 'Cam', 'alternative': ['camrip', 'hdcam'], 'allow': ['720p', '1080p'], 'ext':[]}
{'identifier': 'r5', 'size': (600, 1000), 'median_size': 700, 'label': 'R5', 'alternative': ['r6'], 'allow': ['dvdr', '720p'], 'ext':[]},
{'identifier': 'tc', 'size': (600, 1000), 'median_size': 700, 'label': 'TeleCine', 'alternative': ['telecine'], 'allow': ['720p'], 'ext':[]},
{'identifier': 'ts', 'size': (600, 1000), 'median_size': 700, 'label': 'TeleSync', 'alternative': ['telesync', 'hdts'], 'allow': ['720p'], 'ext':[]},
{'identifier': 'cam', 'size': (600, 1000), 'median_size': 700, 'label': 'Cam', 'alternative': ['camrip', 'hdcam'], 'allow': ['720p'], 'ext':[]}
]
pre_releases = ['cam', 'ts', 'tc', 'r5', 'scr']
threed_tags = {
@@ -278,8 +278,6 @@ class QualityPlugin(Plugin):
'ext': 5,
}
scored_on = []
# Check alt and tags
for tag_type in ['identifier', 'alternative', 'tags', 'label']:
qualities = quality.get(tag_type, [])
@@ -291,13 +289,10 @@ class QualityPlugin(Plugin):
log.debug('Found %s via %s %s in %s', (quality['identifier'], tag_type, quality.get(tag_type), cur_file))
score += points.get(tag_type)
if isinstance(alt, (str, unicode)) and ss(alt.lower()) in words and ss(alt.lower()) not in scored_on:
if isinstance(alt, (str, unicode)) and ss(alt.lower()) in words:
log.debug('Found %s via %s %s in %s', (quality['identifier'], tag_type, quality.get(tag_type), cur_file))
score += points.get(tag_type)
# Don't score twice on same tag
scored_on.append(ss(alt).lower())
# Check extention
for ext in quality.get('ext', []):
if ext == extension:
@@ -490,7 +485,6 @@ class QualityPlugin(Plugin):
'Movie Name (2015).mp4': {'size': 6500, 'quality': 'brrip'},
'Movie Name.2014.720p Web-Dl Aac2.0 h264-ReleaseGroup': {'size': 3800, 'quality': 'brrip'},
'Movie Name.2014.720p.WEBRip.x264.AC3-ReleaseGroup': {'size': 3000, 'quality': 'scr'},
'Movie.Name.2014.1080p.HDCAM.-.ReleaseGroup': {'size': 5300, 'quality': 'cam'},
}
correct = 0
+1 -3
View File
@@ -1,7 +1,5 @@
from hashlib import md5
from CodernityDB.hash_index import HashIndex
from CodernityDB.tree_index import TreeBasedIndex
from couchpotato.core.helpers.database import TreeBasedIndex, HashIndex
class ReleaseIndex(TreeBasedIndex):
+4 -4
View File
@@ -3,10 +3,10 @@ import os
import time
import traceback
from CodernityDB.database import RecordDeleted, RecordNotFound
from couchpotato import md5, get_db
from couchpotato.api import addApiView
from couchpotato.core.event import fireEvent, addEvent
from couchpotato.core.helpers.database import RecordDeleted, RecordNotFound
from couchpotato.core.helpers.encoding import toUnicode, sp
from couchpotato.core.helpers.variable import getTitle, tryInt
from couchpotato.core.logger import CPLog
@@ -187,7 +187,7 @@ class Release(Plugin):
release['files'] = dict((k, [toUnicode(x) for x in v]) for k, v in group['files'].items() if v)
db.update(release)
fireEvent('media.restatus', media['_id'], allowed_restatus = ['done'], single = True)
fireEvent('media.restatus', media['_id'], single = True)
return True
except:
@@ -389,8 +389,8 @@ class Release(Plugin):
log.info('Ignored: %s', rel['name'])
continue
if rel['score'] < quality_custom.get('minimum_score'):
log.info('Ignored, score "%s" to low, need at least "%s": %s', (rel['score'], quality_custom.get('minimum_score'), rel['name']))
if rel['score'] <= 0:
log.info('Ignored, score "%s" to low: %s', (rel['score'], rel['name']))
continue
if rel['size'] <= 50:
+2 -2
View File
@@ -222,8 +222,8 @@ class Renamer(Plugin):
cd_keys = ['<cd>','<cd_nr>', '<original>']
if not any(x in folder_name for x in cd_keys) and not any(x in file_name for x in cd_keys):
log.error('Missing `cd` or `cd_nr` in the renamer. This will cause multi-file releases of being renamed to the same file. '
'Please add it in the renamer settings. Force adding it for now.')
log.error('Missing `cd` or `cd_nr` in the renamer. This will cause multi-file releases of being renamed to the same file.'
'Force adding it')
file_name = '%s %s' % ('<cd>', file_name)
# Tag release folder as failed_rename in case no groups were found. This prevents check_snatched from removing the release from the downloader.
+2 -1
View File
@@ -131,7 +131,7 @@ class Scanner(Plugin):
addEvent('scanner.name_year', self.getReleaseNameYear)
addEvent('scanner.partnumber', self.getPartNumber)
def scan(self, folder = None, files = None, release_download = None, simple = False, newer_than = 0, return_ignored = True, check_file_date = True, on_found = None):
def scan(self, folder = None, files = None, release_download = None, simple = False, newer_than = 0, return_ignored = True, on_found = None):
folder = sp(folder)
@@ -145,6 +145,7 @@ class Scanner(Plugin):
# Scan all files of the folder if no files are set
if not files:
check_file_date = True
try:
files = []
for root, dirs, walk_files in os.walk(folder, followlinks=True):
+10 -5
View File
@@ -1,12 +1,14 @@
from __future__ import with_statement
import ConfigParser
from hashlib import md5
from CodernityDB.hash_index import HashIndex
from couchpotato.api import addApiView
from couchpotato.core.event import addEvent, fireEvent
from couchpotato.core.helpers.database import HashIndex
from couchpotato.core.helpers.encoding import toUnicode
from six.moves import configparser
from couchpotato.core.helpers.variable import mergeDicts, tryInt, tryFloat
import six
class Settings(object):
@@ -62,7 +64,7 @@ class Settings(object):
def setFile(self, config_file):
self.file = config_file
self.p = ConfigParser.RawConfigParser()
self.p = configparser.RawConfigParser()
self.p.read(config_file)
from couchpotato.core.logger import CPLog
@@ -148,7 +150,10 @@ class Settings(object):
return tryFloat(self.p.get(section, option))
def getUnicode(self, section, option):
value = self.p.get(section, option).decode('unicode_escape')
value = self.p.get(section, option)
if six.PY2:
value = value.decode('unicode_escape')
return toUnicode(value).strip()
def getValues(self):
@@ -161,7 +166,7 @@ class Settings(object):
return values
def save(self):
with open(self.file, 'wb') as configfile:
with open(self.file, 'w') as configfile:
self.p.write(configfile)
self.log.debug('Saved settings')
+1
View File
@@ -14,6 +14,7 @@ class Env(object):
''' Environment variables '''
_app = None
_encoding = 'UTF-8'
_fs_encoding = 'UTF-8'
_debug = False
_dev = False
_settings = Settings()
+4 -4
View File
@@ -11,12 +11,12 @@ import re
import tarfile
import shutil
from CodernityDB.database_super_thread_safe import SuperThreadSafeDatabase
from argparse import ArgumentParser
from cache import FileSystemCache
from couchpotato import KeyHandler, LoginHandler, LogoutHandler
from couchpotato.api import NonBlockHandler, ApiHandler
from couchpotato.core.event import fireEventAsync, fireEvent
from couchpotato.core.helpers.database import SuperThreadSafeDatabase
from couchpotato.core.helpers.encoding import sp
from couchpotato.core.helpers.variable import getDataDir, tryInt, getFreeSpace
import requests
@@ -86,6 +86,7 @@ def runCouchPotato(options, base_path, args, data_dir = None, log_dir = None, En
encoding = 'UTF-8'
Env.set('encoding', encoding)
Env.set('fs_encoding', sys.getfilesystemencoding())
# Do db stuff
db_path = sp(os.path.join(data_dir, 'database'))
@@ -116,8 +117,7 @@ def runCouchPotato(options, base_path, args, data_dir = None, log_dir = None, En
# Delete non zip files
if len(ints) != 1:
try: os.remove(os.path.join(root, backup_file))
except: pass
os.remove(os.path.join(root, backup_file))
else:
existing_backups.append((int(ints[0]), backup_file))
else:
@@ -205,7 +205,7 @@ def runCouchPotato(options, base_path, args, data_dir = None, log_dir = None, En
logger.addHandler(hdlr)
# To file
hdlr2 = handlers.RotatingFileHandler(Env.get('log_path'), 'a', 500000, 10, encoding = Env.get('encoding'))
hdlr2 = handlers.RotatingFileHandler(Env.get('log_path'), 'a', 500000, 10, encoding = 'utf-8')
hdlr2.setFormatter(formatter)
logger.addHandler(hdlr2)
+4 -11
View File
@@ -54,22 +54,16 @@
},
pushState: function(e){
var self = this;
if((!e.meta && self.isMac()) || (!e.control && !self.isMac())){
if((!e.meta && Browser.platform.mac) || (!e.control && !Browser.platform.mac)){
(e).preventDefault();
var url = e.target.get('href');
// Middle click
if(e.event && e.event.button == 1)
window.open(url);
else if(History.getPath() != url)
if(History.getPath() != url)
History.push(url);
}
},
isMac: function(){
return Browser.platform == 'mac'
return Browser.platform.mac
},
createLayout: function(){
@@ -331,12 +325,11 @@
},
openDerefered: function(e, el){
var self = this;
(e).stop();
var url = 'http://www.dereferer.org/?' + el.get('href');
if(el.get('target') == '_blank' || (e.meta && self.isMac()) || (e.control && !self.isMac()))
if(el.get('target') == '_blank' || (e.meta && Browser.platform.mac) || (e.control && !Browser.platform.mac))
window.open(url);
else
window.location = url;
+1 -1
View File
@@ -117,7 +117,7 @@ var AboutSettingTab = new Class({
var self = this;
var date = new Date(json.version.date * 1000);
self.version_text.set('text', json.version.hash + (json.version.date ? ' ('+date.toLocaleString()+')' : ''));
self.updater_type.set('text', (json.version.type != json.branch) ? (json.version.type + ', ' + json.branch) : json.branch);
self.updater_type.set('text', json.version.type + ', ' + json.branch);
}
});
BIN
View File
Binary file not shown.
BIN
View File
Binary file not shown.

Before

Width:  |  Height:  |  Size: 345 KiB

BIN
View File
Binary file not shown.

Before

Width:  |  Height:  |  Size: 435 B

BIN
View File
Binary file not shown.

Before

Width:  |  Height:  |  Size: 367 B

-52
View File
@@ -1,52 +0,0 @@
#define MyAppName "CouchPotato"
#define MyAppVer "2.6.0"
#define MyAppBit "win32"
//#define MyAppBit "win-amd64"
[Setup]
AppName={#MyAppName}
AppVersion=2
AppVerName={#MyAppName}
DefaultDirName={userappdata}\{#MyAppName}\application
DisableProgramGroupPage=yes
DisableDirPage=yes
UninstallDisplayIcon=./icon.ico
SetupIconFile=./icon.ico
OutputDir=./dist
OutputBaseFilename={#MyAppName}-{#MyAppVer}.{#MyAppBit}.installer
AppPublisher=Your Mom
AppPublisherURL=http://couchpota.to
PrivilegesRequired=none
WizardSmallImageFile=installer_icon.bmp
WizardImageFile=installer_banner.bmp
UsePreviousAppDir=no
[Messages]
WelcomeLabel1=Installing [name]!
WelcomeLabel2=This wizard will install [name] to your AppData folder. It does this so it can use the build in updater without needing admin rights.
[CustomMessages]
LaunchProgram=Launch {#MyAppName} right now.
[Files]
Source: "./dist/{#MyAppName}-{#MyAppVer}.{#MyAppBit}/*"; Flags: recursesubdirs; DestDir: "{app}"
[Icons]
Name: "{commonprograms}\{#MyAppName}"; Filename: "{app}\{#MyAppName}.exe"
Name: "{userstartup}\{#MyAppName}"; Filename: "{app}\{#MyAppName}.exe"; Tasks: startup
[Tasks]
Name: "startup"; Description: "Run {#MyAppName} at startup"; Flags: unchecked
[Run]
Filename: {app}\{#MyAppName}.exe; Description: {cm:LaunchProgram,{#MyAppName}}; Flags: nowait postinstall skipifsilent
[UninstallDelete]
Type: filesandordirs; Name: "{app}\appdata"
Type: filesandordirs; Name: "{app}\Microsoft.VC90.CRT"
Type: filesandordirs; Name: "{app}\updates"
Type: filesandordirs; Name: "{app}\CouchPotato*"
Type: filesandordirs; Name: "{app}\python27.dll"
Type: filesandordirs; Name: "{app}\unins000.dat"
Type: filesandordirs; Name: "{app}\unins000.exe"
Binary file not shown.

Before

Width:  |  Height:  |  Size: 151 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 9.6 KiB

+20
View File
@@ -0,0 +1,20 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2011-2013 Codernity (http://codernity.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
__version__ = '0.4.2'
__license__ = "Apache 2.0"
File diff suppressed because it is too large Load Diff
+31
View File
@@ -0,0 +1,31 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2011-2013 Codernity (http://codernity.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from gevent.lock import RLock
from CodernityDB3.env import cdb_environment
cdb_environment['mode'] = "gevent"
cdb_environment['rlock_obj'] = RLock
# from CodernityDB3.database import Database
from CodernityDB3.database_safe_shared import SafeDatabase
class GeventDatabase(SafeDatabase):
pass
+229
View File
@@ -0,0 +1,229 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2011-2013 Codernity (http://codernity.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from CodernityDB3.env import cdb_environment
from CodernityDB3.database import PreconditionsException, RevConflict, Database
# from database import Database
from collections import defaultdict
from functools import wraps
from types import MethodType
class th_safe_gen:
def __init__(self, name, gen, l=None):
self.lock = l
self.__gen = gen
self.name = name
def __iter__(self):
return self
def __next__(self):
with self.lock:
return next(self.__gen)
@staticmethod
def wrapper(method, index_name, meth_name, l=None):
@wraps(method)
def _inner(*args, **kwargs):
res = method(*args, **kwargs)
return th_safe_gen(index_name + "_" + meth_name, res, l)
return _inner
def safe_wrapper(method, lock):
@wraps(method)
def _inner(*args, **kwargs):
with lock:
return method(*args, **kwargs)
return _inner
class SafeDatabase(Database):
def __init__(self, path, *args, **kwargs):
super(SafeDatabase, self).__init__(path, *args, **kwargs)
self.indexes_locks = defaultdict(
lambda: cdb_environment['rlock_obj']())
self.close_open_lock = cdb_environment['rlock_obj']()
self.main_lock = cdb_environment['rlock_obj']()
self.id_revs = {}
def __patch_index_gens(self, name):
ind = self.indexes_names[name]
for c in ('all', 'get_many'):
m = getattr(ind, c)
if getattr(ind, c + "_orig", None):
return
m_fixed = th_safe_gen.wrapper(m, name, c, self.indexes_locks[name])
setattr(ind, c, m_fixed)
setattr(ind, c + '_orig', m)
def __patch_index_methods(self, name):
ind = self.indexes_names[name]
lock = self.indexes_locks[name]
for curr in dir(ind):
meth = getattr(ind, curr)
if not curr.startswith('_') and isinstance(meth, MethodType):
setattr(ind, curr, safe_wrapper(meth, lock))
stor = ind.storage
for curr in dir(stor):
meth = getattr(stor, curr)
if not curr.startswith('_') and isinstance(meth, MethodType):
setattr(stor, curr, safe_wrapper(meth, lock))
def __patch_index(self, name):
self.__patch_index_methods(name)
self.__patch_index_gens(name)
def initialize(self, *args, **kwargs):
with self.close_open_lock:
self.close_open_lock.acquire()
res = super(SafeDatabase, self).initialize(*args, **kwargs)
for name in list(self.indexes_names.keys()):
self.indexes_locks[name] = cdb_environment['rlock_obj']()
return res
def open(self, *args, **kwargs):
with self.close_open_lock:
res = super(SafeDatabase, self).open(*args, **kwargs)
for name in list(self.indexes_names.keys()):
self.indexes_locks[name] = cdb_environment['rlock_obj']()
self.__patch_index(name)
return res
def create(self, *args, **kwargs):
with self.close_open_lock:
res = super(SafeDatabase, self).create(*args, **kwargs)
for name in list(self.indexes_names.keys()):
self.indexes_locks[name] = cdb_environment['rlock_obj']()
self.__patch_index(name)
return res
def close(self):
with self.close_open_lock:
return super(SafeDatabase, self).close()
def destroy(self):
with self.close_open_lock:
return super(SafeDatabase, self).destroy()
def add_index(self, *args, **kwargs):
with self.main_lock:
res = super(SafeDatabase, self).add_index(*args, **kwargs)
if self.opened:
self.indexes_locks[res] = cdb_environment['rlock_obj']()
self.__patch_index(res)
return res
def _single_update_index(self, index, data, db_data, doc_id):
with self.indexes_locks[index.name]:
super(SafeDatabase, self)._single_update_index(
index, data, db_data, doc_id)
def _single_delete_index(self, index, data, doc_id, old_data):
with self.indexes_locks[index.name]:
super(SafeDatabase, self)._single_delete_index(
index, data, doc_id, old_data)
def edit_index(self, *args, **kwargs):
with self.main_lock:
res = super(SafeDatabase, self).edit_index(*args, **kwargs)
if self.opened:
self.indexes_locks[res] = cdb_environment['rlock_obj']()
self.__patch_index(res)
return res
def set_indexes(self, *args, **kwargs):
try:
self.main_lock.acquire()
super(SafeDatabase, self).set_indexes(*args, **kwargs)
finally:
self.main_lock.release()
def reindex_index(self, index, *args, **kwargs):
if isinstance(index, str):
if not index in self.indexes_names:
raise PreconditionsException("No index named %s" % index)
index = self.indexes_names[index]
key = index.name + "reind"
self.main_lock.acquire()
if key in self.indexes_locks:
lock = self.indexes_locks[index.name + "reind"]
else:
self.indexes_locks[index.name +
"reind"] = cdb_environment['rlock_obj']()
lock = self.indexes_locks[index.name + "reind"]
self.main_lock.release()
try:
lock.acquire()
super(SafeDatabase, self).reindex_index(
index, *args, **kwargs)
finally:
lock.release()
def flush(self):
try:
self.main_lock.acquire()
super(SafeDatabase, self).flush()
finally:
self.main_lock.release()
def fsync(self):
try:
self.main_lock.acquire()
super(SafeDatabase, self).fsync()
finally:
self.main_lock.release()
def _update_id_index(self, _rev, data):
with self.indexes_locks['id']:
return super(SafeDatabase, self)._update_id_index(_rev, data)
def _delete_id_index(self, _id, _rev, data):
with self.indexes_locks['id']:
return super(SafeDatabase, self)._delete_id_index(_id, _rev, data)
def _update_indexes(self, _rev, data):
_id, new_rev, db_data = self._update_id_index(_rev, data)
with self.main_lock:
self.id_revs[_id] = new_rev
for index in self.indexes[1:]:
with self.main_lock:
curr_rev = self.id_revs.get(_id) # get last _id, _rev
if curr_rev != new_rev:
break # new update on the way stop current
self._single_update_index(index, data, db_data, _id)
with self.main_lock:
if self.id_revs[_id] == new_rev:
del self.id_revs[_id]
return _id, new_rev
def _delete_indexes(self, _id, _rev, data):
old_data = self.get('id', _id)
if old_data['_rev'] != _rev:
raise RevConflict()
with self.main_lock:
self.id_revs[_id] = _rev
for index in self.indexes[1:]:
self._single_delete_index(index, data, _id, old_data)
self._delete_id_index(_id, _rev, data)
with self.main_lock:
if self.id_revs[_id] == _rev:
del self.id_revs[_id]
@@ -0,0 +1,110 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2011-2013 Codernity (http://codernity.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from threading import RLock
from CodernityDB3.env import cdb_environment
cdb_environment['mode'] = "threads"
cdb_environment['rlock_obj'] = RLock
from .database import Database
from functools import wraps
from types import FunctionType, MethodType
from CodernityDB3.database_safe_shared import th_safe_gen
class SuperLock(type):
@staticmethod
def wrapper(f):
@wraps(f)
def _inner(*args, **kwargs):
db = args[0]
with db.super_lock:
# print '=>', f.__name__, repr(args[1:])
res = f(*args, **kwargs)
# if db.opened:
# db.flush()
# print '<=', f.__name__, repr(args[1:])
return res
return _inner
def __new__(cls, classname, bases, attr):
new_attr = {}
for base in bases:
for b_attr in dir(base):
a = getattr(base, b_attr, None)
if isinstance(a, MethodType) and not b_attr.startswith('_'):
if b_attr == 'flush' or b_attr == 'flush_indexes':
pass
else:
# setattr(base, b_attr, SuperLock.wrapper(a))
new_attr[b_attr] = SuperLock.wrapper(a)
for attr_name, attr_value in list(attr.items()):
if isinstance(attr_value, FunctionType) and not attr_name.startswith('_'):
attr_value = SuperLock.wrapper(attr_value)
new_attr[attr_name] = attr_value
new_attr['super_lock'] = RLock()
return type.__new__(cls, classname, bases, new_attr)
class SuperThreadSafeDatabase(Database, metaclass=SuperLock):
"""
Thread safe version that always allows single thread to use db.
It adds the same lock for all methods, so only one operation can be
performed in given time. Completely different implementation
than ThreadSafe version (without super word)
"""
def __init__(self, *args, **kwargs):
super(SuperThreadSafeDatabase, self).__init__(*args, **kwargs)
def __patch_index_gens(self, name):
ind = self.indexes_names[name]
for c in ('all', 'get_many'):
m = getattr(ind, c)
if getattr(ind, c + "_orig", None):
return
m_fixed = th_safe_gen.wrapper(m, name, c, self.super_lock)
setattr(ind, c, m_fixed)
setattr(ind, c + '_orig', m)
def open(self, *args, **kwargs):
res = super(SuperThreadSafeDatabase, self).open(*args, **kwargs)
for name in list(self.indexes_names.keys()):
self.__patch_index_gens(name)
return res
def create(self, *args, **kwargs):
res = super(SuperThreadSafeDatabase, self).create(*args, **kwargs)
for name in list(self.indexes_names.keys()):
self.__patch_index_gens(name)
return res
def add_index(self, *args, **kwargs):
res = super(SuperThreadSafeDatabase, self).add_index(*args, **kwargs)
self.__patch_index_gens(res)
return res
def edit_index(self, *args, **kwargs):
res = super(SuperThreadSafeDatabase, self).edit_index(*args, **kwargs)
self.__patch_index_gens(res)
return res
+35
View File
@@ -0,0 +1,35 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2011-2013 Codernity (http://codernity.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from threading import RLock
from CodernityDB3.env import cdb_environment
cdb_environment['mode'] = "threads"
cdb_environment['rlock_obj'] = RLock
from .database_safe_shared import SafeDatabase
class ThreadSafeDatabase(SafeDatabase):
"""
Thread safe version of CodernityDB that uses several lock objects,
on different methods / different indexes etc. It's completely different
implementation of locking than SuperThreadSafe one.
"""
pass
+211
View File
@@ -0,0 +1,211 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2011-2013 Codernity (http://codernity.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from CodernityDB3.tree_index import TreeBasedIndex
import struct
import os
import inspect
from functools import wraps
import json
class DebugTreeBasedIndex(TreeBasedIndex):
def __init__(self, *args, **kwargs):
super(DebugTreeBasedIndex, self).__init__(*args, **kwargs)
def print_tree(self):
print('-----CURRENT TREE-----')
print(self.root_flag)
if self.root_flag == 'l':
print('---ROOT---')
self._print_leaf_data(self.data_start)
return
else:
print('---ROOT---')
self._print_node_data(self.data_start)
nr_of_el, children_flag = self._read_node_nr_of_elements_and_children_flag(
self.data_start)
nodes = []
for index in range(nr_of_el):
l_pointer, key, r_pointer = self._read_single_node_key(
self.data_start, index)
nodes.append(l_pointer)
nodes.append(r_pointer)
print('ROOT NODES', nodes)
while children_flag == 'n':
self._print_level(nodes, 'n')
new_nodes = []
for node in nodes:
nr_of_el, children_flag = \
self._read_node_nr_of_elements_and_children_flag(node)
for index in range(nr_of_el):
l_pointer, key, r_pointer = self._read_single_node_key(
node, index)
new_nodes.append(l_pointer)
new_nodes.append(r_pointer)
nodes = new_nodes
self._print_level(nodes, 'l')
def _print_level(self, nodes, flag):
print('---NEXT LVL---')
if flag == 'n':
for node in nodes:
self._print_node_data(node)
elif flag == 'l':
for node in nodes:
self._print_leaf_data(node)
def _print_leaf_data(self, leaf_start_position):
print('printing data of leaf at', leaf_start_position)
nr_of_elements = self._read_leaf_nr_of_elements(leaf_start_position)
self.buckets.seek(leaf_start_position)
data = self.buckets.read(self.leaf_heading_size +
nr_of_elements * self.single_leaf_record_size)
leaf = struct.unpack('<' + self.leaf_heading_format +
nr_of_elements * self.single_leaf_record_format, data)
print(leaf)
print()
def _print_node_data(self, node_start_position):
print('printing data of node at', node_start_position)
nr_of_elements = self._read_node_nr_of_elements_and_children_flag(
node_start_position)[0]
self.buckets.seek(node_start_position)
data = self.buckets.read(self.node_heading_size + self.pointer_size
+ nr_of_elements * (self.key_size + self.pointer_size))
node = struct.unpack('<' + self.node_heading_format + self.pointer_format
+ nr_of_elements * (
self.key_format + self.pointer_format),
data)
print(node)
print()
# ------------------>
def database_step_by_step(db_obj, path=None):
if not path:
# ugly for multiplatform support....
p = db_obj.path
p1 = os.path.split(p)
p2 = os.path.split(p1[0])
p3 = '_'.join([p2[1], 'operation_logger.log'])
path = os.path.join(os.path.split(p2[0])[0], p3)
f_obj = open(path, 'wb')
__stack = [] # inspect.stack() is not working on pytest etc
def remove_from_stack(name):
for i in range(len(__stack)):
if __stack[-i] == name:
__stack.pop(-i)
def __dumper(f):
@wraps(f)
def __inner(*args, **kwargs):
funct_name = f.__name__
if funct_name == 'count':
name = args[0].__name__
meth_args = (name,) + args[1:]
elif funct_name in ('reindex_index', 'compact_index'):
name = args[0].name
meth_args = (name,) + args[1:]
else:
meth_args = args
kwargs_copy = kwargs.copy()
res = None
__stack.append(funct_name)
if funct_name == 'insert':
try:
res = f(*args, **kwargs)
except:
packed = json.dumps((funct_name,
meth_args, kwargs_copy, None))
f_obj.write('%s\n' % packed)
f_obj.flush()
raise
else:
packed = json.dumps((funct_name,
meth_args, kwargs_copy, res))
f_obj.write('%s\n' % packed)
f_obj.flush()
else:
if funct_name == 'get':
for curr in __stack:
if ('delete' in curr or 'update' in curr) and not curr.startswith('test'):
remove_from_stack(funct_name)
return f(*args, **kwargs)
packed = json.dumps((funct_name, meth_args, kwargs_copy))
f_obj.write('%s\n' % packed)
f_obj.flush()
res = f(*args, **kwargs)
remove_from_stack(funct_name)
return res
return __inner
for meth_name, meth_f in inspect.getmembers(db_obj, predicate=inspect.ismethod):
if not meth_name.startswith('_'):
setattr(db_obj, meth_name, __dumper(meth_f))
setattr(db_obj, 'operation_logger', f_obj)
def database_from_steps(db_obj, path):
# db_obj.insert=lambda data : insert_for_debug(db_obj, data)
with open(path, 'rb') as f_obj:
for current in f_obj:
line = json.loads(current[:-1])
if line[0] == 'count':
obj = getattr(db_obj, line[1][0])
line[1] = [obj] + line[1][1:]
name = line[0]
if name == 'insert':
try:
line[1][0].pop('_rev')
except:
pass
elif name in ('delete', 'update'):
el = db_obj.get('id', line[1][0]['_id'])
line[1][0]['_rev'] = el['_rev']
# print 'FROM STEPS doing', line
meth = getattr(db_obj, line[0], None)
if not meth:
raise Exception("Method = `%s` not found" % line[0])
meth(*line[1], **line[2])
# def insert_for_debug(self, data):
#
# _rev = data['_rev']
#
# if not '_id' in data:
# _id = uuid4().hex
# else:
# _id = data['_id']
# data['_id'] = _id
# try:
# _id = bytes(_id)
# except:
# raise DatabaseException("`_id` must be valid bytes object")
# self._insert_indexes(_id, _rev, data)
# ret = {'_id': _id, '_rev': _rev}
# data.update(ret)
# return ret
+25
View File
@@ -0,0 +1,25 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2011-2013 Codernity (http://codernity.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
It's CodernityDB environment.
Handles internal informations.'
"""
cdb_environment = {
'mode': 'normal'
}
+995
View File
@@ -0,0 +1,995 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2011-2013 Codernity (http://codernity.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from CodernityDB3.index import (Index,
IndexException,
DocIdNotFound,
ElemNotFound,
TryReindexException,
IndexPreconditionsException)
import os
import marshal
import io
import struct
import shutil
from CodernityDB3.storage import IU_Storage, DummyStorage
from CodernityDB3.env import cdb_environment
if cdb_environment.get('rlock_obj'):
from CodernityDB3 import patch
patch.patch_cache_rr(cdb_environment['rlock_obj'])
from CodernityDB3.rr_cache import cache1lvl
from CodernityDB3.misc import random_hex_32
try:
from CodernityDB3 import __version__
except ImportError:
from .__init__ import __version__
class IU_HashIndex(Index):
"""
That class is for Internal Use only, if you want to use HashIndex just subclass the :py:class:`HashIndex` instead this one.
That design is because main index logic should be always in database not in custom user indexes.
"""
def __init__(self, db_path, name, entry_line_format='<32s{key}IIcI', hash_lim=0xfffff, storage_class=None, key_format='c'):
"""
The index is capable to solve conflicts by `Separate chaining`
:param db_path: database path
:type db_path: string
:param name: index name
:type name: ascii string
:param line_format: line format, `key_format` parameter value will replace `{key}` if present.
:type line_format: string (32s{key}IIcI by default) {doc_id}{hash_key}{start}{size}{status}{next}
:param hash_lim: maximum hash functon results (remember about birthday problem) count from 0
:type hash_lim: integer
:param storage_class: Storage class by default it will open standard :py:class:`CodernityDB3.storage.Storage` (if string has to be accesible by globals()[storage_class])
:type storage_class: class name which will be instance of CodernityDB3.storage.Storage instance or None
:param key_format: a index key format
"""
# Fix types
if isinstance(db_path, str):
db_path = db_path.encode()
if isinstance(name, str):
name = name.encode()
if key_format and '{key}' in entry_line_format:
entry_line_format = entry_line_format.replace('{key}', key_format)
super(IU_HashIndex, self).__init__(db_path, name)
self.hash_lim = hash_lim
if not storage_class:
storage_class = IU_Storage
if storage_class and not isinstance(storage_class, str):
storage_class = storage_class.__name__
self.storage_class = storage_class
self.storage = None
self.bucket_line_format = "<I"
self.bucket_line_size = struct.calcsize(self.bucket_line_format)
self.entry_line_format = entry_line_format
self.entry_line_size = struct.calcsize(self.entry_line_format)
cache = cache1lvl(100)
self._find_key = cache(self._find_key)
self._locate_doc_id = cache(self._locate_doc_id)
self.bucket_struct = struct.Struct(self.bucket_line_format)
self.entry_struct = struct.Struct(self.entry_line_format)
self.data_start = (
self.hash_lim + 1) * self.bucket_line_size + self._start_ind + 2
def _fix_params(self):
super(IU_HashIndex, self)._fix_params()
self.bucket_line_size = struct.calcsize(self.bucket_line_format)
self.entry_line_size = struct.calcsize(self.entry_line_format)
self.bucket_struct = struct.Struct(self.bucket_line_format)
self.entry_struct = struct.Struct(self.entry_line_format)
self.data_start = (
self.hash_lim + 1) * self.bucket_line_size + self._start_ind + 2
def open_index(self):
if not os.path.isfile(os.path.join(self.db_path, self.name + '_buck')):
raise IndexException("Doesn't exists")
self.buckets = io.open(
os.path.join(self.db_path, self.name + "_buck"), 'r+b', buffering=0)
self._fix_params()
self._open_storage()
def create_index(self):
if os.path.isfile(os.path.join(self.db_path, self.name + '_buck')):
raise IndexException('Already exists')
with io.open(os.path.join(self.db_path, self.name + "_buck"), 'w+b') as f:
props = dict(name=self.name,
bucket_line_format=self.bucket_line_format,
entry_line_format=self.entry_line_format,
hash_lim=self.hash_lim,
version=self.__version__,
storage_class=self.storage_class)
f.write(marshal.dumps(props))
self.buckets = io.open(
os.path.join(self.db_path, self.name + "_buck"), 'r+b', buffering=0)
self._create_storage()
def destroy(self):
super(IU_HashIndex, self).destroy()
self._clear_cache()
def _open_storage(self):
s = globals()[self.storage_class]
if not self.storage:
self.storage = s(self.db_path, self.name)
self.storage.open()
def _create_storage(self):
s = globals()[self.storage_class]
if not self.storage:
self.storage = s(self.db_path, self.name)
self.storage.create()
# def close_index(self):
# self.buckets.flush()
# self.buckets.close()
# self.storage.close()
# @lfu_cache(100)
def _find_key(self, key):
"""
Find the key position
:param key: the key to find
"""
# Fix types
if isinstance(key, str):
key = key.encode()
start_position = self._calculate_position(key)
self.buckets.seek(start_position)
curr_data = self.buckets.read(self.bucket_line_size)
if curr_data:
location = self.bucket_struct.unpack(curr_data)[0]
if not location:
return None, None, 0, 0, 'u'
found_at, doc_id, l_key, start, size, status, _next = self._locate_key(
key, location)
if status == 'd': # when first record from many is deleted
while True:
found_at, doc_id, l_key, start, size, status, _next = self._locate_key(
key, _next)
if status != 'd':
break
return doc_id, l_key, start, size, status
else:
return None, None, 0, 0, 'u'
def _find_key_many(self, key, limit=1, offset=0):
# Fix types
if isinstance(key, str):
key = key.encode()
location = None
start_position = self._calculate_position(key)
self.buckets.seek(start_position)
curr_data = self.buckets.read(self.bucket_line_size)
if curr_data:
location = self.bucket_struct.unpack(curr_data)[0]
while offset:
if not location:
break
try:
found_at, doc_id, l_key, start, size, status, _next = self._locate_key(
key, location)
except IndexException:
break
else:
if status != 'd':
if l_key == key: # in case of hash function conflicts
offset -= 1
location = _next
while limit:
if not location:
break
try:
found_at, doc_id, l_key, start, size, status, _next = self._locate_key(
key, location)
except IndexException:
break
else:
if status != 'd':
if l_key == key: # in case of hash function conflicts
yield doc_id, start, size, status
limit -= 1
location = _next
def _calculate_position(self, key):
# Fix types
if isinstance(key, str):
key = key.encode()
return abs(hash(key) & self.hash_lim) * self.bucket_line_size + self._start_ind
# TODO add cache!
def _locate_key(self, key, start):
"""
Locate position of the key, it will iterate using `next` field in record
until required key will be find.
:param key: the key to locate
:param start: position to start from
"""
# Fix types
if isinstance(key, str):
key = key.encode()
location = start
while True:
self.buckets.seek(location)
data = self.buckets.read(self.entry_line_size)
# todo, maybe partial read there...
try:
doc_id, l_key, start, size, status, _next = self.entry_struct.unpack(data)
except struct.error:
raise ElemNotFound(
"Not found") # not found but might be also broken
if l_key == key:
break
else:
if not _next:
# not found
raise ElemNotFound("Not found")
else:
location = _next # go to next record
return location, doc_id, l_key, start, size, status, _next
# @lfu_cache(100)
def _locate_doc_id(self, doc_id, key, start):
"""
Locate position of the doc_id, it will iterate using `next` field in record
until required key will be find.
:param doc_id: the doc_id to locate
:param key: key value
:param start: position to start from
"""
# Fix types
if isinstance(doc_id, str):
doc_id = doc_id.encode()
if isinstance(key, str):
key = key.encode()
location = start
while True:
self.buckets.seek(location)
data = self.buckets.read(self.entry_line_size)
try:
l_doc_id, l_key, start, size, status, _next = self.entry_struct.unpack(data)
except:
raise DocIdNotFound(
"Doc_id '%s' for '%s' not found" % (doc_id, key))
if l_doc_id == doc_id and l_key == key: # added for consistency
break
else:
if not _next:
# not found
raise DocIdNotFound(
"Doc_id '%s' for '%s' not found" % (doc_id, key))
else:
location = _next # go to next record
return location, doc_id, l_key, start, size, status, _next
def _find_place(self, start):
"""
Find a place to where put the key. It will iterate using `next` field in record, until
empty `next` found
:param start: position to start from
"""
location = start
while True:
self.buckets.seek(location)
data = self.buckets.read(self.entry_line_size)
# todo, maybe partial read there...
doc_id, l_key, start, size, status, _next = self.entry_struct.unpack(data)
if not _next or status == 'd':
return self.buckets.tell() - self.entry_line_size, doc_id, l_key, start, size, status, _next
else:
location = _next # go to next record
def update(self, doc_id, key, u_start=0, u_size=0, u_status='o'):
# Fix types
if isinstance(doc_id, str):
doc_id = doc_id.encode()
if isinstance(key, str):
key = key.encode()
if isinstance(u_status, str):
u_status = u_status.encode()
start_position = self._calculate_position(key)
self.buckets.seek(start_position)
curr_data = self.buckets.read(self.bucket_line_size)
# test if it's unique or not really unique hash
if curr_data:
location = self.bucket_struct.unpack(curr_data)[0]
else:
raise ElemNotFound("Location '%s' not found" % doc_id)
found_at, _doc_id, _key, start, size, status, _next = self._locate_doc_id(doc_id, key, location)
self.buckets.seek(found_at)
self.buckets.write(self.entry_struct.pack(doc_id,
key,
u_start,
u_size,
u_status,
_next))
self.flush()
self._find_key.delete(key)
self._locate_doc_id.delete(doc_id)
return True
def insert(self, doc_id, key, start, size, status='o'):
# Fix types
if isinstance(doc_id, str):
doc_id = doc_id.encode()
if isinstance(key, str):
key = key.encode()
if isinstance(status, str):
status = status.encode()
start_position = self._calculate_position(key)
self.buckets.seek(start_position)
curr_data = self.buckets.read(self.bucket_line_size)
# conflict occurs?
if curr_data:
location = self.bucket_struct.unpack(curr_data)[0]
else:
location = 0
if location:
# last key with that hash
try:
found_at, _doc_id, _key, _start, _size, _status, _next = self._locate_doc_id(doc_id, key, location)
except DocIdNotFound:
found_at, _doc_id, _key, _start, _size, _status, _next = self._find_place(location)
self.buckets.seek(0, 2)
wrote_at = self.buckets.tell()
self.buckets.write(self.entry_struct.pack(doc_id,
key,
start,
size,
status,
_next))
# self.flush()
self.buckets.seek(found_at)
self.buckets.write(self.entry_struct.pack(_doc_id,
_key,
_start,
_size,
_status,
wrote_at))
else:
self.buckets.seek(found_at)
self.buckets.write(self.entry_struct.pack(doc_id,
key,
start,
size,
status,
_next))
self.flush()
self._locate_doc_id.delete(doc_id)
self._find_key.delete(_key)
# self._find_key.delete(key)
# self._locate_key.delete(_key)
return True
# raise NotImplementedError
else:
self.buckets.seek(0, 2)
wrote_at = self.buckets.tell()
# check if position is bigger than all hash entries...
if wrote_at < self.data_start:
self.buckets.seek(self.data_start)
wrote_at = self.buckets.tell()
self.buckets.write(self.entry_struct.pack(doc_id,
key,
start,
size,
status,
0))
# self.flush()
self._find_key.delete(key)
self.buckets.seek(start_position)
self.buckets.write(self.bucket_struct.pack(wrote_at))
self.flush()
return True
def get(self, key):
# Fix types
if isinstance(key, str):
key = key.encode()
return self._find_key(self.make_key(key))
def get_many(self, key, limit=1, offset=0):
return self._find_key_many(self.make_key(key), limit, offset)
def all(self, limit=-1, offset=0):
self.buckets.seek(self.data_start)
while offset:
curr_data = self.buckets.read(self.entry_line_size)
if not curr_data:
break
try:
doc_id, key, start, size, status, _next = self.entry_struct.unpack(curr_data)
except IndexException:
break
else:
if status != 'd':
offset -= 1
while limit:
curr_data = self.buckets.read(self.entry_line_size)
if not curr_data:
break
try:
doc_id, key, start, size, status, _next = self.entry_struct.unpack(curr_data)
except IndexException:
break
else:
if status != 'd':
yield doc_id, key, start, size, status
limit -= 1
def _fix_link(self, key, pos_prev, pos_next):
# Fix types
if isinstance(key, str):
key = key.encode()
# CHECKIT why I need that hack
if pos_prev >= self.data_start:
self.buckets.seek(pos_prev)
data = self.buckets.read(self.entry_line_size)
if data:
doc_id, l_key, start, size, status, _next = self.entry_struct.unpack(data)
self.buckets.seek(pos_prev)
self.buckets.write(self.entry_struct.pack(doc_id,
l_key,
start,
size,
status,
pos_next))
self.flush()
if pos_next:
self.buckets.seek(pos_next)
data = self.buckets.read(self.entry_line_size)
if data:
doc_id, l_key, start, size, status, _next = self.entry_struct.unpack(data)
self.buckets.seek(pos_next)
self.buckets.write(self.entry_struct.pack(doc_id,
l_key,
start,
size,
status,
_next))
self.flush()
return
def delete(self, doc_id, key, start=0, size=0):
# Fix types
if isinstance(doc_id, str):
doc_id = doc_id.encode()
if isinstance(key, str):
key = key.encode()
start_position = self._calculate_position(key)
self.buckets.seek(start_position)
curr_data = self.buckets.read(self.bucket_line_size)
if curr_data:
location = self.bucket_struct.unpack(curr_data)[0]
else:
# case happens when trying to delete element with new index key in data
# after adding new index to database without reindex
raise TryReindexException()
found_at, _doc_id, _key, start, size, status, _next = self._locate_doc_id(doc_id, key, location)
self.buckets.seek(found_at)
self.buckets.write(self.entry_struct.pack(doc_id,
key,
start,
size,
'd',
_next))
self.flush()
# self._fix_link(_key, _prev, _next)
self._find_key.delete(key)
self._locate_doc_id.delete(doc_id)
return True
def compact(self, hash_lim=None):
if not hash_lim:
hash_lim = self.hash_lim
compact_ind = self.__class__(
self.db_path, self.name + '_compact', hash_lim=hash_lim)
compact_ind.create_index()
gen = self.all()
while True:
try:
doc_id, key, start, size, status = next(gen)
except StopIteration:
break
self.storage._f.seek(start)
value = self.storage._f.read(size)
start_ = compact_ind.storage._f.tell()
compact_ind.storage._f.write(value)
compact_ind.insert(doc_id, key, start_, size, status)
compact_ind.close_index()
original_name = self.name
# os.unlink(os.path.join(self.db_path, self.name + "_buck"))
self.close_index()
shutil.move(os.path.join(compact_ind.db_path, compact_ind.
name + "_buck"), os.path.join(self.db_path, self.name + "_buck"))
shutil.move(os.path.join(compact_ind.db_path, compact_ind.
name + "_stor"), os.path.join(self.db_path, self.name + "_stor"))
# self.name = original_name
self.open_index() # reload...
self.name = original_name.decode()
self._save_params(dict(name=original_name))
self._fix_params()
self._clear_cache()
return True
def make_key(self, key):
return key
def make_key_value(self, data):
return '1', data
def _clear_cache(self):
self._find_key.clear()
self._locate_doc_id.clear()
def close_index(self):
super(IU_HashIndex, self).close_index()
self._clear_cache()
class IU_UniqueHashIndex(IU_HashIndex):
"""
Index for *unique* keys! Designed to be a **id** index.
That class is for Internal Use only, if you want to use UniqueHashIndex just subclass the :py:class:`UniqueHashIndex` instead this one.
That design is because main index logic should be always in database not in custom user indexes.
"""
def __init__(self, db_path, name, entry_line_format="<32s8sIIcI", *args, **kwargs):
# Fix types
if isinstance(db_path, str):
db_path = db_path.encode()
if isinstance(name, str):
name = name.encode()
if 'key' in kwargs:
raise IndexPreconditionsException(
"UniqueHashIndex doesn't accept key parameter'")
super(IU_UniqueHashIndex, self).__init__(db_path, name,
entry_line_format, *args, **kwargs)
self.create_key = random_hex_32 # : set the function to create random key when no _id given
# self.entry_struct=struct.Struct(entry_line_format)
# @lfu_cache(100)
def _find_key(self, key):
"""
Find the key position
:param key: the key to find
"""
# Fix types
if isinstance(key, str):
key = key.encode()
start_position = self._calculate_position(key)
self.seek = self.buckets.seek(start_position)
curr_data = self.buckets.read(self.bucket_line_size)
if curr_data:
location = self.bucket_struct.unpack(curr_data)[0]
found_at, l_key, rev, start, size, status, _next = self._locate_key(
key, location)
# Fix types
if isinstance(l_key, bytes):
l_key = l_key.decode()
if isinstance(rev, bytes):
rev = rev.decode()
if isinstance(status, bytes):
status = status.decode()
return l_key, rev, start, size, status
else:
return None, None, 0, 0, 'u'
def _find_key_many(self, *args, **kwargs):
raise NotImplementedError()
def _find_place(self, start, key):
"""
Find a place to where put the key. It will iterate using `next` field in record, until
empty `next` found
:param start: position to start from
"""
# Fix types
if isinstance(key, str):
key = key.encode()
location = start
while True:
self.buckets.seek(location)
data = self.buckets.read(self.entry_line_size)
# todo, maybe partial read there...
l_key, rev, start, size, status, _next = self.entry_struct.unpack(
data)
if l_key == key:
raise IndexException("The '%s' key already exists" % key)
if not _next or status == 'd':
return self.buckets.tell() - self.entry_line_size, l_key, rev, start, size, status, _next
else:
location = _next # go to next record
# @lfu_cache(100)
def _locate_key(self, key, start):
"""
Locate position of the key, it will iterate using `next` field in record
until required key will be find.
:param key: the key to locate
:param start: position to start from
"""
# Fix types
if isinstance(key, str):
key = key.encode()
location = start
while True:
self.buckets.seek(location)
data = self.buckets.read(self.entry_line_size)
# todo, maybe partial read there...
try:
l_key, rev, start, size, status, _next = self.entry_struct.unpack(data)
except struct.error:
raise ElemNotFound("Location '%s' not found" % key)
if l_key == key:
break
else:
if not _next:
# not found
raise ElemNotFound("Location '%s' not found" % key)
else:
location = _next # go to next record
return self.buckets.tell() - self.entry_line_size, l_key, rev, start, size, status, _next
def update(self, key, rev, u_start=0, u_size=0, u_status='o'):
# Fix types
if isinstance(key, str):
key = key.encode()
if isinstance(rev, str):
rev = rev.encode()
if isinstance(u_status, str):
u_status = u_status.encode()
start_position = self._calculate_position(key)
self.buckets.seek(start_position)
curr_data = self.buckets.read(self.bucket_line_size)
# test if it's unique or not really unique hash
if curr_data:
location = self.bucket_struct.unpack(curr_data)[0]
else:
raise ElemNotFound("Location '%s' not found" % key)
found_at, _key, _rev, start, size, status, _next = self._locate_key(
key, location)
if u_start == 0:
u_start = start
if u_size == 0:
u_size = size
self.buckets.seek(found_at)
self.buckets.write(self.entry_struct.pack(key,
rev,
u_start,
u_size,
u_status,
_next))
self.flush()
self._find_key.delete(key)
return True
def insert(self, key, rev, start, size, status='o'):
# Fix types
if isinstance(key, str):
key = key.encode()
if isinstance(rev, str):
rev = rev.encode()
if isinstance(status, str):
status = status.encode()
start_position = self._calculate_position(key)
self.buckets.seek(start_position)
curr_data = self.buckets.read(self.bucket_line_size)
# conflict occurs?
if curr_data:
location = self.bucket_struct.unpack(curr_data)[0]
else:
location = 0
if location:
# last key with that hash
found_at, _key, _rev, _start, _size, _status, _next = self._find_place(
location, key)
self.buckets.seek(0, 2)
wrote_at = self.buckets.tell()
# check if position is bigger than all hash entries...
if wrote_at < self.data_start:
self.buckets.seek(self.data_start)
wrote_at = self.buckets.tell()
self.buckets.write(self.entry_struct.pack(key,
rev,
start,
size,
status,
_next))
# self.flush()
self.buckets.seek(found_at)
self.buckets.write(self.entry_struct.pack(_key,
_rev,
_start,
_size,
_status,
wrote_at))
self.flush()
self._find_key.delete(_key)
# self._locate_key.delete(_key)
return True
# raise NotImplementedError
else:
self.buckets.seek(0, 2)
wrote_at = self.buckets.tell()
# check if position is bigger than all hash entries...
if wrote_at < self.data_start:
self.buckets.seek(self.data_start)
wrote_at = self.buckets.tell()
self.buckets.write(self.entry_struct.pack(key,
rev,
start,
size,
status,
0))
self.buckets.seek(start_position)
self.buckets.write(self.bucket_struct.pack(wrote_at))
self.flush()
self._find_key.delete(key)
return True
def all(self, limit=-1, offset=0):
self.buckets.seek(self.data_start)
while offset:
curr_data = self.buckets.read(self.entry_line_size)
if not curr_data:
break
try:
doc_id, rev, start, size, status, next = self.entry_struct.unpack(curr_data)
except IndexException:
break
else:
if status != 'd':
offset -= 1
while limit:
curr_data = self.buckets.read(self.entry_line_size)
if not curr_data:
break
try:
doc_id, rev, start, size, status, next = self.entry_struct.unpack(curr_data)
except IndexException:
break
else:
if status != 'd':
yield doc_id, rev, start, size, status
limit -= 1
def get_many(self, *args, **kwargs):
raise NotImplementedError()
def delete(self, key, start=0, size=0):
# Fix types
if isinstance(key, str):
key = key.encode()
self.update(key, '00000000', start, size, 'd')
def make_key_value(self, data):
_id = data['_id']
try:
_id = data['_id'].encode()
except:
raise IndexPreconditionsException(
"_id must be valid string/bytes object")
if len(_id) != 32:
raise IndexPreconditionsException("Invalid _id lenght")
del data['_id']
del data['_rev']
return _id, data
def destroy(self):
Index.destroy(self)
self._clear_cache()
def _clear_cache(self):
self._find_key.clear()
def insert_with_storage(self, _id, _rev, value):
# Fix types
if isinstance(_id, str):
_id = _id.encode()
if isinstance(_rev, str):
_rev = _rev.encode()
if value:
start, size = self.storage.insert(value)
else:
start = 1
size = 0
return self.insert(_id, _rev, start, size)
def update_with_storage(self, _id, _rev, value):
# Fix types
if isinstance(_id, str):
_id = _id.encode()
if isinstance(_rev, str):
_rev = _rev.encode()
if value:
start, size = self.storage.insert(value)
else:
start = 1
size = 0
return self.update(_id, _rev, start, size)
class DummyHashIndex(IU_HashIndex):
def __init__(self, db_path, name, entry_line_format="<32s4sIIcI", *args, **kwargs):
super(DummyHashIndex, self).__init__(db_path, name,
entry_line_format, *args, **kwargs)
self.create_key = random_hex_32 # : set the function to create random key when no _id given
# self.entry_struct=struct.Struct(entry_line_format)
def update(self, *args, **kwargs):
return True
def insert(self, *args, **kwargs):
return True
def all(self, *args, **kwargs):
raise StopIteration
def get(self, *args, **kwargs):
raise ElemNotFound
def get_many(self, *args, **kwargs):
raise StopIteration
def delete(self, *args, **kwargs):
pass
def make_key_value(self, data):
return '1', {'_': 1}
def destroy(self):
pass
def _clear_cache(self):
pass
def _open_storage(self):
if not self.storage:
self.storage = DummyStorage()
self.storage.open()
def _create_storage(self):
if not self.storage:
self.storage = DummyStorage()
self.storage.create()
class IU_MultiHashIndex(IU_HashIndex):
"""
Class that allows to index more than one key per database record.
It operates very well on GET/INSERT. It's not optimized for
UPDATE operations (will always readd everything)
"""
def __init__(self, *args, **kwargs):
super(IU_MultiHashIndex, self).__init__(*args, **kwargs)
def insert(self, doc_id, key, start, size, status='o'):
if isinstance(key, (list, tuple)):
key = set(key)
elif not isinstance(key, set):
key = set([key])
ins = super(IU_MultiHashIndex, self).insert
for curr_key in key:
ins(doc_id, curr_key, start, size, status)
return True
def update(self, doc_id, key, u_start, u_size, u_status='o'):
if isinstance(key, (list, tuple)):
key = set(key)
elif not isinstance(key, set):
key = set([key])
upd = super(IU_MultiHashIndex, self).update
for curr_key in key:
upd(doc_id, curr_key, u_start, u_size, u_status)
def delete(self, doc_id, key, start=0, size=0):
if isinstance(key, (list, tuple)):
key = set(key)
elif not isinstance(key, set):
key = set([key])
delete = super(IU_MultiHashIndex, self).delete
for curr_key in key:
delete(doc_id, curr_key, start, size)
def get(self, key):
return super(IU_MultiHashIndex, self).get(key)
def make_key_value(self, data):
raise NotImplementedError()
# classes for public use, done in this way because of
# generation static files with indexes (_index directory)
class HashIndex(IU_HashIndex):
"""
That class is designed to be used in custom indexes.
"""
pass
class UniqueHashIndex(IU_UniqueHashIndex):
"""
That class is designed to be used in custom indexes. It's designed to be **id** index.
"""
pass
class MultiHashIndex(IU_MultiHashIndex):
"""
That class is designed to be used in custom indexes.
"""
+200
View File
@@ -0,0 +1,200 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2011-2013 Codernity (http://codernity.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import marshal
import struct
import shutil
from CodernityDB3.storage import IU_Storage, DummyStorage
try:
from CodernityDB3 import __version__
except ImportError:
from .__init__ import __version__
import io
class IndexException(Exception):
pass
class IndexNotFoundException(IndexException):
pass
class ReindexException(IndexException):
pass
class TryReindexException(ReindexException):
pass
class ElemNotFound(IndexException):
pass
class DocIdNotFound(ElemNotFound):
pass
class IndexConflict(IndexException):
pass
class IndexPreconditionsException(IndexException):
pass
class Index(object):
__version__ = __version__
custom_header = "" # : use it for imports required by your index
def __init__(self,
db_path,
name):
if isinstance(name, bytes):
name = name.decode()
if isinstance(db_path, bytes):
db_path = db_path.decode()
self.name = name
self._start_ind = 500
self.db_path = db_path
def open_index(self):
if not os.path.isfile(os.path.join(self.db_path, self.name + '_buck')):
raise IndexException("Doesn't exists")
self.buckets = io.open(
os.path.join(self.db_path, self.name + "_buck"), 'r+b', buffering=0)
self._fix_params()
self._open_storage()
def _close(self):
self.buckets.close()
self.storage.close()
def close_index(self):
self.flush()
self.fsync()
self._close()
def create_index(self):
raise NotImplementedError()
def _fix_params(self):
self.buckets.seek(0)
props = marshal.loads(self.buckets.read(self._start_ind))
for k, v in list(props.items()):
self.__dict__[k] = v
self.buckets.seek(0, 2)
def _save_params(self, in_params={}):
self.buckets.seek(0)
props = marshal.loads(self.buckets.read(self._start_ind))
props.update(in_params)
self.buckets.seek(0)
data = marshal.dumps(props)
if len(data) > self._start_ind:
raise IndexException("To big props")
self.buckets.write(data)
self.flush()
self.buckets.seek(0, 2)
self.__dict__.update(props)
def _open_storage(self, *args, **kwargs):
pass
def _create_storage(self, *args, **kwargs):
pass
def _destroy_storage(self, *args, **kwargs):
self.storage.destroy()
def _find_key(self, key):
raise NotImplementedError()
def update(self, doc_id, key, start, size):
raise NotImplementedError()
def insert(self, doc_id, key, start, size):
raise NotImplementedError()
def get(self, key):
raise NotImplementedError()
def get_many(self, key, start_from=None, limit=0):
raise NotImplementedError()
def all(self, start_pos):
raise NotImplementedError()
def delete(self, key, start, size):
raise NotImplementedError()
def make_key_value(self, data):
raise NotImplementedError()
def make_key(self, data):
raise NotImplementedError()
def compact(self, *args, **kwargs):
raise NotImplementedError()
def destroy(self, *args, **kwargs):
self._close()
bucket_file = os.path.join(self.db_path, self.name + '_buck')
os.unlink(bucket_file)
self._destroy_storage()
self._find_key.clear()
def flush(self):
try:
self.buckets.flush()
self.storage.flush()
except:
pass
def fsync(self):
try:
os.fsync(self.buckets.fileno())
self.storage.fsync()
except:
pass
def update_with_storage(self, doc_id, key, value):
if value:
start, size = self.storage.insert(value)
else:
start = 1
size = 0
return self.update(doc_id, key, start, size)
def insert_with_storage(self, doc_id, key, value):
if value is not None:
start, size = self.storage.insert(value)
else:
start = 1
size = 0
return self.insert(doc_id, key, start, size)
+645
View File
@@ -0,0 +1,645 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2011-2013 Codernity (http://codernity.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re
import tokenize
import token
import uuid
class IndexCreatorException(Exception):
def __init__(self, ex, line=None):
self.ex = ex
self.line = line
def __str__(self):
if self.line:
return repr(self.ex + "(in line: %d)" % self.line)
return repr(self.ex)
class IndexCreatorFunctionException(IndexCreatorException):
pass
class IndexCreatorValueException(IndexCreatorException):
pass
class Parser(object):
def __init__(self):
pass
def parse(self, data, name=None):
if not name:
self.name = "_" + uuid.uuid4().hex
else:
self.name = name
self.ind = 0
self.stage = 0
self.logic = ['and', 'or', 'in']
self.logic2 = ['&', '|']
self.allowed_props = {'TreeBasedIndex': ['type', 'name', 'key_format', 'node_capacity', 'pointer_format', 'meta_format'],
'HashIndex': ['type', 'name', 'key_format', 'hash_lim', 'entry_line_format'],
'MultiHashIndex': ['type', 'name', 'key_format', 'hash_lim', 'entry_line_format'],
'MultiTreeBasedIndex': ['type', 'name', 'key_format', 'node_capacity', 'pointer_format', 'meta_format']
}
self.funcs = {'md5': (['md5'], ['.digest()']),
'len': (['len'], []),
'str': (['str'], []),
'fix_r': (['self.fix_r'], []),
'prefix': (['self.prefix'], []),
'infix': (['self.infix'], []),
'suffix': (['self.suffix'], [])
}
self.handle_int_imports = {'infix': "from itertools import izip\n"}
self.funcs_with_body = {'fix_r':
(""" def fix_r(self,s,l):
e = len(s)
if e == l:
return s
elif e > l:
return s[:l]
else:
return s.rjust(l,'_')\n""", False),
'prefix':
(""" def prefix(self,s,m,l,f):
t = len(s)
if m < 1:
m = 1
o = set()
if t > l:
s = s[:l]
t = l
while m <= t:
o.add(s.rjust(f,'_'))
s = s[:-1]
t -= 1
return o\n""", False),
'suffix':
(""" def suffix(self,s,m,l,f):
t = len(s)
if m < 1:
m = 1
o = set()
if t > l:
s = s[t-l:]
t = len(s)
while m <= t:
o.add(s.rjust(f,'_'))
s = s[1:]
t -= 1
return o\n""", False),
'infix':
(""" def infix(self,s,m,l,f):
t = len(s)
o = set()
for x in xrange(m - 1, l):
t = (s, )
for y in xrange(0, x):
t += (s[y + 1:],)
o.update(set(''.join(x).rjust(f, '_').lower() for x in izip(*t)))
return o\n""", False)}
self.none = ['None', 'none', 'null']
self.props_assign = ['=', ':']
self.all_adj_num_comp = {token.NUMBER: (
token.NUMBER, token.NAME, '-', '('),
token.NAME: (token.NUMBER, token.NAME, '-', '('),
')': (token.NUMBER, token.NAME, '-', '(')
}
self.all_adj_num_op = {token.NUMBER: (token.NUMBER, token.NAME, '('),
token.NAME: (token.NUMBER, token.NAME, '('),
')': (token.NUMBER, token.NAME, '(')
}
self.allowed_adjacent = {
"<=": self.all_adj_num_comp,
">=": self.all_adj_num_comp,
">": self.all_adj_num_comp,
"<": self.all_adj_num_comp,
"==": {token.NUMBER: (token.NUMBER, token.NAME, '('),
token.NAME: (token.NUMBER, token.NAME, token.STRING, '('),
token.STRING: (token.NAME, token.STRING, '('),
')': (token.NUMBER, token.NAME, token.STRING, '('),
']': (token.NUMBER, token.NAME, token.STRING, '(')
},
"+": {token.NUMBER: (token.NUMBER, token.NAME, '('),
token.NAME: (token.NUMBER, token.NAME, token.STRING, '('),
token.STRING: (token.NAME, token.STRING, '('),
')': (token.NUMBER, token.NAME, token.STRING, '('),
']': (token.NUMBER, token.NAME, token.STRING, '(')
},
"-": {token.NUMBER: (token.NUMBER, token.NAME, '('),
token.NAME: (token.NUMBER, token.NAME, '('),
')': (token.NUMBER, token.NAME, '('),
'<': (token.NUMBER, token.NAME, '('),
'>': (token.NUMBER, token.NAME, '('),
'<=': (token.NUMBER, token.NAME, '('),
'>=': (token.NUMBER, token.NAME, '('),
'==': (token.NUMBER, token.NAME, '('),
']': (token.NUMBER, token.NAME, '(')
},
"*": self.all_adj_num_op,
"/": self.all_adj_num_op,
"%": self.all_adj_num_op,
",": {token.NUMBER: (token.NUMBER, token.NAME, token.STRING, '{', '[', '('),
token.NAME: (token.NUMBER, token.NAME, token.STRING, '(', '{', '['),
token.STRING: (token.NAME, token.STRING, token.NUMBER, '(', '{', '['),
')': (token.NUMBER, token.NAME, token.STRING, '(', '{', '['),
']': (token.NUMBER, token.NAME, token.STRING, '(', '{', '['),
'}': (token.NUMBER, token.NAME, token.STRING, '(', '{', '[')
}
}
def is_num(s):
m = re.search('[^0-9*()+\-\s/]+', s)
return not m
def is_string(s):
m = re.search('\s*(?P<a>[\'\"]+).*?(?P=a)\s*', s)
return m
data = re.split('make_key_value\:', data)
if len(data) < 2:
raise IndexCreatorFunctionException(
"Couldn't find a definition of make_key_value function!\n")
spl1 = re.split('make_key\:', data[0])
spl2 = re.split('make_key\:', data[1])
self.funcs_rev = False
if len(spl1) > 1:
data = [spl1[0]] + [data[1]] + [spl1[1]]
self.funcs_rev = True
elif len(spl2) > 1:
data = [data[0]] + spl2
else:
data.append("key")
if data[1] == re.search('\s*', data[1], re.S | re.M).group(0):
raise IndexCreatorFunctionException("Empty function body ",
len(re.split('\n', data[0])) + (len(re.split('\n', data[2])) if self.funcs_rev else 1) - 1)
if data[2] == re.search('\s*', data[2], re.S | re.M).group(0):
raise IndexCreatorFunctionException("Empty function body ",
len(re.split('\n', data[0])) + (1 if self.funcs_rev else len(re.split('\n', data[1]))) - 1)
if data[0] == re.search('\s*', data[0], re.S | re.M).group(0):
raise IndexCreatorValueException("You didn't set any properity or you set them not at the begining of the code\n")
data = [re.split(
'\n', data[0]), re.split('\n', data[1]), re.split('\n', data[2])]
self.cnt_lines = (len(data[0]), len(data[1]), len(data[2]))
ind = 0
self.predata = data
self.data = [[], [], []]
for i, v in enumerate(self.predata[0]):
for k, w in enumerate(self.predata[0][i]):
if self.predata[0][i][k] in self.props_assign:
if not is_num(self.predata[0][i][k + 1:]) and self.predata[0][i].strip()[:4] != 'type' and self.predata[0][i].strip()[:4] != 'name':
s = self.predata[0][i][k + 1:]
self.predata[0][i] = self.predata[0][i][:k + 1]
m = re.search('\s+', s.strip())
if not is_string(s) and not m:
s = "'" + s.strip() + "'"
self.predata[0][i] += s
break
for n, i in enumerate(self.predata):
for k in i:
k = k.strip()
if k:
self.data[ind].append(k)
self.check_enclosures(k, n)
ind += 1
return self.parse_ex()
def readline(self, stage):
def foo():
if len(self.data[stage]) <= self.ind:
self.ind = 0
return ""
else:
self.ind += 1
return self.data[stage][self.ind - 1]
return foo
def add(self, l, i):
def add_aux(*args):
# print args,self.ind
if len(l[i]) < self.ind:
l[i].append([])
l[i][self.ind - 1].append(args)
return add_aux
def parse_ex(self):
self.index_name = ""
self.index_type = ""
self.curLine = -1
self.con = -1
self.brackets = -1
self.curFunc = None
self.colons = 0
self.line_cons = ([], [], [])
self.pre_tokens = ([], [], [])
self.known_dicts_in_mkv = []
self.prop_name = True
self.prop_assign = False
self.is_one_arg_enough = False
self.funcs_stack = []
self.last_line = [-1, -1, -1]
self.props_set = []
self.custom_header = set()
self.tokens = []
self.tokens_head = ['# %s\n' % self.name, 'class %s(' % self.name, '):\n', ' def __init__(self, *args, **kwargs): ']
for i in range(3):
tokenize.tokenize(self.readline(i), self.add(self.pre_tokens, i))
# tokenize treats some keyword not in the right way, thats why we
# have to change some of them
for nk, k in enumerate(self.pre_tokens[i]):
for na, a in enumerate(k):
if a[0] == token.NAME and a[1] in self.logic:
self.pre_tokens[i][nk][
na] = (token.OP, a[1], a[2], a[3], a[4])
for i in self.pre_tokens[1]:
self.line_cons[1].append(self.check_colons(i, 1))
self.check_adjacents(i, 1)
if self.check_for_2nd_arg(i) == -1 and not self.is_one_arg_enough:
raise IndexCreatorValueException("No 2nd value to return (did u forget about ',None'?", self.cnt_line_nr(i[0][4], 1))
self.is_one_arg_enough = False
for i in self.pre_tokens[2]:
self.line_cons[2].append(self.check_colons(i, 2))
self.check_adjacents(i, 2)
for i in self.pre_tokens[0]:
self.handle_prop_line(i)
self.cur_brackets = 0
self.tokens += ['\n super(%s, self).__init__(*args, **kwargs)\n def make_key_value(self, data): ' % self.name]
for i in self.pre_tokens[1]:
for k in i:
self.handle_make_value(*k)
self.curLine = -1
self.con = -1
self.cur_brackets = 0
self.tokens += ['\n def make_key(self, key):']
for i in self.pre_tokens[2]:
for k in i:
self.handle_make_key(*k)
if self.index_type == "":
raise IndexCreatorValueException("Missing index type definition\n")
if self.index_name == "":
raise IndexCreatorValueException("Missing index name\n")
self.tokens_head[0] = "# " + self.index_name + "\n" + \
self.tokens_head[0]
for i in self.funcs_with_body:
if self.funcs_with_body[i][1]:
self.tokens_head.insert(4, self.funcs_with_body[i][0])
if None in self.custom_header:
self.custom_header.remove(None)
if self.custom_header:
s = ' custom_header = """'
for i in self.custom_header:
s += i
s += '"""\n'
self.tokens_head.insert(4, s)
if self.index_type in self.allowed_props:
for i in self.props_set:
if i not in self.allowed_props[self.index_type]:
raise IndexCreatorValueException("Properity %s is not allowed for index type: %s" % (i, self.index_type))
# print "".join(self.tokens_head)
# print "----------"
# print (" ".join(self.tokens))
return "".join(self.custom_header), "".join(self.tokens_head) + (" ".join(self.tokens))
# has to be run BEFORE tokenize
def check_enclosures(self, d, st):
encs = []
contr = {'(': ')', '{': '}', '[': ']', "'": "'", '"': '"'}
ends = [')', '}', ']', "'", '"']
for i in d:
if len(encs) > 0 and encs[-1] in ['"', "'"]:
if encs[-1] == i:
del encs[-1]
elif i in contr:
encs += [i]
elif i in ends:
if len(encs) < 1 or contr[encs[-1]] != i:
raise IndexCreatorValueException("Missing opening enclosure for \'%s\'" % i, self.cnt_line_nr(d, st))
del encs[-1]
if len(encs) > 0:
raise IndexCreatorValueException("Missing closing enclosure for \'%s\'" % encs[0], self.cnt_line_nr(d, st))
def check_adjacents(self, d, st):
def std_check(d, n):
if n == 0:
prev = -1
else:
prev = d[n - 1][1] if d[n - 1][0] == token.OP else d[n - 1][0]
cur = d[n][1] if d[n][0] == token.OP else d[n][0]
# there always is an endmarker at the end, but this is a precaution
if n + 2 > len(d):
nex = -1
else:
nex = d[n + 1][1] if d[n + 1][0] == token.OP else d[n + 1][0]
if prev not in self.allowed_adjacent[cur]:
raise IndexCreatorValueException("Wrong left value of the %s" % cur, self.cnt_line_nr(line, st))
# there is an assumption that whole data always ends with 0 marker, the idea prolly needs a rewritting to allow more whitespaces
# between tokens, so it will be handled anyway
elif nex not in self.allowed_adjacent[cur][prev]:
raise IndexCreatorValueException("Wrong right value of the %s" % cur, self.cnt_line_nr(line, st))
for n, (t, i, _, _, line) in enumerate(d):
if t == token.NAME or t == token.STRING:
if n + 1 < len(d) and d[n + 1][0] in [token.NAME, token.STRING]:
raise IndexCreatorValueException("Did you forget about an operator in between?", self.cnt_line_nr(line, st))
elif i in self.allowed_adjacent:
std_check(d, n)
def check_colons(self, d, st):
cnt = 0
br = 0
def check_ret_args_nr(a, s):
c_b_cnt = 0
s_b_cnt = 0
n_b_cnt = 0
comas_cnt = 0
for _, i, _, _, line in a:
if c_b_cnt == n_b_cnt == s_b_cnt == 0:
if i == ',':
comas_cnt += 1
if (s == 1 and comas_cnt > 1) or (s == 2 and comas_cnt > 0):
raise IndexCreatorFunctionException("Too much arguments to return", self.cnt_line_nr(line, st))
if s == 0 and comas_cnt > 0:
raise IndexCreatorValueException("A coma here doesn't make any sense", self.cnt_line_nr(line, st))
elif i == ':':
if s == 0:
raise IndexCreatorValueException("A colon here doesn't make any sense", self.cnt_line_nr(line, st))
raise IndexCreatorFunctionException("Two colons don't make any sense", self.cnt_line_nr(line, st))
if i == '{':
c_b_cnt += 1
elif i == '}':
c_b_cnt -= 1
elif i == '(':
n_b_cnt += 1
elif i == ')':
n_b_cnt -= 1
elif i == '[':
s_b_cnt += 1
elif i == ']':
s_b_cnt -= 1
def check_if_empty(a):
for i in a:
if i not in [token.NEWLINE, token.INDENT, token.ENDMARKER]:
return False
return True
if st == 0:
check_ret_args_nr(d, st)
return
for n, i in enumerate(d):
if i[1] == ':':
if br == 0:
if len(d) < n or check_if_empty(d[n + 1:]):
raise IndexCreatorValueException(
"Empty return value", self.cnt_line_nr(i[4], st))
elif len(d) >= n:
check_ret_args_nr(d[n + 1:], st)
return cnt
else:
cnt += 1
elif i[1] == '{':
br += 1
elif i[1] == '}':
br -= 1
check_ret_args_nr(d, st)
return -1
def check_for_2nd_arg(self, d):
c_b_cnt = 0 # curly brackets counter '{}'
s_b_cnt = 0 # square brackets counter '[]'
n_b_cnt = 0 # normal brackets counter '()'
def check_2nd_arg(d, ind):
d = d[ind[0]:]
for t, i, (n, r), _, line in d:
if i == '{' or i is None:
return 0
elif t == token.NAME:
self.known_dicts_in_mkv.append((i, (n, r)))
return 0
elif t == token.STRING or t == token.NUMBER:
raise IndexCreatorValueException("Second return value of make_key_value function has to be a dictionary!", self.cnt_line_nr(line, 1))
for ind in enumerate(d):
t, i, _, _, _ = ind[1]
if s_b_cnt == n_b_cnt == c_b_cnt == 0:
if i == ',':
return check_2nd_arg(d, ind)
elif (t == token.NAME and i not in self.funcs) or i == '{':
self.is_one_arg_enough = True
if i == '{':
c_b_cnt += 1
self.is_one_arg_enough = True
elif i == '}':
c_b_cnt -= 1
elif i == '(':
n_b_cnt += 1
elif i == ')':
n_b_cnt -= 1
elif i == '[':
s_b_cnt += 1
elif i == ']':
s_b_cnt -= 1
return -1
def cnt_line_nr(self, l, stage):
nr = -1
for n, i in enumerate(self.predata[stage]):
# print i,"|||",i.strip(),"|||",l
if l == i.strip():
nr = n
if nr == -1:
return -1
if stage == 0:
return nr + 1
elif stage == 1:
return nr + self.cnt_lines[0] + (self.cnt_lines[2] - 1 if self.funcs_rev else 0)
elif stage == 2:
return nr + self.cnt_lines[0] + (self.cnt_lines[1] - 1 if not self.funcs_rev else 0)
return -1
def handle_prop_line(self, d):
d_len = len(d)
if d[d_len - 1][0] == token.ENDMARKER:
d_len -= 1
if d_len < 3:
raise IndexCreatorValueException("Can't handle properity assingment ", self.cnt_line_nr(d[0][4], 0))
if not d[1][1] in self.props_assign:
raise IndexCreatorValueException(
"Did you forget : or =?", self.cnt_line_nr(d[0][4], 0))
if d[0][0] == token.NAME or d[0][0] == token.STRING:
if d[0][1] in self.props_set:
raise IndexCreatorValueException("Properity %s is set more than once" % d[0][1], self.cnt_line_nr(d[0][4], 0))
self.props_set += [d[0][1]]
if d[0][1] == "type" or d[0][1] == "name":
t, tk, _, _, line = d[2]
if d_len > 3:
raise IndexCreatorValueException(
"Wrong value to assign", self.cnt_line_nr(line, 0))
if t == token.STRING:
m = re.search('\s*(?P<a>[\'\"]+)(.*?)(?P=a)\s*', tk)
if m:
tk = m.groups()[1]
elif t != token.NAME:
raise IndexCreatorValueException(
"Wrong value to assign", self.cnt_line_nr(line, 0))
if d[0][1] == "type":
if d[2][1] == "TreeBasedIndex":
self.custom_header.add("from CodernityDB3.tree_index import TreeBasedIndex\n")
elif d[2][1] == "MultiTreeBasedIndex":
self.custom_header.add("from CodernityDB3.tree_index import MultiTreeBasedIndex\n")
elif d[2][1] == "MultiHashIndex":
self.custom_header.add("from CodernityDB3.hash_index import MultiHashIndex\n")
self.tokens_head.insert(2, tk)
self.index_type = tk
else:
self.index_name = tk
return
else:
self.tokens += ['\n kwargs["' + d[0][1] + '"]']
else:
raise IndexCreatorValueException("Can't handle properity assingment ", self.cnt_line_nr(d[0][4], 0))
self.tokens += ['=']
self.check_adjacents(d[2:], 0)
self.check_colons(d[2:], 0)
for i in d[2:]:
self.tokens += [i[1]]
def generate_func(self, t, tk, pos_start, pos_end, line, hdata, stage):
if self.last_line[stage] != -1 and pos_start[0] > self.last_line[stage] and line != '':
raise IndexCreatorFunctionException("This line will never be executed!", self.cnt_line_nr(line, stage))
if t == 0:
return
if pos_start[1] == 0:
if self.line_cons[stage][pos_start[0] - 1] == -1:
self.tokens += ['\n return']
self.last_line[stage] = pos_start[0]
else:
self.tokens += ['\n if']
elif tk == ':' and self.line_cons[stage][pos_start[0] - 1] > -1:
if self.line_cons[stage][pos_start[0] - 1] == 0:
self.tokens += [':\n return']
return
self.line_cons[stage][pos_start[0] - 1] -= 1
if tk in self.logic2:
# print tk
if line[pos_start[1] - 1] != tk and line[pos_start[1] + 1] != tk:
self.tokens += [tk]
if line[pos_start[1] - 1] != tk and line[pos_start[1] + 1] == tk:
if tk == '&':
self.tokens += ['and']
else:
self.tokens += ['or']
return
if self.brackets != 0:
def search_through_known_dicts(a):
for i, (n, r) in self.known_dicts_in_mkv:
if i == tk and r > pos_start[1] and n == pos_start[0] and hdata == 'data':
return True
return False
if t == token.NAME and len(self.funcs_stack) > 0 and self.funcs_stack[-1][0] == 'md5' and search_through_known_dicts(tk):
raise IndexCreatorValueException("Second value returned by make_key_value for sure isn't a dictionary ", self.cnt_line_nr(line, 1))
if tk == ')':
self.cur_brackets -= 1
if len(self.funcs_stack) > 0 and self.cur_brackets == self.funcs_stack[-1][1]:
self.tokens += [tk]
self.tokens += self.funcs[self.funcs_stack[-1][0]][1]
del self.funcs_stack[-1]
return
if tk == '(':
self.cur_brackets += 1
if tk in self.none:
self.tokens += ['None']
return
if t == token.NAME and tk not in self.logic and tk != hdata:
if tk not in self.funcs:
self.tokens += [hdata + '["' + tk + '"]']
else:
self.tokens += self.funcs[tk][0]
if tk in self.funcs_with_body:
self.funcs_with_body[tk] = (
self.funcs_with_body[tk][0], True)
self.custom_header.add(self.handle_int_imports.get(tk))
self.funcs_stack += [(tk, self.cur_brackets)]
else:
self.tokens += [tk]
def handle_make_value(self, t, tk, pos_start, pos_end, line):
self.generate_func(t, tk, pos_start, pos_end, line, 'data', 1)
def handle_make_key(self, t, tk, pos_start, pos_end, line):
self.generate_func(t, tk, pos_start, pos_end, line, 'key', 2)
+150
View File
@@ -0,0 +1,150 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2011-2013 Codernity (http://codernity.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import functools
from heapq import nsmallest
from operator import itemgetter
from collections import defaultdict
try:
from collections import Counter
except ImportError:
class Counter(dict):
'Mapping where default values are zero'
def __missing__(self, key):
return 0
def cache1lvl(maxsize=100):
"""
modified version of http://code.activestate.com/recipes/498245/
"""
def decorating_function(user_function):
cache = {}
use_count = Counter()
@functools.wraps(user_function)
def wrapper(key, *args, **kwargs):
try:
result = cache[key]
except KeyError:
if len(cache) == maxsize:
for k, _ in nsmallest(maxsize // 10 or 1,
iter(list(use_count.items())),
key=itemgetter(1)):
del cache[k], use_count[k]
cache[key] = user_function(key, *args, **kwargs)
result = cache[key]
# result = user_function(obj, key, *args, **kwargs)
finally:
use_count[key] += 1
return result
def clear():
cache.clear()
use_count.clear()
def delete(key):
try:
del cache[key]
del use_count[key]
except KeyError:
return False
else:
return True
wrapper.clear = clear
wrapper.cache = cache
wrapper.delete = delete
return wrapper
return decorating_function
def twolvl_iterator(dict):
for k, v in list(dict.items()):
for kk, vv in list(v.items()):
yield k, kk, vv
def cache2lvl(maxsize=100):
"""
modified version of http://code.activestate.com/recipes/498245/
"""
def decorating_function(user_function):
cache = {}
use_count = defaultdict(Counter)
@functools.wraps(user_function)
def wrapper(*args, **kwargs):
# return user_function(*args, **kwargs)
try:
result = cache[args[0]][args[1]]
except KeyError:
if wrapper.cache_size == maxsize:
to_delete = maxsize // 10 or 1
for k1, k2, v in nsmallest(to_delete,
twolvl_iterator(use_count),
key=itemgetter(2)):
del cache[k1][k2], use_count[k1][k2]
if not cache[k1]:
del cache[k1]
del use_count[k1]
wrapper.cache_size -= to_delete
result = user_function(*args, **kwargs)
try:
cache[args[0]][args[1]] = result
except KeyError:
cache[args[0]] = {args[1]: result}
wrapper.cache_size += 1
finally:
use_count[args[0]][args[1]] += 1
return result
def clear():
cache.clear()
use_count.clear()
def delete(key, inner_key=None):
if inner_key is not None:
try:
del cache[key][inner_key]
del use_count[key][inner_key]
if not cache[key]:
del cache[key]
del use_count[key]
wrapper.cache_size -= 1
except KeyError:
return False
else:
return True
else:
try:
wrapper.cache_size -= len(cache[key])
del cache[key]
del use_count[key]
except KeyError:
return False
else:
return True
wrapper.clear = clear
wrapper.cache = cache
wrapper.delete = delete
wrapper.cache_size = 0
return wrapper
return decorating_function
+158
View File
@@ -0,0 +1,158 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2011-2013 Codernity (http://codernity.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import functools
from heapq import nsmallest
from operator import itemgetter
from collections import defaultdict
try:
from collections import Counter
except ImportError:
class Counter(dict):
'Mapping where default values are zero'
def __missing__(self, key):
return 0
def twolvl_iterator(dict):
for k, v in list(dict.items()):
for kk, vv in list(v.items()):
yield k, kk, vv
def create_cache1lvl(lock_obj):
def cache1lvl(maxsize=100):
"""
modified version of http://code.activestate.com/recipes/498245/
"""
def decorating_function(user_function):
cache = {}
use_count = Counter()
lock = lock_obj()
@functools.wraps(user_function)
def wrapper(key, *args, **kwargs):
try:
result = cache[key]
except KeyError:
with lock:
if len(cache) == maxsize:
for k, _ in nsmallest(maxsize // 10 or 1,
iter(list(use_count.items())),
key=itemgetter(1)):
del cache[k], use_count[k]
cache[key] = user_function(key, *args, **kwargs)
result = cache[key]
use_count[key] += 1
else:
with lock:
use_count[key] += 1
return result
def clear():
cache.clear()
use_count.clear()
def delete(key):
try:
del cache[key]
del use_count[key]
return True
except KeyError:
return False
wrapper.clear = clear
wrapper.cache = cache
wrapper.delete = delete
return wrapper
return decorating_function
return cache1lvl
def create_cache2lvl(lock_obj):
def cache2lvl(maxsize=100):
"""
modified version of http://code.activestate.com/recipes/498245/
"""
def decorating_function(user_function):
cache = {}
use_count = defaultdict(Counter)
lock = lock_obj()
@functools.wraps(user_function)
def wrapper(*args, **kwargs):
try:
result = cache[args[0]][args[1]]
except KeyError:
with lock:
if wrapper.cache_size == maxsize:
to_delete = maxsize / 10 or 1
for k1, k2, v in nsmallest(to_delete,
twolvl_iterator(
use_count),
key=itemgetter(2)):
del cache[k1][k2], use_count[k1][k2]
if not cache[k1]:
del cache[k1]
del use_count[k1]
wrapper.cache_size -= to_delete
result = user_function(*args, **kwargs)
try:
cache[args[0]][args[1]] = result
except KeyError:
cache[args[0]] = {args[1]: result}
use_count[args[0]][args[1]] += 1
wrapper.cache_size += 1
else:
use_count[args[0]][args[1]] += 1
return result
def clear():
cache.clear()
use_count.clear()
def delete(key, *args):
if args:
try:
del cache[key][args[0]]
del use_count[key][args[0]]
if not cache[key]:
del cache[key]
del use_count[key]
wrapper.cache_size -= 1
return True
except KeyError:
return False
else:
try:
wrapper.cache_size -= len(cache[key])
del cache[key]
del use_count[key]
return True
except KeyError:
return False
wrapper.clear = clear
wrapper.cache = cache
wrapper.delete = delete
wrapper.cache_size = 0
return wrapper
return decorating_function
return cache2lvl
+45
View File
@@ -0,0 +1,45 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2011-2013 Codernity (http://codernity.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from CodernityDB3.database import Database
import shutil
import os
def migrate(source, destination):
"""
Very basic for now
"""
dbs = Database(source)
dbt = Database(destination)
dbs.open()
dbt.create()
dbt.close()
for curr in os.listdir(os.path.join(dbs.path, '_indexes')):
if curr != '00id.py':
shutil.copyfile(os.path.join(dbs.path, '_indexes', curr),
os.path.join(dbt.path, '_indexes', curr))
dbt.open()
for c in dbs.all('id'):
del c['_rev']
dbt.insert(c)
return True
if __name__ == '__main__':
import sys
migrate(sys.argv[1], sys.argv[2])
+35
View File
@@ -0,0 +1,35 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2011-2013 Codernity (http://codernity.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from random import getrandbits, randrange
import uuid
class NONE:
"""
It's inteded to be None but different,
for internal use only!
"""
pass
def random_hex_32():
return uuid.UUID(int=getrandbits(128), version=4).hex
def random_hex_4(*args, **kwargs):
return '%04x' % randrange(256 ** 2)
+99
View File
@@ -0,0 +1,99 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2011-2013 Codernity (http://codernity.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from CodernityDB3.misc import NONE
def __patch(obj, name, new):
n = NONE()
orig = getattr(obj, name, n)
if orig is not n:
if orig == new:
raise Exception("Shouldn't happen, new and orig are the same")
setattr(obj, name, new)
return
def patch_cache_lfu(lock_obj):
"""
Patnches cache mechanizm to be thread safe (gevent ones also)
.. note::
It's internal CodernityDB mechanizm, it will be called when needed
"""
from . import lfu_cache
from . import lfu_cache_with_lock
lfu_lock1lvl = lfu_cache_with_lock.create_cache1lvl(lock_obj)
lfu_lock2lvl = lfu_cache_with_lock.create_cache2lvl(lock_obj)
__patch(lfu_cache, 'cache1lvl', lfu_lock1lvl)
__patch(lfu_cache, 'cache2lvl', lfu_lock2lvl)
def patch_cache_rr(lock_obj):
"""
Patches cache mechanizm to be thread safe (gevent ones also)
.. note::
It's internal CodernityDB mechanizm, it will be called when needed
"""
from . import rr_cache
from . import rr_cache_with_lock
rr_lock1lvl = rr_cache_with_lock.create_cache1lvl(lock_obj)
rr_lock2lvl = rr_cache_with_lock.create_cache2lvl(lock_obj)
__patch(rr_cache, 'cache1lvl', rr_lock1lvl)
__patch(rr_cache, 'cache2lvl', rr_lock2lvl)
def patch_flush_fsync(db_obj):
"""
Will always execute index.fsync after index.flush.
.. note::
It's for advanced users, use when you understand difference between `flush` and `fsync`, and when you definitely need that.
It's important to call it **AFTER** database has all indexes etc (after db.create or db.open)
Example usage::
...
db = Database('/tmp/patch_demo')
db.create()
patch_flush_fsync(db)
...
"""
def always_fsync(ind_obj):
def _inner():
ind_obj.orig_flush()
ind_obj.fsync()
return _inner
for index in db_obj.indexes:
setattr(index, 'orig_flush', index.flush)
setattr(index, 'flush', always_fsync(index))
setattr(db_obj, 'orig_flush', db_obj.flush)
setattr(db_obj, 'flush', always_fsync(db_obj))
return
+127
View File
@@ -0,0 +1,127 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2011-2013 Codernity (http://codernity.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import functools
from random import choice
def cache1lvl(maxsize=100):
def decorating_function(user_function):
cache1lvl = {}
@functools.wraps(user_function)
def wrapper(key, *args, **kwargs):
if isinstance(key, bytes):
key = key.decode()
# print("cachedddd", key) ## TODO
try:
#result = cache1lvl[key]
result = cache1lvl[key]
except KeyError:
if len(cache1lvl) == maxsize:
for i in range(maxsize // 10 or 1):
del cache1lvl[choice(list(cache1lvl.keys()))]
## print("#" * 10, key) # TODO
## print(user_function) # TODO
## print("cache1lvl", key, user_function) # TODO
## print(cache1lvl) # TODO
cache1lvl[key] = user_function(key, *args, **kwargs)
## print(cache1lvl) # TODO
result = cache1lvl[key]
## print("result caching", result) # TODO
# result = user_function(obj, key, *args, **kwargs)
if isinstance(result, bytes):
result = key.decode()
## print("r" * 20, result) # TODO
return result
def clear():
cache1lvl.clear()
def delete(key):
if isinstance(key, bytes):
key = key.decode()
try:
del cache1lvl[key]
return True
except KeyError:
return False
wrapper.clear = clear
wrapper.cache = cache1lvl
wrapper.delete = delete
return wrapper
return decorating_function
def cache2lvl(maxsize=100):
def decorating_function(user_function):
cache = {}
@functools.wraps(user_function)
def wrapper(*args, **kwargs):
# return user_function(*args, **kwargs)
try:
result = cache[args[0]][args[1]]
except KeyError:
# print wrapper.cache_size
if wrapper.cache_size == maxsize:
to_delete = maxsize // 10 or 1
for i in range(to_delete):
key1 = choice(list(cache.keys()))
key2 = choice(list(cache[key1].keys()))
del cache[key1][key2]
if not cache[key1]:
del cache[key1]
wrapper.cache_size -= to_delete
# print wrapper.cache_size
result = user_function(*args, **kwargs)
try:
cache[args[0]][args[1]] = result
except KeyError:
cache[args[0]] = {args[1]: result}
wrapper.cache_size += 1
return result
def clear():
cache.clear()
wrapper.cache_size = 0
def delete(key, inner_key=None):
if inner_key:
try:
del cache[key][inner_key]
if not cache[key]:
del cache[key]
wrapper.cache_size -= 1
return True
except KeyError:
return False
else:
try:
wrapper.cache_size -= len(cache[key])
del cache[key]
return True
except KeyError:
return False
wrapper.clear = clear
wrapper.cache = cache
wrapper.delete = delete
wrapper.cache_size = 0
return wrapper
return decorating_function
+117
View File
@@ -0,0 +1,117 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2011-2013 Codernity (http://codernity.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import functools
from random import choice
def create_cache1lvl(lock_obj):
def cache1lvl(maxsize=100):
def decorating_function(user_function):
cache = {}
lock = lock_obj()
@functools.wraps(user_function)
def wrapper(key, *args, **kwargs):
try:
result = cache[key]
except KeyError:
with lock:
if len(cache) == maxsize:
for i in range(maxsize // 10 or 1):
del cache[choice(list(cache.keys()))]
cache[key] = user_function(key, *args, **kwargs)
result = cache[key]
return result
def clear():
cache.clear()
def delete(key):
try:
del cache[key]
return True
except KeyError:
return False
wrapper.clear = clear
wrapper.cache = cache
wrapper.delete = delete
return wrapper
return decorating_function
return cache1lvl
def create_cache2lvl(lock_obj):
def cache2lvl(maxsize=100):
def decorating_function(user_function):
cache = {}
lock = lock_obj()
@functools.wraps(user_function)
def wrapper(*args, **kwargs):
try:
result = cache[args[0]][args[1]]
except KeyError:
with lock:
if wrapper.cache_size == maxsize:
to_delete = maxsize // 10 or 1
for i in range(to_delete):
key1 = choice(list(cache.keys()))
key2 = choice(list(cache[key1].keys()))
del cache[key1][key2]
if not cache[key1]:
del cache[key1]
wrapper.cache_size -= to_delete
result = user_function(*args, **kwargs)
try:
cache[args[0]][args[1]] = result
except KeyError:
cache[args[0]] = {args[1]: result}
wrapper.cache_size += 1
return result
def clear():
cache.clear()
wrapper.cache_size = 0
def delete(key, *args):
if args:
try:
del cache[key][args[0]]
if not cache[key]:
del cache[key]
wrapper.cache_size -= 1
return True
except KeyError:
return False
else:
try:
wrapper.cache_size -= len(cache[key])
del cache[key]
return True
except KeyError:
return False
wrapper.clear = clear
wrapper.cache = cache
wrapper.delete = delete
wrapper.cache_size = 0
return wrapper
return decorating_function
return cache2lvl
+146
View File
@@ -0,0 +1,146 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2011-2013 Codernity (http://codernity.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from CodernityDB3.hash_index import UniqueHashIndex, HashIndex
from CodernityDB3.sharded_index import ShardedIndex
from CodernityDB3.index import IndexPreconditionsException
from random import getrandbits
import uuid
class IU_ShardedUniqueHashIndex(ShardedIndex):
custom_header = """import uuid
from random import getrandbits
from CodernityDB3.sharded_index import ShardedIndex
"""
def __init__(self, db_path, name, *args, **kwargs):
if kwargs.get('sh_nums', 0) > 255:
raise IndexPreconditionsException("Too many shards")
kwargs['ind_class'] = UniqueHashIndex
super(IU_ShardedUniqueHashIndex, self).__init__(db_path,
name, *args, **kwargs)
self.patchers.append(self.wrap_insert_id_index)
@staticmethod
def wrap_insert_id_index(db_obj, clean=False):
def _insert_id_index(_rev, data):
"""
Performs insert on **id** index.
"""
_id, value = db_obj.id_ind.make_key_value(data) # may be improved
trg_shard = _id[:2]
storage = db_obj.id_ind.shards_r[trg_shard].storage
start, size = storage.insert(value)
db_obj.id_ind.insert(_id, _rev, start, size)
return _id
if not clean:
if hasattr(db_obj, '_insert_id_index_orig'):
raise IndexPreconditionsException(
"Already patched, something went wrong")
setattr(db_obj, "_insert_id_index_orig", db_obj._insert_id_index)
setattr(db_obj, "_insert_id_index", _insert_id_index)
else:
setattr(db_obj, "_insert_id_index", db_obj._insert_id_index_orig)
delattr(db_obj, "_insert_id_index_orig")
def create_key(self):
h = uuid.UUID(int=getrandbits(128), version=4).hex
trg = self.last_used + 1
if trg >= self.sh_nums:
trg = 0
self.last_used = trg
h = '%02x%30s' % (trg, h[2:])
return h
def delete(self, key, *args, **kwargs):
trg_shard = key[:2]
op = self.shards_r[trg_shard]
return op.delete(key, *args, **kwargs)
def update(self, key, *args, **kwargs):
trg_shard = key[:2]
self.last_used = int(trg_shard, 16)
op = self.shards_r[trg_shard]
return op.update(key, *args, **kwargs)
def insert(self, key, *args, **kwargs):
trg_shard = key[:2] # in most cases it's in create_key BUT not always
self.last_used = int(key[:2], 16)
op = self.shards_r[trg_shard]
return op.insert(key, *args, **kwargs)
def get(self, key, *args, **kwargs):
trg_shard = key[:2]
self.last_used = int(trg_shard, 16)
op = self.shards_r[trg_shard]
return op.get(key, *args, **kwargs)
class ShardedUniqueHashIndex(IU_ShardedUniqueHashIndex):
# allow unique hash to be used directly
custom_header = 'from CodernityDB3.sharded_hash import IU_ShardedUniqueHashIndex'
pass
class IU_ShardedHashIndex(ShardedIndex):
custom_header = """from CodernityDB3.sharded_index import ShardedIndex"""
def __init__(self, db_path, name, *args, **kwargs):
kwargs['ind_class'] = HashIndex
super(IU_ShardedHashIndex, self).__init__(db_path, name, *
args, **kwargs)
def calculate_shard(self, key):
"""
Must be implemented. It has to return shard to be used by key
:param key: key
:returns: target shard
:rtype: int
"""
raise NotImplementedError()
def delete(self, doc_id, key, *args, **kwargs):
trg_shard = self.calculate_shard(key)
op = self.shards_r[trg_shard]
return op.delete(doc_id, key, *args, **kwargs)
def insert(self, doc_id, key, *args, **kwargs):
trg_shard = self.calculate_shard(key)
op = self.shards_r[trg_shard]
return op.insert(doc_id, key, *args, **kwargs)
def update(self, doc_id, key, *args, **kwargs):
trg_shard = self.calculate_shard(key)
op = self.shards_r[trg_shard]
return op.insert(doc_id, key, *args, **kwargs)
def get(self, key, *args, **kwargs):
trg_shard = self.calculate_shard(key)
op = self.shards_r[trg_shard]
return op.get(key, *args, **kwargs)
class ShardedHashIndex(IU_ShardedHashIndex):
pass
+112
View File
@@ -0,0 +1,112 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2011-2013 Codernity (http://codernity.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from CodernityDB3.index import Index
# from CodernityDB3.env import cdb_environment
# import warnings
class ShardedIndex(Index):
def __init__(self, db_path, name, *args, **kwargs):
"""
There are 3 additional parameters. You have to hardcode them in your custom class. **NEVER** use directly
:param int sh_nums: how many shards should be
:param class ind_class: Index class to use (HashIndex or your custom one)
:param bool use_make_keys: if True, `make_key`, and `make_key_value` will be overriden with those from first shard
The rest parameters are passed straight to `ind_class` shards.
"""
super(ShardedIndex, self).__init__(db_path, name)
try:
self.sh_nums = kwargs.pop('sh_nums')
except KeyError:
self.sh_nums = 5
try:
ind_class = kwargs.pop('ind_class')
except KeyError:
raise Exception("ind_class must be given")
else:
# if not isinstance(ind_class, basestring):
# ind_class = ind_class.__name__
self.ind_class = ind_class
if 'use_make_keys' in kwargs:
self.use_make_keys = kwargs.pop('use_make_keys')
else:
self.use_make_keys = False
self._set_shard_datas(*args, **kwargs)
self.patchers = [] # database object patchers
def _set_shard_datas(self, *args, **kwargs):
self.shards = {}
self.shards_r = {}
# ind_class = globals()[self.ind_class]
ind_class = self.ind_class
i = 0
for sh_name in [self.name + str(x) for x in range(self.sh_nums)]:
# dict is better than list in that case
self.shards[i] = ind_class(self.db_path, sh_name, *args, **kwargs)
self.shards_r['%02x' % i] = self.shards[i]
self.shards_r[i] = self.shards[i]
i += 1
if not self.use_make_keys:
self.make_key = self.shards[0].make_key
self.make_key_value = self.shards[0].make_key_value
self.last_used = 0
@property
def storage(self):
st = self.shards[self.last_used].storage
return st
def __getattr__(self, name):
return getattr(self.shards[self.last_used], name)
def open_index(self):
for curr in list(self.shards.values()):
curr.open_index()
def create_index(self):
for curr in list(self.shards.values()):
curr.create_index()
def destroy(self):
for curr in list(self.shards.values()):
curr.destroy()
def compact(self):
for curr in list(self.shards.values()):
curr.compact()
def reindex(self):
for curr in list(self.shards.values()):
curr.reindex()
def all(self, *args, **kwargs):
for curr in list(self.shards.values()):
for now in curr.all(*args, **kwargs):
yield now
def get_many(self, *args, **kwargs):
for curr in list(self.shards.values()):
for now in curr.get_many(*args, **kwargs):
yield now
+162
View File
@@ -0,0 +1,162 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2011-2013 Codernity (http://codernity.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import struct
import shutil
import marshal
import io
try:
from CodernityDB3 import __version__
except ImportError:
from .__init__ import __version__
class StorageException(Exception):
pass
class DummyStorage(object):
"""
Storage mostly used to fake real storage
"""
def create(self, *args, **kwargs):
pass
def open(self, *args, **kwargs):
pass
def close(self, *args, **kwargs):
pass
def data_from(self, *args, **kwargs):
pass
def data_to(self, *args, **kwargs):
pass
def save(self, *args, **kwargs):
return 0, 0
def insert(self, *args, **kwargs):
return self.save(*args, **kwargs)
def update(self, *args, **kwargs):
return 0, 0
def get(self, *args, **kwargs):
return None
# def compact(self, *args, **kwargs):
# pass
def fsync(self, *args, **kwargs):
pass
def flush(self, *args, **kwargs):
pass
class IU_Storage(object):
__version__ = __version__
def __init__(self, db_path, name='main'):
if isinstance(db_path, bytes):
db_path = db_path.decode()
if isinstance(name, bytes):
name = name.decode()
self.db_path = db_path
self.name = name
self._header_size = 100
def create(self):
if os.path.exists(os.path.join(self.db_path, self.name + "_stor")):
raise IOError("Storage already exists!")
with io.open(os.path.join(self.db_path, self.name + "_stor"), 'wb') as f:
if isinstance(self.__version__, str):
new_version = self.__version__.encode()
else:
new_version = self.__version__
f.write(struct.pack(b'10s90s', new_version, b'|||||'))
f.close()
self._f = io.open(os.path.join(
self.db_path, self.name + "_stor"), 'r+b', buffering=0)
self.flush()
self._f.seek(0, 2)
def open(self):
if not os.path.exists(os.path.join(self.db_path, self.name + "_stor")):
raise IOError("Storage doesn't exists!")
self._f = io.open(os.path.join(
self.db_path, self.name + "_stor"), 'r+b', buffering=0)
self.flush()
self._f.seek(0, 2)
def destroy(self):
os.unlink(os.path.join(self.db_path, self.name + '_stor'))
def close(self):
self._f.close()
# self.flush()
# self.fsync()
def data_from(self, data):
return marshal.loads(data)
def data_to(self, data):
return marshal.dumps(data)
def save(self, data):
s_data = self.data_to(data)
self._f.seek(0, 2)
start = self._f.tell()
size = len(s_data)
self._f.write(s_data)
self.flush()
return start, size
def insert(self, data):
return self.save(data)
def update(self, data):
return self.save(data)
def get(self, start, size, status='c'):
if status == 'd':
return None
else:
self._f.seek(start)
return self.data_from(self._f.read(size))
def flush(self):
self._f.flush()
def fsync(self):
os.fsync(self._f.fileno())
# classes for public use, done in this way because of
# generation static files with indexes (_index directory)
class Storage(IU_Storage):
pass
File diff suppressed because it is too large Load Diff
+4 -3
View File
@@ -12,7 +12,8 @@
# Source: http://pypi.python.org/pypi/axel
# Docs: http://packages.python.org/axel
from Queue import Empty, Queue
from six.moves.queue import Queue, Empty
import hashlib
import sys
import threading
@@ -109,7 +110,7 @@ class Event(object):
self.memoize = {}
def hash(self, handler):
return hashlib.md5(str(handler)).hexdigest()
return hashlib.md5(repr(handler).encode('utf-8')).hexdigest()
def handle(self, handler, priority = 0):
""" Registers a handler. The handler can be transmitted together
@@ -161,7 +162,7 @@ class Event(object):
t.daemon = True
t.start()
handler_keys = self.handlers.keys()
handler_keys = list(self.handlers.keys())
handler_keys.sort(key = natsortKey)
for handler in handler_keys:
+11 -11
View File
@@ -45,7 +45,7 @@ from .element import (
# The very first thing we do is give a useful error if someone is
# running this code under Python 3 without converting it.
syntax_error = u'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work. You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).'
syntax_error = 'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work. You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).'
class BeautifulSoup(Tag):
"""
@@ -69,7 +69,7 @@ class BeautifulSoup(Tag):
like HTML's <br> tag), call handle_starttag and then
handle_endtag.
"""
ROOT_TAG_NAME = u'[document]'
ROOT_TAG_NAME = '[document]'
# If the end-user gives no indication which tree builder they
# want, look for one with these features.
@@ -135,12 +135,12 @@ class BeautifulSoup(Tag):
"fromEncoding", "from_encoding")
if len(kwargs) > 0:
arg = kwargs.keys().pop()
arg = list(kwargs.keys()).pop()
raise TypeError(
"__init__() got an unexpected keyword argument '%s'" % arg)
if builder is None:
if isinstance(features, basestring):
if isinstance(features, str):
features = [features]
if features is None or len(features) == 0:
features = self.DEFAULT_BUILDER_FEATURES
@@ -164,7 +164,7 @@ class BeautifulSoup(Tag):
# involving passing non-markup to Beautiful Soup.
# Beautiful Soup will still parse the input as markup,
# just in case that's what the user really wants.
if (isinstance(markup, unicode)
if (isinstance(markup, str)
and not os.path.supports_unicode_filenames):
possible_filename = markup.encode("utf8")
else:
@@ -172,7 +172,7 @@ class BeautifulSoup(Tag):
is_file = False
try:
is_file = os.path.exists(possible_filename)
except Exception, e:
except Exception as e:
# This is almost certainly a problem involving
# characters not valid in filenames on this
# system. Just let it go.
@@ -184,7 +184,7 @@ class BeautifulSoup(Tag):
# TODO: This is ugly but I couldn't get it to work in
# Python 3 otherwise.
if ((isinstance(markup, bytes) and not b' ' in markup)
or (isinstance(markup, unicode) and not u' ' in markup)):
or (isinstance(markup, str) and not ' ' in markup)):
warnings.warn(
'"%s" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client to get the document behind the URL, and feed that document to Beautiful Soup.' % markup)
@@ -259,7 +259,7 @@ class BeautifulSoup(Tag):
def endData(self, containerClass=NavigableString):
if self.current_data:
current_data = u''.join(self.current_data)
current_data = ''.join(self.current_data)
# If whitespace is not preserved, and this string contains
# nothing but ASCII spaces, replace it with a single space
# or newline.
@@ -367,9 +367,9 @@ class BeautifulSoup(Tag):
encoding_part = ''
if eventual_encoding != None:
encoding_part = ' encoding="%s"' % eventual_encoding
prefix = u'<?xml version="1.0"%s?>\n' % encoding_part
prefix = '<?xml version="1.0"%s?>\n' % encoding_part
else:
prefix = u''
prefix = ''
if not pretty_print:
indent_level = None
else:
@@ -403,4 +403,4 @@ class FeatureNotFound(ValueError):
if __name__ == '__main__':
import sys
soup = BeautifulSoup(sys.stdin)
print soup.prettify()
print(soup.prettify())
+2 -2
View File
@@ -153,13 +153,13 @@ class TreeBuilder(object):
universal = self.cdata_list_attributes.get('*', [])
tag_specific = self.cdata_list_attributes.get(
tag_name.lower(), None)
for attr in attrs.keys():
for attr in list(attrs.keys()):
if attr in universal or (tag_specific and attr in tag_specific):
# We have a "class"-type attribute whose string
# value is a whitespace-separated list of
# values. Split it into a list.
value = attrs[attr]
if isinstance(value, basestring):
if isinstance(value, str):
values = whitespace_re.split(value)
else:
# html5lib sometimes calls setAttributes twice
+6 -6
View File
@@ -37,7 +37,7 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
doc = parser.parse(markup, encoding=self.user_specified_encoding)
# Set the character encoding detected by the tokenizer.
if isinstance(markup, unicode):
if isinstance(markup, str):
# We need to special-case this because html5lib sets
# charEncoding to UTF-8 if it gets Unicode input.
doc.original_encoding = None
@@ -51,7 +51,7 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
def test_fragment_to_document(self, fragment):
"""See `TreeBuilder`."""
return u'<html><head></head><body>%s</body></html>' % fragment
return '<html><head></head><body>%s</body></html>' % fragment
class TreeBuilderForHtml5lib(html5lib.treebuilders._base.TreeBuilder):
@@ -124,7 +124,7 @@ class Element(html5lib.treebuilders._base.Node):
def appendChild(self, node):
string_child = child = None
if isinstance(node, basestring):
if isinstance(node, str):
# Some other piece of code decided to pass in a string
# instead of creating a TextElement object to contain the
# string.
@@ -139,7 +139,7 @@ class Element(html5lib.treebuilders._base.Node):
else:
child = node.element
if not isinstance(child, basestring) and child.parent is not None:
if not isinstance(child, str) and child.parent is not None:
node.element.extract()
if (string_child and self.element.contents
@@ -152,7 +152,7 @@ class Element(html5lib.treebuilders._base.Node):
old_element.replace_with(new_element)
self.soup._most_recent_element = new_element
else:
if isinstance(node, basestring):
if isinstance(node, str):
# Create a brand new NavigableString from this string.
child = self.soup.new_string(node)
@@ -183,7 +183,7 @@ class Element(html5lib.treebuilders._base.Node):
self.soup.builder._replace_cdata_list_attribute_values(
self.name, attributes)
for name, value in attributes.items():
for name, value in list(attributes.items()):
self.element[name] = value
# The attributes may contain variables that need substitution.
+6 -6
View File
@@ -4,7 +4,7 @@ __all__ = [
'HTMLParserTreeBuilder',
]
from HTMLParser import (
from html.parser import (
HTMLParser,
HTMLParseError,
)
@@ -72,9 +72,9 @@ class BeautifulSoupHTMLParser(HTMLParser):
real_name = int(name)
try:
data = unichr(real_name)
except (ValueError, OverflowError), e:
data = u"\N{REPLACEMENT CHARACTER}"
data = chr(real_name)
except (ValueError, OverflowError) as e:
data = "\N{REPLACEMENT CHARACTER}"
self.handle_data(data)
@@ -142,7 +142,7 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
declared within markup, whether any characters had to be
replaced with REPLACEMENT CHARACTER).
"""
if isinstance(markup, unicode):
if isinstance(markup, str):
yield (markup, None, None, False)
return
@@ -158,7 +158,7 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
parser.soup = self.soup
try:
parser.feed(markup)
except HTMLParseError, e:
except HTMLParseError as e:
warnings.warn(RuntimeWarning(
"Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help."))
raise e
+11 -11
View File
@@ -4,7 +4,7 @@ __all__ = [
]
from io import BytesIO
from StringIO import StringIO
from io import StringIO
import collections
from lxml import etree
from bs4.element import Comment, Doctype, NamespacedAttribute
@@ -78,12 +78,12 @@ class LXMLTreeBuilderForXML(TreeBuilder):
Each 4-tuple represents a strategy for parsing the document.
"""
if isinstance(markup, unicode):
if isinstance(markup, str):
# We were given Unicode. Maybe lxml can parse Unicode on
# this system?
yield markup, None, document_declared_encoding, False
if isinstance(markup, unicode):
if isinstance(markup, str):
# No, apparently not. Convert the Unicode to UTF-8 and
# tell lxml to parse it as UTF-8.
yield (markup.encode("utf8"), "utf8",
@@ -102,7 +102,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
def feed(self, markup):
if isinstance(markup, bytes):
markup = BytesIO(markup)
elif isinstance(markup, unicode):
elif isinstance(markup, str):
markup = StringIO(markup)
# Call feed() at least once, even if the markup is empty,
@@ -117,7 +117,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
if len(data) != 0:
self.parser.feed(data)
self.parser.close()
except (UnicodeDecodeError, LookupError, etree.ParserError), e:
except (UnicodeDecodeError, LookupError, etree.ParserError) as e:
raise ParserRejectedMarkup(str(e))
def close(self):
@@ -135,12 +135,12 @@ class LXMLTreeBuilderForXML(TreeBuilder):
self.nsmaps.append(None)
elif len(nsmap) > 0:
# A new namespace mapping has come into play.
inverted_nsmap = dict((value, key) for key, value in nsmap.items())
inverted_nsmap = dict((value, key) for key, value in list(nsmap.items()))
self.nsmaps.append(inverted_nsmap)
# Also treat the namespace mapping as a set of attributes on the
# tag, so we can recreate it later.
attrs = attrs.copy()
for prefix, namespace in nsmap.items():
for prefix, namespace in list(nsmap.items()):
attribute = NamespacedAttribute(
"xmlns", prefix, "http://www.w3.org/2000/xmlns/")
attrs[attribute] = namespace
@@ -149,7 +149,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
# from lxml with namespaces attached to their names, and
# turn then into NamespacedAttribute objects.
new_attrs = {}
for attr, value in attrs.items():
for attr, value in list(attrs.items()):
namespace, attr = self._getNsTag(attr)
if namespace is None:
new_attrs[attr] = value
@@ -207,7 +207,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
def test_fragment_to_document(self, fragment):
"""See `TreeBuilder`."""
return u'<?xml version="1.0" encoding="utf-8"?>\n%s' % fragment
return '<?xml version="1.0" encoding="utf-8"?>\n%s' % fragment
class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML):
@@ -224,10 +224,10 @@ class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML):
self.parser = self.parser_for(encoding)
self.parser.feed(markup)
self.parser.close()
except (UnicodeDecodeError, LookupError, etree.ParserError), e:
except (UnicodeDecodeError, LookupError, etree.ParserError) as e:
raise ParserRejectedMarkup(str(e))
def test_fragment_to_document(self, fragment):
"""See `TreeBuilder`."""
return u'<html><body>%s</body></html>' % fragment
return '<html><body>%s</body></html>' % fragment
+5 -5
View File
@@ -8,7 +8,7 @@ XML or HTML to reflect a new encoding; that's the tree builder's job.
"""
import codecs
from htmlentitydefs import codepoint2name
from html.entities import codepoint2name
import re
import logging
import string
@@ -56,7 +56,7 @@ class EntitySubstitution(object):
reverse_lookup = {}
characters_for_re = []
for codepoint, name in list(codepoint2name.items()):
character = unichr(codepoint)
character = chr(codepoint)
if codepoint != 34:
# There's no point in turning the quotation mark into
# &quot;, unless it happens within an attribute value, which
@@ -340,9 +340,9 @@ class UnicodeDammit:
self.detector = EncodingDetector(markup, override_encodings, is_html)
# Short-circuit if the data is in Unicode to begin with.
if isinstance(markup, unicode) or markup == '':
if isinstance(markup, str) or markup == '':
self.markup = markup
self.unicode_markup = unicode(markup)
self.unicode_markup = str(markup)
self.original_encoding = None
return
@@ -425,7 +425,7 @@ class UnicodeDammit:
def _to_unicode(self, data, encoding, errors="strict"):
'''Given a string and its encoding, decodes the string into Unicode.
%encoding is a string recognized by encodings.aliases'''
return unicode(data, encoding, errors)
return str(data, encoding, errors)
@property
def declared_html_encoding(self):
+26 -26
View File
@@ -1,7 +1,7 @@
"""Diagnostic functions, mainly for use when doing tech support."""
import cProfile
from StringIO import StringIO
from HTMLParser import HTMLParser
from io import StringIO
from html.parser import HTMLParser
import bs4
from bs4 import BeautifulSoup, __version__
from bs4.builder import builder_registry
@@ -17,8 +17,8 @@ import cProfile
def diagnose(data):
"""Diagnostic suite for isolating common problems."""
print "Diagnostic running on Beautiful Soup %s" % __version__
print "Python version %s" % sys.version
print("Diagnostic running on Beautiful Soup %s" % __version__)
print("Python version %s" % sys.version)
basic_parsers = ["html.parser", "html5lib", "lxml"]
for name in basic_parsers:
@@ -27,44 +27,44 @@ def diagnose(data):
break
else:
basic_parsers.remove(name)
print (
print((
"I noticed that %s is not installed. Installing it may help." %
name)
name))
if 'lxml' in basic_parsers:
basic_parsers.append(["lxml", "xml"])
from lxml import etree
print "Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION))
print("Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION)))
if 'html5lib' in basic_parsers:
import html5lib
print "Found html5lib version %s" % html5lib.__version__
print("Found html5lib version %s" % html5lib.__version__)
if hasattr(data, 'read'):
data = data.read()
elif os.path.exists(data):
print '"%s" looks like a filename. Reading data from the file.' % data
print('"%s" looks like a filename. Reading data from the file.' % data)
data = open(data).read()
elif data.startswith("http:") or data.startswith("https:"):
print '"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data
print "You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup."
print('"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data)
print("You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup.")
return
print
print()
for parser in basic_parsers:
print "Trying to parse your markup with %s" % parser
print("Trying to parse your markup with %s" % parser)
success = False
try:
soup = BeautifulSoup(data, parser)
success = True
except Exception, e:
print "%s could not parse the markup." % parser
except Exception as e:
print("%s could not parse the markup." % parser)
traceback.print_exc()
if success:
print "Here's what %s did with the markup:" % parser
print soup.prettify()
print("Here's what %s did with the markup:" % parser)
print(soup.prettify())
print "-" * 80
print("-" * 80)
def lxml_trace(data, html=True, **kwargs):
"""Print out the lxml events that occur during parsing.
@@ -74,7 +74,7 @@ def lxml_trace(data, html=True, **kwargs):
"""
from lxml import etree
for event, element in etree.iterparse(StringIO(data), html=html, **kwargs):
print("%s, %4s, %s" % (event, element.tag, element.text))
print(("%s, %4s, %s" % (event, element.tag, element.text)))
class AnnouncingParser(HTMLParser):
"""Announces HTMLParser parse events, without doing anything else."""
@@ -156,9 +156,9 @@ def rdoc(num_elements=1000):
def benchmark_parsers(num_elements=100000):
"""Very basic head-to-head performance benchmark."""
print "Comparative parser benchmark on Beautiful Soup %s" % __version__
print("Comparative parser benchmark on Beautiful Soup %s" % __version__)
data = rdoc(num_elements)
print "Generated a large invalid HTML document (%d bytes)." % len(data)
print("Generated a large invalid HTML document (%d bytes)." % len(data))
for parser in ["lxml", ["lxml", "html"], "html5lib", "html.parser"]:
success = False
@@ -167,24 +167,24 @@ def benchmark_parsers(num_elements=100000):
soup = BeautifulSoup(data, parser)
b = time.time()
success = True
except Exception, e:
print "%s could not parse the markup." % parser
except Exception as e:
print("%s could not parse the markup." % parser)
traceback.print_exc()
if success:
print "BS4+%s parsed the markup in %.2fs." % (parser, b-a)
print("BS4+%s parsed the markup in %.2fs." % (parser, b-a))
from lxml import etree
a = time.time()
etree.HTML(data)
b = time.time()
print "Raw lxml parsed the markup in %.2fs." % (b-a)
print("Raw lxml parsed the markup in %.2fs." % (b-a))
import html5lib
parser = html5lib.HTMLParser()
a = time.time()
parser.parse(data)
b = time.time()
print "Raw html5lib parsed the markup in %.2fs." % (b-a)
print("Raw html5lib parsed the markup in %.2fs." % (b-a))
def profile(num_elements=100000, parser="lxml"):
+55 -55
View File
@@ -21,22 +21,22 @@ def _alias(attr):
return alias
class NamespacedAttribute(unicode):
class NamespacedAttribute(str):
def __new__(cls, prefix, name, namespace=None):
if name is None:
obj = unicode.__new__(cls, prefix)
obj = str.__new__(cls, prefix)
elif prefix is None:
# Not really namespaced.
obj = unicode.__new__(cls, name)
obj = str.__new__(cls, name)
else:
obj = unicode.__new__(cls, prefix + ":" + name)
obj = str.__new__(cls, prefix + ":" + name)
obj.prefix = prefix
obj.name = name
obj.namespace = namespace
return obj
class AttributeValueWithCharsetSubstitution(unicode):
class AttributeValueWithCharsetSubstitution(str):
"""A stand-in object for a character encoding specified in HTML."""
class CharsetMetaAttributeValue(AttributeValueWithCharsetSubstitution):
@@ -47,7 +47,7 @@ class CharsetMetaAttributeValue(AttributeValueWithCharsetSubstitution):
"""
def __new__(cls, original_value):
obj = unicode.__new__(cls, original_value)
obj = str.__new__(cls, original_value)
obj.original_value = original_value
return obj
@@ -70,9 +70,9 @@ class ContentMetaAttributeValue(AttributeValueWithCharsetSubstitution):
match = cls.CHARSET_RE.search(original_value)
if match is None:
# No substitution necessary.
return unicode.__new__(unicode, original_value)
return str.__new__(str, original_value)
obj = unicode.__new__(cls, original_value)
obj = str.__new__(cls, original_value)
obj.original_value = original_value
return obj
@@ -152,7 +152,7 @@ class PageElement(object):
def format_string(self, s, formatter='minimal'):
"""Format the given string using the given formatter."""
if not callable(formatter):
if not isinstance(formatter, collections.Callable):
formatter = self._formatter_for_name(formatter)
if formatter is None:
output = s
@@ -272,7 +272,7 @@ class PageElement(object):
def insert(self, position, new_child):
if new_child is self:
raise ValueError("Cannot insert a tag into itself.")
if (isinstance(new_child, basestring)
if (isinstance(new_child, str)
and not isinstance(new_child, NavigableString)):
new_child = NavigableString(new_child)
@@ -489,7 +489,7 @@ class PageElement(object):
result = (element for element in generator
if isinstance(element, Tag))
return ResultSet(strainer, result)
elif isinstance(name, basestring):
elif isinstance(name, str):
# Optimization to find all tags with a given name.
result = (element for element in generator
if isinstance(element, Tag)
@@ -640,7 +640,7 @@ class PageElement(object):
return self.parents
class NavigableString(unicode, PageElement):
class NavigableString(str, PageElement):
PREFIX = ''
SUFFIX = ''
@@ -653,15 +653,15 @@ class NavigableString(unicode, PageElement):
passed in to the superclass's __new__ or the superclass won't know
how to handle non-ASCII characters.
"""
if isinstance(value, unicode):
return unicode.__new__(cls, value)
return unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING)
if isinstance(value, str):
return str.__new__(cls, value)
return str.__new__(cls, value, DEFAULT_OUTPUT_ENCODING)
def __copy__(self):
return self
def __getnewargs__(self):
return (unicode(self),)
return (str(self),)
def __getattr__(self, attr):
"""text.string gives you text. This is for backwards
@@ -701,23 +701,23 @@ class PreformattedString(NavigableString):
class CData(PreformattedString):
PREFIX = u'<![CDATA['
SUFFIX = u']]>'
PREFIX = '<![CDATA['
SUFFIX = ']]>'
class ProcessingInstruction(PreformattedString):
PREFIX = u'<?'
SUFFIX = u'?>'
PREFIX = '<?'
SUFFIX = '?>'
class Comment(PreformattedString):
PREFIX = u'<!--'
SUFFIX = u'-->'
PREFIX = '<!--'
SUFFIX = '-->'
class Declaration(PreformattedString):
PREFIX = u'<!'
SUFFIX = u'!>'
PREFIX = '<!'
SUFFIX = '!>'
class Doctype(PreformattedString):
@@ -734,8 +734,8 @@ class Doctype(PreformattedString):
return Doctype(value)
PREFIX = u'<!DOCTYPE '
SUFFIX = u'>\n'
PREFIX = '<!DOCTYPE '
SUFFIX = '>\n'
class Tag(PageElement):
@@ -843,7 +843,7 @@ class Tag(PageElement):
for string in self._all_strings(True):
yield string
def get_text(self, separator=u"", strip=False,
def get_text(self, separator="", strip=False,
types=(NavigableString, CData)):
"""
Get all child strings, concatenated using the given separator.
@@ -915,7 +915,7 @@ class Tag(PageElement):
def __contains__(self, x):
return x in self.contents
def __nonzero__(self):
def __bool__(self):
"A tag is non-None even if it has no contents."
return True
@@ -1014,7 +1014,7 @@ class Tag(PageElement):
# First off, turn a string formatter into a function. This
# will stop the lookup from happening over and over again.
if not callable(formatter):
if not isinstance(formatter, collections.Callable):
formatter = self._formatter_for_name(formatter)
attrs = []
@@ -1025,8 +1025,8 @@ class Tag(PageElement):
else:
if isinstance(val, list) or isinstance(val, tuple):
val = ' '.join(val)
elif not isinstance(val, basestring):
val = unicode(val)
elif not isinstance(val, str):
val = str(val)
elif (
isinstance(val, AttributeValueWithCharsetSubstitution)
and eventual_encoding is not None):
@@ -1034,7 +1034,7 @@ class Tag(PageElement):
text = self.format_string(val, formatter)
decoded = (
unicode(key) + '='
str(key) + '='
+ EntitySubstitution.quoted_attribute_value(text))
attrs.append(decoded)
close = ''
@@ -1112,7 +1112,7 @@ class Tag(PageElement):
"""
# First off, turn a string formatter into a function. This
# will stop the lookup from happening over and over again.
if not callable(formatter):
if not isinstance(formatter, collections.Callable):
formatter = self._formatter_for_name(formatter)
pretty_print = (indent_level is not None)
@@ -1210,16 +1210,16 @@ class Tag(PageElement):
raise ValueError(
'Final combinator "%s" is missing an argument.' % tokens[-1])
if self._select_debug:
print 'Running CSS selector "%s"' % selector
print('Running CSS selector "%s"' % selector)
for index, token in enumerate(tokens):
if self._select_debug:
print ' Considering token "%s"' % token
print(' Considering token "%s"' % token)
recursive_candidate_generator = None
tag_name = None
if tokens[index-1] in self._selector_combinators:
# This token was consumed by the previous combinator. Skip it.
if self._select_debug:
print ' Token was consumed by the previous combinator.'
print(' Token was consumed by the previous combinator.')
continue
# Each operation corresponds to a checker function, a rule
# for determining whether a candidate matches the
@@ -1325,14 +1325,14 @@ class Tag(PageElement):
next_token = tokens[index+1]
def recursive_select(tag):
if self._select_debug:
print ' Calling select("%s") recursively on %s %s' % (next_token, tag.name, tag.attrs)
print '-' * 40
print(' Calling select("%s") recursively on %s %s' % (next_token, tag.name, tag.attrs))
print('-' * 40)
for i in tag.select(next_token, recursive_candidate_generator):
if self._select_debug:
print '(Recursive select picked up candidate %s %s)' % (i.name, i.attrs)
print('(Recursive select picked up candidate %s %s)' % (i.name, i.attrs))
yield i
if self._select_debug:
print '-' * 40
print('-' * 40)
_use_candidate_generator = recursive_select
elif _candidate_generator is None:
# By default, a tag's candidates are all of its
@@ -1343,7 +1343,7 @@ class Tag(PageElement):
check = "[any]"
else:
check = tag_name
print ' Default candidate generator, tag name="%s"' % check
print(' Default candidate generator, tag name="%s"' % check)
if self._select_debug:
# This is redundant with later code, but it stops
# a bunch of bogus tags from cluttering up the
@@ -1365,8 +1365,8 @@ class Tag(PageElement):
new_context_ids = set([])
for tag in current_context:
if self._select_debug:
print " Running candidate generator on %s %s" % (
tag.name, repr(tag.attrs))
print(" Running candidate generator on %s %s" % (
tag.name, repr(tag.attrs)))
for candidate in _use_candidate_generator(tag):
if not isinstance(candidate, Tag):
continue
@@ -1381,21 +1381,21 @@ class Tag(PageElement):
break
if checker is None or result:
if self._select_debug:
print " SUCCESS %s %s" % (candidate.name, repr(candidate.attrs))
print(" SUCCESS %s %s" % (candidate.name, repr(candidate.attrs)))
if id(candidate) not in new_context_ids:
# If a tag matches a selector more than once,
# don't include it in the context more than once.
new_context.append(candidate)
new_context_ids.add(id(candidate))
elif self._select_debug:
print " FAILURE %s %s" % (candidate.name, repr(candidate.attrs))
print(" FAILURE %s %s" % (candidate.name, repr(candidate.attrs)))
current_context = new_context
if self._select_debug:
print "Final verdict:"
print("Final verdict:")
for i in current_context:
print " %s %s" % (i.name, i.attrs)
print(" %s %s" % (i.name, i.attrs))
return current_context
# Old names for backwards compatibility
@@ -1439,7 +1439,7 @@ class SoupStrainer(object):
else:
attrs = kwargs
normalized_attrs = {}
for key, value in attrs.items():
for key, value in list(attrs.items()):
normalized_attrs[key] = self._normalize_search_value(value)
self.attrs = normalized_attrs
@@ -1448,7 +1448,7 @@ class SoupStrainer(object):
def _normalize_search_value(self, value):
# Leave it alone if it's a Unicode string, a callable, a
# regular expression, a boolean, or None.
if (isinstance(value, unicode) or callable(value) or hasattr(value, 'match')
if (isinstance(value, str) or isinstance(value, collections.Callable) or hasattr(value, 'match')
or isinstance(value, bool) or value is None):
return value
@@ -1461,7 +1461,7 @@ class SoupStrainer(object):
new_value = []
for v in value:
if (hasattr(v, '__iter__') and not isinstance(v, bytes)
and not isinstance(v, unicode)):
and not isinstance(v, str)):
# This is almost certainly the user's mistake. In the
# interests of avoiding infinite loops, we'll let
# it through as-is rather than doing a recursive call.
@@ -1473,7 +1473,7 @@ class SoupStrainer(object):
# Otherwise, convert it into a Unicode string.
# The unicode(str()) thing is so this will do the same thing on Python 2
# and Python 3.
return unicode(str(value))
return str(str(value))
def __str__(self):
if self.text:
@@ -1527,7 +1527,7 @@ class SoupStrainer(object):
found = None
# If given a list of items, scan it for a text element that
# matches.
if hasattr(markup, '__iter__') and not isinstance(markup, (Tag, basestring)):
if hasattr(markup, '__iter__') and not isinstance(markup, (Tag, str)):
for element in markup:
if isinstance(element, NavigableString) \
and self.search(element):
@@ -1540,7 +1540,7 @@ class SoupStrainer(object):
found = self.search_tag(markup)
# If it's text, make sure the text matches.
elif isinstance(markup, NavigableString) or \
isinstance(markup, basestring):
isinstance(markup, str):
if not self.name and not self.attrs and self._matches(markup, self.text):
found = markup
else:
@@ -1554,7 +1554,7 @@ class SoupStrainer(object):
if isinstance(markup, list) or isinstance(markup, tuple):
# This should only happen when searching a multi-valued attribute
# like 'class'.
if (isinstance(match_against, unicode)
if (isinstance(match_against, str)
and ' ' in match_against):
# A bit of a special case. If they try to match "foo
# bar" on a multivalue attribute's value, only accept
@@ -1589,7 +1589,7 @@ class SoupStrainer(object):
# None matches None, False, an empty string, an empty list, and so on.
return not match_against
if isinstance(match_against, unicode):
if isinstance(match_against, str):
# Exact string match
return markup == match_against
+16 -16
View File
@@ -225,14 +225,14 @@ class HTMLTreeBuilderSmokeTest(object):
self.assertSoupEquals('<a b="<a>"></a>', '<a b="&lt;a&gt;"></a>')
def test_entities_in_attributes_converted_to_unicode(self):
expect = u'<p id="pi\N{LATIN SMALL LETTER N WITH TILDE}ata"></p>'
expect = '<p id="pi\N{LATIN SMALL LETTER N WITH TILDE}ata"></p>'
self.assertSoupEquals('<p id="pi&#241;ata"></p>', expect)
self.assertSoupEquals('<p id="pi&#xf1;ata"></p>', expect)
self.assertSoupEquals('<p id="pi&#Xf1;ata"></p>', expect)
self.assertSoupEquals('<p id="pi&ntilde;ata"></p>', expect)
def test_entities_in_text_converted_to_unicode(self):
expect = u'<p>pi\N{LATIN SMALL LETTER N WITH TILDE}ata</p>'
expect = '<p>pi\N{LATIN SMALL LETTER N WITH TILDE}ata</p>'
self.assertSoupEquals("<p>pi&#241;ata</p>", expect)
self.assertSoupEquals("<p>pi&#xf1;ata</p>", expect)
self.assertSoupEquals("<p>pi&#Xf1;ata</p>", expect)
@@ -243,7 +243,7 @@ class HTMLTreeBuilderSmokeTest(object):
'<p>I said "good day!"</p>')
def test_out_of_range_entity(self):
expect = u"\N{REPLACEMENT CHARACTER}"
expect = "\N{REPLACEMENT CHARACTER}"
self.assertSoupEquals("&#10000000000000;", expect)
self.assertSoupEquals("&#x10000000000000;", expect)
self.assertSoupEquals("&#1000000000;", expect)
@@ -285,9 +285,9 @@ class HTMLTreeBuilderSmokeTest(object):
# A seemingly innocuous document... but it's in Unicode! And
# it contains characters that can't be represented in the
# encoding found in the declaration! The horror!
markup = u'<html><head><meta encoding="euc-jp"></head><body>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</body>'
markup = '<html><head><meta encoding="euc-jp"></head><body>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</body>'
soup = self.soup(markup)
self.assertEqual(u'Sacr\xe9 bleu!', soup.body.string)
self.assertEqual('Sacr\xe9 bleu!', soup.body.string)
def test_soupstrainer(self):
"""Parsers should be able to work with SoupStrainers."""
@@ -327,7 +327,7 @@ class HTMLTreeBuilderSmokeTest(object):
# Both XML and HTML entities are converted to Unicode characters
# during parsing.
text = "<p>&lt;&lt;sacr&eacute;&#32;bleu!&gt;&gt;</p>"
expected = u"<p>&lt;&lt;sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</p>"
expected = "<p>&lt;&lt;sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</p>"
self.assertSoupEquals(text, expected)
def test_smart_quotes_converted_on_the_way_in(self):
@@ -337,15 +337,15 @@ class HTMLTreeBuilderSmokeTest(object):
soup = self.soup(quote)
self.assertEqual(
soup.p.string,
u"\N{LEFT SINGLE QUOTATION MARK}Foo\N{RIGHT SINGLE QUOTATION MARK}")
"\N{LEFT SINGLE QUOTATION MARK}Foo\N{RIGHT SINGLE QUOTATION MARK}")
def test_non_breaking_spaces_converted_on_the_way_in(self):
soup = self.soup("<a>&nbsp;&nbsp;</a>")
self.assertEqual(soup.a.string, u"\N{NO-BREAK SPACE}" * 2)
self.assertEqual(soup.a.string, "\N{NO-BREAK SPACE}" * 2)
def test_entities_converted_on_the_way_out(self):
text = "<p>&lt;&lt;sacr&eacute;&#32;bleu!&gt;&gt;</p>"
expected = u"<p>&lt;&lt;sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</p>".encode("utf-8")
expected = "<p>&lt;&lt;sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</p>".encode("utf-8")
soup = self.soup(text)
self.assertEqual(soup.p.encode("utf-8"), expected)
@@ -354,7 +354,7 @@ class HTMLTreeBuilderSmokeTest(object):
# easy-to-understand document.
# Here it is in Unicode. Note that it claims to be in ISO-Latin-1.
unicode_html = u'<html><head><meta content="text/html; charset=ISO-Latin-1" http-equiv="Content-type"/></head><body><p>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</p></body></html>'
unicode_html = '<html><head><meta content="text/html; charset=ISO-Latin-1" http-equiv="Content-type"/></head><body><p>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</p></body></html>'
# That's because we're going to encode it into ISO-Latin-1, and use
# that to test.
@@ -493,15 +493,15 @@ class XMLTreeBuilderSmokeTest(object):
self.assertTrue(b"&lt; &lt; hey &gt; &gt;" in encoded)
def test_can_parse_unicode_document(self):
markup = u'<?xml version="1.0" encoding="euc-jp"><root>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</root>'
markup = '<?xml version="1.0" encoding="euc-jp"><root>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</root>'
soup = self.soup(markup)
self.assertEqual(u'Sacr\xe9 bleu!', soup.root.string)
self.assertEqual('Sacr\xe9 bleu!', soup.root.string)
def test_popping_namespaced_tag(self):
markup = '<rss xmlns:dc="foo"><dc:creator>b</dc:creator><dc:date>2012-07-02T20:33:42Z</dc:date><dc:rights>c</dc:rights><image>d</image></rss>'
soup = self.soup(markup)
self.assertEqual(
unicode(soup.rss), markup)
str(soup.rss), markup)
def test_docstring_includes_correct_encoding(self):
soup = self.soup("<root/>")
@@ -532,17 +532,17 @@ class XMLTreeBuilderSmokeTest(object):
def test_closing_namespaced_tag(self):
markup = '<p xmlns:dc="http://purl.org/dc/elements/1.1/"><dc:date>20010504</dc:date></p>'
soup = self.soup(markup)
self.assertEqual(unicode(soup.p), markup)
self.assertEqual(str(soup.p), markup)
def test_namespaced_attributes(self):
markup = '<foo xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"><bar xsi:schemaLocation="http://www.example.com"/></foo>'
soup = self.soup(markup)
self.assertEqual(unicode(soup.foo), markup)
self.assertEqual(str(soup.foo), markup)
def test_namespaced_attributes_xml_namespace(self):
markup = '<foo xml:lang="fr">bar</foo>'
soup = self.soup(markup)
self.assertEqual(unicode(soup.foo), markup)
self.assertEqual(str(soup.foo), markup)
class HTML5TreeBuilderSmokeTest(HTMLTreeBuilderSmokeTest):
"""Smoke test for a tree builder that supports HTML5."""
+441 -24
View File
@@ -1,27 +1,75 @@
# -*- coding: utf-8 -*-
"""
copied from
werkzeug.contrib.cache
~~~~~~~~~~~~~~~~~~~~~~
:copyright: (c) 2011 by the Werkzeug Team, see AUTHORS for more details.
The main problem with dynamic Web sites is, well, they're dynamic. Each
time a user requests a page, the webserver executes a lot of code, queries
the database, renders templates until the visitor gets the page he sees.
This is a lot more expensive than just loading a file from the file system
and sending it to the visitor.
For most Web applications, this overhead isn't a big deal but once it
becomes, you will be glad to have a cache system in place.
How Caching Works
=================
Caching is pretty simple. Basically you have a cache object lurking around
somewhere that is connected to a remote cache or the file system or
something else. When the request comes in you check if the current page
is already in the cache and if so, you're returning it from the cache.
Otherwise you generate the page and put it into the cache. (Or a fragment
of the page, you don't have to cache the full thing)
Here is a simple example of how to cache a sidebar for a template::
def get_sidebar(user):
identifier = 'sidebar_for/user%d' % user.id
value = cache.get(identifier)
if value is not None:
return value
value = generate_sidebar_for(user=user)
cache.set(identifier, value, timeout=60 * 5)
return value
Creating a Cache Object
=======================
To create a cache object you just import the cache system of your choice
from the cache module and instantiate it. Then you can start working
with that object:
>>> from werkzeug.contrib.cache import SimpleCache
>>> c = SimpleCache()
>>> c.set("foo", "value")
>>> c.get("foo")
'value'
>>> c.get("missing") is None
True
Please keep in mind that you have to create the cache and put it somewhere
you have access to it (either as a module global you can import or you just
put it into your WSGI application).
:copyright: (c) 2014 by the Werkzeug Team, see AUTHORS for more details.
:license: BSD, see LICENSE for more details.
"""
from cache.posixemulation import rename
from itertools import izip
from time import time
import os
import re
import tempfile
try:
from hashlib import md5
except ImportError:
from md5 import new as md5
from hashlib import md5
from time import time
try:
import cPickle as pickle
except ImportError:
import pickle
from ._compat import iteritems, string_types, text_type, \
integer_types, to_bytes
from .posixemulation import rename
def _items(mappingorseq):
"""Wrapper for efficient iteration over mappings represented by dicts
@@ -34,8 +82,11 @@ def _items(mappingorseq):
... assert k*k == v
"""
return mappingorseq.iteritems() if hasattr(mappingorseq, 'iteritems') \
else mappingorseq
if hasattr(mappingorseq, "iteritems"):
return mappingorseq.iteritems()
elif hasattr(mappingorseq, "items"):
return mappingorseq.items()
return mappingorseq
class BaseCache(object):
@@ -46,9 +97,17 @@ class BaseCache(object):
specified on :meth:`set`.
"""
def __init__(self, default_timeout = 300):
def __init__(self, default_timeout=300):
self.default_timeout = default_timeout
def get(self, key):
"""Looks up key in the cache and returns the value for it.
If the key does not exist `None` is returned instead.
:param key: the key to be looked up.
"""
return None
def delete(self, key):
"""Deletes `key` from the cache. If it does not exist in the cache
nothing happens.
@@ -81,9 +140,9 @@ class BaseCache(object):
:param keys: The function accepts multiple keys as positional
arguments.
"""
return dict(izip(keys, self.get_many(*keys)))
return dict(zip(keys, self.get_many(*keys)))
def set(self, key, value, timeout = None):
def set(self, key, value, timeout=None):
"""Adds a new key/value to the cache (overwrites value, if key already
exists in the cache).
@@ -94,7 +153,7 @@ class BaseCache(object):
"""
pass
def add(self, key, value, timeout = None):
def add(self, key, value, timeout=None):
"""Works like :meth:`set` but does not overwrite the values of already
existing keys.
@@ -105,7 +164,7 @@ class BaseCache(object):
"""
pass
def set_many(self, mapping, timeout = None):
def set_many(self, mapping, timeout=None):
"""Sets multiple keys and values from a mapping.
:param mapping: a mapping with the keys/values to set.
@@ -130,7 +189,7 @@ class BaseCache(object):
"""
pass
def inc(self, key, delta = 1):
def inc(self, key, delta=1):
"""Increments the value of a key by `delta`. If the key does
not yet exist it is initialized with `delta`.
@@ -141,7 +200,7 @@ class BaseCache(object):
"""
self.set(key, (self.get(key) or 0) + delta)
def dec(self, key, delta = 1):
def dec(self, key, delta=1):
"""Decrements the value of a key by `delta`. If the key does
not yet exist it is initialized with `-delta`.
@@ -153,6 +212,362 @@ class BaseCache(object):
self.set(key, (self.get(key) or 0) - delta)
class NullCache(BaseCache):
"""A cache that doesn't cache. This can be useful for unit testing.
:param default_timeout: a dummy parameter that is ignored but exists
for API compatibility with other caches.
"""
class SimpleCache(BaseCache):
"""Simple memory cache for single process environments. This class exists
mainly for the development server and is not 100% thread safe. It tries
to use as many atomic operations as possible and no locks for simplicity
but it could happen under heavy load that keys are added multiple times.
:param threshold: the maximum number of items the cache stores before
it starts deleting some.
:param default_timeout: the default timeout that is used if no timeout is
specified on :meth:`~BaseCache.set`.
"""
def __init__(self, threshold=500, default_timeout=300):
BaseCache.__init__(self, default_timeout)
self._cache = {}
self.clear = self._cache.clear
self._threshold = threshold
def _prune(self):
if len(self._cache) > self._threshold:
now = time()
for idx, (key, (expires, _)) in enumerate(self._cache.items()):
if expires <= now or idx % 3 == 0:
self._cache.pop(key, None)
def get(self, key):
expires, value = self._cache.get(key, (0, None))
if expires > time():
return pickle.loads(value)
def set(self, key, value, timeout=None):
if timeout is None:
timeout = self.default_timeout
self._prune()
self._cache[key] = (time() + timeout, pickle.dumps(value,
pickle.HIGHEST_PROTOCOL))
def add(self, key, value, timeout=None):
if timeout is None:
timeout = self.default_timeout
if len(self._cache) > self._threshold:
self._prune()
item = (time() + timeout, pickle.dumps(value,
pickle.HIGHEST_PROTOCOL))
self._cache.setdefault(key, item)
def delete(self, key):
self._cache.pop(key, None)
_test_memcached_key = re.compile(br'[^\x00-\x21\xff]{1,250}$').match
class MemcachedCache(BaseCache):
"""A cache that uses memcached as backend.
The first argument can either be an object that resembles the API of a
:class:`memcache.Client` or a tuple/list of server addresses. In the
event that a tuple/list is passed, Werkzeug tries to import the best
available memcache library.
Implementation notes: This cache backend works around some limitations in
memcached to simplify the interface. For example unicode keys are encoded
to utf-8 on the fly. Methods such as :meth:`~BaseCache.get_dict` return
the keys in the same format as passed. Furthermore all get methods
silently ignore key errors to not cause problems when untrusted user data
is passed to the get methods which is often the case in web applications.
:param servers: a list or tuple of server addresses or alternatively
a :class:`memcache.Client` or a compatible client.
:param default_timeout: the default timeout that is used if no timeout is
specified on :meth:`~BaseCache.set`.
:param key_prefix: a prefix that is added before all keys. This makes it
possible to use the same memcached server for different
applications. Keep in mind that
:meth:`~BaseCache.clear` will also clear keys with a
different prefix.
"""
def __init__(self, servers=None, default_timeout=300, key_prefix=None):
BaseCache.__init__(self, default_timeout)
if servers is None or isinstance(servers, (list, tuple)):
if servers is None:
servers = ['127.0.0.1:11211']
self._client = self.import_preferred_memcache_lib(servers)
if self._client is None:
raise RuntimeError('no memcache module found')
else:
# NOTE: servers is actually an already initialized memcache
# client.
self._client = servers
self.key_prefix = to_bytes(key_prefix)
def get(self, key):
if isinstance(key, text_type):
key = key.encode('utf-8')
if self.key_prefix:
key = self.key_prefix + key
# memcached doesn't support keys longer than that. Because often
# checks for so long keys can occour because it's tested from user
# submitted data etc we fail silently for getting.
if _test_memcached_key(key):
return self._client.get(key)
def get_dict(self, *keys):
key_mapping = {}
have_encoded_keys = False
for key in keys:
if isinstance(key, unicode):
encoded_key = key.encode('utf-8')
have_encoded_keys = True
else:
encoded_key = key
if self.key_prefix:
encoded_key = self.key_prefix + encoded_key
if _test_memcached_key(key):
key_mapping[encoded_key] = key
d = rv = self._client.get_multi(key_mapping.keys())
if have_encoded_keys or self.key_prefix:
rv = {}
for key, value in iteritems(d):
rv[key_mapping[key]] = value
if len(rv) < len(keys):
for key in keys:
if key not in rv:
rv[key] = None
return rv
def add(self, key, value, timeout=None):
if timeout is None:
timeout = self.default_timeout
if isinstance(key, text_type):
key = key.encode('utf-8')
if self.key_prefix:
key = self.key_prefix + key
self._client.add(key, value, timeout)
def set(self, key, value, timeout=None):
if timeout is None:
timeout = self.default_timeout
if isinstance(key, text_type):
key = key.encode('utf-8')
if self.key_prefix:
key = self.key_prefix + key
self._client.set(key, value, timeout)
def get_many(self, *keys):
d = self.get_dict(*keys)
return [d[key] for key in keys]
def set_many(self, mapping, timeout=None):
if timeout is None:
timeout = self.default_timeout
new_mapping = {}
for key, value in _items(mapping):
if isinstance(key, text_type):
key = key.encode('utf-8')
if self.key_prefix:
key = self.key_prefix + key
new_mapping[key] = value
self._client.set_multi(new_mapping, timeout)
def delete(self, key):
if isinstance(key, unicode):
key = key.encode('utf-8')
if self.key_prefix:
key = self.key_prefix + key
if _test_memcached_key(key):
self._client.delete(key)
def delete_many(self, *keys):
new_keys = []
for key in keys:
if isinstance(key, unicode):
key = key.encode('utf-8')
if self.key_prefix:
key = self.key_prefix + key
if _test_memcached_key(key):
new_keys.append(key)
self._client.delete_multi(new_keys)
def clear(self):
self._client.flush_all()
def inc(self, key, delta=1):
if isinstance(key, unicode):
key = key.encode('utf-8')
if self.key_prefix:
key = self.key_prefix + key
self._client.incr(key, delta)
def dec(self, key, delta=1):
if isinstance(key, unicode):
key = key.encode('utf-8')
if self.key_prefix:
key = self.key_prefix + key
self._client.decr(key, delta)
def import_preferred_memcache_lib(self, servers):
"""Returns an initialized memcache client. Used by the constructor."""
try:
import pylibmc
except ImportError:
pass
else:
return pylibmc.Client(servers)
try:
from google.appengine.api import memcache
except ImportError:
pass
else:
return memcache.Client()
try:
import memcache
except ImportError:
pass
else:
return memcache.Client(servers)
# backwards compatibility
GAEMemcachedCache = MemcachedCache
class RedisCache(BaseCache):
"""Uses the Redis key-value store as a cache backend.
The first argument can be either a string denoting address of the Redis
server or an object resembling an instance of a redis.Redis class.
Note: Python Redis API already takes care of encoding unicode strings on
the fly.
.. versionadded:: 0.7
.. versionadded:: 0.8
`key_prefix` was added.
.. versionchanged:: 0.8
This cache backend now properly serializes objects.
.. versionchanged:: 0.8.3
This cache backend now supports password authentication.
:param host: address of the Redis server or an object which API is
compatible with the official Python Redis client (redis-py).
:param port: port number on which Redis server listens for connections.
:param password: password authentication for the Redis server.
:param db: db (zero-based numeric index) on Redis Server to connect.
:param default_timeout: the default timeout that is used if no timeout is
specified on :meth:`~BaseCache.set`.
:param key_prefix: A prefix that should be added to all keys.
"""
def __init__(self, host='localhost', port=6379, password=None,
db=0, default_timeout=300, key_prefix=None):
BaseCache.__init__(self, default_timeout)
if isinstance(host, string_types):
try:
import redis
except ImportError:
raise RuntimeError('no redis module found')
self._client = redis.Redis(host=host, port=port, password=password, db=db)
else:
self._client = host
self.key_prefix = key_prefix or ''
def dump_object(self, value):
"""Dumps an object into a string for redis. By default it serializes
integers as regular string and pickle dumps everything else.
"""
t = type(value)
if t in integer_types:
return str(value).encode('ascii')
return b'!' + pickle.dumps(value)
def load_object(self, value):
"""The reversal of :meth:`dump_object`. This might be callde with
None.
"""
if value is None:
return None
if value.startswith(b'!'):
return pickle.loads(value[1:])
try:
return int(value)
except ValueError:
# before 0.8 we did not have serialization. Still support that.
return value
def get(self, key):
return self.load_object(self._client.get(self.key_prefix + key))
def get_many(self, *keys):
if self.key_prefix:
keys = [self.key_prefix + key for key in keys]
return [self.load_object(x) for x in self._client.mget(keys)]
def set(self, key, value, timeout=None):
if timeout is None:
timeout = self.default_timeout
dump = self.dump_object(value)
self._client.setex(self.key_prefix + key, dump, timeout)
def add(self, key, value, timeout=None):
if timeout is None:
timeout = self.default_timeout
dump = self.dump_object(value)
added = self._client.setnx(self.key_prefix + key, dump)
if added:
self._client.expire(self.key_prefix + key, timeout)
def set_many(self, mapping, timeout=None):
if timeout is None:
timeout = self.default_timeout
pipe = self._client.pipeline()
for key, value in _items(mapping):
dump = self.dump_object(value)
pipe.setex(self.key_prefix + key, dump, timeout)
pipe.execute()
def delete(self, key):
self._client.delete(self.key_prefix + key)
def delete_many(self, *keys):
if not keys:
return
if self.key_prefix:
keys = [self.key_prefix + key for key in keys]
self._client.delete(*keys)
def clear(self):
if self.key_prefix:
keys = self._client.keys(self.key_prefix + '*')
if keys:
self._client.delete(*keys)
else:
self._client.flushdb()
def inc(self, key, delta=1):
return self._client.incr(self.key_prefix + key, delta)
def dec(self, key, delta=1):
return self._client.decr(self.key_prefix + key, delta)
class FileSystemCache(BaseCache):
"""A cache that stores the items on the file system. This cache depends
on being the only user of the `cache_dir`. Make absolutely sure that
@@ -170,7 +585,7 @@ class FileSystemCache(BaseCache):
#: used for temporary files by the FileSystemCache
_fs_transaction_suffix = '.__wz_cache'
def __init__(self, cache_dir, threshold = 500, default_timeout = 300, mode = 0600):
def __init__(self, cache_dir, threshold=500, default_timeout=300, mode=0o600):
BaseCache.__init__(self, default_timeout)
self._path = cache_dir
self._threshold = threshold
@@ -215,6 +630,8 @@ class FileSystemCache(BaseCache):
pass
def _get_filename(self, key):
if isinstance(key, text_type):
key = key.encode('utf-8') #XXX unicode review
hash = md5(key).hexdigest()
return os.path.join(self._path, hash)
@@ -231,19 +648,19 @@ class FileSystemCache(BaseCache):
except Exception:
return None
def add(self, key, value, timeout = None):
def add(self, key, value, timeout=None):
filename = self._get_filename(key)
if not os.path.exists(filename):
self.set(key, value, timeout)
def set(self, key, value, timeout = None):
def set(self, key, value, timeout=None):
if timeout is None:
timeout = self.default_timeout
filename = self._get_filename(key)
self._prune()
try:
fd, tmp = tempfile.mkstemp(suffix = self._fs_transaction_suffix,
dir = self._path)
fd, tmp = tempfile.mkstemp(suffix=self._fs_transaction_suffix,
dir=self._path)
f = os.fdopen(fd, 'wb')
try:
pickle.dump(int(time() + timeout), f, 1)
+202
View File
@@ -0,0 +1,202 @@
import sys
import operator
import functools
try:
import builtins
except ImportError:
import __builtin__ as builtins
PY2 = sys.version_info[0] == 2
_identity = lambda x: x
if PY2:
unichr = unichr
text_type = unicode
string_types = (str, unicode)
integer_types = (int, long)
int_to_byte = chr
iterkeys = lambda d, *args, **kwargs: d.iterkeys(*args, **kwargs)
itervalues = lambda d, *args, **kwargs: d.itervalues(*args, **kwargs)
iteritems = lambda d, *args, **kwargs: d.iteritems(*args, **kwargs)
iterlists = lambda d, *args, **kwargs: d.iterlists(*args, **kwargs)
iterlistvalues = lambda d, *args, **kwargs: d.iterlistvalues(*args, **kwargs)
iter_bytes = lambda x: iter(x)
exec('def reraise(tp, value, tb=None):\n raise tp, value, tb')
def fix_tuple_repr(obj):
def __repr__(self):
cls = self.__class__
return '%s(%s)' % (cls.__name__, ', '.join(
'%s=%r' % (field, self[index])
for index, field in enumerate(cls._fields)
))
obj.__repr__ = __repr__
return obj
def implements_iterator(cls):
cls.next = cls.__next__
del cls.__next__
return cls
def implements_to_string(cls):
cls.__unicode__ = cls.__str__
cls.__str__ = lambda x: x.__unicode__().encode('utf-8')
return cls
def native_string_result(func):
def wrapper(*args, **kwargs):
return func(*args, **kwargs).encode('utf-8')
return functools.update_wrapper(wrapper, func)
def implements_bool(cls):
cls.__nonzero__ = cls.__bool__
del cls.__bool__
return cls
from itertools import imap, izip, ifilter
range_type = xrange
from StringIO import StringIO
from cStringIO import StringIO as BytesIO
NativeStringIO = BytesIO
def make_literal_wrapper(reference):
return lambda x: x
def normalize_string_tuple(tup):
"""Normalizes a string tuple to a common type. Following Python 2
rules, upgrades to unicode are implicit.
"""
if any(isinstance(x, text_type) for x in tup):
return tuple(to_unicode(x) for x in tup)
return tup
def try_coerce_native(s):
"""Try to coerce a unicode string to native if possible. Otherwise,
leave it as unicode.
"""
try:
return str(s)
except UnicodeError:
return s
wsgi_get_bytes = _identity
def wsgi_decoding_dance(s, charset='utf-8', errors='replace'):
return s.decode(charset, errors)
def wsgi_encoding_dance(s, charset='utf-8', errors='replace'):
if isinstance(s, bytes):
return s
return s.encode(charset, errors)
def to_bytes(x, charset=sys.getdefaultencoding(), errors='strict'):
if x is None:
return None
if isinstance(x, (bytes, bytearray, buffer)):
return bytes(x)
if isinstance(x, unicode):
return x.encode(charset, errors)
raise TypeError('Expected bytes')
def to_native(x, charset=sys.getdefaultencoding(), errors='strict'):
if x is None or isinstance(x, str):
return x
return x.encode(charset, errors)
else:
unichr = chr
text_type = str
string_types = (str, )
integer_types = (int, )
iterkeys = lambda d, *args, **kwargs: iter(d.keys(*args, **kwargs))
itervalues = lambda d, *args, **kwargs: iter(d.values(*args, **kwargs))
iteritems = lambda d, *args, **kwargs: iter(d.items(*args, **kwargs))
iterlists = lambda d, *args, **kwargs: iter(d.lists(*args, **kwargs))
iterlistvalues = lambda d, *args, **kwargs: iter(d.listvalues(*args, **kwargs))
int_to_byte = operator.methodcaller('to_bytes', 1, 'big')
def iter_bytes(b):
return map(int_to_byte, b)
def reraise(tp, value, tb=None):
if value.__traceback__ is not tb:
raise value.with_traceback(tb)
raise value
fix_tuple_repr = _identity
implements_iterator = _identity
implements_to_string = _identity
implements_bool = _identity
native_string_result = _identity
imap = map
izip = zip
ifilter = filter
range_type = range
from io import StringIO, BytesIO
NativeStringIO = StringIO
def make_literal_wrapper(reference):
if isinstance(reference, text_type):
return lambda x: x
return lambda x: x.encode('latin1')
def normalize_string_tuple(tup):
"""Ensures that all types in the tuple are either strings
or bytes.
"""
tupiter = iter(tup)
is_text = isinstance(next(tupiter, None), text_type)
for arg in tupiter:
if isinstance(arg, text_type) != is_text:
raise TypeError('Cannot mix str and bytes arguments (got %s)'
% repr(tup))
return tup
try_coerce_native = _identity
def wsgi_get_bytes(s):
return s.encode('latin1')
def wsgi_decoding_dance(s, charset='utf-8', errors='replace'):
return s.encode('latin1').decode(charset, errors)
def wsgi_encoding_dance(s, charset='utf-8', errors='replace'):
if isinstance(s, bytes):
return s.decode('latin1', errors)
return s.encode(charset).decode('latin1', errors)
def to_bytes(x, charset=sys.getdefaultencoding(), errors='strict'):
if x is None:
return None
if isinstance(x, (bytes, bytearray, memoryview)):
return bytes(x)
if isinstance(x, str):
return x.encode(charset, errors)
raise TypeError('Expected bytes')
def to_native(x, charset=sys.getdefaultencoding(), errors='strict'):
if x is None or isinstance(x, str):
return x
return x.decode(charset, errors)
def to_unicode(x, charset=sys.getdefaultencoding(), errors='strict',
allow_none_charset=False):
if x is None:
return None
if not isinstance(x, bytes):
return text_type(x)
if charset is None and allow_none_charset:
return x
return x.decode(charset, errors)
+2 -2
View File
@@ -14,7 +14,7 @@ r"""
This module was introduced in 0.6.1 and is not a public interface.
It might become one in later versions of Werkzeug.
:copyright: (c) 2011 by the Werkzeug Team, see AUTHORS for more details.
:copyright: (c) 2014 by the Werkzeug Team, see AUTHORS for more details.
:license: BSD, see LICENSE for more details.
"""
import sys
@@ -90,7 +90,7 @@ if os.name == 'nt': # pragma: no cover
# Fall back to "move away and replace"
try:
os.rename(src, dst)
except OSError, e:
except OSError as e:
if e.errno != errno.EEXIST:
raise
old = "%s-%08x" % (dst, random.randint(0, sys.maxint))
+1 -2
View File
@@ -4,7 +4,6 @@
# `cssmin.py` - A Python port of the YUI CSS compressor.
from StringIO import StringIO # The pure-Python StringIO supports unicode.
import re
@@ -52,7 +51,7 @@ def remove_unnecessary_whitespace(css):
"""
Prevents 'p :link' from becoming 'p:link'.
Translates 'p :link' into 'p ___PSEUDOCLASSCOLON___link'; this is
translated back again later.
"""

Some files were not shown because too many files have changed in this diff Show More