From e8860233a0e648912364bbc1ead4e7a46de68a2f Mon Sep 17 00:00:00 2001 From: mdipierro Date: Tue, 1 Jan 2019 16:01:04 -0800 Subject: [PATCH] moved scheduler and validator into pydal --- gluon/__init__.py | 1 + gluon/_compat.py | 164 +- gluon/packages/dal | 2 +- gluon/scheduler.py | 1749 +------------------ gluon/validators.py | 3937 +------------------------------------------ 5 files changed, 6 insertions(+), 5847 deletions(-) diff --git a/gluon/__init__.py b/gluon/__init__.py index 6a5e25f1..9c62c6bc 100644 --- a/gluon/__init__.py +++ b/gluon/__init__.py @@ -23,6 +23,7 @@ try: import pydal sys.modules['pydal'] = pydal except ImportError: + raise raise RuntimeError( "web2py depends on pydal, which apparently you have not installed.\n" + "Probably you cloned the repository using git without '--recursive'" + diff --git a/gluon/_compat.py b/gluon/_compat.py index b3b82ad5..9452a44a 100644 --- a/gluon/_compat.py +++ b/gluon/_compat.py @@ -1,163 +1 @@ -import sys -import hashlib -import os - -PY2 = sys.version_info[0] == 2 - -_identity = lambda x: x - -if PY2: - import cPickle as pickle - from cStringIO import StringIO - import copy_reg as copyreg - from HTMLParser import HTMLParser - import urlparse - from htmlentitydefs import entitydefs, name2codepoint - import __builtin__ as builtin - import thread - import Cookie - import urllib2 - import Queue - import ConfigParser as configparser - from email.MIMEBase import MIMEBase - from email.Header import Header - from email import Encoders, Charset - from email.MIMEMultipart import MIMEMultipart - from email.MIMEText import MIMEText - from email.Charset import add_charset, QP as charset_QP - from urllib import FancyURLopener, urlencode, urlopen - from urllib import quote as urllib_quote, unquote as urllib_unquote, quote_plus as urllib_quote_plus - from string import maketrans - from types import ClassType - import cgi - import cookielib - from xmlrpclib import ProtocolError - from gluon.contrib import ipaddress - BytesIO = StringIO - reduce = reduce - reload = reload - hashlib_md5 = hashlib.md5 - iterkeys = lambda d: d.iterkeys() - itervalues = lambda d: d.itervalues() - iteritems = lambda d: d.iteritems() - integer_types = (int, long) - string_types = (str, unicode) - text_type = unicode - basestring = basestring - xrange = xrange - long = long - unichr = unichr - unicodeT = unicode - - def implements_bool(cls): - cls.__nonzero__ = cls.__bool__ - del cls.__bool__ - return cls - - def implements_iterator(cls): - cls.next = cls.__next__ - del cls.__next__ - return cls - - def to_bytes(obj, charset='utf-8', errors='strict'): - if obj is None: - return None - if isinstance(obj, (bytes, bytearray, buffer)): - return bytes(obj) - if hasattr(obj, 'encode'): - return obj.encode(charset, errors) - raise TypeError('Expected bytes') - - def to_native(obj, charset='utf8', errors='strict'): - if obj is None or isinstance(obj, str): - return obj - return obj.encode(charset, errors) - - -else: - import pickle - from io import StringIO, BytesIO - import copyreg - from importlib import reload - from functools import reduce - from html.parser import HTMLParser - from http import cookies as Cookie - from urllib import parse as urlparse - from urllib import request as urllib2 - from html.entities import entitydefs, name2codepoint - import builtins as builtin - import _thread as thread - import configparser - import queue as Queue - from email.mime.base import MIMEBase - from email.mime.multipart import MIMEMultipart - from email.mime.text import MIMEText - from email import encoders as Encoders - from email.header import Header - from email.charset import Charset, add_charset, QP as charset_QP - from urllib.request import FancyURLopener, urlopen - from urllib.parse import quote as urllib_quote, unquote as urllib_unquote, urlencode, quote_plus as urllib_quote_plus - from http import cookiejar as cookielib - from xmlrpc.client import ProtocolError - import html # warning, this is the python3 module and not the web2py html module - import ipaddress - hashlib_md5 = lambda s: hashlib.md5(bytes(s, 'utf8')) - iterkeys = lambda d: iter(d.keys()) - itervalues = lambda d: iter(d.values()) - iteritems = lambda d: iter(d.items()) - integer_types = (int,) - string_types = (str,) - text_type = str - basestring = str - xrange = range - long = int - unichr = chr - unicodeT = str - maketrans = str.maketrans - ClassType = type - - implements_iterator = _identity - implements_bool = _identity - - def to_bytes(obj, charset='utf-8', errors='strict'): - if obj is None: - return None - if isinstance(obj, (bytes, bytearray, memoryview)): - return bytes(obj) - if hasattr(obj, 'encode'): - return obj.encode(charset, errors) - raise TypeError('Expected bytes') - - def to_native(obj, charset='utf8', errors='strict'): - if obj is None or isinstance(obj, str): - return obj - return obj.decode(charset, errors) - - -def with_metaclass(meta, *bases): - """Create a base class with a metaclass.""" - # This requires a bit of explanation: the basic idea is to make a dummy - # metaclass for one level of class instantiation that replaces itself with - # the actual metaclass. - class metaclass(meta): - __call__ = type.__call__ - __init__ = type.__init__ - - def __new__(cls, name, this_bases, d): - if this_bases is None: - return type.__new__(cls, name, (), d) - return meta(name, bases, d) - return metaclass('temporary_class', None, {}) - - -def to_unicode(obj, charset='utf-8', errors='strict'): - if obj is None: - return None - if not hasattr(obj, 'decode'): - return text_type(obj) - return obj.decode(charset, errors) - - -# shortcuts -pjoin = os.path.join -exists = os.path.exists +from pydal._compat import * diff --git a/gluon/packages/dal b/gluon/packages/dal index 5c335558..3a238919 160000 --- a/gluon/packages/dal +++ b/gluon/packages/dal @@ -1 +1 @@ -Subproject commit 5c3355583ce96777539aa13a59842671f2cf7f4c +Subproject commit 3a238919279e23074643f2efd0dc2807dfe90938 diff --git a/gluon/scheduler.py b/gluon/scheduler.py index 2343ffe6..28989a9a 100644 --- a/gluon/scheduler.py +++ b/gluon/scheduler.py @@ -1,1751 +1,4 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -""" -| This file is part of the web2py Web Framework -| Copyrighted by Massimo Di Pierro -| License: LGPLv3 (http://www.gnu.org/licenses/lgpl.html) -Background processes made simple ---------------------------------- -""" -from __future__ import print_function - -import os -import re -import time -import multiprocessing -import sys -import threading -import traceback -import signal -import socket -import datetime -import logging -import optparse -import tempfile -import types -from functools import reduce -from json import loads, dumps -from gluon import DAL, Field, IS_NOT_EMPTY, IS_IN_SET, IS_NOT_IN_DB, IS_EMPTY_OR -from gluon import IS_INT_IN_RANGE, IS_DATETIME, IS_IN_DB -from gluon.utils import web2py_uuid -from gluon._compat import Queue, long, iteritems, PY2 -from gluon.storage import Storage - -USAGE = """ -## Example - -For any existing app - -Create File: app/models/scheduler.py ====== -from gluon.scheduler import Scheduler - -def demo1(*args,**vars): - print('you passed args=%s and vars=%s' % (args, vars)) - return 'done!' - -def demo2(): - 1/0 - -scheduler = Scheduler(db,dict(demo1=demo1,demo2=demo2)) -## run worker nodes with: - - cd web2py - python web2py.py -K myapp -or - python gluon/scheduler.py -u sqlite://storage.sqlite \ - -f applications/myapp/databases/ \ - -t mytasks.py -(-h for info) -python scheduler.py -h - -## schedule jobs using -http://127.0.0.1:8000/myapp/appadmin/insert/db/scheduler_task - -## monitor scheduled jobs -http://127.0.0.1:8000/myapp/appadmin/select/db?query=db.scheduler_task.id>0 - -## view completed jobs -http://127.0.0.1:8000/myapp/appadmin/select/db?query=db.scheduler_run.id>0 - -## view workers -http://127.0.0.1:8000/myapp/appadmin/select/db?query=db.scheduler_worker.id>0 - -""" - -path = os.getcwd() - -if 'WEB2PY_PATH' not in os.environ: - os.environ['WEB2PY_PATH'] = path - -IDENTIFIER = "%s#%s" % (socket.gethostname(), os.getpid()) - -logger = logging.getLogger('web2py.scheduler.%s' % IDENTIFIER) - -QUEUED = 'QUEUED' -ASSIGNED = 'ASSIGNED' -RUNNING = 'RUNNING' -COMPLETED = 'COMPLETED' -FAILED = 'FAILED' -TIMEOUT = 'TIMEOUT' -STOPPED = 'STOPPED' -ACTIVE = 'ACTIVE' -TERMINATE = 'TERMINATE' -DISABLED = 'DISABLED' -KILL = 'KILL' -PICK = 'PICK' -STOP_TASK = 'STOP_TASK' -EXPIRED = 'EXPIRED' -SECONDS = 1 -HEARTBEAT = 3 * SECONDS -MAXHIBERNATION = 10 -CLEAROUT = '!clear!' - -CALLABLETYPES = (types.LambdaType, types.FunctionType, - types.BuiltinFunctionType, - types.MethodType, types.BuiltinMethodType) - - -class Task(object): - """Defines a "task" object that gets passed from the main thread to the - executor's one - """ - def __init__(self, app, function, timeout, args='[]', vars='{}', **kwargs): - logger.debug(' new task allocated: %s.%s', app, function) - self.app = app - self.function = function - self.timeout = timeout - self.args = args # json - self.vars = vars # json - self.__dict__.update(kwargs) - - def __str__(self): - return '' % self.function - - -class TaskReport(object): - """Defines a "task report" object that gets passed from the executor's - thread to the main one - """ - def __init__(self, status, result=None, output=None, tb=None): - logger.debug(' new task report: %s', status) - if tb: - logger.debug(' traceback: %s', tb) - else: - logger.debug(' result: %s', result) - self.status = status - self.result = result - self.output = output - self.tb = tb - - def __str__(self): - return '' % self.status - - -class JobGraph(object): - """Experimental: dependencies amongs tasks.""" - - def __init__(self, db, job_name): - self.job_name = job_name or 'job_0' - self.db = db - - def add_deps(self, task_parent, task_child): - """Create a dependency between task_parent and task_child.""" - self.db.scheduler_task_deps.insert(task_parent=task_parent, - task_child=task_child, - job_name=self.job_name) - - def validate(self, job_name=None): - """Validate if all tasks job_name can be completed. - - Checks if there are no mutual dependencies among tasks. - Commits at the end if successfull, or it rollbacks the entire - transaction. Handle with care! - """ - db = self.db - sd = db.scheduler_task_deps - if job_name: - q = sd.job_name == job_name - else: - q = sd.id > 0 - - edges = db(q).select() - nested_dict = {} - for row in edges: - k = row.task_parent - if k in nested_dict: - nested_dict[k].add(row.task_child) - else: - nested_dict[k] = set((row.task_child,)) - try: - rtn = [] - for k, v in nested_dict.items(): - v.discard(k) # Ignore self dependencies - extra_items_in_deps = reduce(set.union, nested_dict.values()) - set(nested_dict.keys()) - nested_dict.update(dict((item, set()) for item in extra_items_in_deps)) - while True: - ordered = set(item for item, dep in nested_dict.items() if not dep) - if not ordered: - break - rtn.append(ordered) - nested_dict = dict( - (item, (dep - ordered)) for item, dep in nested_dict.items() - if item not in ordered - ) - assert not nested_dict, "A cyclic dependency exists amongst %r" % nested_dict - db.commit() - return rtn - except: - db.rollback() - return None - - -class CronParser(object): - - def __init__(self, cronline, base=None): - self.cronline = cronline - self.sched = base or datetime.datetime.now() - self.task = None - - @staticmethod - def _rangetolist(s, period='min'): - retval = [] - if s.startswith('*'): - if period == 'min': - s = s.replace('*', '0-59', 1) - elif period == 'hr': - s = s.replace('*', '0-23', 1) - elif period == 'dom': - s = s.replace('*', '1-31', 1) - elif period == 'mon': - s = s.replace('*', '1-12', 1) - elif period == 'dow': - s = s.replace('*', '0-6', 1) - m = re.compile(r'(\d+)-(\d+)/(\d+)') - match = m.match(s) - if match: - min_, max_ = int(match.group(1)), int(match.group(2)) + 1 - step_ = int(match.group(3)) - else: - m = re.compile(r'(\d+)/(\d+)') - ranges_max = {'min': 59, 'hr': 23, 'mon': 12, 'dom': 31, 'dow': 7} - match = m.match(s) - if match: - min_, max_ = int(match.group(1)), ranges_max[period] + 1 - step_ = int(match.group(2)) - if match: - for i in range(min_, max_, step_): - retval.append(i) - return retval - - @staticmethod - def _sanitycheck(values, period): - if period == 'min': - check = all(0 <= i <= 59 for i in values) - elif period == 'hr': - check = all(0 <= i <= 23 for i in values) - elif period == 'dom': - domrange = list(range(1, 32)) + ['l'] - check = all(i in domrange for i in values) - elif period == 'mon': - check = all(1 <= i <= 12 for i in values) - elif period == 'dow': - check = all(0 <= i <= 7 for i in values) - return check - - def _parse(self): - line = self.cronline.lower() - task = {} - if line.startswith('@yearly'): - line = line.replace('@yearly', '0 0 1 1 *') - elif line.startswith('@annually'): - line = line.replace('@annually', '0 0 1 1 *') - elif line.startswith('@monthly'): - line = line.replace('@monthly', '0 0 1 * *') - elif line.startswith('@weekly'): - line = line.replace('@weekly', '0 0 * * 0') - elif line.startswith('@daily'): - line = line.replace('@daily', '0 0 * * *') - elif line.startswith('@midnight'): - line = line.replace('@midnight', '0 0 * * *') - elif line.startswith('@hourly'): - line = line.replace('@hourly', '0 * * * *') - params = line.strip().split() - if len(params) < 5: - raise ValueError('Invalid cron line (too short)') - elif len(params) > 5: - raise ValueError('Invalid cron line (too long)') - daysofweek = {'sun': 0, 'mon': 1, 'tue': 2, 'wed': 3, 'thu': 4, - 'fri': 5, 'sat': 6} - monthsofyear = {'jan': 1, 'feb': 2, 'mar': 3, 'apr': 4, 'may': 5, - 'jun': 6, 'jul': 7, 'aug': 8, 'sep': 9, 'oct': 10, - 'nov': 11, 'dec': 12, 'l': 'l'} - for (s, i) in zip(params[:5], ['min', 'hr', 'dom', 'mon', 'dow']): - if s not in [None, '*']: - task[i] = [] - vals = s.split(',') - for val in vals: - if i == 'dow': - refdict = daysofweek - elif i == 'mon': - refdict = monthsofyear - if i in ('dow', 'mon') and '-' in val and '/' not in val: - isnum = val.split('-')[0].isdigit() - if isnum: - val = '%s/1' % val - else: - val = '-'.join([str(refdict[v]) - for v in val.split('-')]) - if val != '-1' and '-' in val and '/' not in val: - val = '%s/1' % val - if '/' in val: - task[i] += self._rangetolist(val, i) - elif val.isdigit() or val == '-1': - task[i].append(int(val)) - elif i in ('dow', 'mon'): - if val in refdict: - task[i].append(refdict[val]) - elif i == 'dom' and val == 'l': - task[i].append(val) - if not task[i]: - raise ValueError('Invalid cron value (%s)' % s) - if not self._sanitycheck(task[i], i): - raise ValueError('Invalid cron value (%s)' % s) - task[i] = sorted(task[i]) - self.task = task - - @staticmethod - def _get_next_dow(sched, task): - task_dow = [a % 7 for a in task['dow']] - while sched.isoweekday() % 7 not in task_dow: - sched += datetime.timedelta(days=1) - return sched - - @staticmethod - def _get_next_dom(sched, task): - if task['dom'] == ['l']: - last_feb = 29 if sched.year % 4 == 0 else 28 - lastdayofmonth = [ - 31, last_feb, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 - ] - task_dom = [lastdayofmonth[sched.month - 1]] - else: - task_dom = task['dom'] - while sched.day not in task_dom: - sched += datetime.timedelta(days=1) - return sched - - @staticmethod - def _get_next_mon(sched, task): - while sched.month not in task['mon']: - if sched.month < 12: - sched = sched.replace(month=sched.month + 1) - else: - sched = sched.replace(month=1, year=sched.year + 1) - return sched - - @staticmethod - def _getnext_hhmm(sched, task, add_to=True): - if add_to: - sched += datetime.timedelta(minutes=1) - if 'min' in task: - while sched.minute not in task['min']: - sched += datetime.timedelta(minutes=1) - if 'hr' in task and sched.hour not in task['hr']: - while sched.hour not in task['hr']: - sched += datetime.timedelta(hours=1) - return sched - - def _getnext_date(self, sched, task): - if 'dow' in task and 'dom' in task: - dow = self._get_next_dow(sched, task) - dom = self._get_next_dom(sched, task) - sched = min(dow, dom) - elif 'dow' in task: - sched = self._get_next_dow(sched, task) - elif 'dom' in task: - sched = self._get_next_dom(sched, task) - if 'mon' in task: - sched = self._get_next_mon(sched, task) - return sched.replace(hour=0, minute=0) - - def get_next(self): - """Get next date according to specs.""" - if not self.task: - self._parse() - task = self.task - sched = self.sched - x = 0 - while x < 1000: # avoid potential max recursions - x += 1 - try: - next_date = self._getnext_date(sched, task) - except (ValueError, OverflowError) as e: - raise ValueError('Invalid cron expression (%s)' % e) - if next_date.date() > self.sched.date(): - # we rolled date, check for valid hhmm - sched = self._getnext_hhmm(next_date, task, False) - break - else: - # same date, get next hhmm - sched_time = self._getnext_hhmm(sched, task, True) - if sched_time.date() > sched.date(): - # we rolled date again :( - sched = sched_time - else: - sched = sched_time - break - else: - raise ValueError('Potential bug found, please submit your ' - 'cron expression to the authors') - self.sched = sched - return sched - - def __iter__(self): - """Support iteration.""" - return self - - __next__ = next = get_next - -# the two functions below deal with simplejson decoding as unicode, esp for the dict decode -# and subsequent usage as function Keyword arguments unicode variable names won't work! -# borrowed from http://stackoverflow.com/questions/956867/ - - -def _decode_list(lst): - if not PY2: - return lst - newlist = [] - for i in lst: - if isinstance(i, unicode): - i = i.encode('utf-8') - elif isinstance(i, list): - i = _decode_list(i) - newlist.append(i) - return newlist - - -def _decode_dict(dct): - if not PY2: - return dct - newdict = {} - for k, v in iteritems(dct): - if isinstance(k, unicode): - k = k.encode('utf-8') - if isinstance(v, unicode): - v = v.encode('utf-8') - elif isinstance(v, list): - v = _decode_list(v) - newdict[k] = v - return newdict - - -def executor(queue, task, out): - """The function used to execute tasks in the background process.""" - logger.debug(' task started') - - class LogOutput(object): - """Facility to log output at intervals.""" - - def __init__(self, out_queue): - self.out_queue = out_queue - self.stdout = sys.stdout - sys.stdout = self - - def __del__(self): - sys.stdout = self.stdout - - def flush(self): - pass - - def write(self, data): - self.out_queue.put(data) - - W2P_TASK = Storage({ - 'id': task.task_id, - 'uuid': task.uuid, - 'run_id': task.run_id - }) - stdout = LogOutput(out) - try: - if task.app: - os.chdir(os.environ['WEB2PY_PATH']) - from gluon.shell import env, parse_path_info - from gluon import current - level = logging.getLogger().getEffectiveLevel() - logging.getLogger().setLevel(logging.WARN) - # Get controller-specific subdirectory if task.app is of - # form 'app/controller' - (a, c, f) = parse_path_info(task.app) - _env = env(a=a, c=c, import_models=True, - extra_request={'is_scheduler': True}) - logging.getLogger().setLevel(level) - f = task.function - functions = current._scheduler.tasks - if not functions: - # look into env - _function = _env.get(f) - else: - _function = functions.get(f) - if not isinstance(_function, CALLABLETYPES): - raise NameError( - "name '%s' not found in scheduler's environment" % f) - # Inject W2P_TASK into environment - _env.update({'W2P_TASK': W2P_TASK}) - # Inject W2P_TASK into current - from gluon import current - current.W2P_TASK = W2P_TASK - globals().update(_env) - args = _decode_list(loads(task.args)) - vars = loads(task.vars, object_hook=_decode_dict) - result = dumps(_function(*args, **vars)) - else: - # for testing purpose only - result = eval(task.function)( - *loads(task.args, object_hook=_decode_dict), - **loads(task.vars, object_hook=_decode_dict)) - if len(result) >= 1024: - fd, temp_path = tempfile.mkstemp(suffix='.w2p_sched') - with os.fdopen(fd, 'w') as f: - f.write(result) - result = 'w2p_special:%s' % temp_path - queue.put(TaskReport('COMPLETED', result=result)) - except BaseException as e: - tb = traceback.format_exc() - queue.put(TaskReport('FAILED', tb=tb)) - del stdout - - -class MetaScheduler(threading.Thread): - """Base class documenting scheduler's base methods.""" - - def __init__(self): - threading.Thread.__init__(self) - self.process = None # the background process - self.have_heartbeat = True # set to False to kill - self.empty_runs = 0 - - def local_async(self, task): - """Start the background process. - - Args: - task : a `Task` object - - Returns: - tuple: containing:: - - ('ok',result,output) - ('error',exception,None) - ('timeout',None,None) - ('terminated',None,None) - - """ - db = self.db - sr = db.scheduler_run - out = multiprocessing.Queue() - queue = multiprocessing.Queue(maxsize=1) - p = multiprocessing.Process(target=executor, args=(queue, task, out)) - self.process = p - logger.debug(' task starting') - p.start() - - task_output = "" - tout = "" - - try: - if task.sync_output > 0: - run_timeout = task.sync_output - else: - run_timeout = task.timeout - - start = time.time() - - while p.is_alive() and (not task.timeout or time.time() - start < task.timeout): - if tout: - try: - logger.debug(' partial output saved') - db(sr.id == task.run_id).update(run_output=task_output) - db.commit() - except: - pass - p.join(timeout=run_timeout) - tout = "" - while not out.empty(): - tout += out.get() - if tout: - logger.debug(' partial output: "%s"', str(tout)) - if CLEAROUT in tout: - task_output = tout[ - tout.rfind(CLEAROUT) + len(CLEAROUT):] - else: - task_output += tout - except: - p.terminate() - p.join() - logger.debug(' task stopped by general exception') - tr = TaskReport(STOPPED) - else: - if p.is_alive(): - p.terminate() - logger.debug(' task timeout') - try: - # we try to get a traceback here - tr = queue.get(timeout=2) - tr.status = TIMEOUT - tr.output = task_output - except Queue.Empty: - tr = TaskReport(TIMEOUT) - elif queue.empty(): - logger.debug(' task stopped') - tr = TaskReport(STOPPED) - else: - logger.debug(' task completed or failed') - tr = queue.get() - result = tr.result - if result and result.startswith('w2p_special'): - temp_path = result.replace('w2p_special:', '', 1) - with open(temp_path) as f: - tr.result = f.read() - os.unlink(temp_path) - tr.output = task_output - return tr - - def die(self): - """Forces termination of the worker process along with any running - task""" - logger.info('die!') - self.have_heartbeat = False - self.terminate_process() - - def give_up(self): - """Waits for any running task to be executed, then exits the worker - process""" - logger.info('Giving up as soon as possible!') - self.have_heartbeat = False - - def terminate_process(self): - """Terminate any running tasks (internal use only)""" - try: - self.process.terminate() - except: - pass # no process to terminate - - def run(self): - """This is executed by the main thread to send heartbeats""" - counter = 0 - while self.have_heartbeat: - self.send_heartbeat(counter) - counter += 1 - - def start_heartbeats(self): - self.start() - - def send_heartbeat(self, counter): - raise NotImplementedError - - def pop_task(self): - """Fetches a task ready to be executed""" - raise NotImplementedError - - def report_task(self, task, task_report): - """Creates a task report""" - raise NotImplementedError - - def sleep(self): - raise NotImplementedError - - def loop(self): - """Main loop, fetching tasks and starting executor's background - processes""" - raise NotImplementedError - - -TASK_STATUS = (QUEUED, RUNNING, COMPLETED, FAILED, TIMEOUT, STOPPED, EXPIRED) -RUN_STATUS = (RUNNING, COMPLETED, FAILED, TIMEOUT, STOPPED) -WORKER_STATUS = (ACTIVE, PICK, DISABLED, TERMINATE, KILL, STOP_TASK) - -class IS_CRONLINE(object): - """ - Validates cronline - """ - def __init__(self, error_message=None): - self.error_message = error_message - - def __call__(self, value): - recur = CronParser(value, datetime.datetime.now()) - try: - recur.get_next() - return (value, None) - except (KeyError, ValueError) as e: - if not self.error_message: - return (value, e) - return (value, self.error_message) - -class TYPE(object): - """ - Validator that checks whether field is valid json and validates its type. - Used for `args` and `vars` of the scheduler_task table - """ - - def __init__(self, myclass=list, parse=False): - self.myclass = myclass - self.parse = parse - - def __call__(self, value): - from gluon import current - try: - obj = loads(value) - except: - return (value, current.T('invalid json')) - else: - if isinstance(obj, self.myclass): - if self.parse: - return (obj, None) - else: - return (value, None) - else: - return (value, current.T('Not of type: %s') % self.myclass) - - -class Scheduler(MetaScheduler): - """Scheduler object - - Args: - db: DAL connection where Scheduler will create its tables - tasks(dict): either a dict containing name-->func or None. - If None, functions will be searched in the environment - migrate(bool): turn migration on/off for the Scheduler's tables - worker_name(str): force worker_name to identify each process. - Leave it to None to autoassign a name (hostname#pid) - group_names(list): process tasks belonging to this group - defaults to ['main'] if nothing gets passed - heartbeat(int): how many seconds the worker sleeps between one - execution and the following one. Indirectly sets how many seconds - will pass between checks for new tasks - max_empty_runs(int): how many loops are allowed to pass without - processing any tasks before exiting the process. 0 to keep always - the process alive - discard_results(bool): Scheduler stores executions's details into the - scheduler_run table. By default, only if there is a result the - details are kept. Turning this to True means discarding results - even for tasks that return something - utc_time(bool): do all datetime calculations assuming UTC as the - timezone. Remember to pass `start_time` and `stop_time` to tasks - accordingly - - """ - - def __init__(self, db, tasks=None, migrate=True, - worker_name=None, group_names=None, heartbeat=HEARTBEAT, - max_empty_runs=0, discard_results=False, utc_time=False): - - MetaScheduler.__init__(self) - - self.db = db - self.db_thread = None - self.tasks = tasks - self.group_names = group_names or ['main'] - self.heartbeat = heartbeat - self.worker_name = worker_name or IDENTIFIER - self.max_empty_runs = max_empty_runs - self.discard_results = discard_results - self.is_a_ticker = False - self.do_assign_tasks = False - self.greedy = False - self.utc_time = utc_time - self.w_stats = Storage( - dict( - status=RUNNING, - sleep=heartbeat, - total=0, - errors=0, - empty_runs=0, - queue=0, - distribution=None, - workers=0) - ) # dict holding statistics - - from gluon import current - current._scheduler = self - - self.define_tables(db, migrate=migrate) - - def __get_migrate(self, tablename, migrate=True): - if migrate is False: - return False - elif migrate is True: - return True - elif isinstance(migrate, str): - return "%s%s.table" % (migrate, tablename) - return True - - def now(self): - """Shortcut that fetches current time based on UTC preferences.""" - return self.utc_time and datetime.datetime.utcnow() or datetime.datetime.now() - - def set_requirements(self, scheduler_task): - """Called to set defaults for lazy_tables connections.""" - from gluon import current - if hasattr(current, 'request'): - scheduler_task.application_name.default = '%s/%s' % ( - current.request.application, current.request.controller - ) - - def define_tables(self, db, migrate): - """Define Scheduler tables structure.""" - from pydal.base import DEFAULT - logger.debug('defining tables (migrate=%s)', migrate) - now = self.now - db.define_table( - 'scheduler_task', - Field('application_name', requires=IS_NOT_EMPTY(), - default=None, writable=False), - Field('task_name', default=None), - Field('group_name', default='main'), - Field('status', requires=IS_IN_SET(TASK_STATUS), - default=QUEUED, writable=False), - Field('broadcast', 'boolean', default=False), - Field('function_name', - requires=IS_IN_SET(sorted(self.tasks.keys())) - if self.tasks else DEFAULT), - Field('uuid', length=255, - requires=IS_NOT_IN_DB(db, 'scheduler_task.uuid'), - unique=True, default=web2py_uuid), - Field('args', 'text', default='[]', requires=TYPE(list)), - Field('vars', 'text', default='{}', requires=TYPE(dict)), - Field('enabled', 'boolean', default=True), - Field('start_time', 'datetime', default=now, - requires=IS_DATETIME()), - Field('next_run_time', 'datetime', default=now), - Field('stop_time', 'datetime'), - Field('repeats', 'integer', default=1, comment="0=unlimited", - requires=IS_INT_IN_RANGE(0, None)), - Field('retry_failed', 'integer', default=0, comment="-1=unlimited", - requires=IS_INT_IN_RANGE(-1, None)), - Field('period', 'integer', default=60, comment='seconds', - requires=IS_INT_IN_RANGE(0, None)), - Field('prevent_drift', 'boolean', default=False, - comment='Exact start_times between runs'), - Field('cronline', default=None, - comment='Discard "period", use this cron expr instead', - requires=IS_EMPTY_OR(IS_CRONLINE())), - Field('timeout', 'integer', default=60, comment='seconds', - requires=IS_INT_IN_RANGE(1, None)), - Field('sync_output', 'integer', default=0, - comment="update output every n sec: 0=never", - requires=IS_INT_IN_RANGE(0, None)), - Field('times_run', 'integer', default=0, writable=False), - Field('times_failed', 'integer', default=0, writable=False), - Field('last_run_time', 'datetime', writable=False, readable=False), - Field('assigned_worker_name', default='', writable=False), - on_define=self.set_requirements, - migrate=self.__get_migrate('scheduler_task', migrate), - format='(%(id)s) %(task_name)s') - - db.define_table( - 'scheduler_run', - Field('task_id', 'reference scheduler_task'), - Field('status', requires=IS_IN_SET(RUN_STATUS)), - Field('start_time', 'datetime'), - Field('stop_time', 'datetime'), - Field('run_output', 'text'), - Field('run_result', 'text'), - Field('traceback', 'text'), - Field('worker_name', default=self.worker_name), - migrate=self.__get_migrate('scheduler_run', migrate) - ) - - db.define_table( - 'scheduler_worker', - Field('worker_name', length=255, unique=True), - Field('first_heartbeat', 'datetime'), - Field('last_heartbeat', 'datetime'), - Field('status', requires=IS_IN_SET(WORKER_STATUS)), - Field('is_ticker', 'boolean', default=False, writable=False), - Field('group_names', 'list:string', default=self.group_names), - Field('worker_stats', 'json'), - migrate=self.__get_migrate('scheduler_worker', migrate) - ) - - db.define_table( - 'scheduler_task_deps', - Field('job_name', default='job_0'), - Field('task_parent', 'integer', - requires=IS_IN_DB(db, 'scheduler_task.id', '%(task_name)s') - ), - Field('task_child', 'reference scheduler_task'), - Field('can_visit', 'boolean', default=False), - migrate=self.__get_migrate('scheduler_task_deps', migrate) - ) - - if migrate is not False: - db.commit() - - def loop(self, worker_name=None): - """Main loop. - - This works basically as a neverending loop that: - - - checks if the worker is ready to process tasks (is not DISABLED) - - pops a task from the queue - - if there is a task: - - - spawns the executor background process - - waits for the process to be finished - - sleeps `heartbeat` seconds - - if there is not a task: - - - checks for max_empty_runs - - sleeps `heartbeat` seconds - - """ - signal.signal(signal.SIGTERM, lambda signum, stack_frame: sys.exit(1)) - try: - self.start_heartbeats() - while self.have_heartbeat: - if self.w_stats.status == DISABLED: - logger.debug('Someone stopped me, sleeping until better' - ' times come (%s)', self.w_stats.sleep) - self.sleep() - continue - logger.debug('looping...') - task = self.wrapped_pop_task() - if task: - self.w_stats.empty_runs = 0 - self.w_stats.status = RUNNING - self.w_stats.total += 1 - self.wrapped_report_task(task, self.local_async(task)) - if not self.w_stats.status == DISABLED: - self.w_stats.status = ACTIVE - else: - self.w_stats.empty_runs += 1 - logger.debug('sleeping...') - if self.max_empty_runs != 0: - logger.debug('empty runs %s/%s', - self.w_stats.empty_runs, - self.max_empty_runs) - if self.w_stats.empty_runs >= self.max_empty_runs: - logger.info( - 'empty runs limit reached, killing myself') - self.die() - self.sleep() - except (KeyboardInterrupt, SystemExit): - logger.info('catched') - self.die() - - def wrapped_assign_tasks(self, db): - """Commodity function to call `assign_tasks` and trap exceptions. - - If an exception is raised, assume it happened because of database - contention and retries `assign_task` after 0.5 seconds - """ - logger.debug('Assigning tasks...') - db.commit() # db.commit() only for Mysql - x = 0 - while x < 10: - try: - self.assign_tasks(db) - db.commit() - logger.debug('Tasks assigned...') - break - except: - self.w_stats.errors += 1 - db.rollback() - logger.error('TICKER: error assigning tasks (%s)', x) - x += 1 - time.sleep(0.5) - - def wrapped_pop_task(self): - """Commodity function to call `pop_task` and trap exceptions. - - If an exception is raised, assume it happened because of database - contention and retries `pop_task` after 0.5 seconds - """ - db = self.db - db.commit() # another nifty db.commit() only for Mysql - x = 0 - while x < 10: - try: - rtn = self.pop_task(db) - return rtn - break - except: - self.w_stats.errors += 1 - db.rollback() - logger.error(' error popping tasks') - x += 1 - time.sleep(0.5) - - def pop_task(self, db): - """Grab a task ready to be executed from the queue.""" - now = self.now() - st = self.db.scheduler_task - if self.is_a_ticker and self.do_assign_tasks: - # I'm a ticker, and 5 loops passed without reassigning tasks, - # let's do that and loop again - self.wrapped_assign_tasks(db) - return None - # ready to process something - grabbed = db( - (st.assigned_worker_name == self.worker_name) & - (st.status == ASSIGNED) - ) - - task = grabbed.select(limitby=(0, 1), orderby=st.next_run_time).first() - if task: - task.update_record(status=RUNNING, last_run_time=now) - # noone will touch my task! - db.commit() - logger.debug(' work to do %s', task.id) - else: - if self.is_a_ticker and self.greedy: - # there are other tasks ready to be assigned - logger.info('TICKER: greedy loop') - self.wrapped_assign_tasks(db) - else: - logger.info('nothing to do') - return None - times_run = task.times_run + 1 - if task.cronline: - cron_recur = CronParser(task.cronline, now.replace(second=0)) - next_run_time = cron_recur.get_next() - elif not task.prevent_drift: - next_run_time = task.last_run_time + datetime.timedelta( - seconds=task.period - ) - else: - # calc next_run_time based on available slots - # see #1191 - next_run_time = task.start_time - secondspassed = (now - next_run_time).total_seconds() - steps = secondspassed // task.period + 1 - next_run_time += datetime.timedelta(seconds=task.period * steps) - - if times_run < task.repeats or task.repeats == 0: - # need to run (repeating task) - run_again = True - else: - # no need to run again - run_again = False - run_id = 0 - while True and not self.discard_results: - logger.debug(' new scheduler_run record') - try: - run_id = db.scheduler_run.insert( - task_id=task.id, - status=RUNNING, - start_time=now, - worker_name=self.worker_name) - db.commit() - break - except: - time.sleep(0.5) - db.rollback() - logger.info('new task %(id)s "%(task_name)s"' - ' %(application_name)s.%(function_name)s' % task) - return Task( - app=task.application_name, - function=task.function_name, - timeout=task.timeout, - args=task.args, # in json - vars=task.vars, # in json - task_id=task.id, - run_id=run_id, - run_again=run_again, - next_run_time=next_run_time, - times_run=times_run, - stop_time=task.stop_time, - retry_failed=task.retry_failed, - times_failed=task.times_failed, - sync_output=task.sync_output, - uuid=task.uuid) - - def wrapped_report_task(self, task, task_report): - """Commodity function to call `report_task` and trap exceptions. - - If an exception is raised, assume it happened because of database - contention and retries `pop_task` after 0.5 seconds - """ - db = self.db - while True: - try: - self.report_task(task, task_report) - db.commit() - break - except: - self.w_stats.errors += 1 - db.rollback() - logger.error(' error storing result') - time.sleep(0.5) - - def report_task(self, task, task_report): - """Take care of storing the result according to preferences. - - Deals with logic for repeating tasks. - """ - db = self.db - now = self.now() - st = db.scheduler_task - sr = db.scheduler_run - if not self.discard_results: - if task_report.result != 'null' or task_report.tb: - # result is 'null' as a string if task completed - # if it's stopped it's None as NoneType, so we record - # the STOPPED "run" anyway - logger.debug(' recording task report in db (%s)', - task_report.status) - db(sr.id == task.run_id).update( - status=task_report.status, - stop_time=now, - run_result=task_report.result, - run_output=task_report.output, - traceback=task_report.tb) - else: - logger.debug(' deleting task report in db because of no result') - db(sr.id == task.run_id).delete() - # if there is a stop_time and the following run would exceed it - is_expired = (task.stop_time and - task.next_run_time > task.stop_time and - True or False) - status = (task.run_again and is_expired and EXPIRED or - task.run_again and not is_expired and - QUEUED or COMPLETED) - if task_report.status == COMPLETED: - d = dict(status=status, - next_run_time=task.next_run_time, - times_run=task.times_run, - times_failed=0 - ) - db(st.id == task.task_id).update(**d) - if status == COMPLETED: - self.update_dependencies(db, task.task_id) - else: - st_mapping = {'FAILED': 'FAILED', - 'TIMEOUT': 'TIMEOUT', - 'STOPPED': 'FAILED'}[task_report.status] - status = (task.retry_failed - and task.times_failed < task.retry_failed - and QUEUED or task.retry_failed == -1 - and QUEUED or st_mapping) - db(st.id == task.task_id).update( - times_failed=st.times_failed + 1, - next_run_time=task.next_run_time, - status=status - ) - logger.info('task completed (%s)', task_report.status) - - def update_dependencies(self, db, task_id): - """Unblock execution paths for Jobs.""" - db(db.scheduler_task_deps.task_child == task_id).update(can_visit=True) - - def adj_hibernation(self): - """Used to increase the "sleep" interval for DISABLED workers.""" - if self.w_stats.status == DISABLED: - wk_st = self.w_stats.sleep - hibernation = wk_st + HEARTBEAT if wk_st < MAXHIBERNATION else MAXHIBERNATION - self.w_stats.sleep = hibernation - - def send_heartbeat(self, counter): - """Coordination among available workers. - - It: - - sends the heartbeat - - elects a ticker among available workers (the only process that - effectively dispatch tasks to workers) - - deals with worker's statuses - - does "housecleaning" for dead workers - - triggers tasks assignment to workers - """ - if self.db_thread: - # BKR 20180612 check if connection still works - try: - query = self.db_thread.scheduler_worker.worker_name == self.worker_name - self.db_thread(query).count() - except self.db_thread._adapter.connection.OperationalError: - # if not -> throw away self.db_thread and force reconnect - self.db_thread = None - - if not self.db_thread: - logger.debug('thread building own DAL object') - self.db_thread = DAL( - self.db._uri, folder=self.db._adapter.folder, decode_credentials=True) - self.define_tables(self.db_thread, migrate=False) - try: - db = self.db_thread - sw, st = db.scheduler_worker, db.scheduler_task - now = self.now() - # record heartbeat - mybackedstatus = db(sw.worker_name == self.worker_name).select().first() - if not mybackedstatus: - sw.insert(status=ACTIVE, worker_name=self.worker_name, - first_heartbeat=now, last_heartbeat=now, - group_names=self.group_names, - worker_stats=self.w_stats) - self.w_stats.status = ACTIVE - self.w_stats.sleep = self.heartbeat - mybackedstatus = ACTIVE - else: - mybackedstatus = mybackedstatus.status - if mybackedstatus == DISABLED: - # keep sleeping - self.w_stats.status = DISABLED - logger.debug('........recording heartbeat (%s)', - self.w_stats.status) - db(sw.worker_name == self.worker_name).update( - last_heartbeat=now, - worker_stats=self.w_stats) - elif mybackedstatus == TERMINATE: - self.w_stats.status = TERMINATE - logger.debug("Waiting to terminate the current task") - self.give_up() - elif mybackedstatus == KILL: - self.w_stats.status = KILL - self.die() - return - else: - if mybackedstatus == STOP_TASK: - logger.info('Asked to kill the current task') - self.terminate_process() - logger.debug('........recording heartbeat (%s)', - self.w_stats.status) - db(sw.worker_name == self.worker_name).update( - last_heartbeat=now, status=ACTIVE, - worker_stats=self.w_stats) - self.w_stats.sleep = self.heartbeat # re-activating the process - if self.w_stats.status != RUNNING: - self.w_stats.status = ACTIVE - - self.do_assign_tasks = False - if counter % 5 == 0 or mybackedstatus == PICK: - try: - # delete dead workers - expiration = now - datetime.timedelta( - seconds=self.heartbeat * 3) - departure = now - datetime.timedelta( - seconds=self.heartbeat * 3 * 15) - logger.debug( - ' freeing workers that have not sent heartbeat') - dead_workers = db( - ((sw.last_heartbeat < expiration) & (sw.status == ACTIVE)) | - ((sw.last_heartbeat < departure) & (sw.status != ACTIVE)) - ) - dead_workers_name = dead_workers._select(sw.worker_name) - db( - (st.assigned_worker_name.belongs(dead_workers_name)) & - (st.status == RUNNING) - ).update(assigned_worker_name='', status=QUEUED) - dead_workers.delete() - try: - self.is_a_ticker = self.being_a_ticker() - except: - logger.error('Error coordinating TICKER') - if self.w_stats.status == ACTIVE: - self.do_assign_tasks = True - except: - logger.error('Error cleaning up') - db.commit() - except: - logger.error('Error retrieving status') - db.rollback() - self.adj_hibernation() - self.sleep() - - def being_a_ticker(self): - """Elect a TICKER process that assigns tasks to available workers. - - Does its best to elect a worker that is not busy processing other tasks - to allow a proper distribution of tasks among all active workers ASAP - """ - db = self.db_thread - sw = db.scheduler_worker - my_name = self.worker_name - all_active = db( - (sw.worker_name != my_name) & (sw.status == ACTIVE) - ).select(sw.is_ticker, sw.worker_name) - ticker = all_active.find(lambda row: row.is_ticker is True).first() - not_busy = self.w_stats.status == ACTIVE - if not ticker: - # if no other tickers are around - if not_busy: - # only if I'm not busy - db(sw.worker_name == my_name).update(is_ticker=True) - db(sw.worker_name != my_name).update(is_ticker=False) - logger.info("TICKER: I'm a ticker") - else: - # I'm busy - if len(all_active) >= 1: - # so I'll "downgrade" myself to a "poor worker" - db(sw.worker_name == my_name).update(is_ticker=False) - else: - not_busy = True - db.commit() - return not_busy - else: - logger.info( - "%s is a ticker, I'm a poor worker" % ticker.worker_name) - return False - - def assign_tasks(self, db): - """Assign task to workers, that can then pop them from the queue. - - Deals with group_name(s) logic, in order to assign linearly tasks - to available workers for those groups - """ - sw, st, sd = db.scheduler_worker, db.scheduler_task, db.scheduler_task_deps - now = self.now() - all_workers = db(sw.status == ACTIVE).select() - # build workers as dict of groups - wkgroups = {} - for w in all_workers: - if w.worker_stats['status'] == 'RUNNING': - continue - group_names = w.group_names - for gname in group_names: - if gname not in wkgroups: - wkgroups[gname] = dict( - workers=[{'name': w.worker_name, 'c': 0}]) - else: - wkgroups[gname]['workers'].append( - {'name': w.worker_name, 'c': 0}) - # set queued tasks that expired between "runs" (i.e., you turned off - # the scheduler): then it wasn't expired, but now it is - db( - (st.status.belongs((QUEUED, ASSIGNED))) & - (st.stop_time < now) - ).update(status=EXPIRED) - - # calculate dependencies - deps_with_no_deps = db( - (sd.can_visit == False) & - (~sd.task_child.belongs( - db(sd.can_visit == False)._select(sd.task_parent) - ) - ) - )._select(sd.task_child) - no_deps = db( - (st.status.belongs((QUEUED, ASSIGNED))) & - ( - (sd.id == None) | (st.id.belongs(deps_with_no_deps)) - - ) - )._select(st.id, distinct=True, left=sd.on( - (st.id == sd.task_parent) & - (sd.can_visit == False) - ) - ) - - all_available = db( - (st.status.belongs((QUEUED, ASSIGNED))) & - (st.next_run_time <= now) & - (st.enabled == True) & - (st.id.belongs(no_deps)) - ) - - limit = len(all_workers) * (50 / (len(wkgroups) or 1)) - # if there are a moltitude of tasks, let's figure out a maximum of - # tasks per worker. This can be further tuned with some added - # intelligence (like esteeming how many tasks will a worker complete - # before the ticker reassign them around, but the gain is quite small - # 50 is a sweet spot also for fast tasks, with sane heartbeat values - # NB: ticker reassign tasks every 5 cycles, so if a worker completes - # its 50 tasks in less than heartbeat*5 seconds, - # it won't pick new tasks until heartbeat*5 seconds pass. - - # If a worker is currently elaborating a long task, its tasks needs to - # be reassigned to other workers - # this shuffles up things a bit, in order to give a task equal chances - # to be executed - - # let's freeze it up - db.commit() - x = 0 - for group in wkgroups.keys(): - tasks = all_available(st.group_name == group).select( - limitby=(0, limit), orderby=st.next_run_time) - # let's break up the queue evenly among workers - for task in tasks: - x += 1 - gname = task.group_name - ws = wkgroups.get(gname) - if ws: - if task.broadcast: - for worker in ws['workers']: - new_task = db.scheduler_task.insert( - application_name = task.application_name, - task_name = task.task_name, - group_name = task.group_name, - status = ASSIGNED, - broadcast = False, - function_name = task.function_name, - args = task.args, - start_time = now, - repeats = 1, - retry_failed = task.retry_failed, - sync_output = task.sync_output, - assigned_worker_name = worker['name']) - if task.period: - next_run_time = now+datetime.timedelta(seconds=task.period) - else: - # must be cronline - raise NotImplementedError - db(st.id == task.id).update(times_run=task.times_run+1, - next_run_time=next_run_time, - last_run_time=now) - db.commit() - else: - counter = 0 - myw = 0 - for i, w in enumerate(ws['workers']): - if w['c'] < counter: - myw = i - counter = w['c'] - assigned_wn = wkgroups[gname]['workers'][myw]['name'] - d = dict( - status=ASSIGNED, - assigned_worker_name=assigned_wn - ) - db( - (st.id == task.id) & - (st.status.belongs((QUEUED, ASSIGNED))) - ).update(**d) - wkgroups[gname]['workers'][myw]['c'] += 1 - db.commit() - # I didn't report tasks but I'm working nonetheless!!!! - if x > 0: - self.w_stats.empty_runs = 0 - self.w_stats.queue = x - self.w_stats.distribution = wkgroups - self.w_stats.workers = len(all_workers) - # I'll be greedy only if tasks assigned are equal to the limit - # (meaning there could be others ready to be assigned) - self.greedy = x >= limit - logger.info('TICKER: workers are %s', len(all_workers)) - logger.info('TICKER: tasks are %s', x) - - def sleep(self): - """Calculate the number of seconds to sleep.""" - time.sleep(self.w_stats.sleep) - # should only sleep until next available task - - def set_worker_status(self, group_names=None, action=ACTIVE, - exclude=None, limit=None, worker_name=None): - """Internal function to set worker's status.""" - ws = self.db.scheduler_worker - if not group_names: - group_names = self.group_names - elif isinstance(group_names, str): - group_names = [group_names] - if worker_name: - self.db(ws.worker_name == worker_name).update(status=action) - return - exclusion = exclude and exclude.append(action) or [action] - if not limit: - for group in group_names: - self.db( - (ws.group_names.contains(group)) & - (~ws.status.belongs(exclusion)) - ).update(status=action) - else: - for group in group_names: - workers = self.db((ws.group_names.contains(group)) & - (~ws.status.belongs(exclusion)) - )._select(ws.id, limitby=(0, limit)) - self.db(ws.id.belongs(workers)).update(status=action) - - def disable(self, group_names=None, limit=None, worker_name=None): - """Set DISABLED on the workers processing `group_names` tasks. - - A DISABLED worker will be kept alive but it won't be able to process - any waiting tasks, essentially putting it to sleep. - By default, all group_names of Scheduler's instantation are selected - """ - self.set_worker_status( - group_names=group_names, - action=DISABLED, - exclude=[DISABLED, KILL, TERMINATE], - limit=limit) - - def resume(self, group_names=None, limit=None, worker_name=None): - """Wakes a worker up (it will be able to process queued tasks)""" - self.set_worker_status( - group_names=group_names, - action=ACTIVE, - exclude=[KILL, TERMINATE], - limit=limit) - - def terminate(self, group_names=None, limit=None, worker_name=None): - """Sets TERMINATE as worker status. The worker will wait for any - currently running tasks to be executed and then it will exit gracefully - """ - self.set_worker_status( - group_names=group_names, - action=TERMINATE, - exclude=[KILL], - limit=limit) - - def kill(self, group_names=None, limit=None, worker_name=None): - """Sets KILL as worker status. The worker will be killed even if it's - processing a task.""" - self.set_worker_status( - group_names=group_names, - action=KILL, - limit=limit) - - def queue_task(self, function, pargs=[], pvars={}, **kwargs): - """ - Queue tasks. This takes care of handling the validation of all - parameters - - Args: - function: the function (anything callable with a __name__) - pargs: "raw" args to be passed to the function. Automatically - jsonified. - pvars: "raw" kwargs to be passed to the function. Automatically - jsonified - kwargs: all the parameters available (basically, every - `scheduler_task` column). If args and vars are here, they - should be jsonified already, and they will override pargs - and pvars - - Returns: - a dict just as a normal validate_and_insert(), plus a uuid key - holding the uuid of the queued task. If validation is not passed - ( i.e. some parameters are invalid) both id and uuid will be None, - and you'll get an "error" dict holding the errors found. - """ - if hasattr(function, '__name__'): - function = function.__name__ - targs = 'args' in kwargs and kwargs.pop('args') or dumps(pargs) - tvars = 'vars' in kwargs and kwargs.pop('vars') or dumps(pvars) - tuuid = 'uuid' in kwargs and kwargs.pop('uuid') or web2py_uuid() - tname = 'task_name' in kwargs and kwargs.pop('task_name') or function - immediate = 'immediate' in kwargs and kwargs.pop('immediate') or None - cronline = kwargs.get('cronline') - kwargs.update( - function_name=function, - task_name=tname, - args=targs, - vars=tvars, - uuid=tuuid, - ) - if cronline: - try: - start_time = kwargs.get('start_time', self.now) - next_run_time = CronParser(cronline, start_time).get_next() - kwargs.update(start_time=start_time, next_run_time=next_run_time) - except: - pass - if 'start_time' in kwargs and 'next_run_time' not in kwargs: - kwargs.update(next_run_time=kwargs['start_time']) - rtn = self.db.scheduler_task.validate_and_insert(**kwargs) - if not rtn.errors: - rtn.uuid = tuuid - if immediate: - self.db( - (self.db.scheduler_worker.is_ticker == True) - ).update(status=PICK) - else: - rtn.uuid = None - return rtn - - def task_status(self, ref, output=False): - """ - Retrieves task status and optionally the result of the task - - Args: - ref: can be - - - an integer : lookup will be done by scheduler_task.id - - a string : lookup will be done by scheduler_task.uuid - - a `Query` : lookup as you wish, e.g. :: - - db.scheduler_task.task_name == 'test1' - - output(bool): if `True`, fetch also the scheduler_run record - - Returns: - a single Row object, for the last queued task. - If output == True, returns also the last scheduler_run record. - The scheduler_run record is fetched by a left join, so it can - have all fields == None - - """ - from pydal.objects import Query - sr, st = self.db.scheduler_run, self.db.scheduler_task - if isinstance(ref, (int, long)): - q = st.id == ref - elif isinstance(ref, str): - q = st.uuid == ref - elif isinstance(ref, Query): - q = ref - else: - raise SyntaxError( - "You can retrieve results only by id, uuid or Query") - fields = [st.ALL] - left = False - orderby = ~st.id - if output: - fields = st.ALL, sr.ALL - left = sr.on(sr.task_id == st.id) - orderby = ~st.id | ~sr.id - row = self.db(q).select( - *fields, - **dict(orderby=orderby, - left=left, - limitby=(0, 1)) - ).first() - if row and output: - row.result = row.scheduler_run.run_result and \ - loads(row.scheduler_run.run_result, - object_hook=_decode_dict) or None - return row - - def stop_task(self, ref): - """Shortcut for task termination. - - If the task is RUNNING it will terminate it, meaning that status - will be set as FAILED. - - If the task is QUEUED, its stop_time will be set as to "now", - the enabled flag will be set to False, and the status to STOPPED - - Args: - ref: can be - - - an integer : lookup will be done by scheduler_task.id - - a string : lookup will be done by scheduler_task.uuid - - Returns: - - 1 if task was stopped (meaning an update has been done) - - None if task was not found, or if task was not RUNNING or QUEUED - - Note: - Experimental - """ - st, sw = self.db.scheduler_task, self.db.scheduler_worker - if isinstance(ref, (int, long)): - q = st.id == ref - elif isinstance(ref, str): - q = st.uuid == ref - else: - raise SyntaxError( - "You can retrieve results only by id or uuid") - task = self.db(q).select(st.id, st.status, st.assigned_worker_name) - task = task.first() - rtn = None - if not task: - return rtn - if task.status == 'RUNNING': - q = sw.worker_name == task.assigned_worker_name - rtn = self.db(q).update(status=STOP_TASK) - elif task.status == 'QUEUED': - rtn = self.db(q).update( - stop_time=self.now(), - enabled=False, - status=STOPPED) - return rtn - - def get_workers(self, only_ticker=False): - """ Returns a dict holding `worker_name : {**columns}` - representing all "registered" workers - only_ticker returns only the workers running as a TICKER, - if there are any - """ - db = self.db - if only_ticker: - workers = db(db.scheduler_worker.is_ticker == True).select() - else: - workers = db(db.scheduler_worker.id > 0).select() - all_workers = {} - for row in workers: - all_workers[row.worker_name] = Storage( - status=row.status, - first_heartbeat=row.first_heartbeat, - last_heartbeat=row.last_heartbeat, - group_names=row.group_names, - is_ticker=row.is_ticker, - worker_stats=row.worker_stats - ) - return all_workers - - -def main(): - """ - allows to run worker without python web2py.py .... by simply:: - - python gluon/scheduler.py - - """ - parser = optparse.OptionParser() - parser.add_option( - "-w", "--worker_name", dest="worker_name", default=None, - help="start a worker with name") - parser.add_option( - "-b", "--heartbeat", dest="heartbeat", default=10, - type='int', help="heartbeat time in seconds (default 10)") - parser.add_option( - "-L", "--logger_level", dest="logger_level", - default=30, - type='int', - help="set debug output level (0-100, 0 means all, 100 means none;default is 30)") - parser.add_option("-E", "--empty-runs", - dest="max_empty_runs", - type='int', - default=0, - help="max loops with no grabbed tasks permitted (0 for never check)") - parser.add_option( - "-g", "--group_names", dest="group_names", - default='main', - help="comma separated list of groups to be picked by the worker") - parser.add_option( - "-f", "--db_folder", dest="db_folder", - default='/Users/mdipierro/web2py/applications/scheduler/databases', - help="location of the dal database folder") - parser.add_option( - "-u", "--db_uri", dest="db_uri", - default='sqlite://storage.sqlite', - help="database URI string (web2py DAL syntax)") - parser.add_option( - "-t", "--tasks", dest="tasks", default=None, - help="file containing task files, must define" + - "tasks = {'task_name':(lambda: 'output')} or similar set of tasks") - parser.add_option( - "-U", "--utc-time", dest="utc_time", default=False, - help="work with UTC timestamps" - ) - (options, args) = parser.parse_args() - if not options.tasks or not options.db_uri: - print(USAGE) - if options.tasks: - path, filename = os.path.split(options.tasks) - if filename.endswith('.py'): - filename = filename[:-3] - sys.path.append(path) - print('importing tasks...') - tasks = __import__(filename, globals(), locals(), [], -1).tasks - print('tasks found: ' + ', '.join(tasks.keys())) - else: - tasks = {} - group_names = [x.strip() for x in options.group_names.split(',')] - - logging.getLogger().setLevel(options.logger_level) - - print('groups for this worker: ' + ', '.join(group_names)) - print('connecting to database in folder: ' + options.db_folder or './') - print('using URI: ' + options.db_uri) - db = DAL(options.db_uri, folder=options.db_folder, decode_credentials=True) - print('instantiating scheduler...') - scheduler = Scheduler(db=db, - worker_name=options.worker_name, - tasks=tasks, - migrate=True, - group_names=group_names, - heartbeat=options.heartbeat, - max_empty_runs=options.max_empty_runs, - utc_time=options.utc_time) - signal.signal(signal.SIGTERM, lambda signum, stack_frame: sys.exit(1)) - print('starting main worker loop...') - scheduler.loop() - -if __name__ == '__main__': - main() +from pydal.scheduler import * diff --git a/gluon/validators.py b/gluon/validators.py index 541dddd1..03b18cc3 100644 --- a/gluon/validators.py +++ b/gluon/validators.py @@ -1,3936 +1,3 @@ -#!/bin/env python -# -*- coding: utf-8 -*- +from pydal.validators import __all__ +from pydal.validators import * -""" -| This file is part of the web2py Web Framework -| Copyrighted by Massimo Di Pierro -| License: LGPLv3 (http://www.gnu.org/licenses/lgpl.html) -| Thanks to ga2arch for help with IS_IN_DB and IS_NOT_IN_DB on GAE - -Validators ------------ -""" -import os -import re -import datetime -import time -import cgi -import json -import urllib -import struct -import decimal -import unicodedata - -from gluon._compat import StringIO, integer_types, basestring, unicodeT, urllib_unquote, unichr, to_bytes, PY2, \ - to_unicode, to_native, string_types, urlparse -from gluon.utils import simple_hash, web2py_uuid, DIGEST_ALG_BY_SIZE -from pydal.objects import Field, FieldVirtual, FieldMethod -from functools import reduce - -regex_isint = re.compile('^[+-]?\d+$') - -JSONErrors = (NameError, TypeError, ValueError, AttributeError, - KeyError) - -__all__ = [ - 'ANY_OF', - 'CLEANUP', - 'CRYPT', - 'IS_ALPHANUMERIC', - 'IS_DATE_IN_RANGE', - 'IS_DATE', - 'IS_DATETIME_IN_RANGE', - 'IS_DATETIME', - 'IS_DECIMAL_IN_RANGE', - 'IS_EMAIL', - 'IS_LIST_OF_EMAILS', - 'IS_EMPTY_OR', - 'IS_EXPR', - 'IS_FLOAT_IN_RANGE', - 'IS_IMAGE', - 'IS_IN_DB', - 'IS_IN_SET', - 'IS_INT_IN_RANGE', - 'IS_IPV4', - 'IS_IPV6', - 'IS_IPADDRESS', - 'IS_LENGTH', - 'IS_LIST_OF', - 'IS_LOWER', - 'IS_MATCH', - 'IS_EQUAL_TO', - 'IS_NOT_EMPTY', - 'IS_NOT_IN_DB', - 'IS_NULL_OR', - 'IS_SLUG', - 'IS_STRONG', - 'IS_TIME', - 'IS_UPLOAD_FILENAME', - 'IS_UPPER', - 'IS_URL', - 'IS_JSON', -] - -try: - from gluon.globals import current - have_current = True -except ImportError: - have_current = False - - -def translate(text): - if text is None: - return None - elif isinstance(text, (str, unicodeT)) and have_current: - if hasattr(current, 'T'): - return str(current.T(text)) - return str(text) - - -def options_sorter(x, y): - return (str(x[1]).upper() > str(y[1]).upper() and 1) or -1 - - -class Validator(object): - """ - Root for all validators, mainly for documentation purposes. - - Validators are classes used to validate input fields (including forms - generated from database tables). - - Here is an example of using a validator with a FORM:: - - INPUT(_name='a', requires=IS_INT_IN_RANGE(0, 10)) - - Here is an example of how to require a validator for a table field:: - - db.define_table('person', Field('name')) - db.person.name.requires=IS_NOT_EMPTY() - - Validators are always assigned using the requires attribute of a field. A - field can have a single validator or multiple validators. Multiple - validators are made part of a list:: - - db.person.name.requires=[IS_NOT_EMPTY(), IS_NOT_IN_DB(db, 'person.id')] - - Validators are called by the function accepts on a FORM or other HTML - helper object that contains a form. They are always called in the order in - which they are listed. - - Built-in validators have constructors that take the optional argument error - message which allows you to change the default error message. - Here is an example of a validator on a database table:: - - db.person.name.requires=IS_NOT_EMPTY(error_message=T('Fill this')) - - where we have used the translation operator T to allow for - internationalization. - - Notice that default error messages are not translated. - """ - - def formatter(self, value): - """ - For some validators returns a formatted version (matching the validator) - of value. Otherwise just returns the value. - """ - return value - - def __call__(self, value): - raise NotImplementedError - - -class IS_MATCH(Validator): - """ - Example: - Used as:: - - INPUT(_type='text', _name='name', requires=IS_MATCH('.+')) - - The argument of IS_MATCH is a regular expression:: - - >>> IS_MATCH('.+')('hello') - ('hello', None) - - >>> IS_MATCH('hell')('hello') - ('hello', None) - - >>> IS_MATCH('hell.*', strict=False)('hello') - ('hello', None) - - >>> IS_MATCH('hello')('shello') - ('shello', 'invalid expression') - - >>> IS_MATCH('hello', search=True)('shello') - ('shello', None) - - >>> IS_MATCH('hello', search=True, strict=False)('shellox') - ('shellox', None) - - >>> IS_MATCH('.*hello.*', search=True, strict=False)('shellox') - ('shellox', None) - - >>> IS_MATCH('.+')('') - ('', 'invalid expression') - - """ - - def __init__(self, expression, error_message='Invalid expression', - strict=False, search=False, extract=False, - is_unicode=False): - - if strict or not search: - if not expression.startswith('^'): - expression = '^(%s)' % expression - if strict: - if not expression.endswith('$'): - expression = '(%s)$' % expression - if is_unicode: - if not isinstance(expression, unicodeT): - expression = expression.decode('utf8') - self.regex = re.compile(expression, re.UNICODE) - else: - self.regex = re.compile(expression) - self.error_message = error_message - self.extract = extract - self.is_unicode = is_unicode or (not(PY2)) - - def __call__(self, value): - if not(PY2): # PY3 convert bytes to unicode - value = to_unicode(value) - - if self.is_unicode or not(PY2): - if not isinstance(value, unicodeT): - match = self.regex.search(str(value).decode('utf8')) - else: - match = self.regex.search(value) - else: - if not isinstance(value, unicodeT): - match = self.regex.search(str(value)) - else: - match = self.regex.search(value.encode('utf8')) - if match is not None: - return (self.extract and match.group() or value, None) - return (value, translate(self.error_message)) - - -class IS_EQUAL_TO(Validator): - """ - Example: - Used as:: - - INPUT(_type='text', _name='password') - INPUT(_type='text', _name='password2', - requires=IS_EQUAL_TO(request.vars.password)) - - The argument of IS_EQUAL_TO is a string:: - - >>> IS_EQUAL_TO('aaa')('aaa') - ('aaa', None) - - >>> IS_EQUAL_TO('aaa')('aab') - ('aab', 'no match') - - """ - - def __init__(self, expression, error_message='No match'): - self.expression = expression - self.error_message = error_message - - def __call__(self, value): - if value == self.expression: - return (value, None) - return (value, translate(self.error_message)) - - -class IS_EXPR(Validator): - """ - Example: - Used as:: - - INPUT(_type='text', _name='name', - requires=IS_EXPR('5 < int(value) < 10')) - - The argument of IS_EXPR must be python condition:: - - >>> IS_EXPR('int(value) < 2')('1') - ('1', None) - - >>> IS_EXPR('int(value) < 2')('2') - ('2', 'invalid expression') - - """ - - def __init__(self, expression, error_message='Invalid expression', environment=None): - self.expression = expression - self.error_message = error_message - self.environment = environment or {} - - def __call__(self, value): - if callable(self.expression): - return (value, self.expression(value)) - # for backward compatibility - self.environment.update(value=value) - exec('__ret__=' + self.expression, self.environment) - if self.environment['__ret__']: - return (value, None) - return (value, translate(self.error_message)) - - -class IS_LENGTH(Validator): - """ - Checks if length of field's value fits between given boundaries. Works - for both text and file inputs. - - Args: - maxsize: maximum allowed length / size - minsize: minimum allowed length / size - - Examples: - Check if text string is shorter than 33 characters:: - - INPUT(_type='text', _name='name', requires=IS_LENGTH(32)) - - Check if password string is longer than 5 characters:: - - INPUT(_type='password', _name='name', requires=IS_LENGTH(minsize=6)) - - Check if uploaded file has size between 1KB and 1MB:: - - INPUT(_type='file', _name='name', requires=IS_LENGTH(1048576, 1024)) - - Other examples:: - - >>> IS_LENGTH()('') - ('', None) - >>> IS_LENGTH()('1234567890') - ('1234567890', None) - >>> IS_LENGTH(maxsize=5, minsize=0)('1234567890') # too long - ('1234567890', 'enter from 0 to 5 characters') - >>> IS_LENGTH(maxsize=50, minsize=20)('1234567890') # too short - ('1234567890', 'enter from 20 to 50 characters') - """ - - def __init__(self, maxsize=255, minsize=0, - error_message='Enter from %(min)g to %(max)g characters'): - self.maxsize = maxsize - self.minsize = minsize - self.error_message = error_message - - def __call__(self, value): - if value is None: - length = 0 - if self.minsize <= length <= self.maxsize: - return (value, None) - elif isinstance(value, cgi.FieldStorage): - if value.file: - value.file.seek(0, os.SEEK_END) - length = value.file.tell() - value.file.seek(0, os.SEEK_SET) - elif hasattr(value, 'value'): - val = value.value - if val: - length = len(val) - else: - length = 0 - if self.minsize <= length <= self.maxsize: - return (value, None) - elif isinstance(value, str): - try: - lvalue = len(to_unicode(value)) - except: - lvalue = len(value) - if self.minsize <= lvalue <= self.maxsize: - return (value, None) - elif isinstance(value, unicodeT): - if self.minsize <= len(value) <= self.maxsize: - return (value.encode('utf8'), None) - elif isinstance(value, (bytes, bytearray)): - if self.minsize <= len(value) <= self.maxsize: - return (value, None) - elif isinstance(value, (tuple, list)): - if self.minsize <= len(value) <= self.maxsize: - return (value, None) - elif self.minsize <= len(str(value)) <= self.maxsize: - return (str(value), None) - return (value, translate(self.error_message) - % dict(min=self.minsize, max=self.maxsize)) - - -class IS_JSON(Validator): - """ - Example: - Used as:: - - INPUT(_type='text', _name='name', - requires=IS_JSON(error_message="This is not a valid json input") - - >>> IS_JSON()('{"a": 100}') - ({u'a': 100}, None) - - >>> IS_JSON()('spam1234') - ('spam1234', 'invalid json') - """ - - def __init__(self, error_message='Invalid json', native_json=False): - self.native_json = native_json - self.error_message = error_message - - def __call__(self, value): - try: - if self.native_json: - json.loads(value) # raises error in case of malformed json - return (value, None) # the serialized value is not passed - else: - return (json.loads(value), None) - except JSONErrors: - return (value, translate(self.error_message)) - - def formatter(self, value): - if value is None: - return None - if self.native_json: - return value - else: - return json.dumps(value) - - -class IS_IN_SET(Validator): - """ - Example: - Used as:: - - INPUT(_type='text', _name='name', - requires=IS_IN_SET(['max', 'john'],zero='')) - - The argument of IS_IN_SET must be a list or set:: - - >>> IS_IN_SET(['max', 'john'])('max') - ('max', None) - >>> IS_IN_SET(['max', 'john'])('massimo') - ('massimo', 'value not allowed') - >>> IS_IN_SET(['max', 'john'], multiple=True)(('max', 'john')) - (('max', 'john'), None) - >>> IS_IN_SET(['max', 'john'], multiple=True)(('bill', 'john')) - (('bill', 'john'), 'value not allowed') - >>> IS_IN_SET(('id1','id2'), ['first label','second label'])('id1') # Traditional way - ('id1', None) - >>> IS_IN_SET({'id1':'first label', 'id2':'second label'})('id1') - ('id1', None) - >>> import itertools - >>> IS_IN_SET(itertools.chain(['1','3','5'],['2','4','6']))('1') - ('1', None) - >>> IS_IN_SET([('id1','first label'), ('id2','second label')])('id1') # Redundant way - ('id1', None) - - """ - - def __init__( - self, - theset, - labels=None, - error_message='Value not allowed', - multiple=False, - zero='', - sort=False, - ): - self.multiple = multiple - if isinstance(theset, dict): - self.theset = [str(item) for item in theset] - self.labels = theset.values() - elif theset and isinstance(theset, (tuple, list)) \ - and isinstance(theset[0], (tuple, list)) and len(theset[0]) == 2: - self.theset = [str(item) for item, label in theset] - self.labels = [str(label) for item, label in theset] - else: - self.theset = [str(item) for item in theset] - self.labels = labels - self.error_message = error_message - self.zero = zero - self.sort = sort - - def options(self, zero=True): - if not self.labels: - items = [(k, k) for (i, k) in enumerate(self.theset)] - else: - items = [(k, list(self.labels)[i]) for (i, k) in enumerate(self.theset)] - if self.sort: - items.sort(key=lambda o: str(o[1]).upper()) - if zero and self.zero is not None and not self.multiple: - items.insert(0, ('', self.zero)) - return items - - def __call__(self, value): - if self.multiple: - # if below was values = re.compile("[\w\-:]+").findall(str(value)) - if not value: - values = [] - elif isinstance(value, (tuple, list)): - values = value - else: - values = [value] - else: - values = [value] - thestrset = [str(x) for x in self.theset] - failures = [x for x in values if not str(x) in thestrset] - if failures and self.theset: - return (value, translate(self.error_message)) - if self.multiple: - if isinstance(self.multiple, (tuple, list)) and \ - not self.multiple[0] <= len(values) < self.multiple[1]: - return (values, translate(self.error_message)) - return (values, None) - return (value, None) - - -regex1 = re.compile('\w+\.\w+') -regex2 = re.compile('%\(([^\)]+)\)\d*(?:\.\d+)?[a-zA-Z]') - - -class IS_IN_DB(Validator): - """ - Example: - Used as:: - - INPUT(_type='text', _name='name', - requires=IS_IN_DB(db, db.mytable.myfield, zero='')) - - used for reference fields, rendered as a dropbox - """ - - def __init__( - self, - dbset, - field, - label=None, - error_message='Value not in database', - orderby=None, - groupby=None, - distinct=None, - cache=None, - multiple=False, - zero='', - sort=False, - _and=None, - left=None, - delimiter=None, - auto_add=False, - ): - from pydal.objects import Table - if hasattr(dbset, 'define_table'): - self.dbset = dbset() - else: - self.dbset = dbset - - if isinstance(field, Table): - field = field._id - elif isinstance(field, str): - items = field.split('.') - if len(items) == 1: - field = items[0] + '.id' - - (ktable, kfield) = str(field).split('.') - if not label: - label = '%%(%s)s' % kfield - if isinstance(label, str): - if regex1.match(str(label)): - label = '%%(%s)s' % str(label).split('.')[-1] - fieldnames = regex2.findall(label) - if kfield not in fieldnames: - fieldnames.append(kfield) # kfield must be last - elif isinstance(label, Field): - fieldnames = [label.name, kfield] # kfield must be last - label = '%%(%s)s' % label.name - elif callable(label): - fieldnames = '*' - else: - raise NotImplementedError - - self.fieldnames = fieldnames # fields requires to build the formatting - self.label = label - self.ktable = ktable - self.kfield = kfield - self.error_message = error_message - self.theset = None - self.orderby = orderby - self.groupby = groupby - self.distinct = distinct - self.cache = cache - self.multiple = multiple - self.zero = zero - self.sort = sort - self._and = _and - self.left = left - self.delimiter = delimiter - self.auto_add = auto_add - - def set_self_id(self, id): - if self._and: - self._and.record_id = id - - def build_set(self): - table = self.dbset.db[self.ktable] - if self.fieldnames == '*': - fields = [f for f in table] - else: - fields = [table[k] for k in self.fieldnames] - ignore = (FieldVirtual, FieldMethod) - fields = filter(lambda f: not isinstance(f, ignore), fields) - if self.dbset.db._dbname != 'gae': - orderby = self.orderby or reduce(lambda a, b: a | b, fields) - groupby = self.groupby - distinct = self.distinct - left = self.left - dd = dict(orderby=orderby, groupby=groupby, - distinct=distinct, cache=self.cache, - cacheable=True, left=left) - records = self.dbset(table).select(*fields, **dd) - else: - orderby = self.orderby or \ - reduce(lambda a, b: a | b, ( - f for f in fields if not f.name == 'id')) - dd = dict(orderby=orderby, cache=self.cache, cacheable=True) - records = self.dbset(table).select(table.ALL, **dd) - self.theset = [str(r[self.kfield]) for r in records] - if isinstance(self.label, str): - self.labels = [self.label % r for r in records] - else: - self.labels = [self.label(r) for r in records] - - def options(self, zero=True): - self.build_set() - items = [(k, self.labels[i]) for (i, k) in enumerate(self.theset)] - if self.sort: - items.sort(key=lambda o: str(o[1]).upper()) - if zero and self.zero is not None and not self.multiple: - items.insert(0, ('', self.zero)) - return items - - def maybe_add(self, table, fieldname, value): - d = {fieldname: value} - record = table(**d) - if record: - return record.id - else: - return table.insert(**d) - - def __call__(self, value): - table = self.dbset.db[self.ktable] - field = table[self.kfield] - - if self.multiple: - if self._and: - raise NotImplementedError - if isinstance(value, list): - values = value - elif self.delimiter: - values = value.split(self.delimiter) # because of autocomplete - elif value: - values = [value] - else: - values = [] - - if field.type in ('id', 'integer'): - new_values = [] - for value in values: - if not (isinstance(value, integer_types) or value.isdigit()): - if self.auto_add: - value = str(self.maybe_add(table, self.fieldnames[0], value)) - else: - return (values, translate(self.error_message)) - new_values.append(value) - values = new_values - - if isinstance(self.multiple, (tuple, list)) and \ - not self.multiple[0] <= len(values) < self.multiple[1]: - return (values, translate(self.error_message)) - if self.theset: - if not [v for v in values if v not in self.theset]: - return (values, None) - else: - def count(values, s=self.dbset, f=field): - return s(f.belongs(map(int, values))).count() - - if self.dbset.db._adapter.dbengine == "google:datastore": - range_ids = range(0, len(values), 30) - total = sum(count(values[i:i + 30]) for i in range_ids) - if total == len(values): - return (values, None) - elif count(values) == len(values): - return (values, None) - else: - if field.type in ('id', 'integer'): - if isinstance(value, integer_types) or (isinstance(value, string_types) and value.isdigit()): - value = int(value) - elif self.auto_add: - value = self.maybe_add(table, self.fieldnames[0], value) - else: - return (value, translate(self.error_message)) - - try: - value = int(value) - except TypeError: - return (value, translate(self.error_message)) - - if self.theset: - if str(value) in self.theset: - if self._and: - return self._and(value) - else: - return (value, None) - else: - if self.dbset(field == value).count(): - if self._and: - return self._and(value) - else: - return (value, None) - return (value, translate(self.error_message)) - - -class IS_NOT_IN_DB(Validator): - """ - Example: - Used as:: - - INPUT(_type='text', _name='name', requires=IS_NOT_IN_DB(db, db.table)) - - makes the field unique - """ - - def __init__( - self, - dbset, - field, - error_message='Value already in database or empty', - allowed_override=[], - ignore_common_filters=False, - ): - - from pydal.objects import Table - if isinstance(field, Table): - field = field._id - - if hasattr(dbset, 'define_table'): - self.dbset = dbset() - else: - self.dbset = dbset - self.field = field - self.error_message = error_message - self.record_id = 0 - self.allowed_override = allowed_override - self.ignore_common_filters = ignore_common_filters - - def set_self_id(self, id): - self.record_id = id - - def __call__(self, value): - value = to_native(str(value)) - if not value.strip(): - return (value, translate(self.error_message)) - if value in self.allowed_override: - return (value, None) - (tablename, fieldname) = str(self.field).split('.') - table = self.dbset.db[tablename] - field = table[fieldname] - subset = self.dbset(field == value, - ignore_common_filters=self.ignore_common_filters) - id = self.record_id - if isinstance(id, dict): - fields = [table[f] for f in id] - row = subset.select(*fields, **dict(limitby=(0, 1), orderby_on_limitby=False)).first() - if row and any(str(row[f]) != str(id[f]) for f in id): - return (value, translate(self.error_message)) - else: - row = subset.select(table._id, field, limitby=(0, 1), orderby_on_limitby=False).first() - if row and str(row[table._id]) != str(id): - return (value, translate(self.error_message)) - return (value, None) - - -def range_error_message(error_message, what_to_enter, minimum, maximum): - """build the error message for the number range validators""" - if error_message is None: - error_message = 'Enter ' + what_to_enter - if minimum is not None and maximum is not None: - error_message += ' between %(min)g and %(max)g' - elif minimum is not None: - error_message += ' greater than or equal to %(min)g' - elif maximum is not None: - error_message += ' less than or equal to %(max)g' - if type(maximum) in integer_types: - maximum -= 1 - return translate(error_message) % dict(min=minimum, max=maximum) - - -class IS_INT_IN_RANGE(Validator): - """ - Determines that the argument is (or can be represented as) an int, - and that it falls within the specified range. The range is interpreted - in the Pythonic way, so the test is: min <= value < max. - - The minimum and maximum limits can be None, meaning no lower or upper limit, - respectively. - - Example: - Used as:: - - INPUT(_type='text', _name='name', requires=IS_INT_IN_RANGE(0, 10)) - - >>> IS_INT_IN_RANGE(1,5)('4') - (4, None) - >>> IS_INT_IN_RANGE(1,5)(4) - (4, None) - >>> IS_INT_IN_RANGE(1,5)(1) - (1, None) - >>> IS_INT_IN_RANGE(1,5)(5) - (5, 'enter an integer between 1 and 4') - >>> IS_INT_IN_RANGE(1,5)(5) - (5, 'enter an integer between 1 and 4') - >>> IS_INT_IN_RANGE(1,5)(3.5) - (3.5, 'enter an integer between 1 and 4') - >>> IS_INT_IN_RANGE(None,5)('4') - (4, None) - >>> IS_INT_IN_RANGE(None,5)('6') - ('6', 'enter an integer less than or equal to 4') - >>> IS_INT_IN_RANGE(1,None)('4') - (4, None) - >>> IS_INT_IN_RANGE(1,None)('0') - ('0', 'enter an integer greater than or equal to 1') - >>> IS_INT_IN_RANGE()(6) - (6, None) - >>> IS_INT_IN_RANGE()('abc') - ('abc', 'enter an integer') - """ - - def __init__( - self, - minimum=None, - maximum=None, - error_message=None, - ): - self.minimum = int(minimum) if minimum is not None else None - self.maximum = int(maximum) if maximum is not None else None - self.error_message = range_error_message( - error_message, 'an integer', self.minimum, self.maximum) - - def __call__(self, value): - if regex_isint.match(str(value)): - v = int(value) - if ((self.minimum is None or v >= self.minimum) and - (self.maximum is None or v < self.maximum)): - return (v, None) - return (value, self.error_message) - - -def str2dec(number): - s = str(number) - if '.' not in s: - s += '.00' - else: - s += '0' * (2 - len(s.split('.')[1])) - return s - - -class IS_FLOAT_IN_RANGE(Validator): - """ - Determines that the argument is (or can be represented as) a float, - and that it falls within the specified inclusive range. - The comparison is made with native arithmetic. - - The minimum and maximum limits can be None, meaning no lower or upper limit, - respectively. - - Example: - Used as:: - - INPUT(_type='text', _name='name', requires=IS_FLOAT_IN_RANGE(0, 10)) - - >>> IS_FLOAT_IN_RANGE(1,5)('4') - (4.0, None) - >>> IS_FLOAT_IN_RANGE(1,5)(4) - (4.0, None) - >>> IS_FLOAT_IN_RANGE(1,5)(1) - (1.0, None) - >>> IS_FLOAT_IN_RANGE(1,5)(5.25) - (5.25, 'enter a number between 1 and 5') - >>> IS_FLOAT_IN_RANGE(1,5)(6.0) - (6.0, 'enter a number between 1 and 5') - >>> IS_FLOAT_IN_RANGE(1,5)(3.5) - (3.5, None) - >>> IS_FLOAT_IN_RANGE(1,None)(3.5) - (3.5, None) - >>> IS_FLOAT_IN_RANGE(None,5)(3.5) - (3.5, None) - >>> IS_FLOAT_IN_RANGE(1,None)(0.5) - (0.5, 'enter a number greater than or equal to 1') - >>> IS_FLOAT_IN_RANGE(None,5)(6.5) - (6.5, 'enter a number less than or equal to 5') - >>> IS_FLOAT_IN_RANGE()(6.5) - (6.5, None) - >>> IS_FLOAT_IN_RANGE()('abc') - ('abc', 'enter a number') - """ - - def __init__( - self, - minimum=None, - maximum=None, - error_message=None, - dot='.' - ): - self.minimum = float(minimum) if minimum is not None else None - self.maximum = float(maximum) if maximum is not None else None - self.dot = str(dot) - self.error_message = range_error_message( - error_message, 'a number', self.minimum, self.maximum) - - def __call__(self, value): - try: - if self.dot == '.': - v = float(value) - else: - v = float(str(value).replace(self.dot, '.')) - if ((self.minimum is None or v >= self.minimum) and - (self.maximum is None or v <= self.maximum)): - return (v, None) - except (ValueError, TypeError): - pass - return (value, self.error_message) - - def formatter(self, value): - if value is None: - return None - return str2dec(value).replace('.', self.dot) - - -class IS_DECIMAL_IN_RANGE(Validator): - """ - Determines that the argument is (or can be represented as) a Python Decimal, - and that it falls within the specified inclusive range. - The comparison is made with Python Decimal arithmetic. - - The minimum and maximum limits can be None, meaning no lower or upper limit, - respectively. - - Example: - Used as:: - - INPUT(_type='text', _name='name', requires=IS_DECIMAL_IN_RANGE(0, 10)) - - >>> IS_DECIMAL_IN_RANGE(1,5)('4') - (Decimal('4'), None) - >>> IS_DECIMAL_IN_RANGE(1,5)(4) - (Decimal('4'), None) - >>> IS_DECIMAL_IN_RANGE(1,5)(1) - (Decimal('1'), None) - >>> IS_DECIMAL_IN_RANGE(1,5)(5.25) - (5.25, 'enter a number between 1 and 5') - >>> IS_DECIMAL_IN_RANGE(5.25,6)(5.25) - (Decimal('5.25'), None) - >>> IS_DECIMAL_IN_RANGE(5.25,6)('5.25') - (Decimal('5.25'), None) - >>> IS_DECIMAL_IN_RANGE(1,5)(6.0) - (6.0, 'enter a number between 1 and 5') - >>> IS_DECIMAL_IN_RANGE(1,5)(3.5) - (Decimal('3.5'), None) - >>> IS_DECIMAL_IN_RANGE(1.5,5.5)(3.5) - (Decimal('3.5'), None) - >>> IS_DECIMAL_IN_RANGE(1.5,5.5)(6.5) - (6.5, 'enter a number between 1.5 and 5.5') - >>> IS_DECIMAL_IN_RANGE(1.5,None)(6.5) - (Decimal('6.5'), None) - >>> IS_DECIMAL_IN_RANGE(1.5,None)(0.5) - (0.5, 'enter a number greater than or equal to 1.5') - >>> IS_DECIMAL_IN_RANGE(None,5.5)(4.5) - (Decimal('4.5'), None) - >>> IS_DECIMAL_IN_RANGE(None,5.5)(6.5) - (6.5, 'enter a number less than or equal to 5.5') - >>> IS_DECIMAL_IN_RANGE()(6.5) - (Decimal('6.5'), None) - >>> IS_DECIMAL_IN_RANGE(0,99)(123.123) - (123.123, 'enter a number between 0 and 99') - >>> IS_DECIMAL_IN_RANGE(0,99)('123.123') - ('123.123', 'enter a number between 0 and 99') - >>> IS_DECIMAL_IN_RANGE(0,99)('12.34') - (Decimal('12.34'), None) - >>> IS_DECIMAL_IN_RANGE()('abc') - ('abc', 'enter a number') - """ - - def __init__( - self, - minimum=None, - maximum=None, - error_message=None, - dot='.' - ): - self.minimum = decimal.Decimal(str(minimum)) if minimum is not None else None - self.maximum = decimal.Decimal(str(maximum)) if maximum is not None else None - self.dot = str(dot) - self.error_message = range_error_message( - error_message, 'a number', self.minimum, self.maximum) - - def __call__(self, value): - try: - if isinstance(value, decimal.Decimal): - v = value - else: - v = decimal.Decimal(str(value).replace(self.dot, '.')) - if ((self.minimum is None or v >= self.minimum) and - (self.maximum is None or v <= self.maximum)): - return (v, None) - except (ValueError, TypeError, decimal.InvalidOperation): - pass - return (value, self.error_message) - - def formatter(self, value): - if value is None: - return None - return str2dec(value).replace('.', self.dot) - - -def is_empty(value, empty_regex=None): - _value = value - """test empty field""" - if isinstance(value, (str, unicodeT)): - value = value.strip() - if empty_regex is not None and empty_regex.match(value): - value = '' - if value is None or value == '' or value == b'' or value == []: - return (_value, True) - return (_value, False) - - -class IS_NOT_EMPTY(Validator): - """ - Example: - Used as:: - - INPUT(_type='text', _name='name', requires=IS_NOT_EMPTY()) - - >>> IS_NOT_EMPTY()(1) - (1, None) - >>> IS_NOT_EMPTY()(0) - (0, None) - >>> IS_NOT_EMPTY()('x') - ('x', None) - >>> IS_NOT_EMPTY()(' x ') - ('x', None) - >>> IS_NOT_EMPTY()(None) - (None, 'enter a value') - >>> IS_NOT_EMPTY()('') - ('', 'enter a value') - >>> IS_NOT_EMPTY()(' ') - ('', 'enter a value') - >>> IS_NOT_EMPTY()(' \\n\\t') - ('', 'enter a value') - >>> IS_NOT_EMPTY()([]) - ([], 'enter a value') - >>> IS_NOT_EMPTY(empty_regex='def')('def') - ('', 'enter a value') - >>> IS_NOT_EMPTY(empty_regex='de[fg]')('deg') - ('', 'enter a value') - >>> IS_NOT_EMPTY(empty_regex='def')('abc') - ('abc', None) - """ - - def __init__(self, error_message='Enter a value', empty_regex=None): - self.error_message = error_message - if empty_regex is not None: - self.empty_regex = re.compile(empty_regex) - else: - self.empty_regex = None - - def __call__(self, value): - value, empty = is_empty(value, empty_regex=self.empty_regex) - if empty: - return (value, translate(self.error_message)) - return (value, None) - - -class IS_ALPHANUMERIC(IS_MATCH): - """ - Example: - Used as:: - - INPUT(_type='text', _name='name', requires=IS_ALPHANUMERIC()) - - >>> IS_ALPHANUMERIC()('1') - ('1', None) - >>> IS_ALPHANUMERIC()('') - ('', None) - >>> IS_ALPHANUMERIC()('A_a') - ('A_a', None) - >>> IS_ALPHANUMERIC()('!') - ('!', 'enter only letters, numbers, and underscore') - """ - - def __init__(self, error_message='Enter only letters, numbers, and underscore'): - IS_MATCH.__init__(self, '^[\w]*$', error_message) - - -class IS_EMAIL(Validator): - """ - Checks if field's value is a valid email address. Can be set to disallow - or force addresses from certain domain(s). - - Email regex adapted from - http://haacked.com/archive/2007/08/21/i-knew-how-to-validate-an-email-address-until-i.aspx, - generally following the RFCs, except that we disallow quoted strings - and permit underscores and leading numerics in subdomain labels - - Args: - banned: regex text for disallowed address domains - forced: regex text for required address domains - - Both arguments can also be custom objects with a match(value) method. - - Example: - Check for valid email address:: - - INPUT(_type='text', _name='name', - requires=IS_EMAIL()) - - Check for valid email address that can't be from a .com domain:: - - INPUT(_type='text', _name='name', - requires=IS_EMAIL(banned='^.*\.com(|\..*)$')) - - Check for valid email address that must be from a .edu domain:: - - INPUT(_type='text', _name='name', - requires=IS_EMAIL(forced='^.*\.edu(|\..*)$')) - - >>> IS_EMAIL()('a@b.com') - ('a@b.com', None) - >>> IS_EMAIL()('abc@def.com') - ('abc@def.com', None) - >>> IS_EMAIL()('abc@3def.com') - ('abc@3def.com', None) - >>> IS_EMAIL()('abc@def.us') - ('abc@def.us', None) - >>> IS_EMAIL()('abc@d_-f.us') - ('abc@d_-f.us', None) - >>> IS_EMAIL()('@def.com') # missing name - ('@def.com', 'enter a valid email address') - >>> IS_EMAIL()('"abc@def".com') # quoted name - ('"abc@def".com', 'enter a valid email address') - >>> IS_EMAIL()('abc+def.com') # no @ - ('abc+def.com', 'enter a valid email address') - >>> IS_EMAIL()('abc@def.x') # one-char TLD - ('abc@def.x', 'enter a valid email address') - >>> IS_EMAIL()('abc@def.12') # numeric TLD - ('abc@def.12', 'enter a valid email address') - >>> IS_EMAIL()('abc@def..com') # double-dot in domain - ('abc@def..com', 'enter a valid email address') - >>> IS_EMAIL()('abc@.def.com') # dot starts domain - ('abc@.def.com', 'enter a valid email address') - >>> IS_EMAIL()('abc@def.c_m') # underscore in TLD - ('abc@def.c_m', 'enter a valid email address') - >>> IS_EMAIL()('NotAnEmail') # missing @ - ('NotAnEmail', 'enter a valid email address') - >>> IS_EMAIL()('abc@NotAnEmail') # missing TLD - ('abc@NotAnEmail', 'enter a valid email address') - >>> IS_EMAIL()('customer/department@example.com') - ('customer/department@example.com', None) - >>> IS_EMAIL()('$A12345@example.com') - ('$A12345@example.com', None) - >>> IS_EMAIL()('!def!xyz%abc@example.com') - ('!def!xyz%abc@example.com', None) - >>> IS_EMAIL()('_Yosemite.Sam@example.com') - ('_Yosemite.Sam@example.com', None) - >>> IS_EMAIL()('~@example.com') - ('~@example.com', None) - >>> IS_EMAIL()('.wooly@example.com') # dot starts name - ('.wooly@example.com', 'enter a valid email address') - >>> IS_EMAIL()('wo..oly@example.com') # adjacent dots in name - ('wo..oly@example.com', 'enter a valid email address') - >>> IS_EMAIL()('pootietang.@example.com') # dot ends name - ('pootietang.@example.com', 'enter a valid email address') - >>> IS_EMAIL()('.@example.com') # name is bare dot - ('.@example.com', 'enter a valid email address') - >>> IS_EMAIL()('Ima.Fool@example.com') - ('Ima.Fool@example.com', None) - >>> IS_EMAIL()('Ima Fool@example.com') # space in name - ('Ima Fool@example.com', 'enter a valid email address') - >>> IS_EMAIL()('localguy@localhost') # localhost as domain - ('localguy@localhost', None) - - """ - - body_regex = re.compile(''' - ^(?!\.) # name may not begin with a dot - ( - [-a-z0-9!\#$%&'*+/=?^_`{|}~] # all legal characters except dot - | - (? obtained on 2008-Nov-10 - -official_url_schemes = [ - 'aaa', - 'aaas', - 'acap', - 'cap', - 'cid', - 'crid', - 'data', - 'dav', - 'dict', - 'dns', - 'fax', - 'file', - 'ftp', - 'go', - 'gopher', - 'h323', - 'http', - 'https', - 'icap', - 'im', - 'imap', - 'info', - 'ipp', - 'iris', - 'iris.beep', - 'iris.xpc', - 'iris.xpcs', - 'iris.lws', - 'ldap', - 'mailto', - 'mid', - 'modem', - 'msrp', - 'msrps', - 'mtqp', - 'mupdate', - 'news', - 'nfs', - 'nntp', - 'opaquelocktoken', - 'pop', - 'pres', - 'prospero', - 'rtsp', - 'service', - 'shttp', - 'sip', - 'sips', - 'snmp', - 'soap.beep', - 'soap.beeps', - 'tag', - 'tel', - 'telnet', - 'tftp', - 'thismessage', - 'tip', - 'tv', - 'urn', - 'vemmi', - 'wais', - 'xmlrpc.beep', - 'xmlrpc.beep', - 'xmpp', - 'z39.50r', - 'z39.50s', -] -unofficial_url_schemes = [ - 'about', - 'adiumxtra', - 'aim', - 'afp', - 'aw', - 'callto', - 'chrome', - 'cvs', - 'ed2k', - 'feed', - 'fish', - 'gg', - 'gizmoproject', - 'iax2', - 'irc', - 'ircs', - 'itms', - 'jar', - 'javascript', - 'keyparc', - 'lastfm', - 'ldaps', - 'magnet', - 'mms', - 'msnim', - 'mvn', - 'notes', - 'nsfw', - 'psyc', - 'paparazzi:http', - 'rmi', - 'rsync', - 'secondlife', - 'sgn', - 'skype', - 'ssh', - 'sftp', - 'smb', - 'sms', - 'soldat', - 'steam', - 'svn', - 'teamspeak', - 'unreal', - 'ut2004', - 'ventrilo', - 'view-source', - 'webcal', - 'wyciwyg', - 'xfire', - 'xri', - 'ymsgr', -] -all_url_schemes = [None] + official_url_schemes + unofficial_url_schemes -http_schemes = [None, 'http', 'https'] - -# Defined in RFC 3490, Section 3.1, Requirement #1 -# Use this regex to split the authority component of a unicode URL into -# its component labels -label_split_regex = re.compile(u'[\u002e\u3002\uff0e\uff61]') - - -def escape_unicode(string): - """ - Converts a unicode string into US-ASCII, using a simple conversion scheme. - Each unicode character that does not have a US-ASCII equivalent is - converted into a URL escaped form based on its hexadecimal value. - For example, the unicode character '\u4e86' will become the string '%4e%86' - - Args: - string: unicode string, the unicode string to convert into an - escaped US-ASCII form - - Returns: - string: the US-ASCII escaped form of the inputted string - - @author: Jonathan Benn - """ - returnValue = StringIO() - - for character in string: - code = ord(character) - if code > 0x7F: - hexCode = hex(code) - returnValue.write('%' + hexCode[2:4] + '%' + hexCode[4:6]) - else: - returnValue.write(character) - - return returnValue.getvalue() - - -def unicode_to_ascii_authority(authority): - """ - Follows the steps in RFC 3490, Section 4 to convert a unicode authority - string into its ASCII equivalent. - For example, u'www.Alliancefran\xe7aise.nu' will be converted into - 'www.xn--alliancefranaise-npb.nu' - - Args: - authority: unicode string, the URL authority component to convert, - e.g. u'www.Alliancefran\xe7aise.nu' - - Returns: - string: the US-ASCII character equivalent to the inputed authority, - e.g. 'www.xn--alliancefranaise-npb.nu' - - Raises: - Exception: if the function is not able to convert the inputed - authority - - @author: Jonathan Benn - """ - # RFC 3490, Section 4, Step 1 - # The encodings.idna Python module assumes that AllowUnassigned == True - - # RFC 3490, Section 4, Step 2 - labels = label_split_regex.split(authority) - - # RFC 3490, Section 4, Step 3 - # The encodings.idna Python module assumes that UseSTD3ASCIIRules == False - - # RFC 3490, Section 4, Step 4 - # We use the ToASCII operation because we are about to put the authority - # into an IDN-unaware slot - asciiLabels = [] - import encodings.idna - for label in labels: - if label: - asciiLabels.append(to_native(encodings.idna.ToASCII(label))) - else: - # encodings.idna.ToASCII does not accept an empty string, but - # it is necessary for us to allow for empty labels so that we - # don't modify the URL - asciiLabels.append('') - # RFC 3490, Section 4, Step 5 - return str(reduce(lambda x, y: x + unichr(0x002E) + y, asciiLabels)) - - -def unicode_to_ascii_url(url, prepend_scheme): - """ - Converts the inputed unicode url into a US-ASCII equivalent. This function - goes a little beyond RFC 3490, which is limited in scope to the domain name - (authority) only. Here, the functionality is expanded to what was observed - on Wikipedia on 2009-Jan-22: - - Component Can Use Unicode? - --------- ---------------- - scheme No - authority Yes - path Yes - query Yes - fragment No - - The authority component gets converted to punycode, but occurrences of - unicode in other components get converted into a pair of URI escapes (we - assume 4-byte unicode). E.g. the unicode character U+4E2D will be - converted into '%4E%2D'. Testing with Firefox v3.0.5 has shown that it can - understand this kind of URI encoding. - - Args: - url: unicode string, the URL to convert from unicode into US-ASCII - prepend_scheme: string, a protocol scheme to prepend to the URL if - we're having trouble parsing it. - e.g. "http". Input None to disable this functionality - - Returns: - string: a US-ASCII equivalent of the inputed url - - @author: Jonathan Benn - """ - # convert the authority component of the URL into an ASCII punycode string, - # but encode the rest using the regular URI character encoding - components = urlparse.urlparse(url) - prepended = False - # If no authority was found - if not components.netloc: - # Try appending a scheme to see if that fixes the problem - scheme_to_prepend = prepend_scheme or 'http' - components = urlparse.urlparse(to_unicode(scheme_to_prepend) + u'://' + url) - prepended = True - - # if we still can't find the authority - if not components.netloc: - raise Exception('No authority component found, ' + - 'could not decode unicode to US-ASCII') - - # We're here if we found an authority, let's rebuild the URL - scheme = components.scheme - authority = components.netloc - path = components.path - query = components.query - fragment = components.fragment - - if prepended: - scheme = '' - - unparsed = urlparse.urlunparse((scheme, - unicode_to_ascii_authority(authority), - escape_unicode(path), - '', - escape_unicode(query), - str(fragment))) - if unparsed.startswith('//'): - unparsed = unparsed[2:] # Remove the // urlunparse puts in the beginning - return unparsed - - -class IS_GENERIC_URL(Validator): - """ - Rejects a URL string if any of the following is true: - * The string is empty or None - * The string uses characters that are not allowed in a URL - * The URL scheme specified (if one is specified) is not valid - - Based on RFC 2396: http://www.faqs.org/rfcs/rfc2396.html - - This function only checks the URL's syntax. It does not check that the URL - points to a real document, for example, or that it otherwise makes sense - semantically. This function does automatically prepend 'http://' in front - of a URL if and only if that's necessary to successfully parse the URL. - Please note that a scheme will be prepended only for rare cases - (e.g. 'google.ca:80') - - The list of allowed schemes is customizable with the allowed_schemes - parameter. If you exclude None from the list, then abbreviated URLs - (lacking a scheme such as 'http') will be rejected. - - The default prepended scheme is customizable with the prepend_scheme - parameter. If you set prepend_scheme to None then prepending will be - disabled. URLs that require prepending to parse will still be accepted, - but the return value will not be modified. - - @author: Jonathan Benn - - >>> IS_GENERIC_URL()('http://user@abc.com') - ('http://user@abc.com', None) - - Args: - error_message: a string, the error message to give the end user - if the URL does not validate - allowed_schemes: a list containing strings or None. Each element - is a scheme the inputed URL is allowed to use - prepend_scheme: a string, this scheme is prepended if it's - necessary to make the URL valid - - """ - - def __init__( - self, - error_message='Enter a valid URL', - allowed_schemes=None, - prepend_scheme=None, - ): - - self.error_message = error_message - if allowed_schemes is None: - self.allowed_schemes = all_url_schemes - else: - self.allowed_schemes = allowed_schemes - self.prepend_scheme = prepend_scheme - if self.prepend_scheme not in self.allowed_schemes: - raise SyntaxError("prepend_scheme='%s' is not in allowed_schemes=%s" - % (self.prepend_scheme, self.allowed_schemes)) - - GENERIC_URL = re.compile(r"%[^0-9A-Fa-f]{2}|%[^0-9A-Fa-f][0-9A-Fa-f]|%[0-9A-Fa-f][^0-9A-Fa-f]|%$|%[0-9A-Fa-f]$|%[^0-9A-Fa-f]$") - GENERIC_URL_VALID = re.compile(r"[A-Za-z0-9;/?:@&=+$,\-_\.!~*'\(\)%]+$") - URL_FRAGMENT_VALID = re.compile(r"[|A-Za-z0-9;/?:@&=+$,\-_\.!~*'\(\)%]+$") - - def __call__(self, value): - """ - Args: - value: a string, the URL to validate - - Returns: - a tuple, where tuple[0] is the inputed value (possible - prepended with prepend_scheme), and tuple[1] is either - None (success!) or the string error_message - """ - - # if we dont have anything or the URL misuses the '%' character - - if not value or self.GENERIC_URL.search(value): - return (value, translate(self.error_message)) - - if '#' in value: - url, fragment_part = value.split('#') - else: - url, fragment_part = value, '' - # if the URL is only composed of valid characters - if self.GENERIC_URL_VALID.match(url) and (not fragment_part or self.URL_FRAGMENT_VALID.match(fragment_part)): - # Then parse the URL into its components and check on - try: - components = urlparse.urlparse(urllib_unquote(value))._asdict() - except ValueError: - return (value, translate(self.error_message)) - - # Clean up the scheme before we check it - scheme = components['scheme'] - if len(scheme) == 0: - scheme = None - else: - scheme = components['scheme'].lower() - # If the scheme doesn't really exists - if scheme not in self.allowed_schemes or not scheme and ':' in components['path']: - # for the possible case of abbreviated URLs with - # ports, check to see if adding a valid scheme fixes - # the problem (but only do this if it doesn't have - # one already!) - if '://' not in value and None in self.allowed_schemes: - schemeToUse = self.prepend_scheme or 'http' - prependTest = self.__call__( - schemeToUse + '://' + value) - # if the prepend test succeeded - if prependTest[1] is None: - # if prepending in the output is enabled - if self.prepend_scheme: - return prependTest - else: - return (value, None) - else: - return (value, None) - # else the URL is not valid - return (value, translate(self.error_message)) - -# Sources (obtained 2017-Nov-11): -# http://data.iana.org/TLD/tlds-alpha-by-domain.txt -# see scripts/parse_top_level_domains.py for an easy update - -official_top_level_domains = [ - # a - 'aaa', 'aarp', 'abarth', 'abb', 'abbott', 'abbvie', 'abc', - 'able', 'abogado', 'abudhabi', 'ac', 'academy', 'accenture', - 'accountant', 'accountants', 'aco', 'active', 'actor', 'ad', - 'adac', 'ads', 'adult', 'ae', 'aeg', 'aero', 'aetna', 'af', - 'afamilycompany', 'afl', 'africa', 'ag', 'agakhan', 'agency', - 'ai', 'aig', 'aigo', 'airbus', 'airforce', 'airtel', 'akdn', - 'al', 'alfaromeo', 'alibaba', 'alipay', 'allfinanz', 'allstate', - 'ally', 'alsace', 'alstom', 'am', 'americanexpress', - 'americanfamily', 'amex', 'amfam', 'amica', 'amsterdam', - 'analytics', 'android', 'anquan', 'anz', 'ao', 'aol', - 'apartments', 'app', 'apple', 'aq', 'aquarelle', 'ar', 'arab', - 'aramco', 'archi', 'army', 'arpa', 'art', 'arte', 'as', 'asda', - 'asia', 'associates', 'at', 'athleta', 'attorney', 'au', - 'auction', 'audi', 'audible', 'audio', 'auspost', 'author', - 'auto', 'autos', 'avianca', 'aw', 'aws', 'ax', 'axa', 'az', - 'azure', - # b - 'ba', 'baby', 'baidu', 'banamex', 'bananarepublic', 'band', - 'bank', 'bar', 'barcelona', 'barclaycard', 'barclays', - 'barefoot', 'bargains', 'baseball', 'basketball', 'bauhaus', - 'bayern', 'bb', 'bbc', 'bbt', 'bbva', 'bcg', 'bcn', 'bd', 'be', - 'beats', 'beauty', 'beer', 'bentley', 'berlin', 'best', - 'bestbuy', 'bet', 'bf', 'bg', 'bh', 'bharti', 'bi', 'bible', - 'bid', 'bike', 'bing', 'bingo', 'bio', 'biz', 'bj', 'black', - 'blackfriday', 'blanco', 'blockbuster', 'blog', 'bloomberg', - 'blue', 'bm', 'bms', 'bmw', 'bn', 'bnl', 'bnpparibas', 'bo', - 'boats', 'boehringer', 'bofa', 'bom', 'bond', 'boo', 'book', - 'booking', 'boots', 'bosch', 'bostik', 'boston', 'bot', - 'boutique', 'box', 'br', 'bradesco', 'bridgestone', 'broadway', - 'broker', 'brother', 'brussels', 'bs', 'bt', 'budapest', - 'bugatti', 'build', 'builders', 'business', 'buy', 'buzz', 'bv', - 'bw', 'by', 'bz', 'bzh', - # c - 'ca', 'cab', 'cafe', 'cal', 'call', 'calvinklein', 'cam', - 'camera', 'camp', 'cancerresearch', 'canon', 'capetown', - 'capital', 'capitalone', 'car', 'caravan', 'cards', 'care', - 'career', 'careers', 'cars', 'cartier', 'casa', 'case', 'caseih', - 'cash', 'casino', 'cat', 'catering', 'catholic', 'cba', 'cbn', - 'cbre', 'cbs', 'cc', 'cd', 'ceb', 'center', 'ceo', 'cern', 'cf', - 'cfa', 'cfd', 'cg', 'ch', 'chanel', 'channel', 'chase', 'chat', - 'cheap', 'chintai', 'christmas', 'chrome', 'chrysler', 'church', - 'ci', 'cipriani', 'circle', 'cisco', 'citadel', 'citi', 'citic', - 'city', 'cityeats', 'ck', 'cl', 'claims', 'cleaning', 'click', - 'clinic', 'clinique', 'clothing', 'cloud', 'club', 'clubmed', - 'cm', 'cn', 'co', 'coach', 'codes', 'coffee', 'college', - 'cologne', 'com', 'comcast', 'commbank', 'community', 'company', - 'compare', 'computer', 'comsec', 'condos', 'construction', - 'consulting', 'contact', 'contractors', 'cooking', - 'cookingchannel', 'cool', 'coop', 'corsica', 'country', 'coupon', - 'coupons', 'courses', 'cr', 'credit', 'creditcard', - 'creditunion', 'cricket', 'crown', 'crs', 'cruise', 'cruises', - 'csc', 'cu', 'cuisinella', 'cv', 'cw', 'cx', 'cy', 'cymru', - 'cyou', 'cz', - # d - 'dabur', 'dad', 'dance', 'data', 'date', 'dating', 'datsun', - 'day', 'dclk', 'dds', 'de', 'deal', 'dealer', 'deals', 'degree', - 'delivery', 'dell', 'deloitte', 'delta', 'democrat', 'dental', - 'dentist', 'desi', 'design', 'dev', 'dhl', 'diamonds', 'diet', - 'digital', 'direct', 'directory', 'discount', 'discover', 'dish', - 'diy', 'dj', 'dk', 'dm', 'dnp', 'do', 'docs', 'doctor', 'dodge', - 'dog', 'doha', 'domains', 'dot', 'download', 'drive', 'dtv', - 'dubai', 'duck', 'dunlop', 'duns', 'dupont', 'durban', 'dvag', - 'dvr', 'dz', - # e - 'earth', 'eat', 'ec', 'eco', 'edeka', 'edu', 'education', 'ee', - 'eg', 'email', 'emerck', 'energy', 'engineer', 'engineering', - 'enterprises', 'epost', 'epson', 'equipment', 'er', 'ericsson', - 'erni', 'es', 'esq', 'estate', 'esurance', 'et', 'etisalat', - 'eu', 'eurovision', 'eus', 'events', 'everbank', 'exchange', - 'expert', 'exposed', 'express', 'extraspace', - # f - 'fage', 'fail', 'fairwinds', 'faith', 'family', 'fan', 'fans', - 'farm', 'farmers', 'fashion', 'fast', 'fedex', 'feedback', - 'ferrari', 'ferrero', 'fi', 'fiat', 'fidelity', 'fido', 'film', - 'final', 'finance', 'financial', 'fire', 'firestone', 'firmdale', - 'fish', 'fishing', 'fit', 'fitness', 'fj', 'fk', 'flickr', - 'flights', 'flir', 'florist', 'flowers', 'fly', 'fm', 'fo', - 'foo', 'food', 'foodnetwork', 'football', 'ford', 'forex', - 'forsale', 'forum', 'foundation', 'fox', 'fr', 'free', - 'fresenius', 'frl', 'frogans', 'frontdoor', 'frontier', 'ftr', - 'fujitsu', 'fujixerox', 'fun', 'fund', 'furniture', 'futbol', - 'fyi', - # g - 'ga', 'gal', 'gallery', 'gallo', 'gallup', 'game', 'games', - 'gap', 'garden', 'gb', 'gbiz', 'gd', 'gdn', 'ge', 'gea', 'gent', - 'genting', 'george', 'gf', 'gg', 'ggee', 'gh', 'gi', 'gift', - 'gifts', 'gives', 'giving', 'gl', 'glade', 'glass', 'gle', - 'global', 'globo', 'gm', 'gmail', 'gmbh', 'gmo', 'gmx', 'gn', - 'godaddy', 'gold', 'goldpoint', 'golf', 'goo', 'goodhands', - 'goodyear', 'goog', 'google', 'gop', 'got', 'gov', 'gp', 'gq', - 'gr', 'grainger', 'graphics', 'gratis', 'green', 'gripe', - 'grocery', 'group', 'gs', 'gt', 'gu', 'guardian', 'gucci', - 'guge', 'guide', 'guitars', 'guru', 'gw', 'gy', - # h - 'hair', 'hamburg', 'hangout', 'haus', 'hbo', 'hdfc', 'hdfcbank', - 'health', 'healthcare', 'help', 'helsinki', 'here', 'hermes', - 'hgtv', 'hiphop', 'hisamitsu', 'hitachi', 'hiv', 'hk', 'hkt', - 'hm', 'hn', 'hockey', 'holdings', 'holiday', 'homedepot', - 'homegoods', 'homes', 'homesense', 'honda', 'honeywell', 'horse', - 'hospital', 'host', 'hosting', 'hot', 'hoteles', 'hotels', - 'hotmail', 'house', 'how', 'hr', 'hsbc', 'ht', 'hu', 'hughes', - 'hyatt', 'hyundai', - # i - 'ibm', 'icbc', 'ice', 'icu', 'id', 'ie', 'ieee', 'ifm', 'ikano', - 'il', 'im', 'imamat', 'imdb', 'immo', 'immobilien', 'in', - 'industries', 'infiniti', 'info', 'ing', 'ink', 'institute', - 'insurance', 'insure', 'int', 'intel', 'international', 'intuit', - 'investments', 'io', 'ipiranga', 'iq', 'ir', 'irish', 'is', - 'iselect', 'ismaili', 'ist', 'istanbul', 'it', 'itau', 'itv', - 'iveco', 'iwc', - # j - 'jaguar', 'java', 'jcb', 'jcp', 'je', 'jeep', 'jetzt', 'jewelry', - 'jio', 'jlc', 'jll', 'jm', 'jmp', 'jnj', 'jo', 'jobs', 'joburg', - 'jot', 'joy', 'jp', 'jpmorgan', 'jprs', 'juegos', 'juniper', - # k - 'kaufen', 'kddi', 'ke', 'kerryhotels', 'kerrylogistics', - 'kerryproperties', 'kfh', 'kg', 'kh', 'ki', 'kia', 'kim', - 'kinder', 'kindle', 'kitchen', 'kiwi', 'km', 'kn', 'koeln', - 'komatsu', 'kosher', 'kp', 'kpmg', 'kpn', 'kr', 'krd', 'kred', - 'kuokgroup', 'kw', 'ky', 'kyoto', 'kz', - # l - 'la', 'lacaixa', 'ladbrokes', 'lamborghini', 'lamer', - 'lancaster', 'lancia', 'lancome', 'land', 'landrover', 'lanxess', - 'lasalle', 'lat', 'latino', 'latrobe', 'law', 'lawyer', 'lb', - 'lc', 'lds', 'lease', 'leclerc', 'lefrak', 'legal', 'lego', - 'lexus', 'lgbt', 'li', 'liaison', 'lidl', 'life', - 'lifeinsurance', 'lifestyle', 'lighting', 'like', 'lilly', - 'limited', 'limo', 'lincoln', 'linde', 'link', 'lipsy', 'live', - 'living', 'lixil', 'lk', 'loan', 'loans', 'localhost', 'locker', - 'locus', 'loft', 'lol', 'london', 'lotte', 'lotto', 'love', - 'lpl', 'lplfinancial', 'lr', 'ls', 'lt', 'ltd', 'ltda', 'lu', - 'lundbeck', 'lupin', 'luxe', 'luxury', 'lv', 'ly', - # m - 'ma', 'macys', 'madrid', 'maif', 'maison', 'makeup', 'man', - 'management', 'mango', 'map', 'market', 'marketing', 'markets', - 'marriott', 'marshalls', 'maserati', 'mattel', 'mba', 'mc', - 'mckinsey', 'md', 'me', 'med', 'media', 'meet', 'melbourne', - 'meme', 'memorial', 'men', 'menu', 'meo', 'merckmsd', 'metlife', - 'mg', 'mh', 'miami', 'microsoft', 'mil', 'mini', 'mint', 'mit', - 'mitsubishi', 'mk', 'ml', 'mlb', 'mls', 'mm', 'mma', 'mn', 'mo', - 'mobi', 'mobile', 'mobily', 'moda', 'moe', 'moi', 'mom', - 'monash', 'money', 'monster', 'mopar', 'mormon', 'mortgage', - 'moscow', 'moto', 'motorcycles', 'mov', 'movie', 'movistar', - 'mp', 'mq', 'mr', 'ms', 'msd', 'mt', 'mtn', 'mtr', 'mu', - 'museum', 'mutual', 'mv', 'mw', 'mx', 'my', 'mz', - # n - 'na', 'nab', 'nadex', 'nagoya', 'name', 'nationwide', 'natura', - 'navy', 'nba', 'nc', 'ne', 'nec', 'net', 'netbank', 'netflix', - 'network', 'neustar', 'new', 'newholland', 'news', 'next', - 'nextdirect', 'nexus', 'nf', 'nfl', 'ng', 'ngo', 'nhk', 'ni', - 'nico', 'nike', 'nikon', 'ninja', 'nissan', 'nissay', 'nl', 'no', - 'nokia', 'northwesternmutual', 'norton', 'now', 'nowruz', - 'nowtv', 'np', 'nr', 'nra', 'nrw', 'ntt', 'nu', 'nyc', 'nz', - # o - 'obi', 'observer', 'off', 'office', 'okinawa', 'olayan', - 'olayangroup', 'oldnavy', 'ollo', 'om', 'omega', 'one', 'ong', - 'onl', 'online', 'onyourside', 'ooo', 'open', 'oracle', 'orange', - 'org', 'organic', 'origins', 'osaka', 'otsuka', 'ott', 'ovh', - # p - 'pa', 'page', 'panasonic', 'panerai', 'paris', 'pars', - 'partners', 'parts', 'party', 'passagens', 'pay', 'pccw', 'pe', - 'pet', 'pf', 'pfizer', 'pg', 'ph', 'pharmacy', 'phd', 'philips', - 'phone', 'photo', 'photography', 'photos', 'physio', 'piaget', - 'pics', 'pictet', 'pictures', 'pid', 'pin', 'ping', 'pink', - 'pioneer', 'pizza', 'pk', 'pl', 'place', 'play', 'playstation', - 'plumbing', 'plus', 'pm', 'pn', 'pnc', 'pohl', 'poker', - 'politie', 'porn', 'post', 'pr', 'pramerica', 'praxi', 'press', - 'prime', 'pro', 'prod', 'productions', 'prof', 'progressive', - 'promo', 'properties', 'property', 'protection', 'pru', - 'prudential', 'ps', 'pt', 'pub', 'pw', 'pwc', 'py', - # q - 'qa', 'qpon', 'quebec', 'quest', 'qvc', - # r - 'racing', 'radio', 'raid', 're', 'read', 'realestate', 'realtor', - 'realty', 'recipes', 'red', 'redstone', 'redumbrella', 'rehab', - 'reise', 'reisen', 'reit', 'reliance', 'ren', 'rent', 'rentals', - 'repair', 'report', 'republican', 'rest', 'restaurant', 'review', - 'reviews', 'rexroth', 'rich', 'richardli', 'ricoh', - 'rightathome', 'ril', 'rio', 'rip', 'rmit', 'ro', 'rocher', - 'rocks', 'rodeo', 'rogers', 'room', 'rs', 'rsvp', 'ru', 'rugby', - 'ruhr', 'run', 'rw', 'rwe', 'ryukyu', - # s - 'sa', 'saarland', 'safe', 'safety', 'sakura', 'sale', 'salon', - 'samsclub', 'samsung', 'sandvik', 'sandvikcoromant', 'sanofi', - 'sap', 'sapo', 'sarl', 'sas', 'save', 'saxo', 'sb', 'sbi', 'sbs', - 'sc', 'sca', 'scb', 'schaeffler', 'schmidt', 'scholarships', - 'school', 'schule', 'schwarz', 'science', 'scjohnson', 'scor', - 'scot', 'sd', 'se', 'search', 'seat', 'secure', 'security', - 'seek', 'select', 'sener', 'services', 'ses', 'seven', 'sew', - 'sex', 'sexy', 'sfr', 'sg', 'sh', 'shangrila', 'sharp', 'shaw', - 'shell', 'shia', 'shiksha', 'shoes', 'shop', 'shopping', - 'shouji', 'show', 'showtime', 'shriram', 'si', 'silk', 'sina', - 'singles', 'site', 'sj', 'sk', 'ski', 'skin', 'sky', 'skype', - 'sl', 'sling', 'sm', 'smart', 'smile', 'sn', 'sncf', 'so', - 'soccer', 'social', 'softbank', 'software', 'sohu', 'solar', - 'solutions', 'song', 'sony', 'soy', 'space', 'spiegel', 'spot', - 'spreadbetting', 'sr', 'srl', 'srt', 'st', 'stada', 'staples', - 'star', 'starhub', 'statebank', 'statefarm', 'statoil', 'stc', - 'stcgroup', 'stockholm', 'storage', 'store', 'stream', 'studio', - 'study', 'style', 'su', 'sucks', 'supplies', 'supply', 'support', - 'surf', 'surgery', 'suzuki', 'sv', 'swatch', 'swiftcover', - 'swiss', 'sx', 'sy', 'sydney', 'symantec', 'systems', 'sz', - # t - 'tab', 'taipei', 'talk', 'taobao', 'target', 'tatamotors', - 'tatar', 'tattoo', 'tax', 'taxi', 'tc', 'tci', 'td', 'tdk', - 'team', 'tech', 'technology', 'tel', 'telecity', 'telefonica', - 'temasek', 'tennis', 'teva', 'tf', 'tg', 'th', 'thd', 'theater', - 'theatre', 'tiaa', 'tickets', 'tienda', 'tiffany', 'tips', - 'tires', 'tirol', 'tj', 'tjmaxx', 'tjx', 'tk', 'tkmaxx', 'tl', - 'tm', 'tmall', 'tn', 'to', 'today', 'tokyo', 'tools', 'top', - 'toray', 'toshiba', 'total', 'tours', 'town', 'toyota', 'toys', - 'tr', 'trade', 'trading', 'training', 'travel', 'travelchannel', - 'travelers', 'travelersinsurance', 'trust', 'trv', 'tt', 'tube', - 'tui', 'tunes', 'tushu', 'tv', 'tvs', 'tw', 'tz', - # u - 'ua', 'ubank', 'ubs', 'uconnect', 'ug', 'uk', 'unicom', - 'university', 'uno', 'uol', 'ups', 'us', 'uy', 'uz', - # v - 'va', 'vacations', 'vana', 'vanguard', 'vc', 've', 'vegas', - 'ventures', 'verisign', 'versicherung', 'vet', 'vg', 'vi', - 'viajes', 'video', 'vig', 'viking', 'villas', 'vin', 'vip', - 'virgin', 'visa', 'vision', 'vista', 'vistaprint', 'viva', - 'vivo', 'vlaanderen', 'vn', 'vodka', 'volkswagen', 'volvo', - 'vote', 'voting', 'voto', 'voyage', 'vu', 'vuelos', - # w - 'wales', 'walmart', 'walter', 'wang', 'wanggou', 'warman', - 'watch', 'watches', 'weather', 'weatherchannel', 'webcam', - 'weber', 'website', 'wed', 'wedding', 'weibo', 'weir', 'wf', - 'whoswho', 'wien', 'wiki', 'williamhill', 'win', 'windows', - 'wine', 'winners', 'wme', 'wolterskluwer', 'woodside', 'work', - 'works', 'world', 'wow', 'ws', 'wtc', 'wtf', - # x - 'xbox', 'xerox', 'xfinity', 'xihuan', 'xin', 'xn--11b4c3d', - 'xn--1ck2e1b', 'xn--1qqw23a', 'xn--2scrj9c', 'xn--30rr7y', - 'xn--3bst00m', 'xn--3ds443g', 'xn--3e0b707e', 'xn--3hcrj9c', - 'xn--3oq18vl8pn36a', 'xn--3pxu8k', 'xn--42c2d9a', 'xn--45br5cyl', - 'xn--45brj9c', 'xn--45q11c', 'xn--4gbrim', 'xn--54b7fta0cc', - 'xn--55qw42g', 'xn--55qx5d', 'xn--5su34j936bgsg', 'xn--5tzm5g', - 'xn--6frz82g', 'xn--6qq986b3xl', 'xn--80adxhks', 'xn--80ao21a', - 'xn--80aqecdr1a', 'xn--80asehdb', 'xn--80aswg', 'xn--8y0a063a', - 'xn--90a3ac', 'xn--90ae', 'xn--90ais', 'xn--9dbq2a', - 'xn--9et52u', 'xn--9krt00a', 'xn--b4w605ferd', - 'xn--bck1b9a5dre4c', 'xn--c1avg', 'xn--c2br7g', 'xn--cck2b3b', - 'xn--cg4bki', 'xn--clchc0ea0b2g2a9gcd', 'xn--czr694b', - 'xn--czrs0t', 'xn--czru2d', 'xn--d1acj3b', 'xn--d1alf', - 'xn--e1a4c', 'xn--eckvdtc9d', 'xn--efvy88h', 'xn--estv75g', - 'xn--fct429k', 'xn--fhbei', 'xn--fiq228c5hs', 'xn--fiq64b', - 'xn--fiqs8s', 'xn--fiqz9s', 'xn--fjq720a', 'xn--flw351e', - 'xn--fpcrj9c3d', 'xn--fzc2c9e2c', 'xn--fzys8d69uvgm', - 'xn--g2xx48c', 'xn--gckr3f0f', 'xn--gecrj9c', 'xn--gk3at1e', - 'xn--h2breg3eve', 'xn--h2brj9c', 'xn--h2brj9c8c', 'xn--hxt814e', - 'xn--i1b6b1a6a2e', 'xn--imr513n', 'xn--io0a7i', 'xn--j1aef', - 'xn--j1amh', 'xn--j6w193g', 'xn--jlq61u9w7b', 'xn--jvr189m', - 'xn--kcrx77d1x4a', 'xn--kprw13d', 'xn--kpry57d', 'xn--kpu716f', - 'xn--kput3i', 'xn--l1acc', 'xn--lgbbat1ad8j', 'xn--mgb9awbf', - 'xn--mgba3a3ejt', 'xn--mgba3a4f16a', 'xn--mgba7c0bbn0a', - 'xn--mgbaakc7dvf', 'xn--mgbaam7a8h', 'xn--mgbab2bd', - 'xn--mgbai9azgqp6j', 'xn--mgbayh7gpa', 'xn--mgbb9fbpob', - 'xn--mgbbh1a', 'xn--mgbbh1a71e', 'xn--mgbc0a9azcg', - 'xn--mgbca7dzdo', 'xn--mgberp4a5d4ar', 'xn--mgbgu82a', - 'xn--mgbi4ecexp', 'xn--mgbpl2fh', 'xn--mgbt3dhd', 'xn--mgbtx2b', - 'xn--mgbx4cd0ab', 'xn--mix891f', 'xn--mk1bu44c', 'xn--mxtq1m', - 'xn--ngbc5azd', 'xn--ngbe9e0a', 'xn--ngbrx', 'xn--node', - 'xn--nqv7f', 'xn--nqv7fs00ema', 'xn--nyqy26a', 'xn--o3cw4h', - 'xn--ogbpf8fl', 'xn--p1acf', 'xn--p1ai', 'xn--pbt977c', - 'xn--pgbs0dh', 'xn--pssy2u', 'xn--q9jyb4c', 'xn--qcka1pmc', - 'xn--qxam', 'xn--rhqv96g', 'xn--rovu88b', 'xn--rvc1e0am3e', - 'xn--s9brj9c', 'xn--ses554g', 'xn--t60b56a', 'xn--tckwe', - 'xn--tiq49xqyj', 'xn--unup4y', 'xn--vermgensberater-ctb', - 'xn--vermgensberatung-pwb', 'xn--vhquv', 'xn--vuq861b', - 'xn--w4r85el8fhu5dnra', 'xn--w4rs40l', 'xn--wgbh1c', - 'xn--wgbl6a', 'xn--xhq521b', 'xn--xkc2al3hye2a', - 'xn--xkc2dl3a5ee0h', 'xn--y9a3aq', 'xn--yfro4i67o', - 'xn--ygbi2ammx', 'xn--zfr164b', 'xperia', 'xxx', 'xyz', - # y - 'yachts', 'yahoo', 'yamaxun', 'yandex', 'ye', 'yodobashi', - 'yoga', 'yokohama', 'you', 'youtube', 'yt', 'yun', - # z - 'za', 'zappos', 'zara', 'zero', 'zip', 'zippo', 'zm', 'zone', - 'zuerich', 'zw' -] - - -class IS_HTTP_URL(Validator): - """ - Rejects a URL string if any of the following is true: - * The string is empty or None - * The string uses characters that are not allowed in a URL - * The string breaks any of the HTTP syntactic rules - * The URL scheme specified (if one is specified) is not 'http' or 'https' - * The top-level domain (if a host name is specified) does not exist - - Based on RFC 2616: http://www.faqs.org/rfcs/rfc2616.html - - This function only checks the URL's syntax. It does not check that the URL - points to a real document, for example, or that it otherwise makes sense - semantically. This function does automatically prepend 'http://' in front - of a URL in the case of an abbreviated URL (e.g. 'google.ca'). - - The list of allowed schemes is customizable with the allowed_schemes - parameter. If you exclude None from the list, then abbreviated URLs - (lacking a scheme such as 'http') will be rejected. - - The default prepended scheme is customizable with the prepend_scheme - parameter. If you set prepend_scheme to None then prepending will be - disabled. URLs that require prepending to parse will still be accepted, - but the return value will not be modified. - - @author: Jonathan Benn - - >>> IS_HTTP_URL()('http://1.2.3.4') - ('http://1.2.3.4', None) - >>> IS_HTTP_URL()('http://abc.com') - ('http://abc.com', None) - >>> IS_HTTP_URL()('https://abc.com') - ('https://abc.com', None) - >>> IS_HTTP_URL()('httpx://abc.com') - ('httpx://abc.com', 'enter a valid URL') - >>> IS_HTTP_URL()('http://abc.com:80') - ('http://abc.com:80', None) - >>> IS_HTTP_URL()('http://user@abc.com') - ('http://user@abc.com', None) - >>> IS_HTTP_URL()('http://user@1.2.3.4') - ('http://user@1.2.3.4', None) - - Args: - error_message: a string, the error message to give the end user - if the URL does not validate - allowed_schemes: a list containing strings or None. Each element - is a scheme the inputed URL is allowed to use - prepend_scheme: a string, this scheme is prepended if it's - necessary to make the URL valid - """ - - GENERIC_VALID_IP = re.compile( - "([\w.!~*'|;:&=+$,-]+@)?\d+\.\d+\.\d+\.\d+(:\d*)*$") - GENERIC_VALID_DOMAIN = re.compile("([\w.!~*'|;:&=+$,-]+@)?(([A-Za-z0-9]+[A-Za-z0-9\-]*[A-Za-z0-9]+\.)*([A-Za-z0-9]+\.)*)*([A-Za-z]+[A-Za-z0-9\-]*[A-Za-z0-9]+)\.?(:\d*)*$") - - def __init__( - self, - error_message='Enter a valid URL', - allowed_schemes=None, - prepend_scheme='http', - allowed_tlds=None - ): - - self.error_message = error_message - if allowed_schemes is None: - self.allowed_schemes = http_schemes - else: - self.allowed_schemes = allowed_schemes - if allowed_tlds is None: - self.allowed_tlds = official_top_level_domains - else: - self.allowed_tlds = allowed_tlds - self.prepend_scheme = prepend_scheme - - for i in self.allowed_schemes: - if i not in http_schemes: - raise SyntaxError("allowed_scheme value '%s' is not in %s" % - (i, http_schemes)) - - if self.prepend_scheme not in self.allowed_schemes: - raise SyntaxError("prepend_scheme='%s' is not in allowed_schemes=%s" % - (self.prepend_scheme, self.allowed_schemes)) - - def __call__(self, value): - """ - Args: - value: a string, the URL to validate - - Returns: - a tuple, where tuple[0] is the inputed value - (possible prepended with prepend_scheme), and tuple[1] is either - None (success!) or the string error_message - """ - try: - # if the URL passes generic validation - x = IS_GENERIC_URL(error_message=self.error_message, - allowed_schemes=self.allowed_schemes, - prepend_scheme=self.prepend_scheme) - if x(value)[1] is None: - components = urlparse.urlparse(value) - authority = components.netloc - # if there is an authority component - if authority: - # if authority is a valid IP address - if self.GENERIC_VALID_IP.match(authority): - # Then this HTTP URL is valid - return (value, None) - else: - # else if authority is a valid domain name - domainMatch = self.GENERIC_VALID_DOMAIN.match( - authority) - if domainMatch: - # if the top-level domain really exists - if domainMatch.group(5).lower()\ - in self.allowed_tlds: - # Then this HTTP URL is valid - return (value, None) - else: - # else this is a relative/abbreviated URL, which will parse - # into the URL's path component - path = components.path - # relative case: if this is a valid path (if it starts with - # a slash) - if path.startswith('/'): - # Then this HTTP URL is valid - return (value, None) - else: - # abbreviated case: if we haven't already, prepend a - # scheme and see if it fixes the problem - if '://' not in value and None in self.allowed_schemes: - schemeToUse = self.prepend_scheme or 'http' - prependTest = self.__call__(schemeToUse - + '://' + value) - # if the prepend test succeeded - if prependTest[1] is None: - # if prepending in the output is enabled - if self.prepend_scheme: - return prependTest - else: - # else return the original, non-prepended - # value - return (value, None) - except: - pass - # else the HTTP URL is not valid - return (value, translate(self.error_message)) - - -class IS_URL(Validator): - """ - Rejects a URL string if any of the following is true: - - * The string is empty or None - * The string uses characters that are not allowed in a URL - * The string breaks any of the HTTP syntactic rules - * The URL scheme specified (if one is specified) is not 'http' or 'https' - * The top-level domain (if a host name is specified) does not exist - - (These rules are based on RFC 2616: http://www.faqs.org/rfcs/rfc2616.html) - - This function only checks the URL's syntax. It does not check that the URL - points to a real document, for example, or that it otherwise makes sense - semantically. This function does automatically prepend 'http://' in front - of a URL in the case of an abbreviated URL (e.g. 'google.ca'). - - If the parameter mode='generic' is used, then this function's behavior - changes. It then rejects a URL string if any of the following is true: - - * The string is empty or None - * The string uses characters that are not allowed in a URL - * The URL scheme specified (if one is specified) is not valid - - (These rules are based on RFC 2396: http://www.faqs.org/rfcs/rfc2396.html) - - The list of allowed schemes is customizable with the allowed_schemes - parameter. If you exclude None from the list, then abbreviated URLs - (lacking a scheme such as 'http') will be rejected. - - The default prepended scheme is customizable with the prepend_scheme - parameter. If you set prepend_scheme to None then prepending will be - disabled. URLs that require prepending to parse will still be accepted, - but the return value will not be modified. - - IS_URL is compatible with the Internationalized Domain Name (IDN) standard - specified in RFC 3490 (http://tools.ietf.org/html/rfc3490). As a result, - URLs can be regular strings or unicode strings. - If the URL's domain component (e.g. google.ca) contains non-US-ASCII - letters, then the domain will be converted into Punycode (defined in - RFC 3492, http://tools.ietf.org/html/rfc3492). IS_URL goes a bit beyond - the standards, and allows non-US-ASCII characters to be present in the path - and query components of the URL as well. These non-US-ASCII characters will - be escaped using the standard '%20' type syntax. e.g. the unicode - character with hex code 0x4e86 will become '%4e%86' - - Args: - error_message: a string, the error message to give the end user - if the URL does not validate - allowed_schemes: a list containing strings or None. Each element - is a scheme the inputed URL is allowed to use - prepend_scheme: a string, this scheme is prepended if it's - necessary to make the URL valid - - Code Examples:: - - INPUT(_type='text', _name='name', requires=IS_URL()) - >>> IS_URL()('abc.com') - ('http://abc.com', None) - - INPUT(_type='text', _name='name', requires=IS_URL(mode='generic')) - >>> IS_URL(mode='generic')('abc.com') - ('abc.com', None) - - INPUT(_type='text', _name='name', - requires=IS_URL(allowed_schemes=['https'], prepend_scheme='https')) - >>> IS_URL(allowed_schemes=['https'], prepend_scheme='https')('https://abc.com') - ('https://abc.com', None) - - INPUT(_type='text', _name='name', - requires=IS_URL(prepend_scheme='https')) - >>> IS_URL(prepend_scheme='https')('abc.com') - ('https://abc.com', None) - - INPUT(_type='text', _name='name', - requires=IS_URL(mode='generic', allowed_schemes=['ftps', 'https'], - prepend_scheme='https')) - >>> IS_URL(mode='generic', allowed_schemes=['ftps', 'https'], prepend_scheme='https')('https://abc.com') - ('https://abc.com', None) - >>> IS_URL(mode='generic', allowed_schemes=['ftps', 'https', None], prepend_scheme='https')('abc.com') - ('abc.com', None) - - @author: Jonathan Benn - """ - - def __init__( - self, - error_message='Enter a valid URL', - mode='http', - allowed_schemes=None, - prepend_scheme='http', - allowed_tlds=None - ): - - self.error_message = error_message - self.mode = mode.lower() - if self.mode not in ['generic', 'http']: - raise SyntaxError("invalid mode '%s' in IS_URL" % self.mode) - self.allowed_schemes = allowed_schemes - if allowed_tlds is None: - self.allowed_tlds = official_top_level_domains - else: - self.allowed_tlds = allowed_tlds - - if self.allowed_schemes: - if prepend_scheme not in self.allowed_schemes: - raise SyntaxError("prepend_scheme='%s' is not in allowed_schemes=%s" - % (prepend_scheme, self.allowed_schemes)) - - # if allowed_schemes is None, then we will defer testing - # prepend_scheme's validity to a sub-method - - self.prepend_scheme = prepend_scheme - - def __call__(self, value): - """ - Args: - value: a unicode or regular string, the URL to validate - - Returns: - a (string, string) tuple, where tuple[0] is the modified - input value and tuple[1] is either None (success!) or the - string error_message. The input value will never be modified in the - case of an error. However, if there is success then the input URL - may be modified to (1) prepend a scheme, and/or (2) convert a - non-compliant unicode URL into a compliant US-ASCII version. - """ - if self.mode == 'generic': - subMethod = IS_GENERIC_URL(error_message=self.error_message, - allowed_schemes=self.allowed_schemes, - prepend_scheme=self.prepend_scheme) - elif self.mode == 'http': - subMethod = IS_HTTP_URL(error_message=self.error_message, - allowed_schemes=self.allowed_schemes, - prepend_scheme=self.prepend_scheme, - allowed_tlds=self.allowed_tlds) - else: - raise SyntaxError("invalid mode '%s' in IS_URL" % self.mode) - - if not isinstance(value, unicodeT): - return subMethod(value) - else: - try: - asciiValue = unicode_to_ascii_url(value, self.prepend_scheme) - except Exception as e: - # If we are not able to convert the unicode url into a - # US-ASCII URL, then the URL is not valid - return (value, translate(self.error_message)) - methodResult = subMethod(asciiValue) - # if the validation of the US-ASCII version of the value failed - if not methodResult[1] is None: - # then return the original input value, not the US-ASCII version - return (value, methodResult[1]) - else: - return methodResult - - -regex_time = re.compile( - '((?P[0-9]+))([^0-9 ]+(?P[0-9 ]+))?([^0-9ap ]+(?P[0-9]*))?((?P[ap]m))?') - - -class IS_TIME(Validator): - """ - Example: - Use as:: - - INPUT(_type='text', _name='name', requires=IS_TIME()) - - understands the following formats - hh:mm:ss [am/pm] - hh:mm [am/pm] - hh [am/pm] - - [am/pm] is optional, ':' can be replaced by any other non-space non-digit:: - - >>> IS_TIME()('21:30') - (datetime.time(21, 30), None) - >>> IS_TIME()('21-30') - (datetime.time(21, 30), None) - >>> IS_TIME()('21.30') - (datetime.time(21, 30), None) - >>> IS_TIME()('21:30:59') - (datetime.time(21, 30, 59), None) - >>> IS_TIME()('5:30') - (datetime.time(5, 30), None) - >>> IS_TIME()('5:30 am') - (datetime.time(5, 30), None) - >>> IS_TIME()('5:30 pm') - (datetime.time(17, 30), None) - >>> IS_TIME()('5:30 whatever') - ('5:30 whatever', 'enter time as hh:mm:ss (seconds, am, pm optional)') - >>> IS_TIME()('5:30 20') - ('5:30 20', 'enter time as hh:mm:ss (seconds, am, pm optional)') - >>> IS_TIME()('24:30') - ('24:30', 'enter time as hh:mm:ss (seconds, am, pm optional)') - >>> IS_TIME()('21:60') - ('21:60', 'enter time as hh:mm:ss (seconds, am, pm optional)') - >>> IS_TIME()('21:30::') - ('21:30::', 'enter time as hh:mm:ss (seconds, am, pm optional)') - >>> IS_TIME()('') - ('', 'enter time as hh:mm:ss (seconds, am, pm optional)')ù - - """ - - def __init__(self, error_message='Enter time as hh:mm:ss (seconds, am, pm optional)'): - self.error_message = error_message - - def __call__(self, value): - try: - ivalue = value - value = regex_time.match(value.lower()) - (h, m, s) = (int(value.group('h')), 0, 0) - if not value.group('m') is None: - m = int(value.group('m')) - if not value.group('s') is None: - s = int(value.group('s')) - if value.group('d') == 'pm' and 0 < h < 12: - h += 12 - if value.group('d') == 'am' and h == 12: - h = 0 - if not (h in range(24) and m in range(60) and s - in range(60)): - raise ValueError('Hours or minutes or seconds are outside of allowed range') - value = datetime.time(h, m, s) - return (value, None) - except AttributeError: - pass - except ValueError: - pass - return (ivalue, translate(self.error_message)) - - -# A UTC class. -class UTC(datetime.tzinfo): - """UTC""" - ZERO = datetime.timedelta(0) - - def utcoffset(self, dt): - return UTC.ZERO - - def tzname(self, dt): - return "UTC" - - def dst(self, dt): - return UTC.ZERO -utc = UTC() - - -class IS_DATE(Validator): - """ - Examples: - Use as:: - - INPUT(_type='text', _name='name', requires=IS_DATE()) - - date has to be in the ISO8960 format YYYY-MM-DD - """ - - def __init__(self, format='%Y-%m-%d', - error_message='Enter date as %(format)s'): - self.format = translate(format) - self.error_message = str(error_message) - self.extremes = {} - - def __call__(self, value): - ovalue = value - if isinstance(value, datetime.date): - return (value, None) - try: - (y, m, d, hh, mm, ss, t0, t1, t2) = \ - time.strptime(value, str(self.format)) - value = datetime.date(y, m, d) - return (value, None) - except: - self.extremes.update(IS_DATETIME.nice(self.format)) - return (ovalue, translate(self.error_message) % self.extremes) - - def formatter(self, value): - if value is None: - return None - format = self.format - year = value.year - y = '%.4i' % year - format = format.replace('%y', y[-2:]) - format = format.replace('%Y', y) - if year < 1900: - year = 2000 - d = datetime.date(year, value.month, value.day) - return d.strftime(format) - - -class IS_DATETIME(Validator): - """ - Examples: - Use as:: - - INPUT(_type='text', _name='name', requires=IS_DATETIME()) - - datetime has to be in the ISO8960 format YYYY-MM-DD hh:mm:ss - timezome must be None or a pytz.timezone("America/Chicago") object - """ - - isodatetime = '%Y-%m-%d %H:%M:%S' - - @staticmethod - def nice(format): - code = (('%Y', '1963'), - ('%y', '63'), - ('%d', '28'), - ('%m', '08'), - ('%b', 'Aug'), - ('%B', 'August'), - ('%H', '14'), - ('%I', '02'), - ('%p', 'PM'), - ('%M', '30'), - ('%S', '59')) - for (a, b) in code: - format = format.replace(a, b) - return dict(format=format) - - def __init__(self, format='%Y-%m-%d %H:%M:%S', - error_message='Enter date and time as %(format)s', - timezone=None): - self.format = translate(format) - self.error_message = str(error_message) - self.extremes = {} - self.timezone = timezone - - def __call__(self, value): - ovalue = value - if isinstance(value, datetime.datetime): - return (value, None) - try: - (y, m, d, hh, mm, ss, t0, t1, t2) = \ - time.strptime(value, str(self.format)) - value = datetime.datetime(y, m, d, hh, mm, ss) - if self.timezone is not None: - # TODO: https://github.com/web2py/web2py/issues/1094 (temporary solution) - value = self.timezone.localize(value).astimezone(utc).replace(tzinfo=None) - return (value, None) - except: - self.extremes.update(IS_DATETIME.nice(self.format)) - return (ovalue, translate(self.error_message) % self.extremes) - - def formatter(self, value): - if value is None: - return None - format = self.format - year = value.year - y = '%.4i' % year - format = format.replace('%y', y[-2:]) - format = format.replace('%Y', y) - if year < 1900: - year = 2000 - d = datetime.datetime(year, value.month, value.day, - value.hour, value.minute, value.second) - if self.timezone is not None: - d = d.replace(tzinfo=utc).astimezone(self.timezone) - return d.strftime(format) - - -class IS_DATE_IN_RANGE(IS_DATE): - """ - Examples: - Use as:: - - >>> v = IS_DATE_IN_RANGE(minimum=datetime.date(2008,1,1), \ - maximum=datetime.date(2009,12,31), \ - format="%m/%d/%Y",error_message="Oops") - - >>> v('03/03/2008') - (datetime.date(2008, 3, 3), None) - - >>> v('03/03/2010') - ('03/03/2010', 'oops') - - >>> v(datetime.date(2008,3,3)) - (datetime.date(2008, 3, 3), None) - - >>> v(datetime.date(2010,3,3)) - (datetime.date(2010, 3, 3), 'oops') - - """ - - def __init__(self, - minimum=None, - maximum=None, - format='%Y-%m-%d', - error_message=None): - self.minimum = minimum - self.maximum = maximum - if error_message is None: - if minimum is None: - error_message = "Enter date on or before %(max)s" - elif maximum is None: - error_message = "Enter date on or after %(min)s" - else: - error_message = "Enter date in range %(min)s %(max)s" - IS_DATE.__init__(self, - format=format, - error_message=error_message) - self.extremes = dict(min=self.formatter(minimum), - max=self.formatter(maximum)) - - def __call__(self, value): - ovalue = value - (value, msg) = IS_DATE.__call__(self, value) - if msg is not None: - return (value, msg) - if self.minimum and self.minimum > value: - return (ovalue, translate(self.error_message) % self.extremes) - if self.maximum and value > self.maximum: - return (ovalue, translate(self.error_message) % self.extremes) - return (value, None) - - -class IS_DATETIME_IN_RANGE(IS_DATETIME): - """ - Examples: - Use as:: - >>> v = IS_DATETIME_IN_RANGE(\ - minimum=datetime.datetime(2008,1,1,12,20), \ - maximum=datetime.datetime(2009,12,31,12,20), \ - format="%m/%d/%Y %H:%M",error_message="Oops") - >>> v('03/03/2008 12:40') - (datetime.datetime(2008, 3, 3, 12, 40), None) - - >>> v('03/03/2010 10:34') - ('03/03/2010 10:34', 'oops') - - >>> v(datetime.datetime(2008,3,3,0,0)) - (datetime.datetime(2008, 3, 3, 0, 0), None) - - >>> v(datetime.datetime(2010,3,3,0,0)) - (datetime.datetime(2010, 3, 3, 0, 0), 'oops') - - """ - - def __init__(self, - minimum=None, - maximum=None, - format='%Y-%m-%d %H:%M:%S', - error_message=None, - timezone=None): - self.minimum = minimum - self.maximum = maximum - if error_message is None: - if minimum is None: - error_message = "Enter date and time on or before %(max)s" - elif maximum is None: - error_message = "Enter date and time on or after %(min)s" - else: - error_message = "Enter date and time in range %(min)s %(max)s" - IS_DATETIME.__init__(self, - format=format, - error_message=error_message, - timezone=timezone) - self.extremes = dict(min=self.formatter(minimum), - max=self.formatter(maximum)) - - def __call__(self, value): - ovalue = value - (value, msg) = IS_DATETIME.__call__(self, value) - if msg is not None: - return (value, msg) - if self.minimum and self.minimum > value: - return (ovalue, translate(self.error_message) % self.extremes) - if self.maximum and value > self.maximum: - return (ovalue, translate(self.error_message) % self.extremes) - return (value, None) - - -class IS_LIST_OF(Validator): - - def __init__(self, other=None, minimum=None, maximum=None, error_message=None): - self.other = other - self.minimum = minimum - self.maximum = maximum - self.error_message = error_message - - def __call__(self, value): - ivalue = value - if not isinstance(value, list): - ivalue = [ivalue] - ivalue = [i for i in ivalue if str(i).strip()] - if self.minimum is not None and len(ivalue) < self.minimum: - return (ivalue, translate(self.error_message or - 'Minimum length is %(min)s') % dict(min=self.minimum, max=self.maximum)) - if self.maximum is not None and len(ivalue) > self.maximum: - return (ivalue, translate(self.error_message or - 'Maximum length is %(max)s') % dict(min=self.minimum, max=self.maximum)) - new_value = [] - other = self.other - if self.other: - if not isinstance(other, (list, tuple)): - other = [other] - for item in ivalue: - v = item - for validator in other: - (v, e) = validator(v) - if e: - return (ivalue, e) - new_value.append(v) - ivalue = new_value - return (ivalue, None) - - -class IS_LOWER(Validator): - """ - Converts to lowercase:: - - >>> IS_LOWER()('ABC') - ('abc', None) - >>> IS_LOWER()('Ñ') - ('\\xc3\\xb1', None) - - """ - - def __call__(self, value): - cast_back = lambda x: x - if isinstance(value, str): - cast_back = to_native - elif isinstance(value, bytes): - cast_back = to_bytes - value = to_unicode(value).lower() - return (cast_back(value), None) - - -class IS_UPPER(Validator): - """ - Converts to uppercase:: - - >>> IS_UPPER()('abc') - ('ABC', None) - >>> IS_UPPER()('ñ') - ('\\xc3\\x91', None) - - """ - - def __call__(self, value): - cast_back = lambda x: x - if isinstance(value, str): - cast_back = to_native - elif isinstance(value, bytes): - cast_back = to_bytes - value = to_unicode(value).upper() - return (cast_back(value), None) - - -def urlify(s, maxlen=80, keep_underscores=False): - """ - Converts incoming string to a simplified ASCII subset. - if (keep_underscores): underscores are retained in the string - else: underscores are translated to hyphens (default) - """ - s = to_unicode(s) # to unicode - s = s.lower() # to lowercase - s = unicodedata.normalize('NFKD', s) # replace special characters - s = to_native(s, charset='ascii', errors='ignore') # encode as ASCII - s = re.sub('&\w+?;', '', s) # strip html entities - if keep_underscores: - s = re.sub('\s+', '-', s) # whitespace to hyphens - s = re.sub('[^\w\-]', '', s) - # strip all but alphanumeric/underscore/hyphen - else: - s = re.sub('[\s_]+', '-', s) # whitespace & underscores to hyphens - s = re.sub('[^a-z0-9\-]', '', s) # strip all but alphanumeric/hyphen - s = re.sub('[-_][-_]+', '-', s) # collapse strings of hyphens - s = s.strip('-') # remove leading and trailing hyphens - return s[:maxlen] # enforce maximum length - - -class IS_SLUG(Validator): - """ - converts arbitrary text string to a slug:: - - >>> IS_SLUG()('abc123') - ('abc123', None) - >>> IS_SLUG()('ABC123') - ('abc123', None) - >>> IS_SLUG()('abc-123') - ('abc-123', None) - >>> IS_SLUG()('abc--123') - ('abc-123', None) - >>> IS_SLUG()('abc 123') - ('abc-123', None) - >>> IS_SLUG()('abc\t_123') - ('abc-123', None) - >>> IS_SLUG()('-abc-') - ('abc', None) - >>> IS_SLUG()('--a--b--_ -c--') - ('a-b-c', None) - >>> IS_SLUG()('abc&123') - ('abc123', None) - >>> IS_SLUG()('abc&123&def') - ('abc123def', None) - >>> IS_SLUG()('ñ') - ('n', None) - >>> IS_SLUG(maxlen=4)('abc123') - ('abc1', None) - >>> IS_SLUG()('abc_123') - ('abc-123', None) - >>> IS_SLUG(keep_underscores=False)('abc_123') - ('abc-123', None) - >>> IS_SLUG(keep_underscores=True)('abc_123') - ('abc_123', None) - >>> IS_SLUG(check=False)('abc') - ('abc', None) - >>> IS_SLUG(check=True)('abc') - ('abc', None) - >>> IS_SLUG(check=False)('a bc') - ('a-bc', None) - >>> IS_SLUG(check=True)('a bc') - ('a bc', 'must be slug') - """ - - @staticmethod - def urlify(value, maxlen=80, keep_underscores=False): - return urlify(value, maxlen, keep_underscores) - - def __init__(self, maxlen=80, check=False, error_message='Must be slug', keep_underscores=False): - self.maxlen = maxlen - self.check = check - self.error_message = error_message - self.keep_underscores = keep_underscores - - def __call__(self, value): - if self.check and value != urlify(value, self.maxlen, self.keep_underscores): - return (value, translate(self.error_message)) - return (urlify(value, self.maxlen, self.keep_underscores), None) - - -class ANY_OF(Validator): - """ - Tests if any of the validators in a list returns successfully:: - - >>> ANY_OF([IS_EMAIL(),IS_ALPHANUMERIC()])('a@b.co') - ('a@b.co', None) - >>> ANY_OF([IS_EMAIL(),IS_ALPHANUMERIC()])('abco') - ('abco', None) - >>> ANY_OF([IS_EMAIL(),IS_ALPHANUMERIC()])('@ab.co') - ('@ab.co', 'enter only letters, numbers, and underscore') - >>> ANY_OF([IS_ALPHANUMERIC(),IS_EMAIL()])('@ab.co') - ('@ab.co', 'enter a valid email address') - - """ - - def __init__(self, subs, error_message=None): - self.subs = subs - self.error_message = error_message - - def __call__(self, value): - for validator in self.subs: - value, error = validator(value) - if error is None: - break - if error is not None and self.error_message is not None: - error = translate(self.error_message) - return value, error - - def formatter(self, value): - # Use the formatter of the first subvalidator - # that validates the value and has a formatter - for validator in self.subs: - if hasattr(validator, 'formatter') and validator(value)[1] is None: - return validator.formatter(value) - - -class IS_EMPTY_OR(Validator): - """ - Dummy class for testing IS_EMPTY_OR:: - - >>> IS_EMPTY_OR(IS_EMAIL())('abc@def.com') - ('abc@def.com', None) - >>> IS_EMPTY_OR(IS_EMAIL())(' ') - (None, None) - >>> IS_EMPTY_OR(IS_EMAIL(), null='abc')(' ') - ('abc', None) - >>> IS_EMPTY_OR(IS_EMAIL(), null='abc', empty_regex='def')('def') - ('abc', None) - >>> IS_EMPTY_OR(IS_EMAIL())('abc') - ('abc', 'enter a valid email address') - >>> IS_EMPTY_OR(IS_EMAIL())(' abc ') - ('abc', 'enter a valid email address') - """ - - def __init__(self, other, null=None, empty_regex=None): - (self.other, self.null) = (other, null) - if empty_regex is not None: - self.empty_regex = re.compile(empty_regex) - else: - self.empty_regex = None - if hasattr(other, 'multiple'): - self.multiple = other.multiple - if hasattr(other, 'options'): - self.options = self._options - - def _options(self, *args, **kwargs): - options = self.other.options(*args, **kwargs) - if (not options or options[0][0] != '') and not self.multiple: - options.insert(0, ('', '')) - return options - - def set_self_id(self, id): - if isinstance(self.other, (list, tuple)): - for item in self.other: - if hasattr(item, 'set_self_id'): - item.set_self_id(id) - else: - if hasattr(self.other, 'set_self_id'): - self.other.set_self_id(id) - - def __call__(self, value): - value, empty = is_empty(value, empty_regex=self.empty_regex) - if empty: - return (self.null, None) - if isinstance(self.other, (list, tuple)): - error = None - for item in self.other: - value, error = item(value) - if error: - break - return value, error - else: - return self.other(value) - - def formatter(self, value): - if hasattr(self.other, 'formatter'): - return self.other.formatter(value) - return value - -IS_NULL_OR = IS_EMPTY_OR # for backward compatibility - - -class CLEANUP(Validator): - """ - Examples: - Use as:: - - INPUT(_type='text', _name='name', requires=CLEANUP()) - - removes special characters on validation - """ - REGEX_CLEANUP = re.compile('[^\x09\x0a\x0d\x20-\x7e]') - - def __init__(self, regex=None): - self.regex = self.REGEX_CLEANUP if regex is None \ - else re.compile(regex) - - def __call__(self, value): - v = self.regex.sub('', str(value).strip()) - return (v, None) - - -class LazyCrypt(object): - """ - Stores a lazy password hash - """ - - def __init__(self, crypt, password): - """ - crypt is an instance of the CRYPT validator, - password is the password as inserted by the user - """ - self.crypt = crypt - self.password = password - self.crypted = None - - def __str__(self): - """ - Encrypted self.password and caches it in self.crypted. - If self.crypt.salt the output is in the format $$ - - Try get the digest_alg from the key (if it exists) - else assume the default digest_alg. If not key at all, set key='' - - If a salt is specified use it, if salt is True, set salt to uuid - (this should all be backward compatible) - - Options: - key = 'uuid' - key = 'md5:uuid' - key = 'sha512:uuid' - ... - key = 'pbkdf2(1000,64,sha512):uuid' 1000 iterations and 64 chars length - """ - if self.crypted: - return self.crypted - if self.crypt.key: - if ':' in self.crypt.key: - digest_alg, key = self.crypt.key.split(':', 1) - else: - digest_alg, key = self.crypt.digest_alg, self.crypt.key - else: - digest_alg, key = self.crypt.digest_alg, '' - if self.crypt.salt: - if self.crypt.salt is True: - salt = str(web2py_uuid()).replace('-', '')[-16:] - else: - salt = self.crypt.salt - else: - salt = '' - hashed = simple_hash(self.password, key, salt, digest_alg) - self.crypted = '%s$%s$%s' % (digest_alg, salt, hashed) - return self.crypted - - def __eq__(self, stored_password): - """ - compares the current lazy crypted password with a stored password - """ - - # LazyCrypt objects comparison - if isinstance(stored_password, self.__class__): - return ((self is stored_password) or - ((self.crypt.key == stored_password.crypt.key) and - (self.password == stored_password.password))) - - if self.crypt.key: - if ':' in self.crypt.key: - key = self.crypt.key.split(':')[1] - else: - key = self.crypt.key - else: - key = '' - if stored_password is None: - return False - elif stored_password.count('$') == 2: - (digest_alg, salt, hash) = stored_password.split('$') - h = simple_hash(self.password, key, salt, digest_alg) - temp_pass = '%s$%s$%s' % (digest_alg, salt, h) - else: # no salting - # guess digest_alg - digest_alg = DIGEST_ALG_BY_SIZE.get(len(stored_password), None) - if not digest_alg: - return False - else: - temp_pass = simple_hash(self.password, key, '', digest_alg) - return temp_pass == stored_password - - def __ne__(self, other): - return not self.__eq__(other) - - -class CRYPT(object): - """ - Examples: - Use as:: - - INPUT(_type='text', _name='name', requires=CRYPT()) - - encodes the value on validation with a digest. - - If no arguments are provided CRYPT uses the MD5 algorithm. - If the key argument is provided the HMAC+MD5 algorithm is used. - If the digest_alg is specified this is used to replace the - MD5 with, for example, SHA512. The digest_alg can be - the name of a hashlib algorithm as a string or the algorithm itself. - - min_length is the minimal password length (default 4) - IS_STRONG for serious security - error_message is the message if password is too short - - Notice that an empty password is accepted but invalid. It will not allow login back. - Stores junk as hashed password. - - Specify an algorithm or by default we will use sha512. - - Typical available algorithms: - md5, sha1, sha224, sha256, sha384, sha512 - - If salt, it hashes a password with a salt. - If salt is True, this method will automatically generate one. - Either case it returns an encrypted password string in the following format: - - $$ - - Important: hashed password is returned as a LazyCrypt object and computed only if needed. - The LasyCrypt object also knows how to compare itself with an existing salted password - - Supports standard algorithms - - >>> for alg in ('md5','sha1','sha256','sha384','sha512'): - ... print(str(CRYPT(digest_alg=alg,salt=True)('test')[0])) - md5$...$... - sha1$...$... - sha256$...$... - sha384$...$... - sha512$...$... - - The syntax is always alg$salt$hash - - Supports for pbkdf2 - - >>> alg = 'pbkdf2(1000,20,sha512)' - >>> print(str(CRYPT(digest_alg=alg,salt=True)('test')[0])) - pbkdf2(1000,20,sha512)$...$... - - An optional hmac_key can be specified and it is used as salt prefix - - >>> a = str(CRYPT(digest_alg='md5',key='mykey',salt=True)('test')[0]) - >>> print(a) - md5$...$... - - Even if the algorithm changes the hash can still be validated - - >>> CRYPT(digest_alg='sha1',key='mykey',salt=True)('test')[0] == a - True - - If no salt is specified CRYPT can guess the algorithms from length: - - >>> a = str(CRYPT(digest_alg='sha1',salt=False)('test')[0]) - >>> a - 'sha1$$a94a8fe5ccb19ba61c4c0873d391e987982fbbd3' - >>> CRYPT(digest_alg='sha1',salt=False)('test')[0] == a - True - >>> CRYPT(digest_alg='sha1',salt=False)('test')[0] == a[6:] - True - >>> CRYPT(digest_alg='md5',salt=False)('test')[0] == a - True - >>> CRYPT(digest_alg='md5',salt=False)('test')[0] == a[6:] - True - """ - - def __init__(self, - key=None, - digest_alg='pbkdf2(1000,20,sha512)', - min_length=0, - error_message='Too short', salt=True, - max_length=1024): - """ - important, digest_alg='md5' is not the default hashing algorithm for - web2py. This is only an example of usage of this function. - - The actual hash algorithm is determined from the key which is - generated by web2py in tools.py. This defaults to hmac+sha512. - """ - self.key = key - self.digest_alg = digest_alg - self.min_length = min_length - self.max_length = max_length - self.error_message = error_message - self.salt = salt - - def __call__(self, value): - v = value and str(value)[:self.max_length] - if not v or len(v) < self.min_length: - return ('', translate(self.error_message)) - if isinstance(value, LazyCrypt): - return (value, None) - return (LazyCrypt(self, value), None) - -# entropy calculator for IS_STRONG -# -lowerset = frozenset(u'abcdefghijklmnopqrstuvwxyz') -upperset = frozenset(u'ABCDEFGHIJKLMNOPQRSTUVWXYZ') -numberset = frozenset(u'0123456789') -sym1set = frozenset(u'!@#$%^&*()') -sym2set = frozenset(u'~`-_=+[]{}\\|;:\'",.<>?/') -otherset = frozenset( - u'0123456789abcdefghijklmnopqrstuvwxyz') # anything else - - -def calc_entropy(string): - """ calculates a simple entropy for a given string """ - import math - alphabet = 0 # alphabet size - other = set() - seen = set() - lastset = None - string = to_unicode(string) - for c in string: - # classify this character - inset = otherset - for cset in (lowerset, upperset, numberset, sym1set, sym2set): - if c in cset: - inset = cset - break - # calculate effect of character on alphabet size - if inset not in seen: - seen.add(inset) - alphabet += len(inset) # credit for a new character set - elif c not in other: - alphabet += 1 # credit for unique characters - other.add(c) - if inset is not lastset: - alphabet += 1 # credit for set transitions - lastset = cset - entropy = len( - string) * math.log(alphabet) / 0.6931471805599453 # math.log(2) - return round(entropy, 2) - - -class IS_STRONG(object): - """ - Examples: - Use as:: - - INPUT(_type='password', _name='passwd', - requires=IS_STRONG(min=10, special=2, upper=2)) - - enforces complexity requirements on a field - - >>> IS_STRONG(es=True)('Abcd1234') - ('Abcd1234', - 'Must include at least 1 of the following: ~!@#$%^&*()_+-=?<>,.:;{}[]|') - >>> IS_STRONG(es=True)('Abcd1234!') - ('Abcd1234!', None) - >>> IS_STRONG(es=True, entropy=1)('a') - ('a', None) - >>> IS_STRONG(es=True, entropy=1, min=2)('a') - ('a', 'Minimum length is 2') - >>> IS_STRONG(es=True, entropy=100)('abc123') - ('abc123', 'Entropy (32.35) less than required (100)') - >>> IS_STRONG(es=True, entropy=100)('and') - ('and', 'Entropy (14.57) less than required (100)') - >>> IS_STRONG(es=True, entropy=100)('aaa') - ('aaa', 'Entropy (14.42) less than required (100)') - >>> IS_STRONG(es=True, entropy=100)('a1d') - ('a1d', 'Entropy (15.97) less than required (100)') - >>> IS_STRONG(es=True, entropy=100)('añd') - ('a\\xc3\\xb1d', 'Entropy (18.13) less than required (100)') - - """ - - def __init__(self, min=None, max=None, upper=None, lower=None, number=None, - entropy=None, - special=None, specials=r'~!@#$%^&*()_+-=?<>,.:;{}[]|', - invalid=' "', error_message=None, es=False): - self.entropy = entropy - if entropy is None: - # enforce default requirements - self.min = 8 if min is None else min - self.max = max # was 20, but that doesn't make sense - self.upper = 1 if upper is None else upper - self.lower = 1 if lower is None else lower - self.number = 1 if number is None else number - self.special = 1 if special is None else special - else: - # by default, an entropy spec is exclusive - self.min = min - self.max = max - self.upper = upper - self.lower = lower - self.number = number - self.special = special - self.specials = specials - self.invalid = invalid - self.error_message = error_message - self.estring = es # return error message as string (for doctest) - - def __call__(self, value): - failures = [] - if value and len(value) == value.count('*') > 4: - return (value, None) - if self.entropy is not None: - entropy = calc_entropy(value) - if entropy < self.entropy: - failures.append(translate("Entropy (%(have)s) less than required (%(need)s)") - % dict(have=entropy, need=self.entropy)) - if isinstance(self.min, int) and self.min > 0: - if not len(value) >= self.min: - failures.append(translate("Minimum length is %s") % self.min) - if isinstance(self.max, int) and self.max > 0: - if not len(value) <= self.max: - failures.append(translate("Maximum length is %s") % self.max) - if isinstance(self.special, int): - all_special = [ch in value for ch in self.specials] - if self.special > 0: - if not all_special.count(True) >= self.special: - failures.append(translate("Must include at least %s of the following: %s") - % (self.special, self.specials)) - if self.invalid: - all_invalid = [ch in value for ch in self.invalid] - if all_invalid.count(True) > 0: - failures.append(translate("May not contain any of the following: %s") - % self.invalid) - if isinstance(self.upper, int): - all_upper = re.findall("[A-Z]", value) - if self.upper > 0: - if not len(all_upper) >= self.upper: - failures.append(translate("Must include at least %s uppercase") - % str(self.upper)) - else: - if len(all_upper) > 0: - failures.append( - translate("May not include any uppercase letters")) - if isinstance(self.lower, int): - all_lower = re.findall("[a-z]", value) - if self.lower > 0: - if not len(all_lower) >= self.lower: - failures.append(translate("Must include at least %s lowercase") - % str(self.lower)) - else: - if len(all_lower) > 0: - failures.append( - translate("May not include any lowercase letters")) - if isinstance(self.number, int): - all_number = re.findall("[0-9]", value) - if self.number > 0: - numbers = "number" - if self.number > 1: - numbers = "numbers" - if not len(all_number) >= self.number: - failures.append(translate("Must include at least %s %s") - % (str(self.number), numbers)) - else: - if len(all_number) > 0: - failures.append(translate("May not include any numbers")) - if len(failures) == 0: - return (value, None) - if not self.error_message: - if self.estring: - return (value, '|'.join(failures)) - from gluon.html import XML - return (value, XML('
'.join(failures))) - else: - return (value, translate(self.error_message)) - - -class IS_IMAGE(Validator): - """ - Checks if file uploaded through file input was saved in one of selected - image formats and has dimensions (width and height) within given boundaries. - - Does *not* check for maximum file size (use IS_LENGTH for that). Returns - validation failure if no data was uploaded. - - Supported file formats: BMP, GIF, JPEG, PNG. - - Code parts taken from - http://mail.python.org/pipermail/python-list/2007-June/617126.html - - Args: - extensions: iterable containing allowed *lowercase* image file extensions - ('jpg' extension of uploaded file counts as 'jpeg') - maxsize: iterable containing maximum width and height of the image - minsize: iterable containing minimum width and height of the image - aspectratio: iterable containing target aspect ratio - - Use (-1, -1) as minsize to pass image size check. - Use (-1, -1) as aspectratio to pass aspect ratio check. - - Examples: - Check if uploaded file is in any of supported image formats: - - INPUT(_type='file', _name='name', requires=IS_IMAGE()) - - Check if uploaded file is either JPEG or PNG: - - INPUT(_type='file', _name='name', - requires=IS_IMAGE(extensions=('jpeg', 'png'))) - - Check if uploaded file is PNG with maximum size of 200x200 pixels: - - INPUT(_type='file', _name='name', - requires=IS_IMAGE(extensions=('png'), maxsize=(200, 200))) - - Check if uploaded file has a 16:9 aspect ratio: - - INPUT(_type='file', _name='name', - requires=IS_IMAGE(aspectratio=(16, 9))) - """ - - def __init__(self, - extensions=('bmp', 'gif', 'jpeg', 'png'), - maxsize=(10000, 10000), - minsize=(0, 0), - aspectratio=(-1, -1), - error_message='Invalid image'): - - self.extensions = extensions - self.maxsize = maxsize - self.minsize = minsize - self.aspectratio = aspectratio - self.error_message = error_message - - def __call__(self, value): - try: - extension = value.filename.rfind('.') - assert extension >= 0 - extension = value.filename[extension + 1:].lower() - if extension == 'jpg': - extension = 'jpeg' - assert extension in self.extensions - if extension == 'bmp': - width, height = self.__bmp(value.file) - elif extension == 'gif': - width, height = self.__gif(value.file) - elif extension == 'jpeg': - width, height = self.__jpeg(value.file) - elif extension == 'png': - width, height = self.__png(value.file) - else: - width = -1 - height = -1 - - assert self.minsize[0] <= width <= self.maxsize[0] \ - and self.minsize[1] <= height <= self.maxsize[1] - - if self.aspectratio > (-1, -1): - target_ratio = (1.0 * self.aspectratio[1]) / self.aspectratio[0] - actual_ratio = (1.0 * height) / width - - assert actual_ratio == target_ratio - - value.file.seek(0) - return (value, None) - except Exception as e: - return (value, translate(self.error_message)) - - def __bmp(self, stream): - if stream.read(2) == b'BM': - stream.read(16) - return struct.unpack("= 0xC0 and code <= 0xC3: - return tuple(reversed( - struct.unpack("!xHH", stream.read(5)))) - else: - stream.read(length - 2) - return (-1, -1) - - def __png(self, stream): - if stream.read(8) == b'\211PNG\r\n\032\n': - stream.read(4) - if stream.read(4) == b"IHDR": - return struct.unpack("!LL", stream.read(8)) - return (-1, -1) - - -class IS_UPLOAD_FILENAME(Validator): - """ - Checks if name and extension of file uploaded through file input matches - given criteria. - - Does *not* ensure the file type in any way. Returns validation failure - if no data was uploaded. - - Args: - filename: filename (before dot) regex - extension: extension (after dot) regex - lastdot: which dot should be used as a filename / extension separator: - True means last dot, eg. file.png -> file / png - False means first dot, eg. file.tar.gz -> file / tar.gz - case: 0 - keep the case, 1 - transform the string into lowercase (default), - 2 - transform the string into uppercase - - If there is no dot present, extension checks will be done against empty - string and filename checks against whole value. - - Examples: - Check if file has a pdf extension (case insensitive): - - INPUT(_type='file', _name='name', - requires=IS_UPLOAD_FILENAME(extension='pdf')) - - Check if file has a tar.gz extension and name starting with backup: - - INPUT(_type='file', _name='name', - requires=IS_UPLOAD_FILENAME(filename='backup.*', - extension='tar.gz', lastdot=False)) - - Check if file has no extension and name matching README - (case sensitive): - - INPUT(_type='file', _name='name', - requires=IS_UPLOAD_FILENAME(filename='^README$', - extension='^$', case=0) - - """ - - def __init__(self, filename=None, extension=None, lastdot=True, case=1, - error_message='Enter valid filename'): - if isinstance(filename, str): - filename = re.compile(filename) - if isinstance(extension, str): - extension = re.compile(extension) - self.filename = filename - self.extension = extension - self.lastdot = lastdot - self.case = case - self.error_message = error_message - - def __call__(self, value): - try: - string = value.filename - except: - return (value, translate(self.error_message)) - if self.case == 1: - string = string.lower() - elif self.case == 2: - string = string.upper() - if self.lastdot: - dot = string.rfind('.') - else: - dot = string.find('.') - if dot == -1: - dot = len(string) - if self.filename and not self.filename.match(string[:dot]): - return (value, translate(self.error_message)) - elif self.extension and not self.extension.match(string[dot + 1:]): - return (value, translate(self.error_message)) - else: - return (value, None) - - -class IS_IPV4(Validator): - """ - Checks if field's value is an IP version 4 address in decimal form. Can - be set to force addresses from certain range. - - IPv4 regex taken from: http://regexlib.com/REDetails.aspx?regexp_id=1411 - - Args: - - minip: lowest allowed address; accepts: - - - str, eg. 192.168.0.1 - - list or tuple of octets, eg. [192, 168, 0, 1] - maxip: highest allowed address; same as above - invert: True to allow addresses only from outside of given range; note - that range boundaries are not matched this way - is_localhost: localhost address treatment: - - - None (default): indifferent - - True (enforce): query address must match localhost address (127.0.0.1) - - False (forbid): query address must not match localhost address - is_private: same as above, except that query address is checked against - two address ranges: 172.16.0.0 - 172.31.255.255 and - 192.168.0.0 - 192.168.255.255 - is_automatic: same as above, except that query address is checked against - one address range: 169.254.0.0 - 169.254.255.255 - - Minip and maxip may also be lists or tuples of addresses in all above - forms (str, int, list / tuple), allowing setup of multiple address ranges:: - - minip = (minip1, minip2, ... minipN) - | | | - | | | - maxip = (maxip1, maxip2, ... maxipN) - - Longer iterable will be truncated to match length of shorter one. - - Examples: - Check for valid IPv4 address: - - INPUT(_type='text', _name='name', requires=IS_IPV4()) - - Check for valid IPv4 address belonging to specific range: - - INPUT(_type='text', _name='name', - requires=IS_IPV4(minip='100.200.0.0', maxip='100.200.255.255')) - - Check for valid IPv4 address belonging to either 100.110.0.0 - - 100.110.255.255 or 200.50.0.0 - 200.50.0.255 address range: - - INPUT(_type='text', _name='name', - requires=IS_IPV4(minip=('100.110.0.0', '200.50.0.0'), - maxip=('100.110.255.255', '200.50.0.255'))) - - Check for valid IPv4 address belonging to private address space: - - INPUT(_type='text', _name='name', requires=IS_IPV4(is_private=True)) - - Check for valid IPv4 address that is not a localhost address: - - INPUT(_type='text', _name='name', requires=IS_IPV4(is_localhost=False)) - - >>> IS_IPV4()('1.2.3.4') - ('1.2.3.4', None) - >>> IS_IPV4()('255.255.255.255') - ('255.255.255.255', None) - >>> IS_IPV4()('1.2.3.4 ') - ('1.2.3.4 ', 'enter valid IPv4 address') - >>> IS_IPV4()('1.2.3.4.5') - ('1.2.3.4.5', 'enter valid IPv4 address') - >>> IS_IPV4()('123.123') - ('123.123', 'enter valid IPv4 address') - >>> IS_IPV4()('1111.2.3.4') - ('1111.2.3.4', 'enter valid IPv4 address') - >>> IS_IPV4()('0111.2.3.4') - ('0111.2.3.4', 'enter valid IPv4 address') - >>> IS_IPV4()('256.2.3.4') - ('256.2.3.4', 'enter valid IPv4 address') - >>> IS_IPV4()('300.2.3.4') - ('300.2.3.4', 'enter valid IPv4 address') - >>> IS_IPV4(minip='1.2.3.4', maxip='1.2.3.4')('1.2.3.4') - ('1.2.3.4', None) - >>> IS_IPV4(minip='1.2.3.5', maxip='1.2.3.9', error_message='Bad ip')('1.2.3.4') - ('1.2.3.4', 'bad ip') - >>> IS_IPV4(maxip='1.2.3.4', invert=True)('127.0.0.1') - ('127.0.0.1', None) - >>> IS_IPV4(maxip='1.2.3.4', invert=True)('1.2.3.4') - ('1.2.3.4', 'enter valid IPv4 address') - >>> IS_IPV4(is_localhost=True)('127.0.0.1') - ('127.0.0.1', None) - >>> IS_IPV4(is_localhost=True)('1.2.3.4') - ('1.2.3.4', 'enter valid IPv4 address') - >>> IS_IPV4(is_localhost=False)('127.0.0.1') - ('127.0.0.1', 'enter valid IPv4 address') - >>> IS_IPV4(maxip='100.0.0.0', is_localhost=True)('127.0.0.1') - ('127.0.0.1', 'enter valid IPv4 address') - - """ - - regex = re.compile( - '^(([1-9]?\d|1\d\d|2[0-4]\d|25[0-5])\.){3}([1-9]?\d|1\d\d|2[0-4]\d|25[0-5])$') - numbers = (16777216, 65536, 256, 1) - localhost = 2130706433 - private = ((2886729728, 2886795263), (3232235520, 3232301055)) - automatic = (2851995648, 2852061183) - - def __init__( - self, - minip='0.0.0.0', - maxip='255.255.255.255', - invert=False, - is_localhost=None, - is_private=None, - is_automatic=None, - error_message='Enter valid IPv4 address'): - for n, value in enumerate((minip, maxip)): - temp = [] - if isinstance(value, str): - temp.append(value.split('.')) - elif isinstance(value, (list, tuple)): - if len(value) == len(list(filter(lambda item: isinstance(item, int), value))) == 4: - temp.append(value) - else: - for item in value: - if isinstance(item, str): - temp.append(item.split('.')) - elif isinstance(item, (list, tuple)): - temp.append(item) - numbers = [] - for item in temp: - number = 0 - for i, j in zip(self.numbers, item): - number += i * int(j) - numbers.append(number) - if n == 0: - self.minip = numbers - else: - self.maxip = numbers - self.invert = invert - self.is_localhost = is_localhost - self.is_private = is_private - self.is_automatic = is_automatic - self.error_message = error_message - - def __call__(self, value): - if self.regex.match(value): - number = 0 - for i, j in zip(self.numbers, value.split('.')): - number += i * int(j) - ok = False - for bottom, top in zip(self.minip, self.maxip): - if self.invert != (bottom <= number <= top): - ok = True - if ok and self.is_localhost is not None and \ - self.is_localhost != (number == self.localhost): - ok = False - if ok and self.is_private is not None and (self.is_private != - any([private_number[0] <= number <= private_number[1] - for private_number in self.private])): - ok = False - if ok and self.is_automatic is not None and (self.is_automatic != - (self.automatic[0] <= number <= self.automatic[1])): - ok = False - if ok: - return (value, None) - return (value, translate(self.error_message)) - - -class IS_IPV6(Validator): - """ - Checks if field's value is an IP version 6 address. - - Uses the ipaddress from the Python 3 standard library - and its Python 2 backport (in contrib/ipaddress.py). - - Args: - is_private: None (default): indifferent - True (enforce): address must be in fc00::/7 range - False (forbid): address must NOT be in fc00::/7 range - is_link_local: Same as above but uses fe80::/10 range - is_reserved: Same as above but uses IETF reserved range - is_multicast: Same as above but uses ff00::/8 range - is_routeable: Similar to above but enforces not private, link_local, - reserved or multicast - is_6to4: Same as above but uses 2002::/16 range - is_teredo: Same as above but uses 2001::/32 range - subnets: value must be a member of at least one from list of subnets - - Examples: - Check for valid IPv6 address: - - INPUT(_type='text', _name='name', requires=IS_IPV6()) - - Check for valid IPv6 address is a link_local address: - - INPUT(_type='text', _name='name', requires=IS_IPV6(is_link_local=True)) - - Check for valid IPv6 address that is Internet routeable: - - INPUT(_type='text', _name='name', requires=IS_IPV6(is_routeable=True)) - - Check for valid IPv6 address in specified subnet: - - INPUT(_type='text', _name='name', requires=IS_IPV6(subnets=['2001::/32']) - - >>> IS_IPV6()('fe80::126c:8ffa:fe22:b3af') - ('fe80::126c:8ffa:fe22:b3af', None) - >>> IS_IPV6()('192.168.1.1') - ('192.168.1.1', 'enter valid IPv6 address') - >>> IS_IPV6(error_message='Bad ip')('192.168.1.1') - ('192.168.1.1', 'bad ip') - >>> IS_IPV6(is_link_local=True)('fe80::126c:8ffa:fe22:b3af') - ('fe80::126c:8ffa:fe22:b3af', None) - >>> IS_IPV6(is_link_local=False)('fe80::126c:8ffa:fe22:b3af') - ('fe80::126c:8ffa:fe22:b3af', 'enter valid IPv6 address') - >>> IS_IPV6(is_link_local=True)('2001::126c:8ffa:fe22:b3af') - ('2001::126c:8ffa:fe22:b3af', 'enter valid IPv6 address') - >>> IS_IPV6(is_multicast=True)('2001::126c:8ffa:fe22:b3af') - ('2001::126c:8ffa:fe22:b3af', 'enter valid IPv6 address') - >>> IS_IPV6(is_multicast=True)('ff00::126c:8ffa:fe22:b3af') - ('ff00::126c:8ffa:fe22:b3af', None) - >>> IS_IPV6(is_routeable=True)('2001::126c:8ffa:fe22:b3af') - ('2001::126c:8ffa:fe22:b3af', None) - >>> IS_IPV6(is_routeable=True)('ff00::126c:8ffa:fe22:b3af') - ('ff00::126c:8ffa:fe22:b3af', 'enter valid IPv6 address') - >>> IS_IPV6(subnets='2001::/32')('2001::8ffa:fe22:b3af') - ('2001::8ffa:fe22:b3af', None) - >>> IS_IPV6(subnets='fb00::/8')('2001::8ffa:fe22:b3af') - ('2001::8ffa:fe22:b3af', 'enter valid IPv6 address') - >>> IS_IPV6(subnets=['fc00::/8','2001::/32'])('2001::8ffa:fe22:b3af') - ('2001::8ffa:fe22:b3af', None) - >>> IS_IPV6(subnets='invalidsubnet')('2001::8ffa:fe22:b3af') - ('2001::8ffa:fe22:b3af', 'invalid subnet provided') - - """ - - def __init__( - self, - is_private=None, - is_link_local=None, - is_reserved=None, - is_multicast=None, - is_routeable=None, - is_6to4=None, - is_teredo=None, - subnets=None, - error_message='Enter valid IPv6 address'): - self.is_private = is_private - self.is_link_local = is_link_local - self.is_reserved = is_reserved - self.is_multicast = is_multicast - self.is_routeable = is_routeable - self.is_6to4 = is_6to4 - self.is_teredo = is_teredo - self.subnets = subnets - self.error_message = error_message - - def __call__(self, value): - from gluon._compat import ipaddress - - try: - ip = ipaddress.IPv6Address(to_unicode(value)) - ok = True - except ipaddress.AddressValueError: - return (value, translate(self.error_message)) - - if self.subnets: - # iterate through self.subnets to see if value is a member - ok = False - if isinstance(self.subnets, str): - self.subnets = [self.subnets] - for network in self.subnets: - try: - ipnet = ipaddress.IPv6Network(to_unicode(network)) - except (ipaddress.NetmaskValueError, ipaddress.AddressValueError): - return (value, translate('invalid subnet provided')) - if ip in ipnet: - ok = True - - if self.is_routeable: - self.is_private = False - self.is_reserved = False - self.is_multicast = False - - if ok and self.is_private is not None and \ - self.is_private != ip.is_private: - ok = False - if ok and self.is_link_local is not None and \ - self.is_link_local != ip.is_link_local: - ok = False - if ok and self.is_reserved is not None and \ - self.is_reserved != ip.is_reserved: - ok = False - if ok and self.is_multicast is not None and \ - self.is_multicast != ip.is_multicast: - ok = False - if ok and self.is_6to4 is not None and \ - self.is_6to4 != bool(ip.sixtofour): - ok = False - if ok and self.is_teredo is not None and \ - self.is_teredo != bool(ip.teredo): - ok = False - - if ok: - return (value, None) - - return (value, translate(self.error_message)) - - -class IS_IPADDRESS(Validator): - """ - Checks if field's value is an IP Address (v4 or v6). Can be set to force - addresses from within a specific range. Checks are done with the correct - IS_IPV4 and IS_IPV6 validators. - - Uses the ipaddress from the Python 3 standard library - and its Python 2 backport (in contrib/ipaddress.py). - - Args: - minip: lowest allowed address; accepts: - str, eg. 192.168.0.1 - list or tuple of octets, eg. [192, 168, 0, 1] - maxip: highest allowed address; same as above - invert: True to allow addresses only from outside of given range; note - that range boundaries are not matched this way - - IPv4 specific arguments: - - - is_localhost: localhost address treatment: - - - None (default): indifferent - - True (enforce): query address must match localhost address - (127.0.0.1) - - False (forbid): query address must not match localhost address - - is_private: same as above, except that query address is checked against - two address ranges: 172.16.0.0 - 172.31.255.255 and - 192.168.0.0 - 192.168.255.255 - - is_automatic: same as above, except that query address is checked against - one address range: 169.254.0.0 - 169.254.255.255 - - is_ipv4: either: - - - None (default): indifferent - - True (enforce): must be an IPv4 address - - False (forbid): must NOT be an IPv4 address - - IPv6 specific arguments: - - - is_link_local: Same as above but uses fe80::/10 range - - is_reserved: Same as above but uses IETF reserved range - - is_multicast: Same as above but uses ff00::/8 range - - is_routeable: Similar to above but enforces not private, link_local, - reserved or multicast - - is_6to4: Same as above but uses 2002::/16 range - - is_teredo: Same as above but uses 2001::/32 range - - subnets: value must be a member of at least one from list of subnets - - is_ipv6: either: - - - None (default): indifferent - - True (enforce): must be an IPv6 address - - False (forbid): must NOT be an IPv6 address - - Minip and maxip may also be lists or tuples of addresses in all above - forms (str, int, list / tuple), allowing setup of multiple address ranges:: - - minip = (minip1, minip2, ... minipN) - | | | - | | | - maxip = (maxip1, maxip2, ... maxipN) - - Longer iterable will be truncated to match length of shorter one. - - >>> IS_IPADDRESS()('192.168.1.5') - ('192.168.1.5', None) - >>> IS_IPADDRESS(is_ipv6=False)('192.168.1.5') - ('192.168.1.5', None) - >>> IS_IPADDRESS()('255.255.255.255') - ('255.255.255.255', None) - >>> IS_IPADDRESS()('192.168.1.5 ') - ('192.168.1.5 ', 'enter valid IP address') - >>> IS_IPADDRESS()('192.168.1.1.5') - ('192.168.1.1.5', 'enter valid IP address') - >>> IS_IPADDRESS()('123.123') - ('123.123', 'enter valid IP address') - >>> IS_IPADDRESS()('1111.2.3.4') - ('1111.2.3.4', 'enter valid IP address') - >>> IS_IPADDRESS()('0111.2.3.4') - ('0111.2.3.4', 'enter valid IP address') - >>> IS_IPADDRESS()('256.2.3.4') - ('256.2.3.4', 'enter valid IP address') - >>> IS_IPADDRESS()('300.2.3.4') - ('300.2.3.4', 'enter valid IP address') - >>> IS_IPADDRESS(minip='192.168.1.0', maxip='192.168.1.255')('192.168.1.100') - ('192.168.1.100', None) - >>> IS_IPADDRESS(minip='1.2.3.5', maxip='1.2.3.9', error_message='Bad ip')('1.2.3.4') - ('1.2.3.4', 'bad ip') - >>> IS_IPADDRESS(maxip='1.2.3.4', invert=True)('127.0.0.1') - ('127.0.0.1', None) - >>> IS_IPADDRESS(maxip='192.168.1.4', invert=True)('192.168.1.4') - ('192.168.1.4', 'enter valid IP address') - >>> IS_IPADDRESS(is_localhost=True)('127.0.0.1') - ('127.0.0.1', None) - >>> IS_IPADDRESS(is_localhost=True)('192.168.1.10') - ('192.168.1.10', 'enter valid IP address') - >>> IS_IPADDRESS(is_localhost=False)('127.0.0.1') - ('127.0.0.1', 'enter valid IP address') - >>> IS_IPADDRESS(maxip='100.0.0.0', is_localhost=True)('127.0.0.1') - ('127.0.0.1', 'enter valid IP address') - - >>> IS_IPADDRESS()('fe80::126c:8ffa:fe22:b3af') - ('fe80::126c:8ffa:fe22:b3af', None) - >>> IS_IPADDRESS(is_ipv4=False)('fe80::126c:8ffa:fe22:b3af') - ('fe80::126c:8ffa:fe22:b3af', None) - >>> IS_IPADDRESS()('fe80::126c:8ffa:fe22:b3af ') - ('fe80::126c:8ffa:fe22:b3af ', 'enter valid IP address') - >>> IS_IPADDRESS(is_ipv4=True)('fe80::126c:8ffa:fe22:b3af') - ('fe80::126c:8ffa:fe22:b3af', 'enter valid IP address') - >>> IS_IPADDRESS(is_ipv6=True)('192.168.1.1') - ('192.168.1.1', 'enter valid IP address') - >>> IS_IPADDRESS(is_ipv6=True, error_message='Bad ip')('192.168.1.1') - ('192.168.1.1', 'bad ip') - >>> IS_IPADDRESS(is_link_local=True)('fe80::126c:8ffa:fe22:b3af') - ('fe80::126c:8ffa:fe22:b3af', None) - >>> IS_IPADDRESS(is_link_local=False)('fe80::126c:8ffa:fe22:b3af') - ('fe80::126c:8ffa:fe22:b3af', 'enter valid IP address') - >>> IS_IPADDRESS(is_link_local=True)('2001::126c:8ffa:fe22:b3af') - ('2001::126c:8ffa:fe22:b3af', 'enter valid IP address') - >>> IS_IPADDRESS(is_multicast=True)('2001::126c:8ffa:fe22:b3af') - ('2001::126c:8ffa:fe22:b3af', 'enter valid IP address') - >>> IS_IPADDRESS(is_multicast=True)('ff00::126c:8ffa:fe22:b3af') - ('ff00::126c:8ffa:fe22:b3af', None) - >>> IS_IPADDRESS(is_routeable=True)('2001::126c:8ffa:fe22:b3af') - ('2001::126c:8ffa:fe22:b3af', None) - >>> IS_IPADDRESS(is_routeable=True)('ff00::126c:8ffa:fe22:b3af') - ('ff00::126c:8ffa:fe22:b3af', 'enter valid IP address') - >>> IS_IPADDRESS(subnets='2001::/32')('2001::8ffa:fe22:b3af') - ('2001::8ffa:fe22:b3af', None) - >>> IS_IPADDRESS(subnets='fb00::/8')('2001::8ffa:fe22:b3af') - ('2001::8ffa:fe22:b3af', 'enter valid IP address') - >>> IS_IPADDRESS(subnets=['fc00::/8','2001::/32'])('2001::8ffa:fe22:b3af') - ('2001::8ffa:fe22:b3af', None) - >>> IS_IPADDRESS(subnets='invalidsubnet')('2001::8ffa:fe22:b3af') - ('2001::8ffa:fe22:b3af', 'invalid subnet provided') - """ - - def __init__( - self, - minip='0.0.0.0', - maxip='255.255.255.255', - invert=False, - is_localhost=None, - is_private=None, - is_automatic=None, - is_ipv4=None, - is_link_local=None, - is_reserved=None, - is_multicast=None, - is_routeable=None, - is_6to4=None, - is_teredo=None, - subnets=None, - is_ipv6=None, - error_message='Enter valid IP address'): - self.minip = minip, - self.maxip = maxip, - self.invert = invert - self.is_localhost = is_localhost - self.is_private = is_private - self.is_automatic = is_automatic - self.is_ipv4 = is_ipv4 or is_ipv6 is False - self.is_private = is_private - self.is_link_local = is_link_local - self.is_reserved = is_reserved - self.is_multicast = is_multicast - self.is_routeable = is_routeable - self.is_6to4 = is_6to4 - self.is_teredo = is_teredo - self.subnets = subnets - self.is_ipv6 = is_ipv6 or is_ipv4 is False - self.error_message = error_message - - def __call__(self, value): - from gluon._compat import ipaddress - IPAddress = ipaddress.ip_address - IPv6Address = ipaddress.IPv6Address - IPv4Address = ipaddress.IPv4Address - - try: - ip = IPAddress(to_unicode(value)) - except ValueError: - return (value, translate(self.error_message)) - - if self.is_ipv4 and isinstance(ip, IPv6Address): - retval = (value, translate(self.error_message)) - elif self.is_ipv6 and isinstance(ip, IPv4Address): - retval = (value, translate(self.error_message)) - elif self.is_ipv4 or isinstance(ip, IPv4Address): - retval = IS_IPV4( - minip=self.minip, - maxip=self.maxip, - invert=self.invert, - is_localhost=self.is_localhost, - is_private=self.is_private, - is_automatic=self.is_automatic, - error_message=self.error_message - )(value) - elif self.is_ipv6 or isinstance(ip, IPv6Address): - retval = IS_IPV6( - is_private=self.is_private, - is_link_local=self.is_link_local, - is_reserved=self.is_reserved, - is_multicast=self.is_multicast, - is_routeable=self.is_routeable, - is_6to4=self.is_6to4, - is_teredo=self.is_teredo, - subnets=self.subnets, - error_message=self.error_message - )(value) - else: - retval = (value, translate(self.error_message)) - - return retval