#!/usr/bin/env python # -*- coding: utf-8 -*- """ This file is part of the web2py Web Framework Copyrighted by Massimo Di Pierro License: LGPLv3 (http://www.gnu.org/licenses/lgpl.html) Plural subsystem is created by Vladyslav Kozlovskyy (Ukraine) """ import os import re import pkgutil from utf8 import Utf8 from cgi import escape import portalocker import logging import marshal import copy_reg from fileutils import listdir import settings from cfs import getcfs from thread import allocate_lock from html import XML, xmlescape from contrib.markmin.markmin2html import render, markmin_escape from string import maketrans __all__ = ['translator', 'findT', 'update_all_languages'] ospath = os.path ostat = os.stat osep = os.sep pjoin = os.path.join pdirname = os.path.dirname isdir = os.path.isdir is_gae = settings.global_settings.web2py_runtime_gae DEFAULT_LANGUAGE = 'en' # DEFAULT PLURAL-FORMS RULES: # language doesn't use plural forms DEFAULT_NPLURALS = 1 # only one singular/plural form is used DEFAULT_GET_PLURAL_ID = lambda n: 0 # word is unchangeable DEFAULT_CONSTRUCTOR_PLURAL_FORM = lambda word, plural_id: word def safe_eval(text): if text.strip(): try: import ast return ast.literal_eval(text) except ImportError: return eval(text,{},{}) return None # used as default filter in translator.M() def markmin_aux(m): return '{%s}' % markmin_escape(m.group('s')) def markmin(s): return render(regex_param.sub(markmin_aux,s), sep='br', autolinks=None, id_prefix='') NUMBERS = (int,long,float) # pattern to find T(blah blah blah) expressions PY_STRING_LITERAL_RE = r'(?<=[^\w]T\()(?P'\ + r"[uU]?[rR]?(?:'''(?:[^']|'{1,2}(?!'))*''')|"\ + r"(?:'(?:[^'\\]|\\.)*')|" + r'(?:"""(?:[^"]|"{1,2}(?!"))*""")|'\ + r'(?:"(?:[^"\\]|\\.)*"))' regex_translate = re.compile(PY_STRING_LITERAL_RE, re.DOTALL) regex_param=re.compile(r'{(?P.+?)}') # pattern for a valid accept_language regex_language = \ re.compile('^([a-zA-Z]{2})(\-[a-zA-Z]{2})?(\-[a-zA-Z]+)?$') regex_langfile = re.compile('^[a-zA-Z]{2}(-[a-zA-Z]{2})?\.py$') regex_backslash = re.compile(r"\\([\\{}%])") regex_plural = re.compile('%({.+?})') regex_plural_dict = re.compile('^{(?P[^()[\]][^()[\]]*?)\((?P[^()\[\]]+)\)}$') # %%{word(varname or number)} regex_plural_tuple = re.compile('^{(?P[^[\]()]+)(?:\[(?P\d+)\])?}$') # %%{word[index]} or %%{word} # UTF8 helper functions def upper_fun(s): return unicode(s,'utf-8').upper().encode('utf-8') def title_fun(s): return unicode(s,'utf-8').title().encode('utf-8') def cap_fun(s): return lambda s: unicode(s,'utf-8').capitalize().encode('utf-8') ttab_in = maketrans("\\%{}", '\x1c\x1d\x1e\x1f') ttab_out = maketrans('\x1c\x1d\x1e\x1f', "\\%{}") # cache of translated messages: # global_language_cache: # { 'languages/xx.py': # ( {"def-message": "xx-message", # ... # "def-message": "xx-message"}, lock_object ) # 'languages/yy.py': ( {dict}, lock_object ) # ... # } global_language_cache={} def get_from_cache(cache, val, fun): lang_dict, lock = cache lock.acquire() try: result = lang_dict.get(val); finally: lock.release() if result: return result lock.acquire() try: result = lang_dict.setdefault(val, fun()) finally: lock.release() return result def clear_cache(filename): cache = global_language_cache.setdefault( filename, ({}, allocate_lock())) lang_dict, lock = cache lock.acquire() try: lang_dict.clear(); finally: lock.release() def lang_sampling(lang_tuple, langlist): """ search *lang_tuple* in *langlist* Args: lang_tuple (tuple of strings): ('aa'[[,'-bb'],'-cc']) langlist (list of strings): [available languages] Returns: language from langlist or None """ # step 1: # compare "aa-bb-cc" | "aa-bb" | "aa" from lang_tuple # with strings from langlist. Return appropriate string # from langlist: tries = range(len(lang_tuple),0,-1) for i in tries: language="".join(lang_tuple[:i]) if language in langlist: return language # step 2 (if not found in step 1): # compare "aa-bb-cc" | "aa-bb" | "aa" from lang_tuple # with left part of a string from langlist. Return # appropriate string from langlist for i in tries: lang="".join(lang_tuple[:i]) for language in langlist: if language.startswith(lang): return language return None def read_dict_aux(filename): lang_text = portalocker.read_locked(filename).replace('\r\n', '\n') clear_cache(filename) try: return safe_eval(lang_text) or {} except Exception, e: status = 'Syntax error in %s (%s)' % (filename, e) logging.error(status) return {'__corrupted__':status} def read_dict(filename): """ return dictionary with translation messages """ return getcfs('lang:'+filename, filename, lambda: read_dict_aux(filename)) def get_lang_info(lang, langdir): """ retrieve lang information from *langdir*/*lang*.py file. Read few strings from lang.py file until keys !langname!, !langcode! or keys greater then '!*' were found args: lang (str): lang-code or 'default' langdir (str): path to 'languages' directory in web2py app dir returns: tuple(langcode, langname, langfile_mtime) e.g.: ('en', 'English', 1338549043.0) """ filename = ospath.join(langdir, lang+'.py') d = read_dict(filename) langcode = d.get('!langcode!',DEFAULT_LANGUAGE) langname = d.get('!langname!',langcode) return (langcode, langname or langcode, ostat(filename).st_mtime) def read_possible_languages(appdir): langs = {} # scan languages directory for langfiles: langdir = ospath.join(appdir,'languages') for filename in os.listdir(langdir): if regex_langfile.match(filename) or filename=='default.py': lang = filename[:-3] langs[lang] = get_lang_info(lang, langdir) if not 'en' in langs: # if default.py is not found, add default value: langs['en'] = ('en', 'English', 0) return langs def read_possible_plurals(): """ create list of all possible plural rules files result is cached to increase speed """ try: import gluon.contrib.plural_rules as package plurals = {} for importer, modname, ispkg in pkgutil.iter_modules(package.__path__): if len(modname)==2: module = __import__(package.__name__+'.'+modname) lang = modname pname = modname+'.py' nplurals = getattr(module,'nplurals', DEFAULT_NPLURALS) get_plural_id = getattr( module,'get_plural_id', DEFAULT_GET_PLURAL_ID) construct_plural_form = getattr( module,'construct_plural_form', DEFAULT_CONSTRUCTOR_PLURAL_FORM) plurals[lang] = (lang, nplurals, get_plural_id, construct_plural_form, pname) except ImportError: logging.warn('Unable to import plural rules') plurals['default'] = ('default', DEFAULT_NPLURALS, DEFAULT_GET_PLURAL_ID, DEFAULT_CONSTRUCTOR_PLURAL_FORM, None) return plurals PLURAL_RULES = read_possible_plurals() def read_plural_dict_aux(filename): lang_text = portalocker.read_locked(filename).replace('\r\n', '\n') try: return eval(lang_text) or {} except Exception, e: status='Syntax error in %s (%s)' % (filename, e) logging.error(status) return {'__corrupted__':status} def read_plural_dict(filename): return getcfs('plurals:'+filename, filename, lambda: read_plural_dict_aux(filename)) def write_plural_dict(filename, contents): if '__corrupted__' in contents: return try: fp = portalocker.LockedFile(filename, 'w') fp.write('#!/usr/bin/env python\n{\n# "singular form (0)": ["first plural form (1)", "second plural form (2)", ...],\n') # coding: utf8\n{\n') for key in sorted(contents,lambda x,y: cmp(unicode(x,'utf-8').lower(), unicode(y,'utf-8').lower())): forms = '['+','.join([repr(Utf8(form)) for form in contents[key]])+']' fp.write('%s: %s,\n' % (repr(Utf8(key)), forms)) fp.write('}\n') except (IOError, OSError): if not is_gae: logging.warning('Unable to write to file %s' % filename) return finally: fp.close() def write_dict(filename, contents): if '__corrupted__' in contents: return try: fp = portalocker.LockedFile(filename, 'w') except (IOError, OSError): if not settings.global_settings.web2py_runtime_gae: logging.warning('Unable to write to file %s' % filename) return fp.write('# coding: utf8\n{\n') for key in sorted(contents,lambda x,y: cmp(unicode(x,'utf-8').lower(), unicode(y,'utf-8').lower())): fp.write('%s: %s,\n' % (repr(Utf8(key)), repr(Utf8(contents[key])))) fp.write('}\n') fp.close() class lazyT(object): """ never to be called explicitly, returned by translator.__call__() or translator.M() """ m = s = T = f = t = None M = is_copy = False def __init__( self, message, symbols = {}, T = None, filter = None, ftag = None, M = False ): if isinstance(message, lazyT): self.m = message.m self.s = message.s self.T = message.T self.f = message.f self.t = message.t self.M = message.M self.is_copy = True else: self.m = message self.s = symbols self.T = T self.f = filter self.t = ftag self.M = M self.is_copy = False def __repr__(self): return "" % (repr(Utf8(self.m)), ) def __str__(self): return str(self.T.apply_filter(self.m, self.s, self.f, self.t) if self.M else self.T.translate(self.m, self.s)) def __eq__(self, other): return str(self) == str(other) def __ne__(self, other): return str(self) != str(other) def __add__(self, other): return '%s%s' % (self, other) def __radd__(self, other): return '%s%s' % (other, self) def __mul__(self, other): return str(self) * other def __cmp__(self,other): return cmp(str(self), str(other)) def __hash__(self): return hash(str(self)) def __getattr__(self, name): return getattr(str(self), name) def __getitem__(self, i): return str(self)[i] def __getslice__(self, i, j): return str(self)[i:j] def __iter__(self): for c in str(self): yield c def __len__(self): return len(str(self)) def xml(self): return str(self) if self.M else escape(str(self)) def encode(self, *a, **b): return str(self).encode(*a, **b) def decode(self, *a, **b): return str(self).decode(*a, **b) def read(self): return str(self) def __mod__(self, symbols): if self.is_copy: return lazyT(self) return lazyT(self.m, symbols, self.T, self.f, self.t, self.M) class translator(object): """ this class is instantiated by gluon.compileapp.build_environment as the T object :: T.force(None) # turns off translation T.force('fr, it') # forces web2py to translate using fr.py or it.py T(\"Hello World\") # translates \"Hello World\" using the selected file notice 1: there is no need to force since, by default, T uses http_accept_language to determine a translation file. notice 2: en and en-en are considered different languages! notice 3: if language xx-yy is not found force() probes other similar languages using such algorithm: xx-yy.py -> xx.py -> xx-yy*.py -> xx*.py """ def __init__(self, request): self.request = request self.folder = request.folder self.langpath = ospath.join(self.folder,'languages') self.filenames = set(os.listdir(self.langpath)) self.http_accept_language = request.env.http_accept_language # self.cache # filled in self.force() # self.accepted_language = None # filled in self.force() # self.language_file = None # filled in self.force() # self.plural_language = None # filled in self.force() # self.nplurals = None # filled in self.force() # self.get_plural_id = None # filled in self.force() # self.construct_plural_form = None # filled in self.force() # self.plural_rules_file = None # filled in self.force() # self.plural_file = None # filled in self.force() # self.plural_dict = None # filled in self.force() # self.plural_status = None # filled in self.force() self.requested_languages = \ self.force(self.http_accept_language) self.lazy = True self.otherTs = {} self.filter = markmin self.ftag = 'markmin' def get_possible_languages(self): return [lang[:-3] for lang in self.filenames \ if regex_langfile.match(lang)] def set_current_languages(self, *languages): """ set current AKA "default" languages setting one of this languages makes force() function turn translation off to use default language """ if len(languages) == 1 and isinstance( languages[0], (tuple, list)): languages = languages[0] self.current_languages = languages self.force(self.http_accept_language) def set_plural(self, language): """ initialize plural forms subsystem invoked from self.force() """ lang = language[:2] (self.plural_language, self.nplurals, self.get_plural_id, self.construct_plural_form, self.plural_filename ) = PLURAL_RULES.get(language,PLURAL_RULES['default']) for lang in (language, language[:5], language[:2]): filename = 'plural-%s.py' % lang if filename in self.filenames: self.plural_file = ospath.join(self.langpath,filename) self.plural_dict = read_plural_dict(self.plural_file) break else: self.plural_file = None self.plural_dict = {} def plural(self, word, n): """ get plural form of word for number *n* NOTE: *word* MUST be defined in current language (T.accepted_language) invoked from T()/M() in %%{} tag args: word (str): word in singular n (numeric): number plural form created for returns: (str): word in appropriate singular/plural form """ nplurals = self.nplurals if int(n)==1: return word elif word: id = self.get_plural_id(abs(int(n))) # id = 0 first plural form # id = 1 second plural form # etc. forms = self.plural_dict.get(word, []) if len(forms)>=id: # have this plural form return forms[id-1] else: # guessing this plural form forms += ['']*(nplurals-len(forms)-1) form = self.construct_plural_form(word, id) forms[id-1] = form self.plural_dict[word] = forms if self.plural_file and not is_gae: write_plural_dict(self.plural_file, self.plural_dict) return form def get_possible_languages_info(self, lang=None): """ return info for selected language or dictionary with all possible languages info from APP/languages/*.py args: *lang* (str): language returns: if *lang* is defined: return tuple(langcode, langname, langfile_mtime) or None if *lang* is NOT defined: returns dictionary with all possible languages: { langcode(from filename): ( langcode(from !langcode! key), langname(from !langname! key), langfile_mtime ) } """ info = read_possible_languages(self.folder) if lang: info = info.get(lang) return info def force(self, *languages): """ select language(s) for translation if a list of languages is passed as a parameter, first language from this list that matches the ones from the possible_languages dictionary will be selected default language will be selected if none of them matches possible_languages. """ language = '' if isinstance(languages,str): languages = regex_language.findall(languages.lower()) elif not languages or languages[0] is None: languages = [] for lang in languages: if lang+'.py' in self.filenames: language = lang langfile = language+'.py' break elif len(lang)>5 and lang[:5]+'.py' in self.filenames: language = lang[:5] langfile = language+'.py' break elif len(lang)>2 and lang[:2]+'.py' in self.filenames: language = lang[:2] langfile = language+'.py' break else: if 'default.py' in self.filenames: language = DEFAULT_LANGUAGE langfile = 'default.py' else: language = DEFAULT_LANGUAGE langfile = None self.accepted_language = language if langfile: self.language_file = ospath.join(self.langpath,langfile) self.t = read_dict(self.language_file) else: self.language_file = None self.t = {} self.cache = global_language_cache.setdefault( self.language_file,({},allocate_lock())) self.set_plural(language) return languages def __call__(self, message, symbols={}, language=None, lazy=None): """ get cached translated plain text message with inserted parameters(symbols) if lazy==True lazyT object is returned """ if lazy is None: lazy = self.lazy if not language: if lazy : return lazyT(message, symbols, self) else: return self.translate(message, symbols) else: try: otherT = self.otherTs[language] except KeyError: otherT = self.otherTs[language] = translator(self.request) otherT.force(language) return otherT(message, symbols, lazy=lazy) def apply_filter(self, message, symbols={}, filter=None, ftag=None): def get_tr(message, prefix, filter): s = self.get_t(message, prefix) return filter(s) if filter else self.filter(s) if filter: prefix = '@'+(ftag or 'userdef')+'\x01' else: prefix = '@'+self.ftag+'\x01' message = get_from_cache( self.cache, prefix+message, lambda: get_tr(message, prefix, filter)) if symbols or symbols == 0 or symbols == "": if isinstance(symbols, dict): symbols.update( (key, xmlescape(value).translate(ttab_in)) for key, value in symbols.iteritems() if not isinstance(value, NUMBERS) ) else: if not isinstance(symbols, tuple): symbols = (symbols,) symbols = tuple( value if isinstance(value, NUMBERS) else xmlescape(value).translate(ttab_in) for value in symbols) message = self.params_substitution(message, symbols) return XML(message.translate(ttab_out)) def M(self, message, symbols={}, language=None, lazy=None, filter=None, ftag=None): """ get cached translated markmin-message with inserted parametes if lazy==True lazyT object is returned """ if lazy is None: lazy = self.lazy if not language: if lazy: return lazyT(message, symbols, self, filter, ftag, True) else: return self.apply_filter(message, symbols, filter, ftag) else: try: otherT = self.otherTs[language] except KeyError: otherT = self.otherTs[language] = translator(self.request) otherT.force(language) return otherT.M(message, symbols, lazy=lazy) def get_t(self, message, prefix=''): """ user ## to add a comment into a translation string the comment can be useful do discriminate different possible translations for the same string (for example different locations) T(' hello world ') -> ' hello world ' T(' hello world ## token') -> ' hello world ' T('hello ## world## token') -> 'hello ## world' the ## notation is ignored in multiline strings and strings that start with ##. this is to allow markmin syntax to be translated """ key = prefix+message mt = self.t.get(key, None) if mt is None: # we did not find a translation if message.find('##')>0 and not '\n' in message: # remove comments message = message.rsplit('##', 1)[0] # guess translation same as original self.t[key] = mt = message # update language file for later translation if self.language_file and not is_gae: write_dict(self.language_file, self.t) # fix backslash escaping mt = regex_backslash.sub( lambda m: m.group(1).translate(ttab_in), mt) return mt def params_substitution(self, message, symbols): """ substitute parameters from symbols into message using %. also parse %%{} placeholders for plural-forms processing. returns: string with parameters NOTE: *symbols* MUST BE OR tuple OR dict of parameters! """ def sub_plural(m): """string in %{} is transformed by this rules: If string starts with \\, ! or ? such transformations take place: "!string of words" -> "String of word" (Capitalize) "!!string of words" -> "String Of Word" (Title) "!!!string of words" -> "STRING OF WORD" (Upper) "\\!string of words" -> "!string of word" (remove \\ and disable transformations) "?word?number" -> "word" (return word, if number == 1) "?number" or "??number" -> "" (remove number, if number == 1) "?word?number" -> "number" (if number != 1) """ def sub_tuple(m): """ word[number], !word[number], !!word[number], !!!word[number] word, !word, !!word, !!!word, ?word?number, ??number, ?number ?word?word[number], ?word?[number], ??word[number] """ w,i = m.group('w','i') c = w[0] if c not in '!?': return self.plural(w, symbols[int(i or 0)]) elif c == '?': (p1, sep, p2) = w[1:].partition("?") part1 = p1 if sep else "" (part2, sep, part3) = (p2 if sep else p1).partition("?") if not sep: part3 = part2 if i is None: # ?[word]?number[?number] or ?number if not part2: return m.group(0) num = int(part2) else: # ?[word]?word2[?word3][number] num = int(symbols[int(i or 0)]) return part1 if num==1 else part3 if num==0 else part2 elif w.startswith('!!!'): word = w[3:] fun = upper_fun elif w.startswith('!!'): word = w[2:] fun = title_fun else: word = w[1:] fun = cap_fun if i is not None: return fun(self.plural(word, symbols[int(i)])) return fun(word) def sub_dict(m): """ word(var), !word(var), !!word(var), !!!word(var) word(num), !word(num), !!word(num), !!!word(num) ?word2(var), ?word1?word2(var), ?word1?word2?word0(var) ?word2(num), ?word1?word2(num), ?word1?word2?word0(num) """ w,n = m.group('w','n') c = w[0] n = int(n) if n.isdigit() else symbols[n] if c not in '!?': return self.plural(w, n) elif c == '?': # ?[word1]?word2[?word0](var or num), ?[word1]?word2(var or num) or ?word2(var or num) (p1, sep, p2) = w[1:].partition("?") part1 = p1 if sep else "" (part2, sep, part3) = (p2 if sep else p1).partition("?") if not sep: part3 = part2 num = int(n) return part1 if num==1 else part3 if num==0 else part2 elif w.startswith('!!!'): word = w[3:] fun = upper_fun elif w.startswith('!!'): word = w[2:] fun = title_fun else: word = w[1:] fun = cap_fun return fun(self.plural(word, n)) s = m.group(1) part = regex_plural_tuple.sub(sub_tuple, s) if part == s: part = regex_plural_dict.sub(sub_dict, s) if part == s: return m.group(0) return part message = message % symbols message = regex_plural.sub(sub_plural, message ) return message def translate(self, message, symbols): """ get cached translated message with inserted parameters(symbols) """ message = get_from_cache(self.cache, message, lambda: self.get_t(message)) if symbols or symbols == 0 or symbols == "": if isinstance(symbols, dict): symbols.update( (key, str(value).translate(ttab_in)) for key, value in symbols.iteritems() if not isinstance(value, NUMBERS) ) else: if not isinstance(symbols, tuple): symbols = (symbols,) symbols = tuple( value if isinstance(value, NUMBERS) else str(value).translate(ttab_in) for value in symbols) message = self.params_substitution(message, symbols) return message.translate(ttab_out) def findT(path, language='en'): """ must be run by the admin app """ lang_file = ospath.join(path, 'languages', language + '.py') sentences = read_dict(lang_file) mp = ospath.join(path, 'models') cp = ospath.join(path, 'controllers') vp = ospath.join(path, 'views') mop = ospath.join(path, 'modules') for filename in \ listdir(mp, '^.+\.py$', 0)+listdir(cp, '^.+\.py$', 0)\ +listdir(vp, '^.+\.html$', 0)+listdir(mop, '^.+\.py$', 0): data = portalocker.read_locked(filename) items = regex_translate.findall(data) for item in items: try: message = safe_eval(item) except: continue # silently ignore inproperly formatted strings if not message.startswith('#') and not '\n' in message: tokens = message.rsplit('##', 1) else: # this allows markmin syntax in translations tokens = [message] if len(tokens) == 2: message = tokens[0].strip()+'##'+tokens[1].strip() if message and not message in sentences: sentences[message] = message if not '!langcode!' in sentences: sentences['!langcode!'] = ( 'en' if language in ('default', 'en') else language) if not '!langname!' in sentences: sentences['!langname!'] = ( 'English' if language in ('default', 'en') else sentences['!langcode!']) write_dict(lang_file, sentences) ### important to allow safe session.flash=T(....) def lazyT_unpickle(data): return marshal.loads(data) def lazyT_pickle(data): return lazyT_unpickle, (marshal.dumps(str(data)),) copy_reg.pickle(lazyT, lazyT_pickle, lazyT_unpickle) def update_all_languages(application_path): path = ospath.join(application_path, 'languages/') for language in listdir(path, regex_langfile): findT(application_path, language[:-3]) if __name__ == '__main__': import doctest doctest.testmod()