faster and leaner languages, should be 100% backward compatible, plurals work again

This commit is contained in:
mdipierro
2012-08-27 10:57:26 -05:00
parent a3dfc9f621
commit 3ff4176e1f
3 changed files with 135 additions and 152 deletions
+1 -1
View File
@@ -1 +1 @@
Version 2.00.0 (2012-08-27 10:13:44) dev
Version 2.00.0 (2012-08-27 10:57:21) dev
+5 -5
View File
@@ -23,9 +23,9 @@ except ImportError:
GIT_MISSING = 'requires python-git module, but not installed or incompatible version'
from gluon.languages import (regex_language, read_possible_languages,
read_possible_plurals, lang_sampling,
lang_sampling,
read_dict, write_dict, read_plural_dict,
write_plural_dict)
write_plural_dict, PLURAL_RULES)
if DEMO_MODE and request.function in ['change_password','pack','pack_plugin','upgrade_web2py','uninstall','cleanup','compile_app','remove_compiled_app','delete','delete_plugin','create_file','upload_file','update_languages','reload_routes','git_push','git_pull']:
@@ -949,8 +949,8 @@ def design():
# get only existed files
languages = sorted(all_languages)
plural_rules={}
all_plurals=read_possible_plurals()
plural_rules = {}
all_plurals = PLURAL_RULES
for langfile,lang in all_languages.iteritems():
lang=lang.strip()
match_language = regex_language.match(lang)
@@ -961,7 +961,7 @@ def design():
plang = lang_sampling(match_language, all_plurals.keys())
if plang:
plural=all_plurals[plang]
plural_rules[langfile]=(plural[0],plang,plural[1],plural[3])
plural_rules[langfile]=(plural[0],plang,plural[4],plural[3])
else:
plural_rules[langfile]=(0,lang,'plural_rules-%s.py'%lang,'')
+129 -146
View File
@@ -34,7 +34,7 @@ osep = os.sep
isdir = os.path.isdir
is_gae = settings.global_settings.web2py_runtime_gae
DEFAULT_ACCEPT_LANGUAGE = 'en'
DEFAULT_LANGUAGE = 'en'
# DEFAULT PLURAL-FORMS RULES:
# language doesn't use plural forms
@@ -76,23 +76,24 @@ regex_param=re.compile(r'{(?P<s>.+?)}')
regex_language = \
re.compile('^([a-zA-Z]{2})(\-[a-zA-Z]{2})?(\-[a-zA-Z]+)?$')
regex_langfile = re.compile('^[a-zA-Z]{2}(-[a-zA-Z]{2})?\.py$')
regex_langinfo = re.compile("^[^'\"]*['\"]([^'\"]*)['\"]\s*:\s*['\"]([^'\"]*)['\"].*$")
regex_backslash = re.compile(r"\\([\\{}%])")
regex_plural = re.compile('%({.+?})')
regex_plural_dict = re.compile('^{(?P<w>[^()[\]][^()[\]]*?)\((?P<n>[^()\[\]]+)\)}$') # %%{word(varname or number)}
regex_plural_tuple = re.compile('^{(?P<w>[^[\]()]+)(?:\[(?P<i>\d+)\])?}$') # %%{word[index]} or %%{word}
regex_plural_q = re.compile('^asdf$') # %%{?word?cnt}, %%{??cnt} or %%{?cnt}
regex_plural_rules = re.compile('^plural_rules-[a-zA-Z]{2}(-[a-zA-Z]{2})?\.py$')
upper_fun = lambda s: unicode(s,'utf-8').upper().encode('utf-8')
title_fun = lambda s: unicode(s,'utf-8').title().encode('utf-8')
cap_fun = lambda s: unicode(s,'utf-8').capitalize().encode('utf-8')
# UTF8 helper functions
def upper_fun(s):
return unicode(s,'utf-8').upper().encode('utf-8')
def title_fun(s):
return unicode(s,'utf-8').title().encode('utf-8')
def cap_fun(s):
return lambda s: unicode(s,'utf-8').capitalize().encode('utf-8')
ttab_in = maketrans("\\%{}", '\x1c\x1d\x1e\x1f')
ttab_out = maketrans('\x1c\x1d\x1e\x1f', "\\%{}")
# cache of translated messages:
# of structure:
# global_language_cache:
# { 'languages/xx.py':
# ( {"def-message": "xx-message",
# ...
@@ -100,35 +101,38 @@ ttab_out = maketrans('\x1c\x1d\x1e\x1f', "\\%{}")
# 'languages/yy.py': ( {dict}, lock_object )
# ...
# }
tcache={}
global_language_cache={}
def get_from_cache(cache, val, fun):
lock=cache[1]
lang_dict, lock = cache
lock.acquire()
try:
result=cache[0].get(val);
result = lang_dict.get(val);
finally:
lock.release()
if result:
return result
lock.acquire()
try:
result=cache[0].setdefault(val, fun())
result = lang_dict.setdefault(val, fun())
finally:
lock.release()
return result
def clear_cache(cache):
lock=cache[1]
def clear_cache(filename):
cache = global_language_cache.setdefault(
filename, ({}, allocate_lock()))
lang_dict, lock = cache
lock.acquire()
try:
cache[0].clear();
lang_dict.clear();
finally:
lock.release()
def lang_sampling(lang_tuple, langlist):
""" search *lang_tuple* in *langlist*
"""
search *lang_tuple* in *langlist*
Args:
lang_tuple (tuple of strings): ('aa'[[,'-bb'],'-cc'])
@@ -159,11 +163,8 @@ def lang_sampling(lang_tuple, langlist):
def read_dict_aux(filename):
fp = portalocker.LockedFile(filename, 'r')
lang_text = fp.read().replace('\r\n', '\n')
fp.close()
# clear cache of processed messages:
clear_cache(tcache.setdefault(filename, ({}, allocate_lock())))
lang_text = portalocker.read_locked(filename).replace('\r\n', '\n')
clear_cache(filename)
try:
return safe_eval(lang_text) or {}
except Exception, e:
@@ -180,9 +181,10 @@ def read_dict(filename):
def get_lang_info(lang, langdir):
"""retrieve lang information from *langdir*/*lang*.py file.
Read few strings from lang.py file until keys !langname!,
!langcode! or keys greater then '!*' were found
"""
retrieve lang information from *langdir*/*lang*.py file.
Read few strings from lang.py file until keys !langname!,
!langcode! or keys greater then '!*' were found
args:
lang (str): lang-code or 'default'
@@ -193,27 +195,10 @@ def get_lang_info(lang, langdir):
e.g.: ('en', 'English', 1338549043.0)
"""
filename = ospath.join(langdir, lang+'.py')
langcode=langname=''
f = portalocker.LockedFile(filename, 'r')
try:
while not (langcode and langname):
line = f.readline()
if not line:
break
match=regex_langinfo.match(line)
if match:
k = match.group(1)
if k == '!langname!':
langname = match.group(2)
elif k == '!langcode!':
langcode = match.group(2)
elif k[0:1] > '!':
break
finally:
f.close()
if not langcode:
langcode = lang if lang != 'default' else 'en'
return langcode, langname or langcode, ostat(filename).st_mtime
d = read_dict(filename)
langcode = d.get('!langcode!',DEFAULT_LANGUAGE)
langname = d.get('!langname!',langcode)
return (langcode, langname or langcode, ostat(filename).st_mtime)
def read_possible_languages(appdir):
langs = {}
@@ -222,10 +207,10 @@ def read_possible_languages(appdir):
for filename in os.listdir(langdir):
if regex_langfile.match(filename) or filename=='default.py':
lang = filename[:-3]
langs[lang]=get_lang_info(lang, langdir)
if not 'default' in langs:
langs[lang] = get_lang_info(lang, langdir)
if not 'en' in langs:
# if default.py is not found, add default value:
langs['default'] = ('en', 'English', 0)
langs['en'] = ('en', 'English', 0)
return langs
def read_global_plural_rules(filename):
@@ -240,14 +225,13 @@ def read_global_plural_rules(filename):
e.g.: (3, <function>, <function>, ok)
"""
env = {}
lock = portalocker.LockedFile(filename, 'r')
data = portalocker.read_locked(filename)
try:
execfile(filename) in env
exec(data) in env
status='ok'
except Exception, e:
status='Syntax error in %s (%s)' % (filename, e)
logging.error(status)
lock.close()
nplurals = env.get('nplurals', DEFAULT_NPLURALS)
get_plural_id = env.get('get_plural_id', DEFAULT_GET_PLURAL_ID)
construct_plural_form = env.get('construct_plural_form',
@@ -256,8 +240,9 @@ def read_global_plural_rules(filename):
def read_possible_plurals():
""" create list of all possible plural rules files
result is cached to increase speed
"""
create list of all possible plural rules files
result is cached to increase speed
"""
pdir = abspath('gluon','contrib','rules')
plurals = {}
@@ -268,21 +253,20 @@ def read_possible_plurals():
fname = ospath.join(pdir, pname)
n, f1, f2, status = read_global_plural_rules(fname)
if status == 'ok':
plurals[lang] = (lang, n, f1, f2)
plurals[lang] = (lang, n, f1, f2, pname)
plurals['default'] = ('default',
DEFAULT_NPLURALS,
DEFAULT_GET_PLURAL_ID,
DEFAULT_CONSTRUCTOR_PLURAL_FORM)
DEFAULT_CONSTRUCTOR_PLURAL_FORM,
None)
return plurals
PLURAL_RULES = read_possible_plurals()
def read_plural_dict_aux(filename):
fp = portalocker.LockedFile(filename, 'r')
lang_text = fp.read().replace('\r\n', '\n')
fp.close()
lang_text = portalocker.read_locked(filename).replace('\r\n', '\n')
try:
return safe_eval(lang_text) or {}
return eval(lang_text) or {}
except Exception, e:
status='Syntax error in %s (%s)' % (filename, e)
logging.error(status)
@@ -422,13 +406,10 @@ class lazyT(object):
return lazyT(self.m, symbols, self.T, self.f, self.t, self.M)
class translator(object):
"""
this class is instantiated by gluon.compileapp.build_environment
as the T object
::
T.force(None) # turns off translation
T.force('fr, it') # forces web2py to translate using fr.py or it.py
@@ -436,20 +417,21 @@ class translator(object):
notice 1: there is no need to force since, by default, T uses
http_accept_language to determine a translation file.
notice 2: en and en-en are considered different languages!
notice 3: if language xx-yy is not found force() probes other similar
languages using such algorithm: xx-yy.py -> xx.py -> xx-yy*.py -> xx*.py
notice 2:
en and en-en are considered different languages!
notice 3:
if language xx-yy is not found force() probes other similar
languages using such algorithm:
xx-yy.py -> xx.py -> xx-yy*.py -> xx*.py
"""
def __init__(self, request):
self.request = request
self.folder = request.folder
self.langpath = ospath.join(self.folder,'languages')
self.filenames = set(os.listdir(self.langpath))
self.cache = tcache.setdefault(None, ({},allocate_lock()))
self.filenames = set(os.listdir(self.langpath))
self.http_accept_language = request.env.http_accept_language
# self.cache # filled in self.force()
# self.accepted_language = None # filled in self.force()
# self.language_file = None # filled in self.force()
# self.plural_language = None # filled in self.force()
@@ -468,12 +450,9 @@ class translator(object):
self.filter = markmin
self.ftag = 'markmin'
def get_possible_languages(self):
possible_languages = [
lang[:-3] for lang in self.filenames \
if regex_langfile.match(lang)]
return possible_languages
return [lang[:-3] for lang in self.filenames \
if regex_langfile.match(lang)]
def set_current_languages(self, *languages):
"""
@@ -488,27 +467,31 @@ class translator(object):
self.force(self.http_accept_language)
def set_plural(self, language):
""" initialize plural forms subsystem
invoked from self.force()
"""
initialize plural forms subsystem
invoked from self.force()
"""
lang = language[:2]
(self.plural_language,
self.nplurals,
self.get_plural_id,
self.construct_plural_form,
self.plural_filename
) = PLURAL_RULES.get(language,PLURAL_RULES['default'])
filename = 'plural-%s.py' % self.plural_language
if filename in self.filenames:
self.plural_file = ospath.join(self.langpath,filename)
self.plural_dict = read_plural_dict(self.plural_file)
for lang in (language, language[:5], language[:2]):
filename = 'plural-%s.py' % lang
if filename in self.filenames:
self.plural_file = ospath.join(self.langpath,filename)
self.plural_dict = read_plural_dict(self.plural_file)
break
else:
self.plural_file = None
self.plural_dict = {}
def plural(self, word, n):
"""
get plural form of word for number *n*
NOTE: *word" MUST be defined in current language
NOTE: *word* MUST be defined in current language
(T.accepted_language)
invoked from T()/M() in %%{} tag
@@ -520,25 +503,27 @@ class translator(object):
(str): word in appropriate singular/plural form
"""
nplurals = self.nplurals
if word:
id = self.get_plural_id(abs(int(n)))
if id > 0:
forms = self.plural_dict.get(word, [])
if forms:
form = forms.get(id-1)
if form:
return form
if int(n)==1:
return word
elif word:
id = min(int(n)-1,1) # self.get_plural_id(abs(int(n)))
# id = 0 first plural form
# id = 1 second plural form
# etc.
forms = self.plural_dict.get(word, [])
if len(forms)>=id:
# have this plural form
return forms[id-1]
else:
# guessing this plural form
forms += ['']*(nplurals-len(forms)-1)
form = self.construct_plural_form(word, id)
if len(forms) < nplurals-1:
forms.extend('' for i in xrange(nplurals-len(forms)-1))
forms[id-1] = form
self.plural_dict[word] = forms
if (self.plural_file and
not settings.global_settings.web2py_runtime_gae):
if self.plural_file and not is_gae:
write_plural_dict(self.plural_file,
self.plural_dict)
return form
return word
def get_possible_languages_info(self, lang=None):
"""
@@ -556,9 +541,9 @@ class translator(object):
langname(from !langname! key),
langfile_mtime ) }
"""
if lang:
return read_possible_languages(self.folder).get(lang)
return read_possible_languages(self.folder)
info = read_possible_languages(self.folder)
if lang: info = ingo.get(lang)
return info
def force(self, *languages):
"""
@@ -572,7 +557,6 @@ class translator(object):
default language will be selected if none
of them matches possible_languages.
"""
global tcache
language = ''
if isinstance(languages,str):
languages = regex_language.findall(languages.lower())
@@ -593,21 +577,20 @@ class translator(object):
break
else:
if 'default.py' in self.filenames:
language = DEFAULT_ACCEPT_LANGUAGE
language = DEFAULT_LANGUAGE
langfile = 'default.py'
else:
language = DEFAULT_ACCEPT_LANGUAGE
language = DEFAULT_LANGUAGE
langfile = None
self.accepted_language = language
if langfile:
self.language_file = ospath.join(self.langpath,langfile)
self.t = read_dict(self.language_file)
self.cache = tcache.setdefault(self.language_file,
({},allocate_lock()))
else:
self.language_file = None
self.t = {}
self.cache = tcache[None]
self.cache = global_language_cache.setdefault(
self.language_file,({},allocate_lock()))
self.set_plural(language)
return languages
@@ -639,26 +622,30 @@ class translator(object):
prefix = '@'+(ftag or 'userdef')+'\x01'
else:
prefix = '@'+self.ftag+'\x01'
message = get_from_cache(self.cache, prefix+message,
lambda: get_tr(message, prefix, filter))
message = get_from_cache(
self.cache, prefix+message,
lambda: get_tr(message, prefix, filter))
if symbols or symbols == 0 or symbols == "":
if isinstance(symbols, dict):
symbols.update( (key, xmlescape(value).translate(ttab_in))
for key, value in symbols.iteritems()
if not isinstance(value, NUMBERS) )
symbols.update(
(key, xmlescape(value).translate(ttab_in))
for key, value in symbols.iteritems()
if not isinstance(value, NUMBERS) )
else:
if not isinstance(symbols, tuple):
symbols = (symbols,)
symbols = tuple(value if isinstance(value, NUMBERS)
else xmlescape(value).translate(ttab_in)
for value in symbols)
symbols = tuple(
value if isinstance(value, NUMBERS)
else xmlescape(value).translate(ttab_in)
for value in symbols)
message = self.params_substitution(message, symbols)
return XML(message.translate(ttab_out))
def M(self, message, symbols={}, language=None, lazy=None, filter=None, ftag=None):
""" get cached translated markmin-message with inserted parametes
if lazy==True lazyT object is returned
def M(self, message, symbols={}, language=None,
lazy=None, filter=None, ftag=None):
"""
get cached translated markmin-message with inserted parametes
if lazy==True lazyT object is returned
"""
if lazy is None:
lazy = self.lazy
@@ -804,19 +791,21 @@ class translator(object):
"""
get cached translated message with inserted parameters(symbols)
"""
message = get_from_cache(self.cache, message, lambda: self.get_t(message))
message = get_from_cache(self.cache, message,
lambda: self.get_t(message))
if symbols or symbols == 0 or symbols == "":
if isinstance(symbols, dict):
symbols.update( (key, str(value).translate(ttab_in))
for key, value in symbols.iteritems()
if not isinstance(value, NUMBERS) )
symbols.update(
(key, str(value).translate(ttab_in))
for key, value in symbols.iteritems()
if not isinstance(value, NUMBERS) )
else:
if not isinstance(symbols, tuple):
symbols = (symbols,)
symbols = tuple(value if isinstance(value, NUMBERS)
else str(value).translate(ttab_in)
for value in symbols)
symbols = tuple(
value if isinstance(value, NUMBERS)
else str(value).translate(ttab_in)
for value in symbols)
message = self.params_substitution(message, symbols)
return message.translate(ttab_out)
@@ -830,26 +819,25 @@ def findT(path, language='en'):
cp = ospath.join(path, 'controllers')
vp = ospath.join(path, 'views')
mop = ospath.join(path, 'modules')
for file in listdir(mp, '^.+\.py$', 0) + listdir(cp, '^.+\.py$', 0)\
+ listdir(vp, '^.+\.html$', 0) + listdir(mop, '^.+\.py$', 0):
fp = portalocker.LockedFile(file, 'r')
data = fp.read()
fp.close()
for filename in \
listdir(mp, '^.+\.py$', 0)+listdir(cp, '^.+\.py$', 0)\
+listdir(vp, '^.+\.html$', 0)+listdir(mop, '^.+\.py$', 0):
data = portalocker.read_locked(filename)
items = regex_translate.findall(data)
for item in items:
try:
message = eval(item)
if not message.startswith('#') and not '\n' in message:
tokens = message.rsplit('##', 1)
else:
# this allows markmin syntax in translations
tokens = [message]
if len(tokens) == 2:
message = tokens[0].strip() + '##' + tokens[1].strip()
if message and not message in sentences:
sentences[message] = message
message = safe_eval(item)
except:
pass
continue # silently ignore inproperly formatted strings
if not message.startswith('#') and not '\n' in message:
tokens = message.rsplit('##', 1)
else:
# this allows markmin syntax in translations
tokens = [message]
if len(tokens) == 2:
message = tokens[0].strip()+'##'+tokens[1].strip()
if message and not message in sentences:
sentences[message] = message
if not '!langcode!' in sentences:
sentences['!langcode!'] = (
'en' if language in ('default', 'en') else language)
@@ -875,8 +863,3 @@ def update_all_languages(application_path):
if __name__ == '__main__':
import doctest
doctest.testmod()