fix languages

2016-06-11 20:12:30 +02:00
parent a27f6f88ef
commit 34f753be56
4 changed files with 27 additions and 28 deletions
@@ -18,7 +18,8 @@ import pkgutil
 import logging
 from cgi import escape
 from threading import RLock
-from gluon._compat import copyreg, PY2, maketrans, iterkeys, unicodeT, to_unicode, to_bytes, iteritems, _local_html_escape, to_native
+from gluon._compat import copyreg, PY2, maketrans, iterkeys, unicodeT, to_unicode, to_bytes, iteritems, _local_html_escape, to_native, \
+                          pjoin

 from gluon.portalocker import read_locked, LockedFile
 from gluon.utf8 import Utf8
@@ -34,8 +35,6 @@ __all__ = ['translator', 'findT', 'update_all_languages']

 ostat = os.stat
 oslistdir = os.listdir
-pjoin = os.path.join
-pexists = os.path.exists
 pdirname = os.path.dirname
 isdir = os.path.isdir

@@ -167,7 +166,7 @@ def read_dict_aux(filename):
    lang_text = read_locked(filename).replace(b'\r\n', b'\n')
    clear_cache(filename)
    try:
-        return safe_eval(lang_text) or {}
+        return safe_eval(to_native(lang_text)) or {}
    except Exception:
        e = sys.exc_info()[1]
        status = 'Syntax error in %s (%s)' % (filename, e)
@@ -623,7 +622,6 @@ class translator(object):
        of them matches possible_languages.
        """
        pl_info = read_possible_languages(self.langpath)
-
        def set_plural(language):
            """
            initialize plural forms subsystem
@@ -800,18 +798,16 @@ class translator(object):
        the ## notation is ignored in multiline strings and strings that
        start with ##. This is needed to allow markmin syntax to be translated
        """
-        if isinstance(message, unicodeT):
-            message = message.encode('utf8')
-        if isinstance(prefix, unicodeT):
-            prefix = prefix.encode('utf8')
+        message = to_native(message, 'utf8')
+        prefix = to_native(prefix, 'utf8')
        key = prefix + message
        mt = self.t.get(key, None)
        if mt is not None:
            return mt
        # we did not find a translation
-        if message.find(to_bytes('##')) > 0:
+        if message.find('##') > 0:
            pass
-        if message.find(to_bytes('##')) > 0 and not '\n' in message:
+        if message.find('##') > 0 and not '\n' in message:
            # remove comments
            message = message.rsplit('##', 1)[0]
        # guess translation same as original
@@ -966,7 +962,7 @@ def findT(path, language=DEFAULT_LANGUAGE):
    for filename in \
            listdir(mp, '^.+\.py$', 0) + listdir(cp, '^.+\.py$', 0)\
            + listdir(vp, '^.+\.html$', 0) + listdir(mop, '^.+\.py$', 0):
-        data = read_locked(filename)
+        data = to_native(read_locked(filename))
        items = regex_translate.findall(data)
        items += regex_translate_m.findall(data)
        for item in items:
@@ -17,11 +17,11 @@ from .test_validators import *
 from .test_tools import *
 from .test_utils import *
 from .test_serializers import *
+from .test_languages import *

 if sys.version[:3] == '2.7':
    from .test_compileapp import *
    from .test_is_url import *
-    from .test_languages import *
    from .test_appadmin import *
    from .test_scheduler import *
    from .test_web import *
@@ -15,6 +15,7 @@ from .fix_path import fix_sys_path
 fix_sys_path(__file__)

 from gluon import languages
+from gluon._compat import PY2

 MP_WORKING = 0
 try:
@@ -105,8 +106,10 @@ class TestTranslations(unittest.TestCase):
                         '1 quark')
        self.assertEqual(str(T('%s %%{quark[0]}', 2)),
                         '2 quarks')
-        self.assertEqual(str(T.M('**Hello World**')),
-                         '<strong>Hello World</strong>')
+        if PY2:
+            # FIXME PY3 markmin is not supported yet
+            self.assertEqual(str(T.M('**Hello World**')),
+                             '<strong>Hello World</strong>')
        T.force('it')
        self.assertEqual(str(T('Hello World')),
                         'Salve Mondo')
@@ -11,7 +11,7 @@ Utilities and class for UTF8 strings managing
 ----------------------------------------------
 """
 from __future__ import print_function
-from gluon._compat import builtin as __builtin__, unicodeT, iteritems, to_unicode
+from gluon._compat import builtin as __builtin__, unicodeT, iteritems, to_unicode, to_native

 __all__ = ['Utf8']

@@ -51,10 +51,10 @@ def sort_key(s):
        from gluon.contrib.pyuca import unicode_collator
        unicode_sort_key = unicode_collator.sort_key
        sort_key = lambda s: unicode_sort_key(
-            unicode(s, 'utf-8') if isinstance(s, str) else s)
+            to_unicode(s, 'utf-8') if isinstance(s, str) else s)
    except:
        sort_key = lambda s: (
-            unicode(s, 'utf-8') if isinstance(s, str) else s).lower()
+            to_unicode(s, 'utf-8') if isinstance(s, str) else s).lower()
    return sort_key(s)


@@ -64,7 +64,7 @@ def ord(char):
    """
    if isinstance(char, unicodeT):
        return __builtin__.ord(char)
-    return __builtin__.ord(unicode(char, 'utf-8'))
+    return __builtin__.ord(to_unicode(char, 'utf-8'))


 def chr(code):
@@ -92,8 +92,8 @@ def truncate(string, length, dots='...'):
    Returns:
        (utf8-str): original or cutted string
    """
-    text = unicode(string, 'utf-8')
-    dots = unicode(dots, 'utf-8') if isinstance(dots, str) else dots
+    text = to_unicode(string, 'utf-8')
+    dots = to_unicode(dots, 'utf-8') if isinstance(dots, str) else dots
    if len(text) > length:
        text = text[:length - len(dots)] + dots
    return str.__new__(Utf8, text.encode('utf-8'))
@@ -120,11 +120,11 @@ class Utf8(str):
    """
    def __new__(cls, content='', codepage='utf-8'):
        if isinstance(content, unicodeT):
-            return str.__new__(cls, unicode.encode(content, 'utf-8'))
+            return str.__new__(cls, to_native(content, 'utf-8'))
        elif codepage in ('utf-8', 'utf8') or isinstance(content, cls):
            return str.__new__(cls, content)
        else:
-            return str.__new__(cls, unicode(content, codepage).encode('utf-8'))
+            return str.__new__(cls, to_native(to_unicode(content, codepage), 'utf-8'))

    def __repr__(self):
        r''' # note that we use raw strings to avoid having to use double back slashes below
@@ -156,9 +156,9 @@ class Utf8(str):
            True
        '''
        if str.find(self, "'") >= 0 and str.find(self, '"') < 0:  # only single quote exists
-            return '"' + unicode(self, 'utf-8').translate(repr_escape_tab).encode('utf-8') + '"'
+            return '"' + to_native(to_unicode(self, 'utf-8').translate(repr_escape_tab), 'utf-8') + '"'
        else:
-            return "'" + unicode(self, 'utf-8').translate(repr_escape_tab2).encode('utf-8') + "'"
+            return "'" + to_native(to_unicode(self, 'utf-8').translate(repr_escape_tab2), 'utf-8') + "'"

    def __size__(self):
        """ length of utf-8 string in bytes """
@@ -168,17 +168,17 @@ class Utf8(str):
        return str.__contains__(self, Utf8(other))

    def __getitem__(self, index):
-        return str.__new__(Utf8, unicode(self, 'utf-8')[index].encode('utf-8'))
+        return str.__new__(Utf8, to_native(to_unicode(self, 'utf-8')[index], 'utf-8'))

    def __getslice__(self, begin, end):
-        return str.__new__(Utf8, unicode(self, 'utf-8')[begin:end].encode('utf-8'))
+        return str.__new__(Utf8, to_native(to_unicode(self, 'utf-8')[begin:end], 'utf-8'))

    def __add__(self, other):
        return str.__new__(Utf8, str.__add__(self, unicode.encode(other, 'utf-8')
                                             if isinstance(other, unicode) else other))

    def __len__(self):
-        return len(unicode(self, 'utf-8'))
+        return len(to_unicode(self, 'utf-8'))

    def __mul__(self, integer):
        return str.__new__(Utf8, str.__mul__(self, integer))