From 34f753be56f05f67e652c2d83e7f2424b3c19d55 Mon Sep 17 00:00:00 2001
From: ilvalle <paolo.valleri@gmail.com>
Date: Sat, 11 Jun 2016 20:12:30 +0200
Subject: [PATCH] fix languages

---
 gluon/languages.py            | 20 ++++++++------------
 gluon/tests/__init__.py       |  2 +-
 gluon/tests/test_languages.py |  7 +++++--
 gluon/utf8.py                 | 26 +++++++++++++-------------
 4 files changed, 27 insertions(+), 28 deletions(-)

diff --git a/gluon/languages.py b/gluon/languages.py
index 2d29a51a..b40cabd3 100644
--- a/gluon/languages.py
+++ b/gluon/languages.py
@@ -18,7 +18,8 @@ import pkgutil
 import logging
 from cgi import escape
 from threading import RLock
-from gluon._compat import copyreg, PY2, maketrans, iterkeys, unicodeT, to_unicode, to_bytes, iteritems, _local_html_escape, to_native
+from gluon._compat import copyreg, PY2, maketrans, iterkeys, unicodeT, to_unicode, to_bytes, iteritems, _local_html_escape, to_native, \
+                          pjoin
 
 from gluon.portalocker import read_locked, LockedFile
 from gluon.utf8 import Utf8
@@ -34,8 +35,6 @@ __all__ = ['translator', 'findT', 'update_all_languages']
 
 ostat = os.stat
 oslistdir = os.listdir
-pjoin = os.path.join
-pexists = os.path.exists
 pdirname = os.path.dirname
 isdir = os.path.isdir
 
@@ -167,7 +166,7 @@ def read_dict_aux(filename):
     lang_text = read_locked(filename).replace(b'\r\n', b'\n')
     clear_cache(filename)
     try:
-        return safe_eval(lang_text) or {}
+        return safe_eval(to_native(lang_text)) or {}
     except Exception:
         e = sys.exc_info()[1]
         status = 'Syntax error in %s (%s)' % (filename, e)
@@ -623,7 +622,6 @@ class translator(object):
         of them matches possible_languages.
         """
         pl_info = read_possible_languages(self.langpath)
-
         def set_plural(language):
             """
             initialize plural forms subsystem
@@ -800,18 +798,16 @@ class translator(object):
         the ## notation is ignored in multiline strings and strings that
         start with ##. This is needed to allow markmin syntax to be translated
         """
-        if isinstance(message, unicodeT):
-            message = message.encode('utf8')
-        if isinstance(prefix, unicodeT):
-            prefix = prefix.encode('utf8')
+        message = to_native(message, 'utf8')
+        prefix = to_native(prefix, 'utf8')
         key = prefix + message
         mt = self.t.get(key, None)
         if mt is not None:
             return mt
         # we did not find a translation
-        if message.find(to_bytes('##')) > 0:
+        if message.find('##') > 0:
             pass
-        if message.find(to_bytes('##')) > 0 and not '\n' in message:
+        if message.find('##') > 0 and not '\n' in message:
             # remove comments
             message = message.rsplit('##', 1)[0]
         # guess translation same as original
@@ -966,7 +962,7 @@ def findT(path, language=DEFAULT_LANGUAGE):
     for filename in \
             listdir(mp, '^.+\.py$', 0) + listdir(cp, '^.+\.py$', 0)\
             + listdir(vp, '^.+\.html$', 0) + listdir(mop, '^.+\.py$', 0):
-        data = read_locked(filename)
+        data = to_native(read_locked(filename))
         items = regex_translate.findall(data)
         items += regex_translate_m.findall(data)
         for item in items:
diff --git a/gluon/tests/__init__.py b/gluon/tests/__init__.py
index cb3f7c13..5db0c683 100644
--- a/gluon/tests/__init__.py
+++ b/gluon/tests/__init__.py
@@ -17,11 +17,11 @@ from .test_validators import *
 from .test_tools import *
 from .test_utils import *
 from .test_serializers import *
+from .test_languages import *
 
 if sys.version[:3] == '2.7':
     from .test_compileapp import *
     from .test_is_url import *
-    from .test_languages import *
     from .test_appadmin import *
     from .test_scheduler import *
     from .test_web import *
diff --git a/gluon/tests/test_languages.py b/gluon/tests/test_languages.py
index bed7ff42..0df6f21b 100644
--- a/gluon/tests/test_languages.py
+++ b/gluon/tests/test_languages.py
@@ -15,6 +15,7 @@ from .fix_path import fix_sys_path
 fix_sys_path(__file__)
 
 from gluon import languages
+from gluon._compat import PY2
 
 MP_WORKING = 0
 try:
@@ -105,8 +106,10 @@ class TestTranslations(unittest.TestCase):
                          '1 quark')
         self.assertEqual(str(T('%s %%{quark[0]}', 2)),
                          '2 quarks')
-        self.assertEqual(str(T.M('**Hello World**')),
-                         '<strong>Hello World</strong>')
+        if PY2:
+            # FIXME: markmin is not supported on Python 3 yet, so T.M() is only checked on PY2
+            self.assertEqual(str(T.M('**Hello World**')),
+                             '<strong>Hello World</strong>')
         T.force('it')
         self.assertEqual(str(T('Hello World')),
                          'Salve Mondo')
diff --git a/gluon/utf8.py b/gluon/utf8.py
index 6fbab2dd..21fd12c4 100644
--- a/gluon/utf8.py
+++ b/gluon/utf8.py
@@ -11,7 +11,7 @@ Utilities and class for UTF8 strings managing
 ----------------------------------------------
 """
 from __future__ import print_function
-from gluon._compat import builtin as __builtin__, unicodeT, iteritems, to_unicode
+from gluon._compat import builtin as __builtin__, unicodeT, iteritems, to_unicode, to_native
 
 __all__ = ['Utf8']
 
@@ -51,10 +51,10 @@ def sort_key(s):
         from gluon.contrib.pyuca import unicode_collator
         unicode_sort_key = unicode_collator.sort_key
         sort_key = lambda s: unicode_sort_key(
-            unicode(s, 'utf-8') if isinstance(s, str) else s)
+            to_unicode(s, 'utf-8') if isinstance(s, str) else s)
     except:
         sort_key = lambda s: (
-            unicode(s, 'utf-8') if isinstance(s, str) else s).lower()
+            to_unicode(s, 'utf-8') if isinstance(s, str) else s).lower()
     return sort_key(s)
 
 
@@ -64,7 +64,7 @@ def ord(char):
     """
     if isinstance(char, unicodeT):
         return __builtin__.ord(char)
-    return __builtin__.ord(unicode(char, 'utf-8'))
+    return __builtin__.ord(to_unicode(char, 'utf-8'))
 
 
 def chr(code):
@@ -92,8 +92,8 @@ def truncate(string, length, dots='...'):
     Returns:
         (utf8-str): original or cutted string
     """
-    text = unicode(string, 'utf-8')
-    dots = unicode(dots, 'utf-8') if isinstance(dots, str) else dots
+    text = to_unicode(string, 'utf-8')
+    dots = to_unicode(dots, 'utf-8') if isinstance(dots, str) else dots
     if len(text) > length:
         text = text[:length - len(dots)] + dots
     return str.__new__(Utf8, text.encode('utf-8'))
@@ -120,11 +120,11 @@ class Utf8(str):
     """
     def __new__(cls, content='', codepage='utf-8'):
         if isinstance(content, unicodeT):
-            return str.__new__(cls, unicode.encode(content, 'utf-8'))
+            return str.__new__(cls, to_native(content, 'utf-8'))
         elif codepage in ('utf-8', 'utf8') or isinstance(content, cls):
             return str.__new__(cls, content)
         else:
-            return str.__new__(cls, unicode(content, codepage).encode('utf-8'))
+            return str.__new__(cls, to_native(to_unicode(content, codepage), 'utf-8'))
 
     def __repr__(self):
         r''' # note that we use raw strings to avoid having to use double back slashes below
@@ -156,9 +156,9 @@ class Utf8(str):
             True
         '''
         if str.find(self, "'") >= 0 and str.find(self, '"') < 0:  # only single quote exists
-            return '"' + unicode(self, 'utf-8').translate(repr_escape_tab).encode('utf-8') + '"'
+            return '"' + to_native(to_unicode(self, 'utf-8').translate(repr_escape_tab), 'utf-8') + '"'
         else:
-            return "'" + unicode(self, 'utf-8').translate(repr_escape_tab2).encode('utf-8') + "'"
+            return "'" + to_native(to_unicode(self, 'utf-8').translate(repr_escape_tab2), 'utf-8') + "'"
 
     def __size__(self):
         """ length of utf-8 string in bytes """
@@ -168,17 +168,17 @@ class Utf8(str):
         return str.__contains__(self, Utf8(other))
 
     def __getitem__(self, index):
-        return str.__new__(Utf8, unicode(self, 'utf-8')[index].encode('utf-8'))
+        return str.__new__(Utf8, to_native(to_unicode(self, 'utf-8')[index], 'utf-8'))
 
     def __getslice__(self, begin, end):
-        return str.__new__(Utf8, unicode(self, 'utf-8')[begin:end].encode('utf-8'))
+        return str.__new__(Utf8, to_native(to_unicode(self, 'utf-8')[begin:end], 'utf-8'))
 
     def __add__(self, other):
         return str.__new__(Utf8, str.__add__(self, unicode.encode(other, 'utf-8')
                                              if isinstance(other, unicode) else other))
 
     def __len__(self):
-        return len(unicode(self, 'utf-8'))
+        return len(to_unicode(self, 'utf-8'))
 
     def __mul__(self, integer):
         return str.__new__(Utf8, str.__mul__(self, integer))