From 45a376eee904cf986f01a82e2fd41fed37ec1ed6 Mon Sep 17 00:00:00 2001
From: mdipierro <massimo.dipierro@gmail.com>
Date: Sat, 18 Jun 2016 08:12:01 -0500
Subject: [PATCH] added extra_mssql_models.py, thanks Kyle Flanagan

---
 gluon/packages/dal              |   2 +-
 scripts/extract_mssql_models.py | 328 ++++++++++++++++++++++++++++++++
 2 files changed, 329 insertions(+), 1 deletion(-)
 create mode 100644 scripts/extract_mssql_models.py

diff --git a/gluon/packages/dal b/gluon/packages/dal
index f44ce95f..902f222e 160000
--- a/gluon/packages/dal
+++ b/gluon/packages/dal
@@ -1 +1 @@
-Subproject commit f44ce95f685d1fad484fefd22c4105914b1de56e
+Subproject commit 902f222eb89451cb41bb20362a27a3410a0fdbb3
diff --git a/scripts/extract_mssql_models.py b/scripts/extract_mssql_models.py
new file mode 100644
index 00000000..e6fd807e
--- /dev/null
+++ b/scripts/extract_mssql_models.py
@@ -0,0 +1,328 @@
+"""Create web2py model (python code) to represent MS SQL Server tables.
+Features:
+* Uses ANSI Standard INFORMATION_SCHEMA (might work with other RDBMS)
+* Detects legacy "keyed" tables (not having an "id" PK)
+* Handles 'funny' column names. web2py requires all column names to be valid Python identifiers. This script uses
+  rname for column names that have spaces or are otherwise invalid Python identifiers.
+* Connects directly to running databases, no need to do a SQL dump
+* Handles notnull, unique and referential constraints
+* Detects most common datatypes and default values
+* Supports running from the command line as well as from an IDE's debug menu. See the COMMAND_LINE_MODE constant below
+  for more info.
+
+Requirements:
+* Needs pyodbc python connector
+
+Created by Kyle Flanagan. Based on a script by Mariano Reingart which was
+based on a script to "generate schemas from dbs" (mysql) by Alexandre Andrade
+"""
+
+_author__ = "Kyle Flanagan <kyleflanagan@gmail.com>"
+
+HELP = """
+USAGE: extract_mssql_models db host port user passwd
+Call with SQL Server database connection parameters,
+web2py model will be printed on standard output.
+EXAMPLE: python extract_mssql_models.py mydb localhost 1433 kflanaga pass
+or
+python extract_mssql_models.py mydb localhost 1433 kflanaga pass > db_model.py
+"""
+
+# Config options
+DEBUG = False  # print debug messages to STDERR
+SCHEMA = 'dbo'
+COMMAND_LINE_MODE = True  # running from command prompt. Disable to specify variables and use in IDE
+# Only specify values below if not running from command line
+DB = None
+HOST = None
+USER = None
+PASSWD = None
+PORT = None
+
+# Constant for Field keyword parameter order (and filter):
+KWARGS = ('type', 'length', 'default', 'required', 'ondelete',
+          'notnull', 'unique', 'label', 'comment', 'rname')
+
+import sys
+import re
+# This is from pydal/helpers/regex.py as of 2016-06-16
+# Use this to recognize if a field name needs to have an rname representation
+REGEX_VALID_TB_FLD = re.compile(r'^[^\d_][_0-9a-zA-Z]*\Z')
+# For replacing invalid characters in field names
+INVALID_CHARS = re.compile(r'[^a-zA-Z0-9_]')
+
+
+def get_valid_column_name(field):
+    """Return a valid column name that follows Python's rules for identifiers, which is what web2py requires for column
+    names. Replaces invalid characters with underscores and a leading digit with its English word ('1st' -> 'Onest')."""
+    if not REGEX_VALID_TB_FLD.match(field):
+        # If the first character is a digit, replace it with its word counterpart
+        if re.match(r'^[0-9]', field):
+            numbers = ['Zero', 'One', 'Two', 'Three', 'Four',
+                       'Five', 'Six', 'Seven', 'Eight', 'Nine']
+            field = numbers[int(field[0])] + field[1:]
+
+        field = INVALID_CHARS.sub('_', field)
+    return field
+
+
+def query(conn, sql, *args):
+    "Execute a SQL query and return rows as a list of dicts"
+    cur = conn.cursor()
+    ret = []
+    try:
+        if DEBUG: print >> sys.stderr, "QUERY: ", sql % args
+        cur.execute(sql % args)
+        for row in cur:
+            dic = {}
+            for i, value in enumerate(row):
+                field = cur.description[i][0]
+                dic[field] = value
+            if DEBUG: print >> sys.stderr, "RET: ", dic
+            ret.append(dic)
+        return ret
+    finally:
+        cur.close()
+
+
+def get_tables(conn, schema=SCHEMA):
+    "List table names in a given schema"
+    rows = query(conn, """SELECT table_name FROM information_schema.tables
+        WHERE table_schema = '%s'
+        ORDER BY table_name""", schema)
+    return [row['table_name'] for row in rows]
+
+
+def get_fields(conn, table):
+    "Retrieve field list for a given table"
+    if DEBUG: print >> sys.stderr, "Processing TABLE", table
+    rows = query(conn, """
+        SELECT column_name, data_type,
+            is_nullable,
+            character_maximum_length,
+            numeric_precision, numeric_precision_radix, numeric_scale,
+            column_default
+        FROM information_schema.columns
+        WHERE table_name='%s'
+        ORDER BY ordinal_position""", table)
+    return rows
+
+
+def define_field(conn, table, field, pks):
+    "Determine field type, default value, references, etc."
+    f = {}
+    ref = references(conn, table, field['column_name'])
+    if ref:
+        f.update(ref)
+    elif field['column_default'] and \
+            field['column_default'].startswith("nextval") and \
+                    field['column_name'] in pks:
+        f['type'] = "'id'"
+    elif field['data_type'].startswith('character'):
+        f['type'] = "'string'"
+        if field['character_maximum_length']:
+            f['length'] = field['character_maximum_length']
+    elif field['data_type'] in ('text', 'ntext'):
+        f['type'] = "'text'"
+    elif field['data_type'] in ('boolean', 'bit'):
+        f['type'] = "'boolean'"
+    elif field['data_type'] in ('tinyint', 'smallint', 'bigint', 'int'):
+        f['type'] = "'integer'"
+    elif field['data_type'] in ('real', 'float'):
+        f['type'] = "'double'"
+    elif field['data_type'] in ('datetime', 'datetime2', 'smalldatetime'):
+        f['type'] = "'datetime'"
+    elif field['data_type'] in ('timestamp',):
+        f['type'] = "'datetime'"
+        f['default'] = "request.now"
+        f['update'] = "request.now"
+    elif field['data_type'] in ('date',):
+        f['type'] = "'date'"
+    elif field['data_type'] in ('time',):
+        f['type'] = "'time'"
+    elif field['data_type'] in ('numeric', 'money', 'smallmoney', 'decimal'):
+        f['type'] = "'decimal'"
+        f['precision'] = field['numeric_precision']
+        f['scale'] = field['numeric_scale'] or 0
+    elif field['data_type'] in ('binary', 'varbinary', 'image'):
+        f['type'] = "'blob'"
+    elif field['data_type'] in ('point', 'lseg', 'polygon', 'unknown', 'USER-DEFINED', 'sql_variant'):
+        f['type'] = ""  # unsupported?
+    elif field['data_type'] in ('varchar', 'char', 'nchar', 'nvarchar', 'uniqueidentifer'):
+        f['type'] = "'string'"
+    else:
+        raise RuntimeError("Data Type not supported: %s " % str(field))
+
+    try:
+        if field['column_default']:
+            if field['column_default'] == "now()":
+                d = "request.now"
+            elif field['column_default'] == "true":
+                d = "True"
+            elif field['column_default'] == "false":
+                d = "False"
+            else:
+                d = repr(eval(field['column_default']))
+            f['default'] = str(d)
+    except (ValueError, SyntaxError):
+        pass
+    except Exception, e:
+        raise RuntimeError("Default unsupported '%s'" % field['column_default'])
+
+    if not field['is_nullable']:
+        f['notnull'] = "True"
+
+    # For field names that are not valid python identifiers, we need to add a reference to their actual name
+    # in the back end database
+    if not REGEX_VALID_TB_FLD.match(field['column_name']):
+        f['rname'] = "'[%s]'" % field['column_name']
+
+    return f
+
+
+def is_unique(conn, table, field):
+    "Find unique columns (incomplete support)"
+    rows = query(conn, """
+        SELECT c.column_name
+        FROM information_schema.table_constraints t
+        INNER JOIN information_schema.constraint_column_usage c
+        ON (t.CONSTRAINT_CATALOG =    c.CONSTRAINT_CATALOG
+            AND t.CONSTRAINT_NAME =   c.CONSTRAINT_NAME
+            AND t.CONSTRAINT_SCHEMA = c.CONSTRAINT_SCHEMA
+            AND t.TABLE_CATALOG =     c.TABLE_CATALOG
+            AND t.TABLE_NAME =        c.TABLE_NAME
+            AND t.TABLE_SCHEMA =      c.TABLE_SCHEMA)
+        WHERE t.table_name='%s'
+          AND c.column_name='%s'
+          AND t.constraint_type='UNIQUE'
+        ;""", table, field['column_name'])
+    return rows and True or False
+
+
+def primarykeys(conn, table):
+    "Find primary keys"
+    rows = query(conn, """
+        SELECT c.column_name
+        FROM information_schema.table_constraints t
+        INNER JOIN information_schema.constraint_column_usage c
+                ON (t.CONSTRAINT_CATALOG =    c.CONSTRAINT_CATALOG
+            AND t.CONSTRAINT_NAME =   c.CONSTRAINT_NAME
+            AND t.CONSTRAINT_SCHEMA = c.CONSTRAINT_SCHEMA
+            AND t.TABLE_CATALOG =     c.TABLE_CATALOG
+            AND t.TABLE_NAME =        c.TABLE_NAME
+            AND t.TABLE_SCHEMA =      c.TABLE_SCHEMA)
+        WHERE t.table_name='%s'
+          AND t.constraint_type='PRIMARY KEY'
+        ;""", table)
+    return [row['column_name'] for row in rows]
+
+
+def references(conn, table, field):
+    "Find a FK (fails if multiple)"
+    rows1 = query(conn, """
+        SELECT k.table_name, k.column_name, k.constraint_name,
+               r.update_rule, r.delete_rule, k.ordinal_position
+        FROM information_schema.key_column_usage k
+        INNER JOIN information_schema.referential_constraints r
+        ON (k.CONSTRAINT_CATALOG =    r.CONSTRAINT_CATALOG
+            AND k.CONSTRAINT_NAME =   r.CONSTRAINT_NAME
+            AND k.CONSTRAINT_SCHEMA = r.CONSTRAINT_SCHEMA)
+        INNER JOIN information_schema.table_constraints t
+        ON (r.CONSTRAINT_CATALOG =    t.CONSTRAINT_CATALOG
+            AND r.CONSTRAINT_NAME =   t.CONSTRAINT_NAME
+            AND r.CONSTRAINT_SCHEMA = t.CONSTRAINT_SCHEMA)
+
+        WHERE k.table_name='%s'
+          AND k.column_name='%s'
+          AND t.constraint_type='FOREIGN KEY'
+          ;""", table, field)
+    if len(rows1) == 1:
+        rows2 = query(conn, """
+            SELECT table_name, column_name, *
+            FROM information_schema.constraint_column_usage
+            WHERE constraint_name='%s'
+            """, rows1[0]['constraint_name'])
+        row = None
+        if len(rows2) > 1:
+            row = rows2[int(rows1[0]['ordinal_position']) - 1]
+            keyed = True
+        if len(rows2) == 1:
+            row = rows2[0]
+            keyed = False
+        if row:
+            if keyed:  # THIS IS BAD, DON'T MIX "id" and primarykey!!!
+                ref = {'type': "'reference %s.%s'" % (row['table_name'],
+                                                      row['column_name'])}
+            else:
+                ref = {'type': "'reference %s'" % (row['table_name'],)}
+            if rows1[0]['delete_rule'] != "NO ACTION":
+                ref['ondelete'] = repr(rows1[0]['delete_rule'])
+            return ref
+        elif rows2:
+            raise RuntimeError("Unsupported foreign key reference: %s" %
+                               str(rows2))
+
+    elif rows1:
+        raise RuntimeError("Unsupported referential constraint: %s" %
+                           str(rows1))
+
+
+def define_table(conn, table):
+    "Output single table definition"
+    fields = get_fields(conn, table)
+    pks = primarykeys(conn, table)
+    print "db.define_table('%s'," % (table,)
+    for field in fields:
+        fname = field['column_name']
+        fdef = define_field(conn, table, field, pks)
+        if fname not in pks and is_unique(conn, table, field):
+            fdef['unique'] = "True"
+        if fdef['type'] == "'id'" and fname in pks:
+            pks.pop(pks.index(fname))
+        print "    Field('%s', %s)," % (get_valid_column_name(fname),
+                                        ', '.join(["%s=%s" % (k, fdef[k]) for k in KWARGS
+                                                   if k in fdef and fdef[k]]))
+    if pks:
+        print "    primarykey=[%s]," % ", ".join(["'%s'" % pk for pk in pks])
+    print     "    migrate=migrate)"
+    print
+
+
+def define_db(conn, db, host, port, user, passwd):
+    "Output database definition (model)"
+    dal = 'db = DAL("mssql4://%s:%s@%s:%s/%s", pool_size=10, decode_credentials=True)'
+    print dal % (
+        user.replace('@', '%40').replace(':', '%3A'), passwd.replace('@', '%40').replace(':', '%3A'), host, port, db)
+    print
+    print "migrate = False"
+    print
+    for table in get_tables(conn):
+        define_table(conn, table)
+
+
+if __name__ == "__main__":
+    # Show the usage help when too few command line arguments are given:
+    if len(sys.argv) < 6 and COMMAND_LINE_MODE:
+        print HELP
+    else:
+        # Parse arguments from command line:
+        if COMMAND_LINE_MODE:
+            db, host, port, user, passwd = sys.argv[1:6]
+        else:
+            db = DB
+            host = HOST
+            user = USER
+            passwd = PASSWD
+            port = PORT
+
+        # Make the database connection (change driver if required)
+        import pyodbc
+        # cnn = pyodbc.connect(database=db, host=host, port=port,
+        #                        user=user, password=passwd,
+        #                        )
+        cnn = pyodbc.connect(
+            r'DRIVER={{SQL Server Native Client 11.0}};SERVER={server};PORT={port};DATABASE={db};UID={user};PWD={passwd}'.format(
+                server=host, port=port, db=db, user=user, passwd=passwd)
+        )
+        # Start model code generation:
+        define_db(cnn, db, host, port, user, passwd)