From 4fa8b3ed674a7a33f8718a00be7a6285bf23e93e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Richard=20V=C3=A9zina?= Date: Fri, 15 Apr 2016 12:33:04 -0400 Subject: [PATCH] Update markdown2.py 2.2.4 -> 2.3.1 (latest release) --- gluon/contrib/markdown/markdown2.py | 141 +++++++++++++++++++++++----- 1 file changed, 117 insertions(+), 24 deletions(-) diff --git a/gluon/contrib/markdown/markdown2.py b/gluon/contrib/markdown/markdown2.py index c03eb111..50af2a5f 100644 --- a/gluon/contrib/markdown/markdown2.py +++ b/gluon/contrib/markdown/markdown2.py @@ -53,8 +53,9 @@ see for details): * header-ids: Adds "id" attributes to headers. The id value is a slug of the header text. * html-classes: Takes a dict mapping html tag names (lowercase) to a - string to use for a "class" tag attribute. Currently only supports - "pre" and "code" tags. Add an issue if you require this for other tags. + string to use for a "class" tag attribute. Currently only supports "img", + "table", "pre" and "code" tags. Add an issue if you require this for other + tags. * markdown-in-html: Allow the use of `markdown="1"` in a block HTML tag to have markdown processing be done on its contents. Similar to but with @@ -70,9 +71,14 @@ see for details): * smarty-pants: Replaces ' and " with curly quotation marks or curly apostrophes. Replaces --, ---, ..., and . . . with en dashes, em dashes, and ellipses. +* spoiler: A special kind of blockquote commonly hidden behind a + click on SO. Syntax per . * toc: The returned HTML string gets a new "toc_html" attribute which is a Table of Contents for the document. (experimental) * xml: Passes one-liner processing instructions and namespaced XML tags. +* tables: Tables using the same format as GFM + and + PHP-Markdown Extra . * wiki-tables: Google Code Wiki-style tables. See . """ @@ -82,13 +88,11 @@ see for details): # not yet sure if there implications with this. Compare 'pydoc sre' # and 'perldoc perlre'. 
-__version_info__ = (2, 2, 4) +__version_info__ = (2, 3, 1) __version__ = '.'.join(map(str, __version_info__)) __author__ = "Trent Mick" -import os import sys -from pprint import pprint import re import logging try: @@ -102,13 +106,7 @@ import codecs #---- Python version compat -try: - from urllib.parse import quote # python3 -except ImportError: - from urllib import quote # python2 - if sys.version_info[:2] < (2,4): - from sets import Set as set def reversed(sequence): for i in sequence[::-1]: yield i @@ -804,6 +802,8 @@ class Markdown(object): text = self._prepare_pyshell_blocks(text) if "wiki-tables" in self.extras: text = self._do_wiki_tables(text) + if "tables" in self.extras: + text = self._do_tables(text) text = self._do_code_blocks(text) @@ -844,6 +844,79 @@ class Markdown(object): return _pyshell_block_re.sub(self._pyshell_block_sub, text) + def _table_sub(self, match): + trim_space_re = '^[ \t\n]+|[ \t\n]+$' + trim_bar_re = '^\||\|$' + + head, underline, body = match.groups() + + # Determine aligns for columns. 
+ cols = [cell.strip() for cell in re.sub(trim_bar_re, "", re.sub(trim_space_re, "", underline)).split('|')] + align_from_col_idx = {} + for col_idx, col in enumerate(cols): + if col[0] == ':' and col[-1] == ':': + align_from_col_idx[col_idx] = ' align="center"' + elif col[0] == ':': + align_from_col_idx[col_idx] = ' align="left"' + elif col[-1] == ':': + align_from_col_idx[col_idx] = ' align="right"' + + # thead + hlines = ['<table%s>' % self._html_class_str_from_tag('table'), '<thead>', '<tr>'] + cols = [cell.strip() for cell in re.sub(trim_bar_re, "", re.sub(trim_space_re, "", head)).split('|')] + for col_idx, col in enumerate(cols): + hlines.append(' <th%s>%s</th>' % ( + align_from_col_idx.get(col_idx, ''), + self._run_span_gamut(col) + )) + hlines.append('</tr>') + hlines.append('</thead>') + + # tbody + hlines.append('<tbody>') + for line in body.strip('\n').split('\n'): + hlines.append('<tr>') + cols = [cell.strip() for cell in re.sub(trim_bar_re, "", re.sub(trim_space_re, "", line)).split('|')] + for col_idx, col in enumerate(cols): + hlines.append(' <td%s>%s</td>' % ( + align_from_col_idx.get(col_idx, ''), + self._run_span_gamut(col) + )) + hlines.append('</tr>') + hlines.append('</tbody>') + hlines.append('</table>') + + return '\n'.join(hlines) + '\n' + + def _do_tables(self, text): + """Copying PHP-Markdown and GFM table syntax. Some regex borrowed from + https://github.com/michelf/php-markdown/blob/lib/Michelf/Markdown.php#L2538 + """ + less_than_tab = self.tab_width - 1 + table_re = re.compile(r''' + (?:(?<=\n\n)|\A\n?) # leading blank line + + ^[ ]{0,%d} # allowed whitespace + (.*[|].*) \n # $1: header row (at least one pipe) + + ^[ ]{0,%d} # allowed whitespace + ( # $2: underline row + # underline row with leading bar + (?: \|\ *:?-+:?\ * )+ \|? \n + | + # or, underline row without leading bar + (?: \ *:?-+:?\ *\| )+ (?: \ *:?-+:?\ * )? 
\n + ) + + ( # $3: data rows + (?: + ^[ ]{0,%d}(?!\ ) # ensure line begins with 0 to less_than_tab spaces + .*\|.* \n + )+ + ) + ''' % (less_than_tab, less_than_tab, less_than_tab), re.M | re.X) + return table_re.sub(self._table_sub, text) + def _wiki_table_sub(self, match): ttext = match.group(0).strip() #print 'wiki table: %r' % match.group(0) @@ -853,7 +926,7 @@ class Markdown(object): row = [c.strip() for c in re.split(r'(?<!\\)\|\|', line)] rows.append(row) #pprint(rows) - hlines = ['<table>', '<tbody>'] + hlines = ['<table%s>' % self._html_class_str_from_tag('table'), '<tbody>'] for row in rows: hrow = ['<tr>'] for cell in row: @@ -899,6 +972,9 @@ class Markdown(object): text = self._encode_amps_and_angles(text) + if "strike" in self.extras: + text = self._do_strike(text) + text = self._do_italics_and_bold(text) if "smarty-pants" in self.extras: @@ -1206,7 +1282,6 @@ class Markdown(object): .replace('_', self._escape_table['_']) title = self.titles.get(link_id) if title: - before = title title = _xml_escape_attr(title) \ .replace('*', self._escape_table['*']) \ .replace('_', self._escape_table['_']) @@ -1418,7 +1493,6 @@ class Markdown(object): def _list_item_sub(self, match): item = match.group(4) leading_line = match.group(1) - leading_space = match.group(2) if leading_line or "\n\n" in item or self._last_li_endswith_two_eols: item = self._run_block_gamut(self._outdent(item)) else: @@ -1654,6 +1728,11 @@ class Markdown(object): self._escape_table[text] = hashed return hashed + _strike_re = re.compile(r"~~(?=\S)(.+?)(?<=\S)~~", re.S) + def _do_strike(self, text): + text = self._strike_re.sub(r"<strike>\1</strike>", text) + return text + _strong_re = re.compile(r"(\*\*|__)(?=\S)(.+?[*_]*)(?<=\S)\1", re.S) _em_re = re.compile(r"(\*|_)(?=\S)(.+?)(?<=\S)\1", re.S) _code_friendly_strong_re = re.compile(r"\*\*(?=\S)(.+?[*_]*)(?<=\S)\*\*", re.S) @@ -1714,38 +1793,53 @@ class Markdown(object): text = text.replace(". . .", "&#8230;") return text - _block_quote_re = re.compile(r''' + _block_quote_base = r''' ( # Wrap whole match in \1 ( - ^[ \t]*>[ \t]? 
# '>' at the start of a line + ^[ \t]*>%s[ \t]? # '>' at the start of a line .+\n # rest of the first line (.+\n)* # subsequent consecutive lines \n* # blanks )+ ) - ''', re.M | re.X) + ''' + _block_quote_re = re.compile(_block_quote_base % '', re.M | re.X) + _block_quote_re_spoiler = re.compile(_block_quote_base % '[ \t]*?!?', re.M | re.X) _bq_one_level_re = re.compile('^[ \t]*>[ \t]?', re.M); - + _bq_one_level_re_spoiler = re.compile('^[ \t]*>[ \t]*?![ \t]?', re.M); + _bq_all_lines_spoilers = re.compile(r'\A(?:^[ \t]*>[ \t]*?!.*[\n\r]*)+\Z', re.M) _html_pre_block_re = re.compile(r'(\s*
<pre>.+?</pre>
)', re.S) def _dedent_two_spaces_sub(self, match): return re.sub(r'(?m)^ ', '', match.group(1)) def _block_quote_sub(self, match): bq = match.group(1) - bq = self._bq_one_level_re.sub('', bq) # trim one level of quoting - bq = self._ws_only_line_re.sub('', bq) # trim whitespace-only lines + is_spoiler = 'spoiler' in self.extras and self._bq_all_lines_spoilers.match(bq) + # trim one level of quoting + if is_spoiler: + bq = self._bq_one_level_re_spoiler.sub('', bq) + else: + bq = self._bq_one_level_re.sub('', bq) + # trim whitespace-only lines + bq = self._ws_only_line_re.sub('', bq) bq = self._run_block_gamut(bq) # recurse bq = re.sub('(?m)^', ' ', bq) # These leading spaces screw with
<pre> content, so we need to fix that:
         bq = self._html_pre_block_re.sub(self._dedent_two_spaces_sub, bq)
 
-        return "
<blockquote>\n%s\n</blockquote>
\n\n" % bq + if is_spoiler: + return '
<blockquote class="spoiler">\n%s\n</blockquote>
\n\n' % bq + else: + return '
<blockquote>\n%s\n</blockquote>
\n\n' % bq def _do_block_quotes(self, text): if '>' not in text: return text - return self._block_quote_re.sub(self._block_quote_sub, text) + if 'spoiler' in self.extras: + return self._block_quote_re_spoiler.sub(self._block_quote_sub, text) + else: + return self._block_quote_re.sub(self._block_quote_sub, text) def _form_paragraphs(self, text): # Strip leading and trailing lines: @@ -2053,7 +2147,6 @@ def _dedentlines(lines, tabsize=8, skip_first_line=False): if DEBUG: print("dedent: dedent(..., tabsize=%d, skip_first_line=%r)"\ % (tabsize, skip_first_line)) - indents = [] margin = None for i, line in enumerate(lines): if i == 0 and skip_first_line: continue @@ -2362,4 +2455,4 @@ def main(argv=None): if __name__ == "__main__": - sys.exit( main(sys.argv) ) + sys.exit( main(sys.argv) ) \ No newline at end of file