diff --git a/gluon/contrib/markmin/markmin2latex.py b/gluon/contrib/markmin/markmin2latex.py index 65ad4aa8..65bfbab2 100755 --- a/gluon/contrib/markmin/markmin2latex.py +++ b/gluon/contrib/markmin/markmin2latex.py @@ -7,53 +7,57 @@ import sys import doctest from optparse import OptionParser -__all__ = ['render','markmin2latex'] +__all__ = ['render', 'markmin2latex'] META = 'META' regex_newlines = re.compile('(\n\r)|(\r\n)') -regex_dd=re.compile('\$\$(?P.*?)\$\$') -regex_code = re.compile('('+META+')|(``(?P.*?)``(:(?P\w+))?)',re.S) -regex_title = re.compile('^#{1} (?P[^\n]+)',re.M) +regex_dd = re.compile('\$\$(?P.*?)\$\$') +regex_code = re.compile('(' + META + ')|(``(?P.*?)``(:(?P\w+))?)', re.S) +regex_title = re.compile('^#{1} (?P[^\n]+)', re.M) regex_maps = [ - (re.compile('[ \t\r]+\n'),'\n'), - (re.compile('\*\*(?P[^\s\*]+( +[^\s\*]+)*)\*\*'),'{\\\\bf \g}'), - (re.compile("''(?P[^\s']+( +[^\s']+)*)''"),'{\\it \g}'), - (re.compile('^#{5,6}\s*(?P[^\n]+)',re.M),'\n\n{\\\\bf \g}\n'), - (re.compile('^#{4}\s*(?P[^\n]+)',re.M),'\n\n\\\\goodbreak\\subsubsection{\g}\n'), - (re.compile('^#{3}\s*(?P[^\n]+)',re.M),'\n\n\\\\goodbreak\\subsection{\g}\n'), - (re.compile('^#{2}\s*(?P[^\n]+)',re.M),'\n\n\\\\goodbreak\\section{\g}\n'), - (re.compile('^#{1}\s*(?P[^\n]+)',re.M),''), - (re.compile('^\- +(?P.*)',re.M),'\\\\begin{itemize}\n\\item \g\n\\end{itemize}'), - (re.compile('^\+ +(?P.*)',re.M),'\\\\begin{itemize}\n\\item \g\n\\end{itemize}'), - (re.compile('\\\\end\{itemize\}\s+\\\\begin\{itemize\}'),'\n'), - (re.compile('\n\s+\n'),'\n\n')] -regex_table = re.compile('^\-{4,}\n(?P.*?)\n\-{4,}(:(?P\w+))?\n',re.M|re.S) + (re.compile('[ \t\r]+\n'), '\n'), + (re.compile('\*\*(?P[^\s\*]+( +[^\s\*]+)*)\*\*'), '{\\\\bf \g}'), + (re.compile("''(?P[^\s']+( +[^\s']+)*)''"), '{\\it \g}'), + (re.compile('^#{5,6}\s*(?P[^\n]+)', re.M), '\n\n{\\\\bf \g}\n'), + (re.compile('^#{4}\s*(?P[^\n]+)', re.M), '\n\n\\\\goodbreak\\subsubsection{\g}\n'), + (re.compile('^#{3}\s*(?P[^\n]+)', re.M), '\n\n\\\\goodbreak\\subsection{\g}\n'), + (re.compile('^#{2}\s*(?P[^\n]+)', re.M), '\n\n\\\\goodbreak\\section{\g}\n'), + (re.compile('^#{1}\s*(?P[^\n]+)', re.M), ''), + (re.compile('^\- +(?P.*)', re.M), '\\\\begin{itemize}\n\\item \g\n\\end{itemize}'), + (re.compile('^\+ +(?P.*)', re.M), '\\\\begin{itemize}\n\\item \g\n\\end{itemize}'), + (re.compile('\\\\end\{itemize\}\s+\\\\begin\{itemize\}'), '\n'), + (re.compile('\n\s+\n'), '\n\n')] +regex_table = re.compile('^\-{4,}\n(?P.*?)\n\-{4,}(:(?P\w+))?\n', re.M | re.S) regex_anchor = re.compile('\[\[(?P\S+)\]\]') regex_bibitem = re.compile('\-\s*\[\[(?P\S+)\]\]') regex_image_width = re.compile('\[\[(?P[^\]]*?) +(?P\S+) +(?P

left|right|center) +(?P\d+px)\]\]') regex_image = re.compile('\[\[(?P[^\]]*?) +(?P\S+) +(?P

left|right|center)\]\]') -#regex_video = re.compile('\[\[(?P[^\]]*?) +(?P\S+) +video\]\]') -#regex_audio = re.compile('\[\[(?P[^\]]*?) +(?P\S+) +audio\]\]') +# regex_video = re.compile('\[\[(?P[^\]]*?) +(?P\S+) +video\]\]') +# regex_audio = re.compile('\[\[(?P[^\]]*?) +(?P\S+) +audio\]\]') regex_link = re.compile('\[\[(?P[^\]]*?) +(?P\S+)\]\]') -regex_auto = re.compile('(?\w+://[\w\.\-\?&%\:]+)',re.M) +regex_auto = re.compile('(?\w+://[\w\.\-\?&%\:]+)', re.M) regex_commas = re.compile('[ ]+(?P[,;\.])') regex_noindent = re.compile('\n\n(?P[a-z])') -#regex_quote_left = re.compile('"(?=\w)') -#regex_quote_right = re.compile('(?=\w\.)"') -def latex_escape(text,pound=True): - text=text.replace('\\','{\\textbackslash}') - for c in '^_&$%{}': text=text.replace(c,'\\'+c) - text=text.replace('\\{\\textbackslash\\}','{\\textbackslash}') - if pound: text=text.replace('#','\\#') + +# regex_quote_left = re.compile('"(?=\w)') +# regex_quote_right = re.compile('(?=\w\.)"') + +def latex_escape(text, pound=True): + text = text.replace('\\', '{\\textbackslash}') + for c in '^_&$%{}': + text = text.replace(c, '\\' + c) + text = text.replace('\\{\\textbackslash\\}', '{\\textbackslash}') + if pound: text = text.replace('#', '\\#') return text + def render(text, extra={}, allowed={}, sep='p', - image_mapper=lambda x:x, + image_mapper=lambda x: x, chapters=False): ############################################################# # replace all blocks marked with ``...``:class with META @@ -61,62 +65,68 @@ def render(text, ############################################################# text = str(text or '') segments, i = [], 0 - text = regex_dd.sub('``\g``:latex ',text) - text = regex_newlines.sub('\n',text) + text = regex_dd.sub('``\g``:latex ', text) + text = regex_newlines.sub('\n', text) while True: - item = regex_code.search(text,i) - if not item: break - if item.group()==META: - segments.append((None,None)) - text = text[:item.start()]+META+text[item.end():] + item = regex_code.search(text, i) + if not item: + break + if item.group() == META: + segments.append((None, None)) + text = text[:item.start()] + META + text[item.end():] else: c = item.group('c') or '' - if 'code' in allowed and not c in allowed['code']: c = '' - code = item.group('t').replace('!`!','`') - segments.append((code,c)) - text = text[:item.start()]+META+text[item.end():] - i=item.start()+3 - + if 'code' in allowed and c not in allowed['code']: + c = '' + code = item.group('t').replace('!`!', '`') + segments.append((code, c)) + text = text[:item.start()] + META + text[item.end():] + i = item.start() + 3 ############################################################# # do h1,h2,h3,h4,h5,h6,b,i,ol,ul and normalize spaces ############################################################# title = regex_title.search(text) - if not title: title='Title' - else: title=title.group('t') + if not title: + title = 'Title' + else: + title = title.group('t') - text = latex_escape(text,pound=False) + text = latex_escape(text, pound=False) - texts = text.split('## References',1) + texts = text.split('## References', 1) text = regex_anchor.sub('\\label{\g}', texts[0]) - if len(texts)==2: + if len(texts) == 2: text += '\n\\begin{thebibliography}{999}\n' text += regex_bibitem.sub('\n\\\\bibitem{\g}', texts[1]) text += '\n\\end{thebibliography}\n' text = '\n'.join(t.strip() for t in text.split('\n')) for regex, sub in regex_maps: - text = regex.sub(sub,text) - text=text.replace('#','\\#') - text=text.replace('`',"'") + text = regex.sub(sub, text) + text = text.replace('#', '\\#') + text = text.replace('`', "'") ############################################################# # process tables and blockquotes ############################################################# while True: item = regex_table.search(text) - if not item: break + if not item: + break c = item.group('c') or '' - if 'table' in allowed and not c in allowed['table']: c = '' + if 'table' in allowed and c not in allowed['table']: + c = '' content = item.group('t') if ' | ' in content: - rows = content.replace('\n','\\\\\n').replace(' | ',' & ') - row0,row2 = rows.split('\\\\\n',1) - cols=row0.count(' & ')+1 - cal='{'+''.join('l' for j in range(cols))+'}' - tabular = '\\begin{center}\n{\\begin{tabular}'+cal+'\\hline\n' + row0+'\\\\ \\hline\n'+row2 + ' \\\\ \\hline\n\\end{tabular}}\n\\end{center}' - if row2.count('\n')>20: tabular='\\newpage\n'+tabular + rows = content.replace('\n', '\\\\\n').replace(' | ', ' & ') + row0, row2 = rows.split('\\\\\n', 1) + cols = row0.count(' & ') + 1 + cal = '{' + ''.join('l' for j in range(cols)) + '}' + tabular = '\\begin{center}\n{\\begin{tabular}' + cal + '\\hline\n' + row0 + '\\\\ \\hline\n' + row2 + ' \\\\ \\hline\n\\end{tabular}}\n\\end{center}' + if row2.count('\n') > 20: + tabular = '\\newpage\n' + tabular text = text[:item.start()] + tabular + text[item.end():] else: text = text[:item.start()] + '\\begin{quote}' + content + '\\end{quote}' + text[item.end():] @@ -126,29 +136,32 @@ def render(text, ############################################################# def sub(x): - f=image_mapper(x.group('k')) - if not f: return None - return '\n\\begin{center}\\includegraphics[width=8cm]{%s}\\end{center}\n' % (f) - text = regex_image_width.sub(sub,text) - text = regex_image.sub(sub,text) + f = image_mapper(x.group('k')) + if not f: + return None + return '\n\\begin{center}\\includegraphics[width=8cm]{%s}\\end{center}\n' % f + + text = regex_image_width.sub(sub, text) + text = regex_image.sub(sub, text) text = regex_link.sub('{\\\\footnotesize\\href{\g}{\g}}', text) - text = regex_commas.sub('\g',text) - text = regex_noindent.sub('\n\\\\noindent \g',text) + text = regex_commas.sub('\g', text) + text = regex_noindent.sub('\n\\\\noindent \g', text) - ### fix paths in images - regex=re.compile('\\\\_\w*\.(eps|png|jpg|gif)') + # ## fix paths in images + regex = re.compile('\\\\_\w*\.(eps|png|jpg|gif)') while True: - match=regex.search(text) - if not match: break - text=text[:match.start()]+text[match.start()+1:] - #text = regex_quote_left.sub('``',text) - #text = regex_quote_right.sub("''",text) + match = regex.search(text) + if not match: + break + text = text[:match.start()] + text[match.start() + 1:] + # text = regex_quote_left.sub('``',text) + # text = regex_quote_right.sub("''",text) if chapters: - text=text.replace(r'\section*{',r'\chapter*{') - text=text.replace(r'\section{',r'\chapter{') - text=text.replace(r'subsection{',r'section{') + text = text.replace(r'\section*{', r'\chapter*{') + text = text.replace(r'\section{', r'\chapter{') + text = text.replace(r'subsection{', r'section{') ############################################################# # process all code text @@ -156,57 +169,64 @@ def render(text, parts = text.split(META) text = parts[0] authors = [] - for i,(code,b) in enumerate(segments): - if code==None: + for i, (code, b) in enumerate(segments): + if code is None: html = META else: - if b=='hidden': - html='' - elif b=='author': + if b == 'hidden': + html = '' + elif b == 'author': author = latex_escape(code.strip()) authors.append(author) - html='' - elif b=='inxx': - html='\inxx{%s}' % latex_escape(code) - elif b=='cite': - html='~\cite{%s}' % latex_escape(code.strip()) - elif b=='ref': - html='~\ref{%s}' % latex_escape(code.strip()) - elif b=='latex': + html = '' + elif b == 'inxx': + html = '\inxx{%s}' % latex_escape(code) + elif b == 'cite': + html = '~\cite{%s}' % latex_escape(code.strip()) + elif b == 'ref': + html = '~\ref{%s}' % latex_escape(code.strip()) + elif b == 'latex': if '\n' in code: - html='\n\\begin{equation}\n%s\n\\end{equation}\n' % code.strip() + html = '\n\\begin{equation}\n%s\n\\end{equation}\n' % code.strip() else: - html='$%s$' % code.strip() - elif b=='latex_eqnarray': - code=code.strip() - code='\\\\'.join(x.replace('=','&=&',1) for x in code.split('\\\\')) - html='\n\\begin{eqnarray}\n%s\n\\end{eqnarray}\n' % code + html = '$%s$' % code.strip() + elif b == 'latex_eqnarray': + code = code.strip() + code = '\\\\'.join(x.replace('=', '&=&', 1) for x in code.split('\\\\')) + html = '\n\\begin{eqnarray}\n%s\n\\end{eqnarray}\n' % code elif b.startswith('latex_'): - key=b[6:] - html='\\begin{%s}%s\\end{%s}' % (key,code,key) + key = b[6:] + html = '\\begin{%s}%s\\end{%s}' % (key, code, key) elif b in extra: - if code[:1]=='\n': code=code[1:] - if code[-1:]=='\n': code=code[:-1] + if code[:1] == '\n': + code = code[1:] + if code[-1:] == '\n': + code = code[:-1] html = extra[b](code) - elif code[:1]=='\n' or code[:-1]=='\n': - if code[:1]=='\n': code=code[1:] - if code[-1:]=='\n': code=code[:-1] + elif code[:1] == '\n' or code[:-1] == '\n': + if code[:1] == '\n': + code = code[1:] + if code[-1:] == '\n': + code = code[:-1] if code.startswith('<') or code.startswith('{{') or code.startswith('http'): html = '\\begin{lstlisting}[keywords={}]\n%s\n\\end{lstlisting}' % code else: html = '\\begin{lstlisting}\n%s\n\\end{lstlisting}' % code else: - if code[:1]=='\n': code=code[1:] - if code[-1:]=='\n': code=code[:-1] + if code[:1] == '\n': + code = code[1:] + if code[-1:] == '\n': + code = code[:-1] html = '{\\ft %s}' % latex_escape(code) try: - text = text+html+parts[i+1] + text = text + html + parts[i + 1] except: text = text + '... WIKI PROCESSING ERROR ...' break - text = text.replace(' ~\\cite','~\\cite') + text = text.replace(' ~\\cite', '~\\cite') return text, title, authors + WRAPPER = """ \\documentclass[12pt]{article} \\usepackage{hyperref} @@ -239,12 +259,14 @@ WRAPPER = """ \\end{document} """ -def markmin2latex(data, image_mapper=lambda x:x, extra={}, + +def markmin2latex(data, image_mapper=lambda x: x, extra={}, wrapper=WRAPPER): body, title, authors = render(data, extra=extra, image_mapper=image_mapper) - author = '\n\\and\n'.join(a.replace('\n','\\\\\n\\footnotesize ') for a in authors) + author = '\n\\and\n'.join(a.replace('\n', '\\\\\n\\footnotesize ') for a in authors) return wrapper % dict(title=title, author=author, body=body) + if __name__ == '__main__': parser = OptionParser() parser.add_option("-i", "--info", dest="info", @@ -252,40 +274,39 @@ if __name__ == '__main__': parser.add_option("-t", "--test", dest="test", action="store_true", default=False) parser.add_option("-n", "--no_wrapper", dest="no_wrapper", - action="store_true",default=False) - parser.add_option("-c", "--chapters", dest="chapters",action="store_true", - default=False,help="switch section for chapter") + action="store_true", default=False) + parser.add_option("-c", "--chapters", dest="chapters", action="store_true", + default=False, help="switch section for chapter") parser.add_option("-w", "--wrapper", dest="wrapper", default=False, help="latex file containing header and footer") (options, args) = parser.parse_args() if options.info: import markmin2html + markmin2latex(markmin2html.__doc__) elif options.test: doctest.testmod() else: if options.wrapper: - fwrapper = open(options.wrapper,'rb') + fwrapper = open(options.wrapper, 'rb') try: wrapper = fwrapper.read() finally: fwrapper.close() elif options.no_wrapper: - wrapper = '%(body)s' + wrapper = '%(body)s' else: wrapper = WRAPPER for f in args: - fargs = open(f,'r') + fargs = open(f, 'r') content_data = [] try: content_data.append(fargs.read()) finally: fargs.close() content = '\n'.join(content_data) - output= markmin2latex(content, - wrapper=wrapper, - chapters=options.chapters) + output = markmin2latex(content, + wrapper=wrapper, + chapters=options.chapters) print output - -