From a38c7c2aa83b308de4f9c5d1d1fd52aace0dcd88 Mon Sep 17 00:00:00 2001 From: mdipierro Date: Mon, 15 Sep 2014 07:34:14 -0500 Subject: [PATCH] upgraded memcache and markdown2 --- VERSION | 2 +- gluon/contrib/markdown/markdown2.py | 923 ++++++++++++++++------ gluon/contrib/memcache/memcache.py | 1124 ++++++++++++++++----------- 3 files changed, 1376 insertions(+), 673 deletions(-) diff --git a/VERSION b/VERSION index c8fed297..bf409e3f 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -Version 2.9.9-stable+timestamp.2014.09.14.16.41.44 +Version 2.9.9-stable+timestamp.2014.09.15.07.34.08 diff --git a/gluon/contrib/markdown/markdown2.py b/gluon/contrib/markdown/markdown2.py index 175d4864..c03eb111 100644 --- a/gluon/contrib/markdown/markdown2.py +++ b/gluon/contrib/markdown/markdown2.py @@ -1,7 +1,10 @@ #!/usr/bin/env python +# Copyright (c) 2012 Trent Mick. # Copyright (c) 2007-2008 ActiveState Corp. # License: MIT (http://www.opensource.org/licenses/mit-license.php) +from __future__ import generators + r"""A fast and complete Python implementation of Markdown. [from http://daringfireball.net/projects/markdown/] @@ -30,22 +33,57 @@ Module usage: This implementation of Markdown implements the full "core" syntax plus a number of extras (e.g., code syntax coloring, footnotes) as described on -. +. """ cmdln_desc = """A fast and complete Python implementation of Markdown, a text-to-HTML conversion tool for web writers. + +Supported extra syntax options (see -x|--extras option below and +see for details): + +* code-friendly: Disable _ and __ for em and strong. +* cuddled-lists: Allow lists to be cuddled to the preceding paragraph. +* fenced-code-blocks: Allows a code block to not have to be indented + by fencing it with '```' on a line before and after. Based on + with support for + syntax highlighting. +* footnotes: Support footnotes as in use on daringfireball.net and + implemented in other Markdown processors (tho not in Markdown.pl v1.0.1). +* header-ids: Adds "id" attributes to headers. The id value is a slug of + the header text. +* html-classes: Takes a dict mapping html tag names (lowercase) to a + string to use for a "class" tag attribute. Currently only supports + "pre" and "code" tags. Add an issue if you require this for other tags. +* markdown-in-html: Allow the use of `markdown="1"` in a block HTML tag to + have markdown processing be done on its contents. Similar to + but with + some limitations. +* metadata: Extract metadata from a leading '---'-fenced block. + See for details. +* nofollow: Add `rel="nofollow"` to add `` tags with an href. See + . +* pyshell: Treats unindented Python interactive shell sessions as + blocks. +* link-patterns: Auto-link given regex patterns in text (e.g. bug number + references, revision number references). +* smarty-pants: Replaces ' and " with curly quotation marks or curly + apostrophes. Replaces --, ---, ..., and . . . with en dashes, em dashes, + and ellipses. +* toc: The returned HTML string gets a new "toc_html" attribute which is + a Table of Contents for the document. (experimental) +* xml: Passes one-liner processing instructions and namespaced XML tags. +* wiki-tables: Google Code Wiki-style tables. See + . """ # Dev Notes: -# - There is already a Python markdown processor -# (http://www.freewisdom.org/projects/python-markdown/). # - Python's regex syntax doesn't have '\z', so I'm using '\Z'. I'm # not yet sure if there implications with this. Compare 'pydoc sre' # and 'perldoc perlre'. -__version_info__ = (1, 0, 1, 16) # first three nums match Markdown.pl -__version__ = '1.0.1.16' +__version_info__ = (2, 2, 4) +__version__ = '.'.join(map(str, __version_info__)) __author__ = "Trent Mick" import os @@ -60,22 +98,34 @@ except ImportError: import optparse from random import random, randint import codecs -from urllib import quote - #---- Python version compat +try: + from urllib.parse import quote # python3 +except ImportError: + from urllib import quote # python2 + if sys.version_info[:2] < (2,4): from sets import Set as set def reversed(sequence): for i in sequence[::-1]: yield i - def _unicode_decode(s, encoding, errors='xmlcharrefreplace'): - return unicode(s, encoding, errors) -else: - def _unicode_decode(s, encoding, errors='strict'): - return s.decode(encoding, errors) + +# Use `bytes` for byte strings and `unicode` for unicode strings (str in Py3). +if sys.version_info[0] <= 2: + py3 = False + try: + bytes + except NameError: + bytes = str + base_string_type = basestring +elif sys.version_info[0] >= 3: + py3 = True + unicode = str + base_string_type = str + #---- globals @@ -86,21 +136,13 @@ log = logging.getLogger("markdown") DEFAULT_TAB_WIDTH = 4 -try: - import uuid -except ImportError: - SECRET_SALT = str(randint(0, 1000000)) -else: - SECRET_SALT = str(uuid.uuid4()) -def _hash_ascii(s): - #return md5(s).hexdigest() # Markdown.pl effectively does this. - return 'md5-' + md5(SECRET_SALT + s).hexdigest() +SECRET_SALT = bytes(randint(0, 1000000)) def _hash_text(s): return 'md5-' + md5(SECRET_SALT + s.encode("utf-8")).hexdigest() # Table of hash values for escaped characters: -g_escape_table = dict([(ch, _hash_ascii(ch)) - for ch in '\\`*_{}[]()>#+-.!']) +g_escape_table = dict([(ch, _hash_text(ch)) + for ch in '\\`*_{}[]()>#+-.!']) @@ -118,10 +160,8 @@ def markdown_path(path, encoding="utf-8", safe_mode=None, extras=None, link_patterns=None, use_file_vars=False): fp = codecs.open(path, 'r', encoding) - try: - text = fp.read() - finally: - fp.close() + text = fp.read() + fp.close() return Markdown(html4tags=html4tags, tab_width=tab_width, safe_mode=safe_mode, extras=extras, link_patterns=link_patterns, @@ -172,6 +212,7 @@ class Markdown(object): else: self.safe_mode = safe_mode + # Massaging and building the "extras" info. if self.extras is None: self.extras = {} elif not isinstance(self.extras, dict): @@ -181,11 +222,19 @@ class Markdown(object): extras = dict([(e, None) for e in extras]) self.extras.update(extras) assert isinstance(self.extras, dict) + if "toc" in self.extras and not "header-ids" in self.extras: + self.extras["header-ids"] = None # "toc" implies "header-ids" self._instance_extras = self.extras.copy() + self.link_patterns = link_patterns self.use_file_vars = use_file_vars self._outdent_re = re.compile(r'^(\t|[ ]{1,%d})' % tab_width, re.M) + self._escape_table = g_escape_table.copy() + if "smarty-pants" in self.extras: + self._escape_table['"'] = _hash_text('"') + self._escape_table["'"] = _hash_text("'") + def reset(self): self.urls = {} self.titles = {} @@ -196,6 +245,14 @@ class Markdown(object): if "footnotes" in self.extras: self.footnotes = {} self.footnote_ids = [] + if "header-ids" in self.extras: + self._count_from_header_id = {} # no `defaultdict` in Python 2.4 + if "metadata" in self.extras: + self.metadata = {} + + # Per "rel" + # should only be used in tags with an "href" attribute. + _a_nofollow = re.compile(r"<(a)([^>]*href=)", re.IGNORECASE) def convert(self, text): """Convert the given text.""" @@ -245,12 +302,24 @@ class Markdown(object): # contorted like /[ \t]*\n+/ . text = self._ws_only_line_re.sub("", text) + # strip metadata from head and extract + if "metadata" in self.extras: + text = self._extract_metadata(text) + + text = self.preprocess(text) + + if "fenced-code-blocks" in self.extras and not self.safe_mode: + text = self._do_fenced_code_blocks(text) + if self.safe_mode: text = self._hash_html_spans(text) # Turn block-level HTML blocks into hash entries text = self._hash_html_blocks(text, raw=True) + if "fenced-code-blocks" in self.extras and self.safe_mode: + text = self._do_fenced_code_blocks(text) + # Strip link definitions, store in hashes. if "footnotes" in self.extras: # Must do footnotes first because an unlucky footnote defn @@ -264,14 +333,64 @@ class Markdown(object): if "footnotes" in self.extras: text = self._add_footnotes(text) + text = self.postprocess(text) + text = self._unescape_special_chars(text) if self.safe_mode: text = self._unhash_html_spans(text) + if "nofollow" in self.extras: + text = self._a_nofollow.sub(r'<\1 rel="nofollow"\2', text) + text += "\n" + + rv = UnicodeWithAttrs(text) + if "toc" in self.extras: + rv._toc = self._toc + if "metadata" in self.extras: + rv.metadata = self.metadata + return rv + + def postprocess(self, text): + """A hook for subclasses to do some postprocessing of the html, if + desired. This is called before unescaping of special chars and + unhashing of raw HTML spans. + """ return text + def preprocess(self, text): + """A hook for subclasses to do some preprocessing of the Markdown, if + desired. This is called after basic formatting of the text, but prior + to any extras, safe mode, etc. processing. + """ + return text + + # Is metadata if the content starts with '---'-fenced `key: value` + # pairs. E.g. (indented for presentation): + # --- + # foo: bar + # another-var: blah blah + # --- + _metadata_pat = re.compile("""^---[ \t]*\n((?:[ \t]*[^ \t:]+[ \t]*:[^\n]*\n)+)---[ \t]*\n""") + + def _extract_metadata(self, text): + # fast test + if not text.startswith("---"): + return text + match = self._metadata_pat.match(text) + if not match: + return text + + tail = text[len(match.group(0)):] + metadata_str = match.group(1).strip() + for line in metadata_str.split('\n'): + key, value = line.split(':', 1) + self.metadata[key.strip()] = value.strip() + + return tail + + _emacs_oneliner_vars_pat = re.compile(r"-\*-\s*([^\r\n]*?)\s*-\*-", re.UNICODE) # This regular expression is intended to match blocks like this: # PREFIX Local Variables: SUFFIX @@ -382,7 +501,7 @@ class Markdown(object): emacs_vars[variable] = value # Unquote values. - for var, val in emacs_vars.items(): + for var, val in list(emacs_vars.items()): if len(val) > 1 and (val.startswith('"') and val.endswith('"') or val.startswith('"') and val.endswith('"')): emacs_vars[var] = val[1:-1] @@ -414,7 +533,13 @@ class Markdown(object): return text return self._detab_re.subn(self._detab_sub, text)[0] + # I broke out the html5 tags here and add them to _block_tags_a and + # _block_tags_b. This way html5 tags are easy to keep track of. + _html5tags = '|article|aside|header|hgroup|footer|nav|section|figure|figcaption' + _block_tags_a = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del' + _block_tags_a += _html5tags + _strict_tag_block_re = re.compile(r""" ( # save in \1 ^ # start of line (with re.M) @@ -429,6 +554,8 @@ class Markdown(object): re.X | re.M) _block_tags_b = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math' + _block_tags_b += _html5tags + _liberal_tag_block_re = re.compile(r""" ( # save in \1 ^ # start of line (with re.M) @@ -442,10 +569,27 @@ class Markdown(object): """ % _block_tags_b, re.X | re.M) + _html_markdown_attr_re = re.compile( + r'''\s+markdown=("1"|'1')''') def _hash_html_block_sub(self, match, raw=False): html = match.group(1) if raw and self.safe_mode: html = self._sanitize_html(html) + elif 'markdown-in-html' in self.extras and 'markdown=' in html: + first_line = html.split('\n', 1)[0] + m = self._html_markdown_attr_re.search(first_line) + if m: + lines = html.split('\n') + middle = '\n'.join(lines[1:-1]) + last_line = lines[-1] + first_line = first_line[:m.start()] + first_line[m.end():] + f_key = _hash_text(first_line) + self.html_blocks[f_key] = first_line + l_key = _hash_text(last_line) + self.html_blocks[l_key] = last_line + return ''.join(["\n\n", f_key, + "\n\n", middle, "\n\n", + l_key, "\n\n"]) key = _hash_text(html) self.html_blocks[key] = html return "\n\n" + key + "\n\n" @@ -497,11 +641,11 @@ class Markdown(object): # Delimiters for next comment block. try: start_idx = text.index("", start_idx) + 3 - except ValueError, ex: + except ValueError: break # Start position for next comment block search. @@ -590,7 +734,7 @@ class Markdown(object): key = id.lower() # Link IDs are case-insensitive self.urls[key] = self._encode_amps_and_angles(url) if title: - self.titles[key] = title.replace('"', '"') + self.titles[key] = title return "" def _extract_footnote_def_sub(self, match): @@ -635,28 +779,31 @@ class Markdown(object): re.X | re.M) return footnote_def_re.sub(self._extract_footnote_def_sub, text) - - _hr_res = [ - re.compile(r"^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$", re.M), - re.compile(r"^[ ]{0,2}([ ]?\-[ ]?){3,}[ \t]*$", re.M), - re.compile(r"^[ ]{0,2}([ ]?\_[ ]?){3,}[ \t]*$", re.M), - ] + _hr_re = re.compile(r'^[ ]{0,3}([-_*][ ]{0,2}){3,}$', re.M) def _run_block_gamut(self, text): # These are all the transformations that form block-level # tags like paragraphs, headers, and list items. + if "fenced-code-blocks" in self.extras: + text = self._do_fenced_code_blocks(text) + text = self._do_headers(text) # Do Horizontal Rules: + # On the number of spaces in horizontal rules: The spec is fuzzy: "If + # you wish, you may use spaces between the hyphens or asterisks." + # Markdown.pl 1.0.1's hr regexes limit the number of spaces between the + # hr chars to one or two. We'll reproduce that limit here. hr = "\n', ''] + for row in rows: + hrow = [''] + for cell in row: + hrow.append('') + hrow.append(self._run_span_gamut(cell)) + hrow.append('') + hrow.append('') + hlines.append(''.join(hrow)) + hlines += ['', ''] + return '\n'.join(hlines) + '\n' + + def _do_wiki_tables(self, text): + # Optimization. + if "||" not in text: + return text + + less_than_tab = self.tab_width - 1 + wiki_table_re = re.compile(r''' + (?:(?<=\n\n)|\A\n?) # leading blank line + ^([ ]{0,%d})\|\|.+?\|\|[ ]*\n # first line + (^\1\|\|.+?\|\|\n)* # any number of subsequent lines + ''' % less_than_tab, re.M | re.X) + return wiki_table_re.sub(self._wiki_table_sub, text) + def _run_span_gamut(self, text): # These are all the transformations that occur *within* block-level # tags like paragraphs, headers, and list items. @@ -720,8 +901,14 @@ class Markdown(object): text = self._do_italics_and_bold(text) + if "smarty-pants" in self.extras: + text = self._do_smart_punctuation(text) + # Do hard breaks: - text = re.sub(r" {2,}\n", " # \1 - <.*?> - | - .*? - ) - [ \t]* - ( # \2 - (['"]) # quote char = \3 + _inline_link_title = re.compile(r''' + ( # \1 + [ \t]+ + (['"]) # quote char = \2 (?P.*?) - \3 # matching quote + \2 )? # title is optional - \) + \)$ ''', re.X | re.S) _tail_of_reference_link_re = re.compile(r''' # Match tail of: [text][id] @@ -837,6 +1016,52 @@ class Markdown(object): \] ''', re.X | re.S) + _whitespace = re.compile(r'\s*') + + _strip_anglebrackets = re.compile(r'<(.*)>.*') + + def _find_non_whitespace(self, text, start): + """Returns the index of the first non-whitespace character in text + after (and including) start + """ + match = self._whitespace.match(text, start) + return match.end() + + def _find_balanced(self, text, start, open_c, close_c): + """Returns the index where the open_c and close_c characters balance + out - the same number of open_c and close_c are encountered - or the + end of string if it's reached before the balance point is found. + """ + i = start + l = len(text) + count = 1 + while count > 0 and i < l: + if text[i] == open_c: + count += 1 + elif text[i] == close_c: + count -= 1 + i += 1 + return i + + def _extract_url_and_title(self, text, start): + """Extracts the url and (optional) title from the tail of a link""" + # text[start] equals the opening parenthesis + idx = self._find_non_whitespace(text, start+1) + if idx == len(text): + return None, None, None + end_idx = idx + has_anglebrackets = text[idx] == "<" + if has_anglebrackets: + end_idx = self._find_balanced(text, end_idx+1, "<", ">") + end_idx = self._find_balanced(text, end_idx, "(", ")") + match = self._inline_link_title.search(text, idx, end_idx) + if not match: + return None, None, None + url, title = text[idx:match.start()], match.group("title") + if has_anglebrackets: + url = self._strip_anglebrackets.sub(r'\1', url) + return url, title, end_idx + def _do_links(self, text): """Turn Markdown link shortcuts into XHTML <a> and <img> tags. @@ -919,42 +1144,44 @@ class Markdown(object): # Inline anchor or img? if text[p] == '(': # attempt at perf improvement - match = self._tail_of_inline_link_re.match(text, p) - if match: + url, title, url_end_idx = self._extract_url_and_title(text, p) + if url is not None: # Handle an inline anchor or img. is_img = start_idx > 0 and text[start_idx-1] == "!" if is_img: start_idx -= 1 - url, title = match.group("url"), match.group("title") - if url and url[0] == '<': - url = url[1:-1] # '<url>' -> 'url' # We've got to encode these to avoid conflicting # with italics/bold. - url = url.replace('*', g_escape_table['*']) \ - .replace('_', g_escape_table['_']) + url = url.replace('*', self._escape_table['*']) \ + .replace('_', self._escape_table['_']) if title: - title_str = ' title="%s"' \ - % title.replace('*', g_escape_table['*']) \ - .replace('_', g_escape_table['_']) \ - .replace('"', '"') + title_str = ' title="%s"' % ( + _xml_escape_attr(title) + .replace('*', self._escape_table['*']) + .replace('_', self._escape_table['_'])) else: title_str = '' if is_img: - result = '<img src="%s" alt="%s"%s%s' \ + img_class_str = self._html_class_str_from_tag("img") + result = '<img src="%s" alt="%s"%s%s%s' \ % (url.replace('"', '"'), - link_text.replace('"', '"'), - title_str, self.empty_element_suffix) + _xml_escape_attr(link_text), + title_str, img_class_str, self.empty_element_suffix) + if "smarty-pants" in self.extras: + result = result.replace('"', self._escape_table['"']) curr_pos = start_idx + len(result) - text = text[:start_idx] + result + text[match.end():] + text = text[:start_idx] + result + text[url_end_idx:] elif start_idx >= anchor_allowed_pos: result_head = '<a href="%s"%s>' % (url, title_str) result = '%s%s</a>' % (result_head, link_text) + if "smarty-pants" in self.extras: + result = result.replace('"', self._escape_table['"']) # <img> allowed from curr_pos on, <a> from # anchor_allowed_pos on. curr_pos = start_idx + len(result_head) anchor_allowed_pos = start_idx + len(result) - text = text[:start_idx] + result + text[match.end():] + text = text[:start_idx] + result + text[url_end_idx:] else: # Anchor not allowed here. curr_pos = start_idx + 1 @@ -975,20 +1202,25 @@ class Markdown(object): url = self.urls[link_id] # We've got to encode these to avoid conflicting # with italics/bold. - url = url.replace('*', g_escape_table['*']) \ - .replace('_', g_escape_table['_']) + url = url.replace('*', self._escape_table['*']) \ + .replace('_', self._escape_table['_']) title = self.titles.get(link_id) if title: - title = title.replace('*', g_escape_table['*']) \ - .replace('_', g_escape_table['_']) + before = title + title = _xml_escape_attr(title) \ + .replace('*', self._escape_table['*']) \ + .replace('_', self._escape_table['_']) title_str = ' title="%s"' % title else: title_str = '' if is_img: - result = '<img src="%s" alt="%s"%s%s' \ + img_class_str = self._html_class_str_from_tag("img") + result = '<img src="%s" alt="%s"%s%s%s' \ % (url.replace('"', '"'), link_text.replace('"', '"'), - title_str, self.empty_element_suffix) + title_str, img_class_str, self.empty_element_suffix) + if "smarty-pants" in self.extras: + result = result.replace('"', self._escape_table['"']) curr_pos = start_idx + len(result) text = text[:start_idx] + result + text[match.end():] elif start_idx >= anchor_allowed_pos: @@ -996,6 +1228,8 @@ class Markdown(object): % (url, title_str, link_text) result_head = '<a href="%s"%s>' % (url, title_str) result = '%s%s</a>' % (result_head, link_text) + if "smarty-pants" in self.extras: + result = result.replace('"', self._escape_table['"']) # <img> allowed from curr_pos on, <a> from # anchor_allowed_pos on. curr_pos = start_idx + len(result_head) @@ -1014,32 +1248,76 @@ class Markdown(object): return text + def header_id_from_text(self, text, prefix, n): + """Generate a header id attribute value from the given header + HTML content. - _setext_h_re = re.compile(r'^(.+)[ \t]*\n(=+|-+)[ \t]*\n+', re.M) - def _setext_h_sub(self, match): - n = {"=": 1, "-": 2}[match.group(2)[0]] - demote_headers = self.extras.get("demote-headers") - if demote_headers: - n = min(n + demote_headers, 6) - return "<h%d>%s</h%d>\n\n" \ - % (n, self._run_span_gamut(match.group(1)), n) + This is only called if the "header-ids" extra is enabled. + Subclasses may override this for different header ids. - _atx_h_re = re.compile(r''' - ^(\#{1,6}) # \1 = string of #'s - [ \t]* + @param text {str} The text of the header tag + @param prefix {str} The requested prefix for header ids. This is the + value of the "header-ids" extra key, if any. Otherwise, None. + @param n {int} The <hN> tag number, i.e. `1` for an <h1> tag. + @returns {str} The value for the header tag's "id" attribute. Return + None to not have an id attribute and to exclude this header from + the TOC (if the "toc" extra is specified). + """ + header_id = _slugify(text) + if prefix and isinstance(prefix, base_string_type): + header_id = prefix + '-' + header_id + if header_id in self._count_from_header_id: + self._count_from_header_id[header_id] += 1 + header_id += '-%s' % self._count_from_header_id[header_id] + else: + self._count_from_header_id[header_id] = 1 + return header_id + + _toc = None + def _toc_add_entry(self, level, id, name): + if self._toc is None: + self._toc = [] + self._toc.append((level, id, self._unescape_special_chars(name))) + + _h_re_base = r''' + (^(.+)[ \t]*\n(=+|-+)[ \t]*\n+) + | + (^(\#{1,6}) # \1 = string of #'s + [ \t]%s (.+?) # \2 = Header text [ \t]* (?<!\\) # ensure not an escaped trailing '#' \#* # optional closing #'s (not counted) \n+ - ''', re.X | re.M) - def _atx_h_sub(self, match): - n = len(match.group(1)) + ) + ''' + + _h_re = re.compile(_h_re_base % '*', re.X | re.M) + _h_re_tag_friendly = re.compile(_h_re_base % '+', re.X | re.M) + + def _h_sub(self, match): + if match.group(1) is not None: + # Setext header + n = {"=": 1, "-": 2}[match.group(3)[0]] + header_group = match.group(2) + else: + # atx header + n = len(match.group(5)) + header_group = match.group(6) + demote_headers = self.extras.get("demote-headers") if demote_headers: n = min(n + demote_headers, 6) - return "<h%d>%s</h%d>\n\n" \ - % (n, self._run_span_gamut(match.group(2)), n) + header_id_attr = "" + if "header-ids" in self.extras: + header_id = self.header_id_from_text(header_group, + self.extras["header-ids"], n) + if header_id: + header_id_attr = ' id="%s"' % header_id + html = self._run_span_gamut(header_group) + if "toc" in self.extras and header_id: + self._toc_add_entry(n, header_id, html) + return "<h%d%s>%s</h%d>\n\n" % (n, header_id_attr, html, n) def _do_headers(self, text): # Setext-style headers: @@ -1048,7 +1326,6 @@ class Markdown(object): # # Header 2 # -------- - text = self._setext_h_re.sub(self._setext_h_sub, text) # atx-style headers: # # Header 1 @@ -1056,10 +1333,10 @@ class Markdown(object): # ## Header 2 with closing hashes ## # ... # ###### Header 6 - text = self._atx_h_re.sub(self._atx_h_sub, text) - - return text + if 'tag-friendly' in self.extras: + return self._h_re_tag_friendly.sub(self._h_sub, text) + return self._h_re.sub(self._h_sub, text) _marker_ul_chars = '*+-' _marker_any = r'(?:[%s]|\d+\.)' % _marker_ul_chars @@ -1078,65 +1355,62 @@ class Markdown(object): def _do_lists(self, text): # Form HTML ordered (numbered) and unordered (bulleted) lists. - for marker_pat in (self._marker_ul, self._marker_ol): - # Re-usable pattern to match any entire ul or ol list: - less_than_tab = self.tab_width - 1 - whole_list = r''' - ( # \1 = whole list - ( # \2 - [ ]{0,%d} - (%s) # \3 = first list item marker - [ \t]+ - ) - (?:.+?) - ( # \4 - \Z - | - \n{2,} - (?=\S) - (?! # Negative lookahead for another list item marker - [ \t]* - %s[ \t]+ + # Iterate over each *non-overlapping* list match. + pos = 0 + while True: + # Find the *first* hit for either list style (ul or ol). We + # match ul and ol separately to avoid adjacent lists of different + # types running into each other (see issue #16). + hits = [] + for marker_pat in (self._marker_ul, self._marker_ol): + less_than_tab = self.tab_width - 1 + whole_list = r''' + ( # \1 = whole list + ( # \2 + [ ]{0,%d} + (%s) # \3 = first list item marker + [ \t]+ + (?!\ *\3\ ) # '- - - ...' isn't a list. See 'not_quite_a_list' test case. ) - ) - ) - ''' % (less_than_tab, marker_pat, marker_pat) - - # We use a different prefix before nested lists than top-level lists. - # See extended comment in _process_list_items(). - # - # Note: There's a bit of duplication here. My original implementation - # created a scalar regex pattern as the conditional result of the test on - # $g_list_level, and then only ran the $text =~ s{...}{...}egmx - # substitution once, using the scalar as the pattern. This worked, - # everywhere except when running under MT on my hosting account at Pair - # Networks. There, this caused all rebuilds to be killed by the reaper (or - # perhaps they crashed, but that seems incredibly unlikely given that the - # same script on the same server ran fine *except* under MT. I've spent - # more time trying to figure out why this is happening than I'd like to - # admit. My only guess, backed up by the fact that this workaround works, - # is that Perl optimizes the substition when it can figure out that the - # pattern will never change, and when this optimization isn't on, we run - # afoul of the reaper. Thus, the slightly redundant code to that uses two - # static s/// patterns rather than one conditional pattern. - - if self.list_level: - sub_list_re = re.compile("^"+whole_list, re.X | re.M | re.S) - text = sub_list_re.sub(self._list_sub, text) - else: - list_re = re.compile(r"(?:(?<=\n\n)|\A\n?)"+whole_list, - re.X | re.M | re.S) - text = list_re.sub(self._list_sub, text) + (?:.+?) + ( # \4 + \Z + | + \n{2,} + (?=\S) + (?! # Negative lookahead for another list item marker + [ \t]* + %s[ \t]+ + ) + ) + ) + ''' % (less_than_tab, marker_pat, marker_pat) + if self.list_level: # sub-list + list_re = re.compile("^"+whole_list, re.X | re.M | re.S) + else: + list_re = re.compile(r"(?:(?<=\n\n)|\A\n?)"+whole_list, + re.X | re.M | re.S) + match = list_re.search(text, pos) + if match: + hits.append((match.start(), match)) + if not hits: + break + hits.sort() + match = hits[0][1] + start, end = match.span() + middle = self._list_sub(match) + text = text[:start] + middle + text[end:] + pos = start + len(middle) # start pos for next attempted match return text _list_item_re = re.compile(r''' - (\n)? # leading line = \1 - (^[ \t]*) # leading whitespace = \2 - (%s) [ \t]+ # list marker = \3 - ((?:.+?) # list item text = \4 - (\n{1,2})) # eols = \5 - (?= \n* (\Z | \2 (%s) [ \t]+)) + (\n)? # leading line = \1 + (^[ \t]*) # leading whitespace = \2 + (?P<marker>%s) [ \t]+ # list marker = \3 + ((?:.+?) # list item text = \4 + (\n{1,2})) # eols = \5 + (?= \n* (\Z | \2 (?P<next_marker>%s) [ \t]+)) ''' % (_marker_any, _marker_any), re.M | re.X | re.S) @@ -1215,34 +1489,76 @@ class Markdown(object): """Return the source with a code, pre, and div.""" return self._wrap_div(self._wrap_pre(self._wrap_code(source))) - formatter = HtmlCodeFormatter(cssclass="codehilite", **formatter_opts) + formatter_opts.setdefault("cssclass", "codehilite") + formatter = HtmlCodeFormatter(**formatter_opts) return pygments.highlight(codeblock, lexer, formatter) - def _code_block_sub(self, match): - codeblock = match.group(1) - codeblock = self._outdent(codeblock) - codeblock = self._detab(codeblock) - codeblock = codeblock.lstrip('\n') # trim leading newlines - codeblock = codeblock.rstrip() # trim trailing whitespace + def _code_block_sub(self, match, is_fenced_code_block=False): + lexer_name = None + if is_fenced_code_block: + lexer_name = match.group(1) + if lexer_name: + formatter_opts = self.extras['fenced-code-blocks'] or {} + codeblock = match.group(2) + codeblock = codeblock[:-1] # drop one trailing newline + else: + codeblock = match.group(1) + codeblock = self._outdent(codeblock) + codeblock = self._detab(codeblock) + codeblock = codeblock.lstrip('\n') # trim leading newlines + codeblock = codeblock.rstrip() # trim trailing whitespace - if "code-color" in self.extras and codeblock.startswith(":::"): - lexer_name, rest = codeblock.split('\n', 1) - lexer_name = lexer_name[3:].strip() - lexer = self._get_pygments_lexer(lexer_name) - codeblock = rest.lstrip("\n") # Remove lexer declaration line. - if lexer: + # Note: "code-color" extra is DEPRECATED. + if "code-color" in self.extras and codeblock.startswith(":::"): + lexer_name, rest = codeblock.split('\n', 1) + lexer_name = lexer_name[3:].strip() + codeblock = rest.lstrip("\n") # Remove lexer declaration line. formatter_opts = self.extras['code-color'] or {} + + if lexer_name: + def unhash_code( codeblock ): + for key, sanitized in list(self.html_spans.items()): + codeblock = codeblock.replace(key, sanitized) + replacements = [ + ("&", "&"), + ("<", "<"), + (">", ">") + ] + for old, new in replacements: + codeblock = codeblock.replace(old, new) + return codeblock + lexer = self._get_pygments_lexer(lexer_name) + if lexer: + codeblock = unhash_code( codeblock ) colored = self._color_with_pygments(codeblock, lexer, **formatter_opts) return "\n\n%s\n\n" % colored codeblock = self._encode_code(codeblock) - return "\n\n<pre><code>%s\n</code></pre>\n\n" % codeblock + pre_class_str = self._html_class_str_from_tag("pre") + code_class_str = self._html_class_str_from_tag("code") + return "\n\n<pre%s><code%s>%s\n</code></pre>\n\n" % ( + pre_class_str, code_class_str, codeblock) + + def _html_class_str_from_tag(self, tag): + """Get the appropriate ' class="..."' string (note the leading + space), if any, for the given tag. + """ + if "html-classes" not in self.extras: + return "" + try: + html_classes_from_tag = self.extras["html-classes"] + except TypeError: + return "" + else: + if tag in html_classes_from_tag: + return ' class="%s"' % html_classes_from_tag[tag] + return "" def _do_code_blocks(self, text): """Process Markdown `<pre><code>` blocks.""" code_block_re = re.compile(r''' - (?:\n\n|\A) + (?:\n\n|\A\n?) ( # $1 = the code block -- one or more lines, starting with a space/tab (?: (?:[ ]{%d} | \t) # Lines must start with a tab or a tab-width of spaces @@ -1250,11 +1566,26 @@ class Markdown(object): )+ ) ((?=^[ ]{0,%d}\S)|\Z) # Lookahead for non-space at line-start, or end of doc + # Lookahead to make sure this block isn't already in a code block. + # Needed when syntax highlighting is being used. + (?![^<]*\</code\>) ''' % (self.tab_width, self.tab_width), re.M | re.X) - return code_block_re.sub(self._code_block_sub, text) + _fenced_code_block_re = re.compile(r''' + (?:\n\n|\A\n?) + ^```([\w+-]+)?[ \t]*\n # opening fence, $1 = optional lang + (.*?) # $2 = code block content + ^```[ \t]*\n # closing fence + ''', re.M | re.X | re.S) + + def _fenced_code_block_sub(self, match): + return self._code_block_sub(match, is_fenced_code_block=True); + + def _do_fenced_code_blocks(self, text): + """Process ```-fenced unindented code blocks ('fenced-code-blocks' extra).""" + return self._fenced_code_block_re.sub(self._fenced_code_block_sub, text) # Rules for a code span: # - backslash escapes are not interpreted in a code span @@ -1316,21 +1647,15 @@ class Markdown(object): # Do the angle bracket song and dance: ('<', '<'), ('>', '>'), - # Now, escape characters that are magic in Markdown: - ('*', g_escape_table['*']), - ('_', g_escape_table['_']), - ('{', g_escape_table['{']), - ('}', g_escape_table['}']), - ('[', g_escape_table['[']), - (']', g_escape_table[']']), - ('\\', g_escape_table['\\']), ] for before, after in replacements: text = text.replace(before, after) - return text + hashed = _hash_text(text) + self._escape_table[text] = hashed + return hashed - _strong_re = re.compile(r"(?<!\w)(\*\*|__)(?=\S)(.+?[*_]*)(?<=\S)\1(?!\w)", re.S) - _em_re = re.compile(r"(?<!\w)(\*|_)(?=\S)(.+?)(?<=\S)\1(?!\w)", re.S) + _strong_re = re.compile(r"(\*\*|__)(?=\S)(.+?[*_]*)(?<=\S)\1", re.S) + _em_re = re.compile(r"(\*|_)(?=\S)(.+?)(?<=\S)\1", re.S) _code_friendly_strong_re = re.compile(r"\*\*(?=\S)(.+?[*_]*)(?<=\S)\*\*", re.S) _code_friendly_em_re = re.compile(r"\*(?=\S)(.+?)(?<=\S)\*", re.S) def _do_italics_and_bold(self, text): @@ -1343,6 +1668,51 @@ class Markdown(object): text = self._em_re.sub(r"<em>\2</em>", text) return text + # "smarty-pants" extra: Very liberal in interpreting a single prime as an + # apostrophe; e.g. ignores the fact that "round", "bout", "twer", and + # "twixt" can be written without an initial apostrophe. This is fine because + # using scare quotes (single quotation marks) is rare. + _apostrophe_year_re = re.compile(r"'(\d\d)(?=(\s|,|;|\.|\?|!|$))") + _contractions = ["tis", "twas", "twer", "neath", "o", "n", + "round", "bout", "twixt", "nuff", "fraid", "sup"] + def _do_smart_contractions(self, text): + text = self._apostrophe_year_re.sub(r"’\1", text) + for c in self._contractions: + text = text.replace("'%s" % c, "’%s" % c) + text = text.replace("'%s" % c.capitalize(), + "’%s" % c.capitalize()) + return text + + # Substitute double-quotes before single-quotes. + _opening_single_quote_re = re.compile(r"(?<!\S)'(?=\S)") + _opening_double_quote_re = re.compile(r'(?<!\S)"(?=\S)') + _closing_single_quote_re = re.compile(r"(?<=\S)'") + _closing_double_quote_re = re.compile(r'(?<=\S)"(?=(\s|,|;|\.|\?|!|$))') + def _do_smart_punctuation(self, text): + """Fancifies 'single quotes', "double quotes", and apostrophes. + Converts --, ---, and ... into en dashes, em dashes, and ellipses. + + Inspiration is: <http://daringfireball.net/projects/smartypants/> + See "test/tm-cases/smarty_pants.text" for a full discussion of the + support here and + <http://code.google.com/p/python-markdown2/issues/detail?id=42> for a + discussion of some diversion from the original SmartyPants. + """ + if "'" in text: # guard for perf + text = self._do_smart_contractions(text) + text = self._opening_single_quote_re.sub("‘", text) + text = self._closing_single_quote_re.sub("’", text) + + if '"' in text: # guard for perf + text = self._opening_double_quote_re.sub("“", text) + text = self._closing_double_quote_re.sub("”", text) + + text = text.replace("---", "—") + text = text.replace("--", "–") + text = text.replace("...", "…") + text = text.replace(" . . . ", "…") + text = text.replace(". . .", "…") + return text _block_quote_re = re.compile(r''' ( # Wrap whole match in \1 @@ -1382,15 +1752,35 @@ class Markdown(object): text = text.strip('\n') # Wrap <p> tags. - grafs = re.split(r"\n{2,}", text) - for i, graf in enumerate(grafs): + grafs = [] + for i, graf in enumerate(re.split(r"\n{2,}", text)): if graf in self.html_blocks: # Unhashify HTML blocks - grafs[i] = self.html_blocks[graf] + grafs.append(self.html_blocks[graf]) else: + cuddled_list = None + if "cuddled-lists" in self.extras: + # Need to put back trailing '\n' for `_list_item_re` + # match at the end of the paragraph. + li = self._list_item_re.search(graf + '\n') + # Two of the same list marker in this paragraph: a likely + # candidate for a list cuddled to preceding paragraph + # text (issue 33). Note the `[-1]` is a quick way to + # consider numeric bullets (e.g. "1." and "2.") to be + # equal. + if (li and len(li.group(2)) <= 3 and li.group("next_marker") + and li.group("marker")[-1] == li.group("next_marker")[-1]): + start = li.start() + cuddled_list = self._do_lists(graf[start:]).rstrip("\n") + assert cuddled_list.startswith("<ul>") or cuddled_list.startswith("<ol>") + graf = graf[:start] + # Wrap <p> tags. graf = self._run_span_gamut(graf) - grafs[i] = "<p>" + graf.lstrip(" \t") + "</p>" + grafs.append("<p>" + graf.lstrip(" \t") + "</p>") + + if cuddled_list: + grafs.append(cuddled_list) return "\n\n".join(grafs) @@ -1412,7 +1802,7 @@ class Markdown(object): '↩</a>' % (id, i+1)) if footer[-1].endswith("</p>"): footer[-1] = footer[-1][:-len("</p>")] \ - + ' ' + backlink + "</p>" + + ' ' + backlink + "</p>" else: footer.append("\n<p>%s</p>" % backlink) footer.append('</li>') @@ -1426,7 +1816,7 @@ class Markdown(object): # http://bumppo.net/projects/amputator/ _ampersand_re = re.compile(r'&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)') _naked_lt_re = re.compile(r'<(?![a-z/?\$!])', re.I) - _naked_gt_re = re.compile(r'''(?<![a-z?!/'"-])>''', re.I) + _naked_gt_re = re.compile(r'''(?<![a-z0-9?!/'"-])>''', re.I) def _encode_amps_and_angles(self, text): # Smart processing for ampersands and angle brackets that need @@ -1443,7 +1833,7 @@ class Markdown(object): return text def _encode_backslash_escapes(self, text): - for ch, escape in g_escape_table.items(): + for ch, escape in list(self._escape_table.items()): text = text.replace("\\"+ch, escape) return text @@ -1512,19 +1902,19 @@ class Markdown(object): escaped_href = ( href.replace('"', '"') # b/c of attr quote # To avoid markdown <em> and <strong>: - .replace('*', g_escape_table['*']) - .replace('_', g_escape_table['_'])) + .replace('*', self._escape_table['*']) + .replace('_', self._escape_table['_'])) link = '<a href="%s">%s</a>' % (escaped_href, text[start:end]) hash = _hash_text(link) link_from_hash[hash] = link text = text[:start] + hash + text[end:] - for hash, link in link_from_hash.items(): + for hash, link in list(link_from_hash.items()): text = text.replace(hash, link) return text def _unescape_special_chars(self, text): # Swap back in all the special characters we've hidden. - for ch, hash in g_escape_table.items(): + for ch, hash in list(self._escape_table.items()): text = text.replace(hash, ch) return text @@ -1550,6 +1940,64 @@ class MarkdownWithExtras(Markdown): #---- internal support functions +class UnicodeWithAttrs(unicode): + """A subclass of unicode used for the return value of conversion to + possibly attach some attributes. E.g. the "toc_html" attribute when + the "toc" extra is used. + """ + metadata = None + _toc = None + def toc_html(self): + """Return the HTML for the current TOC. + + This expects the `_toc` attribute to have been set on this instance. + """ + if self._toc is None: + return None + + def indent(): + return ' ' * (len(h_stack) - 1) + lines = [] + h_stack = [0] # stack of header-level numbers + for level, id, name in self._toc: + if level > h_stack[-1]: + lines.append("%s<ul>" % indent()) + h_stack.append(level) + elif level == h_stack[-1]: + lines[-1] += "</li>" + else: + while level < h_stack[-1]: + h_stack.pop() + if not lines[-1].endswith("</li>"): + lines[-1] += "</li>" + lines.append("%s</ul></li>" % indent()) + lines.append('%s<li><a href="#%s">%s</a>' % ( + indent(), id, name)) + while len(h_stack) > 1: + h_stack.pop() + if not lines[-1].endswith("</li>"): + lines[-1] += "</li>" + lines.append("%s</ul>" % indent()) + return '\n'.join(lines) + '\n' + toc_html = property(toc_html) + +## {{{ http://code.activestate.com/recipes/577257/ (r1) +_slugify_strip_re = re.compile(r'[^\w\s-]') +_slugify_hyphenate_re = re.compile(r'[-\s]+') +def _slugify(value): + """ + Normalizes string, converts to lowercase, removes non-alpha characters, + and converts spaces to hyphens. + + From Django's "django/template/defaultfilters.py". + """ + import unicodedata + value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore').decode() + value = _slugify_strip_re.sub('', value).strip().lower() + return _slugify_hyphenate_re.sub('-', value) +## end of http://code.activestate.com/recipes/577257/ }}} + + # From http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52549 def _curry(*args, **kwargs): function, args = args[0], args[1:] @@ -1583,7 +2031,7 @@ def _regex_from_encoded_pattern(s): except KeyError: raise ValueError("unsupported regex flag: '%s' in '%s' " "(must be one of '%s')" - % (char, s, ''.join(flag_from_char.keys()))) + % (char, s, ''.join(list(flag_from_char.keys())))) return re.compile(s[1:idx], flags) else: # not an encoded regex return re.compile(re.escape(s)) @@ -1603,8 +2051,8 @@ def _dedentlines(lines, tabsize=8, skip_first_line=False): """ DEBUG = False if DEBUG: - print "dedent: dedent(..., tabsize=%d, skip_first_line=%r)"\ - % (tabsize, skip_first_line) + print("dedent: dedent(..., tabsize=%d, skip_first_line=%r)"\ + % (tabsize, skip_first_line)) indents = [] margin = None for i, line in enumerate(lines): @@ -1621,12 +2069,12 @@ def _dedentlines(lines, tabsize=8, skip_first_line=False): break else: continue # skip all-whitespace lines - if DEBUG: print "dedent: indent=%d: %r" % (indent, line) + if DEBUG: print("dedent: indent=%d: %r" % (indent, line)) if margin is None: margin = indent else: margin = min(margin, indent) - if DEBUG: print "dedent: margin=%r" % margin + if DEBUG: print("dedent: margin=%r" % margin) if margin is not None and margin > 0: for i, line in enumerate(lines): @@ -1638,7 +2086,7 @@ def _dedentlines(lines, tabsize=8, skip_first_line=False): elif ch == '\t': removed += tabsize - (removed % tabsize) elif ch in '\r\n': - if DEBUG: print "dedent: %r: EOL -> strip up to EOL" % line + if DEBUG: print("dedent: %r: EOL -> strip up to EOL" % line) lines[i] = lines[i][j:] break else: @@ -1646,8 +2094,8 @@ def _dedentlines(lines, tabsize=8, skip_first_line=False): "line %r while removing %d-space margin" % (ch, line, margin)) if DEBUG: - print "dedent: %r: %r -> removed %d/%d"\ - % (line, ch, removed, margin) + print("dedent: %r: %r -> removed %d/%d"\ + % (line, ch, removed, margin)) if removed == margin: lines[i] = lines[i][j+1:] break @@ -1741,6 +2189,22 @@ def _hr_tag_re_from_tab_width(tab_width): _hr_tag_re_from_tab_width = _memoized(_hr_tag_re_from_tab_width) +def _xml_escape_attr(attr, skip_single_quote=True): + """Escape the given string for use in an HTML/XML tag attribute. + + By default this doesn't bother with escaping `'` to `'`, presuming that + the tag attribute is surrounded by double quotes. + """ + escaped = (attr + .replace('&', '&') + .replace('"', '"') + .replace('<', '<') + .replace('>', '>')) + if not skip_single_quote: + escaped = escaped.replace("'", "'") + return escaped + + def _xml_encode_email_char_at_random(ch): r = random() # Roughly 10% raw, 45% hex, 45% dec. @@ -1791,17 +2255,11 @@ def main(argv=None): "[HTML_REMOVED] note") parser.add_option("-x", "--extras", action="append", help="Turn on specific extra features (not part of " - "the core Markdown spec). Supported values: " - "'code-friendly' disables _/__ for emphasis; " - "'code-color' adds code-block syntax coloring; " - "'link-patterns' adds auto-linking based on patterns; " - "'footnotes' adds the footnotes syntax;" - "'xml' passes one-liner processing instructions and namespaced XML tags;" - "'pyshell' to put unindented Python interactive shell sessions in a <code> block.") + "the core Markdown spec). See above.") parser.add_option("--use-file-vars", help="Look for and use Emacs-style 'markdown-extras' " "file var to turn on extras. See " - "<http://code.google.com/p/python-markdown2/wiki/Extras>.") + "<https://github.com/trentm/python-markdown2/wiki/Extras>") parser.add_option("--link-patterns-file", help="path to a link pattern file") parser.add_option("--self-test", action="store_true", @@ -1855,22 +2313,41 @@ def main(argv=None): from os.path import join, dirname, abspath, exists markdown_pl = join(dirname(dirname(abspath(__file__))), "test", "Markdown.pl") + if not paths: + paths = ['-'] for path in paths: + if path == '-': + text = sys.stdin.read() + else: + fp = codecs.open(path, 'r', opts.encoding) + text = fp.read() + fp.close() if opts.compare: - print "==== Markdown.pl ====" - perl_cmd = 'perl %s "%s"' % (markdown_pl, path) - o = os.popen(perl_cmd) - perl_html = o.read() - o.close() - sys.stdout.write(perl_html) - print "==== markdown2.py ====" - html = markdown_path(path, encoding=opts.encoding, - html4tags=opts.html4tags, - safe_mode=opts.safe_mode, - extras=extras, link_patterns=link_patterns, - use_file_vars=opts.use_file_vars) - sys.stdout.write( - html.encode(sys.stdout.encoding or "utf-8", 'xmlcharrefreplace')) + from subprocess import Popen, PIPE + print("==== Markdown.pl ====") + p = Popen('perl %s' % markdown_pl, shell=True, stdin=PIPE, stdout=PIPE, close_fds=True) + p.stdin.write(text.encode('utf-8')) + p.stdin.close() + perl_html = p.stdout.read().decode('utf-8') + if py3: + sys.stdout.write(perl_html) + else: + sys.stdout.write(perl_html.encode( + sys.stdout.encoding or "utf-8", 'xmlcharrefreplace')) + print("==== markdown2.py ====") + html = markdown(text, + html4tags=opts.html4tags, + safe_mode=opts.safe_mode, + extras=extras, link_patterns=link_patterns, + use_file_vars=opts.use_file_vars) + if py3: + sys.stdout.write(html) + else: + sys.stdout.write(html.encode( + sys.stdout.encoding or "utf-8", 'xmlcharrefreplace')) + if extras and "toc" in extras: + log.debug("toc_html: " + + html.toc_html.encode(sys.stdout.encoding or "utf-8", 'xmlcharrefreplace')) if opts.compare: test_dir = join(dirname(dirname(abspath(__file__))), "test") if exists(join(test_dir, "test_markdown2.py")): @@ -1881,10 +2358,8 @@ def main(argv=None): else: norm_html = html norm_perl_html = perl_html - print "==== match? %r ====" % (norm_perl_html == norm_html) + print("==== match? %r ====" % (norm_perl_html == norm_html)) if __name__ == "__main__": sys.exit( main(sys.argv) ) - - diff --git a/gluon/contrib/memcache/memcache.py b/gluon/contrib/memcache/memcache.py index 706f5448..9bc61682 100644 --- a/gluon/contrib/memcache/memcache.py +++ b/gluon/contrib/memcache/memcache.py @@ -1,12 +1,12 @@ #!/usr/bin/env python -""" -client module for memcached (memory cache daemon) +"""client module for memcached (memory cache daemon) Overview ======== -See U{the MemCached homepage<http://www.danga.com/memcached>} for more about memcached. +See U{the MemCached homepage<http://www.danga.com/memcached>} for more +about memcached. Usage summary ============= @@ -22,11 +22,12 @@ This should give you a feel for how this module operates:: mc.set("another_key", 3) mc.delete("another_key") - mc.set("key", "1") # note that the key used for incr/decr must be a string. + mc.set("key", "1") # note that the key used for incr/decr must be + # a string. mc.incr("key") mc.decr("key") -The standard way to use memcache with a database is like this:: +The standard way to use memcache with a database is like this: key = derive_key(obj) obj = mc.get(key) @@ -41,27 +42,35 @@ Detailed Documentation ====================== More detailed documentation is available in the L{Client} class. + """ -import sys -import socket -import time -import os -import re -try: - import cPickle as pickle -except ImportError: - import pickle +from __future__ import print_function + +import binascii +import os +import pickle +import re +import socket +import sys +import threading +import time +import zlib + +import six + -from binascii import crc32 # zlib version is not cross-platform def cmemcache_hash(key): - return((((crc32(key) & 0xffffffff) >> 16) & 0x7fff) or 1) + return ( + (((binascii.crc32(key.encode('ascii')) & 0xffffffff) + >> 16) & 0x7fff) or 1) serverHashFunction = cmemcache_hash + def useOldServerHashFunction(): """Use the old python-memcache server hash function.""" global serverHashFunction - serverHashFunction = crc32 + serverHashFunction = binascii.crc32 try: from zlib import compress, decompress @@ -69,27 +78,40 @@ try: except ImportError: _supports_compress = False # quickly define a decompress just in case we recv compressed data. + def decompress(val): - raise _Error("received compressed data but I don't support compression (import error)") + raise _Error( + "Received compressed data but I don't support " + "compression (import error)") + +from io import BytesIO +try: + unicode +except NameError: + _has_unicode = False +else: + _has_unicode = True try: - from cStringIO import StringIO -except ImportError: - from StringIO import StringIO + _str_cls = basestring +except NameError: + _str_cls = str + +valid_key_chars_re = re.compile('[\x21-\x7e\x80-\xff]+$') # Original author: Evan Martin of Danga Interactive -__author__ = "Sean Reifschneider <jafo-memcached@tummy.com>" -__version__ = "1.48" +__author__ = "Sean Reifschneider <jafo-memcached@tummy.com>" +__version__ = "1.53" __copyright__ = "Copyright (C) 2003 Danga Interactive" # http://en.wikipedia.org/wiki/Python_Software_Foundation_License -__license__ = "Python Software Foundation License" +__license__ = "Python Software Foundation License" SERVER_MAX_KEY_LENGTH = 250 -# Storing values larger than 1MB requires recompiling memcached. If you do, -# this value can be changed by doing "memcache.SERVER_MAX_VALUE_LENGTH = N" -# after importing this module. -SERVER_MAX_VALUE_LENGTH = 1024*1024 +# Storing values larger than 1MB requires recompiling memcached. If +# you do, this value can be changed by doing +# "memcache.SERVER_MAX_VALUE_LENGTH = N" after importing this module. +SERVER_MAX_VALUE_LENGTH = 1024 * 1024 class _Error(Exception): @@ -100,102 +122,119 @@ class _ConnectionDeadError(Exception): pass -try: - # Only exists in Python 2.4+ - from threading import local -except ImportError: - # TODO: add the pure-python local implementation - class local(object): - pass - - _DEAD_RETRY = 30 # number of seconds before retrying a dead server. -_SOCKET_TIMEOUT = 3 # number of seconds before sockets timeout. +_SOCKET_TIMEOUT = 3 # number of seconds before sockets timeout. -class Client(local): - """ - Object representing a pool of memcache servers. +class Client(threading.local): + """Object representing a pool of memcache servers. See L{memcache} for an overview. In all cases where a key is used, the key can be either: 1. A simple hashable type (string, integer, etc.). - 2. A tuple of C{(hashvalue, key)}. This is useful if you want to avoid - making this module calculate a hash value. You may prefer, for - example, to keep all of a given user's objects on the same memcache - server, so you could use the user's unique id as the hash value. + 2. A tuple of C{(hashvalue, key)}. This is useful if you want + to avoid making this module calculate a hash value. You may + prefer, for example, to keep all of a given user's objects on + the same memcache server, so you could use the user's unique + id as the hash value. - @group Setup: __init__, set_servers, forget_dead_hosts, disconnect_all, debuglog + + @group Setup: __init__, set_servers, forget_dead_hosts, + disconnect_all, debuglog @group Insertion: set, add, replace, set_multi @group Retrieval: get, get_multi @group Integers: incr, decr @group Removal: delete, delete_multi - @sort: __init__, set_servers, forget_dead_hosts, disconnect_all, debuglog,\ - set, set_multi, add, replace, get, get_multi, incr, decr, delete, delete_multi + @sort: __init__, set_servers, forget_dead_hosts, disconnect_all, + debuglog,\ set, set_multi, add, replace, get, get_multi, + incr, decr, delete, delete_multi """ - _FLAG_PICKLE = 1<<0 - _FLAG_INTEGER = 1<<1 - _FLAG_LONG = 1<<2 - _FLAG_COMPRESSED = 1<<3 + _FLAG_PICKLE = 1 << 0 + _FLAG_INTEGER = 1 << 1 + _FLAG_LONG = 1 << 2 + _FLAG_COMPRESSED = 1 << 3 _SERVER_RETRIES = 10 # how many times to try finding a free server. # exceptions for Client class MemcachedKeyError(Exception): pass + class MemcachedKeyLengthError(MemcachedKeyError): pass + class MemcachedKeyCharacterError(MemcachedKeyError): pass + class MemcachedKeyNoneError(MemcachedKeyError): pass + class MemcachedKeyTypeError(MemcachedKeyError): pass + class MemcachedStringEncodingError(Exception): pass def __init__(self, servers, debug=0, pickleProtocol=0, pickler=pickle.Pickler, unpickler=pickle.Unpickler, pload=None, pid=None, - server_max_key_length=SERVER_MAX_KEY_LENGTH, - server_max_value_length=SERVER_MAX_VALUE_LENGTH, + server_max_key_length=None, server_max_value_length=None, dead_retry=_DEAD_RETRY, socket_timeout=_SOCKET_TIMEOUT, - cache_cas = False): - """ - Create a new Client object with the given list of servers. + cache_cas=False, flush_on_reconnect=0, check_keys=True): + """Create a new Client object with the given list of servers. @param servers: C{servers} is passed to L{set_servers}. - @param debug: whether to display error messages when a server can't be - contacted. - @param pickleProtocol: number to mandate protocol used by (c)Pickle. - @param pickler: optional override of default Pickler to allow subclassing. - @param unpickler: optional override of default Unpickler to allow subclassing. - @param pload: optional persistent_load function to call on pickle loading. - Useful for cPickle since subclassing isn't allowed. - @param pid: optional persistent_id function to call on pickle storing. - Useful for cPickle since subclassing isn't allowed. - @param dead_retry: number of seconds before retrying a blacklisted - server. Default to 30 s. - @param socket_timeout: timeout in seconds for all calls to a server. Defaults - to 3 seconds. - @param cache_cas: (default False) If true, cas operations will be - cached. WARNING: This cache is not expired internally, if you have - a long-running process you will need to expire it manually via - "client.reset_cas(), or the cache can grow unlimited. + @param debug: whether to display error messages when a server + can't be contacted. + @param pickleProtocol: number to mandate protocol used by + (c)Pickle. + @param pickler: optional override of default Pickler to allow + subclassing. + @param unpickler: optional override of default Unpickler to + allow subclassing. + @param pload: optional persistent_load function to call on + pickle loading. Useful for cPickle since subclassing isn't + allowed. + @param pid: optional persistent_id function to call on pickle + storing. Useful for cPickle since subclassing isn't allowed. + @param dead_retry: number of seconds before retrying a + blacklisted server. Default to 30 s. + @param socket_timeout: timeout in seconds for all calls to a + server. Defaults to 3 seconds. + @param cache_cas: (default False) If true, cas operations will + be cached. WARNING: This cache is not expired internally, if + you have a long-running process you will need to expire it + manually via client.reset_cas(), or the cache can grow + unlimited. @param server_max_key_length: (default SERVER_MAX_KEY_LENGTH) Data that is larger than this will not be sent to the server. - @param server_max_value_length: (default SERVER_MAX_VALUE_LENGTH) - Data that is larger than this will not be sent to the server. + @param server_max_value_length: (default + SERVER_MAX_VALUE_LENGTH) Data that is larger than this will + not be sent to the server. + @param flush_on_reconnect: optional flag which prevents a + scenario that can cause stale data to be read: If there's more + than one memcached server and the connection to one is + interrupted, keys that mapped to that server will get + reassigned to another. If the first server comes back, those + keys will map to it again. If it still has its data, get()s + can read stale data that was overwritten on another + server. This flag is off by default for backwards + compatibility. + @param check_keys: (default True) If True, the key is checked + to ensure it is the correct length and composed of the right + characters. """ - local.__init__(self) + super(Client, self).__init__() self.debug = debug self.dead_retry = dead_retry self.socket_timeout = socket_timeout + self.flush_on_reconnect = flush_on_reconnect self.set_servers(servers) self.stats = {} self.cache_cas = cache_cas self.reset_cas() + self.do_check_key = check_keys # Allow users to modify pickling/unpickling behavior self.pickleProtocol = pickleProtocol @@ -204,69 +243,80 @@ class Client(local): self.persistent_load = pload self.persistent_id = pid self.server_max_key_length = server_max_key_length + if self.server_max_key_length is None: + self.server_max_key_length = SERVER_MAX_KEY_LENGTH self.server_max_value_length = server_max_value_length + if self.server_max_value_length is None: + self.server_max_value_length = SERVER_MAX_VALUE_LENGTH # figure out the pickler style - file = StringIO() + file = BytesIO() try: - pickler = self.pickler(file, protocol = self.pickleProtocol) + pickler = self.pickler(file, protocol=self.pickleProtocol) self.picklerIsKeyword = True except TypeError: self.picklerIsKeyword = False def reset_cas(self): - """ - Reset the cas cache. This is only used if the Client() object - was created with "cache_cas=True". If used, this cache does not - expire internally, so it can grow unbounded if you do not clear it + """Reset the cas cache. + + This is only used if the Client() object was created with + "cache_cas=True". If used, this cache does not expire + internally, so it can grow unbounded if you do not clear it yourself. """ self.cas_ids = {} - def set_servers(self, servers): - """ - Set the pool of servers used by this client. + """Set the pool of servers used by this client. @param servers: an array of servers. Servers can be passed in two forms: - 1. Strings of the form C{"host:port"}, which implies a default weight of 1. - 2. Tuples of the form C{("host:port", weight)}, where C{weight} is - an integer weight value. + 1. Strings of the form C{"host:port"}, which implies a + default weight of 1. + 2. Tuples of the form C{("host:port", weight)}, where + C{weight} is an integer weight value. + """ self.servers = [_Host(s, self.debug, dead_retry=self.dead_retry, - socket_timeout=self.socket_timeout) + socket_timeout=self.socket_timeout, + flush_on_reconnect=self.flush_on_reconnect) for s in servers] self._init_buckets() - def get_stats(self, stat_args = None): - '''Get statistics from each of the servers. + def get_stats(self, stat_args=None): + """Get statistics from each of the servers. @param stat_args: Additional arguments to pass to the memcache "stats" command. - @return: A list of tuples ( server_identifier, stats_dictionary ). - The dictionary contains a number of name/value pairs specifying - the name of the status field and the string value associated with - it. The values are not converted from strings. - ''' + @return: A list of tuples ( server_identifier, + stats_dictionary ). The dictionary contains a number of + name/value pairs specifying the name of the status field + and the string value associated with it. The values are + not converted from strings. + """ data = [] for s in self.servers: - if not s.connect(): continue + if not s.connect(): + continue if s.family == socket.AF_INET: - name = '%s:%s (%s)' % ( s.ip, s.port, s.weight ) + name = '%s:%s (%s)' % (s.ip, s.port, s.weight) + elif s.family == socket.AF_INET6: + name = '[%s]:%s (%s)' % (s.ip, s.port, s.weight) else: - name = 'unix:%s (%s)' % ( s.address, s.weight ) + name = 'unix:%s (%s)' % (s.address, s.weight) if not stat_args: s.send_cmd('stats') else: s.send_cmd('stats ' + stat_args) serverData = {} - data.append(( name, serverData )) + data.append((name, serverData)) readline = s.readline while 1: line = readline() - if not line or line.strip() == 'END': break + if not line or line.strip() == 'END': + break stats = line.split(' ', 2) serverData[stats[1]] = stats[2] @@ -275,33 +325,37 @@ class Client(local): def get_slabs(self): data = [] for s in self.servers: - if not s.connect(): continue + if not s.connect(): + continue if s.family == socket.AF_INET: - name = '%s:%s (%s)' % ( s.ip, s.port, s.weight ) + name = '%s:%s (%s)' % (s.ip, s.port, s.weight) + elif s.family == socket.AF_INET6: + name = '[%s]:%s (%s)' % (s.ip, s.port, s.weight) else: - name = 'unix:%s (%s)' % ( s.address, s.weight ) + name = 'unix:%s (%s)' % (s.address, s.weight) serverData = {} - data.append(( name, serverData )) + data.append((name, serverData)) s.send_cmd('stats items') readline = s.readline while 1: line = readline() - if not line or line.strip() == 'END': break + if not line or line.strip() == 'END': + break item = line.split(' ', 2) - #0 = STAT, 1 = ITEM, 2 = Value + # 0 = STAT, 1 = ITEM, 2 = Value slab = item[1].split(':', 2) - #0 = items, 1 = Slab #, 2 = Name + # 0 = items, 1 = Slab #, 2 = Name if slab[1] not in serverData: serverData[slab[1]] = {} serverData[slab[1]][slab[2]] = item[2] return data def flush_all(self): - 'Expire all data currently in the memcache servers.' + """Expire all data in memcache servers that are reachable.""" for s in self.servers: - if not s.connect(): continue - s.send_cmd('flush_all') - s.expect("OK") + if not s.connect(): + continue + s.flush() def debuglog(self, str): if self.debug: @@ -314,9 +368,7 @@ class Client(local): self.stats[func] += 1 def forget_dead_hosts(self): - """ - Reset every host in the pool to an "alive" state. - """ + """Reset every host in the pool to an "alive" state.""" for s in self.servers: s.deaduntil = 0 @@ -332,10 +384,13 @@ class Client(local): else: serverhash = serverHashFunction(key) + if not self.buckets: + return None, None + for i in range(Client._SERVER_RETRIES): server = self.buckets[serverhash % len(self.buckets)] if server.connect(): - #print "(using server %s)" % server, + # print("(using server %s)" % server,) return server, key serverhash = serverHashFunction(str(serverhash) + str(i)) return None, None @@ -345,54 +400,54 @@ class Client(local): s.close_socket() def delete_multi(self, keys, time=0, key_prefix=''): - ''' - Delete multiple keys in the memcache doing just one query. + """Delete multiple keys in the memcache doing just one query. - >>> notset_keys = mc.set_multi({'key1' : 'val1', 'key2' : 'val2'}) - >>> mc.get_multi(['key1', 'key2']) == {'key1' : 'val1', 'key2' : 'val2'} + >>> notset_keys = mc.set_multi({'a1' : 'val1', 'a2' : 'val2'}) + >>> mc.get_multi(['a1', 'a2']) == {'a1' : 'val1','a2' : 'val2'} 1 >>> mc.delete_multi(['key1', 'key2']) 1 >>> mc.get_multi(['key1', 'key2']) == {} 1 - - This method is recommended over iterated regular L{delete}s as it reduces total latency, since - your app doesn't have to wait for each round-trip of L{delete} before sending - the next one. + This method is recommended over iterated regular L{delete}s as + it reduces total latency, since your app doesn't have to wait + for each round-trip of L{delete} before sending the next one. @param keys: An iterable of keys to clear - @param time: number of seconds any subsequent set / update commands should fail. Defaults to 0 for no delay. - @param key_prefix: Optional string to prepend to each key when sending to memcache. - See docs for L{get_multi} and L{set_multi}. - + @param time: number of seconds any subsequent set / update + commands should fail. Defaults to 0 for no delay. + @param key_prefix: Optional string to prepend to each key when + sending to memcache. See docs for L{get_multi} and + L{set_multi}. @return: 1 if no failure in communication with any memcacheds. @rtype: int - - ''' + """ self._statlog('delete_multi') - server_keys, prefixed_to_orig_key = self._map_and_prefix_keys(keys, key_prefix) + server_keys, prefixed_to_orig_key = self._map_and_prefix_keys( + keys, key_prefix) # send out all requests on each server before reading anything dead_servers = [] rc = 1 - for server in server_keys.iterkeys(): + for server in six.iterkeys(server_keys): bigcmd = [] write = bigcmd.append - if time != None: - for key in server_keys[server]: # These are mangled keys - write("delete %s %d\r\n" % (key, time)) + if time is not None: + for key in server_keys[server]: # These are mangled keys + write("delete %s %d\r\n" % (key, time)) else: - for key in server_keys[server]: # These are mangled keys - write("delete %s\r\n" % key) + for key in server_keys[server]: # These are mangled keys + write("delete %s\r\n" % key) try: server.send_cmds(''.join(bigcmd)) - except socket.error, msg: + except socket.error as msg: rc = 0 - if isinstance(msg, tuple): msg = msg[1] + if isinstance(msg, tuple): + msg = msg[1] server.mark_dead(msg) dead_servers.append(server) @@ -400,12 +455,13 @@ class Client(local): for server in dead_servers: del server_keys[server] - for server, keys in server_keys.iteritems(): + for server, keys in six.iteritems(server_keys): try: for key in keys: server.expect("DELETED") - except socket.error, msg: - if isinstance(msg, tuple): msg = msg[1] + except socket.error as msg: + if isinstance(msg, tuple): + msg = msg[1] server.mark_dead(msg) rc = 0 return rc @@ -418,36 +474,57 @@ class Client(local): should fail. Defaults to None for no delay. @rtype: int ''' - self.check_key(key) + return self._deletetouch(['DELETED', 'NOT_FOUND'], "delete", key, time) + + def touch(self, key, time=0): + '''Updates the expiration time of a key in memcache. + + @return: Nonzero on success. + @param time: Tells memcached the time which this value should + expire, either as a delta number of seconds, or an absolute + unix time-since-the-epoch value. See the memcached protocol + docs section "Storage Commands" for more info on <exptime>. We + default to 0 == cache forever. + @rtype: int + ''' + return self._deletetouch(['TOUCHED'], "touch", key, time) + + def _deletetouch(self, expected, cmd, key, time=0): + if self.do_check_key: + self.check_key(key) server, key = self._get_server(key) if not server: return 0 - self._statlog('delete') - if time != None and time != 0: - cmd = "delete %s %d" % (key, time) + self._statlog(cmd) + if time is not None and time != 0: + cmd = "%s %s %d" % (cmd, key, time) else: - cmd = "delete %s" % key + cmd = "%s %s" % (cmd, key) try: server.send_cmd(cmd) line = server.readline() - if line and line.strip() in ['DELETED', 'NOT_FOUND']: return 1 - self.debuglog('Delete expected DELETED or NOT_FOUND, got: %s' - % repr(line)) - except socket.error, msg: - if isinstance(msg, tuple): msg = msg[1] + if line and line.strip() in expected: + return 1 + self.debuglog('%s expected %s, got: %r' + % (cmd, ' or '.join(expected), line)) + except socket.error as msg: + if isinstance(msg, tuple): + msg = msg[1] server.mark_dead(msg) return 0 def incr(self, key, delta=1): - """ - Sends a command to the server to atomically increment the value - for C{key} by C{delta}, or by 1 if C{delta} is unspecified. - Returns None if C{key} doesn't exist on server, otherwise it - returns the new value after incrementing. + """Increment value for C{key} by C{delta} - Note that the value for C{key} must already exist in the memcache, - and it must be the string representation of an integer. + Sends a command to the server to atomically increment the + value for C{key} by C{delta}, or by 1 if C{delta} is + unspecified. Returns None if C{key} doesn't exist on server, + otherwise it returns the new value after incrementing. + + Note that the value for C{key} must already exist in the + memcache, and it must be the string representation of an + integer. >>> mc.set("counter", "20") # returns 1, indicating success 1 @@ -456,49 +533,57 @@ class Client(local): >>> mc.incr("counter") 22 - Overflow on server is not checked. Be aware of values approaching - 2**32. See L{decr}. + Overflow on server is not checked. Be aware of values + approaching 2**32. See L{decr}. + + @param delta: Integer amount to increment by (should be zero + or greater). - @param delta: Integer amount to increment by (should be zero or greater). @return: New value after incrementing. @rtype: int """ return self._incrdecr("incr", key, delta) def decr(self, key, delta=1): - """ - Like L{incr}, but decrements. Unlike L{incr}, underflow is checked and - new values are capped at 0. If server value is 1, a decrement of 2 - returns 0, not -1. + """Decrement value for C{key} by C{delta} - @param delta: Integer amount to decrement by (should be zero or greater). - @return: New value after decrementing. + Like L{incr}, but decrements. Unlike L{incr}, underflow is + checked and new values are capped at 0. If server value is 1, + a decrement of 2 returns 0, not -1. + + @param delta: Integer amount to decrement by (should be zero + or greater). + + @return: New value after decrementing or None on error. @rtype: int """ return self._incrdecr("decr", key, delta) def _incrdecr(self, cmd, key, delta): - self.check_key(key) + if self.do_check_key: + self.check_key(key) server, key = self._get_server(key) if not server: - return 0 + return None self._statlog(cmd) cmd = "%s %s %d" % (cmd, key, delta) try: server.send_cmd(cmd) line = server.readline() - if line == None or line.strip() =='NOT_FOUND': return None + if line is None or line.strip() == 'NOT_FOUND': + return None return int(line) - except socket.error, msg: - if isinstance(msg, tuple): msg = msg[1] + except socket.error as msg: + if isinstance(msg, tuple): + msg = msg[1] server.mark_dead(msg) return None - def add(self, key, val, time = 0, min_compress_len = 0): - ''' - Add new key with value. + def add(self, key, val, time=0, min_compress_len=0): + '''Add new key with value. - Like L{set}, but only stores in memcache if the key doesn't already exist. + Like L{set}, but only stores in memcache if the key doesn't + already exist. @return: Nonzero on success. @rtype: int @@ -542,68 +627,76 @@ class Client(local): '''Unconditionally sets a key to a given value in the memcache. The C{key} can optionally be an tuple, with the first element - being the server hash value and the second being the key. - If you want to avoid making this module calculate a hash value. - You may prefer, for example, to keep all of a given user's objects - on the same memcache server, so you could use the user's unique - id as the hash value. + being the server hash value and the second being the key. If + you want to avoid making this module calculate a hash value. + You may prefer, for example, to keep all of a given user's + objects on the same memcache server, so you could use the + user's unique id as the hash value. @return: Nonzero on success. @rtype: int - @param time: Tells memcached the time which this value should expire, either - as a delta number of seconds, or an absolute unix time-since-the-epoch - value. See the memcached protocol docs section "Storage Commands" - for more info on <exptime>. We default to 0 == cache forever. - @param min_compress_len: The threshold length to kick in auto-compression - of the value using the zlib.compress() routine. If the value being cached is - a string, then the length of the string is measured, else if the value is an - object, then the length of the pickle result is measured. If the resulting - attempt at compression yeilds a larger string than the input, then it is - discarded. For backwards compatability, this parameter defaults to 0, - indicating don't ever try to compress. + + @param time: Tells memcached the time which this value should + expire, either as a delta number of seconds, or an absolute + unix time-since-the-epoch value. See the memcached protocol + docs section "Storage Commands" for more info on <exptime>. We + default to 0 == cache forever. + + @param min_compress_len: The threshold length to kick in + auto-compression of the value using the zlib.compress() + routine. If the value being cached is a string, then the + length of the string is measured, else if the value is an + object, then the length of the pickle result is measured. If + the resulting attempt at compression yeilds a larger string + than the input, then it is discarded. For backwards + compatability, this parameter defaults to 0, indicating don't + ever try to compress. + ''' return self._set("set", key, val, time, min_compress_len) - def cas(self, key, val, time=0, min_compress_len=0): - '''Sets a key to a given value in the memcache if it hasn't been + '''Check and set (CAS) + + Sets a key to a given value in the memcache if it hasn't been altered since last fetched. (See L{gets}). The C{key} can optionally be an tuple, with the first element - being the server hash value and the second being the key. - If you want to avoid making this module calculate a hash value. - You may prefer, for example, to keep all of a given user's objects - on the same memcache server, so you could use the user's unique - id as the hash value. + being the server hash value and the second being the key. If + you want to avoid making this module calculate a hash value. + You may prefer, for example, to keep all of a given user's + objects on the same memcache server, so you could use the + user's unique id as the hash value. @return: Nonzero on success. @rtype: int - @param time: Tells memcached the time which this value should expire, - either as a delta number of seconds, or an absolute unix - time-since-the-epoch value. See the memcached protocol docs section - "Storage Commands" for more info on <exptime>. We default to - 0 == cache forever. + + @param time: Tells memcached the time which this value should + expire, either as a delta number of seconds, or an absolute + unix time-since-the-epoch value. See the memcached protocol + docs section "Storage Commands" for more info on <exptime>. We + default to 0 == cache forever. + @param min_compress_len: The threshold length to kick in - auto-compression of the value using the zlib.compress() routine. If - the value being cached is a string, then the length of the string is - measured, else if the value is an object, then the length of the - pickle result is measured. If the resulting attempt at compression - yeilds a larger string than the input, then it is discarded. For - backwards compatability, this parameter defaults to 0, indicating - don't ever try to compress. + auto-compression of the value using the zlib.compress() + routine. If the value being cached is a string, then the + length of the string is measured, else if the value is an + object, then the length of the pickle result is measured. If + the resulting attempt at compression yeilds a larger string + than the input, then it is discarded. For backwards + compatability, this parameter defaults to 0, indicating don't + ever try to compress. ''' return self._set("cas", key, val, time, min_compress_len) - def _map_and_prefix_keys(self, key_iterable, key_prefix): - """Compute the mapping of server (_Host instance) -> list of keys to stuff onto that server, as well as the mapping of - prefixed key -> original key. - - + """Compute the mapping of server (_Host instance) -> list of keys to + stuff onto that server, as well as the mapping of prefixed key + -> original key. """ # Check it just once ... - key_extra_len=len(key_prefix) - if key_prefix: + key_extra_len = len(key_prefix) + if key_prefix and self.do_check_key: self.check_key(key_prefix) # server (_Host) -> list of unprefixed server keys in mapping @@ -613,16 +706,23 @@ class Client(local): # build up a list for each server of all the keys we want. for orig_key in key_iterable: if isinstance(orig_key, tuple): - # Tuple of hashvalue, key ala _get_server(). Caller is essentially telling us what server to stuff this on. + # Tuple of hashvalue, key ala _get_server(). Caller is + # essentially telling us what server to stuff this on. # Ensure call to _get_server gets a Tuple as well. str_orig_key = str(orig_key[1]) - server, key = self._get_server((orig_key[0], key_prefix + str_orig_key)) # Gotta pre-mangle key before hashing to a server. Returns the mangled key. + + # Gotta pre-mangle key before hashing to a + # server. Returns the mangled key. + server, key = self._get_server( + (orig_key[0], key_prefix + str_orig_key)) else: - str_orig_key = str(orig_key) # set_multi supports int / long keys. + # set_multi supports int / long keys. + str_orig_key = str(orig_key) server, key = self._get_server(key_prefix + str_orig_key) # Now check to make sure key length is proper ... - self.check_key(str_orig_key, key_extra_len=key_extra_len) + if self.do_check_key: + self.check_key(str_orig_key, key_extra_len=key_extra_len) if not server: continue @@ -635,70 +735,92 @@ class Client(local): return (server_keys, prefixed_to_orig_key) def set_multi(self, mapping, time=0, key_prefix='', min_compress_len=0): - ''' - Sets multiple keys in the memcache doing just one query. + '''Sets multiple keys in the memcache doing just one query. >>> notset_keys = mc.set_multi({'key1' : 'val1', 'key2' : 'val2'}) - >>> mc.get_multi(['key1', 'key2']) == {'key1' : 'val1', 'key2' : 'val2'} + >>> mc.get_multi(['key1', 'key2']) == {'key1' : 'val1', + ... 'key2' : 'val2'} 1 - This method is recommended over regular L{set} as it lowers the number of - total packets flying around your network, reducing total latency, since - your app doesn't have to wait for each round-trip of L{set} before sending - the next one. + This method is recommended over regular L{set} as it lowers + the number of total packets flying around your network, + reducing total latency, since your app doesn't have to wait + for each round-trip of L{set} before sending the next one. @param mapping: A dict of key/value pairs to set. - @param time: Tells memcached the time which this value should expire, either - as a delta number of seconds, or an absolute unix time-since-the-epoch - value. See the memcached protocol docs section "Storage Commands" - for more info on <exptime>. We default to 0 == cache forever. - @param key_prefix: Optional string to prepend to each key when sending to memcache. Allows you to efficiently stuff these keys into a pseudo-namespace in memcache: - >>> notset_keys = mc.set_multi({'key1' : 'val1', 'key2' : 'val2'}, key_prefix='subspace_') + + @param time: Tells memcached the time which this value should + expire, either as a delta number of seconds, or an + absolute unix time-since-the-epoch value. See the + memcached protocol docs section "Storage Commands" for + more info on <exptime>. We default to 0 == cache forever. + + @param key_prefix: Optional string to prepend to each key when + sending to memcache. Allows you to efficiently stuff these + keys into a pseudo-namespace in memcache: + + >>> notset_keys = mc.set_multi( + ... {'key1' : 'val1', 'key2' : 'val2'}, + ... key_prefix='subspace_') >>> len(notset_keys) == 0 True - >>> mc.get_multi(['subspace_key1', 'subspace_key2']) == {'subspace_key1' : 'val1', 'subspace_key2' : 'val2'} + >>> mc.get_multi(['subspace_key1', + ... 'subspace_key2']) == {'subspace_key1': 'val1', + ... 'subspace_key2' : 'val2'} True - Causes key 'subspace_key1' and 'subspace_key2' to be set. Useful in conjunction with a higher-level layer which applies namespaces to data in memcache. - In this case, the return result would be the list of notset original keys, prefix not applied. + Causes key 'subspace_key1' and 'subspace_key2' to be + set. Useful in conjunction with a higher-level layer which + applies namespaces to data in memcache. In this case, the + return result would be the list of notset original keys, + prefix not applied. + + @param min_compress_len: The threshold length to kick in + auto-compression of the value using the zlib.compress() + routine. If the value being cached is a string, then the + length of the string is measured, else if the value is an + object, then the length of the pickle result is + measured. If the resulting attempt at compression yeilds a + larger string than the input, then it is discarded. For + backwards compatability, this parameter defaults to 0, + indicating don't ever try to compress. + + @return: List of keys which failed to be stored [ memcache out + of memory, etc. ]. - @param min_compress_len: The threshold length to kick in auto-compression - of the value using the zlib.compress() routine. If the value being cached is - a string, then the length of the string is measured, else if the value is an - object, then the length of the pickle result is measured. If the resulting - attempt at compression yeilds a larger string than the input, then it is - discarded. For backwards compatability, this parameter defaults to 0, - indicating don't ever try to compress. - @return: List of keys which failed to be stored [ memcache out of memory, etc. ]. @rtype: list - ''' - self._statlog('set_multi') - server_keys, prefixed_to_orig_key = self._map_and_prefix_keys(mapping.iterkeys(), key_prefix) + server_keys, prefixed_to_orig_key = self._map_and_prefix_keys( + six.iterkeys(mapping), key_prefix) # send out all requests on each server before reading anything dead_servers = [] - notstored = [] # original keys. + notstored = [] # original keys. - for server in server_keys.iterkeys(): + for server in six.iterkeys(server_keys): bigcmd = [] write = bigcmd.append try: - for key in server_keys[server]: # These are mangled keys + for key in server_keys[server]: # These are mangled keys store_info = self._val_to_store_info( - mapping[prefixed_to_orig_key[key]], - min_compress_len) + mapping[prefixed_to_orig_key[key]], + min_compress_len) if store_info: - write("set %s %d %d %d\r\n%s\r\n" % (key, store_info[0], - time, store_info[1], store_info[2])) + msg = "set %s %d %d %d\r\n%s\r\n" + write(msg % (key, + store_info[0], + time, + store_info[1], + store_info[2])) else: notstored.append(prefixed_to_orig_key[key]) server.send_cmds(''.join(bigcmd)) - except socket.error, msg: - if isinstance(msg, tuple): msg = msg[1] + except socket.error as msg: + if isinstance(msg, tuple): + msg = msg[1] server.mark_dead(msg) dead_servers.append(server) @@ -707,24 +829,28 @@ class Client(local): del server_keys[server] # short-circuit if there are no servers, just return all keys - if not server_keys: return(mapping.keys()) + if not server_keys: + return(mapping.keys()) - for server, keys in server_keys.iteritems(): + for server, keys in six.iteritems(server_keys): try: for key in keys: - line = server.readline() - if line == 'STORED': + if server.readline() == 'STORED': continue else: - notstored.append(prefixed_to_orig_key[key]) #un-mangle. - except (_Error, socket.error), msg: - if isinstance(msg, tuple): msg = msg[1] + # un-mangle. + notstored.append(prefixed_to_orig_key[key]) + except (_Error, socket.error) as msg: + if isinstance(msg, tuple): + msg = msg[1] server.mark_dead(msg) return notstored def _val_to_store_info(self, val, min_compress_len): - """ - Transform val to a storable representation, returning a tuple of the flags, the length of the new value, and the new value itself. + """Transform val to a storable representation. + + Returns a tuple of the flags, the length of the new value, and + the new value itself. """ flags = 0 if isinstance(val, str): @@ -741,9 +867,9 @@ class Client(local): min_compress_len = 0 else: flags |= Client._FLAG_PICKLE - file = StringIO() + file = BytesIO() if self.picklerIsKeyword: - pickler = self.pickler(file, protocol = self.pickleProtocol) + pickler = self.pickler(file, protocol=self.pickleProtocol) else: pickler = self.pickler(file, self.pickleProtocol) if self.persistent_id: @@ -752,10 +878,11 @@ class Client(local): val = file.getvalue() lv = len(val) - # We should try to compress if min_compress_len > 0 and we could - # import zlib and this string is longer than our min threshold. - if min_compress_len and _supports_compress and lv > min_compress_len: - comp_val = compress(val) + # We should try to compress if min_compress_len > 0 and we + # could import zlib and this string is longer than our min + # threshold. + if min_compress_len and lv > min_compress_len: + comp_val = zlib.compress(val) # Only retain the result if the compression result is smaller # than the original. if len(comp_val) < lv: @@ -763,13 +890,15 @@ class Client(local): val = comp_val # silently do not store if value length exceeds maximum - if self.server_max_value_length != 0 and \ - len(val) > self.server_max_value_length: return(0) + if (self.server_max_value_length != 0 and + len(val) > self.server_max_value_length): + return(0) return (flags, len(val), val) - def _set(self, cmd, key, val, time, min_compress_len = 0): - self.check_key(key) + def _set(self, cmd, key, val, time, min_compress_len=0): + if self.do_check_key: + self.check_key(key) server, key = self._get_server(key) if not server: return 0 @@ -778,23 +907,28 @@ class Client(local): self._statlog(cmd) store_info = self._val_to_store_info(val, min_compress_len) - if not store_info: return(0) + if not store_info: + return(0) if cmd == 'cas': if key not in self.cas_ids: return self._set('set', key, val, time, min_compress_len) fullcmd = "%s %s %d %d %d %d\r\n%s" % ( - cmd, key, store_info[0], time, store_info[1], - self.cas_ids[key], store_info[2]) + cmd, key, store_info[0], time, store_info[1], + self.cas_ids[key], store_info[2]) else: fullcmd = "%s %s %d %d %d\r\n%s" % ( - cmd, key, store_info[0], time, store_info[1], store_info[2]) + cmd, key, store_info[0], + time, store_info[1], store_info[2] + ) try: server.send_cmd(fullcmd) - return(server.expect("STORED") == "STORED") - except socket.error, msg: - if isinstance(msg, tuple): msg = msg[1] + return(server.expect("STORED", raise_exception=True) + == "STORED") + except socket.error as msg: + if isinstance(msg, tuple): + msg = msg[1] server.mark_dead(msg) return 0 @@ -803,14 +937,15 @@ class Client(local): except _ConnectionDeadError: # retry once try: - server._get_socket() - return _unsafe_set() - except (_ConnectionDeadError, socket.error), msg: + if server._get_socket(): + return _unsafe_set() + except (_ConnectionDeadError, socket.error) as msg: server.mark_dead(msg) return 0 def _get(self, cmd, key): - self.check_key(key) + if self.do_check_key: + self.check_key(key) server, key = self._get_server(key) if not server: return None @@ -823,20 +958,25 @@ class Client(local): rkey = flags = rlen = cas_id = None if cmd == 'gets': - rkey, flags, rlen, cas_id, = self._expect_cas_value(server) + rkey, flags, rlen, cas_id, = self._expect_cas_value( + server, raise_exception=True + ) if rkey and self.cache_cas: self.cas_ids[rkey] = cas_id else: - rkey, flags, rlen, = self._expectvalue(server) + rkey, flags, rlen, = self._expectvalue( + server, raise_exception=True + ) if not rkey: return None try: value = self._recv_value(server, flags, rlen) finally: - server.expect("END") - except (_Error, socket.error), msg: - if isinstance(msg, tuple): msg = msg[1] + server.expect("END", raise_exception=True) + except (_Error, socket.error) as msg: + if isinstance(msg, tuple): + msg = msg[1] server.mark_dead(msg) return None @@ -850,7 +990,7 @@ class Client(local): if server.connect(): return _unsafe_get() return None - except (_ConnectionDeadError, socket.error), msg: + except (_ConnectionDeadError, socket.error) as msg: server.mark_dead(msg) return None @@ -869,54 +1009,73 @@ class Client(local): return self._get('gets', key) def get_multi(self, keys, key_prefix=''): - ''' - Retrieves multiple keys from the memcache doing just one query. + '''Retrieves multiple keys from the memcache doing just one query. >>> success = mc.set("foo", "bar") >>> success = mc.set("baz", 42) - >>> mc.get_multi(["foo", "baz", "foobar"]) == {"foo": "bar", "baz": 42} + >>> mc.get_multi(["foo", "baz", "foobar"]) == { + ... "foo": "bar", "baz": 42 + ... } 1 >>> mc.set_multi({'k1' : 1, 'k2' : 2}, key_prefix='pfx_') == [] 1 - This looks up keys 'pfx_k1', 'pfx_k2', ... . Returned dict will just have unprefixed keys 'k1', 'k2'. - >>> mc.get_multi(['k1', 'k2', 'nonexist'], key_prefix='pfx_') == {'k1' : 1, 'k2' : 2} + This looks up keys 'pfx_k1', 'pfx_k2', ... . Returned dict + will just have unprefixed keys 'k1', 'k2'. + + >>> mc.get_multi(['k1', 'k2', 'nonexist'], + ... key_prefix='pfx_') == {'k1' : 1, 'k2' : 2} 1 - get_mult [ and L{set_multi} ] can take str()-ables like ints / longs as keys too. Such as your db pri key fields. - They're rotored through str() before being passed off to memcache, with or without the use of a key_prefix. - In this mode, the key_prefix could be a table name, and the key itself a db primary key number. + get_mult [ and L{set_multi} ] can take str()-ables like ints / + longs as keys too. Such as your db pri key fields. They're + rotored through str() before being passed off to memcache, + with or without the use of a key_prefix. In this mode, the + key_prefix could be a table name, and the key itself a db + primary key number. - >>> mc.set_multi({42: 'douglass adams', 46 : 'and 2 just ahead of me'}, key_prefix='numkeys_') == [] + >>> mc.set_multi({42: 'douglass adams', + ... 46: 'and 2 just ahead of me'}, + ... key_prefix='numkeys_') == [] 1 - >>> mc.get_multi([46, 42], key_prefix='numkeys_') == {42: 'douglass adams', 46 : 'and 2 just ahead of me'} + >>> mc.get_multi([46, 42], key_prefix='numkeys_') == { + ... 42: 'douglass adams', + ... 46: 'and 2 just ahead of me' + ... } 1 - This method is recommended over regular L{get} as it lowers the number of - total packets flying around your network, reducing total latency, since - your app doesn't have to wait for each round-trip of L{get} before sending - the next one. + This method is recommended over regular L{get} as it lowers + the number of total packets flying around your network, + reducing total latency, since your app doesn't have to wait + for each round-trip of L{get} before sending the next one. See also L{set_multi}. @param keys: An array of keys. - @param key_prefix: A string to prefix each key when we communicate with memcache. - Facilitates pseudo-namespaces within memcache. Returned dictionary keys will not have this prefix. - @return: A dictionary of key/value pairs that were available. If key_prefix was provided, the keys in the retured dictionary will not have it present. + @param key_prefix: A string to prefix each key when we + communicate with memcache. Facilitates pseudo-namespaces + within memcache. Returned dictionary keys will not have this + prefix. + + @return: A dictionary of key/value pairs that were + available. If key_prefix was provided, the keys in the retured + dictionary will not have it present. ''' self._statlog('get_multi') - server_keys, prefixed_to_orig_key = self._map_and_prefix_keys(keys, key_prefix) + server_keys, prefixed_to_orig_key = self._map_and_prefix_keys( + keys, key_prefix) # send out all requests on each server before reading anything dead_servers = [] - for server in server_keys.iterkeys(): + for server in six.iterkeys(server_keys): try: server.send_cmd("get %s" % " ".join(server_keys[server])) - except socket.error, msg: - if isinstance(msg, tuple): msg = msg[1] + except socket.error as msg: + if isinstance(msg, tuple): + msg = msg[1] server.mark_dead(msg) dead_servers.append(server) @@ -925,7 +1084,7 @@ class Client(local): del server_keys[server] retvals = {} - for server in server_keys.iterkeys(): + for server in six.iterkeys(server_keys): try: line = server.readline() while line and line != 'END': @@ -933,16 +1092,18 @@ class Client(local): # Bo Yang reports that this can sometimes be None if rkey is not None: val = self._recv_value(server, flags, rlen) - retvals[prefixed_to_orig_key[rkey]] = val # un-prefix returned key. + # un-prefix returned key. + retvals[prefixed_to_orig_key[rkey]] = val line = server.readline() - except (_Error, socket.error), msg: - if isinstance(msg, tuple): msg = msg[1] + except (_Error, socket.error) as msg: + if isinstance(msg, tuple): + msg = msg[1] server.mark_dead(msg) return retvals - def _expect_cas_value(self, server, line=None): + def _expect_cas_value(self, server, line=None, raise_exception=False): if not line: - line = server.readline() + line = server.readline(raise_exception) if line and line[:5] == 'VALUE': resp, rkey, flags, len, cas_id = line.split() @@ -950,9 +1111,9 @@ class Client(local): else: return (None, None, None, None) - def _expectvalue(self, server, line=None): + def _expectvalue(self, server, line=None, raise_exception=False): if not line: - line = server.readline() + line = server.readline(raise_exception) if line and line[:5] == 'VALUE': resp, rkey, flags, len = line.split() @@ -963,19 +1124,19 @@ class Client(local): return (None, None, None) def _recv_value(self, server, flags, rlen): - rlen += 2 # include \r\n + rlen += 2 # include \r\n buf = server.recv(rlen) if len(buf) != rlen: raise _Error("received %d bytes when expecting %d" - % (len(buf), rlen)) + % (len(buf), rlen)) if len(buf) == rlen: buf = buf[:-2] # strip \r\n if flags & Client._FLAG_COMPRESSED: - buf = decompress(buf) + buf = zlib.decompress(buf) - if flags == 0 or flags == Client._FLAG_COMPRESSED: + if flags == 0 or flags == Client._FLAG_COMPRESSED: # Either a bare string or a compressed string now decompressed... val = buf elif flags & Client._FLAG_INTEGER: @@ -984,21 +1145,25 @@ class Client(local): val = long(buf) elif flags & Client._FLAG_PICKLE: try: - file = StringIO(buf) + file = BytesIO(buf) unpickler = self.unpickler(file) if self.persistent_load: unpickler.persistent_load = self.persistent_load val = unpickler.load() - except Exception, e: + except Exception as e: self.debuglog('Pickle error: %s\n' % e) return None else: self.debuglog("unknown flags on get: %x\n" % flags) + raise ValueError('Unknown flags on get: %x' % flags) return val def check_key(self, key, key_extra_len=0): - """Checks sanity of key. Fails if: + """Checks sanity of key. + + Fails if: + Key length is > SERVER_MAX_KEY_LENGTH (Raises MemcachedKeyLength). Contains control characters (Raises MemcachedKeyCharacterError). Is not a string (Raises MemcachedStringEncodingError) @@ -1006,34 +1171,39 @@ class Client(local): Is not a string (Raises MemcachedKeyError) Is None (Raises MemcachedKeyError) """ - if isinstance(key, tuple): key = key[1] + if isinstance(key, tuple): + key = key[1] if not key: raise Client.MemcachedKeyNoneError("Key is None") - if isinstance(key, unicode): + + # Make sure we're not a specific unicode type, if we're old enough that + # it's a separate type. + if _has_unicode is True and isinstance(key, unicode): raise Client.MemcachedStringEncodingError( - "Keys must be str()'s, not unicode. Convert your unicode " - "strings using mystring.encode(charset)!") + "Keys must be str()'s, not unicode. Convert your unicode " + "strings using mystring.encode(charset)!") if not isinstance(key, str): raise Client.MemcachedKeyTypeError("Key must be str()'s") - if isinstance(key, basestring): - if self.server_max_key_length != 0 and \ - len(key) + key_extra_len > self.server_max_key_length: - raise Client.MemcachedKeyLengthError("Key length is > %s" - % self.server_max_key_length) - for char in key: - if ord(char) < 33 or ord(char) == 127: - raise Client.MemcachedKeyCharacterError( - "Control characters not allowed") + if isinstance(key, _str_cls): + if (self.server_max_key_length != 0 and + len(key) + key_extra_len > self.server_max_key_length): + raise Client.MemcachedKeyLengthError( + "Key length is > %s" % self.server_max_key_length + ) + if not valid_key_chars_re.match(key): + raise Client.MemcachedKeyCharacterError( + "Control characters not allowed") class _Host(object): def __init__(self, host, debug=0, dead_retry=_DEAD_RETRY, - socket_timeout=_SOCKET_TIMEOUT): + socket_timeout=_SOCKET_TIMEOUT, flush_on_reconnect=0): self.dead_retry = dead_retry self.socket_timeout = socket_timeout self.debug = debug + self.flush_on_reconnect = flush_on_reconnect if isinstance(host, tuple): host, self.weight = host else: @@ -1041,10 +1211,14 @@ class _Host(object): # parse the connection string m = re.match(r'^(?P<proto>unix):(?P<path>.*)$', host) + if not m: + m = re.match(r'^(?P<proto>inet6):' + r'\[(?P<host>[^\[\]]+)\](:(?P<port>[0-9]+))?$', host) if not m: m = re.match(r'^(?P<proto>inet):' - r'(?P<host>[^:]+)(:(?P<port>[0-9]+))?$', host) - if not m: m = re.match(r'^(?P<host>[^:]+)(:(?P<port>[0-9]+))?$', host) + r'(?P<host>[^:]+)(:(?P<port>[0-9]+))?$', host) + if not m: + m = re.match(r'^(?P<host>[^:]+)(:(?P<port>[0-9]+))?$', host) if not m: raise ValueError('Unable to parse connection string: "%s"' % host) @@ -1052,14 +1226,20 @@ class _Host(object): if hostData.get('proto') == 'unix': self.family = socket.AF_UNIX self.address = hostData['path'] + elif hostData.get('proto') == 'inet6': + self.family = socket.AF_INET6 + self.ip = hostData['host'] + self.port = int(hostData.get('port') or 11211) + self.address = (self.ip, self.port) else: self.family = socket.AF_INET self.ip = hostData['host'] - self.port = int(hostData.get('port', 11211)) - self.address = ( self.ip, self.port ) + self.port = int(hostData.get('port') or 11211) + self.address = (self.ip, self.port) self.deaduntil = 0 self.socket = None + self.flush_on_next_connect = 0 self.buffer = '' @@ -1081,6 +1261,8 @@ class _Host(object): def mark_dead(self, reason): self.debuglog("MemCache: %s: %s. Marking dead." % (self, reason)) self.deaduntil = time.time() + self.dead_retry + if self.flush_on_reconnect: + self.flush_on_next_connect = 1 self.close_socket() def _get_socket(self): @@ -1089,18 +1271,23 @@ class _Host(object): if self.socket: return self.socket s = socket.socket(self.family, socket.SOCK_STREAM) - if hasattr(s, 'settimeout'): s.settimeout(self.socket_timeout) + if hasattr(s, 'settimeout'): + s.settimeout(self.socket_timeout) try: s.connect(self.address) - except socket.timeout, msg: + except socket.timeout as msg: self.mark_dead("connect: %s" % msg) return None - except socket.error, msg: - if isinstance(msg, tuple): msg = msg[1] - self.mark_dead("connect: %s" % msg[1]) + except socket.error as msg: + if isinstance(msg, tuple): + msg = msg[1] + self.mark_dead("connect: %s" % msg) return None self.socket = s self.buffer = '' + if self.flush_on_next_connect: + self.flush() + self.flush_on_next_connect = 0 return s def close_socket(self): @@ -1112,12 +1299,21 @@ class _Host(object): self.socket.sendall(cmd + '\r\n') def send_cmds(self, cmds): - """ cmds already has trailing \r\n's applied """ + """cmds already has trailing \r\n's applied.""" self.socket.sendall(cmds) - def readline(self): + def readline(self, raise_exception=False): + """Read a line and return it. + + If "raise_exception" is set, raise _ConnectionDeadError if the + read fails, otherwise return an empty string. + """ buf = self.buffer - recv = self.socket.recv + if self.socket: + recv = self.socket.recv + else: + recv = lambda bufsize: '' + while True: index = buf.find('\r\n') if index >= 0: @@ -1125,18 +1321,21 @@ class _Host(object): data = recv(4096) if not data: # connection close, let's kill it and raise - self.close_socket() - raise _ConnectionDeadError() + self.mark_dead('connection closed in readline()') + if raise_exception: + raise _ConnectionDeadError() + else: + return '' buf += data - self.buffer = buf[index+2:] + self.buffer = buf[index + 2:] return buf[:index] - def expect(self, text): - line = self.readline() + def expect(self, text, raise_exception=False): + line = self.readline(raise_exception) if line != text: self.debuglog("while expecting '%s', got unexpected response '%s'" - % (text, line)) + % (text, line)) return line def recv(self, rlen): @@ -1146,11 +1345,15 @@ class _Host(object): foo = self_socket_recv(max(rlen - len(buf), 4096)) buf += foo if not foo: - raise _Error( 'Read %d bytes, expecting %d, ' - 'read returned 0 length bytes' % ( len(buf), rlen )) + raise _Error('Read %d bytes, expecting %d, ' + 'read returned 0 length bytes' % (len(buf), rlen)) self.buffer = buf[rlen:] return buf[:rlen] + def flush(self): + self.send_cmd('flush_all') + self.expect('OK') + def __str__(self): d = '' if self.deaduntil: @@ -1158,12 +1361,15 @@ class _Host(object): if self.family == socket.AF_INET: return "inet:%s:%d%s" % (self.address[0], self.address[1], d) + elif self.family == socket.AF_INET6: + return "inet6:[%s]:%d%s" % (self.address[0], self.address[1], d) else: return "unix:%s%s" % (self.address, d) def _doctest(): - import doctest, memcache + import doctest + import memcache servers = ["127.0.0.1:11211"] mc = Client(servers, debug=1) globs = {"mc": mc} @@ -1171,10 +1377,10 @@ def _doctest(): if __name__ == "__main__": failures = 0 - print "Testing docstrings..." + print("Testing docstrings...") _doctest() - print "Running tests:" - print + print("Running tests:") + print() serverList = [["127.0.0.1:11211"]] if '--do-unix' in sys.argv: serverList.append([os.path.join(os.getcwd(), 'memcached.socket')]) @@ -1183,27 +1389,32 @@ if __name__ == "__main__": mc = Client(servers, debug=1) def to_s(val): - if not isinstance(val, basestring): + if not isinstance(val, _str_cls): return "%s (%s)" % (val, type(val)) return "%s" % val + def test_setget(key, val): global failures - print "Testing set/get {'%s': %s} ..." % (to_s(key), to_s(val)), + print("Testing set/get {'%s': %s} ..." + % (to_s(key), to_s(val)), end=" ") mc.set(key, val) newval = mc.get(key) if newval == val: - print "OK" + print("OK") return 1 else: - print "FAIL"; failures = failures + 1 + print("FAIL") + failures += 1 return 0 - class FooStruct(object): + def __init__(self): self.bar = "baz" + def __str__(self): return "A FooStruct" + def __eq__(self, other): if isinstance(other, FooStruct): return self.bar == other.bar @@ -1211,138 +1422,155 @@ if __name__ == "__main__": test_setget("a_string", "some random string") test_setget("an_integer", 42) - if test_setget("long", long(1<<30)): - print "Testing delete ...", + if test_setget("long", long(1 << 30)): + print("Testing delete ...", end=" ") if mc.delete("long"): - print "OK" + print("OK") else: - print "FAIL"; failures = failures + 1 - print "Checking results of delete ..." - if mc.get("long") == None: - print "OK" + print("FAIL") + failures += 1 + print("Checking results of delete ...", end=" ") + if mc.get("long") is None: + print("OK") else: - print "FAIL"; failures = failures + 1 - print "Testing get_multi ...", - print mc.get_multi(["a_string", "an_integer"]) + print("FAIL") + failures += 1 + print("Testing get_multi ...",) + print(mc.get_multi(["a_string", "an_integer"])) # removed from the protocol - #if test_setget("timed_delete", 'foo'): - # print "Testing timed delete ...", - # if mc.delete("timed_delete", 1): - # print "OK" - # else: - # print "FAIL"; failures = failures + 1 - # print "Checking results of timed delete ..." - # if mc.get("timed_delete") == None: - # print "OK" - # else: - # print "FAIL"; failures = failures + 1 + # if test_setget("timed_delete", 'foo'): + # print "Testing timed delete ...", + # if mc.delete("timed_delete", 1): + # print("OK") + # else: + # print("FAIL") + # failures += 1 + # print "Checking results of timed delete ..." + # if mc.get("timed_delete") is None: + # print("OK") + # else: + # print("FAIL") + # failures += 1 - print "Testing get(unknown value) ...", - print to_s(mc.get("unknown_value")) + print("Testing get(unknown value) ...", end=" ") + print(to_s(mc.get("unknown_value"))) f = FooStruct() test_setget("foostruct", f) - print "Testing incr ...", + print("Testing incr ...", end=" ") x = mc.incr("an_integer", 1) if x == 43: - print "OK" + print("OK") else: - print "FAIL"; failures = failures + 1 + print("FAIL") + failures += 1 - print "Testing decr ...", + print("Testing decr ...", end=" ") x = mc.decr("an_integer", 1) if x == 42: - print "OK" + print("OK") else: - print "FAIL"; failures = failures + 1 + print("FAIL") + failures += 1 sys.stdout.flush() # sanity tests - print "Testing sending spaces...", + print("Testing sending spaces...", end=" ") sys.stdout.flush() try: x = mc.set("this has spaces", 1) - except Client.MemcachedKeyCharacterError, msg: - print "OK" + except Client.MemcachedKeyCharacterError as msg: + print("OK") else: - print "FAIL"; failures = failures + 1 + print("FAIL") + failures += 1 - print "Testing sending control characters...", + print("Testing sending control characters...", end=" ") try: x = mc.set("this\x10has\x11control characters\x02", 1) - except Client.MemcachedKeyCharacterError, msg: - print "OK" + except Client.MemcachedKeyCharacterError as msg: + print("OK") else: - print "FAIL"; failures = failures + 1 + print("FAIL") + failures += 1 - print "Testing using insanely long key...", + print("Testing using insanely long key...", end=" ") try: x = mc.set('a'*SERVER_MAX_KEY_LENGTH, 1) - except Client.MemcachedKeyLengthError, msg: - print "FAIL"; failures = failures + 1 + except Client.MemcachedKeyLengthError as msg: + print("FAIL") + failures += 1 else: - print "OK" + print("OK") try: x = mc.set('a'*SERVER_MAX_KEY_LENGTH + 'a', 1) - except Client.MemcachedKeyLengthError, msg: - print "OK" + except Client.MemcachedKeyLengthError as msg: + print("OK") else: - print "FAIL"; failures = failures + 1 + print("FAIL") + failures += 1 - print "Testing sending a unicode-string key...", + print("Testing sending a unicode-string key...", end=" ") try: - x = mc.set(u'keyhere', 1) - except Client.MemcachedStringEncodingError, msg: - print "OK", + x = mc.set(unicode('keyhere'), 1) + except Client.MemcachedStringEncodingError as msg: + print("OK", end=" ") else: - print "FAIL",; failures = failures + 1 + print("FAIL", end=" ") + failures += 1 try: - x = mc.set((u'a'*SERVER_MAX_KEY_LENGTH).encode('utf-8'), 1) - except: - print "FAIL",; failures = failures + 1 + x = mc.set((unicode('a')*SERVER_MAX_KEY_LENGTH).encode('utf-8'), 1) + except Client.MemcachedKeyError: + print("FAIL", end=" ") + failures += 1 else: - print "OK", - import pickle + print("OK", end=" ") s = pickle.loads('V\\u4f1a\np0\n.') try: - x = mc.set((s*SERVER_MAX_KEY_LENGTH).encode('utf-8'), 1) + x = mc.set((s * SERVER_MAX_KEY_LENGTH).encode('utf-8'), 1) except Client.MemcachedKeyLengthError: - print "OK" + print("OK") else: - print "FAIL"; failures = failures + 1 + print("FAIL") + failures += 1 - print "Testing using a value larger than the memcached value limit...", + print("Testing using a value larger than the memcached value limit...") + print('NOTE: "MemCached: while expecting[...]" is normal...') x = mc.set('keyhere', 'a'*SERVER_MAX_VALUE_LENGTH) - if mc.get('keyhere') == None: - print "OK", + if mc.get('keyhere') is None: + print("OK", end=" ") else: - print "FAIL",; failures = failures + 1 + print("FAIL", end=" ") + failures += 1 x = mc.set('keyhere', 'a'*SERVER_MAX_VALUE_LENGTH + 'aaa') - if mc.get('keyhere') == None: - print "OK" + if mc.get('keyhere') is None: + print("OK") else: - print "FAIL"; failures = failures + 1 + print("FAIL") + failures += 1 - print "Testing set_multi() with no memcacheds running", + print("Testing set_multi() with no memcacheds running", end=" ") mc.disconnect_all() - errors = mc.set_multi({'keyhere' : 'a', 'keythere' : 'b'}) + errors = mc.set_multi({'keyhere': 'a', 'keythere': 'b'}) if errors != []: - print "FAIL"; failures = failures + 1 + print("FAIL") + failures += 1 else: - print "OK" + print("OK") - print "Testing delete_multi() with no memcacheds running", + print("Testing delete_multi() with no memcacheds running", end=" ") mc.disconnect_all() - ret = mc.delete_multi({'keyhere' : 'a', 'keythere' : 'b'}) + ret = mc.delete_multi({'keyhere': 'a', 'keythere': 'b'}) if ret != 1: - print "FAIL"; failures = failures + 1 + print("FAIL") + failures += 1 else: - print "OK" + print("OK") if failures > 0: - print '*** THERE WERE FAILED TESTS' + print('*** THERE WERE FAILED TESTS') sys.exit(1) sys.exit(0)