608 lines
20 KiB
Python
608 lines
20 KiB
Python
from __future__ import print_function
|
|
|
|
import re
|
|
|
|
# Super dumb little selector parser.

# Yes, yes, this is a regex tokenizer.  The actual meaning of the
# selector doesn't matter; the parts are just important for matching up
# during @extend.

# Selectors have three levels: simple, combinator, comma-delimited.
# Each combinator can only appear once as a delimiter between simple
# selectors, so it can be thought of as a prefix.
# So this:
#     a.b + c, d#e
# parses into two Selectors with these structures:
#     [[' ', 'a', '.b'], ['+', 'c']]
#     [[' ', 'd', '#e']]
# Note that the first simple selector has an implied descendant
# combinator -- i.e., it is a descendant of the root element.
# TODO `*html` is incorrectly parsed as a single selector
# TODO this oughta be touched up for css4 selectors
|
|
# Tokenizer for one selector string; `Selector.parse_many` calls `.match()`
# repeatedly, advancing by the length of each match.
#
# The pattern is compiled in verbose mode, so whitespace and `#` comments
# outside character classes are ignored.  Every alternative must be introduced
# by exactly one `|`: an empty alternative would let the pattern match the
# empty string, and the parse loop would then never advance.
SELECTOR_TOKENIZER = re.compile(r'''
    # Colons introduce pseudo-selectors, sometimes with parens
    # TODO doesn't handle quoted )
    [:]+ [-\w]+ (?: [(] .+? [)] )?

    # These guys are combinators -- note that a single space counts too
    | \s* [ +>~,] \s*

    # Square brackets are attribute tests.  The opening bracket is written
    # as an escape rather than a one-character set, because a set starting
    # with a literal bracket triggers a FutureWarning on newer Pythons.
    # TODO: this doesn't handle ] within a string
    | \[ .+? \]

    # Dot and pound start class/id selectors.  Percent starts a Sass
    # extend-target faux selector.
    | [.#%] [-\w]+

    # Percentages are used for @keyframes
    | [-.\d]+ [%]

    # Plain identifiers, or single asterisks, are element names
    | [-\w]+
    | [*]

    # & is the sass replacement token
    | [&]

    # And as a last-ditch effort, just eat up to whitespace
    | (\S+)
''', re.VERBOSE | re.MULTILINE)
|
|
|
|
|
|
# Rough sort rank for a token, looked up by the token's first character.
# Characters that are not listed (i.e. the start of an element name) rank as
# zero, so element names sort ahead of everything else.
TOKEN_TYPE_ORDER = {
    '#': 2,
    '.': 3, '[': 3, ':': 3,
    '%': 4,
}


def TOKEN_SORT_KEY(token):
    """Return the sort rank of `token`, judged by its first character."""
    return TOKEN_TYPE_ORDER.get(token[0], 0)
|
|
|
|
|
|
def _is_combinator_subset_of(specific, general, is_first=True):
    """Tell whether combinator `specific` matches no more than `general` does.

    The subset relation is non-strict, so equal combinators qualify.  With
    `is_first` true, a `general` of ``' '`` always qualifies.
    """
    # The first selector always carries a space meaning "descendant of root",
    # and that stays true no matter what other selector ends up above it.
    if is_first and general == ' ':
        return True

    # (narrower, broader) pairs: child implies descendant, and adjacent
    # sibling implies general sibling.
    narrowing_pairs = (('>', ' '), ('+', '~'))
    return specific == general or (specific, general) in narrowing_pairs
|
|
|
|
|
|
class SimpleSelector(object):
    """A simple selector, by CSS 2.1 terminology: a combination of element
    name, class selectors, id selectors, and other criteria that all apply to a
    single element.

    Note that CSS 3 considers EACH of those parts to be a "simple selector",
    and calls a group of them a "sequence of simple selectors".  That's a
    terrible class name, so we're going with 2.1 here.

    For lack of a better name, each of the individual parts is merely called a
    "token".
    """

    def __init__(self, combinator, tokens):
        """Store `combinator` and a sorted tuple of `tokens`.

        Tokens are ordered with `TOKEN_SORT_KEY`; the sort is stable, so
        tokens of equal rank keep their given order.
        """
        self.combinator = combinator
        # TODO enforce that only one element name (including *) appears in a
        # selector
        # TODO remove duplicates
        self.tokens = tuple(sorted(tokens, key=TOKEN_SORT_KEY))

    def __repr__(self):
        return "<%s: %r>" % (type(self).__name__, self.render())

    def __hash__(self):
        return hash((self.combinator, self.tokens))

    def __eq__(self, other):
        """Two simple selectors are equal when combinator and tokens match."""
        if not isinstance(other, SimpleSelector):
            return NotImplemented

        return (
            self.combinator == other.combinator and
            self.tokens == other.tokens)

    def __ne__(self, other):
        """Inverse of `__eq__`.

        Spelled out because Python 2 does not derive ``!=`` from ``==``.
        """
        equal = self.__eq__(other)
        if equal is NotImplemented:
            return equal
        return not equal

    @property
    def has_parent_reference(self):
        """Whether ``&`` (or the legacy ``self``) is one of the tokens."""
        return '&' in self.tokens or 'self' in self.tokens

    @property
    def has_placeholder(self):
        """Whether any token starts with ``%``."""
        return any(
            token[0] == '%'
            for token in self.tokens)

    def is_superset_of(self, other, soft_combinator=False):
        """Return True iff this selector matches the same elements as `other`,
        and perhaps others.

        That is, ``.foo`` is a superset of ``.foo.bar``, because the latter is
        more specific.

        Set `soft_combinator` true to ignore the specific case of this selector
        having a descendent combinator and `other` having anything else.  This
        is for superset checking for ``@extend``, where a space combinator
        really means "none".
        """
        # Combinators must match, OR be compatible -- space is a superset of >,
        # ~ is a superset of +
        if soft_combinator and self.combinator == ' ':
            combinator_superset = True
        else:
            combinator_superset = (
                self.combinator == other.combinator or
                (self.combinator == ' ' and other.combinator == '>') or
                (self.combinator == '~' and other.combinator == '+'))

        return (
            combinator_superset and
            set(self.tokens) <= set(other.tokens))

    def replace_parent(self, parent_simples):
        """If ``&`` (or the legacy xCSS equivalent ``self``) appears in this
        selector, replace it with the given parent selectors.

        `parent_simples` must be a non-empty tuple of simple selectors; its
        last item is the direct parent and the rest are ancestors.  Only the
        first parent reference among the tokens is replaced.

        Returns a tuple of simple selectors.  Raises ValueError when this
        selector's combinator conflicts with that of the first ancestor.
        """
        assert parent_simples

        ancestors = parent_simples[:-1]
        parent = parent_simples[-1]

        did_replace = False
        new_tokens = []
        for token in self.tokens:
            if not did_replace and token in ('&', 'self'):
                did_replace = True
                new_tokens.extend(parent.tokens)
            else:
                new_tokens.append(token)

        if not did_replace:
            # This simple selector doesn't contain a parent reference so just
            # stick it on the end
            return parent_simples + (self,)

        # This simple selector was merged into the direct parent.
        merged_self = type(self)(parent.combinator, new_tokens)
        selector = ancestors + (merged_self,)
        # Our combinator goes on the first ancestor, i.e., substituting "foo
        # bar baz" into "+ &.quux" produces "+ foo bar baz.quux".  This means a
        # potential conflict with the first ancestor's combinator!
        root = selector[0]
        if not _is_combinator_subset_of(self.combinator, root.combinator):
            raise ValueError(
                "Can't sub parent {0!r} into {1!r}: "
                "combinators {2!r} and {3!r} conflict!"
                .format(
                    parent_simples, self, self.combinator, root.combinator))

        root = type(self)(self.combinator, root.tokens)
        selector = (root,) + selector[1:]
        return tuple(selector)

    # TODO just use set ops for these, once the constructor removes dupes
    def merge_with(self, other):
        """Return a new simple selector with this one's combinator and tokens,
        plus every token of `other` that this one lacks.
        """
        # Build the lookup set once rather than once per candidate token.
        own_tokens = set(self.tokens)
        new_tokens = self.tokens + tuple(
            token for token in other.tokens if token not in own_tokens)
        return type(self)(self.combinator, new_tokens)

    def difference(self, other):
        """Return a new simple selector with this one's combinator and only
        those of its tokens that do not appear in `other`.
        """
        # Build the lookup set once rather than once per candidate token.
        excluded = set(other.tokens)
        new_tokens = tuple(
            token for token in self.tokens if token not in excluded)
        return type(self)(self.combinator, new_tokens)

    def render(self):
        """Return the selector as a string.

        Tokens are concatenated; any combinator other than a space is put in
        front, separated by one space.
        """
        # TODO fail if there are no tokens, or if one is a placeholder?
        rendered = ''.join(self.tokens)
        if self.combinator != ' ':
            rendered = ' '.join((self.combinator, rendered))

        return rendered
|
|
|
|
|
|
class Selector(object):
    """A single CSS selector."""

    def __init__(self, simples):
        """Return a selector containing a sequence of `SimpleSelector`s.

        You probably want to use `parse_many` or `parse_one` instead.
        """
        # TODO enforce uniqueness
        self.simple_selectors = tuple(simples)

    @classmethod
    def parse_many(cls, selector):
        """Parse a comma-delimited selector string into a list of selectors.

        Raises SyntaxError if the tokenizer cannot match at some position.
        Returns an empty list for an empty or all-whitespace string.
        """
        selector = selector.strip()
        ret = []

        # Parser state, kept in a dict so the nested helpers can rebind it.
        pending = dict(
            simples=[],
            combinator=' ',
            tokens=[],
        )

        def promote_simple():
            # Close the simple selector being built, if it has any tokens.
            if pending['tokens']:
                pending['simples'].append(
                    SimpleSelector(pending['combinator'], pending['tokens']))
                pending['combinator'] = ' '
                pending['tokens'] = []

        def promote_selector():
            # Close the whole selector being built, if it has any parts.
            promote_simple()
            if pending['simples']:
                ret.append(cls(pending['simples']))
            pending['simples'] = []

        pos = 0
        while pos < len(selector):
            # TODO i don't think this deals with " + " correctly.  anywhere.
            # TODO this used to turn "1.5%" into empty string; why does error
            # not work?
            m = SELECTOR_TOKENIZER.match(selector, pos)
            if not m:
                # TODO prettify me
                raise SyntaxError("Couldn't parse selector: %r" % (selector,))

            token = m.group(0)
            pos += len(token)

            # Kill any extraneous space, BUT make sure not to turn a lone space
            # into an empty string
            token = token.strip() or ' '

            if token == ',':
                # End current selector
                # TODO what about "+ ,"?  what do i even do with that
                promote_selector()
            elif token in ' +>~':
                # End current simple selector
                promote_simple()
                pending['combinator'] = token
            else:
                # Add to pending simple selector
                pending['tokens'].append(token)

        # Deal with any remaining pending bits
        promote_selector()

        return ret

    @classmethod
    def parse_one(cls, selector_string):
        """Parse a string that must contain exactly one selector.

        Raises ValueError if the string holds no selector or more than one.
        """
        selectors = cls.parse_many(selector_string)
        if len(selectors) != 1:
            raise ValueError(
                "Expected exactly one selector in %r, found %d"
                % (selector_string, len(selectors)))

        return selectors[0]

    def __repr__(self):
        return "<%s: %r>" % (type(self).__name__, self.render())

    def __hash__(self):
        return hash(self.simple_selectors)

    def __eq__(self, other):
        """Selectors are equal when their simple selectors are equal."""
        if not isinstance(other, Selector):
            return NotImplemented

        return self.simple_selectors == other.simple_selectors

    def __ne__(self, other):
        """Inverse of `__eq__`.

        Spelled out because Python 2 does not derive ``!=`` from ``==``.
        """
        equal = self.__eq__(other)
        if equal is NotImplemented:
            return equal
        return not equal

    @property
    def has_parent_reference(self):
        """Whether any simple selector contains a parent reference."""
        return any(
            simple.has_parent_reference
            for simple in self.simple_selectors)

    @property
    def has_placeholder(self):
        """Whether any simple selector contains a placeholder token."""
        return any(
            simple.has_placeholder
            for simple in self.simple_selectors)

    def with_parent(self, parent):
        """Return a new selector nested under the selector `parent`.

        Each simple selector holding a parent reference is expanded through
        `replace_parent`.  If none holds one, the parent's simple selectors
        are put in front instead.
        """
        saw_parent_ref = False

        new_simples = []
        for simple in self.simple_selectors:
            if simple.has_parent_reference:
                new_simples.extend(simple.replace_parent(parent.simple_selectors))
                saw_parent_ref = True
            else:
                new_simples.append(simple)

        if not saw_parent_ref:
            new_simples = parent.simple_selectors + tuple(new_simples)

        return type(self)(new_simples)

    def lookup_key(self):
        """Build a key from the "important" parts of a selector: elements,
        classes, ids.

        Concretely, the key is a frozenset of every token that does not start
        with ``:`` or ``[``, or ``frozenset([None])`` if there is none.
        """
        parts = set()
        for node in self.simple_selectors:
            for token in node.tokens:
                if token[0] not in ':[':
                    parts.add(token)

        if not parts:
            # Should always have at least ONE key; selectors with no elements,
            # no classes, and no ids can be indexed as None to avoid a scan of
            # every selector in the entire document
            parts.add(None)

        return frozenset(parts)

    def is_superset_of(self, other):
        """Return whether every simple selector of `other` can be paired, in
        order, with a simple selector of this one that is a superset of it.

        Each of this selector's nodes is considered at most once, scanning
        left to right.
        """
        # NOTE(review): this walks `other`'s nodes and searches `self` for a
        # match, which may be inverted relative to the method's name --
        # confirm against callers before changing the direction.
        assert isinstance(other, Selector)

        # One shared iterator: `any` stops just past the first match, so the
        # search for the next node resumes where the previous one ended.
        candidates = iter(self.simple_selectors)
        for other_node in other.simple_selectors:
            if not any(
                    node.is_superset_of(other_node) for node in candidates):
                # Ran out of candidates without a match, which also covers
                # the final node of `other`.
                return False

        return True

    def substitute(self, target, replacement):
        """Return a list of selectors obtained by replacing the `target`
        selector with `replacement`.

        Herein lie the guts of the Sass @extend directive.

        In general, for a selector ``a X b Y c``, a target ``X Y``, and a
        replacement ``q Z``, return the selectors ``a q X b Z c`` and ``q a X b
        Z c``.  Note in particular that no more than two selectors will be
        returned, and the permutation of ancestors will never insert new simple
        selectors "inside" the target selector.
        """

        # Find the target in the parent selector, and split it into
        # before/after
        p_before, p_extras, p_after = self.break_around(target.simple_selectors)

        # The replacement has no hinge; it only has the most specific simple
        # selector (which is the part that replaces "self" in the parent) and
        # whatever preceding simple selectors there may be
        r_trail = replacement.simple_selectors[:-1]
        r_extras = replacement.simple_selectors[-1]

        # TODO what if the prefix doesn't match?  who wins?  should we even get
        # this far?
        focal_nodes = (p_extras.merge_with(r_extras),)

        befores = _merge_selectors(p_before, r_trail)

        cls = type(self)
        return [
            cls(before + focal_nodes + p_after)
            for before in befores]

    def break_around(self, hinge):
        """Given a simple selector node contained within this one (a "hinge"),
        break it in half and return a parent selector, extra specifiers for the
        hinge, and a child selector.

        That is, given a hinge X, break the selector A + X.y B into A, + .y,
        and B.

        `hinge` is a non-empty sequence of simple selectors.  Raises
        ValueError when the hinge cannot be found, including when it would
        run past the end of this selector.
        """
        hinge_start = hinge[0]
        for i, node in enumerate(self.simple_selectors):
            # In this particular case, a ' ' combinator actually means "no" (or
            # any) combinator, so it should be ignored
            if hinge_start.is_superset_of(node, soft_combinator=True):
                start_idx = i
                break
        else:
            raise ValueError(
                "Couldn't find hinge %r in compound selector %r" %
                (hinge_start, self.simple_selectors))

        # The first hinge node was matched above; the rest must line up with
        # the nodes that immediately follow it.
        for offset, hinge_node in enumerate(hinge[1:], 1):
            idx = start_idx + offset
            if (idx < len(self.simple_selectors) and
                    hinge_node.is_superset_of(self.simple_selectors[idx])):
                continue

            # TODO this isn't true; consider finding `a b` in `a c a b`
            raise ValueError(
                "Couldn't find hinge %r in compound selector %r" %
                (hinge_node, self.simple_selectors))

        end_idx = start_idx + len(hinge) - 1

        focal_node = self.simple_selectors[end_idx]
        extras = focal_node.difference(hinge[-1])

        return (
            self.simple_selectors[:start_idx],
            extras,
            self.simple_selectors[end_idx + 1:])

    def render(self):
        """Return the selector as a string, parts joined by single spaces."""
        return ' '.join(simple.render() for simple in self.simple_selectors)
|
|
|
|
|
|
def _merge_selectors(left, right):
    """Given two selector chains (tuples of simple selectors), return a list
    of selector chains representing elements matched by both of them.

    This operation is not exact, and involves some degree of fudging -- the
    wackier and more divergent the input, the more fudging.  It's meant to be
    what a human might expect rather than a precise covering of all possible
    cases.  Most notably, when the two input chains have absolutely nothing in
    common, the output is merely ``left + right`` and ``right + left`` rather
    than all possible interleavings.

    The result is always a real list, so it can be measured and iterated
    more than once.
    """

    if not left or not right:
        # At least one is empty, so there are no conflicts; just return
        # whichever isn't empty.  Remember to return a LIST, though
        return [left or right]

    lcs = longest_common_subsequence(left, right, _merge_simple_selectors)

    ret = [()]  # start with a dummy empty chain or weaving won't work

    left_last = 0
    right_last = 0
    for left_next, right_next, merged in lcs:
        # Weave the unmatched stretches before this common element, then
        # append the merged element itself.
        ret = _weave_conflicting_selectors(
            ret,
            left[left_last:left_next],
            right[right_last:right_next],
            (merged,))

        left_last = left_next + 1
        right_last = right_next + 1

    # Whatever trails the last common element on either side.
    ret = _weave_conflicting_selectors(
        ret,
        left[left_last:],
        right[right_last:])

    # The weaving helper is lazy; materialize it so both return paths hand
    # back the same type.
    return list(ret)
|
|
|
|
|
|
def _weave_conflicting_selectors(prefixes, a, b, suffix=()):
    """Helper for the selector merge algorithm; of little use on its own.

    Lazily yields, for every chain in `prefixes`, that chain extended with
    ``a + b + suffix`` -- and, only when `a` and `b` are both non-empty, also
    with ``b + a + suffix``.
    """
    # In other words: append one new stretch to each given chain, where the
    # new stretch may be the superposition of two chains that conflict:
    #   prefix[0] + a + b + suffix,
    #   prefix[0] + b + a + suffix,
    #   prefix[1] + a + b + suffix,
    #   ...
    if a and b:
        # A genuine conflict, so both orderings are emitted.
        orderings = ((a, b), (b, a))
    else:
        orderings = ((a, b),)

    for prefix in prefixes:
        for first, second in orderings:
            yield prefix + first + second + suffix
|
|
|
|
|
|
def _merge_simple_selectors(a, b):
    """Merge callback handed to the LCS routine for simple selectors.

    Returns whichever argument is the more specific when one is a superset
    of the other, and None when neither is.  `a` is tried as the superset
    first.
    """
    # TODO what about combinators
    for general, specific in ((a, b), (b, a)):
        if general.is_superset_of(specific):
            return specific

    return None
|
|
|
|
|
|
def longest_common_subsequence(a, b, mergefunc=None):
    """Find the longest common subsequence between two sequences.

    The longest common subsequence is the core of any diff algorithm: it's the
    longest sequence of elements that appears in both parent sequences in the
    same order, but NOT necessarily consecutively.

    Original algorithm borrowed from Wikipedia:
    http://en.wikipedia.org/wiki/Longest_common_subsequence_problem#Code_for_the_dynamic_programming_solution

    This function is used only to implement @extend, largely because that's
    what the Ruby implementation does.  Thus it's been extended slightly from
    the simple diff-friendly algorithm given above.

    What @extend wants to know is whether two simple selectors are compatible,
    not just equal.  To that end, you must pass in a "merge" function to
    compare a pair of elements manually.  It should return `None` if they are
    incompatible, and a MERGED element if they are compatible -- in the case of
    selectors, this is whichever one is more specific.

    Only `None` means "incompatible"; any other merged value counts as a
    match, even a falsy one such as ``0`` or ``''``.  Without a `mergefunc`,
    elements match when they compare equal.

    Both `a` and `b` must support ``len()``.

    Because of this fuzzier notion of equality, the return value is a list of
    ``(a_index, b_index, value)`` tuples rather than items alone.
    """
    if mergefunc is None:
        # Stupid default, just in case
        def mergefunc(a, b):
            if a == b:
                return a
            return None

    # Precalculate equality, since it can be a tad expensive and every pair is
    # compared at least once
    eq = {}
    for ai, aval in enumerate(a):
        for bi, bval in enumerate(b):
            eq[ai, bi] = mergefunc(aval, bval)

    # Build the "length" matrix, which provides the length of the LCS for
    # arbitrary-length prefixes.  -1 exists only to support the base case
    prefix_lcs_length = {}
    for ai in range(-1, len(a)):
        for bi in range(-1, len(b)):
            if ai == -1 or bi == -1:
                length = 0
            elif eq[ai, bi] is not None:
                # Must be the same test the backtracking below uses, or a
                # falsy merged value would be counted there but not here.
                length = prefix_lcs_length[ai - 1, bi - 1] + 1
            else:
                length = max(
                    prefix_lcs_length[ai, bi - 1],
                    prefix_lcs_length[ai - 1, bi])

            prefix_lcs_length[ai, bi] = length

    # The interesting part.  The key insight is that the bottom-right value in
    # the length matrix must be the length of the LCS because of how the matrix
    # is defined, so all that's left to do is backtrack from the ends of both
    # sequences in whatever way keeps the LCS as long as possible, and keep
    # track of the equal pairs of elements we see along the way.
    # Wikipedia does this with recursion, but the algorithm is trivial to
    # rewrite as a loop, as below.
    ai = len(a) - 1
    bi = len(b) - 1

    ret = []
    while ai >= 0 and bi >= 0:
        merged = eq[ai, bi]
        if merged is not None:
            ret.append((ai, bi, merged))
            ai -= 1
            bi -= 1
        elif prefix_lcs_length[ai, bi - 1] > prefix_lcs_length[ai - 1, bi]:
            bi -= 1
        else:
            ai -= 1

    # ret has the latest items first, which is backwards
    ret.reverse()
    return ret
|