GIF89a;
EcchiShell v1.0
/
/
usr/
lib64/
lib64/
lib64/
lib64/
lib64/
(a|b) if b more characteristic than c
Returns: The most 'characteristic'(as defined by
get_characteristic_subpattern) path for the compiled pattern
tree.
"""
for l in self.leaves():
subp = l.leaf_to_root()
if subp:
return subp
def leaves(self):
"Generator that returns the leaves of the tree"
for child in self.children:
for x in child.leaves():
yield x
if not self.children:
yield self
def reduce_tree(node, parent=None):
"""
Internal function. Reduces a compiled pattern tree to an
intermediate representation suitable for feeding the
automaton. This also trims off any optional pattern elements(like
[a], a*).
"""
new_node = None
#switch on the node type
if node.type == syms.Matcher:
#skip
node = node.children[0]
if node.type == syms.Alternatives :
#2 cases
if len(node.children) <= 2:
#just a single 'Alternative', skip this node
new_node = reduce_tree(node.children[0], parent)
else:
#real alternatives
new_node = MinNode(type=TYPE_ALTERNATIVES)
#skip odd children('|' tokens)
for child in node.children:
if node.children.index(child)%2:
continue
reduced = reduce_tree(child, new_node)
if reduced is not None:
new_node.children.append(reduced)
elif node.type == syms.Alternative:
if len(node.children) > 1:
new_node = MinNode(type=TYPE_GROUP)
for child in node.children:
reduced = reduce_tree(child, new_node)
if reduced:
new_node.children.append(reduced)
if not new_node.children:
# delete the group if all of the children were reduced to None
new_node = None
else:
new_node = reduce_tree(node.children[0], parent)
elif node.type == syms.Unit:
if (isinstance(node.children[0], pytree.Leaf) and
node.children[0].value == '('):
#skip parentheses
return reduce_tree(node.children[1], parent)
if ((isinstance(node.children[0], pytree.Leaf) and
node.children[0].value == '[')
or
(len(node.children)>1 and
hasattr(node.children[1], "value") and
node.children[1].value == '[')):
#skip whole unit if its optional
return None
leaf = True
details_node = None
alternatives_node = None
has_repeater = False
repeater_node = None
has_variable_name = False
for child in node.children:
if child.type == syms.Details:
leaf = False
details_node = child
elif child.type == syms.Repeater:
has_repeater = True
repeater_node = child
elif child.type == syms.Alternatives:
alternatives_node = child
if hasattr(child, 'value') and child.value == '=': # variable name
has_variable_name = True
#skip variable name
if has_variable_name:
#skip variable name, '='
name_leaf = node.children[2]
if hasattr(name_leaf, 'value') and name_leaf.value == '(':
# skip parenthesis
name_leaf = node.children[3]
else:
name_leaf = node.children[0]
#set node type
if name_leaf.type == token_labels.NAME:
#(python) non-name or wildcard
if name_leaf.value == 'any':
new_node = MinNode(type=TYPE_ANY)
else:
if hasattr(token_labels, name_leaf.value):
new_node = MinNode(type=getattr(token_labels, name_leaf.value))
else:
new_node = MinNode(type=getattr(pysyms, name_leaf.value))
elif name_leaf.type == token_labels.STRING:
#(python) name or character; remove the apostrophes from
#the string value
name = name_leaf.value.strip("'")
if name in tokens:
new_node = MinNode(type=tokens[name])
else:
new_node = MinNode(type=token_labels.NAME, name=name)
elif name_leaf.type == syms.Alternatives:
new_node = reduce_tree(alternatives_node, parent)
#handle repeaters
if has_repeater:
if repeater_node.children[0].value == '*':
#reduce to None
new_node = None
elif repeater_node.children[0].value == '+':
#reduce to a single occurence i.e. do nothing
pass
else:
#TODO: handle {min, max} repeaters
raise NotImplementedError
pass
#add children
if details_node and new_node is not None:
for child in details_node.children[1:-1]:
#skip '<', '>' markers
reduced = reduce_tree(child, new_node)
if reduced is not None:
new_node.children.append(reduced)
if new_node:
new_node.parent = parent
return new_node
def get_characteristic_subpattern(subpatterns):
"""Picks the most characteristic from a list of linear patterns
Current order used is:
names > common_names > common_chars
"""
if not isinstance(subpatterns, list):
return subpatterns
if len(subpatterns)==1:
return subpatterns[0]
# first pick out the ones containing variable names
subpatterns_with_names = []
subpatterns_with_common_names = []
common_names = ['in', 'for', 'if' , 'not', 'None']
subpatterns_with_common_chars = []
common_chars = "[]().,:"
for subpattern in subpatterns:
if any(rec_test(subpattern, lambda x: type(x) is str)):
if any(rec_test(subpattern,
lambda x: isinstance(x, str) and x in common_chars)):
subpatterns_with_common_chars.append(subpattern)
elif any(rec_test(subpattern,
lambda x: isinstance(x, str) and x in common_names)):
subpatterns_with_common_names.append(subpattern)
else:
subpatterns_with_names.append(subpattern)
if subpatterns_with_names:
subpatterns = subpatterns_with_names
elif subpatterns_with_common_names:
subpatterns = subpatterns_with_common_names
elif subpatterns_with_common_chars:
subpatterns = subpatterns_with_common_chars
# of the remaining subpatterns pick out the longest one
return max(subpatterns, key=len)
def rec_test(sequence, test_func):
"""Tests test_func on all items of sequence and items of included
sub-iterables"""
for x in sequence:
if isinstance(x, (list, tuple)):
for y in rec_test(x, test_func):
yield y
else:
yield test_func(x)