GIF89a;
Mass Deface
0:
print level*" " + "or"
a.dump(level+1); nl = 1
i = i + 1
elif type(av) in seqtypes:
for a in av:
if isinstance(a, SubPattern):
if not nl: print
a.dump(level+1); nl = 1
else:
print a, ; nl = 0
else:
print av, ; nl = 0
if not nl: print
def __repr__(self):
return repr(self.data)
def __len__(self):
return len(self.data)
def __delitem__(self, index):
del self.data[index]
def __getitem__(self, index):
if isinstance(index, slice):
return SubPattern(self.pattern, self.data[index])
return self.data[index]
def __setitem__(self, index, code):
self.data[index] = code
def insert(self, index, code):
self.data.insert(index, code)
def append(self, code):
self.data.append(code)
def getwidth(self):
# determine the width (min, max) for this subpattern
if self.width:
return self.width
lo = hi = 0L
UNITCODES = (ANY, RANGE, IN, LITERAL, NOT_LITERAL, CATEGORY)
REPEATCODES = (MIN_REPEAT, MAX_REPEAT)
for op, av in self.data:
if op is BRANCH:
i = MAXREPEAT - 1
j = 0
for av in av[1]:
l, h = av.getwidth()
i = min(i, l)
j = max(j, h)
lo = lo + i
hi = hi + j
elif op is CALL:
i, j = av.getwidth()
lo = lo + i
hi = hi + j
elif op is SUBPATTERN:
i, j = av[1].getwidth()
lo = lo + i
hi = hi + j
elif op in REPEATCODES:
i, j = av[2].getwidth()
lo = lo + i * av[0]
hi = hi + j * av[1]
elif op in UNITCODES:
lo = lo + 1
hi = hi + 1
elif op == SUCCESS:
break
self.width = min(lo, MAXREPEAT - 1), min(hi, MAXREPEAT)
return self.width
class Tokenizer:
def __init__(self, string):
self.string = string
self.index = 0
self.__next()
def __next(self):
if self.index >= len(self.string):
self.next = None
return
char = self.string[self.index]
if char[0] == "\\":
try:
c = self.string[self.index + 1]
except IndexError:
raise error, "bogus escape (end of line)"
char = char + c
self.index = self.index + len(char)
self.next = char
def match(self, char, skip=1):
if char == self.next:
if skip:
self.__next()
return 1
return 0
def get(self):
this = self.next
self.__next()
return this
def tell(self):
return self.index, self.next
def seek(self, index):
self.index, self.next = index
def isident(char):
return "a" <= char <= "z" or "A" <= char <= "Z" or char == "_"
def isdigit(char):
return "0" <= char <= "9"
def isname(name):
# check that group name is a valid string
if not isident(name[0]):
return False
for char in name[1:]:
if not isident(char) and not isdigit(char):
return False
return True
def _class_escape(source, escape):
# handle escape code inside character class
code = ESCAPES.get(escape)
if code:
return code
code = CATEGORIES.get(escape)
if code and code[0] == IN:
return code
try:
c = escape[1:2]
if c == "x":
# hexadecimal escape (exactly two digits)
while source.next in HEXDIGITS and len(escape) < 4:
escape = escape + source.get()
escape = escape[2:]
if len(escape) != 2:
raise error, "bogus escape: %s" % repr("\\" + escape)
return LITERAL, int(escape, 16) & 0xff
elif c in OCTDIGITS:
# octal escape (up to three digits)
while source.next in OCTDIGITS and len(escape) < 4:
escape = escape + source.get()
escape = escape[1:]
return LITERAL, int(escape, 8) & 0xff
elif c in DIGITS:
raise error, "bogus escape: %s" % repr(escape)
if len(escape) == 2:
return LITERAL, ord(escape[1])
except ValueError:
pass
raise error, "bogus escape: %s" % repr(escape)
def _escape(source, escape, state):
# handle escape code in expression
code = CATEGORIES.get(escape)
if code:
return code
code = ESCAPES.get(escape)
if code:
return code
try:
c = escape[1:2]
if c == "x":
# hexadecimal escape
while source.next in HEXDIGITS and len(escape) < 4:
escape = escape + source.get()
if len(escape) != 4:
raise ValueError
return LITERAL, int(escape[2:], 16) & 0xff
elif c == "0":
# octal escape
while source.next in OCTDIGITS and len(escape) < 4:
escape = escape + source.get()
return LITERAL, int(escape[1:], 8) & 0xff
elif c in DIGITS:
# octal escape *or* decimal group reference (sigh)
if source.next in DIGITS:
escape = escape + source.get()
if (escape[1] in OCTDIGITS and escape[2] in OCTDIGITS and
source.next in OCTDIGITS):
# got three octal digits; this is an octal escape
escape = escape + source.get()
return LITERAL, int(escape[1:], 8) & 0xff
# not an octal escape, so this is a group reference
group = int(escape[1:])
if group < state.groups:
if not state.checkgroup(group):
raise error, "cannot refer to open group"
return GROUPREF, group
raise ValueError
if len(escape) == 2:
return LITERAL, ord(escape[1])
except ValueError:
pass
raise error, "bogus escape: %s" % repr(escape)
def _parse_sub(source, state, nested=1):
# parse an alternation: a|b|c
items = []
itemsappend = items.append
sourcematch = source.match
while 1:
itemsappend(_parse(source, state))
if sourcematch("|"):
continue
if not nested:
break
if not source.next or sourcematch(")", 0):
break
else:
raise error, "pattern not properly closed"
if len(items) == 1:
return items[0]
subpattern = SubPattern(state)
subpatternappend = subpattern.append
# check if all items share a common prefix
while 1:
prefix = None
for item in items:
if not item:
break
if prefix is None:
prefix = item[0]
elif item[0] != prefix:
break
else:
# all subitems start with a common "prefix".
# move it out of the branch
for item in items:
del item[0]
subpatternappend(prefix)
continue # check next one
break
# check if the branch can be replaced by a character set
for item in items:
if len(item) != 1 or item[0][0] != LITERAL:
break
else:
# we can store this as a character set instead of a
# branch (the compiler may optimize this even more)
set = []
setappend = set.append
for item in items:
setappend(item[0])
subpatternappend((IN, set))
return subpattern
subpattern.append((BRANCH, (None, items)))
return subpattern
def _parse_sub_cond(source, state, condgroup):
item_yes = _parse(source, state)
if source.match("|"):
item_no = _parse(source, state)
if source.match("|"):
raise error, "conditional backref with more than two branches"
else:
item_no = None
if source.next and not source.match(")", 0):
raise error, "pattern not properly closed"
subpattern = SubPattern(state)
subpattern.append((GROUPREF_EXISTS, (condgroup, item_yes, item_no)))
return subpattern
_PATTERNENDERS = set("|)")
_ASSERTCHARS = set("=!<")
_LOOKBEHINDASSERTCHARS = set("=!")
_REPEATCODES = set([MIN_REPEAT, MAX_REPEAT])
def _parse(source, state):
# parse a simple pattern
subpattern = SubPattern(state)
# precompute constants into local variables
subpatternappend = subpattern.append
sourceget = source.get
sourcematch = source.match
_len = len
PATTERNENDERS = _PATTERNENDERS
ASSERTCHARS = _ASSERTCHARS
LOOKBEHINDASSERTCHARS = _LOOKBEHINDASSERTCHARS
REPEATCODES = _REPEATCODES
while 1:
if source.next in PATTERNENDERS:
break # end of subpattern
this = sourceget()
if this is None:
break # end of pattern
if state.flags & SRE_FLAG_VERBOSE:
# skip whitespace and comments
if this in WHITESPACE:
continue
if this == "#":
while 1:
this = sourceget()
if this in (None, "\n"):
break
continue
if this and this[0] not in SPECIAL_CHARS:
subpatternappend((LITERAL, ord(this)))
elif this == "[":
# character set
set = []
setappend = set.append
## if sourcematch(":"):
## pass # handle character classes
if sourcematch("^"):
setappend((NEGATE, None))
# check remaining characters
start = set[:]
while 1:
this = sourceget()
if this == "]" and set != start:
break
elif this and this[0] == "\\":
code1 = _class_escape(source, this)
elif this:
code1 = LITERAL, ord(this)
else:
raise error, "unexpected end of regular expression"
if sourcematch("-"):
# potential range
this = sourceget()
if this == "]":
if code1[0] is IN:
code1 = code1[1][0]
setappend(code1)
setappend((LITERAL, ord("-")))
break
elif this:
if this[0] == "\\":
code2 = _class_escape(source, this)
else:
code2 = LITERAL, ord(this)
if code1[0] != LITERAL or code2[0] != LITERAL:
raise error, "bad character range"
lo = code1[1]
hi = code2[1]
if hi < lo:
raise error, "bad character range"
setappend((RANGE, (lo, hi)))
else:
raise error, "unexpected end of regular expression"
else:
if code1[0] is IN:
code1 = code1[1][0]
setappend(code1)
# XXX: