GIF89a; EcchiShell v1.0
//usr/lib64/lib64/lib64/python2.7/lib2to3/ %d" % j else: print " %s -> %d" % (label, j) def dump_dfa(self, name, dfa): print "Dump of DFA for", name for i, state in enumerate(dfa): print " State", i, state.isfinal and "(final)" or "" for label, next in state.arcs.iteritems(): print " %s -> %d" % (label, dfa.index(next)) def simplify_dfa(self, dfa): # This is not theoretically optimal, but works well enough. # Algorithm: repeatedly look for two states that have the same # set of arcs (same labels pointing to the same nodes) and # unify them, until things stop changing. # dfa is a list of DFAState instances changes = True while changes: changes = False for i, state_i in enumerate(dfa): for j in range(i+1, len(dfa)): state_j = dfa[j] if state_i == state_j: #print " unify", i, j del dfa[j] for state in dfa: state.unifystate(state_j, state_i) changes = True break def parse_rhs(self): # RHS: ALT ('|' ALT)* a, z = self.parse_alt() if self.value != "|": return a, z else: aa = NFAState() zz = NFAState() aa.addarc(a) z.addarc(zz) while self.value == "|": self.gettoken() a, z = self.parse_alt() aa.addarc(a) z.addarc(zz) return aa, zz def parse_alt(self): # ALT: ITEM+ a, b = self.parse_item() while (self.value in ("(", "[") or self.type in (token.NAME, token.STRING)): c, d = self.parse_item() b.addarc(c) b = d return a, b def parse_item(self): # ITEM: '[' RHS ']' | ATOM ['+' | '*'] if self.value == "[": self.gettoken() a, z = self.parse_rhs() self.expect(token.OP, "]") a.addarc(z) return a, z else: a, z = self.parse_atom() value = self.value if value not in ("+", "*"): return a, z self.gettoken() z.addarc(a) if value == "+": return a, z else: return a, a def parse_atom(self): # ATOM: '(' RHS ')' | NAME | STRING if self.value == "(": self.gettoken() a, z = self.parse_rhs() self.expect(token.OP, ")") return a, z elif self.type in (token.NAME, token.STRING): a = NFAState() z = NFAState() a.addarc(z, self.value) self.gettoken() return a, z else: self.raise_error("expected (...) 
or NAME or STRING, got %s/%s", self.type, self.value) def expect(self, type, value=None): if self.type != type or (value is not None and self.value != value): self.raise_error("expected %s/%s, got %s/%s", type, value, self.type, self.value) value = self.value self.gettoken() return value def gettoken(self): tup = self.generator.next() while tup[0] in (tokenize.COMMENT, tokenize.NL): tup = self.generator.next() self.type, self.value, self.begin, self.end, self.line = tup #print token.tok_name[self.type], repr(self.value) def raise_error(self, msg, *args): if args: try: msg = msg % args except: msg = " ".join([msg] + map(str, args)) raise SyntaxError(msg, (self.filename, self.end[0], self.end[1], self.line)) class NFAState(object): def __init__(self): self.arcs = [] # list of (label, NFAState) pairs def addarc(self, next, label=None): assert label is None or isinstance(label, str) assert isinstance(next, NFAState) self.arcs.append((label, next)) class DFAState(object): def __init__(self, nfaset, final): assert isinstance(nfaset, dict) assert isinstance(iter(nfaset).next(), NFAState) assert isinstance(final, NFAState) self.nfaset = nfaset self.isfinal = final in nfaset self.arcs = {} # map from label to DFAState def addarc(self, next, label): assert isinstance(label, str) assert label not in self.arcs assert isinstance(next, DFAState) self.arcs[label] = next def unifystate(self, old, new): for label, next in self.arcs.iteritems(): if next is old: self.arcs[label] = new def __eq__(self, other): # Equality test -- ignore the nfaset instance variable assert isinstance(other, DFAState) if self.isfinal != other.isfinal: return False # Can't just return self.arcs == other.arcs, because that # would invoke this method recursively, with cycles... if len(self.arcs) != len(other.arcs): return False for label, next in self.arcs.iteritems(): if next is not other.arcs.get(label): return False return True __hash__ = None # For Py3 compatibility. 
def generate_grammar(filename="Grammar.txt"):
    """Return the grammar built from *filename*.

    Constructs a ParserGenerator for *filename* and returns the result of
    its make_grammar() method.
    """
    return ParserGenerator(filename).make_grammar()