#!/usr/bin/env python3
"""A left-corner transform natural language parser."""

import sys
from ucb import main
from mr import parse_key_value_pair
from trees import Tree, Leaf, START, END
from util import trace_gen
import words


class Rule(object):
    """A syntactic rule in left-corner form."""
    def __init__(self, first, rest, tag):
        self.first = first
        self.rest = rest
        self.tag = tag

    def __str__(self):
        rest_str = ' '.join([str(r) for r in self.rest])
        return '{0} -> {1} {2}'.format(self.tag, self.first, rest_str)


def guess_tag(word, lexicon):
    """Guess the tag of a word based on its lexicon entry."""
    return lexicon.get(word, 'NN')


def parse(tokens, grammar, lexicon):
    """Yield trees that match tokens."""
    leaves = [Leaf(guess_tag(word, lexicon), word.lower()) for word in tokens]
    print(' '.join([str(s) for s in leaves]))
    # Surround the sentence with START and END sentinels for the ROOT rule.
    words = tuple([START] + leaves + [END])
    for tree, pos in parse_next('ROOT', 0, words, grammar):
        yield tree.branches[1]


def parse_next(tag, pos, words, grammar):
    """Yield parses of prefixes of words that match tag."""
    for result in complete(tag, words[pos], pos+1, words, grammar):
        yield result


def complete(tag, first, pos, words, grammar):
    """Complete the tree rooted at tag with left corner first."""
    if tag == first.tag:
        yield first, pos
    else:
        for rule in grammar.get(first.tag, []):  # Try each rule in order
            # Skip unary rules that would rewrap a single-branch tree.
            if len(rule.rest) == 0 and type(first) == Tree and len(first.branches) == 1:
                continue
            # Skip rules that need more words than remain in the sentence.
            if len(rule.rest) > len(words) - pos:
                continue
            branches = (first,)
            for result in match(tag, rule.tag, rule.rest, pos, branches,
                                words, grammar):
                yield result


def match(tag, rule_tag, rest, pos, branches, words, grammar):
    """Match the rest of a rule to the words."""
    if len(rest) == 0:
        tree = Tree(rule_tag, branches)
        for result in complete(tag, tree, pos, words, grammar):
            yield result
    else:
        for branch, next_pos in parse_next(rest[0], pos, words, grammar):
            for result in match(tag, rule_tag, rest[1:], next_pos,
                                branches + (branch,), words, grammar):
                yield result


def load_grammar(grammar_file):
    """Load a grammar constructed from a MapReduction."""
    grammar = {}
    for line in open(grammar_file):
        first, rests = parse_key_value_pair(line)
        for rest, tag in rests:
            grammar.setdefault(first, []).append(Rule(first, rest, tag))
    return grammar


def load_lexicon(lexicon_file):
    """Load a lexicon constructed from a MapReduction."""
    lexicon = {}
    for line in open(lexicon_file):
        word, tag_set = parse_key_value_pair(line)
        lexicon[word] = tag_set[0]
    return lexicon


def first_parse(tokens, grammar, lexicon):
    """Return the first parse found."""
    for p in parse(tokens, grammar, lexicon):
        return p


def parse_test():
    """Test the parser on a simple example."""
    gr = {'NP': [Rule('NP', ('VP',), 'S')],
          'PRP': [Rule('PRP', (), 'NP')],
          'VB': [Rule('VB', (), 'VP'), Rule('VB', ('PRP',), 'VP')],
          '': [Rule('', ('S', ''), 'ROOT')],
          }
    lex = {'i': 'PRP', 'you': 'PRP', 'know': 'VB'}
    print(first_parse(['i', 'know', 'you'], gr, lex))


def extract_modal(t):
    """Delete the first clause of a modal verb and return it."""
    if type(t) == Leaf:
        return None
    if len(t.branches) == 2 and t.branches[0].tag == 'MD':
        modal, clause = t.branches
        t.branches = [modal]
        return clause
    for b in t.branches:
        clause = extract_modal(b)
        if clause is not None:
            return clause
    return None


def yoda_transform(t):
    """Place the clause of a modal verb at the front of the sentence."""
    clause = extract_modal(t)
    if clause is not None:
        return Tree(t.tag, (clause, Leaf(',', ',')) + t.branches)
    return t


@main
def run(grammar_file, lexicon_file):
    print('Loading...')
    grammar, lexicon = load_grammar(grammar_file), load_lexicon(lexicon_file)
    print('Ready.')
    for line in sys.stdin:
        p = first_parse(line.split(), grammar, lexicon)
        print(p)
        if p is not None:
            y = yoda_transform(p)
            print(y)
            print(' '.join([w[0] for w in words.words(y)]))
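
# A hedged sketch of the modal-fronting transform, not part of the original
# interface: the hand-built tree below is illustrative and assumes only the
# Tree/Leaf constructors and tuple-of-branches usage seen above.
def _yoda_example():
    """Show yoda_transform fronting the clause under a modal verb (MD)."""
    t = Tree('S', (Tree('NP', (Leaf('PRP', 'i'),)),
                   Tree('VP', (Leaf('MD', 'must'),
                               Tree('VP', (Leaf('VB', 'learn'),))))))
    # extract_modal finds the VP whose first branch is tagged MD, removes its
    # clause, and yoda_transform puts that clause first, giving roughly:
    #   (S (VP (VB learn)) (, ,) (NP (PRP i)) (VP (MD must)))
    return yoda_transform(t)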