from pprint import pprint

# The productions rules have to be binarized.

grammar_text = """
S -> NP VP
NP -> Det Noun
VP -> Verb NP
PP -> Prep NP
NP -> NP PP
VP -> VP PP
"""

lexicon = {
    'Noun': set(['cat','dog','table','food']),
    'Verb': set(['attacked','saw','loved','hated']),
    'Prep': set(['in','of','on','with']),
    'Det': set(['the','a']),
}

# Process the grammar rules.  You should not have to change this.
grammar_rules = []
for line in grammar_text.strip().split("\n"):
    if not line.strip(): continue
    left,right = line.split("->")
    left = left.strip()
    children = right.split()
    rule = (left, tuple(children))
    grammar_rules.append(rule)
possible_parents_for_children = {}
for parent, (leftchild, rightchild) in grammar_rules:
    if (leftchild,rightchild) not in possible_parents_for_children:
        possible_parents_for_children[leftchild,rightchild] = []
    possible_parents_for_children[leftchild,rightchild].append( parent )
# Error checking
all_parents = set(x[0] for x in grammar_rules) | set(lexicon.keys())
for par,(leftchild,rightchild) in grammar_rules:
    if leftchild not in all_parents:
        assert False, "Nonterminal %s does not appear as parent of prod rule, nor in lexicon." % leftchild
    if rightchild not in all_parents:
        assert False, "Nonterminal %s does not appear as parent of prod rule, nor in lexicon." % rightchild

print "Grammar rules in tuple form:"
pprint(grammar_rules)
print "Rule parents indexed by children:"
pprint(possible_parents_for_children)


def cky_acceptance(sentence):
    # return True or False depending whether the sentence is parseable by the grammar.
    global grammar_rules, lexicon

    # Set up the cells data structure.
    # It is intended that the cell indexed by (i,j)
    # refers to the span, in python notation, sentence[i:j],
    # which is start-inclusive, end-exclusive, which means it includes tokens
    # at indexes i, i+1, ... j-1.
    # So sentence[3:4] is the 3rd word, and sentence[3:6] is a 3-length phrase,
    # at indexes 3, 4, and 5.
    # Each cell would then contain a list of possible nonterminal symbols for that span.
    # If you want, feel free to use a totally different data structure.
    N = len(sentence)
    cells = {}
    for i in range(N):
        for j in range(i+1, N+1):
            cells[(i,j)] = []

    # TODO replace the below with an implementation
    pprint(cells)
    return False


def cky_parse(sentence):
    # Return one of the legal parses for the sentence.
    # If nothing is legal, return None.
    # This will be similar to cky_acceptance(), except with backpointers.
    global grammar_rules, lexicon

    N = len(sentence)
    cells = {}
    for i in range(N):
        for j in range(i+1, N+1):
            cells[(i,j)] = []

    # TODO replace the below with an implementation
    return None
        
## some examples of calling these things...
## you probably want to call only one sentence at a time to help debug more easily.

# print cky_acceptance(['the','cat','attacked','the','food'])
# pprint( cky_parse(['the','cat','attacked','the','food']))
# pprint( cky_acceptance(['the','the']))
# pprint( cky_parse(['the','the']))
# print cky_acceptance(['the','cat','attacked','the','food','with','a','dog'])
# pprint( cky_parse(['the','cat','attacked','the','food','with','a','dog']) )
# pprint( cky_parse(['the','cat','with','a','table','attacked','the','food']) )
#