from __future__ import division
from pprint import pprint
import sys

import corefutil # you will not change this
import perc      # you will change this

#----------------------------Start Mention Feature Code---------------------------------------

def isPronoun(ment):
    """Report whether the mention is a pronoun.

    Intended contract: return True if the mention is a pronoun, otherwise
    return False.

    Not implemented yet: this placeholder answers False for every mention.
    """
    # TODO: Answer here (replace the placeholder return below).
    return False

def isProper(ment):
    """Report whether the mention is a proper noun phrase.

    Intended contract: return True if the mention is a proper noun phrase,
    detected with a heuristic based on the POS tag of the head word;
    return False if it is not a proper noun.

    Not implemented yet: this placeholder answers False for every mention.
    """
    # TODO: Answer here (replace the placeholder return below).
    return False

def isPlural(ment):
    """Report whether the mention is plural.

    Intended contract: return True if the mention is plural, using a
    heuristic based on the POS tag given for the head token. Pronouns need
    the actual word form instead, since the PTB tagset does not include the
    grammatical number of pronouns.

    Not implemented yet: this placeholder answers False for every mention.
    """
    # TODO: Answer here (replace the placeholder return below).
    return False

#----------------------------Start Stuff for Rule-Based Coref---------------------------------------

def isAcceptableAntecedent(mention,candidateAntecedent):
    """Decide whether ``candidateAntecedent`` may be the antecedent of ``mention``.

    Placeholder rule: every candidate is rejected, so this always returns
    False regardless of the two mentions passed in.
    """
    return False

def doRuleCoref(documents, testSettings,verbose):
    """Run rule-based coreference over ``documents``, modifying them in place.

    Each document gets a ``'coref'`` entry: a dictionary of links in which
    ``links[i] = j`` means mention ``i`` is linked to mention ``j``, where
    ``j`` is ``i``'s antecedent (so ``j`` is to the left of ``i``).

    Every mention after the first looks at up to
    ``testSettings['windowWidth']`` mentions immediately to its left,
    nearest first, and is linked to the nearest one accepted by
    ``isAcceptableAntecedent``. A mention with no acceptable candidate gets
    no link.

    When ``verbose`` is true, the current mention is printed, followed by
    each candidate and whether it is gold-coreferent, acceptable, and chosen.

    Returns None; results are stored on the documents.
    """
    windowWidth = testSettings['windowWidth']
    for doc in documents:
        links = {}
        mentions = doc['mentions']
        for mi in range(1, len(mentions)):
            if verbose:
                # Single-argument print(...) behaves the same on Python 2
                # and Python 3, unlike the print statement.
                print("Current mention: %s" % corefutil.mentionString(mentions[mi]))
            # The last up-to-windowWidth indexes, nearest candidate first.
            leftInd = max(0, mi - windowWidth)
            candInds = list(reversed(range(leftInd, mi)))
            acceptableInds = [
                mj for mj in candInds
                if isAcceptableAntecedent(mentions[mi], mentions[mj])
            ]
            # The nearest acceptable candidate wins; None means "no link".
            choice = acceptableInds[0] if acceptableInds else None
            if choice is not None:
                links[mi] = choice

            if verbose:
                curEntity = corefutil.entityIdForMention(mentions[mi])
                for mj in candInds:
                    print(" Candidate= %s" % corefutil.mentionString(mentions[mj]))
                    # Booleans are rendered as 0/1 by the %d conversions.
                    print(" gold = %d, acceptable = %d, chosen = %d" % (
                            curEntity == corefutil.entityIdForMention(mentions[mj]),
                            mj in acceptableInds,
                            choice == mj))

        doc['coref'] = links

#----------------------------End Stuff for Rule-Based Coref---------------------------------------



#----------------------------Start Stuff for ML-Based Coref---------------------------------------


def getFeaturesForMentionPair(curMent, candMent):
    """Build the feature dictionary for a (current mention, candidate) pair.

    Returns a dictionary from feature name to value containing:
      - "**BIAS**": always 1
      - "same_headword": 1 if the lower-cased head tokens of the two
        mentions are equal, otherwise 0
    """
    curHead = corefutil.headToken(curMent).lower()
    candHead = corefutil.headToken(candMent).lower()

    features = {
        "**BIAS**": 1,
        "same_headword": int(curHead == candHead),
    }
    # TODO: Answer here (add further features to the dictionary).
    return features

# from coref_sol import getFeaturesForMentionPair

def getTrainingExamples(documents,windowWidth, verbose=False):
    """Extract binary mention-pair training examples from annotated documents.

    The documents are assumed to carry gold-standard coreference. Each
    mention after the first is paired with each of the up-to-``windowWidth``
    mentions immediately to its left, in left-to-right order.

    Returns a list of ``(label, featurevec)`` pairs, where
      - label is a boolean: true when the two mentions have equal 'entid'
      - featurevec is a {featname: value} dictionary representing a sparse
        vector, as produced by ``getFeaturesForMentionPair``

    ``verbose`` is accepted but currently has no effect.
    """
    pairs = []
    for doc in documents:
        ments = doc['mentions']
        # cur indexes the current mention; prev the antecedent candidate.
        for cur in range(1, len(ments)):
            windowStart = max(0, cur - windowWidth)
            for prev in range(windowStart, cur):
                isCoreferent = ments[cur]['entid'] == ments[prev]['entid']
                pairFeatures = getFeaturesForMentionPair(ments[cur], ments[prev])
                pairs.append((isCoreferent, pairFeatures))
    return pairs

def doCorefWithClassifier(documents, model, testSettings):
    """Link mentions to antecedents using a linear model, in place.

    For every mention after the first, a window of up to
    ``testSettings['windowWidth']`` candidates to its left is scored with
    ``perc.linearModelScore`` on the pair's feature vector. The mention is
    linked to the highest-scoring candidate (the leftmost one on ties) if
    that score is above ``testSettings['thresh']``. The threshold may be an
    important tuning parameter.

    Each document receives a ``'coref'`` dictionary mapping mention index to
    the chosen antecedent index. Returns None.
    """
    width = testSettings['windowWidth']
    minScore = testSettings['thresh']
    for doc in documents:
        antecedents = {}
        ments = doc['mentions']
        for cur in range(1, len(ments)):
            # Start below any score expected from the model.
            topScore, topIndex = -10000000000000, None
            for cand in range(max(0, cur - width), cur):
                pairFeatures = getFeaturesForMentionPair(ments[cur], ments[cand])
                candScore = perc.linearModelScore(pairFeatures, model)
                # Strict comparison: an earlier candidate keeps a tie.
                if candScore > topScore:
                    topScore, topIndex = candScore, cand
            if topScore > minScore:
                antecedents[cur] = topIndex
        doc['coref'] = antecedents

#----------------------------End Stuff for ML-Based Coref---------------------------------------

#----------------------------Start Experiment Drivers and Main Function-------------------------        
def ruleCorefMain(testDocuments, testSettings, verbose):
    """Driver for the rule-based system: predict, print, and evaluate.

    Runs ``doRuleCoref`` on the test documents, prints the coref output of
    selected documents with ``corefutil.printDocument`` (recommended for
    error analysis and for improving your features), prints a blank line,
    and then calls ``corefutil.evalCorefPairwise`` on all test documents.

    No training data is needed here because the rule-based system is not
    trained in any way.
    """
    doRuleCoref(testDocuments, testSettings, verbose)

    # You may want to change this to check different documents or whatever
    # docinds = [3]   # the Bob Stone document in test.json
    # docinds = range(5)  # first 5 documents
    docinds = [0]
    for i in docinds:
        corefutil.printDocument(testDocuments[i])

    # A bare `print` emits a newline only on Python 2; on Python 3 it is a
    # no-op expression. print("") writes a blank line on both.
    print("")
    corefutil.evalCorefPairwise(testDocuments)


def mlCorefMain(trainDocuments, testDocuments, trainingSettings, testSettings, verbose):
    """Driver for the ML system: train, predict on the test set, evaluate.

    Extracts mention-pair examples from ``trainDocuments`` using
    ``trainingSettings['windowWidth']``, trains a model with
    ``perc.trainPerceptron`` (all of ``trainingSettings`` is forwarded as
    keyword arguments), runs ``doCorefWithClassifier`` on ``testDocuments``
    and calls ``corefutil.evalCorefPairwise`` on them.

    We recommend using the output of ``corefutil.printDocument`` (see the
    commented-out call below) to do error analysis and improve your features.
    """
    # Single-argument print(...) behaves the same on Python 2 and Python 3,
    # unlike the print statement.
    print("Training With Perceptron")
    examples = getTrainingExamples(trainDocuments, trainingSettings['windowWidth'], verbose)
    print("Training perceptron on %d mention pair examples" % len(examples))
    model = perc.trainPerceptron(examples, **trainingSettings)

    doCorefWithClassifier(testDocuments, model, testSettings)
    # corefutil.printDocument(testDocuments[0])
    corefutil.evalCorefPairwise(testDocuments)

def main():
    """Command-line entry point: run rule-based or ML coref on the given files.

    Arguments (parsed from the command line):
      corefStyle    "rule" or "ml"
      --train-file  training documents; needed for "ml", ignored by "rule"
      --test-file   test documents, loaded with ``corefutil.loadJson``

    In "ml" mode a missing --train-file is reported as a usage error.
    """
    import argparse
    p = argparse.ArgumentParser()
    p.add_argument('corefStyle', choices=["rule","ml"])
    p.add_argument('--train-file')
    p.add_argument('--test-file')
    args = p.parse_args()

    testDocuments = corefutil.loadJson(args.test_file)

    if args.corefStyle=='rule':
        if args.train_file is not None:
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print("WARNING: rule-based coref ignores --train-file and only uses the test data.")
        testSettings = {}
        testSettings['windowWidth'] = 5
        ruleCorefMain(testDocuments,testSettings, verbose=False)

    elif args.corefStyle=='ml':
        # An assert would be stripped under `python -O`; report the missing
        # argument as a usage error instead (prints usage and exits).
        if args.train_file is None:
            p.error("Need to provide training file")
        trainingSettings = {}
        trainingSettings['windowWidth'] = 5
        trainingSettings['useAveraging'] = True
        trainingSettings['numPasses'] = 5
        trainingSettings['stepSize'] = 1.0
        testSettings = {}
        # Predict with the same candidate window that was used for training.
        testSettings['windowWidth'] = trainingSettings['windowWidth']
        testSettings['thresh'] = 0

        trainDocuments = corefutil.loadJson(args.train_file)
        mlCorefMain(trainDocuments, testDocuments, trainingSettings, testSettings, verbose=False)
    else:
        # Defensive only: argparse's `choices` already rejects other values.
        print("Need to give coref mode")
        
# Run the command-line driver only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


#----------------------------End Experiment Drivers and Main Function---------------------------------------

