from __future__ import division

import sys
import re
import random
from collections import defaultdict
from pprint import pprint

##########################
# Important stuff you will use

import vit

OUTPUT_VOCAB = set(""" ! # $ & , @ A D E G L M N O P R S T U V X Y Z ^ """.split())

##########################
# Utilities


def dict_subtract(vec1, vec2):
    """Return ``vec1 - vec2``, treating both dicts as sparse vectors.

    A key that is absent from one of the dicts counts as 0.0 there.  Every
    key of either input appears in the result, including keys whose
    difference is zero.  Neither argument is modified; a new plain ``dict``
    is returned.
    """
    out = defaultdict(float)
    out.update(vec1)
    for k in vec2:
        out[k] -= vec2[k]
    return dict(out)


def dict_argmax(dct):
    """Return the key whose value is largest.

    In other words: argmax_k dct[k].  Raises ``ValueError`` (from ``max``)
    when ``dct`` is empty.
    """
    # Iterate the dict itself rather than calling iterkeys(): iterkeys()
    # exists only on Python 2, while plain iteration yields the keys on
    # both Python 2 and Python 3.
    return max(dct, key=lambda k: dct[k])


def dict_dotprod(d1, d2):
    """Return the dot product (aka inner product) of two vectors, where each
    is represented as a dictionary of {index: weight} pairs, where indexes
    are any keys, potentially strings.  If a key does not exist in a
    dictionary, its value is assumed to be zero."""
    # NOTE(review): the visible source was cut off right after
    # "smaller = d1 if len(d1)".  Everything from the comparison onward is
    # reconstructed from the docstring above -- confirm against the
    # upstream copy of this file.
    #
    # Only keys present in both dicts contribute a non-zero term, so it is
    # enough to walk the smaller of the two.
    smaller = d1 if len(d1) < len(d2) else d2
    total = 0
    for key in smaller:
        total += d1.get(key, 0) * d2.get(key, 0)
    return total