In [1]:
3+7
Out[1]:
10
In [15]:
import math,re
In [7]:
math.log10(10)
Out[7]:
1.0
In [8]:
text = open("doc.txt").read()
In [18]:
re.split(" ", "hello world there")
Out[18]:
['hello', 'world', 'there']
In [25]:
tokens = re.split(r"\W+", text.lower())
In [27]:
tokens
Out[27]:
['',
 'this',
 'isn',
 't',
 'a',
 'paywall',
 'we',
 'll',
 'cut',
 'to',
 'the',
 'chase',
 '98',
 'of',
 'wikipedia',
 's',
 'readers',
 'don',
 't',
 'give',
 'they',
 'simply',
 'look',
 'away',
 'all',
 'we',
 'ask',
 'is',
 '2',
 '75',
 'or',
 'whatever',
 'seems',
 'right',
 'to',
 'you',
 'this',
 'thursday',
 'before',
 'you',
 'get',
 'back',
 'to',
 'your',
 'article',
 'donate',
 'maybe',
 'laterfeatured',
 'article',
 'huey',
 'long',
 'from',
 'wikipedia',
 'the',
 'free',
 'encyclopedia',
 'jump',
 'to',
 'navigationjump',
 'to',
 'search',
 'the',
 'kingfish',
 'redirects',
 'here',
 'for',
 'other',
 'uses',
 'see',
 'kingfish',
 'disambiguation',
 'and',
 'huey',
 'long',
 'disambiguation',
 'huey',
 'long',
 'long',
 'with',
 'both',
 'arms',
 'raised',
 'while',
 'speaking',
 'united',
 'states',
 'senator',
 'from',
 'louisiana',
 'in',
 'office',
 'january',
 '25',
 '1932',
 'september',
 '10',
 '1935',
 'preceded',
 'by',
 'joseph',
 'e',
 'ransdell',
 'succeeded',
 'by',
 'rose',
 'mcconnell',
 'long',
 '40th',
 'governor',
 'of',
 'louisiana',
 'in',
 'office',
 'may',
 '21',
 '1928',
 'january',
 '25',
 '1932',
 'lieutenant',
 'paul',
 'n',
 'cyr',
 'alvin',
 'olin',
 'king',
 'preceded',
 'by',
 'oramel',
 'h',
 'simpson',
 'succeeded',
 'by',
 'alvin',
 'olin',
 'king',
 'personal',
 'details',
 'born',
 'august',
 '30',
 '1893',
 'winnfield',
 'louisiana',
 'u',
 's',
 'died',
 'september',
 '10',
 '1935',
 'aged',
 '42',
 'baton',
 'rouge',
 'louisiana',
 'u',
 's',
 'cause',
 'of',
 'death',
 'assasinated',
 'resting',
 'place',
 'louisiana',
 'state',
 'capitol',
 'political',
 'party',
 'democratic',
 'spouse',
 's',
 'rose',
 'mcconnell',
 'm',
 '1913',
 'children',
 '3',
 'including',
 'russell',
 'education',
 'oklahoma',
 'baptist',
 'university',
 'university',
 'of',
 'oklahoma',
 'tulane',
 'university',
 'signature',
 'we',
 'ask',
 'you',
 'humbly',
 'don',
 't',
 'scroll',
 'away',
 'hi',
 'reader',
 'this',
 'thursday',
 'for',
 'the',
 '3rd',
 'time',
 'recently',
 'we',
 'ask',
 'you',
 'to',
 'protect',
 'wikipedia',
 's',
 'independence',
 'thanks',
 'to',
 'the',
 '2',
 'of',
 'readers',
 'who',
 'donate',
 'wikipedia',
 'and',
 'the',
 'free',
 'knowledge',
 'movement',
 'are',
 'thriving',
 'if',
 'you',
 'too',
 'have',
 'benefitted',
 'from',
 'using',
 'wikipedia',
 'take',
 'a',
 'minute',
 'to',
 'donate',
 '2',
 '75',
 'to',
 'keep',
 'it',
 'growing',
 'for',
 'years',
 'if',
 'you',
 'are',
 'one',
 'of',
 'our',
 'rare',
 'donors',
 'we',
 'warmly',
 'thank',
 'you',
 'please',
 'select',
 'a',
 'payment',
 'method',
 'maybe',
 'later',
 'close',
 'huey',
 'long',
 'cropped',
 'jpg',
 'this',
 'article',
 'is',
 'part',
 'of',
 'a',
 'series',
 'about',
 'huey',
 'long',
 'political',
 'viewsearly',
 'lifeearly',
 'careerin',
 'culture',
 'governor',
 'of',
 'louisiana',
 '1924',
 'campaign1928',
 'electionstate',
 'capitolcotton',
 'holiday',
 'u',
 's',
 'senator',
 'from',
 'louisiana',
 'tenurechaco',
 'warshare',
 'our',
 'wealthassassination',
 'american',
 'progressevery',
 'man',
 'a',
 'king',
 'book',
 'every',
 'man',
 'a',
 'king',
 'song',
 'my',
 'first',
 'days',
 'in',
 'the',
 'white',
 'house',
 'seal',
 'of',
 'the',
 'united',
 'states',
 'senate',
 'svg',
 'seal',
 'of',
 'louisiana',
 'svg',
 'vte',
 'huey',
 'pierce',
 'long',
 'jr',
 'august',
 '30',
 '1893',
 'september',
 '10',
 '1935',
 'nicknamed',
 'the',
 'kingfish',
 'was',
 'an',
 'american',
 'lawyer',
 'and',
 'politician',
 'who',
 'served',
 'as',
 'the',
 '40th',
 'governor',
 'of',
 'louisiana',
 'from',
 '1928',
 'to',
 '1932',
 'and',
 'as',
 'a',
 'member',
 'of',
 'the',
 'united',
 'states',
 'senate',
 'from',
 '1932',
 'until',
 'his',
 'assassination',
 'in',
 '1935',
 'he',
 'was',
 'a',
 'populist',
 'member',
 'of',
 'the',
 'democratic',
 'party',
 'and',
 'rose',
 'to',
 'national',
 'prominence',
 'during',
 'the',
 'great',
 'depression',
 'for',
 'his',
 'vocal',
 'criticism',
 'of',
 'president',
 'franklin',
 'd',
 'roosevelt',
 'and',
 'his',
 'new',
 'deal',
 'which',
 'long',
 'deemed',
 'insufficiently',
 'radical',
 'as',
 'the',
 'political',
 'leader',
 'of',
 'louisiana',
 'he',
 'commanded',
 'wide',
 'networks',
 'of',
 'supporters',
 'and',
 'often',
 'took',
 'forceful',
 'action',
 'a',
 'controversial',
 'figure',
 'long',
 'is',
 'celebrated',
 'as',
 'a',
 'populist',
 'champion',
 'of',
 'the',
 'poor',
 'or',
 'conversely',
 'denounced',
 'as',
 'a',
 'fascistic',
 'demagogue',
 'long',
 'was',
 'born',
 'in',
 'the',
 'impoverished',
 'north',
 'of',
 'louisiana',
 'in',
 '1893',
 'after',
 'working',
 'as',
 'a',
 'traveling',
 'salesman',
 'and',
 'briefly',
 'attending',
 'three',
 'colleges',
 'he',
 'entered',
 'the',
 'bar',
 'in',
 'louisiana',
 'following',
 'a',
 'short',
 'private',
 'legal',
 'career',
 'in',
 'which',
 'he',
 'represented',
 'poor',
 'plaintiffs',
 'long',
 'was',
 'elected',
 'to',
 'the',
 'louisiana',
 'public',
 'service',
 'commission',
 'as',
 'commissioner',
 'he',
 'prosecuted',
 'large',
 'corporations',
 'such',
 'as',
 'standard',
 'oil',
 'a',
 'lifelong',
 'target',
 'of',
 'his',
 'rhetorical',
 'attacks',
 'after',
 'long',
 'successfully',
 'argued',
 'before',
 'the',
 'u',
 's',
 'supreme',
 'court',
 'chief',
 'justice',
 'and',
 'former',
 'president',
 'william',
 'howard',
 'taft',
 'praised',
 'him',
 'as',
 'the',
 'most',
 'brilliant',
 'lawyer',
 'who',
 'ever',
 'practiced',
 'before',
 'the',
 'court',
 'after',
 'a',
 'failed',
 '1924',
 'campaign',
 'long',
 'used',
 'the',
 'sharp',
 'economic',
 'and',
 'class',
 'divisions',
 'in',
 'louisiana',
 'to',
 'win',
 'the',
 '1928',
 'gubernatorial',
 'election',
 'once',
 'in',
 'office',
 'he',
 'expanded',
 'social',
 'programs',
 'organized',
 'massive',
 'public',
 'works',
 'projects',
 'such',
 'as',
 'a',
 'modern',
 'highway',
 'system',
 'and',
 'the',
 'tallest',
 'capitol',
 'building',
 'in',
 'the',
 'nation',
 'and',
 'proposed',
 'a',
 'cotton',
 'holiday',
 'through',
 'political',
 'maneuvering',
 'long',
 'became',
 'the',
 'political',
 'boss',
 'of',
 'louisiana',
 'he',
 'was',
 'impeached',
 'in',
 '1929',
 'for',
 'abuses',
 'of',
 'power',
 'but',
 'the',
 'proceedings',
 'collapsed',
 'in',
 'the',
 'state',
 'senate',
 'his',
 'opponents',
 'argued',
 'his',
 'policies',
 'and',
 'methods',
 'were',
 'unconstitutional',
 'and',
 'dictatorial',
 'at',
 'its',
 'climax',
 'political',
 'opposition',
 'organized',
 'a',
 'minor',
 'insurrection',
 'long',
 'was',
 'elected',
 'to',
 'the',
 'u',
 's',
 'senate',
 'in',
 '1930',
 'but',
 'did',
 'not',
 'assume',
 'his',
 'seat',
 'until',
 '1932',
 'he',
 'established',
 'himself',
 'as',
 'an',
 'isolationist',
 'arguing',
 'that',
 'standard',
 'oil',
 'and',
 'wall',
 'street',
 'orchestrated',
 'american',
 'foreign',
 'policy',
 'he',
 'was',
 'instrumental',
 'in',
 'securing',
 'roosevelt',
 's',
 '1932',
 'nomination',
 'but',
 'split',
 'with',
 'him',
 'in',
 '1933',
 'becoming',
 'a',
 'prominent',
 'critic',
 'of',
 'his',
 'new',
 'deal',
 'as',
 'an',
 'alternative',
 'he',
 'proposed',
 'the',
 'share',
 'our',
 'wealth',
 'program',
 'in',
 '1934',
 'to',
 'stimulate',
 'the',
 'economy',
 'he',
 'advocated',
 'massive',
 'federal',
 'spending',
 'a',
 'wealth',
 'tax',
 'and',
 'wealth',
 'redistribution',
 'these',
 'proposals',
 'drew',
 'wide',
 'support',
 'with',
 'millions',
 'joining',
 'local',
 'share',
 'our',
 'wealth',
 'clubs',
 'poised',
 'for',
 'a',
 '1936',
 'presidential',
 'bid',
 'long',
 'was',
 'mortally',
 'wounded',
 'by',
 'a',
 'lone',
 'assassin',
 'in',
 '1935',
 'although',
 'long',
 's',
 'movement',
 'faded',
 'roosevelt',
 'adopted',
 'many',
 'of',
 'his',
 'proposals',
 'in',
 'the',
 'second',
 'new',
 'deal',
 'and',
 'louisiana',
 'elections',
 'would',
 'be',
 'organized',
 'along',
 'anti',
 'or',
 'pro',
 'long',
 'factions',
 'until',
 'the',
 '1960s',
 'he',
 'left',
 'behind',
 'a',
 'political',
 'dynasty',
 'that',
 'included',
 'his',
 'wife',
 'senator',
 'rose',
 'mcconnell',
 'long',
 'his',
 'son',
 'senator',
 'russell',
 'b',
 'long',
 'and',
 'his',
 'brother',
 'governor',
 'earl',
 'long',
 'among',
 'others',
 'contents',
 '1',
 'early',
 'life',
 '1893',
 '1915',
 '1',
 '1',
 'childhood',
 '1',
 '2',
 'education',
 'and',
 'marriage',
 '2',
 'legal',
 'career',
 '1915',
 '1923',
 '3',
 'gubernatorial',
 'campaigns',
 '1924',
 '1928',
 '3',
 '1',
 '1924',
 'election',
 '3',
 '2',
 '1928',
 'election',
 '4',
 'louisiana',
 'governorship',
 '1928',
 '1932',
 '4',
 '1',
 'first',
 'year',
 '4',
 '2',
 'impeachment',
 '4',
 '3',
 'senate',
 'campaign',
 '4',
 '4',
 'senator',
 'elect',
 '4',
 '5',
 'accomplishments',
 'in',
 'louisiana',
 '5',
 'u',
 's',
 'senate',
 '1932',
 '1935',
 '5',
 '1',
 'senator',
 '5',
 '2',
 'roosevelt',
 'and',
 'the',
 'new',
 'deal',
 '5',
 '3',
 'chaco',
 'war',
 'and',
 'foreign',
 'policy',
 '5',
 '4',
 'share',
 'our',
 'wealth',
 '5',
 '5',
 'continued',
 'control',
 'over',
 'louisiana',
 '6',
 '1935',
 'final',
 'year',
 '6',
 '1',
 'presidential',
 'ambitions',
 '6',
 '2',
 'increased',
 'tensions',
 'in',
 'louisiana',
 '6',
 '3',
 'assassination',
 '7',
 'legacy',
 '7',
 '1',
 'politics',
 '7',
 '2',
 'historical',
 'reputation',
 '7',
 '3',
 'media',
 '8',
 'works',
 '8',
 '1',
 'bibliography',
 '8',
 '2',
 'discography',
 '9',
 'see',
 'also',
 '10',
 'notes',
 'and',
 'references',
 '10',
 '1',
 'notes',
 '10',
 '2',
 'references',
 'and',
 'citations',
 '10',
 '3',
 'works',
 'cited',
 'early',
 'life',
 '1893',
 '1915',
 'childhood',
 'long',
 'was',
 'born',
 'on',
 'august',
 '30',
 '1893',
 'near',
 'winnfield',
 'a',
 'small',
 'town',
 'in',
 'north',
 'central',
 'louisiana',
 'the',
 'seat',
 'of',
 'winn',
 'parish',
 '1',
 ...]
In [30]:
counts = {}   ### or: Counter, defaultdict
for w in tokens:
    if w not in counts:
        counts[w] = 0
    counts[w] += 1
In [32]:
counts["long"]
Out[32]:
428
In [38]:
lst = list(counts.items())
lst
Out[38]:
[('', 2),
 ('this', 28),
 ('isn', 1),
 ('t', 13),
 ('a', 250),
 ('paywall', 1),
 ('we', 5),
 ('ll', 1),
 ('cut', 2),
 ('to', 252),
 ('the', 774),
 ('chase', 1),
 ('98', 3),
 ('of', 328),
 ('wikipedia', 8),
 ('s', 185),
 ('readers', 2),
 ('don', 3),
 ('give', 4),
 ('they', 14),
 ('simply', 4),
 ('look', 2),
 ('away', 5),
 ('all', 15),
 ('ask', 4),
 ('is', 15),
 ('2', 37),
 ('75', 4),
 ('or', 21),
 ('whatever', 1),
 ('seems', 1),
 ('right', 5),
 ('you', 12),
 ('thursday', 2),
 ('before', 12),
 ('get', 3),
 ('back', 1),
 ('your', 6),
 ('article', 10),
 ('donate', 4),
 ('maybe', 3),
 ('laterfeatured', 1),
 ('huey', 93),
 ('long', 428),
 ('from', 118),
 ('free', 9),
 ('encyclopedia', 2),
 ('jump', 1),
 ('navigationjump', 1),
 ('search', 3),
 ('kingfish', 14),
 ('redirects', 1),
 ('here', 10),
 ('for', 102),
 ('other', 11),
 ('uses', 1),
 ('see', 5),
 ('disambiguation', 2),
 ('and', 258),
 ('with', 48),
 ('both', 6),
 ('arms', 1),
 ('raised', 4),
 ('while', 7),
 ('speaking', 5),
 ('united', 28),
 ('states', 34),
 ('senator', 20),
 ('louisiana', 146),
 ('in', 279),
 ('office', 18),
 ('january', 15),
 ('25', 13),
 ('1932', 21),
 ('september', 29),
 ('10', 26),
 ('1935', 35),
 ('preceded', 2),
 ('by', 63),
 ('joseph', 3),
 ('e', 5),
 ('ransdell', 7),
 ('succeeded', 2),
 ('rose', 8),
 ('mcconnell', 5),
 ('40th', 3),
 ('governor', 29),
 ('may', 19),
 ('21', 14),
 ('1928', 15),
 ('lieutenant', 5),
 ('paul', 2),
 ('n', 4),
 ('cyr', 10),
 ('alvin', 2),
 ('olin', 2),
 ('king', 20),
 ('oramel', 1),
 ('h', 6),
 ('simpson', 1),
 ('personal', 7),
 ('details', 1),
 ('born', 4),
 ('august', 11),
 ('30', 18),
 ('1893', 6),
 ('winnfield', 6),
 ('u', 19),
 ('died', 2),
 ('aged', 1),
 ('42', 6),
 ('baton', 21),
 ('rouge', 21),
 ('cause', 2),
 ('death', 12),
 ('assasinated', 1),
 ('resting', 1),
 ('place', 4),
 ('state', 79),
 ('capitol', 19),
 ('political', 36),
 ('party', 12),
 ('democratic', 14),
 ('spouse', 1),
 ('m', 11),
 ('1913', 2),
 ('children', 8),
 ('3', 25),
 ('including', 6),
 ('russell', 10),
 ('education', 5),
 ('oklahoma', 6),
 ('baptist', 4),
 ('university', 31),
 ('tulane', 3),
 ('signature', 1),
 ('humbly', 1),
 ('scroll', 1),
 ('hi', 1),
 ('reader', 1),
 ('3rd', 1),
 ('time', 12),
 ('recently', 2),
 ('protect', 1),
 ('independence', 2),
 ('thanks', 1),
 ('who', 28),
 ('knowledge', 1),
 ('movement', 6),
 ('are', 7),
 ('thriving', 1),
 ('if', 5),
 ('too', 7),
 ('have', 18),
 ('benefitted', 1),
 ('using', 4),
 ('take', 5),
 ('minute', 1),
 ('keep', 2),
 ('it', 38),
 ('growing', 1),
 ('years', 13),
 ('one', 26),
 ('our', 16),
 ('rare', 1),
 ('donors', 1),
 ('warmly', 1),
 ('thank', 1),
 ('please', 2),
 ('select', 1),
 ('payment', 2),
 ('method', 1),
 ('later', 12),
 ('close', 7),
 ('cropped', 1),
 ('jpg', 1),
 ('part', 4),
 ('series', 2),
 ('about', 8),
 ('viewsearly', 1),
 ('lifeearly', 1),
 ('careerin', 1),
 ('culture', 4),
 ('1924', 10),
 ('campaign1928', 1),
 ('electionstate', 1),
 ('capitolcotton', 1),
 ('holiday', 6),
 ('tenurechaco', 1),
 ('warshare', 1),
 ('wealthassassination', 1),
 ('american', 28),
 ('progressevery', 1),
 ('man', 13),
 ('book', 6),
 ('every', 15),
 ('song', 3),
 ('my', 10),
 ('first', 20),
 ('days', 7),
 ('white', 31),
 ('house', 18),
 ('seal', 2),
 ('senate', 40),
 ('svg', 2),
 ('vte', 6),
 ('pierce', 2),
 ('jr', 5),
 ('nicknamed', 3),
 ('was', 120),
 ('an', 36),
 ('lawyer', 3),
 ('politician', 3),
 ('served', 3),
 ('as', 66),
 ('member', 3),
 ('until', 9),
 ('his', 130),
 ('assassination', 7),
 ('he', 123),
 ('populist', 9),
 ('national', 24),
 ('prominence', 2),
 ('during', 10),
 ('great', 9),
 ('depression', 12),
 ('vocal', 2),
 ('criticism', 2),
 ('president', 9),
 ('franklin', 5),
 ('d', 7),
 ('roosevelt', 39),
 ('new', 79),
 ('deal', 16),
 ('which', 33),
 ('deemed', 1),
 ('insufficiently', 1),
 ('radical', 4),
 ('leader', 5),
 ('commanded', 1),
 ('wide', 5),
 ('networks', 1),
 ('supporters', 2),
 ('often', 6),
 ('took', 6),
 ('forceful', 1),
 ('action', 2),
 ('controversial', 2),
 ('figure', 3),
 ('celebrated', 1),
 ('champion', 2),
 ('poor', 11),
 ('conversely', 1),
 ('denounced', 5),
 ('fascistic', 1),
 ('demagogue', 7),
 ('impoverished', 3),
 ('north', 4),
 ('after', 27),
 ('working', 2),
 ('traveling', 2),
 ('salesman', 3),
 ('briefly', 1),
 ('attending', 2),
 ('three', 7),
 ('colleges', 1),
 ('entered', 3),
 ('bar', 2),
 ('following', 4),
 ('short', 1),
 ('private', 4),
 ('legal', 9),
 ('career', 6),
 ('represented', 3),
 ('plaintiffs', 2),
 ('elected', 6),
 ('public', 22),
 ('service', 9),
 ('commission', 10),
 ('commissioner', 4),
 ('prosecuted', 2),
 ('large', 3),
 ('corporations', 3),
 ('such', 9),
 ('standard', 11),
 ('oil', 25),
 ('lifelong', 2),
 ('target', 1),
 ('rhetorical', 1),
 ('attacks', 2),
 ('successfully', 6),
 ('argued', 5),
 ('supreme', 5),
 ('court', 9),
 ('chief', 2),
 ('justice', 3),
 ('former', 5),
 ('william', 9),
 ('howard', 4),
 ('taft', 4),
 ('praised', 2),
 ('him', 35),
 ('most', 11),
 ('brilliant', 2),
 ('ever', 6),
 ('practiced', 2),
 ('failed', 5),
 ('campaign', 21),
 ('used', 5),
 ('sharp', 2),
 ('economic', 7),
 ('class', 5),
 ('divisions', 1),
 ('win', 7),
 ('gubernatorial', 7),
 ('election', 30),
 ('once', 5),
 ('expanded', 1),
 ('social', 9),
 ('programs', 7),
 ('organized', 5),
 ('massive', 2),
 ('works', 10),
 ('projects', 4),
 ('modern', 3),
 ('highway', 5),
 ('system', 5),
 ('tallest', 4),
 ('building', 5),
 ('nation', 3),
 ('proposed', 5),
 ('cotton', 10),
 ('through', 8),
 ('maneuvering', 1),
 ('became', 9),
 ('boss', 1),
 ('impeached', 4),
 ('1929', 6),
 ('abuses', 1),
 ('power', 5),
 ('but', 21),
 ('proceedings', 1),
 ('collapsed', 2),
 ('opponents', 9),
 ('policies', 6),
 ('methods', 1),
 ('were', 26),
 ('unconstitutional', 1),
 ('dictatorial', 2),
 ('at', 43),
 ('its', 11),
 ('climax', 1),
 ('opposition', 7),
 ('minor', 1),
 ('insurrection', 1),
 ('1930', 6),
 ('did', 10),
 ('not', 22),
 ('assume', 2),
 ('seat', 6),
 ('established', 6),
 ('himself', 7),
 ('isolationist', 1),
 ('arguing', 1),
 ('that', 85),
 ('wall', 5),
 ('street', 5),
 ('orchestrated', 2),
 ('foreign', 4),
 ('policy', 5),
 ('instrumental', 1),
 ('securing', 1),
 ('nomination', 2),
 ('split', 2),
 ('1933', 12),
 ('becoming', 1),
 ('prominent', 2),
 ('critic', 5),
 ('alternative', 1),
 ('share', 12),
 ('wealth', 24),
 ('program', 8),
 ('1934', 9),
 ('stimulate', 1),
 ('economy', 1),
 ('advocated', 1),
 ('federal', 9),
 ('spending', 2),
 ('tax', 13),
 ('redistribution', 4),
 ('these', 10),
 ('proposals', 5),
 ('drew', 4),
 ('support', 12),
 ('millions', 1),
 ('joining', 1),
 ('local', 8),
 ('clubs', 3),
 ('poised', 1),
 ('1936', 13),
 ('presidential', 10),
 ('bid', 2),
 ('mortally', 1),
 ('wounded', 3),
 ('lone', 2),
 ('assassin', 1),
 ('although', 14),
 ('faded', 1),
 ('adopted', 2),
 ('many', 6),
 ('second', 7),
 ('elections', 6),
 ('would', 28),
 ('be', 13),
 ('along', 2),
 ('anti', 4),
 ('pro', 5),
 ('factions', 2),
 ('1960s', 1),
 ('left', 6),
 ('behind', 4),
 ('dynasty', 2),
 ('included', 2),
 ('wife', 3),
 ('son', 9),
 ('b', 10),
 ('brother', 9),
 ('earl', 8),
 ('among', 6),
 ('others', 5),
 ('contents', 2),
 ('1', 42),
 ('early', 4),
 ('life', 6),
 ('1915', 6),
 ('childhood', 3),
 ('marriage', 3),
 ('1923', 3),
 ('campaigns', 3),
 ('4', 27),
 ('governorship', 7),
 ('year', 13),
 ('impeachment', 10),
 ('elect', 4),
 ('5', 40),
 ('accomplishments', 2),
 ('chaco', 5),
 ('war', 12),
 ('continued', 8),
 ('control', 8),
 ('over', 20),
 ('6', 13),
 ('final', 3),
 ('ambitions', 2),
 ('increased', 6),
 ('tensions', 2),
 ('7', 16),
 ('legacy', 8),
 ('politics', 9),
 ('historical', 21),
 ('reputation', 6),
 ('media', 3),
 ('8', 27),
 ('bibliography', 2),
 ('discography', 2),
 ('9', 23),
 ('also', 9),
 ('notes', 5),
 ('references', 4),
 ('citations', 2),
 ('cited', 2),
 ('on', 124),
 ('near', 1),
 ('small', 2),
 ('town', 1),
 ('central', 1),
 ('winn', 4),
 ('parish', 7),
 ('told', 3),
 ('followers', 1),
 ('log', 2),
 ('cabin', 1),
 ('family', 7),
 ('lived', 1),
 ('comfortable', 1),
 ('farmhouse', 1),
 ('well', 7),
 ('off', 4),
 ('compared', 3),
 ('residents', 1),
 ('mostly', 1),
 ('southern', 9),
 ('baptists', 1),
 ('outsiders', 1),
 ('civil', 2),
 ('had', 50),
 ('been', 23),
 ('stronghold', 1),
 ('unionism', 1),
 ('otherwise', 1),
 ('confederate', 1),
 ('1861', 1),
 ('convention', 2),
 ('secession', 1),
 ('delegate', 1),
 ('voted', 6),
 ('remain', 2),
 ('union', 3),
 ('saying', 3),
 ('wants', 2),
 ('fight', 3),
 ('negroes', 1),
 ('wealthy', 2),
 ('planters', 1),
 ('note', 16),
 ('1890s', 1),
 ('bastion', 1),
 ('1912', 2),
 ('plurality', 2),
 ('35', 4),
 ('socialist', 2),
 ('candidate', 5),
 ('eugene', 1),
 ('v', 3),
 ('debs', 1),
 ('embraced', 1),
 ('sentiments', 1),
 ('nine', 1),
 ('home', 1),
 ('schooled', 1),
 ('age', 5),
 ('eleven', 1),
 ('earned', 2),
 ('excellent', 1),
 ('student', 2),
 ('remarkable', 2),
 ('memory', 2),
 ('convinced', 2),
 ('teachers', 2),
 ('let', 3),
 ('skip', 1),
 ('seventh', 1),
 ('grade', 1),
 ('high', 3),
 ('school', 12),
 ('friends', 1),
 ('formed', 5),
 ('secret', 4),
 ('society', 3),
 ('advertising', 2),
 ('their', 17),
 ('exclusivity', 1),
 ('wearing', 1),
 ('red', 2),
 ('ribbon', 1),
 ('according', 11),
 ('club', 1),
 ('mission', 1),
 ('run', 4),
 ('things', 2),
 ('laying', 1),
 ('down', 3),
 ('certain', 1),
 ('rules', 2),
 ('students', 3),
 ('follow', 1),
 ('faculty', 1),
 ('learned', 3),
 ('antics', 2),
 ('warned', 2),
 ('obey', 1),
 ('rebel', 1),
 ('writing', 3),
 ('distributing', 2),
 ('flyer', 1),
 ('criticized', 7),
 ('necessity', 1),
 ('mandated', 1),
 ('fourth', 1),
 ('secondary', 1),
 ('expelled', 1),
 ('1910', 1),
 ('petitioned', 3),
 ('fire', 2),
 ('principal', 1),
 ('never', 4),
 ('returned', 2),
 ('proved', 1),
 ('capable', 1),
 ('debater', 1),
 ('debate', 2),
 ('competition', 1),
 ('won', 6),
 ('full', 3),
 ('tuition', 1),
 ('scholarship', 2),
 ('lsu', 11),
 ('because', 3),
 ('cover', 3),
 ('textbooks', 2),
 ('living', 2),
 ('expenses', 1),
 ('could', 5),
 ('afford', 1),
 ('attend', 3),
 ('instead', 5),
 ('workforce', 1),
 ('rural', 5),
 ('south', 7),
 ('1911', 1),
 ('started', 3),
 ('seminary', 1),
 ('classes', 2),
 ('urging', 1),
 ('mother', 3),
 ('devout', 1),
 ('george', 5),
 ('attended', 5),
 ('only', 9),
 ('semester', 2),
 ('rarely', 1),
 ('appearing', 1),
 ('lectures', 1),
 ('deciding', 1),
 ('unsuited', 1),
 ('preaching', 1),
 ('focused', 1),
 ('law', 10),
 ('11', 28),
 ('borrowing', 1),
 ('hundred', 1),
 ('dollars', 2),
 ('lost', 4),
 ('playing', 1),
 ('roulette', 1),
 ('city', 6),
 ('college', 3),
 ('earn', 1),
 ('money', 5),
 ('studying', 1),
 ('work', 3),
 ('four', 10),
 ('received', 4),
 ('incomplete', 1),
 ('c', 3),
 ('confessed', 2),
 ('little', 3),
 ('there', 10),
 ('much', 5),
 ('excitement', 1),
 ('those', 3),
 ('gambling', 3),
 ('houses', 1),
 ('everything', 1),
 ('met', 4),
 ('baking', 1),
 ('contest', 1),
 ('promoted', 1),
 ('sell', 1),
 ('cottolene', 1),
 ('shortening', 1),
 ('12', 11),
 ('two', 19),
 ('began', 3),
 ('half', 2),
 ('courtship', 1),
 ('married', 2),
 ('april', 7),
 ('gayoso', 1),
 ('hotel', 2),
 ('memphis', 1),
 ('tennessee', 2),
 ('13', 4),
 ('wedding', 1),
 ('day', 3),
 ('no', 12),
 ('cash', 2),
 ('borrow', 1),
 ('fiancée', 1),
 ('pay', 2),
 ('officiant', 1),
 ('14', 12),
 ('shortly', 3),
 ('revealed', 4),
 ('aspirations', 1),
 ('statewide', 1),
 ('ultimately', 3),
 ('presidency', 4),
 ('15', 14),
 ('longs', 1),
 ('daughter', 1),
 ('named', 2),
 ('1917', 2),
 ('2006', 35),
 ('sons', 1),
 ('1918', 3),
 ('2003', 2),
 ('palmer', 2),
 ('reid', 2),
 ('1921', 2),
 ('2010', 5),
 ('oilman', 2),
 ('shreveport', 4),
 ('16', 13),
 ('17', 7),
 ('enrolled', 1),
 ('orleans', 20),
 ('fall', 3),
 ('1914', 1),
 ('18', 11),
 ('study', 2),
 ('concentrated', 3),
 ('courses', 1),
 ('necessary', 1),
 ('exam', 1),
 ('permission', 1),
 ('test', 1),
 ('scheduled', 1),
 ('june', 67),
 ('date', 1),
 ('examined', 2),
 ('passed', 10),
 ('license', 2),
 ('practice', 3),
 ('19', 11),
 ('i', 19),
 ('came', 4),
 ('out', 8),
 ('courtroom', 1),
 ('running', 1),
 ('20', 25),
 ('usually', 2),
 ('workers', 4),
 ('compensation', 1),
 ('cases', 1),
 ('22', 12),
 ('avoided', 1),
 ('fighting', 1),
 ('world', 4),
 ('obtaining', 1),
 ('draft', 2),
 ('deferment', 1),
 ('grounds', 4),
 ('dependent', 2),
 ('child', 2),
 ('defended', 1),
 ('prosecution', 1),
 ('under', 9),
 ('espionage', 1),
 ('act', 5),
 ('loaned', 1),
 ('complete', 1),
 ('studies', 2),
 ('claimed', 7),
 ('serve', 2),
 ('mad', 1),
 ('anybody', 1),
 ('23', 7),
 ('invested', 1),
 ('050', 1),
 ('equivalent', 2),
 ('066', 1),
 ('2020', 112),
 ('struck', 1),
 ('company', 8),
 ('refused', 2),
 ('accept', 1),
 ('any', 5),
 ('pipelines', 2),
 ('costing', 1),
 ('investment', 1),
 ('24', 25),
 ('episode', 1),
 ('catalyst', 1),
 ('hatred', 1),
 ('card', 4),
 ('sporting', 2),
 ('face', 5),
 ('surrounded', 4),
 ('text', 3),
 ('respectfully', 1),
 ('vote', 8),
 ('railroad', 3),
 ('p', 165),
 ('same', 1),
 ('race', 5),
 ('historian', 1),
 ('ivy', 2),
 ('hair', 18),
 ('message', 2),
 ('repeated', 3),
 ('end', 6),
 ('young', 3),
 ('warrior', 1),
 ('plain', 1),
 ('people', 11),
 ('battling', 1),
 ('evil', 1),
 ('giants', 1),
 ('america', 13),
 ('few', 5),
 ('hands', 2),
 ('unfairness', 1),
 ('perpetuated', 1),
 ('educational', 1),
 ('so', 5),
 ('stacked', 1),
 ('against', 14),
 ('statistics', 1),
 ('fourteen', 1),
 ('thousand', 4),
 ('obtained', 1),
 ('way', 3),
 ('begin', 1),
 ('rectifying', 1),
 ('wrongs', 1),
 ('turn', 3),
 ('corrupt', 3),
 ('flunkies', 1),
 ('big', 3),
 ('business', 3),
 ('true', 3),
 ('men', 11),
 ('26', 10),
 ('primary', 5),
 ('polled', 3),
 ('incumbent', 5),
 ('burk', 2),
 ('bridges', 3),
 ('since', 4),
 ('garnered', 1),
 ('majority', 5),
 ('votes', 9),
 ('held', 3),
 ('campaigned', 4),
 ('tirelessly', 1),
 ('across', 2),
 ('northern', 2),
 ('defeated', 3),
 ('just', 6),
 ('636', 1),
 ('27', 10),
 ('returns', 1),
 ('areas', 4),
 ('performed', 2),
 ('poorly', 1),
 ('urban', 3),
 ('forced', 1),
 ('utilities', 2),
 ('lower', 1),
 ('rates', 1),
 ('ordered', 2),
 ('railroads', 1),
 ('extend', 1),
 ('towns', 1),
 ('demanded', 2),
 ('cease', 1),
 ('importation', 1),
 ('mexican', 1),
 ('crude', 1),
 ('use', 4),
 ('more', 8),
 ('wells', 2),
 ('28', 10),
 ('29', 13),
 ('1920', 2),
 ('heavily', 2),
 ('john', 8),
 ('parker', 9),
 ('today', 1),
 ('credited', 2),
 ('helping', 1),
 ('parishes', 2),
 ('31', 11),
 ('bitter', 2),
 ('rivals', 1),
 ('break', 1),
 ('largely', 3),
 ('caused', 2),
 ('demand', 2),
 ('refusal', 1),
 ('declare', 1),
 ('infuriated', 2),
 ('when', 15),
 ('allowed', 1),
 ('companies', 6),
 ('led', 5),
 ('team', 1),
 ('assist', 1),
 ('severance', 1),
 ('laws', 5),
 ('corporate', 2),
 ('chattel', 1),
 ('feud', 1),
 ('climaxed', 1),
 ('tried', 2),
 ('unsuccessfully', 1),
 ('ousted', 1),
 ('1922', 1),
 ('become', 2),
 ('chairman', 3),
 ('now', 12),
 ('called', 11),
 ('cumberland', 1),
 ('telephone', 2),
 ('telegraph', 2),
 ('unfair', 1),
 ('rate', 2),
 ('increases', 1),
 ('case', 5),
 ('appeal', 4),
 ('32', 3),
 ('resulted', 2),
 ('refunds', 1),
 ('thousands', 2),
 ('overcharged', 1),
 ('customers', 1),
 ('33', 11),
 ('decision', 3),
 ('34', 4),
 ('main', 7),
 ('announced', 2),
 ('candidacy', 2),
 ('stumped', 1),
 ('throughout', 3),
 ('personally', 1),
 ('circulars', 2),
 ('posters', 1),
 ('stooge', 1),
 ('vilified', 2),
 ('assailed', 1),
 ('bosses', 2),
 ('36', 11),
 ('disenfranchised', 1),
 ('establishment', 3),
 ('old', 11),
 ('regulars', 3),
 ('1877', 1),
 ('republican', 3),
 ('controlled', 2),
 ('reconstruction', 2),
 ('government', 10),
 ('alliances', 1),
 ('officials', 2),
 ('37', 3),
 ('negligible', 1),
 ('republicans', 3),
 ('essentially', 1),
 ('holding', 3),
 ('mock', 1),
 ('invoked', 2),
 ('confederacy', 1),
 ('presided', 1),
 ('benefited', 1),
 ('planter', 1),
 ('38', 2),
 ('consequently', 2),
 ('least', 2),
 ('developed', 2),
 ('300', 5),
 ('miles', 5),
 ('paved', 2),
 ('roads', 6),
 ('lowest', 1),
 ('literacy', 1),
 ('39', 7),
 ('40', 4),
 ('despite', 5),
 ('enthusiastic', 1),
 ('third', 4),
 ('eliminated', 1),
 ('polls', 2),
 ('projected', 1),
 ('attracted', 1),
 ('almost', 1),
 ('72', 3),
 ('000', 15),
 ('around', 3),
 ('electorate', 2),
 ('carried', 1),
 ('than', 7),
 ('either', 3),
 ('opponent', 4),
 ('limited', 3),
 ('sectional', 1),
 ('best', 1),
 ('ku', 1),
 ('klux', 1),
 ('klan', 2),
 ('issue', 2),
 ('candidates', 2),
 ('strongly', 1),
 ('opposed', 4),
 ('supported', 10),
 ('remained', 2),
 ('neutral', 1),
 ('alienating', 1),
 ('sides', 2),
 ('attract', 1),
 ('roman', 1),
 ...]
In [41]:
lst.sort(key=lambda x: -x[1])
In [48]:
lst;
In [43]:
len(tokens)
Out[43]:
14527
In [44]:
len(counts)
Out[44]:
3523
In [45]:
vocab = set(counts.keys())
In [49]:
vocab;
In [51]:
cc = [x[1] for x in lst]
In [54]:
cc;
In [55]:
import matplotlib.pyplot as plt
%matplotlib inline
In [57]:
plt.scatter( range(len(cc)),  cc )
Out[57]:
<matplotlib.collections.PathCollection at 0x7ff2bac65130>
In [58]:
plt.scatter( range(len(cc)),  [math.log10(x) for x in cc] )
Out[58]:
<matplotlib.collections.PathCollection at 0x7ff2981482e0>
In [60]:
## Zipf plot:  log rank vs log frequency
plt.scatter(  [math.log10(x+1) for x in range(len(cc))],  [math.log10(x) for x in cc] )
Out[60]:
<matplotlib.collections.PathCollection at 0x7ff2681e6e20>