import os,sys,glob,re,itertools,json,random
from collections import Counter, defaultdict
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
## Default figure size for every plot below, given as [width, height].
plt.rcParams['figure.figsize'] = [5.2,5] ## fairly square plots
## Alternative kept for reference: a wide figure instead of a square one.
#plt.rcParams['figure.figsize'] = [14,6]  ## fills notebook width
%matplotlib inline


## Observed data

## One entry per evaluated item: 1 marks a correct answer, 0 an incorrect one.
obs_correctness = np.array(
    [1, 1, 1, 0, 0, 1, 1, 0, 1],
)


## Number of observed items
obs_correctness.shape[0]

9


## Observed accuracy = fraction of 1s in the observed outcomes
obs_correctness.mean()

0.6666666666666666


## Null hypothesis

## Probability that a single item is answered correctly under the null
## hypothesis.
null_p_correct = 0.5


## One simulated data set under the null, the same size as the observed data.
## `random.random() < p` is True with probability p, so each entry is 1
## ("correct") with probability null_p_correct.  (The reverse comparison
## would give 1 with probability 1 - null_p_correct, which only happens to
## coincide at 0.5.)
null_correctness_data = [
    int(random.random() < null_p_correct)
    for _ in range(len(obs_correctness))
]
null_correctness_data

[0, 1, 1, 1, 0, 1, 1, 1, 0]


## Draw a fresh null data set (same size as the observed data) and report its
## accuracy.  Each entry is 1 with probability null_p_correct, because
## `random.random() < p` is True with probability p.
null_correctness_data = [
    int(random.random() < null_p_correct)
    for _ in range(len(obs_correctness))
]
np.mean(null_correctness_data)

0.4444444444444444


## Null distribution of accuracy: repeat the null experiment Nsim times and
## keep the accuracy of each simulated data set.
Nsim = int(100e3)
null_sim_acc = [
    np.mean([
        ## 1 ("correct") with probability null_p_correct
        int(random.random() < null_p_correct)
        ## same number of items as the observed data, so the simulated
        ## accuracies are comparable with the observed accuracy
        for _ in range(len(obs_correctness))
    ])
    for _sim in range(Nsim)
]


## Peek at the first ten simulated accuracies
null_sim_acc[:10]

[0.425531914893617,
 0.48936170212765956,
 0.46808510638297873,
 0.425531914893617,
 0.44680851063829785,
 0.5957446808510638,
 0.48936170212765956,
 0.5531914893617021,
 0.5319148936170213,
 0.46808510638297873]


## Histogram of the simulated accuracies, i.e. the null distribution
plt.hist(null_sim_acc)

(array([  192.,  1771.,  7162., 16454., 24319., 24450., 16549.,  7143.,
         1759.,   201.]),
 array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1. ]),
 <BarContainer object of 10 artists>)


## p-value: is the observed accuracy an outlier relative to the null distribution?


# fraction of null simulations whose accuracy is strictly below the
# observed accuracy
(np.asarray(null_sim_acc) < obs_correctness.mean()).mean()

0.74348


## One-sided p-value: fraction of null simulations whose accuracy is at least
## as high as the observed accuracy.
## Ties must count as "at least as extreme": accuracy only takes the discrete
## values k/N, so a strict `<` would drop every simulation that exactly equals
## the observed accuracy and understate the p-value.
np.mean(np.array(null_sim_acc) >= np.mean(obs_correctness))

0.09103


## Larger observed data set (1 = correct, 0 = incorrect), laid out one run of
## identical outcomes per row after the first nine items.
obs_correctness = np.array([
    1, 1, 1, 0, 0, 1, 1, 0, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    0, 0, 0, 0,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    0, 0, 0,
    1, 1, 1, 1, 1, 1, 1,
    0, 0, 0, 0,
])


## Number of observed items
Nsamp = obs_correctness.shape[0]
Nsamp

47


## Null distribution of accuracy for data sets of Nsamp items: repeat the null
## experiment Nsim times and keep the accuracy of each simulated data set.
Nsim = int(100e3)
null_sim_acc = [
    ## `random.random() < p` is True with probability p, so each simulated
    ## item is 1 ("correct") with probability null_p_correct.
    np.mean([int(random.random() < null_p_correct) for _ in range(Nsamp)])
    for _sim in range(Nsim)
]


## Histogram of the simulated accuracies (the null distribution).
## Accuracy can only take the values k/Nsamp, so use one bin per possible
## value, centred on it.  The default of 10 equal-width bins puts an uneven
## number of possible values into each bin, which makes the bars jagged.
plt.hist(null_sim_acc, bins=(np.arange(Nsamp + 2) - 0.5) / Nsamp)

(array([1.3000e+01, 3.7700e+02, 3.5470e+03, 1.5070e+04, 1.9311e+04,
        3.3528e+04, 2.0828e+04, 6.3860e+03, 8.8800e+02, 5.2000e+01]),
 array([0.19148936, 0.25106383, 0.3106383 , 0.37021277, 0.42978723,
        0.4893617 , 0.54893617, 0.60851064, 0.66808511, 0.72765957,
        0.78723404]),
 <BarContainer object of 10 artists>)


##
## Previous paper says their model got 75% accuracy
## You got 92% accuracy... on the same test set of size N
##
## Null hypo?  New system is no better than old system.
## H0:  accuracy=75%

## Randomized null hypothesis test