import os,sys,glob,re,itertools,json,random
from collections import Counter, defaultdict
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = [5.2,5] ## fairly square plots
#plt.rcParams['figure.figsize'] = [14,6] ## fills notebook width
%matplotlib inline
## Observed data
# One entry per evaluated item (presumably 1 = correct, 0 = incorrect --
# confirm against the data source); its mean is the observed accuracy.
obs_correctness = np.asarray([1, 1, 1, 0, 0, 1, 1, 0, 1])
obs_correctness.size
9
obs_correctness.mean()
0.6666666666666666
## Null hypothesis
# Under H0 each answer is correct with probability null_p_correct.
null_p_correct = 0.5
# random.random() is uniform on [0, 1), so `random.random() < p` is True with
# probability p.  (Using `> p` would produce a 1 with probability 1 - p, which
# only coincides with H0 when p == 0.5.)
null_correctness_data = [int(random.random() < null_p_correct) for _ in range(9)]
null_correctness_data
[0, 1, 1, 1, 0, 1, 1, 1, 0]
# Draw a fresh null sample of 9 answers and look at its accuracy.
null_correctness_data = [int(random.random() < null_p_correct) for _ in range(9)]
np.mean(null_correctness_data)
0.4444444444444444
# Monte Carlo null distribution: simulate Nsim experiments of 9 answers each
# (9 is the length of the observed data) and keep the accuracy of each one.
Nsim = int(100e3)
null_sim_acc = [
    # `random.random() < p` is True with probability p, so each simulated
    # answer is correct with probability null_p_correct.  (`> p` would give
    # probability 1 - p, which is only right when p == 0.5.)
    np.mean([int(random.random() < null_p_correct) for _ in range(9)])
    for _sim in range(Nsim)
]
# NOTE(review): the recorded output below holds multiples of 1/47
# (e.g. 0.4255... = 20/47), so it was captured from a 47-sample run rather
# than this 9-sample one -- re-run the cell to refresh it.
null_sim_acc[:10]
[0.425531914893617, 0.48936170212765956, 0.46808510638297873, 0.425531914893617, 0.44680851063829785, 0.5957446808510638, 0.48936170212765956, 0.5531914893617021, 0.5319148936170213, 0.46808510638297873]
# Histogram of the simulated null accuracies (no bins argument is passed, so
# matplotlib's default binning is used).
plt.hist(null_sim_acc)
(array([ 192., 1771., 7162., 16454., 24319., 24450., 16549., 7143., 1759., 201.]), array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1. ]), <BarContainer object of 10 artists>)
## p-value: is the observed accuracy an outlier under the null hypothesis?
# Fraction of null simulations whose accuracy is strictly below the observed
# accuracy (simulations that tie with the observed value are not counted).
(np.asarray(null_sim_acc) < obs_correctness.mean()).mean()
0.74348
## "p-value": complement of above, i.e. the fraction of null simulations whose
## accuracy is at least as high as the observed accuracy (one-sided test).
# `<=` rather than `<`: a p-value counts results at least as extreme as the
# observed one, so ties must be included.  Accuracies here only take a few
# discrete values, so ties are frequent and a strict `<` under-states the
# p-value.  For the same simulated draws the result is 1 - 0.74348.
np.mean(np.mean(obs_correctness) <= np.array(null_sim_acc))
0.25652
# Larger observed data set: the 9 outcomes used above followed by 38 more,
# laid out one run of identical outcomes per row.
obs_correctness = np.array([
    1, 1, 1, 0, 0, 1, 1, 0, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    0, 0, 0, 0,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    0, 0, 0,
    1, 1, 1, 1, 1, 1, 1,
    0, 0, 0, 0,
])
# Number of observed samples; reused as the size of each simulated experiment.
Nsamp = obs_correctness.shape[0]
Nsamp
47
# Monte Carlo null distribution for the larger data set: Nsim simulated
# experiments of Nsamp answers each, keeping the accuracy of each experiment.
Nsim = int(100e3)
null_sim_acc = [
    # `random.random() < p` is True with probability p, so each simulated
    # answer is correct with probability null_p_correct.  (`> p` would give
    # probability 1 - p, which is only right when p == 0.5.)
    np.mean([int(random.random() < null_p_correct) for _ in range(Nsamp)])
    for _sim in range(Nsim)
]
# Histogram of the null accuracies simulated with Nsamp answers per
# experiment (matplotlib's default binning).
plt.hist(null_sim_acc)
(array([1.3000e+01, 3.7700e+02, 3.5470e+03, 1.5070e+04, 1.9311e+04, 3.3528e+04, 2.0828e+04, 6.3860e+03, 8.8800e+02, 5.2000e+01]), array([0.19148936, 0.25106383, 0.3106383 , 0.37021277, 0.42978723, 0.4893617 , 0.54893617, 0.60851064, 0.66808511, 0.72765957, 0.78723404]), <BarContainer object of 10 artists>)
##
## Previous paper says their model got 75% accuracy
## You got 92% accuracy... on the same test set of size N
##
## Null hypothesis: the new system is no better than the old system.
## H0: the new system's true accuracy = 75%