In [9]:
import os,sys,glob,re,itertools,json,random
from collections import Counter, defaultdict
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = [5.2,5] ## fairly square plots
#plt.rcParams['figure.figsize'] = [14,6]  ## fills notebook width
%matplotlib inline
In [2]:
## Observed data

obs_correctness = np.array([1, 1, 1, 0, 0, 1, 1, 0, 1, 1,1,0, 1,1,0])
In [3]:
len(obs_correctness)
Out[3]:
15
In [4]:
# observed accuracy
np.mean(obs_correctness)
Out[4]:
0.6666666666666666
In [13]:
# Bootstrap sampling
boot_accs = []
Nboot = int(1e6)
for _b in range(Nboot):
    # sample WITH replacement, N times.
    resampled_dataset = [  random.choice(obs_correctness)   for i in range(len(obs_correctness))]
    boot_acc =  np.mean(resampled_dataset)
    boot_accs.append(  boot_acc  )
len(boot_accs)
Out[13]:
1000000
In [16]:
plt.hist(boot_accs, bins=30)
Out[16]:
(array([2.00000e+00, 0.00000e+00, 2.30000e+01, 0.00000e+00, 2.27000e+02,
        0.00000e+00, 1.58300e+03, 0.00000e+00, 6.60900e+03, 0.00000e+00,
        2.21340e+04, 0.00000e+00, 5.74050e+04, 0.00000e+00, 0.00000e+00,
        1.14706e+05, 0.00000e+00, 1.78207e+05, 0.00000e+00, 2.14508e+05,
        0.00000e+00, 1.95423e+05, 0.00000e+00, 1.29838e+05, 0.00000e+00,
        5.97590e+04, 0.00000e+00, 1.72700e+04, 0.00000e+00, 2.30600e+03]),
 array([0.06666667, 0.09777778, 0.12888889, 0.16      , 0.19111111,
        0.22222222, 0.25333333, 0.28444444, 0.31555556, 0.34666667,
        0.37777778, 0.40888889, 0.44      , 0.47111111, 0.50222222,
        0.53333333, 0.56444444, 0.59555556, 0.62666667, 0.65777778,
        0.68888889, 0.72      , 0.75111111, 0.78222222, 0.81333333,
        0.84444444, 0.87555556, 0.90666667, 0.93777778, 0.96888889,
        1.        ]),
 <BarContainer object of 30 artists>)
In [19]:
# observed acc
np.mean(obs_correctness)
Out[19]:
0.6666666666666666
In [18]:
np.percentile(np.array(boot_accs),  [2.5, 97.5])
Out[18]:
array([0.4       , 0.86666667])
In [15]:
?plt.hist