import os,sys,glob,re,itertools,json,random
from collections import Counter, defaultdict
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = [5.2,5] ## fairly square plots
#plt.rcParams['figure.figsize'] = [14,6] ## fills notebook width
%matplotlib inline
## Observed data
# One 0/1 entry per evaluated item; the mean of this vector is the accuracy.
obs_correctness = np.array(
    [
        1, 1, 1, 0, 0,
        1, 1, 0, 1, 1,
        1, 0, 1, 1, 0,
    ]
)
# Number of observations (the size of every bootstrap resample).
obs_correctness.shape[0]
15
# Observed accuracy: the mean of the 0/1 correctness vector.
obs_correctness.mean()
0.6666666666666666
# Bootstrap sampling
# Build Nboot resampled datasets, each drawn WITH replacement from the
# observed outcomes and of the same size as the original data, and record the
# accuracy (mean correctness) of every resample in boot_accs.
Nboot = int(1e6)
n_obs = len(obs_correctness)
# Unseeded generator: the draws (and hence the exact results) differ from run
# to run.  This replaces the stdlib `random` module used previously.
rng = np.random.default_rng()
# Resamples are drawn in vectorized batches instead of one Python-level
# iteration per replicate; batching keeps the temporary (batch, n_obs) array
# small no matter how large Nboot is.
batch_size = 100_000
boot_accs = []
for start in range(0, Nboot, batch_size):
    n_rows = min(batch_size, Nboot - start)
    # Row b holds the n_obs outcomes of one bootstrap replicate.
    resampled = rng.choice(obs_correctness, size=(n_rows, n_obs), replace=True)
    # One accuracy per replicate; boot_accs stays a plain list of NumPy floats.
    boot_accs.extend(resampled.mean(axis=1))
# Sanity check: exactly one accuracy per bootstrap replicate.
len(boot_accs)
1000000
# Histogram of the bootstrap accuracies, i.e. the bootstrap sampling
# distribution of the accuracy.
# NOTE(review): every accuracy is the mean of len(obs_correctness) 0/1 values,
# so only a few distinct values can occur; with 30 equal-width bins some bins
# are necessarily empty.  Consider bin edges aligned to the attainable values.
plt.hist(boot_accs, bins=30)
(array([2.00000e+00, 0.00000e+00, 2.30000e+01, 0.00000e+00, 2.27000e+02, 0.00000e+00, 1.58300e+03, 0.00000e+00, 6.60900e+03, 0.00000e+00, 2.21340e+04, 0.00000e+00, 5.74050e+04, 0.00000e+00, 0.00000e+00, 1.14706e+05, 0.00000e+00, 1.78207e+05, 0.00000e+00, 2.14508e+05, 0.00000e+00, 1.95423e+05, 0.00000e+00, 1.29838e+05, 0.00000e+00, 5.97590e+04, 0.00000e+00, 1.72700e+04, 0.00000e+00, 2.30600e+03]), array([0.06666667, 0.09777778, 0.12888889, 0.16 , 0.19111111, 0.22222222, 0.25333333, 0.28444444, 0.31555556, 0.34666667, 0.37777778, 0.40888889, 0.44 , 0.47111111, 0.50222222, 0.53333333, 0.56444444, 0.59555556, 0.62666667, 0.65777778, 0.68888889, 0.72 , 0.75111111, 0.78222222, 0.81333333, 0.84444444, 0.87555556, 0.90666667, 0.93777778, 0.96888889, 1. ]), <BarContainer object of 30 artists>)
# Observed accuracy again, for comparison with the bootstrap distribution.
obs_correctness.mean()
0.6666666666666666
# Percentile interval: the 2.5th and 97.5th percentiles of the bootstrap
# accuracies bound the central 95% of the bootstrap distribution.
interval_percentiles = [2.5, 97.5]
np.percentile(np.asarray(boot_accs), interval_percentiles)
array([0.4 , 0.86666667])
?plt.hist