B = 100000
milk_estimates = pd.DataFrame({"milk_estimate":np.zeros(B)}) #딕셔너리를 데이터로 전달
for i in range(B):
milk_first_choice = np.random.choice(P,n_milk_first,replace=False)
milk_estimates.loc[i,"milk_estimate"] = np.sum(milk_first_choice)
milk_estimates.head(10)
milk_estimate | |
---|---|
0 | 6.0 |
1 | 6.0 |
2 | 4.0 |
3 | 5.0 |
4 | 6.0 |
5 | 5.0 |
6 | 4.0 |
7 | 5.0 |
8 | 4.0 |
9 | 5.0 |
n_cups = 8
n_milk_first = 4
opposite = n_cups - n_milk_first
P = np.concatenate((np.ones(n_milk_first),np.zeros(opposite)))
observed_val = 4
B = 1000000
estimates = pd.DataFrame({"milk_estimates":np.zeros(B)})
for i in range(B):
sample_milk = np.random.choice(P,n_milk_first,replace=False)
estimates.loc[i,"milk_estimates"] = np.sum(sample_milk)
a,b = estimates[estimates["milk_estimates"]>=observed_val].shape
p_value = a/B
print(p_value)
0.014372
B = 100000
observed_val = 620
sample = np.random.binomial(1000,0.54,B)
p_value = np.sum(sample >= observed_val)/B
p_value
0.0
1.929786053178617e-07
url = "https://ilovedata.github.io/teaching/bigdata2/data/physical_test_2018_data.csv"
physical_data = pd.read_csv(url)
twogroup = physical_data[["TEST_AGE", "TEST_SEX", "ITEM_F001", "ITEM_F002"]].rename(columns = {"ITEM_F001":"height","ITEM_F002":"weight"})
twogroup.head(10)
TEST_AGE | TEST_SEX | height | weight | |
---|---|---|---|---|
0 | 33 | M | 159.2 | 57.2 |
1 | 48 | F | 155.8 | 52.9 |
2 | 22 | M | 175.2 | 96.2 |
3 | 29 | M | 178.7 | 79.4 |
4 | 31 | F | 160.1 | 50.2 |
5 | 23 | F | 157.8 | 60.1 |
6 | 11 | M | 165.5 | 60.3 |
7 | 24 | M | 174.9 | 74.5 |
8 | 18 | M | 181.0 | 71.3 |
9 | 41 | F | 160.6 | 72.7 |
#randomnization
a,b = twogroup.shape
treatment = np.array([np.random.choice(["A","B"]) for i in np.arange(a)])
twogroup["treatment"] = treatment
twogroup.head(10)
TEST_AGE | TEST_SEX | height | weight | treatment | |
---|---|---|---|---|---|
0 | 33 | M | 159.2 | 57.2 | A |
1 | 48 | F | 155.8 | 52.9 | B |
2 | 22 | M | 175.2 | 96.2 | B |
3 | 29 | M | 178.7 | 79.4 | A |
4 | 31 | F | 160.1 | 50.2 | B |
5 | 23 | F | 157.8 | 60.1 | B |
6 | 11 | M | 165.5 | 60.3 | A |
7 | 24 | M | 174.9 | 74.5 | B |
8 | 18 | M | 181.0 | 71.3 | A |
9 | 41 | F | 160.6 | 72.7 | B |
filename = "https://ilovedata.github.io/teaching/bigdata2/data/drug.csv"
three_drug_wide = pd.read_csv(filename)
three_drug_wide
Placebo | Old | New | |
---|---|---|---|
0 | 31 | 23 | 23 |
1 | 28 | 17 | 17 |
2 | 34 | 29 | 11 |
3 | 36 | 23 | 11 |
4 | 33 | 17 | 9 |
5 | 27 | 17 | 16 |
6 | 39 | 22 | 16 |
7 | 25 | 17 | 14 |
8 | 23 | 23 | 17 |
9 | 29 | 25 | 18 |
10 | 36 | 17 | 10 |
11 | 36 | 24 | 7 |
12 | 32 | 19 | 14 |
13 | 30 | 18 | 24 |
14 | 33 | 5 | 18 |
15 | 39 | 26 | 17 |
16 | 30 | 33 | 14 |
17 | 32 | 16 | 14 |
18 | 30 | 20 | 16 |
19 | 28 | 27 | 11 |
신약그룹이 기존약 그룹보다 효과가 있는가?
- 귀무가설 : 효과없다.
- 데이터 관측 & 검정통계량 설정
def diff_mean(df,treatment):
twodrug = df.loc[(df.treatment == treatment[0]) | (df.treatment == treatment[1])].reset_index(drop=True) #reset_index 반드시 해주기
group_mean = twodrug.groupby("treatment").mean().reset_index()
print(group_mean)
test_stat = float(group_mean.loc[group_mean.treatment == treatment[0],"Value"]) - float(group_mean.loc[group_mean.treatment == treatment[1],"Value"])
return test_stat
diff_mean(three_drug_long,["Old","New"])
treatment Value
0 New 14.85
1 Old 20.90
6.049999999999999
- 귀무가설 하에서의 분포 설정
cond = (three_drug_long.treatment == "Old")|(three_drug_long.treatment == "New")
twodrug = three_drug_long[cond].reset_index(drop=True)
random_permuted_treat = np.array(twodrug.treatment.sample(frac=1.0,replace=False))
two_drug_permuted = twodrug
two_drug_permuted["permuted_treat"] = random_permuted_treat
two_drug_permuted
treatment | Value | permuted_treat | |
---|---|---|---|
0 | Old | 23 | Old |
1 | Old | 17 | New |
2 | Old | 29 | Old |
3 | Old | 23 | New |
4 | Old | 17 | New |
5 | Old | 17 | Old |
6 | Old | 22 | Old |
7 | Old | 17 | Old |
8 | Old | 23 | Old |
9 | Old | 25 | Old |
10 | Old | 17 | New |
11 | Old | 24 | Old |
12 | Old | 19 | Old |
13 | Old | 18 | New |
14 | Old | 5 | Old |
15 | Old | 26 | New |
16 | Old | 33 | New |
17 | Old | 16 | New |
18 | Old | 20 | New |
19 | Old | 27 | New |
20 | New | 23 | New |
21 | New | 17 | New |
22 | New | 11 | New |
23 | New | 11 | Old |
24 | New | 9 | Old |
25 | New | 16 | New |
26 | New | 16 | Old |
27 | New | 14 | Old |
28 | New | 17 | Old |
29 | New | 18 | Old |
30 | New | 10 | Old |
31 | New | 7 | New |
32 | New | 14 | New |
33 | New | 24 | Old |
34 | New | 18 | Old |
35 | New | 17 | New |
36 | New | 14 | New |
37 | New | 14 | New |
38 | New | 16 | New |
39 | New | 11 | Old |