https://github.com/thomas-haslwanter/statsintro_python/tree/master/ISP/Code_Quantlets/08_TestsMeanValues/anovaTwoway

H0假设

F_第一因素 = 第一因素均方 / 组内误差均方

F_第二因素 = 第二因素均方 / 组内误差均方

F_第一第二因素交互 = 第一第二因素交互均方 / 组内误差均方（其中 均方 = 平方和 / 自由度）

spss应用

R²=0.518，说明年龄和性别只能解释分数约一半的变异，其余的波动由其他因素造成。

python代码测试结果和spss一致

If you have k groups and N is the total sample size across all groups, then N - k must be greater than zero. Beyond that, there is no minimum size for each group, except that each group needs at least 2 elements so that its variance can be calculated; this is only a theoretical criterion.

However, to use ANOVA you need to check that each group is normally distributed, and the larger your groups are, the more likely they are to look normally distributed.

Is there a minimum number per group neccessary for an ANOVA?. Available from: https://www.researchgate.net/post/Is_there_a_minimum_number_per_group_neccessary_for_an_ANOVA [accessed Jun 2, 2017].

spss结果

python结果和spss结果一致

python使用了参数检验和非参数检验

#原创公众号(python风控模型)
from scipy.stats.mstats import kruskalwallis
import scipy.stats as stats
import numpy as np
import scipy as sp
# Two samples of six observations each, used by the tests further down.
# NOTE(review): the names suggest results for paper vs. TV advertising - confirm.
list_paper=[8,12,22,14,10,18]
list_TV=[12,8,26,30,18,14]
# Both samples bundled into one list, the form Kruskawallis_test is called with.
list_group=[list_paper,list_TV]
def Kruskawallis_test(list_group):
    """Run a Kruskal-Wallis H test on several groups and print the outcome.

    Args:
        list_group: Sequence of samples, one sequence of numbers per group.
            The groups may have different lengths.

    Returns:
        True if the p-value is below 0.05 (the groups differ significantly),
        otherwise False.
    """
    print("Use kruskawallis test:")
    # Unpack so that every group is passed as its own sample. Handing over
    # the whole list as a single argument only works when all groups have
    # the same length, because it is then read as one 2-D array.
    h, p = kruskalwallis(*list_group)
    print("H value: {}".format(h))
    print("p {}".format(p))
    if p < 0.05:
        print('There is a significant difference.')
        return True
    print('No significant difference.')
    return False

def Mannwhitneyu(group1, group2):
    """Run a two-sided Mann-Whitney U test and print the result.

    Prints the tuple ("Mann-Whitney test", p_value) followed by a verdict
    line based on a 0.05 significance level.

    Args:
        group1: First sample (sequence of numbers).
        group2: Second sample (sequence of numbers).

    Returns:
        None. The result is only printed.
    """
    # The "alternative" keyword appeared in SciPy 0.17. Compare the
    # (major, minor) pair: looking at the minor number alone misclassifies
    # 1.x releases as "older than 0.17" and doubles a p-value that is
    # already two-sided.
    scipy_version = tuple(int(part) for part in sp.__version__.split('.')[:2])
    if scipy_version >= (0, 17):
        _, p_value = stats.mannwhitneyu(group1, group2, alternative='two-sided')
    else:
        _, p_value = stats.mannwhitneyu(group1, group2, use_continuity=True)
        p_value *= 2  # older SciPy reported a one-sided p-value
    # Plain float keeps the printed tuple free of NumPy scalar wrappers.
    p_value = float(p_value)
    print(("Mann-Whitney test", p_value))

    if p_value < 0.05:
        print("there is significant difference")
    else:
        print("there is no significant difference")

# Compare the two samples with an independent-samples t-test.
print(stats.ttest_ind(list_paper,list_TV))
# NOTE(review): Mannwhitneyu prints its own report and has no return
# statement, so this outer print additionally emits "None".
print(Mannwhitneyu(list_paper,list_TV))

# Prints the test report, then the True/False verdict the function returns.
print(Kruskawallis_test(list_group))
# NOTE(review): list_adPlan1/2/3 are not defined in the visible part of this
# file - confirm they exist before this line runs, otherwise it is a NameError.
print(stats.f_oneway(list_adPlan1,list_adPlan2,list_adPlan3))

python 与spss结果一致

variance_check.py

import scipy,math
from scipy.stats import f
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
from statsmodels.stats.diagnostic import lillifors
from statsmodels.sandbox.stats.multicomp import multipletests
import itertools

# NOTE(review): presumably the significance level (alpha); no code visible
# in this file reads it - confirm before relying on or removing it.
a=0.05
def _ks_p_value_against_fitted_normal(sample):
    """Return the Kolmogorov-Smirnov p-value of *sample* against a normal
    distribution that has the sample's own mean and standard deviation."""
    mean = np.mean(sample)
    sd = np.std(sample, ddof=1)
    if not sd > 0:
        # No spread (or an undefined one): nothing to fit, report non-normal.
        return 0.0
    # Passing 'norm' without args would test against N(0, 1) and reject any
    # sample that is not already standardized.
    return stats.kstest(sample, 'norm', args=(mean, sd))[1]


def check_normality(testData, alpha=0.05):
    """Test whether a sample looks normally distributed and print a report.

    The test is picked from the sample size:
      * 20 or fewer observations -> stats.shapiro
      * 21 to 49 observations    -> stats.normaltest
      * 50 to 300 observations   -> lillifors (statsmodels)
      * more than 300            -> stats.kstest against a fitted normal

    Args:
        testData: Sequence of numbers.
        alpha: Significance level; a p-value below it means "not normal".

    Returns:
        False if the chosen test gives a p-value below ``alpha``,
        otherwise True.
    """
    size = len(testData)
    if 20 < size < 50:
        label = "use normaltest"
        p_value = stats.normaltest(testData)[1]
    elif size < 50:
        label = "use shapiro:"
        p_value = stats.shapiro(testData)[1]
    elif size <= 300:
        label = "use lillifors:"
        p_value = lillifors(testData)[1]
    else:
        label = "use kstest:"
        p_value = _ks_p_value_against_fitted_normal(testData)

    # Written as "not (p < alpha)" so an undefined (NaN) p-value keeps being
    # reported as normal, exactly as the comparison did before.
    is_normal = not (p_value < alpha)
    print(label)
    print("p of normal: {}".format(p_value))
    if is_normal:
        print("data are normal distributed")
    else:
        print("data are not normal distributed")
    return is_normal

def NormalTest(list_groups):
    """Check every group for normality, stopping at the first failure.

    Args:
        list_groups: Iterable of samples, each handed to check_normality.

    Returns:
        True if check_normality accepts every group (also for an empty
        iterable), False as soon as one group is rejected.
    """
    # all() stops at the first rejected group, so later groups are not tested.
    return all(check_normality(samples) for samples in list_groups)

def Combination(list_groups):
    """Return every pair of groups, in the order of ``list_groups``.

    Args:
        list_groups: Sequence of groups.

    Returns:
        A list of 2-tuples, one per unordered pair of groups. The list is
        empty when fewer than two groups are given.
    """
    # Only the pairs were ever returned, so ask for size-2 combinations
    # directly instead of building every subset size and slicing. This also
    # makes two groups work (one pair) rather than raising IndexError.
    return list(itertools.combinations(list_groups, 2))

def Levene_test(group1, group2, group3):
    """Run Levene's test for equal variances on three groups and print it.

    Args:
        group1: First sample (sequence of numbers).
        group2: Second sample (sequence of numbers).
        group3: Third sample (sequence of numbers).

    Returns:
        False if the p-value is below 0.05 (variances differ),
        otherwise True.
    """
    p = scipy.stats.levene(group1, group2, group3)[1]
    print("levene test:")
    if p < 0.05:
        print("variances of groups are not equal")
        return False
    print("variances of groups are equal")
    return True

def Equal_lenth(list_groups):
    """Tell whether all groups hold the same number of usable observations.

    Entries whose string form is 'nan' or '-inf' are left out of the count;
    every other entry (including 'inf') is counted.

    Args:
        list_groups: Sequence of groups. Any number of groups is accepted,
            not only three.

    Returns:
        True if every group has the same count of usable entries (also for
        an empty sequence or a single group), otherwise False.
    """
    def usable_count(group):
        # The string comparison matches how missing values were detected
        # here before, and also works for non-float entries.
        return sum(1 for value in group if str(value) not in ('nan', '-inf'))

    # One distinct count (or none at all) means all groups agree.
    return len({usable_count(group) for group in list_groups}) <= 1

python机器学习生物信息学，博主录制，2k超清
https://edu.51cto.com/sd/3a516

(微信二维码扫一扫报名)