📋 文章目录
- 一、数据展示
- 二、模型分析
- 三、调用函数
- 四、循环比较所有结果
一般来说,anova是可以完成多重比较的,但由于数据是非等长,因此统计功效会大幅缩减,这里故而使用非参数检验 pairwise.wilcox.test()函数。
一、数据展示
本例中使用的R包为openxlsx
,tidyverse
,agricolae
,export
data = read.csv("C:/Users/GJ/Desktop/data_0_100.csv", dec=".", header = TRUE)
str(data)
# 'data.frame': 1552 obs. of 4 variables:
# $ factor1: Factor w/ 10 levels "1","2","3","4",..: 8 7 4 4 3 3 1 4 4 3 ...
# $ factor2: Factor w/ 8 levels "1","2","3","4",..: 2 1 6 2 5 5 2 6 2 2 ...
# $ factor3: Factor w/ 6 levels "3","4","5","6",..: 4 4 4 4 1 1 3 3 5 6 ...
# $ Y : num 150.2 210.1 -17.6 126.7 -9.8 ...
#
#简单分组统计看看函数
library(tidyverse)
data %>% group_by(factor1) %>% summarise(mean=mean(Y)) %>% as.data.frame()
data %>% group_by(factor1) %>% summarise(mean=mean(Y), n=n()) %>% as.data.frame()
# factor1 mean n
# 1 1 54.78004 60
# 2 2 40.06969 30
# 3 3 60.18953 971
# 4 4 56.99819 146
# 5 5 85.57797 30
# 6 6 43.08903 40
# 7 7 175.39072 25
# 8 8 80.51860 113
# 9 9 44.93028 127
# 10 10 85.18720 10
看到在不同水平下,y的数量并不相同,需求就是对比不同水平下y是不是存在显著差异,因为有3个factor,最好是循环完成3个factor下的工作。
二、模型分析
#模型分析
model <- pairwise.wilcox.test(data$Y,data$factor1,p.adjust.method = "bonferroni")
# 在这里 multcompView包里的函数multcompLetters()
# 可以对p值矩阵进行标字母处理,
# 所以我们需要先提取model里的p值。
#函数用于提取model的p value
extraxt_P<-function(x)
{
rn<-row.names(x)
cn<-colnames(x)
an<-unique(c(cn,rn))
myval<-x[!is.na(x)]
mymat<-matrix(1,nrow=length(an),ncol=length(an),dimnames=list(an,an))
for(ext in 1:length(cn))
{
for(int in 1:length(rn))
{
if(is.na(x[row.names(x)==rn[int],colnames(x)==cn[ext]])) next
mymat[row.names(mymat)==rn[int],colnames(mymat)==cn[ext]]<-x[row.names(x)==rn[int],colnames(x)==cn[ext]]
mymat[row.names(mymat)==cn[ext],colnames(mymat)==rn[int]]<-x[row.names(x)==rn[int],colnames(x)==cn[ext]]
}
}
return(mymat)
}
multcompLetters(extraxt_P(model$p.value))
# 1 2 3 4 5 6 7 8 9 10
# "abc" "ab" "ac" "ac" "cd" "ab" "e" "d" "b" "abcd"
上面的代码分析显示multcompLetters可以对多重比较进行标字母表示显著性。但这里存在一个问题:出来的结果的字母顺序并不是按照均值大小排列。一般来说,在多重比较时候,最大的值是标a,但这里是根据不同水平出现的先后顺序标的,所以有点不尽如人意,所以有了下面的函数。
三、调用函数
mul_sig <- function (data, alpha=0.05, model)
{
# data 某factor下,目标变量的按照均值由大到小排列的数据框
# model 非参数比较的模型
extraxt_P<-function(x)
{
rn<-row.names(x)
cn<-colnames(x)
an<-unique(c(cn,rn))
myval<-x[!is.na(x)]
mymat<-matrix(1,nrow=length(an),ncol=length(an),dimnames=list(an,an))
for(ext in 1:length(cn))
{
for(int in 1:length(rn))
{
if(is.na(x[row.names(x)==rn[int],colnames(x)==cn[ext]])) next
mymat[row.names(mymat)==rn[int],colnames(mymat)==cn[ext]]<-x[row.names(x)==rn[int],colnames(x)==cn[ext]]
mymat[row.names(mymat)==cn[ext],colnames(mymat)==rn[int]]<-x[row.names(x)==rn[int],colnames(x)==cn[ext]]
}
}
return(mymat)
}
lastC <- function (x)
{
y <- sub(" +$", "", x)
p1 <- nchar(y)
cc <- substr(y, p1, p1)
return(cc)
}
pvalue <- extraxt_P(model$p.value)
n <- dim(data)[1]
z <- data.frame(data)
letras <- c(letters[1:26], LETTERS[1:26], 1:9, c(".", "+",
"-", "*", "/", "#", "$", "%", "&", "^", "[", "]", ":",
"@", ";", "_", "?", "!", "=", "#", rep(" ", 2000)))
w <- z[order(z[, 2], decreasing = TRUE), ]
M <- rep("", n)
k <- 1
k1 <- 0
j <- 1
i <- 1
cambio <- n
cambio1 <- 0
chequeo = 0
M[1] <- letras[k]
q <- as.numeric(rownames(w))
while (j < n) {
chequeo <- chequeo + 1
if (chequeo > n)
break
for (i in j:n) {
s <- pvalue[q[i], q[j]] > alpha
if (s) {
if (lastC(M[i]) != letras[k])
M[i] <- paste(M[i], letras[k], sep = "")
}
else {
k <- k + 1
cambio <- i
cambio1 <- 0
ja <- j
for (jj in cambio:n) M[jj] <- paste(M[jj], "", sep = "")
M[cambio] <- paste(M[cambio], letras[k], sep = "")
for (v in ja:cambio) {
if (pvalue[q[v], q[cambio]] <= alpha) {
j <- j + 1
cambio1 <- 1
}
else break
}
break
}
}
if (cambio1 == 0)
j <- j + 1
}
w <- data.frame(w, stat = M)
return(w)
}
# 调用函数
mydat <- data %>% group_by(factor1) %>% summarise(mean=mean(Y)) %>% as.data.frame()
model <- pairwise.wilcox.test(data$Y, data100$factor1, p.adjust.method = "bonferroni")
mul_sig(mydat,0.05,model)
# factor1 mean stat
# 7 7 175.39072 a
# 5 5 85.57797 b
# 10 10 85.18720 bc
# 8 8 80.51860 bc
# 3 3 60.18953 bc
# 4 4 56.99819 bc
# 1 1 54.78004 bc
# 9 9 44.93028 c
# 6 6 43.08903 c
# 2 2 40.06969 c
四、循环比较所有结果
# 进行循环
res <- list() # 储存结果
treat <-c(quo(factor1), quo(factor2),quo(factor3))
for(j in 1:3){
mydat <- data100 %>% group_by(!!treat[[j]]) %>% summarise(mean=mean(Y)) %>% as.data.frame()
model <- pairwise.wilcox.test(data$Y, data100[,j], p.adjust.method = "bonferroni")
res[[j]] <- mul_sig(mydat,0.05,model)
}
str(res)
# List of 3
# $ :'data.frame': 10 obs. of 3 variables:
# ..$ factor1: Factor w/ 10 levels "1","2","3","4",..: 7 5 10 8 3 4 1 9 6 2
# ..$ mean : num [1:10] 175.4 85.6 85.2 80.5 60.2 ...
# ..$ stat : chr [1:10] "a" "b" "bc" "bc" ...
# $ :'data.frame': 8 obs. of 3 variables:
# ..$ factor2: Factor w/ 8 levels "1","2","3","4",..: 4 8 2 3 7 6 1 5
# ..$ mean : num [1:8] 183.8 74.6 64.4 63.2 56.8 ...
# ..$ stat : chr [1:8] "a" "ab" "bc" "bcd" ...
# $ :'data.frame': 6 obs. of 3 variables:
# ..$ factor3: Factor w/ 6 levels "3","4","5","6",..: 4 3 5 6 2 1
# ..$ mean : num [1:6] 71.3 69.2 58.5 48.5 47.8 ...
# ..$ stat : chr [1:6] "a" "a" "a" "b" ...
res[[2]]
# factor2 mean stat
# 4 4 183.80988 a
# 8 8 74.64407 ab
# 2 2 64.38166 bc
# 3 3 63.19971 bcd
# 7 7 56.75159 cd
# 6 6 56.19029 d
# 1 1 56.03927 d
# 5 5 51.34603 d
res[[3]]
# factor3 mean stat
# 4 6 71.25338 a
# 3 5 69.18140 a
# 5 7 58.48289 a
# 6 8 48.46200 b
# 2 4 47.84567 b
# 1 3 47.20666 b
至此,已经完成了所有非参数的多重比较,按照均值大小标注字母。