📋 文章目录

  • 一、数据展示
  • 二、模型分析
  • 三、调用函数
  • 四、循环比较所有结果



   一般来说,anova是可以完成多重比较的,但由于数据是非等长,因此统计功效会大幅缩减,这里故而使用非参数检验 pairwise.wilcox.test()函数。

一、数据展示

   本例中使用的R包为openxlsxtidyverseagricolaeexport

data = read.csv("C:/Users/GJ/Desktop/data_0_100.csv", dec=".", header = TRUE)
str(data)
# 'data.frame': 1552 obs. of 4 variables:
# $ factor1: Factor w/ 10 levels "1","2","3","4",..: 8 7 4 4 3 3 1 4 4 3 ...
# $ factor2: Factor w/ 8 levels "1","2","3","4",..: 2 1 6 2 5 5 2 6 2 2 ...
# $ factor3: Factor w/ 6 levels "3","4","5","6",..: 4 4 4 4 1 1 3 3 5 6 ...
# $ Y : num 150.2 210.1 -17.6 126.7 -9.8 ...
#

#简单分组统计看看函数
library(tidyverse)
data %>% group_by(factor1) %>% summarise(mean=mean(Y)) %>% as.data.frame()
data %>% group_by(factor1) %>% summarise(mean=mean(Y), n=n()) %>% as.data.frame()
# factor1 mean n
# 1 1 54.78004 60
# 2 2 40.06969 30
# 3 3 60.18953 971
# 4 4 56.99819 146
# 5 5 85.57797 30
# 6 6 43.08903 40
# 7 7 175.39072 25
# 8 8 80.51860 113
# 9 9 44.93028 127
# 10 10 85.18720 10

看到在不同水平下,y的数量并不相同,需求就是对比不同水平下y是不是存在显著差异,因为有3个factor,最好是循环完成3个factor下的工作。

二、模型分析

#模型分析
model <- pairwise.wilcox.test(data$Y,data$factor1,p.adjust.method = "bonferroni")

# 在这里 multcompView包里的函数multcompLetters()
# 可以对p值矩阵进行标字母处理,
# 所以我们需要先提取model里的p值。

#函数用于提取model的p value
extraxt_P<-function(x)
{
  rn<-row.names(x)
  cn<-colnames(x)
  an<-unique(c(cn,rn))
  myval<-x[!is.na(x)]
  mymat<-matrix(1,nrow=length(an),ncol=length(an),dimnames=list(an,an))
  for(ext in 1:length(cn))
  {
    for(int in 1:length(rn))
    {
      if(is.na(x[row.names(x)==rn[int],colnames(x)==cn[ext]])) next
      mymat[row.names(mymat)==rn[int],colnames(mymat)==cn[ext]]<-x[row.names(x)==rn[int],colnames(x)==cn[ext]]
      mymat[row.names(mymat)==cn[ext],colnames(mymat)==rn[int]]<-x[row.names(x)==rn[int],colnames(x)==cn[ext]]
    }
    
  }
  return(mymat)
}

multcompLetters(extraxt_P(model$p.value))
# 1 2 3 4 5 6 7 8 9 10
# "abc" "ab" "ac" "ac" "cd" "ab" "e" "d" "b" "abcd"

上面的代码分析显示multcompLetters可以对多重比较进行标字母表示显著性。但这里存在一个问题:出来的结果的字母顺序并不是按照均值大小排列。一般来说,在多重比较时候,最大的值是标a,但这里是根据不同水平出现的先后顺序标的,所以有点不尽如人意,所以有了下面的函数。

三、调用函数

mul_sig <- function (data, alpha=0.05, model)
{
 # data 某factor下,目标变量的按照均值由大到小排列的数据框
  # model 非参数比较的模型
  extraxt_P<-function(x)
  {
    rn<-row.names(x)
    cn<-colnames(x)
    an<-unique(c(cn,rn))
    myval<-x[!is.na(x)]
    mymat<-matrix(1,nrow=length(an),ncol=length(an),dimnames=list(an,an))
    for(ext in 1:length(cn))
    {
      for(int in 1:length(rn))
      {
        if(is.na(x[row.names(x)==rn[int],colnames(x)==cn[ext]])) next
        mymat[row.names(mymat)==rn[int],colnames(mymat)==cn[ext]]<-x[row.names(x)==rn[int],colnames(x)==cn[ext]]
        mymat[row.names(mymat)==cn[ext],colnames(mymat)==rn[int]]<-x[row.names(x)==rn[int],colnames(x)==cn[ext]]
      }
    }
    return(mymat)
  }
  
  lastC <- function (x)
  {
    y <- sub(" +$", "", x)
    p1 <- nchar(y)
    cc <- substr(y, p1, p1)
    return(cc)
  }
  
  pvalue <- extraxt_P(model$p.value)
  n <- dim(data)[1]
  z <- data.frame(data)
  letras <- c(letters[1:26], LETTERS[1:26], 1:9, c(".", "+",
                                                   "-", "*", "/", "#", "$", "%", "&", "^", "[", "]", ":",
                                                   "@", ";", "_", "?", "!", "=", "#", rep(" ", 2000)))
  w <- z[order(z[, 2], decreasing = TRUE), ]
  M <- rep("", n)
  k <- 1
  k1 <- 0
  j <- 1
  i <- 1
  cambio <- n
  cambio1 <- 0
  chequeo = 0
  M[1] <- letras[k]
  q <- as.numeric(rownames(w))
  while (j < n) {
    chequeo <- chequeo + 1
    if (chequeo > n)
      break
    for (i in j:n) {
      s <- pvalue[q[i], q[j]] > alpha
      if (s) {
        if (lastC(M[i]) != letras[k])
          M[i] <- paste(M[i], letras[k], sep = "")
      }
      else {
        k <- k + 1
        cambio <- i
        cambio1 <- 0
        ja <- j
        for (jj in cambio:n) M[jj] <- paste(M[jj], "", sep = "")
        M[cambio] <- paste(M[cambio], letras[k], sep = "")
        for (v in ja:cambio) {
          if (pvalue[q[v], q[cambio]] <= alpha) {
            j <- j + 1
            cambio1 <- 1
          }
          else break
        }
        break
      }
    }
    if (cambio1 == 0)
      j <- j + 1
  }
  w <- data.frame(w, stat = M)
  return(w)
}

# 调用函数
mydat <- data %>% group_by(factor1) %>% summarise(mean=mean(Y)) %>% as.data.frame()
model <- pairwise.wilcox.test(data$Y, data100$factor1, p.adjust.method = "bonferroni")
mul_sig(mydat,0.05,model)
# factor1 mean stat
# 7 7 175.39072 a
# 5 5 85.57797 b
# 10 10 85.18720 bc
# 8 8 80.51860 bc
# 3 3 60.18953 bc
# 4 4 56.99819 bc
# 1 1 54.78004 bc
# 9 9 44.93028 c
# 6 6 43.08903 c
# 2 2 40.06969 c

四、循环比较所有结果

# 进行循环
res <- list() # 储存结果
treat <-c(quo(factor1), quo(factor2),quo(factor3))
for(j in 1:3){
  mydat <- data100 %>% group_by(!!treat[[j]]) %>% summarise(mean=mean(Y)) %>% as.data.frame()
  model <- pairwise.wilcox.test(data$Y, data100[,j], p.adjust.method = "bonferroni")
  res[[j]] <- mul_sig(mydat,0.05,model)
}
str(res)
# List of 3
# $ :'data.frame': 10 obs. of 3 variables:
# ..$ factor1: Factor w/ 10 levels "1","2","3","4",..: 7 5 10 8 3 4 1 9 6 2
# ..$ mean : num [1:10] 175.4 85.6 85.2 80.5 60.2 ...
# ..$ stat : chr [1:10] "a" "b" "bc" "bc" ...
# $ :'data.frame': 8 obs. of 3 variables:
# ..$ factor2: Factor w/ 8 levels "1","2","3","4",..: 4 8 2 3 7 6 1 5
# ..$ mean : num [1:8] 183.8 74.6 64.4 63.2 56.8 ...
# ..$ stat : chr [1:8] "a" "ab" "bc" "bcd" ...
# $ :'data.frame': 6 obs. of 3 variables:
# ..$ factor3: Factor w/ 6 levels "3","4","5","6",..: 4 3 5 6 2 1
# ..$ mean : num [1:6] 71.3 69.2 58.5 48.5 47.8 ...
# ..$ stat : chr [1:6] "a" "a" "a" "b" ...
res[[2]]
# factor2 mean stat
# 4 4 183.80988 a
# 8 8 74.64407 ab
# 2 2 64.38166 bc
# 3 3 63.19971 bcd
# 7 7 56.75159 cd
# 6 6 56.19029 d
# 1 1 56.03927 d
# 5 5 51.34603 d
res[[3]]
# factor3 mean stat
# 4 6 71.25338 a
# 3 5 69.18140 a
# 5 7 58.48289 a
# 6 8 48.46200 b
# 2 4 47.84567 b
# 1 3 47.20666 b

至此,已经完成了所有非参数的多重比较,按照均值大小标注字母。