R包WGCNA---转录组WGCNA共表达网络构建(无表型信息)
- 1. 下载R包WGCNA
- 2. 运行步骤
- 2.1参数筛选和模块计算
- 2.2 全部基因所属模块信息输出
- 2.3 计算KME值并输出筛选基因结果
- 2.4 导出Cytoscape格式网络数据
- 2.5 从TOM矩阵中提取固定基因集的Cytoscape数据
- 参考
最近有一个需求,需要使用多个分组的RNA-seq数据(包含CK在内共30个处理)进行共表达网络构建。将网上搜索的资料整理如下,作为记录。
1. 下载R包WGCNA
目前,共表达网络构建通常使用WGCNA这个R包,下载的时候按照官方文档即可。
下载:
当然也可以选择当前版本的R下继续安装,这就需要手动安装很多R包,最终完成安装,官网中也有相关的方法介绍。如果使用的Mac OS X系统,有时安装失败,也要重新安装才行【1】。
为了方便,我选择conda新建R4.1环境,然后下载最新的R版本R4.1.2,再完成安装。
## R version 4.1.2 (2021-11-01) -- "Bird Hippie"
install.packages("BiocManager")
BiocManager::install("WGCNA")
2. 运行步骤
2.1参数筛选和模块计算
参看【2】的链接,我们可以找到其中相关的教程方法,并且详细描述了相关的代码和图片结果。
使用下面的代码得到样本聚类图、软阈值筛选图、模块聚类图和模块相关性图
。并保存网络计算结果net.wgcna.RData
以及TOM矩阵结果
。
rm(list = ls()) ## clear variable space
load("04.stages_mean_30column_exp_stat.RData")
## load packages
require(WGCNA) ## co-expression package
require(reshape2)
require(stringr)
#require(data.table) ## fast reading files
#options(datatable.fread.datatable=FALSE)
options(stringsAsFactors = FALSE)
## open multiple threads
enableWGCNAThreads()
## set some params
type = "signed" # "unsigned"
corType = "pearson"
corFnc = ifelse(corType=="pearson", cor, bicor)
maxPOutliers = ifelse(corType=="pearson",1,0.05)
robustY = ifelse(corType=="pearson",T,F)
load("04.stages_mean_30column_exp_stat.RData")
#total_stages_exp
dataE=t(total_stages_exp)
print(dim(dataE))
print(paste0("Expression data is OK"))
## plot tree
sampleTree = hclust(dist(dataE), method = "ward.D") # average
pdf(file="Fig01.hclust.tree.pdf")
plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="")
dev.off()
nGenes = ncol(dataE)
nSamples = nrow(dataE)
# cal soft value and plot tree
powers = c(c(1:10), seq(from = 12, to=30, by=2))
sft = pickSoftThreshold(dataE, powerVector=powers, networkType=type, verbose=5)
pdf(file="Fig02.picksoft_threshold.pdf")
par(mfrow = c(1,2))
cex1 = 0.9
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
xlab="Soft Threshold (power)",
ylab="Scale Free Topology Model Fit,signed R^2",type="n",
main = paste("Scale independence"))
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
labels=powers,cex=cex1,col="red")
## select standards: R-square=0.85
abline(h=0.65,col="red")
## Soft threshold与平均连通性
plot(sft$fitIndices[,1], sft$fitIndices[,5],
xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
main = paste("Mean connectivity"))
text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers,
cex=cex1, col="red")
dev.off()
print(paste0("softthreshold is ",sft$powerEstimate))
### calculate net
power = 14 # sft$powerEstimate estimation value
net = blockwiseModules(dataE, power = power,
maxBlockSize = 100000, TOMType = type,
minModuleSize = 30,reassignThreshold = 0,
mergeCutHeight = 0.27,numericLabels = TRUE,
pamRespectsDendro = FALSE,saveTOMs=TRUE,
corType = corType, maxPOutliers=maxPOutliers,
loadTOM =TRUE, saveTOMFileBase = "dataExpTOM",
verbose = 3)
table(net$colors) # module number
pdf(file="Fig03.cluster_module.color.pdf")
moduleLabels = net$colors
moduleColors = labels2colors(moduleLabels)
unique(moduleColors)
# plot clustered modules and tree
plotDendroAndColors(net$dendrograms[[1]], moduleColors[net$blockGenes[[1]]],
"Module colors",
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05)
dev.off()
# plot module correlation heatmap
pdf(file="Fig04.cluster_heatmap.color.pdf")
MEs = net$MEs
MEs_col = MEs
colnames(MEs_col) = paste0("ME", labels2colors(as.numeric(str_replace_all(colnames(MEs),"ME",""))))
MEs_col = orderMEs(MEs_col)
plotEigengeneNetworks(MEs_col, "Eigengene adjacency heatmap",
marDendro = c(3,3,2,4),
marHeatmap = c(3,4,2,2), plotDendrograms = T,
xLabelsAngle = 90)
dev.off()
save(net,file="net.wgcna.RData")
TOM矩阵热图绘制,有时矩阵太大,容易运行时间长,或者不出结果。
TOM = TOMsimilarityFromExpr(dataE, power=14,corType=corType, networkType=type)
## 使用load方法也可以
#load("net.wgcna.RData")
#load(net$TOMFiles[1], verbose=T)
## Loading objects:
## TOM
TOM <- as.matrix(TOM)
dissTOM = 1-TOM
# Transform dissTOM with a power to make moderately strong
# connections more visible in the heatmap
plotTOM = dissTOM^7
# Set diagonal to NA for a nicer plot
diag(plotTOM) = NA
# Call the plot function
pdf(file="TOM.pdf")
TOMplot(plotTOM, net$dendrograms, moduleColors,
main = "Network heatmap plot, all genes")
dev.off()
2.2 全部基因所属模块信息输出
load("net.wgcna.RData")
test=data.frame(color=net$colors)
test$gene=rownames(test)
write.table(test,file="f01.total_module_gene.record.txt",sep="\t",row.names=F,col.names=T,quote=F)
## 文件内容如下,第一列为基因名,第二列为所属模块的数字编号
gene color
Nitab4.5_0000471g0100 0
Nitab4.5_0005432g0020 2
Nitab4.5_0000002g0180 2
Nitab4.5_0002510g0040 5
2.3 计算KME值并输出筛选基因结果
## 计算KME结果
require(WGCNA)
load("04.stages_mean_30column_exp_stat.RData")
datKME=signedKME(dataE, MEs, outputColumnName="kME_MM.")
write.table(datKME, file="kME_MM.txt",quote=F,sep="\t")
## 筛选模块内基因
FilterGenes = abs(datKME$kME_MM.3) > 0.8
table(FilterGenes)
hubgene_lightyellow <- dimnames(data.frame(dataE))[[2]][FilterGenes]
hubset1=data.frame(gene=hubgene_lightyellow)
write.table(hubset1,file="f06.hubset1.gene.txt",quote=F,sep="\t",row.names=F,col.names=T)
## 也可以每个模块只筛选一个Hub gene
moduleLabels = net$colors
moduleColors = labels2colors(moduleLabels)
HubGenes <- chooseTopHubInEachModule(dataE, moduleColors)
2.4 导出Cytoscape格式网络数据
## 将上面KME筛选的f06.hubset1.gene.txt基因与我们RNAseq分析得到的候选基因取交集,得到f09.brown.module.hub.gene.txt
## 使用这部分数据绘制网络
modules = c("brown")
inModule = is.finite(match(moduleColors, modules))
modProbes = probes[inModule]
dataE=as.data.frame(dataE)
probes = names(dataE)
modProbes = probes[inModule]
modGenes=read.table("f09.brown.module.hub.gene.txt")$V1
modTOM = TOM[inModule, inModule]
dimnames(modTOM) = list(modProbes, modProbes)
## 导出固定模块内的网络数据
cyt = exportNetworkToCytoscape(modTOM,edgeFile = paste("CytoscapeInput-edges-",
paste(modules, collapse="-"), ".txt", sep=""),
nodeFile = paste("CytoscapeInput-nodes-", paste(modules, collapse="-"), ".txt", sep=""),
weighted = TRUE,threshold = 0.1,
nodeNames = modProbes,altNodeNames = modGenes,
nodeAttr = moduleColors[inModule])
2.5 从TOM矩阵中提取固定基因集的Cytoscape数据
后面和人交流发现,可以依据TOM矩阵,提取给定基因的weight值,没必要拘束在一个模块内,以此构建网络。
load("dataExpTOM-block.1.RData")
load("04.stages_mean_30column_exp_stat.RData")
require(WGCNA)
target=read.table("f14.extract_68_known_181gene.txt",header=F,stringsAsFactors=F)
total=rownames(total_stages_exp)
idx=which(total %in% target$V1)
tom_res <- TOM[idx,idx]
rownames(TOM)=total
colnames(TOM)=total
TOM[1:5,1:5]
tom_res <- TOM[total[idx],total[idx]]
## 输出TOM矩阵中选定基因集合的网络weight信息
cyt = exportNetworkToCytoscape(tom_res,
edgeFile = paste("CytoscapeInput-edges-249gene", ".txt", sep=""),
nodeFile = paste("CytoscapeInput-nodes-249gene", ".txt", sep=""),
weighted = TRUE,threshold = 0.1)
## 输出TOM矩阵中全部的网络weight信息
modTOM2=TOM
cyt = exportNetworkToCytoscape(modTOM2,edgeFile = paste("CytoscapeInput-edges-total0.1", ".txt", sep=""), nodeFile = paste("CytoscapeInput-nodes-tom_total0.1", ".txt", sep=""),weighted = TRUE,threshold = 0.1)
cyt = exportNetworkToCytoscape(modTOM2,edgeFile = paste("CytoscapeInput-edges-total0.3", ".txt", sep=""), nodeFile = paste("CytoscapeInput-nodes-tom_total0.3", ".txt", sep=""),weighted = TRUE,threshold = 0.3)
cyt = exportNetworkToCytoscape(modTOM2,edgeFile = paste("CytoscapeInput-edges-tom_total0.4", ".txt", sep=""), nodeFile = paste("CytoscapeInput-nodes-tom_total0.4", ".txt", sep=""),weighted = TRUE,threshold = 0.4)
参考
- https://horvath.genetics.ucla.edu/html/CoexpressionNetwork/Rpackages/WGCNA/
- https://horvath.genetics.ucla.edu/html/CoexpressionNetwork/Rpackages/WGCNA/Tutorials/index.html
- https://horvath.genetics.ucla.edu/html/CoexpressionNetwork/Rpackages/WGCNA/faq.html
- https://horvath.genetics.ucla.edu/html/CoexpressionNetwork/Rpackages/WGCNA/WGCNA_1.70-3.pdf
- WGCNA: an R package for weighted correlation network analysis