# 数据下载 (Data download)
# Start from a clean workspace: removes every object in the current environment.
rm(list = ls())
library(GEOquery)
# Relative paths used below (the getGEO() cache directory '.' and the saved
# "data12.RData" file) resolve against this machine-specific directory.
setwd('D:\\陈思杰\\大创')
# GEO series accessions to fetch, in download order.
GSE <- c(
  "GSE85680", "GSE106817", "GSE110651",
  "GSE112264", "GSE113486", "GSE113740",
  "GSE119892", "GSE122497", "GSE124158",
  "GSE134108", "GSE137140", "GSE164174",
  "GSE139031"
)

# Download all of the series listed above from GEO (files are cached in the
# working directory) and flatten the results into a single list, `data`.
# getGEO() returns one named element per series-matrix file, so a series with
# several matrix files contributes several entries. Each entry keeps the name
# it had in the getGEO() result.
data <- NULL
for (accession in GSE) {
  series <- getGEO(accession, destdir = ".")
  for (matrix_name in names(series)) {
    # `series[matrix_name]` is a one-element list that keeps the element name,
    # so c() appends the object together with its name.
    data <- c(data, series[matrix_name])
  }
}
# Keep only `data` in the workspace.
rm(list = setdiff(ls(), "data"))

# Drop the GPL18941 series matrices of the three series that have one.
data[paste0(
  c("GSE85680", "GSE124158", "GSE134108"),
  "-GPL18941_series_matrix.txt.gz"
)] <- NULL

# Strip the "-GPL21263" platform suffix from the remaining names, then drop
# GSE85680 entirely (its remaining entry only has this name after the rename).
names(data) <- sub("-GPL21263", "", names(data))
data[["GSE85680_series_matrix.txt.gz"]] <- NULL

# Persist the pruned list in the working directory.
save(data, file = "data12.RData")
# 数据筛查 (Data screening)
# Reload the saved list of datasets (restores the object `data`).
load("D:/陈思杰/大创/data12.RData")

# For every dataset, store its phenotype table (pData) in a variable named
# after the dataset: the "_series_matrix.txt.gz" suffix is removed and the
# first "-" (if any) becomes "_", e.g. "GSE106817_series_matrix.txt.gz"
# becomes the variable `GSE106817`.
# assign() creates the variable directly instead of building and evaluating
# source text with eval(parse()).
for (name in names(data)) {
  assign(
    sub("-", "_", sub("_series_matrix.txt.gz", "", name)),
    pData(data[[name]])
  )
}
# Do not leave the loop variable behind in the workspace.
rm(name)
# GSE accession label for every sample.
# `len[k]` is the number of entries contributed by the k-th dataset; the values
# line up with the `rep(NA, n)` fillers used in the `sex` / `stage` vectors,
# i.e. with the dataset order GSE106817, GSE110651, ..., GSE139031, GSE164174.
# The accessions are spelled out here because the earlier `GSE` vector is
# removed from the workspace before this point (and it also contained the
# discarded GSE85680 in a different order).
len <- c(4046, 147, 1591, 972, 1817, 66, 5531, 1370, 71, 3924, 580, 2934)
# The labels are laid out last-dataset-first (GSE164174 block first, GSE106817
# block last), which is the order the original prepend-in-a-loop produced.
# NOTE(review): confirm that the code combining the per-dataset columns uses
# the same reversed order.
gse <- rep(
  rev(c(
    "GSE106817", "GSE110651", "GSE112264", "GSE113486",
    "GSE113740", "GSE119892", "GSE122497", "GSE124158",
    "GSE134108", "GSE137140", "GSE139031", "GSE164174"
  )),
  times = rev(len)
)
# GSM accession: one R expression (kept as a string) per dataset, each
# selecting column 2 of that dataset's phenotype table.
GSM <- paste0(
  c(
    "GSE106817", "GSE110651", "GSE112264", "GSE113486",
    "GSE113740", "GSE119892", "GSE122497", "GSE124158",
    "GSE134108", "GSE137140", "GSE139031", "GSE164174"
  ),
  "[,2]"
)
# Age: one R expression (kept as a string) per dataset, selecting the
# phenotype-table column that holds age. The column index differs per dataset.
age <- sprintf(
  "%s[,%d]",
  c(
    "GSE106817", "GSE110651", "GSE112264", "GSE113486",
    "GSE113740", "GSE119892", "GSE122497", "GSE124158",
    "GSE134108", "GSE137140", "GSE139031", "GSE164174"
  ),
  c(32L, 34L, 36L, 35L, 43L, 36L, 38L, 38L, 36L, 34L, 31L, 33L)
)
# Sex: one R expression (kept as a string) per dataset. Datasets for which no
# sex column is used get an NA filler of fixed length instead.
# The names only document which dataset each entry belongs to; unname() drops
# them so the result is a plain character vector.
sex <- unname(c(
  GSE106817 = "rep(NA,4046)",
  GSE110651 = "rep(NA,147)",
  GSE112264 = "GSE112264[,41]",
  GSE113486 = "GSE113486[,39]",
  GSE113740 = "GSE113740[,53]",
  GSE119892 = "GSE119892[,39]",
  GSE122497 = "GSE122497[,41]",
  GSE124158 = "GSE124158[,39]",
  GSE134108 = "rep(NA,71)",
  GSE137140 = "GSE137140[,36]",
  GSE139031 = "GSE139031[,32]",
  GSE164174 = "GSE164174[,36]"
))
# Cancer type: one R expression (kept as a string) per dataset. Most entries
# select a phenotype-table column; a few wrap the column in as.character() or
# strip digits from column 1 with sub().
# The names only document which dataset each entry belongs to; unname() drops
# them so the result is a plain character vector.
type <- unname(c(
  GSE106817 = "as.character(GSE106817[,19])",
  GSE110651 = "GSE110651[,35]",
  GSE112264 = "GSE112264[,40]",
  GSE113486 = "GSE113486[,36]",
  GSE113740 = "GSE113740[,48]",
  GSE119892 = "GSE119892[,37]",
  GSE122497 = "GSE122497[,40]",
  GSE124158 = "GSE124158[,40]",
  GSE134108 = "sub('[0-9]+','',GSE134108[,1])",
  GSE137140 = "GSE137140[,35]",
  GSE139031 = "as.character(GSE139031[,19])",
  GSE164174 = "GSE164174[,34]"
))
# Cancer stage: one R expression (kept as a string) per dataset. Datasets for
# which no stage column is used get an NA filler of fixed length instead.
# The names only document which dataset each entry belongs to; unname() drops
# them so the result is a plain character vector.
stage <- unname(c(
  GSE106817 = "GSE106817[,33]",
  GSE110651 = "rep(NA,147)",
  GSE112264 = "GSE112264[,37]",
  GSE113486 = "GSE113486[,37]",
  GSE113740 = "GSE113740[,51]",
  GSE119892 = "rep(NA,66)",
  GSE122497 = "GSE122497[,39]",
  GSE124158 = "GSE124158[,42]",
  GSE134108 = "rep(NA,71)",
  # NOTE(review): column 35 is also the column used for GSE137140 in `type`;
  # confirm this is really the stage column.
  GSE137140 = "GSE137140[,35]",
  GSE139031 = "rep(NA,580)",
  GSE164174 = "GSE164174[,37]"
))