maftools包分析和可视化大规模测序研究中的突变注释格式(MAF)文件。该软件包提供了各种功能来执行癌症基因组学中最常用的分析,并以最小的工作量创建功能丰富的可定制可视化。
1. 数据读入,生成MAF对象
# if (!requireNamespace("BiocManager", quietly = TRUE))
# install.packages("BiocManager")
#
# BiocManager::install("maftools")
library(maftools)
# ls("package:maftools")
# 演示数据
laml.maf = system.file('extdata', 'tcga_laml.maf.gz', package = 'maftools')
laml.clin = system.file('extdata', 'tcga_laml_annot.tsv', package = 'maftools')
laml = read.maf(maf = laml.maf, clinicalData = laml.clin) # MAF对象
# 查看MAF对象
getSampleSummary(laml)
getGeneSummary(laml)
getClinicalData(laml)
getFields(laml)
# write.mafSummary(maf = laml, basename = 'laml')
# 所有样本中突变统计图
plotmafSummary(maf = laml, rmOutlier = TRUE, addStat = 'median',
dashboard = TRUE, titvRaw = FALSE)
2. 取MAF子集
## 取MAF对象子集。tsb:subset by these samples (Tumor Sample Barcodes)
# mafObj = FALSE,返回data.frame
subsetMaf(maf = laml, tsb = c('TCGA-AB-3009', 'TCGA-AB-2933'),
mafObj = FALSE)[1:5]
# 默认mafObj = TRUE,返回MAF object
subsetMaf(maf = laml, tsb = c('TCGA-AB-3009', 'TCGA-AB-2933'))
subsetMaf(maf = laml, genes = c('DNMT3A', 'NPM1'), mafObj = FALSE,
query = "Variant_Classification == 'Splice_Site'")
subsetMaf(maf = laml, genes = c('DNMT3A', 'NPM1'),
mafObj = FALSE,
query = "Variant_Classification == 'Splice_Site'",
fields = 'i_transcript_name')
laml_m4 = subsetMaf(maf = laml, clinQuery = "FAB_classification %in% 'M4'")
# Filter MAF by genes or samples
filterMaf(maf = laml, genes =c("TTN", "AHNAK2"))
3. oncoplot 作图
Usage
oncoplot( maf, top = 20, minMut = NULL, genes = NULL, altered = FALSE, drawRowBar = TRUE, drawColBar = TRUE, leftBarData = NULL, leftBarLims = NULL, rightBarData = NULL, rightBarLims = NULL, topBarData = NULL, logColBar = FALSE, includeColBarCN = TRUE, clinicalFeatures = NULL, annotationColor = NULL, annotationDat = NULL, pathways = NULL, path_order = NULL, selectedPathways = NULL, pwLineCol = "#535c68", pwLineWd = 1, draw_titv = FALSE, titv_col = NULL, showTumorSampleBarcodes = FALSE, barcode_mar = 4, barcodeSrt = 90, gene_mar = 5, anno_height = 1, legend_height = 4, sortByAnnotation = FALSE, groupAnnotationBySize = TRUE, annotationOrder = NULL, sortByMutation = FALSE, keepGeneOrder = FALSE, GeneOrderSort = TRUE, sampleOrder = NULL, additionalFeature = NULL, additionalFeaturePch = 20, additionalFeatureCol = "gray70", additionalFeatureCex = 0.9, genesToIgnore = NULL, removeNonMutated = TRUE, fill = TRUE, cohortSize = NULL, colors = NULL, cBioPortal = FALSE, bgCol = "#CCCCCC", borderCol = "white", annoBorderCol = NA, numericAnnoCol = NULL, drawBox = FALSE, fontSize = 0.8, SampleNamefontSize = 1, titleFontSize = 1.5, legendFontSize = 1.2, annotationFontSize = 1.2, sepwd_genes = 0.5, sepwd_samples = 0.25, writeMatrix = FALSE, colbar_pathway = FALSE, showTitle = TRUE, titleText = NULL, showPct = TRUE )
# 基因的突变图
oncoplot(maf = laml, top = 20)
oncoplot(maf = laml, draw_titv = TRUE)
# 自定义颜色
vc_cols = RColorBrewer::brewer.pal(n = 8, name = 'Paired')
names(vc_cols) = c(
'Frame_Shift_Del',
'Missense_Mutation',
'Nonsense_Mutation',
'Multi_Hit',
'Frame_Shift_Ins',
'In_Frame_Ins',
'Splice_Site',
'In_Frame_Del'
)
print(vc_cols)
oncoplot(maf = laml, colors = vc_cols, top = 10)
# 加上突变的信号通路
pathways = data.frame(
Genes = c(
"TP53",
"WT1",
"PHF6",
"DNMT3A",
"DNMT3B",
"TET1",
"TET2",
"IDH1",
"IDH2",
"FLT3",
"KIT",
"KRAS",
"NRAS",
"RUNX1",
"CEBPA",
"ASXL1",
"EZH2",
"KDM6A"
),
Pathway = rep(c(
"TSG", "DNAm", "Signalling", "TFs", "ChromMod"
), c(3, 6, 4, 2, 3)),
stringsAsFactors = FALSE
)
head(pathways)
oncoplot(maf = laml, pathways = pathways)
# 加上临床特征
oncoplot(maf = laml, top = 20,clinicalFeatures = "FAB_classification")
4. 其他突变统计分析图
### 1.plotTiTv
laml.titv = titv(maf = laml, plot = FALSE, useSyn = TRUE)
##plot titv summary:
# 1.contributions of 6 mutational conversion
# 2. Ti/Tv ratios of each sample
plotTiTv(res = laml.titv)
### 2. lollipopPlot
## 绘制棒棒糖的氨基酸变化图。蛋白质结构域来源于PFAM数据库。
lollipopPlot(
maf = laml,
gene = 'DNMT3A',
AACol = 'Protein_Change',
showMutationRate = TRUE,
labelPos = 882, # 只表这个位置的突变:c(882,909),"all"
repel = TRUE
)
### 3. plotProtein
# 画蛋白结构域图
plotProtein(gene = "TP53")
plotProtein(gene = "TP53",refSeqID = "NM_000546")
plotProtein(gene = "DNMT3A")
plotProtein(gene = "KRAS",showDomainLabel=FALSE)
# http://pfam.xfam.org
### 4. rainfallPlot
## 降雨图显示基因组突变热点区域。
brca <- system.file("extdata", "brca.maf.gz", package = "maftools")
brca = read.maf(maf = brca, verbose = FALSE)
rainfallPlot(maf = brca, detectChangePoints = TRUE, pointSize = 0.4)
### 5. tcgaCompare
## 将输入MAF中的突变负荷与来自MC3项目的所有33个TCGA队列进行比较。
par(fig=c(0,0.8,0,0.8)) #c(left, right,bottom,top)
laml.mutload = tcgaCompare(maf = laml, cohortName = 'Example-LAML',
logscale = TRUE, capture_size = 50)
### 6. plotVaf
# 统计基因在各自样本中突变频率的箱线图
plotVaf(maf = laml, vafCol = 'i_TumorVAF_WU') # i_TumorVAF_WU: 样本名称
plotVaf(maf = laml, genes = "DNMT3A",vafCol = 'i_TumorVAF_WU')
### 7. plotCBSsegments
tcga.ab.009.seg <- system.file("extdata", "TCGA.AB.3009.hg19.seg.txt",
package = "maftools")
# 画某一样本的拷贝数变异图
# tcga.ab.009.seg文件字段:Sample Chromosome Start End Num_Probes Segment_Mean
par(fig=c(0.01,1,0,1)) #c(left, right,bottom,top)
plotCBSsegments(cbsFile = tcga.ab.009.seg)
### 8. somaticInteractions
#前25个突变基因的排他/共出现事件统计分析作图。
dev.new()
somaticInteractions(maf = laml, top = 25, pvalue = c(0.05, 0.1))
### 9. plotOncodrive
# 基于变异的位置聚类检测癌症驱动基因并作图
laml.sig = oncodrive(maf = laml, AACol = 'Protein_Change',
minMut = 5, pvalMethod = 'zscore')
head(laml.sig)
plotOncodrive(res = laml.sig, fdrCutOff = 0.01,
useFraction = TRUE, labelSize = 0.5)
### 10. pfamDomains
# 突变结构域统计图
laml.pfam = pfamDomains(maf = laml, AACol = 'Protein_Change', top = 10)
#Protein summary (Printing first 7 columns for display convenience)
laml.pfam$proteinSummary[,1:7, with = FALSE]
#Domain summary (Printing first 3 columns for display convenience)
laml.pfam$domainSummary[,1:3, with = FALSE]
5. 联合生存分析作图
# 根据基因突变分组并作生存分析图
mafSurvival(maf = laml, genes = 'DNMT3A', time = 'days_to_last_followup',
Status = 'Overall_Survival_Status', isTCGA = TRUE)
# 预测与生存相关的基因集
prog_geneset = survGroup(maf = laml, top = 20, geneSetSize = 2,
time = "days_to_last_followup",
Status = "Overall_Survival_Status",
verbose = FALSE)
print(prog_geneset)
# 根据基因集(geneset)突变分组并作生存分析图
mafSurvGroup(maf = laml, geneSet = c("DNMT3A", "FLT3"),
time = "days_to_last_followup",
Status = "Overall_Survival_Status")
6.比较MAF并作图
###不同MAF对象比较##########
primary.apl = system.file("extdata", "APL_primary.maf.gz", package = "maftools")
primary.apl = read.maf(maf = primary.apl)
#Relapse APL MAF
relapse.apl = system.file("extdata", "APL_relapse.maf.gz", package = "maftools")
relapse.apl = read.maf(maf = relapse.apl)
# fisher检验两个队列(MAF),以发现差异突变基因。
pt.vs.rt <- mafCompare(m1 = primary.apl, m2 = relapse.apl,
m1Name = 'Primary', m2Name = 'Relapse',
minMut = 5)
print(pt.vs.rt)
# 不同队列中的基因突变差异森林图。x轴为log10转换优势比,y轴上的差异突变基因。
forestPlot(mafCompareRes = pt.vs.rt, pVal = 0.1)
# 不同队列中的基因突变差异图:显示具体突变类型
genes = c("PML", "RARA", "RUNX1", "ARID1B", "FLT3")
coOncoplot(m1 = primary.apl, m2 = relapse.apl,
m1Name = 'PrimaryAPL', m2Name = 'RelapseAPL',
genes = genes, removeNonMutated = TRUE)
coBarplot(m1 = primary.apl, m2 = relapse.apl,
m1Name = "Primary", m2Name = "Relapse")
lollipopPlot2(m1 = primary.apl, m2 = relapse.apl,
gene ="PML",
AACol1 = "amino_acid_change",
AACol2 = "amino_acid_change",
m1_name = "Primary", m2_name = "Relapse")
# 不同临床特征分组中基因突变差异比较并作富集图(每一组都和其余比)
fab.ce = clinicalEnrichment(maf = laml, clinicalFeature = 'FAB_classification')
fab.ce$groupwise_comparision[p_value < 0.05]
plotEnrichmentResults(enrich_res = fab.ce, pVal = 0.05)
7. 药物相互作用的突变基因分析
# 队列中和药物的相互作用的突变基因
dgi = drugInteractions(maf = laml, fontSize = 0.75)
length(table(dgi$Gene))
names(table(dgi$Gene))
#dgi[which(dgi$category=="LIPID KINASE"),]
dnmt3a.dgi = drugInteractions(genes = "DNMT3A", drugs = TRUE)
#Printing selected columns.
dnmt3a.dgi[,.(Gene, interaction_types, drug_name, drug_claim_name)]
8. 突变基因在致癌通路的富集分析
# 突变基因在已知致癌通路的富集及作图
OncogenicPathways(maf = laml)
PlotOncogenicPathways(maf = laml, pathways = "RTK-RAS")
PlotOncogenicPathways(maf = laml, pathways = "TP53")
9. VAF聚类
#基于变异等位基因频率(VAF)对某一样本的变异进行聚类
#install.packages("mclust")
library("mclust")
tcga.ab.2972.het = inferHeterogeneity(maf = laml, tsb = 'TCGA-AB-2972',
vafCol = 'i_TumorVAF_WU')
print(tcga.ab.2972.het$clusterMeans)
plotClusters(clusters = tcga.ab.2972.het)
seg = system.file('extdata', 'TCGA.AB.3009.hg19.seg.txt', package = 'maftools')
dev.new()
tcga.ab.3009.het = inferHeterogeneity(maf = laml, tsb = 'TCGA-AB-3009',
segFile = seg, vafCol = 'i_TumorVAF_WU')
# 加上拷贝数变异聚类信息
plotClusters(clusters = tcga.ab.3009.het,
genes = 'CN_altered', showCNvars = TRUE)
10. 突变特征(mutational signatures)分析
### Extract mutational signatures###########
# APOBEC富集样品和非APOBEC富集样品之间的差异并作图:
# 包括突变负荷、tCw基序分布和主要突变基因差异的子图。
library("BSgenome.Hsapiens.UCSC.hg19", quietly = TRUE)
laml.tnm = trinucleotideMatrix(maf = laml, prefix = 'chr',
add = TRUE,
ref_genome = "BSgenome.Hsapiens.UCSC.hg19")
dev.new()
plotApobecDiff(tnm = laml.tnm, maf = laml, pVal = 0.2)
#install.packages("NMF")
library('NMF')
laml.sign = estimateSignatures(mat = laml.tnm, nTry = 6,
pConstant = 0.1,
plotBestFitRes = FALSE,
parallel = 2)
plotCophenetic(res = laml.sign)
laml.sig = extractSignatures(mat = laml.tnm, n = 3,
pConstant = 0.1, parallel = 2)
laml.og30.cosm = compareSignatures(nmfRes = laml.sig, sig_db = "legacy")
#Compate against updated version3 60 signatures
laml.v3.cosm = compareSignatures(nmfRes = laml.sig, sig_db = "SBS")
library('pheatmap')
pheatmap::pheatmap(mat = laml.og30.cosm$cosine_similarities,
cluster_rows = FALSE,
main = "cosine similarity against validated signatures")
pheatmap::pheatmap(mat = laml.v3.cosm$cosine_similarities,
cluster_rows = FALSE,
main = "cosine similarity against validated signatures")
maftools::plotSignatures(nmfRes = laml.sig, title_size = 1.2, sig_db = "SBS")
11.其他格式数据转化为MAF对象
# Converts annovar annotations into MAF.
# 参数 MAFobj 默认为FALSE,返回data.frame格式数据
var.annovar = system.file("extdata", "variants.hg19_multianno.txt",
package = "maftools")
var.annovar.maf = annovarToMaf(annovar = var.annovar,
Center = 'CSI-NUS',
refBuild = 'hg19',
MAFobj = TRUE,
tsbCol = 'Tumor_Sample_Barcode',
table = 'ensGene')
# Converts ICGC Simple Somatic Mutation format file to MAF
esca.icgc <- system.file("extdata", "simple_somatic_mutation.open.ESCA-CN.sample.tsv.gz", package = "maftools")
esca.maf <- icgcSimpleMutationToMAF(icgc = esca.icgc,
addHugoSymbol = TRUE)
#Printing first 16 columns for display convenience.
print(esca.maf[1:5,1:16, with = FALSE])
12. 读入并处理gistic输出数据
## Read and summarize gistic output
all.lesions <- system.file("extdata", "all_lesions.conf_99.txt", package = "maftools")
amp.genes <- system.file("extdata", "amp_genes.conf_99.txt", package = "maftools")
del.genes <- system.file("extdata", "del_genes.conf_99.txt", package = "maftools")
scores.gis <- system.file("extdata", "scores.gistic", package = "maftools")
laml.gistic = readGistic(gisticAllLesionsFile = all.lesions, gisticAmpGenesFile = amp.genes, gisticDelGenesFile = del.genes, gisticScoresFile = scores.gis, isTCGA = TRUE)
class(laml.gistic) #An object of class GISTIC
# 突出显示扩增和缺失区域片段的基因组图。
gisticChromPlot(gistic = laml.gistic, markBands = "all")
# 样本数对cytobands拷贝数变异显著性(-log10q)的气泡图
gisticBubblePlot(gistic = laml.gistic)
# 不同样本拷贝数变异图,类似展示基因突变的oncoplot
# par(fig=c(0,0.2,0.2,0.8)) #c(left, right,bottom,top)
#par(fin=c(5,6))
# par(mai =c(0,0.2,0.2,2)) # 无法设定
gisticOncoPlot(gistic = laml.gistic, clinicalData = getClinicalData(x = laml),
clinicalFeatures = 'FAB_classification',
gene_mar = 8,
barcode_mar = 6,
sepwd_genes = 0.5,
sepwd_samples = 0.25,
sortByAnnotation = TRUE, top = 10)
参考
https://www.bioconductor.org/packages/release/bioc/html/maftools.html