特殊情况下,需要对UMI的单细胞数据做imputation,补全缺失的数据。
工具很多,这篇paper已经帮你评估好了,直接用其推荐的工具即可。
A systematic evaluation of single-cell RNA-sequencing imputation methods(Hou et al., Genome Biology, 2020)
排名第一的单细胞imputation工具:
https://github.com/KrishnaswamyLab/MAGIC
教程:Rmagic Bone Marrow Tutorial
UMI数据的规模一般都比较大,跑起来还是比较耗时的。
安装与加载(需先安装R包Rmagic及其Python后端magic-impute)
# Load the packages this walkthrough attaches up front.
library(Rmagic)
library(ggplot2)
library(readr)
library(viridis)
library(phateR)

# Check that the Python backend used by Rmagic can be found.
# Author's note: do not run "source activate py38" beforehand, otherwise the
# Python package cannot be loaded.
pymagic_is_available()
测试数据
# Load the tutorial test data (left disabled here; enable this line to
# reproduce the tutorial on its example data set instead of your own data).
# bmmsc <- read_csv("https://github.com/KrishnaswamyLab/PHATE/raw/master/data/BMMC_myeloid.csv.gz")
实际数据
# Take the RNA count matrix from `integrated.org` (presumably a Seurat
# object -- confirm) and transpose it so that rows are cells and columns are
# genes, which is the orientation the QC steps (colSums per gene, rowSums per
# cell) rely on.
bmmsc <- t(integrated.org@assays$RNA@counts)

# Peek at the top-left corner to confirm orientation and row/column names.
bmmsc[1:5, 1:5]
QC
# Keep genes detected (count > 0) in more than 10 cells; columns are genes.
keep_cols <- colSums(bmmsc > 0) > 10
bmmsc <- bmmsc[, keep_cols]

# Look at the distribution of library sizes; the red line marks the
# 1000-UMI cutoff applied in the next step.
ggplot() +
  geom_histogram(aes(x = rowSums(bmmsc)), bins = 50) +
  geom_vline(xintercept = 1000, color = "red")

# Keep cells with more than 1000 UMIs; rows are cells.
keep_rows <- rowSums(bmmsc) > 1000
bmmsc <- bmmsc[keep_rows, ]

# Normalise each cell by its library size, then square-root transform.
bmmsc <- library.size.normalize(bmmsc)
bmmsc <- sqrt(bmmsc)
测试部分基因
# Run MAGIC on a handful of genes first as a quick check before imputing
# everything.
# Call from the tutorial data set, kept for reference:
# bmmsc_MAGIC <- magic(bmmsc, genes = c("Mpo", "Klf1", "Ifitm1"))
bmmsc_MAGIC <- magic(bmmsc, genes = c("NEUROG2", "NEAT1", "TFAP2A"))
获取全部基因
# Impute all genes with t = 4, passing the earlier MAGIC result through
# `init` (per the Rmagic docs this lets magic() reuse intermediate steps
# instead of recomputing them -- verify for your Rmagic version).
bmmsc_MAGIC_all <- magic(
  bmmsc,
  genes = "all_genes",
  t = 4,
  init = bmmsc_MAGIC
)
可视化
# Before MAGIC: scatter of two genes coloured by a third, using the
# normalised (not yet imputed) data.
# as.matrix() is applied first so the conversion also works when `bmmsc` is a
# sparse Matrix (e.g. counts taken from an S4 assay slot), which
# as.data.frame() may not be able to coerce directly.
ggplot(as.data.frame(as.matrix(bmmsc[, c("NEUROG2", "NEAT1", "TFAP2A")]))) +
  geom_point(aes(NEUROG2, NEAT1, color = TFAP2A)) +
  scale_color_viridis(option = "B")

# After MAGIC: the same plot drawn from the imputed values in `$result`.
ggplot(as.data.frame(bmmsc_MAGIC$result[, c("NEUROG2", "NEAT1", "TFAP2A")])) +
  geom_point(aes(NEUROG2, NEAT1, color = TFAP2A)) +
  scale_color_viridis(option = "B")