特殊情况下,需要对UMI的单细胞数据做imputation,补全缺失的数据。 

 

工具很多,这篇paper已经帮你评估好了,直接用其推荐的工具即可。

A systematic evaluation of single-cell RNA-sequencing imputation methods(Hou et al., Genome Biology, 2020)

 

排名第一的单细胞imputation工具:

https://github.com/KrishnaswamyLab/MAGIC

教程:Rmagic Bone Marrow Tutorial

 

UMI 数据集通常规模较大,运行起来比较耗时。

 

加载依赖包(需预先安装 Rmagic 及其 Python 依赖)

library(Rmagic)
library(ggplot2)
library(readr)
library(viridis)
library(phateR)

# Check that the Python package behind Rmagic can be found and loaded.
# NOTE: do not run "source activate py38" beforehand; otherwise the
# Python package cannot be loaded.
pymagic_is_available()

  

测试数据

# load the tutorial's example data (uncomment the next line to use it)
# bmmsc <- read_csv("https://github.com/KrishnaswamyLab/PHATE/raw/master/data/BMMC_myeloid.csv.gz")

  

实际数据

# Take the counts slot of the RNA assay in integrated.org and transpose it;
# the filtering steps that follow treat rows as cells and columns as genes.
bmmsc <- t(integrated.org@assays$RNA@counts)
# Preview the top-left 5 x 5 corner of the matrix.
bmmsc[seq_len(5), seq_len(5)]

  

QC

# Keep genes (columns) detected in more than 10 cells.
# Matrix::colSums()/rowSums() accept both base and sparse matrices, so this
# does not depend on the Matrix package being attached; drop = FALSE keeps
# the result two-dimensional even if only one gene passes the filter.
keep_cols <- Matrix::colSums(bmmsc > 0) > 10
bmmsc <- bmmsc[, keep_cols, drop = FALSE]
# Look at the distribution of library sizes (row sums of bmmsc); the red
# line marks 1000, the cutoff applied to cells in the next step.
ggplot() +
  geom_histogram(aes(x = Matrix::rowSums(bmmsc)), bins = 50) +
  geom_vline(xintercept = 1000, color = "red") +
  labs(x = "Library size (UMIs per cell)")

  

# Keep cells (rows) with more than 1000 UMIs in total.
# Matrix::rowSums() works for base and sparse matrices alike; drop = FALSE
# keeps a matrix even if only a single cell passes the filter.
keep_rows <- Matrix::rowSums(bmmsc) > 1000
bmmsc <- bmmsc[keep_rows, , drop = FALSE]

  

# Normalize every cell by its library size, then apply a square-root
# transform to the normalized values.
bmmsc <- sqrt(library.size.normalize(bmmsc))

  

测试部分基因

# Run MAGIC, requesting output only for a few genes of interest.
# Alternative gene set, kept for reference:
# bmmsc_MAGIC <- magic(bmmsc, genes=c("Mpo", "Klf1", "Ifitm1"))
bmmsc_MAGIC <- magic(
  bmmsc,
  genes = c("NEUROG2", "NEAT1", "TFAP2A")
)

  

获取全部基因

# Run MAGIC for all genes with t = 4. The earlier fit is passed via `init`;
# NOTE(review): per the Rmagic documentation this should let MAGIC reuse
# intermediate results instead of recomputing them -- confirm for the
# installed version.
bmmsc_MAGIC_all <- magic(
  bmmsc,
  genes = "all_genes",
  t = 4,
  init = bmmsc_MAGIC
)

  

可视化

# Scatter plot of the three genes before MAGIC (values from bmmsc).
# bmmsc is derived from the counts slot and may be a sparse Matrix object,
# which as.data.frame() cannot coerce directly, so the three selected
# columns are converted to a base matrix first. drop = FALSE keeps the
# selection two-dimensional.
ggplot(
  as.data.frame(
    as.matrix(bmmsc[, c("NEUROG2", "NEAT1", "TFAP2A"), drop = FALSE])
  )
) +
  geom_point(aes(x = NEUROG2, y = NEAT1, color = TFAP2A)) +
  scale_color_viridis(option = "B")

  

# Same scatter plot, drawn from the `result` element of the MAGIC output
# for the three selected genes.
ggplot(
  as.data.frame(bmmsc_MAGIC$result[, c("NEUROG2", "NEAT1", "TFAP2A")])
) +
  geom_point(aes(x = NEUROG2, y = NEAT1, color = TFAP2A)) +
  scale_color_viridis(option = "B")