论文


Large variation in the association between seasonal antibiotic use and resistance across multiple bacterial species and antibiotic classes


数据代码链接

https://github.com/orgs/gradlab/repositories

今天的推文重复一下论文中的 Figure 1B

跟着Plos Biology学作图:R语言ggplot2散点图并添加误差线和带置信区间的拟合曲线_数据 image.png


论文提供的代码用到了 purrr 包中的 map 系列函数(如 map_dbl()、pmap()),这些函数我之前基本没有用过,功能很强大。这里还涉及到如何用它们批量循环作图。


首先是数据集

library(readr)
library(tidyverse)

# Read the two input tables used below: the use-model values and the seasonal
# deviates. Both paths are relative to the current working directory.
use.model.params <- read_csv(
  file = "use-resistance-seasonality/tables/use_model_values.csv"
)
use.deviates <- read_csv(
  file = "use-resistance-seasonality/tables/use_seasonal_deviates.csv"
)

准备作图配色

# Plot colour for each drug class, keyed by class name so that a colour can be
# looked up with colors[class].
colors <- c(
  Macrolides = "#220050",
  Nitrofurans = "#b30059",
  Penicillins = "#0091a8",
  Quinolones = "#359023",
  Tetracyclines = "#ffa500"
)

三个自定义函数

# First helper: keep only the lowest-AIC rows within each group.
#
# Arguments:
#   table       Data frame containing an `AIC` column and the columns named in
#               `group_cols`.
#   group_cols  Character vector of column names that define the groups.
#
# Returns the rows whose AIC is the smallest in their group (tied rows are all
# kept; rows with a missing AIC are dropped), without the `AIC` column.
filter_models_AIC_func <- function(table, group_cols) {
  table %>%
    # group_by(across(all_of())) replaces the superseded group_by_at(vars()).
    group_by(across(all_of(group_cols))) %>%
    # Ranking inside filter() avoids a temporary `rank` column, which would
    # overwrite (and later drop) a column of that name already in `table`.
    filter(dense_rank(AIC) == 1) %>%
    ungroup() %>%
    select(-AIC)
}

# Second helper: cosine curve.
#
# Evaluates amplitude * cos(omega * (month - phase)) + intercept. The
# arithmetic is vectorised, so `month` may be a numeric vector.
cos_func <- function(month, amplitude, phase, omega, intercept) {
  angle <- omega * (month - phase)
  intercept + amplitude * cos(angle)
}

# Third helper: the plotting function. Draws one panel containing the monthly
# points with error bars, the cosine curve and a ribbon between two cosine
# curves.
#
# Arguments:
#   deviates   Data frame with columns `month`, `seasonal_deviate` and `sem`.
#   class      Panel title; also the key used to look up the colour in the
#              file-level `colors` vector.
#   amplitude, phase, omega
#              Parameters handed to cos_func() for the curve (intercept is
#              fixed at 0).
#   a_lower, a_upper
#              Amplitudes used for the lower and upper edge of the ribbon.
#   sig        Logical flag; when TRUE, " *" is appended to the title.
#
# Returns a ggplot object.
plot_use_model_func <- function(deviates, class, amplitude, phase, omega, a_lower, a_upper, sig) {
  col <- colors[class]

  # isTRUE() treats a missing flag as "not significant" instead of letting
  # if (NA) abort the whole batch of plots.
  title <- if (isTRUE(sig)) paste(class, "*") else class

  # cos_func() is vectorised over `month`, so the ribbon edges are computed in
  # one call each rather than element by element.
  month_grid <- seq(1, 12, 0.01)
  ci <- data.frame(
    month = month_grid,
    lower_ci = cos_func(month_grid, a_lower, phase, omega, 0),
    upper_ci = cos_func(month_grid, a_upper, phase, omega, 0)
  )

  # NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for lines;
  # `size` is kept here so the code still runs on older ggplot2 versions.
  ggplot(data = deviates, aes(x = month)) +
    geom_point(aes(x = month, y = seasonal_deviate), color = col, size = 1) +
    geom_errorbar(
      aes(x = month, ymin = seasonal_deviate - sem, ymax = seasonal_deviate + sem),
      width = 0.5,
      color = col
    ) +
    stat_function(
      fun = cos_func,
      # Full argument name: the original `a = amplitude` only worked through
      # partial matching against cos_func()'s `amplitude` parameter.
      args = list(amplitude = amplitude, phase = phase, omega = omega, intercept = 0),
      size = 0.7,
      color = col
    ) +
    geom_ribbon(
      data = ci,
      aes(x = month, ymin = lower_ci, ymax = upper_ci),
      fill = col,
      alpha = 0.3
    ) +
    scale_x_continuous(breaks = c(1, 3, 5, 7, 9, 11)) +
    ggtitle(title) +
    xlab("Month") +
    theme_classic() +
    theme(
      legend.position = "none",
      plot.title = element_text(size = 11, hjust = 0.5, face = "bold"),
      axis.text = element_text(size = 10),
      axis.title.x = element_text(size = 11),
      axis.title.y = element_blank()
    )
}

数据整合

# Keep the lowest-AIC rows per drug class.
use.model.params.fil <- filter_models_AIC_func(use.model.params, "drug_class")

# Reshape the amplitude and phase rows into one row per drug_class/omega with
# columns named "<term>_<variable>" (e.g. amplitude_estimate), then flag the
# classes whose amplitude_p.value.BH is below 0.05.
f1b_data_model <- use.model.params.fil %>%
  filter(term %in% c("amplitude", "phase")) %>%
  select(drug_class, omega, term, estimate, ci.lower, ci.upper, p.value.BH) %>%
  gather(
    key = "variable",
    value = "value",
    estimate, ci.lower, ci.upper, p.value.BH
  ) %>%
  unite("temp", term, variable, sep = "_") %>%
  spread(key = temp, value = value) %>%
  mutate(sig = amplitude_p.value.BH < 0.05)

# Attach the seasonal deviates for the retained drug_class/omega pairs.
# No `by` is given, so the join uses every column the two tables share.
f1b_data_deviates <- f1b_data_model %>%
  select(drug_class, omega) %>%
  left_join(use.deviates) %>%
  select(drug_class, month, seasonal_deviate, sem)

作图

# Build one plot per drug class: nest each class's deviates into a list-column,
# join it onto the model parameters, call plot_use_model_func() once per row,
# and pull the resulting list of ggplot objects out of the data frame.
f1b_plots <- f1b_data_model %>%
  left_join(
    f1b_data_deviates %>%
      # Named nest() spec: the unnamed form nest(-drug_class) is deprecated and
      # warns on tidyr >= 1.0.0. Naming the column here also removes the need
      # for a separate rename().
      nest(deviates_table = -drug_class),
    by = "drug_class"
  ) %>%
  # make plots
  mutate(plot = pmap(
    .l = list(
      deviates = deviates_table,
      class = drug_class,
      amplitude = amplitude_estimate,
      phase = phase_estimate,
      omega = omega,
      a_lower = amplitude_ci.lower,
      a_upper = amplitude_ci.upper,
      sig = sig
    ),
    .f = plot_use_model_func
  )) %>%
  pull(plot)

这里作者的处理方式是:用 pmap() 为每个药物类别各画一张图,把这些图保存为数据框中的一个列表列(list-column),最后用 pull() 把这一列取出来,得到一个由多个 ggplot 对象组成的列表。

使用ggpubr包中的函数拼图

library(ggpubr)
# Arrange the panels on a 2 x 3 grid, add the shared y-axis label on the left
# and draw the figure.
f1b <- annotate_figure(
  ggarrange(plotlist = f1b_plots, nrow = 2, ncol = 3, align = "hv"),
  left = text_grob(
    "Seasonal deviates in use (mean daily claims/10,000 people)",
    size = 11,
    rot = 90
  )
)
print(f1b)

跟着Plos Biology学作图:R语言ggplot2散点图并添加误差线和带置信区间的拟合曲线_公众号_02 image.png


这个数据处理和作图的代码整体还相对比较复杂,如果有时间的话争取录视频逐行解释其中的代码。示例数据和代码可以在公众号后台留言20220324获取


欢迎大家关注我的公众号

小明的数据分析笔记本


小明的数据分析笔记本 公众号 主要分享:1、R语言和python做数据分析和数据可视化的简单小例子;2、园艺植物相关转录组学、基因组学、群体遗传学文献阅读笔记;3、生物信息学入门学习资料及自己的学习笔记!