library(tidyverse)
library(tibble)
library(data.table)
library(RColorBrewer)
library(ggsci)
# 自定义函数
# 1. 建立函数将R environment中的data.frame转为字符串,用于在文档中保存短小的数据(如案例5数据,注意数据量大时会报错)
texttable <- function(data) {
# 表格转文字格式的函数
col.n <- ncol(data)
col.name <- names(data)
res <- "data <- data.frame("
for (i in 1:col.n) {
val <- data[[i]]
if (is.character(val)) {
tres <- paste0(col.name[i], " = c('", paste0(val, collapse = "', '"), "'), ")
} else {
tres <- paste0(col.name[i], " = c(", paste0(val, collapse = ", "), "), ")
}
res <- paste(res, tres)
}
# res <- str_remove(res,", $") %>% paste0(")")
res <- gsub(", $", "", res)
res <- paste0(res, ")")
return(res)
}
1 前言
ggplot2
可以画出许多漂亮的图形。上一篇blog中,我们探讨了适合学术绘图的配色方案,主要用到了RColorRbrewer
和ggsci
两个扩展包。今天我们继续学术绘图的探索,主要通过案例的形式,实现以下两方面目的:
- 尝试使用
ggplot2
复现一些学术期刊、新闻报刊中看到的漂亮的数据可视化试图。 - 总结自己在科研过程中使用
ggplot2
的绘图技巧。
2 环境设定
3 显示数学符号和公式
ggplot2
中,显示数学符号及公式的方法有两种:baser
中的expression()
函数和html语法的ggtext::element_markdown()
函数:
expression()
中任意字符如不能识别为符号,则以字符串形式(公式中的变量名)显示,例如expression(abscsdfwlapha3)
直接会显示abscsdfwlapha3
,可以有括号,但是要单独在首尾。expression()
或parse()
函数(将字符串转为expression)中,数学符号和字符串之间必须有连接符,连接方法有*
和paste
(无空格连接)以及~
(有空格连接)。例如以下三个结果相同:expression(''^210*Pb[ex])
和expression(paste(''^210,Pb[ex]))
和parse(text="''^210*Pb[ex]")
。注意有一些符号(如上下标)前必须要有字符,没有要加空字符,例如上面号前的’’;注意后至*或~前所有字符都是上标,如果不确定,可以用^{210}指定上标部分,当然遇到特殊符号格式不对会报错。
geom_text
中,使用expression类型的输入会报警告(无论是否有parse=T),但仍可在图像上画出数学符号(ggplot2 v.3.4.2)。解决办法是,直接用字符型,然后设置parse=T(注意仅在geom_text中这么用),例如:建议用geom_text(label="''^210*Pb[ex]",parse=T)取代geom_text(label=expression(''^210*Pb[ex]))
。对于
geom_text
,如果有向量变量(如a)单独存储数学符号(expression或字符串),则只能在aes外指定label:geom_text(label=a,parse=T)
;如果在画图数据中有一列(如dt$l)储存字符串格式的数学符号,则只能这样使用:geom_text(aes(label=l), parse = T)
在数据的一些位置不能使用expression,例如data.table的元素和列名都不支持,而虽然
data.frame
支持,但是在ggplot2分面标题中,也不能使用expression
,此时可借助ggtext::element_markdown()
,使用html
语法实现分面标题的数学符号(如下面第二个图),注意这个方法,除了分面标题,其他位置不建议用,因为常不起作用。关于expression在绘图中显示的符号,详细帮助见?plotmath。
point <- tibble(
px = c(18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40),
py = c(4, 5, 12, 35, 84, 45, 47, 41, 22, 25, 3, 5)
)
fline <- tibble(
lx = seq(15, 43, by = 0.02),
ly = 60.5921 * exp(-((seq(15, 43, by = 0.02) - 28.0006) / 6.0670)^2)
)
x_labl <- c(
expression(gamma), expression(delta), expression(epsilon),
expression(zeta), expression(eta), expression(kappa), expression(lambda)
)
y_labl <- c(
expression(Alpha), expression(Beta), expression(Gamma), expression(Delta),
expression(Epsilon), expression(Zeta), expression(Eta), expression(Theta),
expression(Iota), expression(Kappa)
)
ggplot() +
geom_point(data = point, aes(x = px, y = py, shape = "sha"), color = "red", size = 2) +
geom_line(data = fline, aes(x = lx, y = ly, color = "col"), linetype = 2) +
# 定义图例及图形的标题及形状
scale_color_manual(values = c("col" = "blue"), labels = expression(beta)) +
scale_shape_manual(values = c("sha" = 15), labels = expression(alpha)) +
guides(color = guide_legend(title = expression(r^2 == 111), order = 1)) +
guides(shape = guide_legend(title = expression(r[2] == 222), order = 2)) +
# 在图形中添加公式,注意label中~符号的使用和parse参数。
geom_text(data = NULL, aes(36, 70), label = "Fit~line:~y == 61*e^{-(frac(x-28,6))}^2", parse = T) +
geom_text(data = NULL, aes(36, 60), label = "r^2 == 0.7610", parse = T) +
# 定义坐标轴及并以公式字符形式显示
scale_x_continuous(breaks = seq(15, 45, by = 5), limits = c(15, 45), expand = c(0, 0), labels = x_labl) +
scale_y_continuous(breaks = seq(0, 90, by = 10), limits = c(0, 90), expand = c(0, 0), labels = y_labl) +
# 在标题中显示公式
labs(
title = expression(sqrt(a, b)),
x = expression(bold(prod(plain(P)(X == x), x)) ~ (km^3)),
y = expression(interscet(A[i], i == 1, n) ~ italic(H[2], CO[3] ~ mol %.% L^-1))
) +
theme_bw() +
theme(
aspect.ratio = 1 / 1.5,
panel.border = element_rect(linewidth = 1),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
plot.title = element_text(hjust = 0.5)
)
4 设置文本及字体
-
一个可行的ggplot2配置、显示字体流程:
- 在R中查看哪些字体可用:
sysfonts::font.families()
; - 选择字体或使用
sysfonts::font.add()
用本地文件添加字体; - 使用
showtext::showtext_auto()
在ggplot2中显示字体; - 使用
ggplot2
画图并定义字体。
- 在R中查看哪些字体可用:
某些时候Rstudio的plot窗口不能显示字体,可在Tools - Global options - General - Graphics - Graphics Device - Backend修改参数,或者将图片保存本地后查看
在Rstudio中绘图显示不同字体,与Rmd和Rmd保存为html时,显示结果不一样。例如有时windows系统字体虽然
sysfonts::font_families()
不显示,但是在Rstudio也能调用,如Times New Roman,但是在Rmd html中则必须要载入、调用,否则提示没有该字体。如果在Rstudio中可以直接用,则不建议showtext::showtext_auto()
,因为它会让文字变得非常小,要显著调大size才能正常显示,也因此建议画完图后就用showtext::showtext_auto(FALSE)
关闭该功能。
library(showtextdb)
library(sysfonts)
library(showtext)
sysfonts::font_add("font1", regular = "D:/#R/learn_ggplot2/STXINGKA.TTF")
sysfonts::font_add("font2", regular = "D:/#R/learn_ggplot2/times.ttf")
sysfonts::font_add("font3", regular = "D:/#R/learn_ggplot2/STHUPO.TTF")
sysfonts::font_families() # 查看可以运行的字体
[1] "sans" "serif" "mono" "wqy-microhei" "font1"
[6] "font2" "font3"
showtext::showtext_auto() # 在ggplot2图中显示字体
p <- ggplot() +
geom_text(aes(1.5, 1), label = "(theme改字体geom_text不生效)\nABcde\n12345\n中文字体", size = 8) +
geom_text(aes(4.5, 1), label = "(必须在geom_text改字体)\nAbcde\n12345\n中文字体", size = 8, family = "font1") +
geom_text(aes(0, 0.86),
label = "注意:轴、label字体会随theme的element_text(family='font1')设置改变",
size = 7, hjust = 0
) +
labs(x = "variable 12345 横坐标轴", y = "value 12345 纵坐标轴", title = "title 12345 图标题") +
scale_x_continuous(limits = c(0, 6)) +
scale_y_continuous(limits = c(0.85, 1.1), expand = c(0, 0)) +
theme_bw() +
theme(
panel.grid = element_blank(),
text = element_text(family = "font1"),
axis.text = element_text(size = 20),
axis.title.x = element_text(family = "font2", size = 20), # Times New Roman不显示中文
axis.title.y = element_text(family = "font3", size = 20),
title = element_text(family = "font3", size = 20)
)
p
showtext_auto(FALSE) # 画完图关闭,打开时画图文字会变得非常小,只能把字号设置的额外大才正常
5 指示线
在作图过程中,我们通常需要加入一些指示线对图形进行进一步的说明。在ggplot2
中,使用geom_text()
和geom_segment()
等函数连用实现。
# 生成作图数据
mydata <- tibble(
response = c("Stringent complete response", "Complete response", "Very good partial response", "Partial response"),
percentage = c(0.327, 0.067, 0.194, 0.042)
)
mydata$response <- factor(mydata$response,
levels = c("Stringent complete response", "Complete response", "Very good partial response", "Partial response")
)
dat_col <- c("darkgreen", "darkolivegreen3", "steelblue4", "lightskyblue3")
dat_tex <- tibble(
x = c(1, 0.55, 1.5),
y = c(0.66, 0.43, 0.33),
l = c("63.0 (104/165)", " ≥CR: \n39.4", " ≥VGPR: \n58.8")
)
dat_seg_left <- data.table(
x = c(0.65, 0.69, 0.69, 0.69),
xe = c(0.69, 0.73, 0.73, 0.69),
y = c(0.43, 0.63, 0.236, 0.236),
ye = c(0.43, 0.63, 0.236, 0.63)
)
dat_seg_righ <- data.table(
x = c(1.31, 1.27, 1.27, 1.31),
xe = c(1.35, 1.31, 1.31, 1.31),
y = c(0.33, 0.63, 0.042, 0.042),
ye = c(0.33, 0.63, 0.042, 0.63)
)
ggplot(mydata, aes(x = " ", y = percentage, fill = response)) +
geom_bar(stat = "identity", width = 0.5, alpha = 0.9) +
geom_text(aes(label = percentage * 100),
position = position_stack(vjust = 0.5),
size = 2.3, color = c("white", "white", "white", "black")
) +
geom_text(data = dat_tex, aes(x, y, label = l, fill = NULL)) + # 无法继承的aes要定义为null
geom_segment(data = dat_seg_left, aes(x = x, xend = xe, y = y, yend = ye, fill = NULL)) +
geom_segment(data = dat_seg_righ, aes(x = x, xend = xe, y = y, yend = ye, fill = NULL)) +
scale_fill_manual(values = dat_col) +
scale_y_continuous(breaks = seq(0, 1, 0.1), label = seq(0, 100, 10), limits = c(0, 1), expand = c(0, 0)) +
labs(x = "All Patients", y = "Percentage of Patients with Response", fill = "Response:") +
coord_cartesian(clip = "off") +
theme_classic() +
theme(
legend.position = "top",
legend.text = element_text(size = 7),
legend.title = element_text(size = 7, face = "bold"),
legend.key.size = unit(3, "mm"),
axis.title = element_text(size = 8, face = "bold"),
axis.text = element_text(size = 8),
axis.ticks.x = element_blank(),
axis.ticks.length.y = unit(2, "mm"),
plot.margin = unit(c(0.6, 5, 0.6, 5), "cm")
)
6 误差棒、显著性
- bar, errorbar和text的
position
参数设置要相同。 - 调整bar和errorbar的图层顺序,可以实现显示全部或半个errorbar。
library(ggsci)
dat <- tibble(
layer = rep(c("A", "B"), 4),
control = rep(c("ck", "slight", "moderate", "severe"), each = 2),
sig = c("a", "a", "a", "a", "b", "b", "c", "c"),
value = c(126.40, 78.47, 112.77, 70.57, 81.87, 55.53, 61.33, 46.83),
sd = c(7.05, 7.94, 8.25, 4.55, 3.98, 3.94, 11.48, 5.14)
)
dat$control <- factor(dat$control, levels = c("ck", "slight", "moderate", "severe"))
ggplot(dat, aes(x = control, y = value, fill = layer)) +
geom_bar(stat = "identity", position = position_dodge(0.9)) +
geom_errorbar(aes(ymin = value - sd, ymax = value + sd), width = 0.2, position = position_dodge(0.9)) +
geom_text(aes(y = value + 1.5 * sd, label = sig), vjust = 0, position = position_dodge(0.9)) +
scale_fill_npg() + # ggsci配色方案
scale_y_continuous(
breaks = seq(0, 150, by = 25),
limits = (c(0, 150)), expand = c(0, 0)
)
7 案例1:全球气温变化图
dat <- tibble(
Year = c(1959, 1960, 1961, 1962, 1963, 1964, 1965, 1966, 1967, 1968, 1969, 1970, 1971, 1972, 1973, 1974, 1975, 1976, 1977, 1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022), CO2 = c(315.98, 316.91, 317.64, 318.45, 318.99, 319.62, 320.04, 321.37, 322.18, 323.05, 324.62, 325.68, 326.32, 327.46, 329.68, 330.19, 331.13, 332.03, 333.84, 335.41, 336.84, 338.76, 340.12, 341.48, 343.15, 344.87, 346.35, 347.61, 349.31, 351.69, 353.2, 354.45, 355.7, 356.54, 357.21, 358.96, 360.97, 362.74, 363.88, 366.84, 368.54, 369.71, 371.32, 373.45, 375.98, 377.7, 379.98, 382.09, 384.02, 385.83, 387.64, 390.1, 391.85, 394.06, 396.74, 398.81, 401.01, 404.41, 406.76, 408.72, 411.66, 414.24, 416.45, 418.56),
Value = c(0.07, 0.02, 0.06, 0.05, 0.08, -0.15, -0.07, -0.02, 0.01, -0.06, 0.09, 0.05, -0.05, 0.02, 0.2, -0.05, 0.01, -0.03, 0.21, 0.11, 0.22, 0.29, 0.35, 0.19, 0.34, 0.19, 0.16, 0.21, 0.34, 0.41, 0.31, 0.44, 0.42, 0.23, 0.27, 0.32, 0.48, 0.35, 0.5, 0.63, 0.42, 0.43, 0.55, 0.62, 0.63, 0.55, 0.7, 0.66, 0.66, 0.55, 0.66, 0.73, 0.63, 0.66, 0.68, 0.76, 0.91, 1.03, 0.94, 0.86, 0.97, 1.01, 0.86, 0.91)
) |>
mutate(col = ifelse(Value > 0, "b", "r"))
ggplot(dat, aes(Year, Value)) +
geom_bar(aes(fill = col), stat = "identity", color = "black", width = 0.7, linewidth = 0.1, show.legend = F) +
geom_smooth(aes(linetype = "l"), color = "red", se = FALSE, method = "lm", formula = y ~ x) +
scale_x_continuous(breaks = seq(1960, 2022, by = 5), limits = c(1958, 2023), expand = c(0, 0)) +
scale_y_continuous(
limits = c(-0.2, 1.2),
breaks = seq(-0.20, 1.20, by = 0.2),
expand = c(0, 0),
labels = sprintf("%.2f\u00B0C", seq(-0.20, 1.20, by = 0.2)),
# 副y轴,转换为华氏度
sec.axis = sec_axis(
trans = ~ . * 1.8, # 注意纵坐标为温度差值,所以只*1.8
breaks = seq(-0.36, 2.16, by = 0.36),
labels = sprintf("%.2f\u00B0F", seq(-0.36, 2.16, by = 0.36))
)
) +
scale_fill_manual(values = c("b" = "#4366AA", "r" = "#961A2B")) +
scale_linetype_discrete(labels = c("l" = "1959 ~ 2022 Trend \n (+1.64℃/Centry)")) +
guides(linetype = guide_legend(title = NULL)) +
labs(
x = NULL, y = NULL,
title = "Global Land and Ocean",
subtitle = "January-December Temperature Anomalies"
) +
theme_bw() +
theme(
aspect.ratio = 0.5,
axis.line = element_line(),
panel.border = element_blank(),
panel.grid = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.major = element_line(color = "gray97"),
plot.title = element_text(face = "bold", size = 14, hjust = 0),
plot.subtitle = element_text(size = 12, hjust = 0),
legend.position = c(0.85, 1.09),
legend.background = element_blank(),
legend.text = element_text(size = 12)
)
8 案例2:流域岩石风化碳汇强度
data <- tibble(
Basin = c(
"SiKiang", "Irrawaddi", "Amazon", "Mekong", "Yangtze-Kiang",
"Ganges-Brahmaputra", "Magdalena", "Orinoco", "SevernaiaDvina",
"St.Lawrence", "Fraser", "SaoFrancisco", "Danube", "Yenisei",
"Tigris-Euphrates", "Mississippi", "Yukon", "Columbia",
"Mackenzie", "Indus", "Zaire", "Godavari", "Ob", "Parana",
"Lena", "Indigirka", "Don", "Amour", "Kolyma", "Yana",
"Zambesi", "Nile", "Limpopo", "Niger", "Huangho", "Murray",
"Orange", "Senegal", "Colorado"
),
x = c(
13.88, 10.88, 6.11, 5.72, 5.56, 5.17, 5.07, 2.86, 2.62,
2.41, 2.25, 2.05, 1.64, 1.64, 1.58, 1.54, 1.48, 1.44, 1.38,
1.29, 1.12, 1.02, 0.99, 0.86, 0.74, 0.7, 0.67, 0.66, 0.59,
0.52, 0.41, 0.33, 0.31, 0.2, 0.18, 0.14, 0.13, 0.08, 0.08
)
)
ggplot(data, aes(x = reorder(Basin, x), y = x)) +
geom_bar(stat = "identity", fill = "steelblue", color = "white") +
geom_text(aes(label = x), hjust = 0, vjust = 0.5, size = 2.5) +
geom_segment(aes(x = 28.5, y = 0, xend = 28.5, yend = 7), color = "red", linewidth = 0.5) +
geom_text(
data = NULL, aes(x = 28.5, y = 7.5), label = "Average: 2.21",
hjust = 0, vjust = 0.5, size = 3, color = "red"
) +
scale_y_continuous(expand = expansion(mult = c(0, 0.08))) +
labs(x = "Basins", y = expression(CO[2] ~ "Consumed(" ~ tc %.% km^-2 %.% yr^-1 ~ ")")) +
coord_flip() +
theme_light() +
theme(
axis.text = element_text(size = 6),
axis.text.y = element_text(size = 7),
axis.title = element_text(size = 10),
panel.grid.minor = element_blank(),
panel.grid.major.y = element_blank()
)