ggplot2 累积进球数阶梯图

Tidytuesday 2024 Week 22

ggplot2
Tidytuesday
Author

Lee

Published

July 16, 2024

1 准备数据

library(tidyverse)
library(camcorder)
library(readr)

# Read the English Women's Football appearances table from the local CSV copy.
# NOTE(review): the path is absolute and machine-specific; adjust it when
# running this post on another machine.
ewf_appearances <-
  "D:/Myblog/posts/ggplot2-tidytuesday-2024week28-WFootball/tech/ewf_appearances.csv" %>%
  readr::read_csv()

# Print a compact overview of the columns and their types.
ewf_appearances %>%
  glimpse()
Rows: 4,596
Columns: 23
$ season_id       <chr> "S-2011-2011-1-S", "S-2011-2011-1-S", "S-2011-2011-1-S…
$ season          <chr> "2011-2011", "2011-2011", "2011-2011", "2011-2011", "2…
$ tier            <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ division        <chr> "FA Women's Super League (WSL)", "FA Women's Super Lea…
$ match_id        <chr> "M-2011-2011-1-001-M", "M-2011-2011-1-001-M", "M-2011-…
$ match_name      <chr> "Chelsea Ladies vs Arsenal Ladies", "Chelsea Ladies vs…
$ date            <date> 2011-04-13, 2011-04-13, 2011-04-13, 2011-04-13, 2011-…
$ attendance      <dbl> 2510, 2510, 742, 742, 602, 602, 835, 835, 220, 220, 34…
$ team_id         <chr> "T-008-T", "T-001-T", "T-016-T", "T-011-T", "T-003-T",…
$ team_name       <chr> "Chelsea Ladies", "Arsenal Ladies", "Lincoln Ladies", …
$ opponent_id     <chr> "T-001-T", "T-008-T", "T-011-T", "T-016-T", "T-006-T",…
$ opponent_name   <chr> "Arsenal Ladies", "Chelsea Ladies", "Doncaster Rovers …
$ home_team       <dbl> 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, …
$ away_team       <dbl> 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, …
$ goals_for       <dbl> 0, 1, 0, 1, 4, 0, 3, 3, 1, 1, 0, 4, 1, 0, 1, 1, 1, 1, …
$ goals_against   <dbl> 1, 0, 1, 0, 0, 4, 3, 3, 1, 1, 4, 0, 0, 1, 1, 1, 1, 1, …
$ goal_difference <dbl> -1, 1, -1, 1, 4, -4, 0, 0, 0, 0, -4, 4, 1, -1, 0, 0, 0…
$ result          <chr> "Loss", "Win", "Loss", "Win", "Win", "Loss", "Draw", "…
$ win             <dbl> 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, …
$ loss            <dbl> 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, …
$ draw            <dbl> 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, …
$ note            <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ points          <dbl> 0, 3, 0, 3, 3, 0, 1, 1, 1, 1, 0, 3, 3, 0, 1, 1, 1, 1, …

2 数据整理

# Running goal total for every team, one row per team appearance.
# arrange() ignores grouping, so sorting first and grouping afterwards gives
# the same chronological order inside each team.
ewf_cum_goals <- ewf_appearances %>%
  arrange(date) %>%
  group_by(team_id) %>%
  mutate(
    # Strip the " Ladies" / " Women" suffix and join every distinct name a
    # team has used into one comma-separated string.
    team_names = team_name %>%
      str_remove(" Ladies| Women") %>%
      unique() %>%
      paste(collapse = ", "),
    # Cumulative goals scored by the team up to and including this match.
    cum_goals = cumsum(goals_for),
    # A team is a "top" team when its final cumulative total exceeds 400.
    istop = max(cum_goals) > 400
  ) %>%
  # Top teams are labelled "<names> <cumulative goals>"; all other teams keep
  # the plain name string.
  mutate(
    team_label = ifelse(istop, paste(team_names, cum_goals), team_names)
  ) %>%
  ungroup()

# Final data point of every team: the row at the team's latest date that also
# carries its highest cumulative goal count. Used for end points and labels.
ewf_cum_end <- ewf_cum_goals %>%
  filter(
    date == max(date),
    cum_goals == max(cum_goals),
    .by = team_id
  )
# First and last match date of every season within each division.
# `.groups = "drop"` returns an ungrouped tibble directly and avoids the
# "summarise() has grouped output by ..." message that the implicit default
# produces when grouping by more than one variable.
seasons <- ewf_appearances %>%
  group_by(season_id, division) %>%
  summarise(
    season_start = min(date),
    season_end = max(date),
    .groups = "drop"
  )

# Season rows enriched with the coordinates needed to draw the competition
# bars and their labels above the chart.
seasons_overview <- seasons %>%
  # Shorten the division name: drop "FA " and any " (...)" part.
  mutate(division = str_remove_all(division, "FA | \\(.+\\)")) %>%
  group_by(division) %>%
  mutate(
    # Horizontal anchor for the division label: a fixed date for the
    # Women's Super League, otherwise the division's earliest season start.
    # NOTE(review): the reason for the hard-coded 2015-10-20 is not visible
    # here; presumably it positions the label manually. Confirm.
    x = case_when(
      division == "Women's Super League" ~ as.Date("2015-10-20"),
      .default = min(season_start)
    ),
    # One row height per division, 20 units apart, starting above 640.
    y = 640 + 20 * cur_group_id()
  ) %>%
  ungroup()

# For every division and season, the team(s) with the most goals, kept only
# when that team is one of the top teams (`istop`). Drives the per-season goal
# numbers printed on the competition bars.
season_goals <- ewf_appearances %>%
  # Per team and season: total goals, games played and goals per game.
  group_by(team_id, season_id) %>%
  mutate(
    team_goals = sum(goals_for),
    team_games = n(),
    team_goals_per_game = team_goals / team_games
  ) %>%
  ungroup() %>%
  # Collapse to one row per team and season.
  distinct(
    season_id, division, team_name,
    team_goals, team_games, team_goals_per_game
  ) %>%
  # Attach season start/end dates. Keys are spelled out so the join does not
  # depend on whichever column names the two tables happen to share.
  left_join(seasons, by = c("season_id", "division")) %>%
  group_by(division, season_id) %>%
  # Midpoint of the season, used as the x position of the goal number.
  mutate(mid_season = mean(c(season_start, season_end))) %>%
  distinct(season_id, division, team_name, team_goals, mid_season) %>%
  # Keep the highest-scoring team(s) of each division and season.
  filter(team_goals == max(team_goals)) %>%
  ungroup() %>%
  # Bring in the bar coordinates; `division` exists on both sides, so it comes
  # back as `division.x` / `division.y`.
  left_join(seasons_overview, by = "season_id") %>%
  # Look up the cleaned team name string and the top-team flag.
  left_join(
    ewf_cum_goals %>% distinct(team_name, team_names, istop),
    by = "team_name"
  ) %>%
  filter(istop)

3 作图

# Font families used by the plot.
# NOTE(review): these are presumably installed locally; confirm they are
# available to the graphics device before rendering.
f1 <- "Graphik"
f1b <- "Graphik Compact"
f2 <- "Publico Headline"

# Cumulative goals per team as step lines, with the top teams highlighted in
# their club colours and the competitions' seasons drawn as bars on top.
ggplot() +
  # Season background bars
  geom_rect(
    data = seasons_overview,
    aes(xmin = season_start, xmax = season_end, ymin = -Inf, ymax = y),
    fill = alpha("purple3", 0.07)
  ) +
  # Line chart: top teams get a named colour and a thick line, the rest fall
  # back to the scale's NA colour and a thin line
  geom_step(
    data = ewf_cum_goals,
    aes(
      x = date,
      y = cum_goals,
      group = team_id,
      color = if_else(istop, team_names, NA),
      linewidth = if_else(istop, 1, 0.1)
    )
  ) +
  # Team end points
  geom_point(
    data = ewf_cum_end,
    aes(
      x = date,
      y = cum_goals,
      color = if_else(istop, team_names, NA),
      size = if_else(istop, 2.5, 0.2)
    )
  ) +
  # Top 3 teams labels
  ggrepel::geom_text_repel(
    data = ewf_cum_end %>% filter(istop),
    aes(x = date, y = cum_goals, label = team_label, color = team_names),
    hjust = 1,
    family = f1b,
    bg.color = "white",
    fontface = "bold",
    size = 5,
    seed = 99,
    point.padding = 10
  ) +
  # Season segments
  geom_segment(
    data = seasons_overview,
    aes(x = season_start, xend = season_end, y = y),
    color = "purple4",
    linewidth = 2,
    alpha = 0.6
  ) +
  # Season names (stat = "unique" draws each division label only once)
  geom_text(
    data = seasons_overview,
    aes(x = x - 30, y = y, label = division),
    hjust = 1,
    stat = "unique",
    color = "purple4",
    family = f1b,
    size = 3.5
  ) +
  # Season goals
  shadowtext::geom_shadowtext(
    data = season_goals,
    aes(
      x = mid_season,
      y = y,
      label = team_goals,
      color = if_else(istop, team_names, NA)
    ),
    bg.color = "white",
    family = f1b,
    size = 3.5,
    fontface = "bold"
  ) +
  # Hand-picked axis breaks. The third date previously contained a stray line
  # break inside the string literal; it is now a clean ISO date.
  scale_x_date(
    breaks = as.Date(c(
      "2011-04-13", "2014-04-14", "2017-02-11", "2018-09-08", "2024-05-18"
    )),
    date_labels = "%b %Y"
  ) +
  scale_y_continuous(
    position = "right",
    limits = c(0, max(seasons_overview$y) + 10),
    breaks = seq(0, 600, 100)
  ) +
  scale_color_manual(
    values = c(
      "Arsenal" = "#EF0107",
      "Chelsea" = "#034694",
      "Manchester City" = "#6CABDD"
    ),
    na.value = "purple4"
  ) +
  # linewidth and size aesthetics already hold literal values
  scale_linewidth_identity() +
  scale_size_identity() +
  coord_cartesian(clip = "off", expand = FALSE) +
  labs(
    title = "Chelsea, the team with the most goals in English women's football",
    subtitle = str_wrap("Cumulative goals of teams competing in the Women’s Super League and Women’s Championship competitions from 2011 and 2014, respectively, to 2024. The vertical bars show the different competitions and highlight the seasons when one of the top 3 teams scored the highest number of goals.", 120),
    caption = "Source: The English Women's Football (EWF) Database · Graphic: Georgios Karamanis"
  ) +
  theme_minimal(base_family = f1) +
  theme(
    legend.position = "none",
    plot.background = element_rect(fill = "grey99", color = NA),
    axis.title = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    axis.ticks.x = element_line(color = "purple4", linewidth = 0.2),
    plot.title = element_text(face = "bold"),
    plot.subtitle = element_text(margin = margin(0, 0, 15, 0), lineheight = 1),
    plot.caption = element_text(margin = margin(10, 0, 0, 0)),
    plot.margin = margin(20, 20, 20, 20)
  )