Visualization Examples

library("tidyverse")

Visualization issues

Anscombe quartet

anscombeL <- anscombe %>%
  pivot_longer(everything(),
    names_to = c(".value", "example"),
    names_pattern = "(.)(.)"
  )
ggplot() +
  annotation_custom(gridExtra::tableGrob(anscombe %>%
    select(
      x1, y1, x2, y2,
      x3, y3, x4, y4
    ))) +
  theme_minimal() +
  labs(title = "Anscombe Quartet")

ggplot(data = anscombeL, aes(x = x, y = y)) +
  geom_smooth(method = "lm", fullrange = TRUE) +
  facet_wrap(~example, ncol = 2) +
  labs(
    title = "Anscombe Quartet",
    subtitle = "Linear regression with confidence bar and points"
  ) +
  theme_minimal()

ggplot(data = anscombeL, aes(x = x, y = y)) +
  geom_point() +
  geom_smooth(method = "lm", fullrange = TRUE) +
  facet_wrap(~example, ncol = 2) +
  labs(
    title = "Anscombe Quartet",
    subtitle = "Linear regression with confidence bar"
  ) +
  theme_minimal()

Bad pie

library(rlang)
pie_stats <- function(df, x0, y0, r0, r1, amount, explode, label_perc) {
  x0 <- enquo(x0)
  y0 <- enquo(y0)
  r0 <- enquo(r0)
  r1 <- enquo(r1)
  amount <- enquo(amount)
  explode <- enquo(explode)

  df %>%
    mutate(
      `x0` = !!x0,
      `y0` = !!y0,
      `r0` = !!r0,
      `r1` = !!r1,
      `explode` = !!explode
    ) %>%
    group_by(x0, y0) %>%
    mutate(end = cumsum(!!amount) / sum(!!amount) * 2 * pi) %>%
    mutate(start = lag(end, default = 0)) %>%
    ungroup() %>%
    mutate(
      x_lab = (!!x0) +
        ((!!r0) + label_perc * ((!!r1) - (!!r0)) + (!!explode)) *
          sin((end + start) / 2),
      y_lab = (!!y0) +
        ((!!r0) + label_perc * ((!!r1) - (!!r0)) + (!!explode)) *
          cos((end + start) / 2)
    )
}
data_pie_fox <- tribble(
  ~candidate, ~percent,
  "Palin", 70,
  "Romney", 60,
  "Huckabee", 63
)

data_pie_fox_pie <- data_pie_fox %>%
  pie_stats(0, 0, 0, 1, percent, FALSE, .55)
ggplot(data_pie_fox_pie) +
  ggforce::geom_arc_bar(aes(
    x0 = x0, y0 = y0,
    r0 = r0, r = r1,
    start = start, end = end,
    fill = candidate
  )) +
  geom_text(
    aes(
      x = x_lab, y = y_lab,
      label = glue::glue("Back {candidate}\n{scales::percent(percent/100)}")
    ),
    size = 6
  ) +
  guides(fill = "none") +
  scale_fill_manual(values = c(
    "Huckabee" = "royalblue",
    "Palin" = "red",
    "Romney" = "green"
  )) +
  theme_void() + coord_equal() +
  labs(
    title = "Fox Bad Pie",
    subtitle = "2012 Presidential Run"
  )

ggplot(data = data_pie_fox) +
  geom_col(aes(
    x = candidate, y = percent,
    fill = candidate
  )) +
  geom_text(
    aes(
      x = candidate, y = percent,
      label = glue::glue("Back {candidate}\n{scales::percent(percent/100)}")
    ),
    size = 4,
    vjust = 1.5
  ) +
  scale_fill_manual(values = c(
    "Huckabee" = "royalblue",
    "Palin" = "red",
    "Romney" = "green"
  )) +
  guides(fill = "none") +
  theme_void() +
  labs(
    title = "Bar Plot",
    subtitle = "2012 Presidential Run"
  )

Scale issue

fox_gas <- tribble(
  ~date, ~price, ~in_fox,
  "08/02/2012", 3.57, TRUE,
  "01/02/2012", 3.51, TRUE,
  "01/02/2011", 3.17, TRUE,
  "01/01/2012", 3.34, FALSE,
  "01/12/2011", 3.30, FALSE,
  "01/11/2011", 3.36, FALSE,
  "01/10/2011", 3.43, FALSE,
  "01/09/2011", 3.56, FALSE,
  "01/08/2011", 3.57, FALSE,
  "01/07/2011", 3.58, FALSE,
  "01/06/2011", 3.60, FALSE,
  "01/05/2011", 3.92, FALSE,
  "01/04/2011", 3.78, FALSE,
  "01/03/2011", 3.54, FALSE
) %>%
  mutate(date = lubridate::dmy(date))
ggplot(fox_gas %>% filter(in_fox)) +
  geom_line(aes(
    x = as.factor(date),
    y = price,
    group = 1
  )) +
  scale_x_discrete(labels = c(
    "Last Year", "Last Week",
    "Current"
  )) +
  labs(
    x = NULL,
    y = "Price",
    title = "Cost of Gas",
    subtitle = "Fox"
  ) +
  theme_minimal()

ggplot(fox_gas %>% filter(in_fox)) +
  geom_line(aes(
    x = date,
    y = price
  )) +
  scale_x_date(
    breaks = sort({
      fox_gas %>%
        filter(in_fox) %>%
        .[["date"]]
    }),
    labels = c(
      "Last Year", "Last Week",
      "Current"
    )
  ) +
  scale_y_continuous(limits = c(0, NA)) +
  theme(axis.text.x = element_text(
    angle = 45,
    hjust = 1
  )) +
  labs(
    x = NULL,
    y = "Price",
    title = "Cost of Gas",
    subtitle = "Scale corrected"
  ) +
  theme_minimal()

ggplot(fox_gas) +
  geom_line(aes(
    x = date,
    y = price
  )) +
  scale_x_date(
    breaks = sort({
      fox_gas %>%
        filter(in_fox) %>%
        .[["date"]]
    }),
    labels = c(
      "Last Year", "Last Week",
      "Current"
    )
  ) +
  scale_y_continuous(limits = c(0, NA)) +
  theme(axis.text.x = element_text(
    angle = 45,
    hjust = 1
  )) +
  labs(
    x = NULL,
    y = "Price",
    title = "Cost of Gas",
    subtitle = "Scale and missing data corrected"
  ) +
  theme_minimal()

Truncated axis

election_venezuela <- tribble(
  ~candidate, ~percent,
  "Nicolas Maduro Moros", 50.66,
  "Henrique Capriles Radonski", 49.07
) %>%
  mutate(candidate = as_factor(candidate))
ggplot(data = election_venezuela) +
  geom_col(aes(
    x = candidate, y = percent,
    fill = candidate
  )) +
  geom_text(
    aes(
      x = candidate, y = percent,
      label = scales::percent(percent / 100)
    ),
    vjust = -.45
  ) +
  scale_y_continuous(breaks = NULL) +
  scale_fill_manual(values = c("red", "darkblue")) +
  coord_cartesian(ylim = c(49.05, 51)) +
  guides(fill = "none") +
  labs(
    x = NULL, y = NULL,
    title = "2013 Venezuelian presidential election",
    subtitle = "Venezolana Television"
  ) +
  theme_minimal() +
  theme(panel.grid.major.x = element_blank())

ggplot(data = election_venezuela) +
  geom_col(aes(
    x = candidate, y = percent,
    fill = candidate
  )) +
  geom_text(
    aes(
      x = candidate, y = percent,
      label = scales::percent(percent / 100)
    ),
    vjust = -.45
  ) +
  scale_y_continuous(breaks = NULL) +
  scale_fill_manual(values = c("red", "darkblue")) +
  guides(fill = "none") +
  labs(
    x = NULL, y = NULL,
    title = "2013 Venezuelian presidential election",
    subtitle = "Truncated axis corrected"
  ) +
  theme_minimal() +
  theme(panel.grid.major.x = element_blank())

Scale and selection issue

temperature <- read_csv(
  "data_Examples/Temperature-1895-2019.csv",
  skip = 4,
  col_types = cols(Date = col_character())
) %>%
  mutate(Date = str_sub(Date, end = -3L))

year_selected <- c("1921", "1999", "1934", "2006", "1998", "2012")

temperature <- temperature %>%
  mutate(is_selected = Date %in% year_selected)
ggplot(data = temperature %>%
  filter(is_selected) %>%
  mutate(Date = fct_reorder(
    Date,
    Value
  ))) +
  geom_col(aes(x = Date, y = Value)) +
  geom_text(aes(x = Date, y = Value, label = Value),
    vjust = -0.1
  ) +
  coord_cartesian(ylim = c(53, 55.5)) +
  labs(
    x = "Year",
    y = "Average temperature",
    title = "Warmest year",
    subtitle = "6 warmest years before 2012"
  ) +
  theme_minimal() +
  theme(panel.grid.major.x = element_blank())

ggplot(
  data = temperature %>%
    filter(as.integer(Date) <= 2012),
  aes(
    x = as.integer(Date),
    y = Value
  )
) +
  geom_line() +
  geom_point(
    data = temperature %>% filter(is_selected),
    color = "red",
    size = 2
  ) +
  labs(
    x = "Year",
    y = "Average temperature",
    title = "Warmest year",
    subtitle = "6 warmest years before 2012 in context"
  ) +
  theme_minimal()

ggplot(
  data = temperature %>%
    filter(as.integer(Date) <= 2018),
  aes(
    x = as.integer(Date),
    y = Value
  )
) +
  geom_line() +
  geom_point(
    data = temperature %>% filter(rank(desc(Value)) <= 6),
    color = "red",
    size = 2
  ) +
  labs(
    x = "Year",
    y = "Average temperature",
    title = "Warmest year",
    subtitle = "6 warmest years before 2018 in context"
  ) +
  theme_minimal()

Clutter issue

canada <- tribble(
  ~state, ~age,
  "B.C.", 19,
  "Alberta", 18,
  "Saskatchewan", 19,
  "Manitoba", 18,
  "Ontario", 19,
  "Quebec", 18,
  "New Brunswick", 19,
  "PEI", 19,
  "Nova Scotia", 19,
  "Newfoundland", 19,
  "NWT", 19,
  "Nunavut", 19,
  "Yukon", 19
) %>%
  mutate(state = as_factor(state))
ggplot(
  data = canada,
  aes(x = state, y = age)
) +
  geom_col(fill = "grey60") +
  coord_cartesian(ylim = c(17, 20)) +
  geom_text(
    aes(label = state),
    angle = 90,
    hjust = 1, nudge_y = -.1
  ) +
  geom_text(aes(label = age), nudge_y = .1) +
  labs(
    x = "Province and territories",
    y = "Age",
    title = "Drinking ages across Canada",
    subtitle = "Clutter issue"
  ) +
  geom_vline(
    xintercept = seq(.5, 20, by = 1),
    size = .1,
    color = "grey"
  ) +
  scale_y_continuous(breaks = seq(17, 20, by = .6)) +
  theme_minimal() +
  theme(
    panel.grid.major.x = element_blank(),
    axis.text.x = element_text(size = 5)
  )

ggplot(
  data = canada,
  aes(x = state, y = age)
) +
  geom_col(fill = "grey60") +
  coord_cartesian(ylim = c(17.5, 19.5)) +
  geom_text(
    aes(label = state),
    angle = 90,
    hjust = 1, nudge_y = -.1
  ) +
  geom_text(aes(label = age), nudge_y = .1) +
  labs(
    x = "Province and territories",
    y = "Age",
    title = "Drinking ages across Canada",
    subtitle = "Less clutter issue?"
  ) +
  theme_void() +
  theme(
    panel.grid.major.x = element_blank(),
    axis.text.x = element_blank()
  )

ggplot(
  data = canada,
  aes(
    x = fct_rev(fct_reorder(state, age)),
    y = age
  )
) +
  geom_col(fill = "grey60") +
  geom_text(
    aes(label = state),
    angle = 0,
    hjust = 1, nudge_y = -.1
  ) +
  geom_text(aes(label = age), nudge_y = .1) +
  coord_flip(ylim = c(17.5, 19.5)) +
  theme_void() +
  labs(
    x = "Province and territories",
    y = "Age",
    title = "Drinking ages across Canada",
    subtitle = "Less clutter issue?"
  )

Radius vs area issue

energy <- tribble(
  ~energy_source, ~amount,
  "Carbon capture and storage", 2.3,
  "Renewable energy", 12.2,
  "Corn ethanol", 16.8,
  "Fossil oils", 70.2
) %>%
  mutate(energy_source = as_factor(energy_source))
ggplot(data = energy, aes(x = energy_source, y = "")) +
  geom_point(aes(size = amount)) +
  geom_text(
    aes(label = amount),
    nudge_y = c(.075, .1, .1, .2)
  ) +
  geom_text(aes(label = energy_source),
    nudge_y = -.25
  ) +
  scale_radius(
    range = c(0, 30),
    limits = c(0, NA)
  ) +
  guides(size = "none") +
  scale_x_discrete(breaks = NULL) +
  scale_y_discrete(breaks = NULL) +
  labs(
    x = NULL,
    y = NULL,
    size = "Amount spent in million of $",
    title = "Subsidize this",
    subtitle = "Scale issue"
  ) +
  theme_minimal() +
  theme(panel.grid.major.x = element_blank())

ggplot(data = energy, aes(x = energy_source, y = "")) +
  geom_point(aes(size = amount)) +
  geom_text(
    aes(label = amount),
    nudge_y = c(.075, .1, .1, .17)
  ) +
  geom_text(aes(label = energy_source),
    nudge_y = -.25
  ) +
  scale_size_area(max_size = 30) +
  guides(size = "none") +
  scale_x_discrete(breaks = NULL) +
  scale_y_discrete(breaks = NULL) +
  labs(
    x = NULL,
    y = NULL,
    size = "Amount spent in million of $",
    title = "Subsidize this",
    subtitle = "Scale issue corrected"
  ) +
  theme_minimal() +
  theme(panel.grid.major.x = element_blank())

GDP <- tribble(
  ~country, ~GDP,
  "United States", 14.6,
  "China", 5.7,
  "Japan", 5.3,
  "Germany", 3.3,
  "France", 2.5
) %>%
  mutate(country = as_factor(country))
ggplot(GDP, aes(x = "", y = fct_rev(country))) +
  geom_point(aes(size = GDP)) +
  geom_text(
    aes(label = country),
    nudge_x = -.1,
    hjust = 1
  ) +
  geom_text(
    aes(label = glue::glue("{GDP} trillion")),
    nudge_x = .05,
    hjust = -.5
  ) +
  scale_radius(
    range = c(0, 30),
    limits = c(0, NA)
  ) +
  guides(size = "none") +
  theme_void() +
  labs(
    title = "GDP 2012",
    subtitle = "Size issue"
  )

ggplot(GDP, aes(x = "", y = fct_rev(country))) +
  geom_point(aes(size = GDP)) +
  geom_text(
    aes(label = country),
    nudge_x = -.1,
    hjust = 1
  ) +
  geom_text(
    aes(label = glue::glue("{GDP} trillion")),
    nudge_x = .05,
    hjust = -.5
  ) +
  scale_size_area(max_size = 30) +
  guides(size = "none") +
  theme_void() +
  labs(
    title = "GDP 2012",
    subtitle = "Size issue corrected"
  )

Unconventional axis issue

data(ethanol, package = "SemiPar")
ggplot(data = ethanol) +
  geom_point(aes(x = NOx, y = E)) +
  labs(
    x = "NOx concentration",
    y = "Equivalence ratio",
    title = "A single-cylinder engine study of efficiency and exhaust emissions",
    subtitle = "Equivalence ratio at which the engine was run (a measure of the richness of the air/ethanol mix)\n in function of the NOx concentration"
  ) +
  theme_minimal()

ggplot(data = ethanol) +
  geom_point(aes(x = E, y = NOx)) +
  labs(
    x = "Equivalence ratio",
    y = "NOx concentration",
    title = "A single-cylinder engine study of efficiency and exhaust emissions",
    subtitle = "NOx concentration  in function of the equivalence ratio at which the engine was run (a measure of the richness \n of the air/ethanol mix)"
  ) +
  theme_minimal()

Historical visualizations

Playfair

playfair_balance <- tibble::tribble(
  ~year, ~exports, ~imports,
  1700L, 31.3, 70.7,
  1701L, 35.2, 71.3,
  1702L, 37.9, 72.1,
  1703L, 39.7, 73.1,
  1704L, 41, 74.2,
  1705L, 42.3, 75.5,
  1706L, 44.1, 76.7,
  1707L, 47.1, 77.8,
  1708L, 51.3, 79,
  1709L, 56.2, 80.5,
  1710L, 61.3, 82.3,
  1711L, 66, 83.8,
  1712L, 70.2, 84.8,
  1713L, 73.7, 85.9,
  1714L, 76.3, 87.3,
  1715L, 77.9, 88.6,
  1716L, 78.4, 89.6,
  1717L, 78.3, 90.7,
  1718L, 77.5, 92.4,
  1719L, 76.5, 94.6,
  1720L, 75.4, 96.9,
  1721L, 74.3, 99,
  1722L, 73.5, 100.5,
  1723L, 72.9, 101.4,
  1724L, 72.3, 101.7,
  1725L, 71.8, 101.5,
  1726L, 71, 100.8,
  1727L, 69.9, 99.8,
  1728L, 68.2, 98.7,
  1729L, 66.1, 97.5,
  1730L, 63.8, 96.3,
  1731L, 61.8, 95.2,
  1732L, 60.5, 94.3,
  1733L, 60.1, 93.9,
  1734L, 60.3, 93.8,
  1735L, 60.6, 93.7,
  1736L, 60.9, 93.5,
  1737L, 61.6, 93.1,
  1738L, 62.6, 92.9,
  1739L, 64.2, 92.8,
  1740L, 66.1, 92.9,
  1741L, 68.2, 92.9,
  1742L, 70.2, 92.9,
  1743L, 72.1, 92.6,
  1744L, 73.8, 91.8,
  1745L, 75.1, 90.7,
  1746L, 76, 90,
  1747L, 76.5, 89.8,
  1748L, 76.7, 90,
  1749L, 76.9, 89.9,
  1750L, 77.4, 89.1,
  1751L, 78.1, 87.5,
  1752L, 79.1, 85.5,
  1753L, 80.6, 83.5,
  1754L, 82.5, 81.1,
  1755L, 85, 78.7,
  1756L, 88, 77.5,
  1757L, 91.8, 77.4,
  1758L, 97.4, 77.4,
  1759L, 105.9, 77.4,
  1760L, 117.9, 77.3,
  1761L, 129.6, 77.5,
  1762L, 138.5, 78,
  1763L, 144.6, 78.6,
  1764L, 148.9, 79.3,
  1765L, 151.8, 80,
  1766L, 153.9, 80.7,
  1767L, 155.8, 81.4,
  1768L, 158.1, 82.2,
  1769L, 160.7, 83.2,
  1770L, 163.6, 84.4,
  1771L, 166.9, 85.8,
  1772L, 170.3, 87.4,
  1773L, 173.9, 88.8,
  1774L, 177.3, 90.1,
  1775L, 180.4, 91,
  1776L, 183, 91.5,
  1777L, 185, 91.8,
  1778L, 186.1, 91.8,
  1779L, 186.3, 91.4,
  1780L, 185.3, 90.8
)
ggplot(
  data = playfair_balance %>%
    pivot_longer(-year,
      names_to = "imports/exports",
      values_to = "amount"
    ),
  aes(x = year)
) +
  geom_ribbon(
    data = playfair_balance,
    aes(
      ymin = imports,
      ymax = exports,
      fill = exports >= imports
    ),
    alpha = .3
  ) +
  geom_line(aes(y = amount, color = `imports/exports`)) +
  directlabels::geom_dl(
    aes(
      y = amount,
      label = `imports/exports`
    ),
    color = "black",
    method = list(
      box.color = NA,
      fill = NA,
      "angled.boxes"
    )
  ) +
  guides(fill = "none", color = "none") +
  labs(
    x = "Year", y = "Amount in L10,000",
    color = "Imports/Exports",
    title = "Playfair Line Chart",
    subtitle = "Commercial balance between England, and Danemark and Norway",
    caption = "Values estimated from the original graph"
  ) +
  scale_x_continuous(
    breaks = seq(1700, 1780, by = 10),
    expand = c(0, 0)
  ) +
  scale_y_continuous(
    limits = c(10, 190),
    breaks = seq(10, 190, by = 10),
    position = "right"
  ) +
  scale_fill_manual(values = c(
    "TRUE" = "lightgoldenrod3",
    "FALSE" = "lightpink"
  )) +
  scale_color_manual(values = c(
    "exports" = "red",
    "imports" = "orange"
  )) +
  theme_light() +
  theme(
    panel.grid.minor = element_blank(),
    plot.margin = margin(l = 10)
  )

ggplot(data = playfair_balance, aes(x = year)) +
  geom_ribbon(
    aes(
      ymin = 0,
      ymax = exports - imports,
      fill = exports >= imports
    ),
    alpha = .3
  ) +
  geom_line(aes(y = exports - imports)) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  guides(fill = "none") +
  labs(
    x = "Year", y = "Balance in favor of England in L10,000",
    title = "Playfair Line Chart",
    subtitle = "Commercial balance between England, and Danemark and Norway",
    caption = "Values estimated from the original graph"
  ) +
  scale_x_continuous(
    breaks = seq(1700, 1780, by = 10),
    expand = c(0, 0)
  ) +
  scale_y_continuous(
    breaks = seq(-50, 90, by = 10),
    position = "right"
  ) +
  scale_fill_manual(values = c(
    "TRUE" = "lightgoldenrod3",
    "FALSE" = "lightpink"
  )) +
  theme_light() +
  theme(
    panel.grid.minor = element_blank(),
    plot.margin = margin(l = 10)
  )

library("HistData")
ggplot(
  data = Wheat %>% pivot_longer(-Year, names_to = "variable", values_to = "value") %>%
    mutate(variable = fct_relevel(
      variable,
      "Wheat"
    )),
  aes(x = Year, y = value)
) +
  pammtools::geom_stepribbon(aes(
    ymax = value,
    ymin = 0,
    fill = variable
  )) +
  labs(
    x = "Year",
    y = "Price of wheat or wages",
    color = NULL,
    title = "Evolution of the price of wheat and of the wages of a good mechanic",
    subtitle = "Playfair"
  ) +
  scale_y_continuous(
    limits = c(0, NA), expand = c(0, 0),
    position = "r",
    sec.axis = sec_axis("identity")
  ) +
  scale_x_continuous(expand = c(0, 0)) +
  scale_fill_manual(values = c(
    "Wages" = "lightblue",
    "Wheat" = "gray25"
  )) +
  scale_color_manual(values = c(
    "Wages" = "lightblue",
    "Wheat" = "gray25"
  )) +
  theme_light() +
  guides(fill = "none", color = "none") +
  geom_text(
    data =
      tibble(
        Year = c(1680, 1690),
        value = c(15, 55),
        variable = c("Wages", "Wheat"),
      ),
    aes(
      label = variable,
      color = variable
    ),
    size = 6
  )

ggplot(data = Wheat, aes(x = Year)) +
  geom_step(aes(y = Wheat / Wages)) +
  scale_y_continuous(limits = c(0, NA)) +
  labs(
    x = "Year",
    y = "Ratio between the price of wheat and the wages",
    title = "Evolution of the price of Wheat and of the wages of a good mechanic",
    subtitle = "Playfair"
  ) +
  theme_minimal()

Minard

library(HistData)
Minard.troops <- Minard.troops %>%
  group_by(group) %>%
  mutate(id = 1:n()) %>%
  ungroup() %>%
  mutate(group = 4 - group) %>%
  arrange(desc(id))

Minard.cities <- Minard.cities %>%
  mutate(nudge_lat = case_when(
    city == "Dorogobouge" ~ -.5,
    city == "Wixma" ~ -.4,
    city == "Malo-Jarosewii" ~ -.3,
    city == "Bobr" ~ -.5,
    city == "Studienska" ~ .05,
    city == "Moiodexno" ~ -.25,
    TRUE ~ 0
  ))

Minard.temp <- Minard.temp %>% 
  mutate(date = str_c(str_sub(Minard.temp$date, end = 3L),
                      " ",
                      str_sub(Minard.temp$date, start = 4L)),
         date = if_else(is.na(date), "", date))
library(ggforce)

p_map <- ggplot(
  data = Minard.troops,
  aes(x = long, y = lat)
) +
   geom_link2(aes(
    group = group,
    size = survivors,
    color = direction
  ),
  lineend = "round") +
  geom_link2(
    data = Minard.troops %>%
      filter(id != 1),
    aes(
      group = group,
      size = survivors,
      color = direction
    ),
    lineend = "round"
  ) +
  geom_point(
    data = Minard.cities,
    size = 1
  ) +
  geom_text(
    data = Minard.cities, aes(
      label = city,
      y = lat + nudge_lat
    ),
    size = 3,
    color = "grey50",
    nudge_y = .15
  ) +
  scale_size(
    range = c(0, 8),
    limits = c(0, NA),
    labels = scales::comma_format(),
    trans = "identity"
  ) +
  scale_color_manual(values = c("lightgoldenrod3", "black")) +
  labs(
    x = "Longitude",
    y = "Latitude",
    size = "Surivors",
    color = "Direction",
    title = "Napoleon's march to Russia",
    subtitle = "Minard"
  ) +
  scale_x_continuous(limits = c(23.9, 37.6)) +
  coord_equal(ratio = 1.75,
              xlim = c(23.9, 37.6)) +
  guides(
    color = "none",
    size = "none"
  ) +
  theme_void()

p_temp <- ggplot(data = Minard.temp,
       aes(x = long, y = temp)) +
  geom_line() +
  scale_x_continuous(breaks = Minard.temp$long,
                     labels = Minard.temp$date,
                     position = "top",
                     limits = c(23.9, 37.6)) +
  scale_y_continuous(position = "right") +
  coord_cartesian(xlim = c(23.2, 38.3)) +
  labs(title = NULL,
       subtitle = "Temperature in Reaumur Degree",
       x = NULL,
       y = NULL) +
  theme_minimal() +
  theme(panel.grid.minor.x = element_blank(),
        axis.text.x.top = element_text(size = 4)
  )

library(patchwork)
p_map + p_temp + 
  plot_layout(ncol = 1, heights = c(4,1))

library(sf)

dpt_ori <- sf::read_sf("departements/departements-20140306-100m.shp") %>%
  select(code_insee, nom, geometry) %>%
  filter(
    code_insee != "2A", code_insee != "2B",
    !str_detect(code_insee, "97.+")
  )

dpt_seine <- tibble(
  code_insee = "78",
  nom = "Seine"
)
st_geometry(dpt_seine) <- dpt_ori %>%
  filter(
    code_insee %in% c("78", "91", "92", "93", "94", "95")
  ) %>%
  st_union() %>%
  st_cast("MULTIPOLYGON")

dpt <- rbind(
  dpt_ori %>%
    filter(
      !(code_insee %in% c("78", "91", "92", "93", "94", "95")
      )
    ),
  dpt_seine
)

dpt_centroids <- dpt %>%
  st_centroid() %>%
  st_coordinates() %>%
  as_data_frame() %>%
  bind_cols(code_insee = dpt[["code_insee"]])

dpt_production <- tribble(
  ~code_insee, ~noir, ~rouge, ~vert,
  "02", 1, 0, 4,
  "03", 10, 0, 0,
  "08", 2, 0, 0,
  "10", 0, 0, 10,
  "14", 120, 0, 0,
  "15", 1, 0, 0,
  "16", 60, 0, 0,
  "17", 44, 0, 2,
  "18", 20, 0, 29,
  "19", 50, 0, 0,
  "21", 10, 0, 5,
  "22", 0, 0, .5,
  "23", 13, 0, 3,
  "24", 47, 0, 5,
  "25", 1, 0, 0,
  "27", 2.5, 20, 1.5,
  "28", 2.5, 30, 2.5,
  "29", 0, 4, 0,
  "31", 3, 0, 0,
  "33", 3, 0, 0,
  "35", 0, 0, .5,
  "36", 26, 0, 25,
  "37", 5, 0, 0,
  "40", 1, 0, 0,
  "41", 3, 0, 0,
  "43", 3, 0, 0,
  "44", 7, 0, 0,
  "45", 2, 18, 22,
  "46", 3, 0, 0,
  "47", 3, 0, 0,
  "49", 100, 0, 20,
  "50", 9, 0, 1,
  "51", 4, 0, 15,
  "52", 5, 0, 0,
  "53", 30, 0, 2,
  "54", 2, 0, 0,
  "55", 2, 0, 0,
  "56", 0, 2, 0,
  "57", 3, 0, 0,
  "58", 50, 0, 2,
  "59", 5, 0, 20,
  "60", 2, 14, 10,
  "61", 60, 0, 0,
  "62", 2, 0, 10,
  "65", 1, 0, 0,
  "71", 4, 0, 0,
  "72", 31, 0, 1,
  "75", 22.5, 0, 2.5,
  "76", 8, 0, 24,
  "77", 7, 20, 10,
  "78", 25, 30, 45,
  "79", 38, 0, 14,
  "80", 2, 0, 10,
  "85", 77, 0, 3,
  "86", 17, 0, 18,
  "87", 50, 0, 10,
  "88", 1, 0, 0,
  "89", 5, 0, 15
) %>%
  mutate(prod_tot = noir + rouge + vert)

dpt_join <- dpt_production %>%
  pivot_longer(c(-code_insee, -prod_tot), names_to = "prod_type", values_to = "prod") %>%
  left_join(dpt_centroids)

plot_pie <- function(df) {
  pie_df <- df %>%
    select(-prod_tot) %>%
    pivot_longer(everything(), names_to = "prod_type", values_to = "prod") %>%
    pie_stats(0, 0, 0, 1, prod, FALSE, 0.8)

  p <- ggplot(data = pie_df) +
    ggforce::geom_arc_bar(
      data = pie_df,
      aes(
        x0 = x0, y0 = y0,
        r0 = r0, r = r1,
        start = start, end = end,
        fill = prod_type
      )
    ) +
    guides(fill = "none") +
    scale_fill_manual(values = c(
      "noir" = "black",
      "vert" = "green",
      "rouge" = "red"
    )) +

    theme_void() +
    coord_equal()

  tibble(plot = list(p), scale = sqrt(df[["prod_tot"]]), prod_tot = df[["prod_tot"]])
}

dpt_production_plot <- dpt_production %>%
  nest(-code_insee) %>%
  mutate(data = map(data, plot_pie)) %>%
  unnest() %>%
  left_join(dpt_centroids) %>%
  mutate(
    X = if_else(code_insee == "78", X - .15, X),
    Y = if_else(code_insee == "78", Y + .1, Y)
  )
ggplot(data = dpt %>% left_join(dpt_production)) +
  geom_sf(aes(fill = !is.na(prod_tot))) +
  ggimage::geom_subview(
    data = dpt_production_plot %>%
      mutate(scale = .09 * scale),
    aes(
      x = X, y = Y,
      subview = plot,
      width = scale,
      height = scale
    )
  ) +
  coord_sf(datum = NA) +
  guides(fill = "none") +
  scale_fill_manual(values = c(
    "TRUE" = "lemonchiffon",
    "FALSE" = "grey70"
  )) +

  theme_void() +
  labs(
    title = "Paris Meat Provenance",
    subtitle = "Minard",
    caption = "approximate dataset!"
  )

Nightingale

Nightingale2 <- Nightingale %>%
  select(Date, Disease, Wounds, Other) %>%
  pivot_longer(c(Disease, Wounds, Other), names_to = "variable", values_to = "value") %>%
  group_by(Date) %>%
  arrange(desc(variable)) %>%
  mutate(value2 = cumsum(value)) %>%
  mutate(value3 = sqrt(value2) - sqrt(lag(value2, default = 0))) %>%
  ungroup()
ggplot(
  data = filter(
    Nightingale2,
    Date <= "1855-03-02"
  ),
  aes(x = as.factor(Date))
) +
  geom_col(
    aes(y = value3, fill = variable),
    color = "black", width = 1
  ) +
  geom_text(
    data = filter(
      Nightingale2,
      Date <= "1855-03-02"
    ) %>%
      group_by(Date) %>%
      summarize(value3 = max(sqrt(value2))) %>%
      mutate(value3 = max(value3)),
    aes(y = value3 * (1.1), label = Date),
    size = 4
  ) +
  coord_polar(start = -pi / 2, direction = 1) +
  scale_y_continuous(breaks = NULL) +
  scale_fill_manual(values = c(
    "gray70", "gray40",
    "lightpink"
  )) +
  theme(axis.text.x = element_blank()) +
  scale_x_discrete(label = NULL) +
  labs(
    title = "Nightingale Rose Chart",
    x = NULL,
    y = NULL,
    fill = "Reason of death"
  ) +
  geom_vline(
    xintercept = seq(.5, 13, by = 1),
    size = .1,
    color = "grey"
  ) +
  theme_void()

ggplot(
  data = filter(
    Nightingale2,
    Date > "1855-03-02"
  ),
  aes(x = as.factor(Date))
) +
  geom_col(
    aes(y = value3, fill = variable),
    color = "black", width = 1
  ) +
  geom_text(
    data = filter(
      Nightingale2,
      Date > "1855-03-02"
    ) %>%
      group_by(Date) %>%
      summarize(value3 = max(sqrt(value2))) %>%
      mutate(value3 = max(value3)),
    aes(y = value3 * (1.1), label = Date),
    size = 4
  ) +
  coord_polar(start = -pi / 2, direction = 1) +
  scale_y_continuous(breaks = NULL) +
  scale_fill_manual(values = c(
    "gray70", "gray40",
    "lightpink"
  )) +
  theme(axis.text.x = element_blank()) +
  scale_x_discrete(label = NULL) +
  labs(
    title = "Nightingale Rose Chart",
    x = NULL,
    y = NULL,
    fill = "Reason of death"
  ) +
  geom_vline(
    xintercept = seq(.5, 13, by = 1),
    size = .1,
    color = "grey"
  ) +
  theme_void()

ggplot(
  data = filter(
    Nightingale2,
    Date <= "1855-03-02"
  ),
  aes(x = as.factor(Date))
) +
  geom_col(
    aes(y = value, fill = variable),
    color = "black", width = 1
  ) +
  geom_text(
    data = filter(
      Nightingale2,
      Date <= "1855-03-02"
    ) %>%
      group_by(Date) %>%
      summarize(value = max(value2)) %>%
      mutate(value = max(value)),
    aes(y = value * (1.1), label = Date),
    size = 4
  ) +
  coord_polar(start = -pi / 2, direction = 1) +
  scale_y_continuous(breaks = NULL) +
  scale_fill_manual(values = c(
    "gray70", "gray40",
    "lightpink"
  )) +
  theme(axis.text.x = element_blank()) +
  scale_x_discrete(label = NULL) +
  labs(
    title = "Nightingale Rose Chart",
    subtitle = "without scale correction",
    x = NULL,
    y = NULL,
    fill = "Reason of death"
  ) +
  geom_vline(
    xintercept = seq(.5, 13, by = 1),
    size = .1,
    color = "grey"
  ) +
  theme_void()

ggplot(
  data = filter(
    Nightingale2,
    Date > "1855-03-02"
  ),
  aes(x = as.factor(Date))
) +
  geom_col(
    aes(y = value, fill = variable),
    color = "black", width = 1
  ) +
  geom_text(
    data = filter(
      Nightingale2,
      Date > "1855-03-02"
    ) %>%
      group_by(Date) %>%
      summarize(value = max(value2)) %>%
      mutate(value = max(value)),
    aes(y = value * (1.1), label = Date),
    size = 4
  ) +
  coord_polar(start = -pi / 2, direction = 1) +
  scale_y_continuous(breaks = NULL) +
  scale_fill_manual(values = c(
    "gray70", "gray40",
    "lightpink"
  )) +
  theme(axis.text.x = element_blank()) +
  scale_x_discrete(label = NULL) +
  labs(
    title = "Nightingale Rose Chart",
    subtitle = "without scale correction",
    x = NULL,
    y = NULL,
    fill = "Reason of death"
  ) +
  geom_vline(
    xintercept = seq(.5, 13, by = 1),
    size = .1,
    color = "grey"
  ) +
  theme_void()

ggplot(
  data = filter(Nightingale2, Date <= "1855-03-02"),
  aes(x = as.factor(Date))
) +
  geom_col(
    aes(y = value, fill = variable),
    color = "black", width = 1
  ) +
  scale_fill_manual(values = c(
    "gray70", "gray40",
    "lightpink"
  )) +
  labs(
    title = "Nightingale Bar Chart",
    x = "Date",
    y = "Number of deaths",
    fill = "Reason of death"
  ) +
  geom_vline(
    xintercept = seq(.5, 13, by = 1),
    size = .1,
    color = "grey"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(
      angle = 45,
      hjust = 1
    ),
    panel.grid.major.x = element_blank()
  )

ggplot(Nightingale2, aes(x = as.factor(Date))) +
  geom_col(
    aes(y = value, fill = variable),
    color = "black", width = 1
  ) +
  scale_fill_manual(values = c(
    "gray70", "gray40",
    "lightpink"
  )) +
  labs(
    title = "Nightingale Bar Chart",
    x = "Date",
    y = "Number of deaths",
    fill = "Reason of death"
  ) +
  geom_vline(
    xintercept = seq(.5, 25, by = 1),
    size = .1,
    color = "grey"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(
      angle = 45,
      hjust = 1
    ),
    panel.grid.major.x = element_blank()
  )

ggplot(
  data =
    Nightingale2 %>% group_by(variable) %>%
      summarise(value = sum(value)),
  aes(x = variable, y = value, fill = variable)
) +
  geom_col() +
  guides(fill = "none") +
  scale_fill_manual(values = c(
    "gray70", "gray40",
    "lightpink"
  )) +
  labs(
    title = "Nightingale Bar Chart",
    x = "Reason of death",
    y = "Number of deaths"
  ) +
  theme_minimal() +
  theme(panel.grid.major.x = element_blank())

ggplot(data = Nightingale2 %>% group_by(variable) %>%
  summarize(value = sum(value)) %>%
  pie_stats(0, 0, 0, 1, value, FALSE, c(.3, .75, .75))) +
  ggforce::geom_arc_bar(
    aes(
      x0 = 0, y0 = 0, r0 = 0, r = 1,
      amount = value, fill = variable
    ),
    stat = "pie"
  ) +
  geom_text(aes(x = x_lab, y = y_lab, label = variable),
    size = 5
  ) +
  scale_fill_manual(values = c(
    "gray70", "gray40",
    "lightpink"
  )) +
  guides(fill = "none") +
  labs(
    title = "Nightingale Pie Chart"
  ) +
  coord_equal() +
  theme_void()

Snow

library(HistData)
ggplot(data = Snow.deaths2, aes(x = x, y = y)) +
  geom_path(data = Snow.streets, aes(group = street)) +
  geom_point(aes(color = "Cholera death")) +
  geom_point(data = Snow.pumps, aes(color = "Pump"), size = 3) +
  scale_color_manual(values = c(
    "Cholera death" = "black",
    "Pump" = "red"
  )) +
  guides(color = guide_legend(
    title = NULL,
    override.aes = list(size = c(1, 3))
  )) +
  coord_equal() +
  theme_void() +
  ggtitle("John Snow cholera map")

ggplot(data = Snow.deaths2, aes(x = x, y = y)) +
  geom_path(data = Snow.streets, aes(group = street)) +
  stat_density_2d(
    aes(fill = ..level..),
    geom = "polygon",
    alpha = .5, color = NA
  ) +
  geom_point(aes(color = "Cholera death")) +
  geom_point(data = Snow.pumps, aes(color = "Pump"), size = 3) +
  scale_color_manual(values = c(
    "Cholera death" = "black",
    "Pump" = "red"
  )) +
  viridis::scale_fill_viridis() +
  guides(
    color = guide_legend(
      title = NULL,
      override.aes = list(size = c(1, 3))
    ),
    fill = guide_colorbar(title = "Dead density")
  ) +
  coord_equal() +
  theme_void() +
  ggtitle("John Snow cholera map", subtitle = " with density")

SnowDeath <- Snow.dates %>%
  mutate(week = lubridate::ymd("1854-09-08") +
    lubridate::dweeks(floor((date - lubridate::ymd("1854-09-08"))
    / lubridate::dweeks(1))))
ggplot(data = SnowDeath, aes(x = week, y = deaths)) +
  geom_col() +
  geom_segment(
    x = as.numeric(lubridate::ymd("1854-09-08")) - 3.5,
    xend = as.numeric(lubridate::ymd("1854-09-08")) - 3.5,
    y = -Inf, yend = Inf, color = "red",
    size = 2
  ) +
  scale_x_date(
    breaks = unique(SnowDeath$week) - 3,
    labels = unique(SnowDeath$week)
  ) +
  labs(
    x = "Date",
    y = "Number of deaths",
    title = "John Snow cholera bar plot",
    subtitle = "by week"
  ) +
  theme_minimal()

ggplot(data = SnowDeath, aes(x = date, y = deaths)) +
  geom_col() +
  geom_segment(
    x = as.numeric(lubridate::ymd("1854-09-08")) - .5,
    xend = as.numeric(lubridate::ymd("1854-09-08")) - .5,
    y = -Inf, yend = Inf, color = "red",
    size = 2
  ) +
  scale_x_date(breaks = unique(SnowDeath$week) - .5) +
  labs(
    x = "Date",
    y = "Number of deaths",
    title = "John Snow cholera bar plot",
    subtitle = "by day"
  ) +
  theme_minimal()

Du Bois

Credit to Matthew A. (statswithmatt)

freemen <- data.frame(
  year = seq(1790, 1870, by = 10),
  pct_free = c(0.08, 0.11, 0.135, 0.13, 0.14, 0.13, 0.12, 0.11, 1)
) %>%
  mutate(
    pct_slave = 1 - pct_free,
    # replace the last value (0%) with the previous one so that it's aligned
    # in the same place as the actual image
    labels = replace(pct_slave, n(), pct_slave[n() - 1])
  )
font_name <- "Inconsolata"
theme_du_bois <- function() {
  theme_gray(base_family = font_name) %+replace%
    theme(
      plot.background = element_rect(
        fill = "antiquewhite2",
        color = "antiquewhite2"
      ),
      panel.background = element_rect(
        fill = "antiquewhite2",
        color = "antiquewhite2"
      ),
      plot.title = element_text(
        hjust = 0.5,
        face = "bold"
      ),
      plot.subtitle = element_text(hjust = 0.5)
    )
}
ppmsca_33913 <- ggplot(
  data = freemen,
  mapping = aes(
    x = year,
    y = pct_slave
  )
) +
  geom_area(aes(y = 1),
    fill = "seagreen"
  ) +
  geom_area(fill = "gray15") +
  labs(
    title = "PROPORTION OF FREEMEN AND SLAVES AMONG AMERICAN NEGROES.\nPROPORTION DES NÈGRES LIBRES ET DES ESCLAVES EN AMÉRIQUE.\n",
    subtitle = "DONE BY ATLANTA UNIVERSITY.\n\n⎄"
  ) +
  scale_x_continuous(
    breaks = seq(1790, 1870, by = 10),
    position = "top"
  ) +
  coord_cartesian(
    expand = FALSE,
    clip = "off",
    xlim = c(1788, 1872)
  ) +
  theme_du_bois()

# annotations for plot
ppmsca_33913 + geom_text(
  aes(
    y = labels,
    label = scales::percent(pct_free, accuracy = 1),
    family = font_name,
    fontface = "bold"
  ),
  nudge_y = 0.02
) +
  annotate(
    "text",
    label = c("SLAVES\nESCLAVES", "FREE - LIBRE"),
    color = c("antiquewhite", "black"),
    size = c(9, 6),
    x = 1830,
    y = c(0.5, 0.97),
    family = font_name,
    fontface = "bold"
  ) +
  ### theme adjustments
  theme(
    text = element_text(face = "bold"),
    panel.background = element_blank(),
    plot.subtitle = element_text(size = 7),
    panel.grid.major.x = element_line(color = "gray25"),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank(),
    axis.text.x = element_text(face = "bold", size = 12),
    axis.text.y = element_blank(),
    axis.ticks = element_blank(),
    axis.title = element_blank()
  )

gender <- c("female", "male")
status <- c("single", "widowed", "married")
age_bins <- c(
  "0-15", "15-20", "20-25", "25-30", "30-35",
  "35-45", "45-55", "55-65", "OVER 65"
)

marital <- expand.grid(age = age_bins, gender = gender, status = status) %>%
  mutate(
    pct = c(
      100, 84, 38, 18, 12, 8, 6, 4, 4,
      100, 99, 66, 30, 18, 10, 6, 4, 4,
      0, 0, 4, 8, 10, 16, 28, 44, 66,
      0, 0, 1, 2, 3, 5, 9, 11, 20,
      0, 16, 58, 74, 78, 76, 66, 52, 30,
      0, 1, 33, 68, 79, 85, 85, 85, 76
    ),
    status = factor(
      status,
      levels = c("widowed", "married", "single")
    ),
    age_numeric = as.numeric(age)
  )
ggplot(
  data = marital,
  mapping = aes(
    x = age_numeric,
    # should just be able to negate pct to get pyramid plot. for gender, men
    # are on the left, so they get the negative
    y = if_else(gender == "male", -pct, pct),
    fill = status
  )
) +
  geom_bar(
    stat = "identity",
    width = 1
  ) +
  scale_x_continuous(
    breaks = (1:9) + 0.5,
    labels = age_bins,
    expand = c(0, 0),
    sec.axis = dup_axis() # dual age axis
  ) +
  scale_y_continuous(
    breaks = seq(-100, 100, by = 10),
    labels = abs,
    expand = c(0, 0),
    # lines on original plot are by 2s
    minor_breaks = seq(-100, 100, by = 2)
  ) +
  scale_fill_manual(
    values = c("seagreen4", "firebrick3", "royalblue3"),
    labels = c("WIDOWED", "MARRIED", "SINGLE")
  ) +
  labs(
    title = "Conjugal condition of American Negroes according to age periods.\nCondition conjugale des Nègres Americains au point de vue de l'age.\n",
    subtitle = "Done by Atlanta University.\n\n",
    x = "AGES.",
    y = "PER CENTS."
  ) +
  coord_flip(clip = "off") +
  theme_du_bois() +
  annotate(
    "text",
    label = rep(c("SINGLE", "MARRIED", "WIDOWED"), each = 2),
    # angle text for marital status
    y = c(-35, 35, -55, 55, -92, 92),
    angle = c(45, -45, 45, -45, 60, -60),
    x = c(2, 2, 5.5, 5.5, 8.5, 7.5),
    size = c(4, 4, 4, 4, 3, 3),
    family = font_name,
    fontface = "bold"
  ) +
  annotate(
    "text",
    label = c("MALES.", "FEMALES."),
    y = c(-50, 50),
    x = Inf,
    vjust = -0.4,
    size = 2.5,
    family = font_name,
    fontface = "bold"
  ) +
  ### theme adjustments
  theme(
    text = element_text(face = "bold"),
    panel.background = element_blank(),
    plot.title = element_text(
      size = 8,
      vjust = 2
    ),
    plot.subtitle = element_text(
      size = 6,
      vjust = 2
    ),
    axis.title = element_text(size = 8),
    axis.ticks = element_blank(),
    panel.grid.major = element_line(
      color = "black",
      size = 0.1
    ),
    panel.grid.minor.x = element_line(
      color = "black",
      size = 0.05
    ),
    panel.grid.minor.y = element_blank(),
    legend.background = element_blank(),
    legend.position = "none",
    legend.key = element_blank(),
    # put grid lines on top so not covered by plot
    panel.ontop = TRUE,
    panel.border = element_rect(
      fill = NA,
      color = "black"
    ),
    axis.text.x = element_text(size = 8),
    # both axes titles for age hortizontal instead of vertical, and put them at
    # the top, just above the values
    axis.title.y = element_text(
      angle = 0,
      vjust = 1
    ),
    axis.title.y.right = element_text(
      angle = 0,
      vjust = 1
    ),
    # age group labels need to be slightly below grid line
    axis.text.y = element_text(
      vjust = 2,
      size = 8
    )
  )

Challenger

data(challeng, package = "alr3")
challeng <- challeng %>%
  rownames_to_column() %>%
  # Fix data issue
  mutate(Fail = if_else(rowname == "51-C",
    3L, Fail
  ))
ggplot(
  data = filter(challeng, Fail > 0),
  aes(x = Temp, y = Fail)
) +
  geom_point(size = 5) +
  labs(
    x = "Temperature",
    y = "Nb of issues",
    title = "Challenger prelauch investigation",
    subtitle = "only with flights with incident"
  ) +
  ggrepel::geom_text_repel(aes(label = rowname),
    point.padding = .5,
    nudge_y = .2,
    segment.size = 0,
    seed = 42,
    direction = "x"
  ) +
  scale_y_continuous(breaks = 0:3) +
  coord_cartesian(ylim = c(0, 3)) +
  theme_light() +
  theme(panel.grid.minor.y = element_blank())

ggplot(
  data = filter(challeng, Fail > 0),
  aes(x = Temp, y = Fail)
) +
  geom_point(size = 5) +
  geom_smooth(
    method = "lm",
    formula = y ~ poly(sqrt(x), 2)
  ) +
  labs(
    x = "Temperature",
    y = "Nb of issues",
    title = "Challenger prelauch investigation",
    subtitle = "only with flights with incident"
  ) +
  scale_y_continuous(breaks = 0:3, limits = c(-0.5, 4)) +
  theme_light() +
  coord_cartesian(ylim = c(0, 3)) +
  theme(panel.grid.minor.y = element_blank())

ggplot(data = challeng, aes(x = Temp, y = Fail)) +
  geom_point(size = 5) +
  labs(
    x = "Temperature",
    y = "Nb of issues",
    title = "Challenger prelauch investigation",
    subtitle = "with all flights included"
  ) +
  scale_y_continuous(breaks = 0:3) +
  coord_cartesian(ylim = c(0, 3)) +
  theme_light() +
  theme(panel.grid.minor.y = element_blank())

ggplot(data = challeng, aes(x = Temp, y = Fail)) +
  geom_point(size = 5) +
  geom_smooth(
    method = "lm",
    formula = y ~ poly(sqrt(x), 2)
  ) +
  labs(
    x = "Temperature",
    y = "Nb of issues",
    title = "Challenger prelauch investigation",
    subtitle = "with all flights included"
  ) +
  scale_y_continuous(breaks = 0:3) +
  coord_cartesian(ylim = c(0, 3)) +
  theme_light() +
  theme(panel.grid.minor.y = element_blank())

ggplot(data = challeng, aes(x = Temp, y = Fail)) +
  geom_point(size = 5) +
  geom_smooth(
    method = "lm",
    formula = y ~ poly(sqrt(x), 2),
    fullrange = TRUE,
    linetype = "dashed"
  ) +
  geom_smooth(
    method = "lm",
    formula = y ~ poly(x, 2),
    se = FALSE
  ) +
  geom_vline(aes(xintercept = 31),
    linetype = "dashed"
  ) +
  geom_point(
    data = tibble(
      Temp = 31,
      Fail = predict(
        lm(Fail ~ poly(sqrt(Temp), 2), data = challeng),
        tibble(Temp = 31)
      )
    ),
    size = 10,
    color = "red"
  ) +
  scale_x_continuous(limit = c(30, NA)) +
  labs(
    x = "Temperature",
    y = "Nb of issues",
    title = "Challenger prelauch investigation",
    subtitle = "with all flights included"
  ) +
  scale_y_continuous(breaks = 0:3) +
  coord_cartesian(ylim = c(0, 16)) +
  theme_minimal() +
  theme(panel.grid.minor.y = element_blank())

Professeur de Mathématiques Appliquées

Professeur de Mathématiques Appliquées, mes sujets d’intérêts en recherche et enseignement vont du traitement du signal à la science des données.

Sur le même sujet