R packages

Für die folgenden Analysen werden zwei R Pakete benötigt:

Data import

data <- map_df(2014:2023, get_lis_babynames)

Data exploration

colnames(data)
#> [1] "ANZAHL"     "VORNAME"    "GESCHLECHT" "YEAR"

Data wrangling

# top.10_boys <- data %>%
#   filter(GESCHLECHT == 'm') %>%
#   group_by(YEAR) %>%
#   slice_max(n = 10, order_by = ANZAHL) %>%
#   ungroup() %>%
#   pull(VORNAME) %>%
#   unique()
#
# top.10_girls <- data %>%
#   filter(GESCHLECHT == 'w') %>%
#   group_by(YEAR) %>%
#   slice_max(n = 10, order_by = ANZAHL) %>%
#   ungroup() %>%
#   pull(VORNAME) %>%
#   unique()




df.top.10_boys <- data %>%
  filter(GESCHLECHT == "m") %>%
  select(-GESCHLECHT) %>%
  group_by(YEAR) %>%
  mutate(RANK = rank(-ANZAHL, ties.method = "min")) %>%
  ungroup() %>%
  filter(RANK <= 10) %>%
  arrange(YEAR, RANK) %>%
  group_by(YEAR, RANK) %>%
  nest() %>%
  mutate(
    VORNAME = map_chr(data, ~ toString(.x$VORNAME)),
    VORNAME = str_replace_all(VORNAME, ",", "\n"),
    ANZAHL = map_int(data, ~ max(.x$ANZAHL))
  ) %>%
  select(VORNAME, YEAR, ANZAHL, RANK)

df.top.10_girls <- data %>%
  filter(GESCHLECHT == "w") %>%
  select(-GESCHLECHT) %>%
  group_by(YEAR) %>%
  mutate(RANK = rank(-ANZAHL, ties.method = "min")) %>%
  ungroup() %>%
  filter(RANK <= 10) %>%
  arrange(YEAR, RANK) %>%
  group_by(YEAR, RANK) %>%
  nest() %>%
  mutate(
    VORNAME = map_chr(data, ~ toString(.x$VORNAME)),
    VORNAME = str_replace_all(VORNAME, ",", "\n"),
    ANZAHL = map_int(data, ~ max(.x$ANZAHL))
  ) %>%
  select(VORNAME, YEAR, ANZAHL, RANK)

Data visualisation

ggplot(
  df.top.10_boys,
  aes(x = YEAR, y = RANK, colour = as.character(YEAR))
) +
  coord_flip() +
  scale_y_continuous(breaks = seq(1, 10, 1), sec.axis = dup_axis()) +
  scale_x_reverse(breaks = rev(seq(2014, 2023, 1))) +
  #  geom_line(aes(group = VORNAME), linetype = 3) +
  geom_text(aes(label = VORNAME), fill = "#1e1e1e", hjust = "center") +
  hrbrthemes::theme_modern_rc() +
  theme(
    legend.position = "none",
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  ) +
  # expand_limits(x = c(2013.8, 2023.1)) +
  labs(
    title = "Most popular male babynames",
    subtitle = "Leipzig, Germany"
  )

ggplot(
  df.top.10_girls,
  aes(x = YEAR, y = RANK, colour = as.character(YEAR))
) +
  coord_flip() +
  scale_y_continuous(breaks = seq(1, 10, 1), sec.axis = dup_axis()) +
  scale_x_reverse(breaks = rev(seq(2014, 2023, 1))) +
  #  geom_line(aes(group = VORNAME), linetype = 3) +
  geom_text(aes(label = VORNAME), fill = "#1e1e1e", hjust = "center") +
  hrbrthemes::theme_modern_rc() +
  theme(
    legend.position = "none",
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  ) +
  expand_limits(x = c(2013.8, 2023.1)) +
  labs(
    title = "Most popular female babynames",
    subtitle = "Leipzig, Germany"
  )

library(DT)
tab <- data %>%
  filter(YEAR == 2023) %>%
  mutate(
    GESCHLECHT = ifelse(GESCHLECHT == "w", "female", "male"),
    GESCHLECHT = factor(GESCHLECHT, levels = c("female", "male")),
    YEAR = as.factor(YEAR)
  ) %>%
  select(Year = YEAR, Geschlecht = GESCHLECHT, Name = VORNAME, Number = ANZAHL) %>%
  arrange(Year)

datatable(tab,
  rownames = FALSE,
  options = list(
    autoWidth = TRUE,
    columnDefs = list(list(
      className = "dt-center",
      width = "200px",
      targets = "_all"
    ))
  ),
  colnames = c("Year", "Sex", "Name", "Number"),
  filter = "top"
)