Equal Pay data

library(tidyverse)
── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
✔ ggplot2 3.4.2     ✔ purrr   0.3.4
✔ tibble  3.2.1     ✔ dplyr   1.1.2
✔ tidyr   1.2.0     ✔ stringr 1.4.0
✔ readr   2.1.2     ✔ forcats 0.5.2
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()

Read the data in

# Load the TidyTuesday 2019-03-05 jobs_gender.csv table directly from GitHub.
# read_csv() guesses the column types and prints the specification it chose.
equal_pay <- read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-03-05/jobs_gender.csv"
)
Rows: 2088 Columns: 12
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): occupation, major_category, minor_category
dbl (9): year, total_workers, workers_male, workers_female, percent_female, ...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Explore data

# Number of rows recorded for each year.
# Written as a direct call; the piped spelling `equal_pay %>% count(year)`
# (read the pipe as "and then") evaluates to exactly the same thing.
count(equal_pay, year)

Box plot

# Box plot of percent_female, one box per major_category (categories on the
# y axis, values on the x axis).
ggplot(
  data = equal_pay,
  mapping = aes(x = percent_female, y = major_category)
) +
  geom_boxplot()

Bar plot

# Average percent_female for every major_category: one row per category with
# the result stored in a column called `mean`.
# mean() is called without na.rm, so a missing value in a category would make
# that category's mean NA.
mean_female_per_category <- summarise(
  group_by(equal_pay, major_category),
  mean = mean(percent_female)
)

# Print the summary table first.
mean_female_per_category

# Horizontal bar chart of the per-category means. reorder() sorts the
# categories by their mean, and every bar is labelled with its mean rounded
# to two decimals.
ggplot(
  data = mean_female_per_category,
  mapping = aes(
    x = mean,
    y = reorder(major_category, mean),
    label = round(mean, 2)
  )
) +
  geom_col() +
  geom_label()

Scatterplot

# wage_percent_of_male against percent_female, one point per row, coloured by
# major_category. Rows with a missing coordinate are dropped by geom_point(),
# which reports how many it removed in a warning.
ggplot(
  data = equal_pay,
  mapping = aes(
    x = percent_female,
    y = wage_percent_of_male,
    colour = major_category
  )
) +
  geom_point()
Warning: Removed 846 rows containing missing values (`geom_point()`).

# percent_female by year for the "Computer, Engineering, and Science" major
# category only. Year is converted to a factor so it is treated as discrete
# on the x axis and in the colour legend.
equal_pay %>%
  filter(major_category == "Computer, Engineering, and Science") %>%
  ggplot(aes(x = factor(year),
             y = percent_female,
             color = factor(year))) +
  # Jitter is random: a fixed seed makes the figure identical on every render.
  # height = 0 spreads points sideways only, so each point stays at its exact
  # percent_female value (the default also jitters vertically).
  geom_jitter(position = position_jitter(height = 0, seed = 2019)) +
  # Replace the default "factor(year)" axis/legend titles with readable ones.
  labs(x = "year",
       y = "percent female",
       color = "year") +
  theme_linedraw()