{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import altair\n",
"import pandas"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# Read the data"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Tidy Tuesday (2019-03-05) jobs_gender dataset, downloaded from GitHub on every run.\n",
"JOBS_GENDER_CSV_URL = \"https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-03-05/jobs_gender.csv\"\n",
"equal_pay = pandas.read_csv(JOBS_GENDER_CSV_URL)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" year | \n",
" occupation | \n",
" major_category | \n",
" minor_category | \n",
" total_workers | \n",
" workers_male | \n",
" workers_female | \n",
" percent_female | \n",
" total_earnings | \n",
" total_earnings_male | \n",
" total_earnings_female | \n",
" wage_percent_of_male | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2013 | \n",
" Chief executives | \n",
" Management, Business, and Financial | \n",
" Management | \n",
" 1024259 | \n",
" 782400 | \n",
" 241859 | \n",
" 23.6 | \n",
" 120254 | \n",
" 126142.0 | \n",
" 95921.0 | \n",
" 76.042080 | \n",
"
\n",
" \n",
" 1 | \n",
" 2013 | \n",
" General and operations managers | \n",
" Management, Business, and Financial | \n",
" Management | \n",
" 977284 | \n",
" 681627 | \n",
" 295657 | \n",
" 30.3 | \n",
" 73557 | \n",
" 81041.0 | \n",
" 60759.0 | \n",
" 74.973162 | \n",
"
\n",
" \n",
" 2 | \n",
" 2013 | \n",
" Legislators | \n",
" Management, Business, and Financial | \n",
" Management | \n",
" 14815 | \n",
" 8375 | \n",
" 6440 | \n",
" 43.5 | \n",
" 67155 | \n",
" 71530.0 | \n",
" 65325.0 | \n",
" 91.325318 | \n",
"
\n",
" \n",
" 3 | \n",
" 2013 | \n",
" Advertising and promotions managers | \n",
" Management, Business, and Financial | \n",
" Management | \n",
" 43015 | \n",
" 17775 | \n",
" 25240 | \n",
" 58.7 | \n",
" 61371 | \n",
" 75190.0 | \n",
" 55860.0 | \n",
" 74.291794 | \n",
"
\n",
" \n",
" 4 | \n",
" 2013 | \n",
" Marketing and sales managers | \n",
" Management, Business, and Financial | \n",
" Management | \n",
" 754514 | \n",
" 440078 | \n",
" 314436 | \n",
" 41.7 | \n",
" 78455 | \n",
" 91998.0 | \n",
" 65040.0 | \n",
" 70.697189 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" year occupation \\\n",
"0 2013 Chief executives \n",
"1 2013 General and operations managers \n",
"2 2013 Legislators \n",
"3 2013 Advertising and promotions managers \n",
"4 2013 Marketing and sales managers \n",
"\n",
" major_category minor_category total_workers \\\n",
"0 Management, Business, and Financial Management 1024259 \n",
"1 Management, Business, and Financial Management 977284 \n",
"2 Management, Business, and Financial Management 14815 \n",
"3 Management, Business, and Financial Management 43015 \n",
"4 Management, Business, and Financial Management 754514 \n",
"\n",
" workers_male workers_female percent_female total_earnings \\\n",
"0 782400 241859 23.6 120254 \n",
"1 681627 295657 30.3 73557 \n",
"2 8375 6440 43.5 67155 \n",
"3 17775 25240 58.7 61371 \n",
"4 440078 314436 41.7 78455 \n",
"\n",
" total_earnings_male total_earnings_female wage_percent_of_male \n",
"0 126142.0 95921.0 76.042080 \n",
"1 81041.0 60759.0 74.973162 \n",
"2 71530.0 65325.0 91.325318 \n",
"3 75190.0 55860.0 74.291794 \n",
"4 91998.0 65040.0 70.697189 "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first five rows to see which columns the dataset provides.\n",
"equal_pay.head(n=5)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# Data Exploration"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"2013 522\n",
"2014 522\n",
"2015 522\n",
"2016 522\n",
"Name: year, dtype: int64"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Number of rows per year (the saved output shows 522 rows for each of 2013-2016).\n",
"rows_per_year = equal_pay[\"year\"].value_counts()\n",
"rows_per_year"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Chief executives 4\n",
"Helpers, construction trades 4\n",
"Sheet metal workers 4\n",
"Roofers 4\n",
"Reinforcing iron and rebar workers 4\n",
" ..\n",
"Physical therapists 4\n",
"Occupational therapists 4\n",
"Audiologists 4\n",
"Podiatrists 4\n",
"Material moving workers, all other 4\n",
"Name: occupation, Length: 522, dtype: int64"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Number of rows per occupation (the saved output lists 522 distinct occupations).\n",
"rows_per_occupation = equal_pay[\"occupation\"].value_counts()\n",
"rows_per_occupation"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Production, Transportation, and Material Moving 444\n",
"Natural Resources, Construction, and Maintenance 328\n",
"Sales and Office 280\n",
"Service 272\n",
"Computer, Engineering, and Science 236\n",
"Management, Business, and Financial 232\n",
"Education, Legal, Community Service, Arts, and Media 168\n",
"Healthcare Practitioners and Technical 128\n",
"Name: major_category, dtype: int64"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Number of rows per major category (eight categories in the saved output).\n",
"rows_per_major_category = equal_pay[\"major_category\"].value_counts()\n",
"rows_per_major_category"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# Mean percent female by major category"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# Average the `percent_female` column within each major category.\n",
"# The result is indexed by `major_category` and has one column named `mean`.\n",
"mean_female_per_category = (\n",
"    equal_pay\n",
"    .groupby(\"major_category\")\n",
"    .agg(mean=(\"percent_female\", \"mean\"))\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" mean | \n",
"
\n",
" \n",
" major_category | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" Computer, Engineering, and Science | \n",
" 26.966418 | \n",
"
\n",
" \n",
" Education, Legal, Community Service, Arts, and Media | \n",
" 54.402564 | \n",
"
\n",
" \n",
" Healthcare Practitioners and Technical | \n",
" 65.218243 | \n",
"
\n",
" \n",
" Management, Business, and Financial | \n",
" 46.480240 | \n",
"
\n",
" \n",
" Natural Resources, Construction, and Maintenance | \n",
" 5.671603 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" mean\n",
"major_category \n",
"Computer, Engineering, and Science 26.966418\n",
"Education, Legal, Community Service, Arts, and ... 54.402564\n",
"Healthcare Practitioners and Technical 65.218243\n",
"Management, Business, and Financial 46.480240\n",
"Natural Resources, Construction, and Maintenance 5.671603"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Inspect the first five per-category means.\n",
"mean_female_per_category.head(n=5)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"# Copy the index (the major category labels) into a regular `category` column;\n",
"# the bar chart further down encodes the `category` field.\n",
"mean_female_per_category = mean_female_per_category.assign(\n",
"    category=mean_female_per_category.index\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" mean | \n",
" category | \n",
"
\n",
" \n",
" major_category | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" Computer, Engineering, and Science | \n",
" 26.966418 | \n",
" Computer, Engineering, and Science | \n",
"
\n",
" \n",
" Education, Legal, Community Service, Arts, and Media | \n",
" 54.402564 | \n",
" Education, Legal, Community Service, Arts, and... | \n",
"
\n",
" \n",
" Healthcare Practitioners and Technical | \n",
" 65.218243 | \n",
" Healthcare Practitioners and Technical | \n",
"
\n",
" \n",
" Management, Business, and Financial | \n",
" 46.480240 | \n",
" Management, Business, and Financial | \n",
"
\n",
" \n",
" Natural Resources, Construction, and Maintenance | \n",
" 5.671603 | \n",
" Natural Resources, Construction, and Maintenance | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" mean \\\n",
"major_category \n",
"Computer, Engineering, and Science 26.966418 \n",
"Education, Legal, Community Service, Arts, and ... 54.402564 \n",
"Healthcare Practitioners and Technical 65.218243 \n",
"Management, Business, and Financial 46.480240 \n",
"Natural Resources, Construction, and Maintenance 5.671603 \n",
"\n",
" category \n",
"major_category \n",
"Computer, Engineering, and Science Computer, Engineering, and Science \n",
"Education, Legal, Community Service, Arts, and ... Education, Legal, Community Service, Arts, and... \n",
"Healthcare Practitioners and Technical Healthcare Practitioners and Technical \n",
"Management, Business, and Financial Management, Business, and Financial \n",
"Natural Resources, Construction, and Maintenance Natural Resources, Construction, and Maintenance "
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Check that the new `category` column mirrors the index labels.\n",
"mean_female_per_category.head(n=5)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"# Two-decimal copy of the mean; the bar chart uses it as the tooltip value.\n",
"mean_rounded_2dp = mean_female_per_category[\"mean\"].round(2)\n",
"mean_female_per_category[\"rounded_mean\"] = mean_rounded_2dp"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.9/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.\n",
" for col_name, dtype in df.dtypes.iteritems():\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
""
],
"text/plain": [
"alt.Chart(...)"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Horizontal bar chart of the mean percent female per major category.\n",
"# sort=\"-x\" orders the bars by descending x value.\n",
"altair.Chart(mean_female_per_category).mark_bar().encode(\n",
"    y=altair.Y(\"category:O\", sort=\"-x\", title=\"Major category\"),\n",
"    x=altair.X(\"mean:Q\", title=\"Mean percent female\"),\n",
"    # Show the category next to the rounded value so the tooltip is self-explanatory.\n",
"    tooltip=[\n",
"        altair.Tooltip(\"category:O\", title=\"Major category\"),\n",
"        altair.Tooltip(\"rounded_mean:Q\", title=\"Mean percent female\"),\n",
"    ],\n",
").properties(\n",
"    title=\"Mean percent of female workers by major category\"\n",
").interactive()"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.9/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.\n",
" for col_name, dtype in df.dtypes.iteritems():\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
""
],
"text/plain": [
"alt.Chart(...)"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Scatter plot with one point per row: percent of female workers against\n",
"# female earnings as a percent of male earnings, coloured by major category.\n",
"altair.Chart(equal_pay).mark_point().encode(\n",
"    y=altair.Y(\"percent_female:Q\", title=\"Percent female workers\"),\n",
"    x=altair.X(\"wage_percent_of_male:Q\", title=\"Female earnings as % of male earnings\"),\n",
"    color=altair.Color(\"major_category:N\", title=\"Major category\"),\n",
"    # Each occupation has one row per year, so include the year to tell the points apart.\n",
"    tooltip=[\n",
"        altair.Tooltip(\"occupation:N\", title=\"Occupation\"),\n",
"        altair.Tooltip(\"year:O\", title=\"Year\"),\n",
"        altair.Tooltip(\"percent_female:Q\", title=\"Percent female\"),\n",
"        altair.Tooltip(\"wage_percent_of_male:Q\", title=\"Earnings % of male\"),\n",
"    ],\n",
").properties(\n",
"    title=\"Share of female workers vs. female-to-male earnings\"\n",
").interactive()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.5"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}