{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import altair\n", "import pandas" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "# Read the data" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# TidyTuesday 2019-03-05 jobs_gender.csv, downloaded over the network each time this cell runs.\n", "JOBS_GENDER_CSV_URL = \"https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-03-05/jobs_gender.csv\"\n", "equal_pay = pandas.read_csv(JOBS_GENDER_CSV_URL)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
yearoccupationmajor_categoryminor_categorytotal_workersworkers_maleworkers_femalepercent_femaletotal_earningstotal_earnings_maletotal_earnings_femalewage_percent_of_male
02013Chief executivesManagement, Business, and FinancialManagement102425978240024185923.6120254126142.095921.076.042080
12013General and operations managersManagement, Business, and FinancialManagement97728468162729565730.37355781041.060759.074.973162
22013LegislatorsManagement, Business, and FinancialManagement148158375644043.56715571530.065325.091.325318
32013Advertising and promotions managersManagement, Business, and FinancialManagement43015177752524058.76137175190.055860.074.291794
42013Marketing and sales managersManagement, Business, and FinancialManagement75451444007831443641.77845591998.065040.070.697189
\n", "
" ], "text/plain": [ " year occupation \\\n", "0 2013 Chief executives \n", "1 2013 General and operations managers \n", "2 2013 Legislators \n", "3 2013 Advertising and promotions managers \n", "4 2013 Marketing and sales managers \n", "\n", " major_category minor_category total_workers \\\n", "0 Management, Business, and Financial Management 1024259 \n", "1 Management, Business, and Financial Management 977284 \n", "2 Management, Business, and Financial Management 14815 \n", "3 Management, Business, and Financial Management 43015 \n", "4 Management, Business, and Financial Management 754514 \n", "\n", " workers_male workers_female percent_female total_earnings \\\n", "0 782400 241859 23.6 120254 \n", "1 681627 295657 30.3 73557 \n", "2 8375 6440 43.5 67155 \n", "3 17775 25240 58.7 61371 \n", "4 440078 314436 41.7 78455 \n", "\n", " total_earnings_male total_earnings_female wage_percent_of_male \n", "0 126142.0 95921.0 76.042080 \n", "1 81041.0 60759.0 74.973162 \n", "2 71530.0 65325.0 91.325318 \n", "3 75190.0 55860.0 74.291794 \n", "4 91998.0 65040.0 70.697189 " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Preview the first rows (5 is the pandas default, spelled out here).\n", "equal_pay.head(n=5)" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "# Data Exploration" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "2013 522\n", "2014 522\n", "2015 522\n", "2016 522\n", "Name: year, dtype: int64" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Number of rows recorded for each year.\n", "rows_per_year = equal_pay[\"year\"].value_counts()\n", "rows_per_year" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Chief executives 4\n", "Helpers, construction trades 4\n", "Sheet metal workers 4\n", "Roofers 4\n", "Reinforcing iron and rebar workers 4\n", " ..\n", "Physical therapists 4\n", "Occupational therapists 4\n", "Audiologists 4\n", "Podiatrists 4\n", "Material moving workers, all 
other 4\n", "Name: occupation, Length: 522, dtype: int64" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Number of rows recorded for each occupation.\n", "rows_per_occupation = equal_pay[\"occupation\"].value_counts()\n", "rows_per_occupation" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Production, Transportation, and Material Moving 444\n", "Natural Resources, Construction, and Maintenance 328\n", "Sales and Office 280\n", "Service 272\n", "Computer, Engineering, and Science 236\n", "Management, Business, and Financial 232\n", "Education, Legal, Community Service, Arts, and Media 168\n", "Healthcare Practitioners and Technical 128\n", "Name: major_category, dtype: int64" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Number of rows recorded for each major category.\n", "rows_per_major_category = equal_pay[\"major_category\"].value_counts()\n", "rows_per_major_category" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "# Mean by group" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "# Unweighted mean of percent_female over every row of a major category\n", "# (rows are not weighted by total_workers).\n", "mean_female_per_category = equal_pay.groupby(\"major_category\").agg(\n", "    mean=(\"percent_female\", \"mean\"),\n", ")" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
mean
major_category
Computer, Engineering, and Science26.966418
Education, Legal, Community Service, Arts, and Media54.402564
Healthcare Practitioners and Technical65.218243
Management, Business, and Financial46.480240
Natural Resources, Construction, and Maintenance5.671603
\n", "
" ], "text/plain": [ " mean\n", "major_category \n", "Computer, Engineering, and Science 26.966418\n", "Education, Legal, Community Service, Arts, and ... 54.402564\n", "Healthcare Practitioners and Technical 65.218243\n", "Management, Business, and Financial 46.480240\n", "Natural Resources, Construction, and Maintenance 5.671603" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Preview the per-category means (5 rows is the pandas default, spelled out here).\n", "mean_female_per_category.head(n=5)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "# Altair encodes DataFrame columns only, not the index, so copy the group labels into a column.\n", "mean_female_per_category = mean_female_per_category.assign(\n", "    category=mean_female_per_category.index\n", ")" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
meancategory
major_category
Computer, Engineering, and Science26.966418Computer, Engineering, and Science
Education, Legal, Community Service, Arts, and Media54.402564Education, Legal, Community Service, Arts, and...
Healthcare Practitioners and Technical65.218243Healthcare Practitioners and Technical
Management, Business, and Financial46.480240Management, Business, and Financial
Natural Resources, Construction, and Maintenance5.671603Natural Resources, Construction, and Maintenance
\n", "
" ], "text/plain": [ " mean \\\n", "major_category \n", "Computer, Engineering, and Science 26.966418 \n", "Education, Legal, Community Service, Arts, and ... 54.402564 \n", "Healthcare Practitioners and Technical 65.218243 \n", "Management, Business, and Financial 46.480240 \n", "Natural Resources, Construction, and Maintenance 5.671603 \n", "\n", " category \n", "major_category \n", "Computer, Engineering, and Science Computer, Engineering, and Science \n", "Education, Legal, Community Service, Arts, and ... Education, Legal, Community Service, Arts, and... \n", "Healthcare Practitioners and Technical Healthcare Practitioners and Technical \n", "Management, Business, and Financial Management, Business, and Financial \n", "Natural Resources, Construction, and Maintenance Natural Resources, Construction, and Maintenance " ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mean_female_per_category.head()" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "# Two-decimal copy of the mean; the bar chart uses it as its tooltip value.\n", "# Series.round is the vectorized pandas method (the builtin round() only works via Series.__round__).\n", "mean_female_per_category[\"rounded_mean\"] = mean_female_per_category[\"mean\"].round(2)" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/usr/local/lib/python3.9/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.\n", " for col_name, dtype in df.dtypes.iteritems():\n" ] }, { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.Chart(...)" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Mean percent_female per major category, bars sorted from largest to smallest.\n", "# Explicit titles keep the figure readable on its own; the tooltip names the category as well as the value.\n", "altair.Chart(mean_female_per_category).mark_bar().encode(\n", "    altair.Y(\"category:O\", sort=\"-x\", title=\"Major category\"),\n", "    altair.X(\"mean:Q\", title=\"Mean percent female\"),\n", "    tooltip=[\n", "        altair.Tooltip(\"category:O\", title=\"Major category\"),\n", "        altair.Tooltip(\"rounded_mean:Q\", title=\"Mean percent female\"),\n", "    ],\n", ").properties(\n", "    title=\"Mean percent female across occupations, by major category\"\n", ").interactive()" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/usr/local/lib/python3.9/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.\n", " for col_name, dtype in df.dtypes.iteritems():\n" ] }, { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.Chart(...)" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# One point per occupation and year: share of women vs. women's earnings relative to men's.\n", "# Field types are spelled out instead of being inferred from the column dtypes.\n", "altair.Chart(equal_pay).mark_point().encode(\n", "    x=altair.X(\"wage_percent_of_male:Q\", title=\"Women's earnings as % of men's\"),\n", "    y=altair.Y(\"percent_female:Q\", title=\"Percent female\"),\n", "    color=altair.Color(\"major_category:N\", title=\"Major category\"),\n", "    # Each occupation appears once per year, so the year is needed to identify a point.\n", "    tooltip=[\"occupation:N\", \"year:O\"],\n", ").properties(\n", "    title=\"Share of women vs. relative earnings, per occupation and year\"\n", ").interactive()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.5" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }