Week 4 HW

Author
Affiliation

Ben Akyrueklier

George Washington University

Published

September 18, 2025

Code
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import altair as alt
from sklearn.datasets import load_iris
Code
# Load the indicator CSV and narrow it to the 2015-2019 year columns.
wb = pd.read_csv("Data/WBnew.csv")

# Exported year headers look like "2015 [YR2015]"; map the ones we keep
# to plain year labels.
new_column_names = {
    '2015 [YR2015]': '2015',
    '2016 [YR2016]': '2016',
    '2017 [YR2017]': '2017',
    '2018 [YR2018]': '2018',
    '2019 [YR2019]': '2019',
}

# Discard every year column outside the 2015-2019 window, then shorten the
# headers of the columns that remain.
wb1519 = wb.drop(
    columns=[
        '2005 [YR2005]',
        '2006 [YR2006]',
        '2007 [YR2007]',
        '2008 [YR2008]',
        '2009 [YR2009]',
        '2010 [YR2010]',
        '2011 [YR2011]',
        '2012 [YR2012]',
        '2013 [YR2013]',
        '2014 [YR2014]',
        '2020 [YR2020]',
        '2021 [YR2021]',
        '2022 [YR2022]',
        '2023 [YR2023]',
        '2024 [YR2024]',
    ]
).rename(columns=new_column_names)
wb1519.head()
Country Name Country Code Series Name Series Code 2015 2016 2017 2018 2019
0 Afghanistan AFG GDP per capita (current US$) NY.GDP.PCAP.CD 565.569730408751 522.082215583898 525.469770891619 491.337221382603 496.6025042585
1 Afghanistan AFG Hospital beds (per 1,000 people) SH.MED.BEDS.ZS 0.44 0.45 0.42 0.4 0.38
2 Afghanistan AFG Life expectancy at birth, total (years) SP.DYN.LE00.IN 62.27 62.646 62.406 62.443 62.941
3 Afghanistan AFG Net migration SM.POP.NETM -286314 -143049 -71491 -36753 9159
4 Afghanistan AFG Secure Internet servers (per 1 million people) IT.NET.SECR.P6 2.18729357416894 12.2764405423167 44.1873650754779 53.4795175761047 27.6573503133086
Code
# Reshape the wide year columns into long form: one row per
# (country, indicator, year) with the observation in 'Value'.
wbmelt = pd.melt(
    wb1519,
    id_vars=['Country Name', 'Series Name'],
    value_vars=['2015', '2016', '2017', '2018', '2019'],
    var_name='Year',
    value_name='Value',
)

# Missing observations appear in this data as the string "..", which dropna()
# does not treat as missing. Coercing to numeric turns those markers into NaN
# and gives every indicator a numeric dtype, so the dropna() calls below
# actually remove missing data and later means/plots work on numbers.
wbmelt['Value'] = pd.to_numeric(wbmelt['Value'], errors='coerce')
wbmelt = wbmelt.dropna()

# Pivot back so each indicator becomes its own column, keyed by country and
# year ('Year' stays a string label such as '2015').
wbpivot = wbmelt.pivot(index=['Country Name', 'Year'], columns='Series Name', values='Value').reset_index()

# Drop indicator columns that ended up with no observations at all.
wbpivot = wbpivot.dropna(axis=1, how='all')
wbpivot.head()
Series Name Country Name Year GDP per capita (current US$) Hospital beds (per 1,000 people) Income share held by highest 10% Life expectancy at birth, total (years) Net migration Real interest rate (%) Researchers in R&D (per million people) Secure Internet servers (per 1 million people)
0 Afghanistan 2015 565.569730408751 0.44 .. 62.27 -286314 12.2525481629518 .. 2.18729357416894
1 Afghanistan 2016 522.082215583898 0.45 .. 62.646 -143049 17.5839381624543 .. 12.2764405423167
2 Afghanistan 2017 525.469770891619 0.42 .. 62.406 -71491 12.1411782956513 .. 44.1873650754779
3 Afghanistan 2018 491.337221382603 0.4 .. 62.443 -36753 .. .. 53.4795175761047
4 Afghanistan 2019 496.6025042585 0.38 .. 62.941 9159 .. .. 27.6573503133086
Code
# Line chart of R&D researchers per million people for selected EU countries.
eu_countries = ['Germany', 'France', 'Italy', 'Romania', 'Portugal', 'Finland', 'Belgium']
rd_column = 'Researchers in R&D (per million people)'

# .copy() makes wbEU an independent frame, so the column assignment below
# does not write to a slice of wbpivot (which raised SettingWithCopyWarning).
wbEU = wbpivot[wbpivot['Country Name'].isin(eu_countries)].copy()

# errors='coerce' turns non-numeric placeholders into NaN instead of raising.
wbEU[rd_column] = pd.to_numeric(wbEU[rd_column], errors='coerce')

# One column per country, indexed by year, so each country is drawn as a line.
RDpivot = wbEU.pivot(index='Year', columns='Country Name', values=rd_column)

# DataFrame.plot opens its own figure, so the size is passed here; a separate
# plt.figure() call would only leave an empty figure behind.
ax = RDpivot.plot(marker='o', figsize=(10, 6), title="Researchers per Million People Across 5 Years")
ax.set_xlabel("Year")
ax.set_ylabel("Researchers in R&D (per million people)")
# Anchor the legend just outside the right edge so it does not cover the lines.
ax.legend(loc='upper left', bbox_to_anchor=(1.0, 1.0))
ax.grid(True)
plt.show()
/var/folders/9b/bb7yf3dj4qzbc23czfb1z3rh0000gn/T/ipykernel_90174/3833595271.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wbEU['Researchers in R&D (per million people)'] = pd.to_numeric(wbEU['Researchers in R&D (per million people)'])
<Figure size 1000x600 with 0 Axes>

Code
# Average life expectancy per year, drawn twice: once on the default y-axis
# and once zoomed in to the 71-74 range.
life_expectancy = 'Life expectancy at birth, total (years)'
wbpivot[life_expectancy] = pd.to_numeric(wbpivot[life_expectancy])

yearly_mean = wbpivot.groupby("Year")[life_expectancy].mean()
yravg = yearly_mean.reset_index(name="Average Life Expectancy")

# None keeps matplotlib's automatic limits; a tuple applies explicit limits.
for y_limits in (None, (71, 74)):
    yravg.plot(
        x="Year",
        y="Average Life Expectancy",
        kind="bar",
        title="Average Life Expectancy (2015-2019)",
        legend=False,
    )
    if y_limits is not None:
        plt.ylim(*y_limits)
    plt.ylabel("Life expectancy at birth (years)")
    plt.show()


Code
# Scatter of per-country averages: hospital beds against life expectancy.
beds_field = 'Hospital beds (per 1,000 people)'
life_field = 'Life expectancy at birth, total (years)'

# Non-numeric entries in the beds column become NaN rather than raising.
wbpivot[beds_field] = pd.to_numeric(wbpivot[beds_field], errors='coerce')

# Mean of both indicators for each country over all rows in wbpivot.
country_means = wbpivot.groupby("Country Name")[[life_field, beds_field]].mean()
healthAgg = country_means.reset_index()

health_points = alt.Chart(healthAgg).mark_circle().encode(
    x='Hospital beds (per 1,000 people):Q',
    y='Life expectancy at birth, total (years):Q',
    tooltip=['Country Name', 'Life expectancy at birth, total (years)', 'Hospital beds (per 1,000 people)'],
)
# Left as the final expression so the notebook renders the chart.
health_points.interactive().properties(title='Life Expectancy vs Hospital Beds (2015-2019 Average)')
/opt/anaconda3/lib/python3.10/site-packages/altair/utils/core.py:395: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version.  Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
Code
# Scatter of GDP per capita against the top-10% income share for 2015.
# 'Year' holds string labels, hence the comparison with '2015'.
# .copy() keeps the conversions below from writing to a slice of wbpivot.
wbscatter = wbpivot[wbpivot['Year'] == '2015'].copy()

# Both axes are encoded as quantitative (:Q), so the columns must hold
# numbers. Coerce them so text placeholders such as ".." become NaN, then
# drop rows missing either value since they cannot be placed on the chart.
scatter_fields = ['Income share held by highest 10%', 'GDP per capita (current US$)']
for field in scatter_fields:
    wbscatter[field] = pd.to_numeric(wbscatter[field], errors='coerce')
wbscatter = wbscatter.dropna(subset=scatter_fields)

alt.Chart(wbscatter).mark_circle().encode(
    x='Income share held by highest 10%:Q',
    y='GDP per capita (current US$):Q',
    tooltip=['Country Name','Year','GDP per capita (current US$)','Income share held by highest 10%']
).interactive().properties(title='GDP per Capita and Income Share of Top 10% in 2015')
/opt/anaconda3/lib/python3.10/site-packages/altair/utils/core.py:395: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version.  Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
/opt/anaconda3/lib/python3.10/site-packages/altair/utils/core.py:395: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version.  Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
/opt/anaconda3/lib/python3.10/site-packages/altair/utils/core.py:395: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version.  Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
/opt/anaconda3/lib/python3.10/site-packages/altair/utils/core.py:395: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version.  Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
/opt/anaconda3/lib/python3.10/site-packages/altair/utils/core.py:395: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version.  Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
/opt/anaconda3/lib/python3.10/site-packages/altair/utils/core.py:395: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version.  Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
/opt/anaconda3/lib/python3.10/site-packages/altair/utils/core.py:395: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version.  Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
/opt/anaconda3/lib/python3.10/site-packages/altair/utils/core.py:395: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version.  Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)