This article contains information about Economic Data Analysis Project with Python Pandas.
Pull Economic Data
import lib and key
import pandas as pd
import os
from fredapi import Fred
fred_api_key = os.environ.get('Fred_API_Key')
fred obj and auth api
fred = Fred(api_key = fred_api_key)
search for economic data and order by
sp_search = fred.search('S&P', order_by = 'popularity')
Pull Raw Data & Plot
pass the id obtained
sp500 = fred.get_series(series_id = 'sp500')
sp500
plot
sp500.plot(figsize=(10,5), title = 'S&P 500', lw = 2)
plt.show()
Transformations
Pull Multiple Data Series
unemp_search_df = fred.search('unemployment rate state', filter = ('frequency', 'Monthly'))
unemp_search_df
filtering
unemp_search_df_filtered = unemp_search_df.loc[
(unemp_search_df['seasonal_adjustment'] == 'Seasonally Adjusted') &
(unemp_search_df['units'] == 'Percent') &
(unemp_search_df['title'].str.contains('Unemployment Rate'))
]
unemp_search_df_filtered
Filter the 'unemp_search_df_filtered' dataframe to only include rows where the 'id' string length is less than 5:
# Filter the 'unemp_search_df_filtered' dataframe to only include rows where the 'id' string length is less than 5
unemp_search_df_filtered_states = unemp_search_df_filtered.loc[
unemp_search_df_filtered['id'].str.len() < 5
]
# Print the shape of the filtered dataframe to check the number of rows and columns
unemp_search_df_filtered_states.shape
# Create an empty list to store all results
all_results = []
# Loop through each index in the filtered states dataframe
for myid in unemp_search_df_filtered_states.index:
# Get the series for the current index from the fred API, convert it to a dataframe and name the column as the current index
results = fred.get_series(myid).to_frame(name = myid)
# Append the results dataframe to the all_results list
all_results.append(results)
# Pause execution for 0.1 seconds to avoid overwhelming the API with requests
time.sleep(0.1)
# Concatenate all the dataframes in the all_results list into a single dataframe along the columns axis
unemp_states_df = pd.concat(all_results, axis = 1)
columns to drop:
findings nulls and visualizing nulls distribution:
unemp_states_df_filtered.isna().sum(axis = 1)
unemp_states_df_filtered.isna().sum(axis = 1).plot()
replacing the serices Id's with actual state name:
id_to_state = unemp_search_df_filtered_states['title'].str.replace('Unemployment Rate in ','').to_dict()
unemp_states.columns = [id_to_state[c] for c in unemp_states.columns]
Pull May2020 Unemployment Rate Per State
# Select rows from the 'unemp_states' DataFrame where the index is '2020-05-01'
# Transpose the DataFrame and sort the values based on '2020-05-01'
unemp_states_May20 = unemp_states.loc[
unemp_states.index == '2020-05-01'
].T.sort_values('2020-05-01')
# Plot the DataFrame as a horizontal bar chart with specified figure size, bar width, edge color, and title
unemp_states_May20 = unemp_states_May20.plot(kind = 'barh', figsize = (8,12), width = 0.7, edgecolor = 'black', title = 'Unemployment Rate by State, May 2020')
# Remove the legend from the plot
unemp_states_May20.legend().remove()
# Set the label for the x-axis to '% Unemployed'
unemp_states_May20.set_xlabel('% Unemployed')
# Display the plot
plt.show()
Pull Participation Rate
# Search for 'participation rate state' in the 'fred' data source.
# Filter results to only include those with a 'Monthly' frequency.
part_search = fred.search('participation rate state', filter = ('frequency', 'Monthly'))
# Display the search results
part_search
Filter
# Filter the 'part_search' DataFrame based on two conditions:
# 1. The 'seasonal_adjustment' column should be 'Seasonally Adjusted'
# 2. The 'units' column should be 'Percent'
part_filtered = part_search.loc[
(part_search['seasonal_adjustment'] == 'Seasonally Adjusted') &
(part_search['units'] == 'Percent')
]
# Display the filtered DataFrame
part_filtered
Get data for each series ID
concatenate using pd
columns to drop:
Now swap id with actual state name:
Unemployment vs Participation for each state!
unemp_states.query('index >= 2020 and index < 2022')['New York'].plot()
fig, ax = plt.subplots()
unemp_states.query('index >= 2020 and index < 2022')['New York'].plot(ax = ax)
part_states_filter_colsw.query('index >= 2020 and index < 2022')['New York'].plot(ax = ax)
for any state:
state = 'California'
fig, ax = plt.subplots(figsize = (10,5), sharex = True)
ax2 = ax.twinx()
unemp_states2 = unemp_states.asfreq('MS')
l1 = unemp_states2.query('index >= 2020 and index < 2022')[state] \
.plot(ax = ax, label = 'Unemployement', color = '#F3CA52')
l2 = part_states_filter_colsw.dropna().query('index >= 2020 and index <2022')[state] \
.plot(ax = ax2, label = 'Participation', color = '#0A6847')
ax2.grid(False)
ax.set_title(state)
fig.legend(labels = ['Unemployement', 'Participation'])
plt.show()
for all states:
# Create a 10x5 grid of subplots with a shared x-axis, and set the figure size to 30x30
fig, axs = plt.subplots(10, 5, figsize = (30,30), sharex = True)
# Flatten the array of axes (subplots) for easier iteration
axs = axs.flatten()
# Initialize a counter variable
i = 0
# Loop over each column (state) in the unemp_states dataframe
for state in unemp_states.columns:
# Skip the iteration if the state is 'District of Columbia' or 'Puerto Rico'
if state in ['District of Columbia','Puerto Rico']:
continue
# Create a secondary y-axis for the i-th subplot
ax2 = axs[i].twinx()
# Plot the unemployment data for the current state on the i-th subplot's primary y-axis
unemp_states2.query('index >= 2020 and index < 2022')[state] \
.plot(ax = axs[i], label = 'Unemployement', color = '#10439F')
# Plot the participation data for the current state on the i-th subplot's secondary y-axis
part_states_filter_colsw.query('index >= 2020 and index <2022')[state] \
.plot(ax = ax2, label = 'Participation', color = '#FC4100')
# Disable the grid for the secondary y-axis
ax2.grid(False)
# Set the title of the i-th subplot to the current state
axs[i].set_title(state)
# Increment the counter variable
i += 1
# Adjust the padding between and around the subplots
plt.tight_layout()
# Add a legend to the figure with the labels 'Unemployement' and 'Participation'
fig.legend(labels = ['Unemployement', 'Participation'])
# Display the figure with its subplots
plt.show()
Conclusion
Learning Objectives,
Ingest data via API
Pull May2020 Unemployment Rate Per State, Pull Participation Rate, Filter
Get data for each series ID
concatenate using pd
Apply transformations in Pandas
Build visualizations using matplotlib.pyplot