# fp3 / app.py
# jiyachachan's picture
# Update app.py
# c65e061 verified
# raw
# history blame
# 4.2 kB
import pandas as pd
import altair as alt
import streamlit as st
# Load the data. Each CSV is in wide format: a "country" column plus one
# column per year (which is why the frames are melted below).
child_mortality = pd.read_csv("https://huggingface.co/spaces/jiyachachan/fp2/resolve/main/child_mortality_0_5_year_olds_dying_per_1000_born.csv")
gdp_per_capita = pd.read_csv("https://huggingface.co/spaces/jiyachachan/fp2/resolve/main/gdp_pcap.csv")

# Melt datasets to tidy format: one row per (country, year) pair.
child_mortality = child_mortality.melt(id_vars=["country"], var_name="year", value_name="child_mortality")
gdp_per_capita = gdp_per_capita.melt(id_vars=["country"], var_name="year", value_name="gdp_per_capita")

# Merge the datasets (inner join: only country/year pairs present in both).
merged_data = pd.merge(child_mortality, gdp_per_capita, on=["country", "year"])
merged_data["year"] = merged_data["year"].astype(int)  # Ensure 'year' is an integer

# Multipliers for magnitude suffixes. Gapminder exports can abbreviate large
# values (e.g. "10.4k" for 10400) -- NOTE(review): confirm against the CSVs.
_SUFFIX_MULTIPLIERS = {"k": 1e3, "m": 1e6, "b": 1e9}


def _expand_suffix(raw):
    """Expand a magnitude-abbreviated string such as "10.4k" into a float.

    Anything that is not a string ending in a known suffix is returned
    unchanged so that ``pd.to_numeric`` handles it exactly as before.
    """
    if isinstance(raw, str):
        text = raw.strip()
        multiplier = _SUFFIX_MULTIPLIERS.get(text[-1:].lower())
        if multiplier is not None:
            try:
                return float(text[:-1]) * multiplier
            except ValueError:
                # Not a number after all; to_numeric will coerce it to NaN.
                return raw
    return raw


# Convert gdp_per_capita and child_mortality to numeric. Suffixes are expanded
# first: a bare errors="coerce" would turn "10.4k" into NaN and the dropna
# below would silently discard exactly the highest-GDP rows.
for value_column in ("gdp_per_capita", "child_mortality"):
    merged_data[value_column] = pd.to_numeric(
        merged_data[value_column].map(_expand_suffix), errors="coerce"
    )

# Drop rows with missing or invalid data
merged_data = merged_data.dropna(subset=["gdp_per_capita", "child_mortality"])
# Streamlit app: page title followed by a description of the data sources.
st.title("Child Mortality VS GDP")

intro_text = "The dataset used in this visualization is sourced from Gapminder, which provides high-quality global development data. Two datasets were used: one capturing child mortality rates (the number of children under five years old dying per 1,000 live births) and the other focusing on GDP per capita (adjusted for inflation and purchasing power parity). These datasets have been reshaped and merged to analyze the relationship between economic development and health outcomes across countries and over time."

# The single-space entries are emitted as their own text elements and act as
# spacers above and below the paragraph.
for text_block in (" ", intro_text, " "):
    st.text(text_block)
# Filter data for a specific year.
# The preferred default is 2020, but st.slider rejects a default outside
# [min_value, max_value], so clamp it to the years actually present.
min_year = int(merged_data["year"].min())
max_year = int(merged_data["year"].max())
default_year = min(max(2020, min_year), max_year)
year = st.slider("Select Year", min_value=min_year, max_value=max_year, value=default_year)
filtered_data = merged_data[merged_data["year"] == year]

# Select number of countries to display
num_countries = st.slider("Select Number of Countries to Display", min_value=5, max_value=50, value=30, step=5)

# Get top N countries by GDP per capita for the selected year
# (nlargest returns fewer rows if fewer countries have data for that year).
top_countries = filtered_data.nlargest(num_countries, "gdp_per_capita")
# Shared base: only the positional encodings and chart properties live here so
# that both layers below can be derived from it.
base_chart = alt.Chart(top_countries).encode(
    x=alt.X("gdp_per_capita:Q", scale=alt.Scale(type="log"), title="GDP per Capita (Log Scale)"),
    y=alt.Y("child_mortality:Q", title="Child Mortality (per 1,000 live births)"),
).properties(
    title=f"Relationship Between GDP Per Capita and Child Mortality ({year})",
    width=800,
    height=500,
)

# Scatter layer: one circle per country, coloured and labelled by country.
scatter_plot = base_chart.mark_circle(size=60).encode(
    color="country:N",
    tooltip=["country", "gdp_per_capita", "child_mortality"],
)

# Regression layer. It is built from the base chart rather than from the
# scatter layer: the regression transform outputs only the x/y fields, so an
# inherited "country" colour/tooltip encoding would reference a missing field
# and the colour encoding would override the red mark colour.
# method="log" fits y = a + b*ln(x), which renders as a straight line on the
# log-scaled x axis (a "linear" fit would be drawn as a curve there).
regression_line = base_chart.transform_regression(
    "gdp_per_capita", "child_mortality", method="log"
).mark_line(color="red")

# Combine scatter plot and regression line
final_chart = scatter_plot + regression_line

# Display chart in Streamlit
st.altair_chart(final_chart, use_container_width=True)
# Closing commentary shown beneath the chart, preceded by a one-space spacer.
chart_summary = "The chart visualizes the relationship between GDP per capita (on a logarithmic scale) and child mortality rates for the selected year. Each point represents a country, with its position determined by its GDP per capita and child mortality rate, and the color identifying the country. A regression line is included to highlight overall trends. The chart demonstrates a clear inverse relationship: as GDP per capita increases, child mortality rates tend to decrease. This trend reflects the significant role of economic development in improving health outcomes, as countries with higher incomes can typically invest more in healthcare, education, and sanitation. However, the chart also highlights disparities among countries, where some nations with similar GDPs show differing child mortality rates, suggesting that factors beyond income, such as governance, infrastructure, and healthcare access, play a critical role. This interactive visualization allows users to explore specific countries and trends for different years and numbers of countries displayed."
spacer = " "
st.text(spacer)
st.text(chart_summary)