# Streamlit app: child mortality versus GDP per capita, by country and year.
#
# Two wide-format CSVs (one row per country, one column per year) are reshaped
# to long format, merged on (country, year), and shown as an interactive
# scatter plot with a fitted trend line.

import altair as alt
import pandas as pd
import streamlit as st

CHILD_MORTALITY_URL = (
    "https://huggingface.co/spaces/jiyachachan/fp2/resolve/main/"
    "child_mortality_0_5_year_olds_dying_per_1000_born.csv"
)
GDP_PER_CAPITA_URL = (
    "https://huggingface.co/spaces/jiyachachan/fp2/resolve/main/gdp_pcap.csv"
)

# Year the slider starts on; clamped to the available range before use.
DEFAULT_YEAR = 2020

# Multipliers for abbreviated magnitudes such as "12.3k".
# NOTE(review): Gapminder exports are known to abbreviate large values this
# way; confirm against the hosted CSVs. Plain numbers are unaffected.
_SUFFIX_MULTIPLIERS = {"k": 1e3, "m": 1e6, "b": 1e9}

INTRO_TEXT = (
    "The dataset used in this visualization is sourced from Gapminder, which "
    "provides high-quality global development data. Two datasets were used: one "
    "capturing child mortality rates (the number of children under five years old "
    "dying per 1,000 live births) and the other focusing on GDP per capita "
    "(adjusted for inflation and purchasing power parity). These datasets have "
    "been reshaped and merged to analyze the relationship between economic "
    "development and health outcomes across countries and over time."
)

SUMMARY_TEXT = (
    "The chart visualizes the relationship between GDP per capita (on a "
    "logarithmic scale) and child mortality rates for the selected year. Each "
    "point represents a country, with its position determined by its GDP per "
    "capita and child mortality rate, and the color identifying the country. A "
    "regression line is included to highlight overall trends. The chart "
    "demonstrates a clear inverse relationship: as GDP per capita increases, "
    "child mortality rates tend to decrease. This trend reflects the significant "
    "role of economic development in improving health outcomes, as countries "
    "with higher incomes can typically invest more in healthcare, education, and "
    "sanitation. However, the chart also highlights disparities among countries, "
    "where some nations with similar GDPs show differing child mortality rates, "
    "suggesting that factors beyond income, such as governance, infrastructure, "
    "and healthcare access, play a critical role. This interactive visualization "
    "allows users to explore specific countries and trends for different years "
    "and numbers of countries displayed."
)


def _to_number(values: pd.Series) -> pd.Series:
    """Convert *values* to floats, expanding a trailing k/M/B suffix.

    Entries that still cannot be parsed become NaN, matching
    ``pd.to_numeric(..., errors="coerce")`` for plain numeric input.
    """
    text = values.astype(str).str.strip()
    multiplier = text.str[-1].str.lower().map(_SUFFIX_MULTIPLIERS)
    has_suffix = multiplier.notna()
    # Strip the suffix character only where one was recognised.
    digits = text.where(~has_suffix, text.str[:-1])
    return pd.to_numeric(digits, errors="coerce") * multiplier.fillna(1.0)


def _read_long(url: str, value_name: str) -> pd.DataFrame:
    """Read a wide country-by-year CSV and melt it to (country, year, value)."""
    wide = pd.read_csv(url)
    return wide.melt(id_vars=["country"], var_name="year", value_name=value_name)


@st.cache_data
def load_merged_data() -> pd.DataFrame:
    """Download both datasets and return the cleaned, merged long table.

    Cached because Streamlit re-runs the whole script on every widget
    interaction; without caching each slider move re-downloads both CSVs.
    """
    merged = pd.merge(
        _read_long(CHILD_MORTALITY_URL, "child_mortality"),
        _read_long(GDP_PER_CAPITA_URL, "gdp_per_capita"),
        on=["country", "year"],
    )
    merged["year"] = merged["year"].astype(int)  # column headers arrive as text

    for column in ("gdp_per_capita", "child_mortality"):
        merged[column] = _to_number(merged[column])

    merged = merged.dropna(subset=["gdp_per_capita", "child_mortality"])
    # A log-scaled axis cannot place zero or negative values.
    return merged[merged["gdp_per_capita"] > 0]


merged_data = load_merged_data()

# Streamlit app
st.title("Child Mortality VS GDP")
st.text(" ")
st.text(INTRO_TEXT)
st.text(" ")

if merged_data.empty:
    # min()/max() of an empty column are NaN and cannot seed the slider.
    st.error("No data available to display.")
    st.stop()

# Filter data for a specific year
min_year = int(merged_data["year"].min())
max_year = int(merged_data["year"].max())
year = st.slider(
    "Select Year",
    min_value=min_year,
    max_value=max_year,
    # Streamlit rejects a default outside [min_value, max_value].
    value=min(max(DEFAULT_YEAR, min_year), max_year),
)
filtered_data = merged_data[merged_data["year"] == year]

# Select number of countries to display
num_countries = st.slider(
    "Select Number of Countries to Display",
    min_value=5,
    max_value=50,
    value=30,
    step=5,
)

# Get top N countries by GDP per capita
top_countries = filtered_data.nlargest(num_countries, "gdp_per_capita")

# Shared x/y encodings. The per-country color and tooltip are added only to
# the scatter layer: the regression transform's output has no `country`
# field, so inheriting those encodings would leave the line with an undefined
# color value instead of the intended red.
base_chart = alt.Chart(top_countries).encode(
    x=alt.X(
        "gdp_per_capita:Q",
        scale=alt.Scale(type="log"),
        title="GDP per Capita (Log Scale)",
    ),
    y=alt.Y("child_mortality:Q", title="Child Mortality (per 1,000 live births)"),
)

scatter_plot = base_chart.mark_circle(size=60).encode(
    color="country:N",
    tooltip=["country", "gdp_per_capita", "child_mortality"],
)

# The x axis is logarithmic, so fit y = a + b*log(x); that model is a
# straight line on this axis, whereas a linear-in-x fit is not.
regression_line = base_chart.transform_regression(
    "gdp_per_capita", "child_mortality", method="log"
).mark_line(color="red")

# Combine scatter plot and regression line
final_chart = (scatter_plot + regression_line).properties(
    title=f"Relationship Between GDP Per Capita and Child Mortality ({year})",
    width=800,
    height=500,
)

# Display chart in Streamlit
st.altair_chart(final_chart, use_container_width=True)
st.text(" ")
st.text(SUMMARY_TEXT)