Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import altair as alt
|
| 3 |
+
import streamlit as st
|
| 4 |
+
|
| 5 |
+
def _read_wide_csv_as_long(csv_path, value_name):
    """Read one indicator CSV and reshape it to one row per country/year.

    The file is expected to be in wide layout: a ``country`` column plus
    one column per year.  Every non-``country`` column header becomes a
    value in the ``year`` column, and the cell contents land in a column
    named ``value_name``.
    """
    wide_table = pd.read_csv(csv_path)
    return wide_table.melt(
        id_vars=["country"],
        var_name="year",
        value_name=value_name,
    )


# Load both indicators (paths are relative to the app's working directory)
# and bring them into tidy country / year / value form.
child_mortality = _read_wide_csv_as_long(
    "fp2/child_mortality_0_5_year_olds_dying_per_1000_born.csv",
    "child_mortality",
)
gdp_per_capita = _read_wide_csv_as_long(
    "fp2/gdp_pcap.csv",
    "gdp_per_capita",
)
|
| 12 |
+
|
| 13 |
+
# Merge the two tidy tables on their shared keys.  pd.merge defaults to an
# inner join, so only country/year pairs present in both tables survive.
merged_data = pd.merge(child_mortality, gdp_per_capita, on=["country", "year"])
merged_data["year"] = merged_data["year"].astype(int)  # Ensure 'year' is an integer

# Multipliers for magnitude-abbreviated values such as "10.4k".
# NOTE(review): assumes the source CSVs may abbreviate large numbers this
# way (thousand / million / billion) -- confirm against the actual files.
_SUFFIX_MULTIPLIERS = {"k": 1e3, "M": 1e6, "B": 1e9}


def _to_number(values):
    """Convert a column to numbers, expanding 'k' / 'M' / 'B' suffixes.

    A plain ``pd.to_numeric(..., errors="coerce")`` turns a string like
    "10.4k" into NaN, and the ``dropna`` below would then silently discard
    that row.  Here a recognised trailing suffix is stripped and applied as
    a multiplier first.  Anything that still cannot be parsed becomes NaN,
    exactly as before.
    """
    if pd.api.types.is_numeric_dtype(values):
        # Already numeric: nothing to expand.
        return pd.to_numeric(values, errors="coerce")
    text = values.astype(str).str.strip()
    multiplier = text.str[-1:].map(_SUFFIX_MULTIPLIERS)
    has_suffix = multiplier.notna()
    digits = text.where(~has_suffix, text.str[:-1])
    return pd.to_numeric(digits, errors="coerce") * multiplier.fillna(1.0)


# Convert gdp_per_capita and child_mortality to numeric
merged_data["gdp_per_capita"] = _to_number(merged_data["gdp_per_capita"])
merged_data["child_mortality"] = _to_number(merged_data["child_mortality"])

# Drop rows where either indicator is missing or could not be parsed
merged_data = merged_data.dropna(subset=["gdp_per_capita", "child_mortality"])
|
| 23 |
+
|
| 24 |
+
# Streamlit page header: title, then a prose summary of the dataset.
# The single-space st.text calls are presumably there as vertical spacing
# around the summary.
SPACER = " "
DATASET_SUMMARY = "The dataset represents global development indicators related to child mortality and GDP per capita for multiple countries over several years. Each row corresponds to a unique country-year combination, with the key fields being country (categorical, representing the country name), year (integer, indicating the year of data collection), child_mortality (numeric, showing the number of children under five dying per 1,000 live births), and gdp_per_capita (numeric, representing GDP per capita in constant 2017 international dollars). The dataset spans a wide range of years and countries, making it suitable for temporal and regional analyses. Missing values are present in some fields, particularly for earlier years or less-developed countries, and were handled during the data cleaning process. The values in child_mortality range from 2.24 to 756.0, while gdp_per_capita spans from $354.00 to $10,000.00, reflecting significant disparities in economic and health outcomes across countries and regions."

st.title("Interactive Visualization: GDP vs. Child Mortality")
st.text(SPACER)
st.text(DATASET_SUMMARY)
st.text(SPACER)
|
| 32 |
+
|
| 33 |
+
# Without any usable rows the year bounds below would be NaN and int()
# would raise, so report the problem in the page and halt this run.
if merged_data.empty:
    st.error("No rows with both GDP per capita and child mortality are available.")
    st.stop()

# Filter data for a specific year.  The preferred default is 2020, clamped
# into the range the data actually covers: st.slider rejects a default
# that lies outside [min_value, max_value].
min_year = int(merged_data["year"].min())
max_year = int(merged_data["year"].max())
default_year = min(max(2020, min_year), max_year)
year = st.slider("Select Year", min_value=min_year, max_value=max_year, value=default_year)
filtered_data = merged_data[merged_data["year"] == year]

# Select number of countries to display
num_countries = st.slider("Select Number of Countries to Display", min_value=5, max_value=50, value=10, step=5)

# Keep the N rows with the highest GDP per capita for the selected year
# (fewer rows are returned if the year has fewer than N countries).
top_countries = filtered_data.nlargest(num_countries, "gdp_per_capita")
|
| 42 |
+
|
| 43 |
+
# Shared foundation for both layers: the data, the x/y encodings and the
# title/size properties.  Colour and tooltip are deliberately NOT set here
# so the regression layer does not inherit them.
base_chart = alt.Chart(top_countries).encode(
    x=alt.X("gdp_per_capita:Q", scale=alt.Scale(type="log"), title="GDP per Capita (Log Scale)"),
    y=alt.Y("child_mortality:Q", title="Child Mortality (per 1,000 live births)"),
).properties(
    title=f"Relationship Between GDP Per Capita and Child Mortality ({year})",
    width=800,
    height=500,
)

# Scatter layer: one circle per country, coloured by country, with a
# tooltip exposing the underlying values.
scatter_plot = base_chart.mark_circle(size=60).encode(
    color="country:N",
    tooltip=["country", "gdp_per_capita", "child_mortality"],
)

# Regression layer.  It is built from base_chart rather than scatter_plot:
# the regression transform emits only the two fitted fields, so an
# inherited color="country:N" encoding would point at a field that no
# longer exists and would also take precedence over the mark's
# color="red".
# NOTE(review): the x axis is log-scaled, so this linear-in-GDP fit is
# drawn as a curve; method="log" would render as a straight line on this
# axis -- confirm which is intended.
regression_line = base_chart.transform_regression(
    "gdp_per_capita", "child_mortality", method="linear"
).mark_line(color="red")

# Combine scatter plot and regression line into one layered chart
final_chart = scatter_plot + regression_line
|
| 62 |
+
|
| 63 |
+
# Closing write-up describing how the app was put together.
BUILD_NOTES = "To build the observatory, I began by preparing the dataset, which involved merging child mortality and GDP per capita data based on common fields: country and year. I ensured that the data was cleaned and formatted correctly, converting numerical fields like child_mortality and gdp_per_capita to numeric types and handling missing values by dropping rows with invalid entries. Once the data was ready, I created initial static visualizations using Altair to explore the relationship between GDP per capita and child mortality. Building on this foundation, I added interactivity through Streamlit, allowing users to dynamically filter the dataset by year and select the number of countries to display. To enhance the visual analysis, I overlaid a regression line on the scatter plot, which provides a clear representation of trends. The app's functionality was refined iteratively, incorporating sliders for user interaction and tooltips for exploring country-specific data points."

# Render the layered chart first, asking Streamlit to size it to the
# container width, then show the write-up beneath it.
st.altair_chart(final_chart, use_container_width=True)
st.text(BUILD_NOTES)
|