File size: 8,813 Bytes
8511cb6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
import streamlit as st
import pandas as pd
import duckdb
import rasterio
from rasterio.mask import mask
from rasterio.merge import merge
from rasterio.warp import calculate_default_transform, reproject, Resampling
from rasterio.crs import CRS
import numpy as np
import matplotlib.colors as mcolors
import matplotlib.pyplot as plt
from shapely.geometry import box
import geopandas as gpd
from pathlib import Path

# --- Constants ---
DATA_DIR = Path("data")
STATS_FILE = DATA_DIR / "stats" / "extended_stats.parquet"
QUARTERS_FILE = DATA_DIR / "boundaries" / "Stadtviertel.parquet"
BOROUGHS_FILE = DATA_DIR / "boundaries" / "Stadtbezirke.parquet"
PROCESSED_DIR = DATA_DIR / "processed"
TILES_METADATA_FILE = DATA_DIR / "metadata" / "cologne_tiles.csv"

FLAIR_COLORS = {
    0: [206, 112, 121, 255],   # Building
    1: [185, 226, 212, 255],   # Greenhouse
    2: [98, 208, 255, 255],    # Swimming pool
    3: [166, 170, 183, 255],   # Impervious surface
    4: [152, 119, 82, 255],    # Pervious surface
    5: [187, 176, 150, 255],   # Bare soil
    6: [51, 117, 161, 255],    # Water
    7: [233, 239, 254, 255],   # Snow
    8: [140, 215, 106, 255],   # Herbaceous vegetation
    9: [222, 207, 85, 255],    # Agricultural land
    10: [208, 163, 73, 255],   # Plowed land
    11: [176, 130, 144, 255],  # Vineyard
    12: [76, 145, 41, 255],    # Deciduous
    13: [18, 100, 33, 255],    # Coniferous
    14: [181, 195, 53, 255],   # Brushwood
    15: [228, 142, 77, 255],   # Clear cut
    16: [34, 34, 34, 255],     # Ligneous
    17: [34, 34, 34, 255],     # Mixed
    18: [34, 34, 34, 255],     # Other
}

CLASS_LABELS = {
    0: 'Building', 1: 'Greenhouse', 2: 'Swimming pool',
    3: 'Impervious surface', 4: 'Pervious surface', 5: 'Bare soil',
    6: 'Water', 7: 'Snow', 8: 'Herbaceous vegetation',
    9: 'Agricultural land', 10: 'Plowed land', 11: 'Vineyard',
    12: 'Deciduous', 13: 'Coniferous', 14: 'Brushwood',
    15: 'Clear cut', 16: 'Ligneous', 17: 'Mixed', 18: 'Other'
}

# --- Data Loading ---
@st.cache_data
def load_quarters_with_stats():
    # Load Boundaries
    if not QUARTERS_FILE.exists(): return None
    gdf = gpd.read_parquet(QUARTERS_FILE)
    if gdf.crs != "EPSG:4326": gdf = gdf.to_crs("EPSG:4326")
    
    # Load Stats
    try:
        con = duckdb.connect()
        df_s = con.execute(f"SELECT * FROM '{STATS_FILE}'").df()
        con.close()
        
        # Merge
        if 'name' in gdf.columns and 'name' in df_s.columns:
            gdf = gdf.merge(df_s, on='name', how='left')
            if 'green_area_m2' in gdf.columns:
                gdf['green_area_m2'] = gdf['green_area_m2'].fillna(0)
        
        # Calculate Percentage (Critical)
        if 'green_area_m2' in gdf.columns and 'Shape_Area' in gdf.columns:
             gdf['green_pct'] = (gdf['green_area_m2'] / gdf['Shape_Area']) * 100
        else:
             gdf['green_pct'] = 0.0

    except Exception as e:
        st.error(f"Error loading stats: {e}")
    
    return gdf

@st.cache_data
def load_boroughs():
    if not BOROUGHS_FILE.exists(): return None
    gdf = gpd.read_parquet(BOROUGHS_FILE)
    if gdf.crs != "EPSG:4326": gdf = gdf.to_crs("EPSG:4326")
    if 'STB_NAME' in gdf.columns: gdf = gdf.rename(columns={'STB_NAME': 'name'})
    return gdf

@st.cache_data
def get_tile_to_veedel_mapping():
    if not TILES_METADATA_FILE.exists() or not QUARTERS_FILE.exists(): return {}
    tiles_df = pd.read_csv(TILES_METADATA_FILE)
    geometries = []
    for _, row in tiles_df.iterrows():
        e, n = row['Koordinatenursprung_East'], row['Koordinatenursprung_North']
        geometries.append(box(e, n, e + 1000, n + 1000))
    tiles_gdf = gpd.GeoDataFrame(tiles_df, geometry=geometries, crs="EPSG:25832")
    
    quarters_gdf = gpd.read_parquet(QUARTERS_FILE)
    if quarters_gdf.crs != "EPSG:25832": quarters_gdf = quarters_gdf.to_crs("EPSG:25832")
        
    joined = gpd.sjoin(tiles_gdf, quarters_gdf, how="inner", predicate="intersects")
    return joined.groupby('name')['Kachelname'].apply(list).to_dict()

# --- Mosaic Logic (Shared) ---
def process_mosaic(sources, layer_type):
    """
    Core Mosaic Logic: Merges, Reprojects, and Colorizes open rasterio sources.
    Aguments:
        sources: List of open rasterio DatasetReader objects (file or memory).
        layer_type: String enum ["Satellite", "Land Cover", "NDVI"]
    """
    try:
        if not sources: return None, None

        # 1. Mosaic (Native CRS)
        # We assume sources are compatible (same bands, dtype). Merge handles overlap.
        mosaic, out_trans = merge(sources)
        
        # 2. Reproject to WGS84
        src_crs = CRS.from_epsg(25832) # Assuming all inputs are 25832
        src_height, src_width = mosaic.shape[1], mosaic.shape[2]
        dst_crs = CRS.from_epsg(4326)
        
        dst_transform, dst_width, dst_height = calculate_default_transform(
            src_crs, dst_crs, src_width, src_height, 
            *rasterio.transform.array_bounds(src_height, src_width, out_trans)
        )
        
        count = mosaic.shape[0]
        if layer_type == "Satellite" and count < 3: count = 1 
        
        dst_array = np.zeros((count, dst_height, dst_width), dtype=mosaic.dtype)
        
        reproject(
            source=mosaic, destination=dst_array,
            src_transform=out_trans, src_crs=src_crs,
            dst_transform=dst_transform, dst_crs=dst_crs,
            resampling=Resampling.nearest
        )
        
        # 3. Visualization Post-Processing
        final_image = None
        
        if layer_type == "Satellite":
            if dst_array.shape[0] >= 3:
                rgb = np.moveaxis(dst_array[:3], 0, -1)
                
                # Handle uint16
                if rgb.dtype == 'uint16':
                     p2, p98 = np.percentile(rgb[rgb > 0], (2, 98))
                     rgb = np.clip((rgb - p2) / (p98 - p2), 0, 1)
                     final_image = (rgb * 255).astype(np.uint8)
                else:
                    final_image = rgb
                
                # Alpha (Create transparency for 0 values)
                alpha = np.any(final_image > 0, axis=2).astype(np.uint8) * 255
                final_image = np.dstack((final_image, alpha))

        elif layer_type == "Land Cover":
            mask_data = dst_array[0]
            rgba = np.zeros((mask_data.shape[0], mask_data.shape[1], 4), dtype=np.uint8)
            for cls_id, color in FLAIR_COLORS.items(): 
                rgba[mask_data == cls_id] = color
            final_image = rgba

        elif layer_type == "NDVI":
            ndvi = dst_array[0].astype('float32')
            norm = mcolors.Normalize(vmin=-0.4, vmax=1, clip=True)(ndvi)
            cmap = plt.get_cmap('RdYlGn')
            final_image_float = cmap(norm)
            final_image = (final_image_float * 255).astype(np.uint8)
            
        # Calculate Bounds for Leaflet
        dst_bounds = rasterio.transform.array_bounds(dst_height, dst_width, dst_transform)
        folium_bounds = [[dst_bounds[1], dst_bounds[0]], [dst_bounds[3], dst_bounds[2]]]

        return final_image, folium_bounds

    except Exception as e:
        # st.error(f"Mosaic Process Error: {e}")
        return None, None

def get_mosaic_data_local(tile_names, layer_type):
    """
    Local IO Wrapper for Mosaic Logic
    """
    sources = []
    try:
        for tile_name in tile_names:
            # Determine Suffix
            suffix = "_mask" if ("Land Cover" in layer_type) else "_ndvi"
            if layer_type == "Satellite": suffix = ""
            
            # Paths
            opt_path = DATA_DIR / "web_optimized" / f"{tile_name}{suffix}.tif"
            raw_path = DATA_DIR / "raw" / f"{tile_name}.jp2"
            processed_mask = PROCESSED_DIR / f"{tile_name}_mask.tif"
            processed_ndvi = PROCESSED_DIR / f"{tile_name}_ndvi.tif"
            
            path_to_open = None
            if layer_type == "Satellite":
                if opt_path.exists(): path_to_open = opt_path
                elif raw_path.exists(): path_to_open = raw_path
            elif "Land Cover" in layer_type:
                if opt_path.exists(): path_to_open = opt_path
                elif processed_mask.exists(): path_to_open = processed_mask
            elif layer_type == "NDVI":
                if opt_path.exists(): path_to_open = opt_path
                elif processed_ndvi.exists(): path_to_open = processed_ndvi
            
            if path_to_open:
                sources.append(rasterio.open(path_to_open))
        
        result = process_mosaic(sources, layer_type)
        
        # Cleanup
        for s in sources: s.close()
        
        return result

    except Exception as e:
        for s in sources: s.close()
        return None, None