import html
from typing import List, Dict, Any, Tuple, Optional

import gradio as gr
import numpy as np
import pandas as pd


def create_cluster_browser_app():
    """
    Create a simple Gradio app for browsing prompts by cluster from uploaded CSV file.

    The uploaded CSV must contain a 'prompt' column and a numeric 'cluster'
    column. The app shows per-cluster counts in a sidebar and renders the
    prompts of the selected cluster in the main area.

    Returns:
        The constructed ``gr.Blocks`` interface (not launched).
    """
    # Dropdown entries that do not correspond to an actual cluster.
    all_clusters = "(All Clusters)"
    no_data = "(No data loaded)"

    def cluster_label(value) -> str:
        """Return the dropdown label for a cluster value."""
        return f"Cluster {value}"

    def load_and_validate_csv(file) -> Tuple[Optional[pd.DataFrame], str, List[str], str]:
        """
        Load and validate the uploaded CSV file.

        Args:
            file: Uploaded file from Gradio; either an object exposing a
                ``name`` path attribute or a plain path string. ``None``
                when nothing is uploaded.

        Returns:
            Tuple of (dataframe, status_message, cluster_options, cluster_stats).
            The dataframe is ``None`` whenever loading or validation fails.
        """
        if file is None:
            return None, "Please upload a CSV file with 'prompt' and 'cluster' columns.", [no_data], ""

        try:
            # Accept both file-like objects (with .name) and plain path strings.
            path = getattr(file, "name", file)
            df = pd.read_csv(path)

            # Validate required columns
            required_cols = ['prompt', 'cluster']
            missing_cols = [col for col in required_cols if col not in df.columns]
            if missing_cols:
                return None, f"Missing required columns: {missing_cols}. Please ensure your CSV has 'prompt' and 'cluster' columns.", [no_data], ""

            # Validate data types
            if not pd.api.types.is_numeric_dtype(df['cluster']):
                return None, "The 'cluster' column must contain numeric values.", [no_data], ""

            # Count every cluster in a single pass. dropna=False keeps rows
            # with a missing cluster visible instead of silently uncounted.
            cluster_counts = df['cluster'].value_counts(dropna=False).sort_index()

            cluster_options = [all_clusters] + [cluster_label(c) for c in cluster_counts.index]

            # Bullet list: single newlines inside a Markdown paragraph are
            # not rendered as line breaks, so plain lines would run together.
            stats = [f"- {cluster_label(c)}: {count} prompts" for c, count in cluster_counts.items()]

            total_prompts = len(df)
            stats_text = f"**Total Prompts:** {total_prompts}\n\n**Cluster Distribution:**\n\n" + "\n".join(stats)

            return df, f"✅ Successfully loaded {len(df)} prompts with {len(cluster_counts)} clusters.", cluster_options, stats_text

        except Exception as e:
            # UI boundary: report any load/parse failure to the user in the
            # status area instead of crashing the event handler.
            return None, f"Error loading CSV file: {str(e)}", [no_data], ""

    def filter_by_cluster(df: pd.DataFrame, cluster_sel: str) -> pd.DataFrame:
        """
        Filter dataframe by selected cluster.

        Args:
            df: Loaded dataframe, or ``None`` when nothing is loaded.
            cluster_sel: Dropdown value: a "Cluster X" label, one of the
                placeholder entries, or ``None`` when the dropdown is cleared.

        Returns:
            The unfiltered dataframe for placeholder/empty selections, an
            empty dataframe when ``df`` is ``None``, otherwise the matching
            rows with a fresh index.
        """
        if df is None:
            return pd.DataFrame()
        if cluster_sel is None or cluster_sel in (all_clusters, no_data):
            return df

        # Compare against the same label text used to build the dropdown.
        # Parsing the label back with int() fails for float-typed cluster
        # columns ("Cluster 1.0", "Cluster nan").
        row_labels = df['cluster'].map(cluster_label)
        return df[row_labels == cluster_sel].reset_index(drop=True)

    def format_prompt_cell(prompt_text: str) -> str:
        """
        Format a single prompt in its own cell.

        The text is HTML-escaped because it comes from an uploaded file and
        is rendered through an HTML component; the wrapping block element
        keeps consecutive prompts visually separate and preserves the
        prompt's own line breaks.
        """
        safe_text = html.escape(prompt_text)
        return (
            '<div style="white-space: pre-wrap; margin: 0 0 0.75em 0; '
            'padding: 0.5em 0.75em; border: 1px solid #ddd; border-radius: 6px;">'
            f"{safe_text}"
            "</div>"
        )

    def format_prompts(df: pd.DataFrame) -> str:
        """
        Format all prompts in the dataframe as individual cells.

        Returns a placeholder message when ``df`` is ``None`` or empty.
        """
        if df is None or len(df) == 0:
            return "No prompts to display."

        return "\n".join(
            format_prompt_cell(str(prompt).strip()) for prompt in df['prompt']
        )

    def on_file_upload(file):
        """
        Handle file upload and validation.

        Returns the values for (df_state, status_md, cluster_dropdown,
        prompts_html, stats_md), in that order.
        """
        df, status_msg, cluster_options, cluster_stats = load_and_validate_csv(file)

        if df is None:
            return (
                None,
                status_msg,
                gr.Dropdown(choices=cluster_options, value=no_data, interactive=False),
                "No data loaded.",
                "",
            )

        # Show all prompts initially
        return (
            df,
            status_msg,
            gr.Dropdown(choices=cluster_options, value=all_clusters, interactive=True),
            format_prompts(df),
            cluster_stats,
        )

    def on_cluster_change(df, cluster_sel):
        """Handle cluster selection change and return the prompts HTML."""
        if df is None:
            return "No data loaded."

        return format_prompts(filter_by_cluster(df, cluster_sel))

    # Create the Gradio interface
    with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
        gr.Markdown("# Prompt Cluster Browser")

        # Store the loaded dataframe
        df_state = gr.State(None)

        with gr.Row():
            # Sidebar
            with gr.Column(scale=1):
                # File upload section
                file_upload = gr.File(
                    label="Upload Clustered Prompts CSV",
                    file_types=[".csv"],
                    file_count="single"
                )

                # Status
                status_md = gr.Markdown("Please upload a CSV file to get started.")

                # Cluster statistics
                stats_md = gr.Markdown("")

                # Cluster selection
                cluster_dropdown = gr.Dropdown(
                    [no_data],
                    label="Select Cluster",
                    value=no_data,
                    interactive=False
                )

            # Main content area
            with gr.Column(scale=3):
                prompts_html = gr.HTML("Upload a CSV file to browse clusters")

        # Connect event handlers
        file_upload.change(
            on_file_upload,
            [file_upload],
            [df_state, status_md, cluster_dropdown, prompts_html, stats_md]
        )

        cluster_dropdown.change(
            on_cluster_change,
            [df_state, cluster_dropdown],
            [prompts_html]
        )

    return demo


def launch_cluster_browser():
    """
    Launch the cluster browser app.
    """
    app = create_cluster_browser_app()
    app.launch()


if __name__ == "__main__":
    launch_cluster_browser()