kshitijthakkar committed on
Commit
fedc47d
·
1 Parent(s): 63f41a1

fix: Resolve test case click errors and CO2 emissions calculations

Browse files

- Fixed token type error in thought_graph: convert prompt/completion tokens to int before addition
- Fixed on_test_case_select return values: return 8 gr.update() values instead of empty dict on errors
- Fixed CO2 emissions calculation: use delta values (diff) for time series charts instead of cumulative sum
- Fixed CO2 summary card: calculate total as final - initial value
- Updated chart titles to reflect "Incremental" values for CO2 and Power Cost

Resolves the TypeError raised when clicking test cases and corrects the CO2 visualization.

Files changed (3) hide show
  1. app.py +18 -5
  2. components/thought_graph.py +3 -2
  3. screens/trace_detail.py +44 -13
app.py CHANGED
@@ -175,18 +175,31 @@ def on_test_case_select(evt: gr.SelectData, df):
175
 
176
  print(f"[DEBUG] on_test_case_select called with index: {evt.index}")
177
 
 
 
 
 
 
 
 
 
 
 
 
 
 
178
  # Check if we have a selected run
179
  if current_selected_run is None:
180
  print("[ERROR] No run selected - current_selected_run is None")
181
  gr.Warning("Please select a run from the leaderboard first")
182
- return {}
183
 
184
  try:
185
  # Get selected test case
186
  selected_idx = evt.index[0]
187
  if df is None or df.empty or selected_idx >= len(df):
188
  gr.Warning("Invalid test case selection")
189
- return {}
190
 
191
  test_case = df.iloc[selected_idx].to_dict()
192
  trace_id = test_case.get('trace_id')
@@ -197,7 +210,7 @@ def on_test_case_select(evt: gr.SelectData, df):
197
  traces_dataset = current_selected_run.get('traces_dataset')
198
  if not traces_dataset:
199
  gr.Warning("No traces dataset found in current run")
200
- return {}
201
 
202
  # Update global trace info for MCP debug_trace tool
203
  _current_trace_info["trace_id"] = trace_id
@@ -208,7 +221,7 @@ def on_test_case_select(evt: gr.SelectData, df):
208
 
209
  if not trace_data:
210
  gr.Warning(f"Trace not found: {trace_id}")
211
- return {}
212
 
213
  current_selected_trace = trace_data
214
 
@@ -278,7 +291,7 @@ def on_test_case_select(evt: gr.SelectData, df):
278
  import traceback
279
  traceback.print_exc()
280
  gr.Warning(f"Error loading trace: {e}")
281
- return {}
282
 
283
 
284
 
 
175
 
176
  print(f"[DEBUG] on_test_case_select called with index: {evt.index}")
177
 
178
+ # Helper function to return empty updates for all 8 outputs
179
+ def return_error():
180
+ return (
181
+ gr.update(), # run_detail_screen
182
+ gr.update(), # trace_detail_screen
183
+ gr.update(), # trace_title
184
+ gr.update(), # trace_metadata_html
185
+ gr.update(), # trace_thought_graph
186
+ gr.update(), # span_visualization
187
+ gr.update(), # span_details_table
188
+ gr.update() # span_details_json
189
+ )
190
+
191
  # Check if we have a selected run
192
  if current_selected_run is None:
193
  print("[ERROR] No run selected - current_selected_run is None")
194
  gr.Warning("Please select a run from the leaderboard first")
195
+ return return_error()
196
 
197
  try:
198
  # Get selected test case
199
  selected_idx = evt.index[0]
200
  if df is None or df.empty or selected_idx >= len(df):
201
  gr.Warning("Invalid test case selection")
202
+ return return_error()
203
 
204
  test_case = df.iloc[selected_idx].to_dict()
205
  trace_id = test_case.get('trace_id')
 
210
  traces_dataset = current_selected_run.get('traces_dataset')
211
  if not traces_dataset:
212
  gr.Warning("No traces dataset found in current run")
213
+ return return_error()
214
 
215
  # Update global trace info for MCP debug_trace tool
216
  _current_trace_info["trace_id"] = trace_id
 
221
 
222
  if not trace_data:
223
  gr.Warning(f"Trace not found: {trace_id}")
224
+ return return_error()
225
 
226
  current_selected_trace = trace_data
227
 
 
291
  import traceback
292
  traceback.print_exc()
293
  gr.Warning(f"Error loading trace: {e}")
294
+ return return_error()
295
 
296
 
297
 
components/thought_graph.py CHANGED
@@ -196,8 +196,9 @@ def create_thought_graph(spans: List[Dict[str, Any]], trace_id: str = "Unknown")
196
  if 'tool_name' in node_data:
197
  hover += f"Tool: {node_data['tool_name']}<br>"
198
  if 'prompt_tokens' in node_data or 'completion_tokens' in node_data:
199
- prompt = node_data.get('prompt_tokens', 0) or 0 # Handle None values
200
- completion = node_data.get('completion_tokens', 0) or 0 # Handle None values
 
201
  hover += f"Tokens: {prompt + completion} (p:{prompt}, c:{completion})<br>"
202
  if 'cost' in node_data and node_data['cost'] is not None:
203
  hover += f"Cost: ${node_data['cost']:.6f}<br>"
 
196
  if 'tool_name' in node_data:
197
  hover += f"Tool: {node_data['tool_name']}<br>"
198
  if 'prompt_tokens' in node_data or 'completion_tokens' in node_data:
199
+ # Ensure values are integers, not strings
200
+ prompt = int(node_data.get('prompt_tokens', 0) or 0) # Handle None values and convert to int
201
+ completion = int(node_data.get('completion_tokens', 0) or 0) # Handle None values and convert to int
202
  hover += f"Tokens: {prompt + completion} (p:{prompt}, c:{completion})<br>"
203
  if 'cost' in node_data and node_data['cost'] is not None:
204
  hover += f"Cost: ${node_data['cost']:.6f}<br>"
screens/trace_detail.py CHANGED
@@ -550,20 +550,44 @@ def extract_metrics_data(metrics_df):
550
  gpu_temperature_celsius, gpu_power_watts, co2_emissions_gco2e
551
 
552
  Returns:
553
- DataFrame ready for visualization
554
  """
555
  if metrics_df is None or metrics_df.empty:
556
  return pd.DataFrame()
557
 
 
 
 
558
  # Ensure timestamp is datetime
559
- if 'timestamp' in metrics_df.columns:
560
- if not pd.api.types.is_datetime64_any_dtype(metrics_df['timestamp']):
561
- metrics_df['timestamp'] = pd.to_datetime(metrics_df['timestamp'])
562
 
563
  # Sort by timestamp
564
- metrics_df = metrics_df.sort_values('timestamp')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
565
 
566
- return metrics_df
 
 
 
 
 
 
 
567
 
568
 
569
  def create_gpu_summary_cards(df):
@@ -591,13 +615,19 @@ def create_gpu_summary_cards(df):
591
  utilization = df['gpu_utilization_percent'].mean() if 'gpu_utilization_percent' in df.columns else 0
592
  memory_used = df['gpu_memory_used_mib'].max() if 'gpu_memory_used_mib' in df.columns else 0
593
  temperature = df['gpu_temperature_celsius'].max() if 'gpu_temperature_celsius' in df.columns else 0
594
- co2_emissions = df['co2_emissions_gco2e'].sum() if 'co2_emissions_gco2e' in df.columns else 0
 
 
 
 
 
 
595
  power = df['gpu_power_watts'].mean() if 'gpu_power_watts' in df.columns else 0
596
 
597
  # Get GPU name from first row (it's constant across all rows)
598
  gpu_name = df['gpu_name'].iloc[0] if 'gpu_name' in df.columns and not df.empty else 'Unknown GPU'
599
 
600
- print(f"[DEBUG create_gpu_summary_cards] Aggregated values - util: {utilization:.2f}, mem: {memory_used:.2f}, temp: {temperature:.2f}, gpu_name: {gpu_name}")
601
 
602
  # Get memory total from max value if available
603
  memory_total = df['gpu_memory_total_mib'].max() if 'gpu_memory_total_mib' in df.columns else 0
@@ -662,7 +692,7 @@ def create_gpu_metrics_dashboard(metrics_df):
662
  return None
663
 
664
  # Create subplots for GPU metrics
665
- # We'll show: Utilization, Memory, Temperature, Power, CO2
666
  fig = make_subplots(
667
  rows=3, cols=2,
668
  subplot_titles=[
@@ -670,8 +700,8 @@ def create_gpu_metrics_dashboard(metrics_df):
670
  'GPU Memory (MiB)',
671
  'GPU Temperature (°C)',
672
  'GPU Power (W)',
673
- 'CO2 Emissions (g)',
674
- 'Power Cost (USD)'
675
  ],
676
  vertical_spacing=0.10,
677
  horizontal_spacing=0.12,
@@ -681,13 +711,14 @@ def create_gpu_metrics_dashboard(metrics_df):
681
  colors = ['#667eea', '#f093fb', '#4facfe', '#FFE66D', '#43e97b', '#FF6B6B']
682
 
683
  # Define metrics to plot
 
684
  metrics_config = [
685
  ('gpu_utilization_percent', 'GPU Utilization (%)', 1, 1, colors[0]),
686
  ('gpu_memory_used_mib', 'GPU Memory (MiB)', 1, 2, colors[1]),
687
  ('gpu_temperature_celsius', 'GPU Temperature (°C)', 2, 1, colors[2]),
688
  ('gpu_power_watts', 'GPU Power (W)', 2, 2, colors[3]),
689
- ('co2_emissions_gco2e', 'CO2 Emissions (g)', 3, 1, colors[4]),
690
- ('power_cost_usd', 'Power Cost (USD)', 3, 2, colors[5]),
691
  ]
692
 
693
  for col_name, title, row, col, color in metrics_config:
 
550
  gpu_temperature_celsius, gpu_power_watts, co2_emissions_gco2e
551
 
552
  Returns:
553
+ DataFrame ready for visualization with delta values for cumulative counters
554
  """
555
  if metrics_df is None or metrics_df.empty:
556
  return pd.DataFrame()
557
 
558
+ # Make a copy to avoid modifying original
559
+ df = metrics_df.copy()
560
+
561
  # Ensure timestamp is datetime
562
+ if 'timestamp' in df.columns:
563
+ if not pd.api.types.is_datetime64_any_dtype(df['timestamp']):
564
+ df['timestamp'] = pd.to_datetime(df['timestamp'])
565
 
566
  # Sort by timestamp
567
+ df = df.sort_values('timestamp').reset_index(drop=True)
568
+
569
+ # Calculate deltas for cumulative counters (CO2 and Power Cost)
570
+ # These are cumulative metrics, so we need to show the incremental change
571
+ cumulative_metrics = ['co2_emissions_gco2e', 'power_cost_usd']
572
+
573
+ for metric in cumulative_metrics:
574
+ if metric in df.columns:
575
+ # Calculate delta (difference from previous value)
576
+ # First value gets 0 (since we don't know the previous state)
577
+ df[f'{metric}_delta'] = df[metric].diff().fillna(0)
578
+
579
+ # Handle negative deltas (can happen if counter resets)
580
+ # If delta is negative, use the current value itself as the increment (the counter restarted from zero)
581
+ df.loc[df[f'{metric}_delta'] < 0, f'{metric}_delta'] = df.loc[df[f'{metric}_delta'] < 0, metric]
582
 
583
+ # Replace the original cumulative columns with delta values for visualization
584
+ if 'co2_emissions_gco2e' in df.columns and 'co2_emissions_gco2e_delta' in df.columns:
585
+ df['co2_emissions_gco2e'] = df['co2_emissions_gco2e_delta']
586
+
587
+ if 'power_cost_usd' in df.columns and 'power_cost_usd_delta' in df.columns:
588
+ df['power_cost_usd'] = df['power_cost_usd_delta']
589
+
590
+ return df
591
 
592
 
593
  def create_gpu_summary_cards(df):
 
615
  utilization = df['gpu_utilization_percent'].mean() if 'gpu_utilization_percent' in df.columns else 0
616
  memory_used = df['gpu_memory_used_mib'].max() if 'gpu_memory_used_mib' in df.columns else 0
617
  temperature = df['gpu_temperature_celsius'].max() if 'gpu_temperature_celsius' in df.columns else 0
618
+
619
+ # CO2 emissions is a cumulative counter - calculate delta (final - initial)
620
+ if 'co2_emissions_gco2e' in df.columns and not df.empty:
621
+ co2_emissions = df['co2_emissions_gco2e'].iloc[-1] - df['co2_emissions_gco2e'].iloc[0]
622
+ else:
623
+ co2_emissions = 0
624
+
625
  power = df['gpu_power_watts'].mean() if 'gpu_power_watts' in df.columns else 0
626
 
627
  # Get GPU name from first row (it's constant across all rows)
628
  gpu_name = df['gpu_name'].iloc[0] if 'gpu_name' in df.columns and not df.empty else 'Unknown GPU'
629
 
630
+ print(f"[DEBUG create_gpu_summary_cards] Aggregated values - util: {utilization:.2f}, mem: {memory_used:.2f}, temp: {temperature:.2f}, co2: {co2_emissions:.4f}, gpu_name: {gpu_name}")
631
 
632
  # Get memory total from max value if available
633
  memory_total = df['gpu_memory_total_mib'].max() if 'gpu_memory_total_mib' in df.columns else 0
 
692
  return None
693
 
694
  # Create subplots for GPU metrics
695
+ # We'll show: Utilization, Memory, Temperature, Power, CO2 (delta), Power Cost (delta)
696
  fig = make_subplots(
697
  rows=3, cols=2,
698
  subplot_titles=[
 
700
  'GPU Memory (MiB)',
701
  'GPU Temperature (°C)',
702
  'GPU Power (W)',
703
+ 'CO2 Emissions - Incremental (g)',
704
+ 'Power Cost - Incremental (USD)'
705
  ],
706
  vertical_spacing=0.10,
707
  horizontal_spacing=0.12,
 
711
  colors = ['#667eea', '#f093fb', '#4facfe', '#FFE66D', '#43e97b', '#FF6B6B']
712
 
713
  # Define metrics to plot
714
+ # Note: CO2 and Power Cost are shown as delta/incremental values (calculated in extract_metrics_data)
715
  metrics_config = [
716
  ('gpu_utilization_percent', 'GPU Utilization (%)', 1, 1, colors[0]),
717
  ('gpu_memory_used_mib', 'GPU Memory (MiB)', 1, 2, colors[1]),
718
  ('gpu_temperature_celsius', 'GPU Temperature (°C)', 2, 1, colors[2]),
719
  ('gpu_power_watts', 'GPU Power (W)', 2, 2, colors[3]),
720
+ ('co2_emissions_gco2e', 'CO2 Emissions - Incremental (g)', 3, 1, colors[4]),
721
+ ('power_cost_usd', 'Power Cost - Incremental (USD)', 3, 2, colors[5]),
722
  ]
723
 
724
  for col_name, title, row, col, color in metrics_config: