|
|
import datetime |
|
|
import os |
|
|
import re |
|
|
import yaml |
|
|
from datasets import Dataset, load_dataset |
|
|
from huggingface_hub import create_repo, login |
|
|
import gradio as gr |
|
|
|
|
|
|
|
|
# Hugging Face Hub dataset repository that submitted evaluation cards are
# loaded from and pushed to.
DATASET_NAME = "jablonkagroup/eval-cards-dataset"
|
|
|
|
|
|
|
|
def setup_hf_auth():
    """Log in to the Hugging Face Hub with the ``HF_TOKEN`` environment variable.

    Returns:
        True when a token was present and ``login`` completed, False when the
        token is missing/empty or the login attempt raised.
    """
    try:
        token = os.environ.get("HF_TOKEN")
        if not token:
            return False
        login(token=token)
        return True
    except Exception as e:
        # Authentication is best-effort: report the problem and let callers
        # fall back to their unauthenticated behaviour.
        print(f"HF Auth error: {e}")
        return False
|
|
|
|
|
|
|
|
def ensure_dataset_exists():
    """Make sure the cards dataset is available, creating an empty one if needed.

    Returns:
        True when the train split loads, or when the repository was created and
        an empty dataset with the expected columns was pushed. False when
        authentication fails or creation raises.
    """
    try:
        load_dataset(DATASET_NAME, split="train")
        return True
    except Exception:
        # Any load failure is treated as "dataset not there yet"; continue
        # with the creation path below.
        pass

    try:
        if not setup_hf_auth():
            return False

        create_repo(
            repo_id=DATASET_NAME, repo_type="dataset", private=False, exist_ok=True
        )

        column_names = (
            "filename",
            "title",
            "summary",
            "authors",
            "creation_date",
            "coverage_score",
            "yaml_content",
            "paper_link",
            "repository_link",
            "timestamp",
        )
        placeholder = Dataset.from_dict({column: [] for column in column_names})
        placeholder.push_to_hub(DATASET_NAME)
        return True
    except Exception as e:
        print(f"Dataset creation error: {e}")
        return False
|
|
|
|
|
|
|
|
def get_template():
    """Return the starter YAML document for a new evaluation card.

    The template contains a title, a summary and one block per section that
    the coverage score looks at, each pre-filled with placeholder text.
    """
    template_lines = (
        'title: "Your Evaluation Title"',
        'summary: "Brief description of your evaluation"',
        "metadata:",
        '  authors: ["Author Name"]',
        '  creation_date: "2025-01-01"',
        '  paper_link: ""',
        '  repository_link: ""',
        "evaluation_design:",
        '  purpose: "What is the purpose of this evaluation?"',
        '  scope: "What does this evaluation cover?"',
        "estimand:",
        '  definition: "What are you trying to estimate?"',
        "estimator:",
        '  method: "How are you estimating it?"',
        "estimate:",
        '  results: "What are the results?"',
        "results_communication:",
        '  format: "How are results communicated?"',
        "known_issues_and_limitations:",
        '  issues: ["List any known issues"]',
        "version_and_maintenance:",
        '  version: "1.0"',
        "citation_and_usage:",
        '  citation: "How to cite this work"',
    )
    # The document ends with a trailing newline, like the literal it replaces.
    return "\n".join(template_lines) + "\n"
|
|
|
|
|
|
|
|
def compute_coverage_score(eval_data):
    """Compute a weighted coverage score for an eval card.

    Every known section contributes ``weight * filled_fields / total_fields``,
    where fields are the leaf values found by walking nested dicts and lists.

    A leaf is *unfilled* when it is ``None``, a blank/whitespace-only string,
    or the literal text ``"[]"`` / ``"{}"``. Any other value, including ``0``
    and ``False``, counts as filled. Empty dicts and empty lists each count as
    one unfilled field.

    Args:
        eval_data: Parsed card, a mapping of section name to content. Anything
            that is not a dict (for example ``None`` from an empty YAML
            document) is scored as an empty card instead of raising.

    Returns:
        A ``(total, details)`` tuple. ``total`` is the sum of the section
        scores, capped at 100. ``details`` maps every known section to a dict
        with ``score``, ``max_score`` and ``completion_rate`` (a percentage).
    """
    # NOTE: the weights add up to 105, which is why the total is capped below.
    sections = {
        "metadata": 5,
        "evaluation_design": 10,
        "estimand": 20,
        "estimator": 20,
        "estimate": 20,
        "results_communication": 10,
        "known_issues_and_limitations": 10,
        "version_and_maintenance": 5,
        "citation_and_usage": 5,
    }

    def is_filled(value):
        # Single rule for scalars, shared by dict values and list items.
        if value is None:
            return False
        if isinstance(value, str):
            return value.strip() not in ("", "[]", "{}")
        return True

    def count_filled_fields(data):
        if isinstance(data, (dict, list)):
            if not data:
                # An empty container is one field that was left blank.
                return 0, 1
            children = data.values() if isinstance(data, dict) else data
            filled = total = 0
            for child in children:
                sub_filled, sub_total = count_filled_fields(child)
                filled += sub_filled
                total += sub_total
            return filled, total
        return (1 if is_filled(data) else 0), 1

    if not isinstance(eval_data, dict):
        eval_data = {}

    scores = {}
    total_score = 0
    for section, weight in sections.items():
        if section not in eval_data:
            scores[section] = {
                "score": 0,
                "max_score": weight,
                "completion_rate": 0,
            }
            continue

        filled, total = count_filled_fields(eval_data[section])
        completion_rate = filled / total if total > 0 else 0
        scores[section] = {
            "score": round(completion_rate * weight, 2),
            "max_score": weight,
            "completion_rate": round(completion_rate * 100, 2),
        }
        total_score += scores[section]["score"]

    return min(round(total_score, 2), 100), scores
|
|
|
|
|
|
|
|
def get_sample_cards():
    """Return two hard-coded example cards used when no real data is shown.

    Each card carries the display fields (title, summary, authors, dates,
    links), an overall ``coverage_score`` and per-section ``score_details``.
    A fresh list is built on every call.
    """

    def section(score, max_score, completion_rate):
        # One per-section entry in the shape the card renderer reads.
        return {
            "score": score,
            "max_score": max_score,
            "completion_rate": completion_rate,
        }

    chembench = {
        "title": "ChemBench: A Large-Scale Chemical Evaluation",
        "summary": "A comprehensive benchmark for evaluating chemical property prediction models across multiple datasets and molecular representations.",
        "authors": "John Doe, Jane Smith",
        "creation_date": "2024-12-15",
        "coverage_score": 85.5,
        "paper_link": "https://arxiv.org/abs/2401.example",
        "repository_link": "https://github.com/example/chembench",
        "score_details": {
            "metadata": section(4.5, 5, 90),
            "evaluation_design": section(9.0, 10, 90),
            "estimand": section(18.0, 20, 90),
            "estimator": section(17.0, 20, 85),
            "estimate": section(16.0, 20, 80),
            "results_communication": section(8.0, 10, 80),
            "known_issues_and_limitations": section(7.0, 10, 70),
            "version_and_maintenance": section(3.0, 5, 60),
            "citation_and_usage": section(3.0, 5, 60),
        },
    }

    materialsml = {
        "title": "MaterialsML: Property Prediction Framework",
        "summary": "An evaluation framework for materials property prediction using machine learning approaches on crystal structure data.",
        "authors": "Alice Johnson, Bob Wilson",
        "creation_date": "2024-11-20",
        "coverage_score": 92.0,
        "paper_link": "",
        "repository_link": "https://github.com/example/materialsml",
        "score_details": {
            "metadata": section(5.0, 5, 100),
            "evaluation_design": section(10.0, 10, 100),
            "estimand": section(19.0, 20, 95),
            "estimator": section(18.0, 20, 90),
            "estimate": section(19.0, 20, 95),
            "results_communication": section(9.0, 10, 90),
            "known_issues_and_limitations": section(8.0, 10, 80),
            "version_and_maintenance": section(2.0, 5, 40),
            "citation_and_usage": section(2.0, 5, 40),
        },
    }

    return [chembench, materialsml]
|
|
|
|
|
|
|
|
def save_eval_card(yaml_content, paper_url="", repo_url=""):
    """Validate an eval card, score it and append it to the Hub dataset.

    Args:
        yaml_content: The card as YAML text. It must parse to a mapping.
        paper_url: Optional paper link; when given it overrides
            ``metadata.paper_link`` in the stored card.
        repo_url: Optional repository link; when given it overrides
            ``metadata.repository_link`` in the stored card.

    Returns:
        A human-readable status message: a success or "couldn't save" message
        followed by the coverage breakdown, or an error message when the card
        could not be processed. This function does not raise.
    """
    columns = (
        "filename",
        "title",
        "summary",
        "authors",
        "creation_date",
        "coverage_score",
        "yaml_content",
        "paper_link",
        "repository_link",
        "timestamp",
    )

    try:
        eval_data = yaml.safe_load(yaml_content)
        if not isinstance(eval_data, dict):
            # Empty documents, bare scalars and lists cannot hold card sections.
            raise ValueError("YAML content must be a mapping of sections")

        # ``metadata:`` with no value parses to None; treat that as empty.
        metadata = eval_data.get("metadata")
        if not isinstance(metadata, dict):
            metadata = {}
        if paper_url:
            metadata["paper_link"] = paper_url
        if repo_url:
            metadata["repository_link"] = repo_url
        if paper_url or repo_url:
            eval_data["metadata"] = metadata

        yaml_content = yaml.dump(eval_data)

        # One clock reading so the filename and the timestamp column agree.
        now = datetime.datetime.now()
        title = str(eval_data.get("title") or "Unnamed")
        filename = re.sub(r"[^\w\-_]", "_", title)
        filename = f"{filename}_{now.strftime('%Y%m%d_%H%M%S')}.yaml"

        score, score_details = compute_coverage_score(eval_data)

        # Authors may be a list, a single string, or missing.
        authors = metadata.get("authors") or []
        if not isinstance(authors, (list, tuple)):
            authors = [authors]

        new_row = {
            "filename": filename,
            "title": title,
            "summary": str(eval_data.get("summary") or ""),
            "authors": ", ".join(str(author) for author in authors),
            # Unquoted YAML dates parse to date objects; the column holds text.
            "creation_date": str(metadata.get("creation_date") or ""),
            "coverage_score": float(score),
            "yaml_content": yaml_content,
            # Explicit URLs were merged into metadata above, so this also
            # picks up links that only appear in the YAML.
            "paper_link": str(metadata.get("paper_link") or ""),
            "repository_link": str(metadata.get("repository_link") or ""),
            "timestamp": now.isoformat(),
        }

        saved_to_hf = False
        if setup_hf_auth():
            try:
                existing_data = load_dataset(DATASET_NAME, split="train").to_dict()
            except Exception:
                # NOTE(review): every load failure is treated as "no cards
                # yet". If the dataset exists but could not be read, the push
                # below presumably replaces the stored split with only the new
                # card. Confirm this is acceptable or distinguish "missing"
                # from "unreadable".
                existing_data = {column: [] for column in columns}
                ensure_dataset_exists()

            try:
                for column in columns:
                    existing_data[column].append(new_row[column])
                Dataset.from_dict(existing_data).push_to_hub(DATASET_NAME)
                saved_to_hf = True
            except Exception as e:
                print(f"Failed to save to HF: {e}")

        section_lines = "".join(
            f"β’ {section}: {details['score']}/{details['max_score']} ({details['completion_rate']}%)\n"
            for section, details in score_details.items()
        )
        details_str = f"Coverage Score: {score}%\n\nSection Details:\n{section_lines}"

        if saved_to_hf:
            return f"β Successfully saved to HF dataset! Filename: {filename}\n\n{details_str}"
        return f"β οΈ Validated successfully but couldn't save to HF dataset (check HF_TOKEN)\nFilename: {filename}\n\n{details_str}"

    except Exception as e:
        return f"β Error: {str(e)}"
|
|
|
|
|
|
|
|
def load_gallery_cards():
    """Build the card-view HTML, preferring Hub data over the demo cards.

    When authentication succeeds and the dataset's train split loads with at
    least one row, that data is rendered. Otherwise, including when loading or
    rendering the Hub data raises, the sample cards are rendered. An error
    panel is returned if even that fails.
    """
    try:
        if setup_hf_auth():
            try:
                hub_cards = load_dataset(DATASET_NAME, split="train")
                if len(hub_cards) > 0:
                    return create_gallery_html_from_dataset(hub_cards)
            except Exception as e:
                # Fall through to the demo cards below.
                print(f"Failed to load from HF dataset: {e}")

        return create_gallery_html_from_samples(get_sample_cards())

    except Exception as e:
        return f"""
        <div class="gallery-container">
            <div class="error-message">
                <h3>β Error loading gallery</h3>
                <p>{e!s}</p>
                <p>Please check your configuration and try again.</p>
            </div>
        </div>
        """
|
|
|
|
|
|
|
|
def create_gallery_html_from_dataset(dataset):
    """Render every dataset row as a card, highest coverage score first.

    Args:
        dataset: Iterable of row dicts with the stored card columns
            (``title``, ``summary``, ``authors``, ``creation_date``,
            ``coverage_score``, ``yaml_content`` and optional link columns).

    Returns:
        One HTML string: a ``gallery-container`` div wrapping all cards.
        Rows whose YAML is missing, unparsable or not a mapping are still
        shown, with every section at 0%.
    """

    def row_score(row):
        score = row.get("coverage_score")
        return 0 if score is None else score

    # Materialise the rows once instead of re-reading a row per sort key.
    rows = sorted(dataset, key=row_score, reverse=True)

    fragments = ['<div class="gallery-container">']
    for row in rows:
        try:
            eval_data = yaml.safe_load(row.get("yaml_content") or "")
        except yaml.YAMLError as e:
            # One bad row must not take the whole gallery down.
            print(f"Skipping unparsable YAML for card {row.get('title')!r}: {e}")
            eval_data = None
        if not isinstance(eval_data, dict):
            eval_data = {}
        _, score_details = compute_coverage_score(eval_data)

        summary = row.get("summary") or ""
        if len(summary) > 300:
            summary = summary[:300] + "..."

        fragments.append(
            create_card_html(
                {
                    "title": row.get("title") or "",
                    "summary": summary,
                    "authors": row.get("authors") or "",
                    "creation_date": row.get("creation_date") or "",
                    "coverage_score": row_score(row),
                    "paper_link": row.get("paper_link", ""),
                    "repository_link": row.get("repository_link", ""),
                    "score_details": score_details,
                }
            )
        )

    fragments.append("</div>")
    return "".join(fragments)
|
|
|
|
|
|
|
|
def create_gallery_html_from_samples(sample_cards):
    """Render the given demo cards below a notice that sample data is shown.

    Cards are rendered in the order given; the result is a single
    ``gallery-container`` div.
    """
    notice = """
    <div class="gallery-container">
        <div class="demo-notice">
            <h3>π― Demo Gallery</h3>
            <p>This is showing sample data. Set your HF_TOKEN in Space settings to save and load real evaluation cards!</p>
        </div>
    """
    rendered = [create_card_html(card) for card in sample_cards]
    return "".join([notice, *rendered, "</div>"])
|
|
|
|
|
|
|
|
def create_card_html(card_data):
    """Render one evaluation card as an HTML fragment.

    Args:
        card_data: Dict with ``title``, ``summary``, ``authors``,
            ``creation_date``, ``coverage_score`` (number) and
            ``score_details`` (section name -> dict with ``score``,
            ``max_score``, ``completion_rate``). ``paper_link`` and
            ``repository_link`` are optional.

    Returns:
        The card's HTML. All text fields are HTML-escaped because cards are
        user-submitted. Links are only emitted for http(s) URLs.
    """
    # Local import: ``html`` is not among the module-level imports.
    from html import escape

    score = card_data["coverage_score"]
    if score >= 80:
        score_color, score_bg = "#2e7d32", "#e8f5e8"
    elif score >= 60:
        score_color, score_bg = "#f57c00", "#fff3e0"
    else:
        score_color, score_bg = "#d32f2f", "#ffebee"

    title = escape(str(card_data["title"]))
    summary = escape(str(card_data["summary"]))
    authors = escape(str(card_data["authors"]))
    created = escape(str(card_data["creation_date"]))
    badge = escape(str(score))

    parts = [
        f"""
    <div class="eval-card">
        <div class="card-header">
            <h3 class="card-title">π― {title}</h3>
            <div class="coverage-badge" style="color: {score_color}; background: {score_bg};">
                {badge}%
            </div>
        </div>

        <div class="card-content">
            <p class="card-summary"><strong>π Summary:</strong> {summary}</p>
            <div class="card-meta">
                <p><strong>π₯ Authors:</strong> {authors}</p>
                <p><strong>π Created:</strong> {created}</p>
            </div>
        </div>

        <div class="coverage-section">
            <h4>π Coverage by Section</h4>
            <div class="coverage-grid">
    """
    ]

    for section, details in card_data["score_details"].items():
        section_display = escape(str(section).replace("_", " ").title())
        completion = details["completion_rate"]

        if completion >= 80:
            bar_color = "#4caf50"
        elif completion >= 60:
            bar_color = "#ff9800"
        else:
            bar_color = "#f44336"

        parts.append(
            f"""
                <div class="coverage-item">
                    <div class="coverage-label">{section_display}</div>
                    <div class="coverage-bar">
                        <div class="coverage-fill" style="width: {completion}%; background-color: {bar_color};"></div>
                    </div>
                    <div class="coverage-text">{details["score"]}/{details["max_score"]} ({completion}%)</div>
                </div>
        """
        )

    parts.append(
        """
            </div>
        </div>
    """
    )

    link_buttons = []
    for key, label in (("paper_link", "π Paper"), ("repository_link", "π» Repository")):
        url = str(card_data.get(key) or "").strip()
        # Allow-list the scheme so e.g. ``javascript:`` URLs never reach href.
        if url.lower().startswith(("http://", "https://")):
            link_buttons.append(
                f'<a href="{escape(url, quote=True)}" target="_blank" '
                f'rel="noopener noreferrer" class="link-button">{label}</a>'
            )
    if link_buttons:
        parts.append('<div class="card-links">' + "".join(link_buttons) + "</div>")

    parts.append("</div>")
    return "".join(parts)
|
|
|
|
|
|
|
|
def load_gallery_table():
    """Build the plain-text table view, preferring Hub data over demo cards.

    When authentication succeeds and the dataset's train split loads with at
    least one row, that data is tabulated. Otherwise the sample cards are
    used. A failure while loading or tabulating Hub data is logged and falls
    back to the samples. Any other failure is returned as an error string.
    """
    try:
        if setup_hf_auth():
            try:
                dataset = load_dataset(DATASET_NAME, split="train")
                if len(dataset) > 0:
                    return create_table_from_dataset(dataset)
            except Exception as e:
                # Still best-effort, but leave a trace of why demo data is
                # shown instead of swallowing the error silently.
                print(f"Failed to load from HF dataset: {e}")

        return create_table_from_samples(get_sample_cards())

    except Exception as e:
        return f"Error loading table: {str(e)}"
|
|
|
|
|
|
|
|
def _render_cards_table(cards, heading, limit=None, total_note=""):
    """Format cards as a fixed-width ranking table sorted by coverage score.

    Args:
        cards: Iterable of dicts with ``title``, ``authors``,
            ``coverage_score`` and ``creation_date``. Missing or ``None``
            text fields render as empty and a missing score counts as 0.
        heading: First line of the table.
        limit: Maximum number of ranked rows to print. ``None`` prints all.
            Totals and the average always cover every card.
        total_note: Text appended to the "Total Cards" line.

    Returns:
        The table as a single string.
    """

    def score_of(card):
        value = card.get("coverage_score")
        return 0 if value is None else value

    def clip(value, width):
        # Keep the cell within ``width`` characters, marking truncation.
        text = str(value or "")
        return text[: width - 3] + "..." if len(text) > width else text

    ranked = sorted(cards, key=score_of, reverse=True)
    shown = ranked if limit is None else ranked[:limit]

    pieces = [
        f"{heading}\n",
        "=" * 100 + "\n\n",
        f"{'Rank':<6} {'Title':<35} {'Authors':<30} {'Coverage':<12} {'Created':<12}\n",
        "-" * 100 + "\n",
    ]
    for rank, card in enumerate(shown, 1):
        title = clip(card.get("title"), 35)
        authors = clip(card.get("authors"), 30)
        created = str(card.get("creation_date") or "")
        pieces.append(
            f"{rank:<6} {title:<35} {authors:<30} {score_of(card):<11}% {created:<12}\n"
        )

    # Guard the average so an empty input yields 0.0 instead of dividing by 0.
    average = sum(score_of(card) for card in ranked) / len(ranked) if ranked else 0.0
    pieces.append("\n" + "=" * 100)
    pieces.append(f"\n\nTotal Cards: {len(ranked)}{total_note}")
    pieces.append(f"\nAverage Coverage: {average:.1f}%")
    return "".join(pieces)


def create_table_from_dataset(dataset):
    """Tabulate Hub dataset rows: top 50 by coverage, plus total and average."""
    return _render_cards_table(dataset, "π Evaluation Cards Summary", limit=50)


def create_table_from_samples(sample_cards):
    """Tabulate the demo cards, labelling the heading and total as demo data."""
    return _render_cards_table(
        sample_cards,
        "π Evaluation Cards Summary (Demo Data)",
        total_note=" (demo)",
    )
|
|
|
|
|
|
|
|
def get_llm_feedback(yaml_content):
    """Ask the Groq chat-completions API for improvement suggestions.

    Args:
        yaml_content: The card's YAML text.

    Returns:
        The model's reply, or a human-readable message when ``GROQ_API_KEY``
        is not set, the content is empty, the API answers with a non-200
        status, or the request fails. This function does not raise.
    """
    api_token = os.environ.get("GROQ_API_KEY")
    if not api_token:
        return "Please set GROQ_API_KEY in Space settings to get LLM feedback."

    if not yaml_content or not yaml_content.strip():
        return "Please provide YAML content first."

    try:
        import requests

        response = requests.post(
            "https://api.groq.com/openai/v1/chat/completions",
            headers={
                "Content-Type": "application/json",
                "Authorization": f"Bearer {api_token}",
            },
            json={
                "model": "llama-3.3-70b-versatile",
                "messages": [
                    {
                        "role": "user",
                        "content": f"Analyze this evaluation card YAML and provide specific improvement suggestions:\n\n```yaml\n{yaml_content}\n```\n\nFocus on completeness, clarity, and best practices.",
                    }
                ],
            },
            # Without a timeout a stalled connection would block the UI
            # callback indefinitely.
            timeout=60,
        )

        if response.status_code == 200:
            return response.json()["choices"][0]["message"]["content"]
        return f"API Error {response.status_code}: {response.text}"

    except Exception as e:
        return f"Error getting feedback: {str(e)}"
|
|
|
|
|
|
|
|
|
|
|
def submit_card(yaml_text, paper_url, repo_url):
    """Validate the submitted YAML and hand it to ``save_eval_card``.

    Args:
        yaml_text: YAML text from the editor.
        paper_url: Optional paper link entered in the form.
        repo_url: Optional repository link entered in the form.

    Returns:
        The status message from ``save_eval_card``, or a validation/error
        message when the text is empty, is not valid YAML, or does not parse
        to a mapping.
    """
    if not yaml_text or not yaml_text.strip():
        return "Please provide YAML content"

    try:
        parsed = yaml.safe_load(yaml_text)
        if not isinstance(parsed, dict):
            # Bare text, lists and comment-only documents are valid YAML but
            # cannot hold card sections; reject them here with a clear message.
            return "Invalid YAML: expected a mapping of sections at the top level"
        return save_eval_card(yaml_text, paper_url, repo_url)
    except yaml.YAMLError as e:
        return f"Invalid YAML: {str(e)}"
    except Exception as e:
        return f"Error: {str(e)}"
|
|
|
|
|
|
|
|
def load_template_text():
    """Return the starter YAML template (click handler for the template button)."""
    return get_template()
|
|
|
|
|
|
|
|
def get_feedback(yaml_text):
    """Return LLM feedback for ``yaml_text`` (click handler for the feedback button)."""
    return get_llm_feedback(yaml_text)
|
|
|
|
|
|
|
|
def refresh_gallery_cards():
    """Return freshly built card-view HTML (used on page load and on refresh)."""
    return load_gallery_cards()
|
|
|
|
|
|
|
|
def refresh_gallery_table():
    """Return freshly built table-view text (used on page load and on refresh)."""
    return load_gallery_table()
|
|
|
|
|
|
|
|
|
|
|
# Stylesheet passed to ``gr.Blocks(css=...)``. It styles the gallery container,
# the demo notice, the evaluation cards (header, content, coverage bars, link
# buttons) and the error panel. Dark-mode variants are declared three ways:
# under a ``.dark`` ancestor, under ``[data-theme="dark"]``, and inside a
# ``prefers-color-scheme: dark`` media query.
enhanced_css = """
/* Hide Gradio footer */
footer {visibility: hidden}

/* General styling */
body {
    font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Open Sans', 'Helvetica Neue', sans-serif;
}

/* Gallery container */
.gallery-container {
    max-height: 700px;
    overflow-y: auto;
    padding: 20px;
    background-color: var(--background-fill-primary, #f8f9fa);
}

/* Demo notice */
.demo-notice {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    color: white;
    padding: 20px;
    border-radius: 10px;
    margin-bottom: 25px;
    text-align: center;
    box-shadow: 0 4px 15px rgba(0,0,0,0.1);
}

.demo-notice h3 {
    margin: 0 0 10px 0;
    font-size: 1.3em;
}

.demo-notice p {
    margin: 0;
    opacity: 0.9;
}

/* Eval card styling - Light mode default */
.eval-card {
    background: var(--background-fill-secondary, white);
    color: var(--body-text-color, #374151);
    border-radius: 12px;
    padding: 25px;
    margin-bottom: 25px;
    box-shadow: 0 4px 20px rgba(0,0,0,0.08);
    border: 1px solid var(--border-color-primary, #e0e0e0);
    transition: all 0.3s ease;
    position: relative;
    overflow: hidden;
}

.eval-card:hover {
    transform: translateY(-2px);
    box-shadow: 0 8px 30px rgba(0,0,0,0.12);
}

.eval-card::before {
    content: '';
    position: absolute;
    top: 0;
    left: 0;
    right: 0;
    height: 4px;
    background: linear-gradient(90deg, #4CAF50, #2196F3, #FF9800);
}

/* Card header */
.card-header {
    display: flex;
    justify-content: space-between;
    align-items: flex-start;
    margin-bottom: 20px;
    flex-wrap: wrap;
    gap: 15px;
}

.card-title {
    color: var(--color-accent, #1976d2);
    margin: 0;
    font-size: 1.4em;
    font-weight: 600;
    flex: 1;
    min-width: 250px;
}

.coverage-badge {
    font-weight: bold;
    padding: 8px 16px;
    border-radius: 20px;
    font-size: 1.1em;
    min-width: 80px;
    text-align: center;
    box-shadow: 0 2px 8px rgba(0,0,0,0.1);
}

/* Card content */
.card-content {
    margin-bottom: 25px;
}

.card-summary {
    margin: 0 0 15px 0;
    line-height: 1.6;
    font-size: 1.02em;
    color: var(--body-text-color, #374151);
}

.card-meta {
    display: grid;
    grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
    gap: 10px;
    margin: 15px 0;
}

.card-meta p {
    margin: 0;
    padding: 8px 12px;
    background: var(--input-background-fill, #f5f5f5);
    color: var(--body-text-color, #374151);
    border-radius: 6px;
    font-size: 0.95em;
}

/* Coverage section */
.coverage-section {
    border-top: 1px solid var(--border-color-primary, #e0e0e0);
    padding-top: 20px;
}

.coverage-section h4 {
    color: var(--color-accent, #1976d2);
    margin: 0 0 15px 0;
    font-size: 1.1em;
    font-weight: 600;
}

.coverage-grid {
    display: grid;
    gap: 12px;
}

.coverage-item {
    display: grid;
    grid-template-columns: 1fr 2fr auto;
    align-items: center;
    gap: 15px;
    padding: 8px 0;
}

.coverage-label {
    font-weight: 500;
    color: var(--body-text-color, #333);
    font-size: 0.9em;
}

.coverage-bar {
    background: var(--neutral-200, #e0e0e0);
    border-radius: 10px;
    height: 8px;
    overflow: hidden;
    position: relative;
}

.coverage-fill {
    height: 100%;
    border-radius: 10px;
    transition: width 0.6s ease;
}

.coverage-text {
    font-size: 0.85em;
    color: var(--body-text-color-subdued, #666);
    min-width: 100px;
    text-align: right;
}

/* Card links */
.card-links {
    margin-top: 20px;
    display: flex;
    gap: 12px;
    flex-wrap: wrap;
    border-top: 1px solid var(--border-color-primary, #e0e0e0);
    padding-top: 15px;
}

.link-button {
    display: inline-flex;
    align-items: center;
    padding: 8px 16px;
    background: linear-gradient(135deg, #1976d2, #1565c0);
    color: white !important;
    text-decoration: none;
    border-radius: 6px;
    font-size: 0.9em;
    font-weight: 500;
    transition: all 0.3s ease;
}

.link-button:hover {
    transform: translateY(-1px);
    box-shadow: 0 4px 12px rgba(25, 118, 210, 0.3);
    text-decoration: none;
    color: white !important;
}

/* Error styling */
.error-message {
    text-align: center;
    padding: 40px;
    color: #d32f2f;
    background: var(--error-background, #ffebee);
    border-radius: 8px;
    border: 1px solid var(--error-border, #ffcdd2);
}

.error-message h3 {
    margin: 0 0 15px 0;
}

/* Dark mode specific overrides */
.dark .gallery-container {
    background-color: var(--background-fill-primary, #0b0f19);
}

.dark .eval-card {
    background: var(--background-fill-secondary, #1f2937);
    color: var(--body-text-color, #f3f4f6);
    border: 1px solid var(--border-color-primary, #374151);
    box-shadow: 0 4px 20px rgba(0,0,0,0.3);
}

.dark .eval-card:hover {
    box-shadow: 0 8px 30px rgba(0,0,0,0.4);
}

.dark .card-title {
    color: var(--color-accent, #60a5fa);
}

.dark .card-summary {
    color: var(--body-text-color, #f3f4f6);
}

.dark .card-meta p {
    background: var(--input-background-fill, #374151);
    color: var(--body-text-color, #f3f4f6);
}

.dark .coverage-section {
    border-top: 1px solid var(--border-color-primary, #4b5563);
}

.dark .coverage-section h4 {
    color: var(--color-accent, #60a5fa);
}

.dark .coverage-label {
    color: var(--body-text-color, #f3f4f6);
}

.dark .coverage-bar {
    background: var(--neutral-700, #4b5563);
}

.dark .coverage-text {
    color: var(--body-text-color-subdued, #9ca3af);
}

.dark .card-links {
    border-top: 1px solid var(--border-color-primary, #4b5563);
}

.dark .error-message {
    background: var(--error-background, #7f1d1d);
    border: 1px solid var(--error-border, #991b1b);
    color: #fca5a5;
}

/* Alternative dark mode detection using data attributes or CSS variables */
[data-theme="dark"] .eval-card,
html[data-theme="dark"] .eval-card {
    background: #1f2937;
    color: #f3f4f6;
    border: 1px solid #374151;
}

[data-theme="dark"] .card-title,
html[data-theme="dark"] .card-title {
    color: #60a5fa;
}

[data-theme="dark"] .card-meta p,
html[data-theme="dark"] .card-meta p {
    background: #374151;
    color: #f3f4f6;
}

[data-theme="dark"] .coverage-section,
html[data-theme="dark"] .coverage-section {
    border-top: 1px solid #4b5563;
}

[data-theme="dark"] .coverage-section h4,
html[data-theme="dark"] .coverage-section h4 {
    color: #60a5fa;
}

[data-theme="dark"] .coverage-label,
html[data-theme="dark"] .coverage-label {
    color: #f3f4f6;
}

[data-theme="dark"] .coverage-bar,
html[data-theme="dark"] .coverage-bar {
    background: #4b5563;
}

[data-theme="dark"] .coverage-text,
html[data-theme="dark"] .coverage-text {
    color: #9ca3af;
}

[data-theme="dark"] .card-links,
html[data-theme="dark"] .card-links {
    border-top: 1px solid #4b5563;
}

/* Responsive design */
@media (max-width: 768px) {
    .gallery-container {
        padding: 15px;
    }

    .eval-card {
        padding: 20px;
    }

    .card-header {
        flex-direction: column;
        align-items: flex-start;
    }

    .coverage-badge {
        align-self: flex-start;
    }

    .coverage-item {
        grid-template-columns: 1fr;
        gap: 8px;
    }

    .coverage-text {
        text-align: left;
    }

    .card-meta {
        grid-template-columns: 1fr;
    }
}

/* Force dark theme detection using Gradio's CSS variables */
@media (prefers-color-scheme: dark) {
    .eval-card {
        background: var(--background-fill-secondary, #1f2937) !important;
        color: var(--body-text-color, #f3f4f6) !important;
        border: 1px solid var(--border-color-primary, #374151) !important;
    }

    .card-title {
        color: var(--color-accent, #60a5fa) !important;
    }

    .card-summary {
        color: var(--body-text-color, #f3f4f6) !important;
    }

    .card-meta p {
        background: var(--input-background-fill, #374151) !important;
        color: var(--body-text-color, #f3f4f6) !important;
    }

    .coverage-section {
        border-top: 1px solid var(--border-color-primary, #4b5563) !important;
    }

    .coverage-section h4 {
        color: var(--color-accent, #60a5fa) !important;
    }

    .coverage-label {
        color: var(--body-text-color, #f3f4f6) !important;
    }

    .coverage-bar {
        background: var(--neutral-700, #4b5563) !important;
    }

    .coverage-text {
        color: var(--body-text-color-subdued, #9ca3af) !important;
    }

    .card-links {
        border-top: 1px solid var(--border-color-primary, #4b5563) !important;
    }
}
"""
|
|
|
|
|
|
|
|
# Build the Gradio app: an "Upload & Review" tab (YAML editor, optional links,
# LLM feedback, submission result) and a "Gallery" tab with card and table views.
with gr.Blocks(
    title="Evaluation Cards Gallery",
    theme=gr.themes.Soft(),
    css=enhanced_css,
) as demo:
    gr.Markdown(f"""
    # π Evaluation Cards for Machine Learning in Materials Science

    Upload your evaluation card in YAML format, get feedback from an LLM, and submit it to the gallery.
    Data is persistently stored in the HF dataset: [{DATASET_NAME}](https://huggingface.co/datasets/{DATASET_NAME})

    Checkout the [GitHub repository](https://github.com/lamalab-org/eval-cards) for more information.
    """)

    with gr.Tabs():
        with gr.TabItem("π Upload & Review"):
            with gr.Row():
                # Left column: card editor, optional links and action buttons.
                with gr.Column():
                    gr.Markdown("### βοΈ Create Evaluation Card")

                    yaml_editor = gr.Textbox(
                        label="YAML Content",
                        lines=15,
                        placeholder="Paste your YAML content or click 'Load Template'...",
                    )

                    template_btn = gr.Button("π Load Template")

                    paper_url = gr.Textbox(
                        label="π Paper URL (Optional)",
                        placeholder="https://arxiv.org/abs/...",
                    )

                    repo_url = gr.Textbox(
                        label="π» Repository URL (Optional)",
                        placeholder="https://github.com/...",
                    )

                    with gr.Row():
                        feedback_btn = gr.Button("π€ Get LLM Feedback")
                        submit_btn = gr.Button(
                            "π Submit Evaluation Card", variant="primary"
                        )

                # Right column: read-only outputs of the two actions.
                with gr.Column():
                    gr.Markdown("### π‘ LLM Feedback")
                    feedback_box = gr.Textbox(
                        label="AI Feedback", lines=10, interactive=False
                    )

                    gr.Markdown("### π€ Submission Result")
                    result_box = gr.Textbox(label="Result", lines=8, interactive=False)

        with gr.TabItem("ποΈ Gallery"):
            refresh_btn = gr.Button("π Refresh Gallery")

            with gr.Tabs():
                with gr.TabItem("π Card View"):
                    gallery_cards = gr.HTML(value="Loading gallery...")

                with gr.TabItem("π Table View"):
                    gallery_table = gr.Textbox(
                        label="Evaluation Cards Table",
                        lines=25,
                        interactive=False,
                        value="Loading table...",
                    )

    # Wire the buttons to their handlers.
    template_btn.click(load_template_text, outputs=[yaml_editor])
    feedback_btn.click(get_feedback, inputs=[yaml_editor], outputs=[feedback_box])
    submit_btn.click(
        submit_card, inputs=[yaml_editor, paper_url, repo_url], outputs=[result_box]
    )
    # The refresh button updates both gallery views.
    refresh_btn.click(refresh_gallery_cards, outputs=[gallery_cards])
    refresh_btn.click(refresh_gallery_table, outputs=[gallery_table])

    # Populate both gallery views when the page loads.
    demo.load(refresh_gallery_cards, outputs=[gallery_cards])
    demo.load(refresh_gallery_table, outputs=[gallery_table])
|
|
|
|
|
# Start the Gradio server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()
|
|
|