import datetime
import os
import re
import yaml
from datasets import Dataset, load_dataset
from huggingface_hub import create_repo, login
import gradio as gr
# Constants
DATASET_NAME = "jablonkagroup/eval-cards-dataset"
def setup_hf_auth():
"""Setup Hugging Face authentication"""
try:
hf_token = os.environ.get("HF_TOKEN")
if hf_token:
login(token=hf_token)
return True
return False
except Exception as e:
print(f"HF Auth error: {e}")
return False
def ensure_dataset_exists():
"""Ensure the dataset repository exists"""
try:
load_dataset(DATASET_NAME, split="train")
return True
except Exception:
try:
if not setup_hf_auth():
return False
create_repo(
repo_id=DATASET_NAME, repo_type="dataset", private=False, exist_ok=True
)
empty_data = {
"filename": [],
"title": [],
"summary": [],
"authors": [],
"creation_date": [],
"coverage_score": [],
"yaml_content": [],
"paper_link": [],
"repository_link": [],
"timestamp": [],
}
empty_dataset = Dataset.from_dict(empty_data)
empty_dataset.push_to_hub(DATASET_NAME)
return True
except Exception as e:
print(f"Dataset creation error: {e}")
return False
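# Minimal usage sketch (assumes HF_TOKEN is configured as a Space secret;
# both helpers return booleans instead of raising, so callers can fall back
# to the bundled demo data when no token is available):
#
#   if setup_hf_auth():
#       ensure_dataset_exists()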
def get_template():
"""Get a basic YAML template"""
return """title: "Your Evaluation Title"
summary: "Brief description of your evaluation"
metadata:
authors: ["Author Name"]
creation_date: "2025-01-01"
paper_link: ""
repository_link: ""
evaluation_design:
purpose: "What is the purpose of this evaluation?"
scope: "What does this evaluation cover?"
estimand:
definition: "What are you trying to estimate?"
estimator:
method: "How are you estimating it?"
estimate:
results: "What are the results?"
results_communication:
format: "How are results communicated?"
known_issues_and_limitations:
issues: ["List any known issues"]
version_and_maintenance:
version: "1.0"
citation_and_usage:
citation: "How to cite this work"
"""
def compute_coverage_score(eval_data):
"""Compute a coverage score for the eval card"""
sections = {
"metadata": 5,
"evaluation_design": 10,
"estimand": 20,
"estimator": 20,
"estimate": 20,
"results_communication": 10,
"known_issues_and_limitations": 10,
"version_and_maintenance": 5,
"citation_and_usage": 5,
}
def count_filled_fields(data):
if isinstance(data, dict):
filled = total = 0
for value in data.values():
if isinstance(value, (dict, list)):
sub_filled, sub_total = count_filled_fields(value)
filled += sub_filled
total += sub_total
else:
total += 1
if value and str(value).strip() not in ["", "[]", "{}"]:
filled += 1
return filled, total
elif isinstance(data, list):
if not data:
return 0, 1
filled = total = 0
for item in data:
sub_filled, sub_total = count_filled_fields(item)
filled += sub_filled
total += sub_total
return filled, total
else:
            return (1 if data else 0), 1
scores = {}
total_score = 0
for section, weight in sections.items():
if section in eval_data:
filled, total = count_filled_fields(eval_data[section])
completion_rate = filled / total if total > 0 else 0
scores[section] = {
"score": round(completion_rate * weight, 2),
"max_score": weight,
"completion_rate": round(completion_rate * 100, 2),
}
total_score += scores[section]["score"]
else:
scores[section] = {
"score": 0,
"max_score": weight,
"completion_rate": 0,
}
return min(round(total_score, 2), 100), scores
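# Worked example with a hypothetical card: a card containing only a fully
# filled "metadata" section earns exactly that section's weight, 5 of 100:
#
#   score, details = compute_coverage_score(
#       {"metadata": {"authors": ["A. Author"], "creation_date": "2025-01-01"}}
#   )
#   # score == 5.0; every other section reports completion_rate == 0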
def get_sample_cards():
"""Get sample cards for demo purposes"""
return [
{
"title": "ChemBench: A Large-Scale Chemical Evaluation",
"summary": "A comprehensive benchmark for evaluating chemical property prediction models across multiple datasets and molecular representations.",
"authors": "John Doe, Jane Smith",
"creation_date": "2024-12-15",
"coverage_score": 85.5,
"paper_link": "https://arxiv.org/abs/2401.example",
"repository_link": "https://github.com/example/chembench",
"score_details": {
"metadata": {"score": 4.5, "max_score": 5, "completion_rate": 90},
"evaluation_design": {
"score": 9.0,
"max_score": 10,
"completion_rate": 90,
},
"estimand": {"score": 18.0, "max_score": 20, "completion_rate": 90},
"estimator": {"score": 17.0, "max_score": 20, "completion_rate": 85},
"estimate": {"score": 16.0, "max_score": 20, "completion_rate": 80},
"results_communication": {
"score": 8.0,
"max_score": 10,
"completion_rate": 80,
},
"known_issues_and_limitations": {
"score": 7.0,
"max_score": 10,
"completion_rate": 70,
},
"version_and_maintenance": {
"score": 3.0,
"max_score": 5,
"completion_rate": 60,
},
"citation_and_usage": {
"score": 3.0,
"max_score": 5,
"completion_rate": 60,
},
},
},
{
"title": "MaterialsML: Property Prediction Framework",
"summary": "An evaluation framework for materials property prediction using machine learning approaches on crystal structure data.",
"authors": "Alice Johnson, Bob Wilson",
"creation_date": "2024-11-20",
"coverage_score": 92.0,
"paper_link": "",
"repository_link": "https://github.com/example/materialsml",
"score_details": {
"metadata": {"score": 5.0, "max_score": 5, "completion_rate": 100},
"evaluation_design": {
"score": 10.0,
"max_score": 10,
"completion_rate": 100,
},
"estimand": {"score": 19.0, "max_score": 20, "completion_rate": 95},
"estimator": {"score": 18.0, "max_score": 20, "completion_rate": 90},
"estimate": {"score": 19.0, "max_score": 20, "completion_rate": 95},
"results_communication": {
"score": 9.0,
"max_score": 10,
"completion_rate": 90,
},
"known_issues_and_limitations": {
"score": 8.0,
"max_score": 10,
"completion_rate": 80,
},
"version_and_maintenance": {
"score": 2.0,
"max_score": 5,
"completion_rate": 40,
},
"citation_and_usage": {
"score": 2.0,
"max_score": 5,
"completion_rate": 40,
},
},
},
]
def save_eval_card(yaml_content, paper_url="", repo_url=""):
"""Save an eval card to the dataset"""
try:
        eval_data = yaml.safe_load(yaml_content)
        if not isinstance(eval_data, dict):
            return "❌ Error: YAML must define a mapping at the top level"
# Add URLs to metadata if provided
if paper_url:
eval_data.setdefault("metadata", {})["paper_link"] = paper_url
if repo_url:
eval_data.setdefault("metadata", {})["repository_link"] = repo_url
yaml_content = yaml.dump(eval_data)
filename = re.sub(r"[^\w\-_]", "_", eval_data.get("title", "Unnamed"))
filename = (
f"{filename}_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.yaml"
)
score, score_details = compute_coverage_score(eval_data)
# Try to save to HF dataset, but don't fail if it doesn't work
saved_to_hf = False
if setup_hf_auth():
try:
dataset = load_dataset(DATASET_NAME, split="train")
existing_data = dataset.to_dict()
saved_to_hf = True
except Exception:
try:
existing_data = {
"filename": [],
"title": [],
"summary": [],
"authors": [],
"creation_date": [],
"coverage_score": [],
"yaml_content": [],
"paper_link": [],
"repository_link": [],
"timestamp": [],
}
ensure_dataset_exists()
saved_to_hf = True
except Exception:
saved_to_hf = False
if saved_to_hf:
try:
existing_data["filename"].append(filename)
existing_data["title"].append(eval_data.get("title", "Unnamed"))
existing_data["summary"].append(eval_data.get("summary", ""))
existing_data["authors"].append(
", ".join(eval_data.get("metadata", {}).get("authors", []))
)
existing_data["creation_date"].append(
eval_data.get("metadata", {}).get("creation_date", "")
)
existing_data["coverage_score"].append(float(score))
existing_data["yaml_content"].append(yaml_content)
existing_data["paper_link"].append(paper_url or "")
existing_data["repository_link"].append(repo_url or "")
existing_data["timestamp"].append(
datetime.datetime.now().isoformat()
)
updated_dataset = Dataset.from_dict(existing_data)
updated_dataset.push_to_hub(DATASET_NAME)
except Exception as e:
saved_to_hf = False
print(f"Failed to save to HF: {e}")
details_str = f"Coverage Score: {score}%\n\nSection Details:\n"
for section, details in score_details.items():
details_str += f"β€’ {section}: {details['score']}/{details['max_score']} ({details['completion_rate']}%)\n"
if saved_to_hf:
return f"βœ… Successfully saved to HF dataset! Filename: {filename}\n\n{details_str}"
else:
return f"⚠️ Validated successfully but couldn't save to HF dataset (check HF_TOKEN)\nFilename: {filename}\n\n{details_str}"
except Exception as e:
return f"❌ Error: {str(e)}"
def load_gallery_cards():
"""Load cards view for gallery with fallback to sample data"""
try:
# Try to load from HF dataset first
if setup_hf_auth():
try:
dataset = load_dataset(DATASET_NAME, split="train")
if len(dataset) > 0:
return create_gallery_html_from_dataset(dataset)
except Exception as e:
print(f"Failed to load from HF dataset: {e}")
# Fallback to sample data with nice styling
sample_cards = get_sample_cards()
return create_gallery_html_from_samples(sample_cards)
except Exception as e:
return f"""
<div class="gallery-container">
<div class="error-message">
<h3>❌ Error loading gallery</h3>
<p>{str(e)}</p>
<p>Please check your configuration and try again.</p>
</div>
</div>
"""
def create_gallery_html_from_dataset(dataset):
"""Create gallery HTML from HF dataset"""
cards_html = '<div class="gallery-container">'
# Sort by coverage score
sorted_indices = sorted(
range(len(dataset)), key=lambda i: dataset[i]["coverage_score"], reverse=True
)
for i in sorted_indices:
row = dataset[i]
eval_data = yaml.safe_load(row["yaml_content"])
_, score_details = compute_coverage_score(eval_data)
card_data = {
"title": row["title"],
"summary": row["summary"][:300] + "..."
if len(row["summary"]) > 300
else row["summary"],
"authors": row["authors"],
"creation_date": row["creation_date"],
"coverage_score": row["coverage_score"],
"paper_link": row.get("paper_link", ""),
"repository_link": row.get("repository_link", ""),
"score_details": score_details,
}
cards_html += create_card_html(card_data)
cards_html += "</div>"
return cards_html
def create_gallery_html_from_samples(sample_cards):
"""Create gallery HTML from sample data"""
cards_html = """
<div class="gallery-container">
<div class="demo-notice">
<h3>🎯 Demo Gallery</h3>
<p>This is showing sample data. Set your HF_TOKEN in Space settings to save and load real evaluation cards!</p>
</div>
"""
for card_data in sample_cards:
cards_html += create_card_html(card_data)
cards_html += "</div>"
return cards_html
def create_card_html(card_data):
"""Create HTML for a single card"""
# Get coverage color
score = card_data["coverage_score"]
if score >= 80:
score_color = "#2e7d32"
score_bg = "#e8f5e8"
elif score >= 60:
score_color = "#f57c00"
score_bg = "#fff3e0"
else:
score_color = "#d32f2f"
score_bg = "#ffebee"
html = f"""
<div class="eval-card">
<div class="card-header">
<h3 class="card-title">🎯 {card_data["title"]}</h3>
<div class="coverage-badge" style="color: {score_color}; background: {score_bg};">
{card_data["coverage_score"]}%
</div>
</div>
<div class="card-content">
<p class="card-summary"><strong>πŸ“ Summary:</strong> {card_data["summary"]}</p>
<div class="card-meta">
<p><strong>πŸ‘₯ Authors:</strong> {card_data["authors"]}</p>
<p><strong>πŸ“… Created:</strong> {card_data["creation_date"]}</p>
</div>
</div>
<div class="coverage-section">
<h4>πŸ“Š Coverage by Section</h4>
<div class="coverage-grid">
"""
for section, details in card_data["score_details"].items():
section_display = section.replace("_", " ").title()
completion = details["completion_rate"]
# Color coding for completion rates
if completion >= 80:
bar_color = "#4caf50"
elif completion >= 60:
bar_color = "#ff9800"
else:
bar_color = "#f44336"
html += f"""
<div class="coverage-item">
<div class="coverage-label">{section_display}</div>
<div class="coverage-bar">
<div class="coverage-fill" style="width: {completion}%; background-color: {bar_color};"></div>
</div>
<div class="coverage-text">{details["score"]}/{details["max_score"]} ({completion}%)</div>
</div>
"""
html += """
</div>
</div>
"""
# Add links if available
if card_data.get("paper_link") or card_data.get("repository_link"):
html += '<div class="card-links">'
if card_data.get("paper_link"):
html += f'<a href="{card_data["paper_link"]}" target="_blank" class="link-button">πŸ“„ Paper</a>'
if card_data.get("repository_link"):
html += f'<a href="{card_data["repository_link"]}" target="_blank" class="link-button">πŸ’» Repository</a>'
html += "</div>"
html += "</div>"
return html
def load_gallery_table():
"""Load table view for gallery"""
try:
if setup_hf_auth():
try:
dataset = load_dataset(DATASET_NAME, split="train")
if len(dataset) > 0:
return create_table_from_dataset(dataset)
except Exception:
pass
# Fallback to sample data
sample_cards = get_sample_cards()
return create_table_from_samples(sample_cards)
except Exception as e:
return f"Error loading table: {str(e)}"
def create_table_from_dataset(dataset):
"""Create table from HF dataset"""
table_text = "πŸ“Š Evaluation Cards Summary\n" + "=" * 100 + "\n\n"
sorted_indices = sorted(
range(len(dataset)), key=lambda i: dataset[i]["coverage_score"], reverse=True
)
table_text += (
f"{'Rank':<6} {'Title':<35} {'Authors':<30} {'Coverage':<12} {'Created':<12}\n"
)
table_text += "-" * 100 + "\n"
for rank, i in enumerate(sorted_indices[:50], 1):
row = dataset[i]
title = row["title"][:32] + "..." if len(row["title"]) > 35 else row["title"]
authors = (
row["authors"][:27] + "..." if len(row["authors"]) > 30 else row["authors"]
)
table_text += f"{rank:<6} {title:<35} {authors:<30} {row['coverage_score']:<11}% {row['creation_date']:<12}\n"
table_text += "\n" + "=" * 100
table_text += f"\n\nTotal Cards: {len(dataset)}"
table_text += f"\nAverage Coverage: {sum(row['coverage_score'] for row in dataset) / len(dataset):.1f}%"
return table_text
def create_table_from_samples(sample_cards):
"""Create table from sample data"""
table_text = "πŸ“Š Evaluation Cards Summary (Demo Data)\n" + "=" * 100 + "\n\n"
sorted_cards = sorted(sample_cards, key=lambda x: x["coverage_score"], reverse=True)
table_text += (
f"{'Rank':<6} {'Title':<35} {'Authors':<30} {'Coverage':<12} {'Created':<12}\n"
)
table_text += "-" * 100 + "\n"
for rank, card in enumerate(sorted_cards, 1):
title = card["title"][:32] + "..." if len(card["title"]) > 35 else card["title"]
authors = (
card["authors"][:27] + "..."
if len(card["authors"]) > 30
else card["authors"]
)
table_text += f"{rank:<6} {title:<35} {authors:<30} {card['coverage_score']:<11}% {card['creation_date']:<12}\n"
table_text += "\n" + "=" * 100
table_text += f"\n\nTotal Cards: {len(sorted_cards)} (demo)"
table_text += f"\nAverage Coverage: {sum(card['coverage_score'] for card in sorted_cards) / len(sorted_cards):.1f}%"
return table_text
def get_llm_feedback(yaml_content):
"""Get LLM feedback using Groq"""
api_token = os.environ.get("GROQ_API_KEY")
if not api_token:
return "Please set GROQ_API_KEY in Space settings to get LLM feedback."
if not yaml_content.strip():
return "Please provide YAML content first."
try:
import requests
response = requests.post(
"https://api.groq.com/openai/v1/chat/completions",
headers={
"Content-Type": "application/json",
"Authorization": f"Bearer {api_token}",
},
json={
"model": "llama-3.3-70b-versatile",
"messages": [
{
"role": "user",
"content": f"Analyze this evaluation card YAML and provide specific improvement suggestions:\n\n```yaml\n{yaml_content}\n```\n\nFocus on completeness, clarity, and best practices.",
}
],
            },
            timeout=60,  # fail fast instead of hanging on a stalled request
        )
if response.status_code == 200:
return response.json()["choices"][0]["message"]["content"]
else:
return f"API Error {response.status_code}: {response.text}"
except Exception as e:
return f"Error getting feedback: {str(e)}"
# Thin wrapper functions wired to the Gradio interface
def submit_card(yaml_text, paper_url, repo_url):
if not yaml_text.strip():
return "Please provide YAML content"
try:
yaml.safe_load(yaml_text) # Validate YAML
result = save_eval_card(yaml_text, paper_url, repo_url)
return result
except yaml.YAMLError as e:
return f"Invalid YAML: {str(e)}"
except Exception as e:
return f"Error: {str(e)}"
def load_template_text():
return get_template()
def get_feedback(yaml_text):
return get_llm_feedback(yaml_text)
def refresh_gallery_cards():
return load_gallery_cards()
def refresh_gallery_table():
return load_gallery_table()
# Enhanced CSS with dark mode support
enhanced_css = """
/* Hide Gradio footer */
footer {visibility: hidden}
/* General styling */
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Open Sans', 'Helvetica Neue', sans-serif;
}
/* Gallery container */
.gallery-container {
max-height: 700px;
overflow-y: auto;
padding: 20px;
background-color: var(--background-fill-primary, #f8f9fa);
}
/* Demo notice */
.demo-notice {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
padding: 20px;
border-radius: 10px;
margin-bottom: 25px;
text-align: center;
box-shadow: 0 4px 15px rgba(0,0,0,0.1);
}
.demo-notice h3 {
margin: 0 0 10px 0;
font-size: 1.3em;
}
.demo-notice p {
margin: 0;
opacity: 0.9;
}
/* Eval card styling - Light mode default */
.eval-card {
background: var(--background-fill-secondary, white);
color: var(--body-text-color, #374151);
border-radius: 12px;
padding: 25px;
margin-bottom: 25px;
box-shadow: 0 4px 20px rgba(0,0,0,0.08);
border: 1px solid var(--border-color-primary, #e0e0e0);
transition: all 0.3s ease;
position: relative;
overflow: hidden;
}
.eval-card:hover {
transform: translateY(-2px);
box-shadow: 0 8px 30px rgba(0,0,0,0.12);
}
.eval-card::before {
content: '';
position: absolute;
top: 0;
left: 0;
right: 0;
height: 4px;
background: linear-gradient(90deg, #4CAF50, #2196F3, #FF9800);
}
/* Card header */
.card-header {
display: flex;
justify-content: space-between;
align-items: flex-start;
margin-bottom: 20px;
flex-wrap: wrap;
gap: 15px;
}
.card-title {
color: var(--color-accent, #1976d2);
margin: 0;
font-size: 1.4em;
font-weight: 600;
flex: 1;
min-width: 250px;
}
.coverage-badge {
font-weight: bold;
padding: 8px 16px;
border-radius: 20px;
font-size: 1.1em;
min-width: 80px;
text-align: center;
box-shadow: 0 2px 8px rgba(0,0,0,0.1);
}
/* Card content */
.card-content {
margin-bottom: 25px;
}
.card-summary {
margin: 0 0 15px 0;
line-height: 1.6;
font-size: 1.02em;
color: var(--body-text-color, #374151);
}
.card-meta {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
gap: 10px;
margin: 15px 0;
}
.card-meta p {
margin: 0;
padding: 8px 12px;
background: var(--input-background-fill, #f5f5f5);
color: var(--body-text-color, #374151);
border-radius: 6px;
font-size: 0.95em;
}
/* Coverage section */
.coverage-section {
border-top: 1px solid var(--border-color-primary, #e0e0e0);
padding-top: 20px;
}
.coverage-section h4 {
color: var(--color-accent, #1976d2);
margin: 0 0 15px 0;
font-size: 1.1em;
font-weight: 600;
}
.coverage-grid {
display: grid;
gap: 12px;
}
.coverage-item {
display: grid;
grid-template-columns: 1fr 2fr auto;
align-items: center;
gap: 15px;
padding: 8px 0;
}
.coverage-label {
font-weight: 500;
color: var(--body-text-color, #333);
font-size: 0.9em;
}
.coverage-bar {
background: var(--neutral-200, #e0e0e0);
border-radius: 10px;
height: 8px;
overflow: hidden;
position: relative;
}
.coverage-fill {
height: 100%;
border-radius: 10px;
transition: width 0.6s ease;
}
.coverage-text {
font-size: 0.85em;
color: var(--body-text-color-subdued, #666);
min-width: 100px;
text-align: right;
}
/* Card links */
.card-links {
margin-top: 20px;
display: flex;
gap: 12px;
flex-wrap: wrap;
border-top: 1px solid var(--border-color-primary, #e0e0e0);
padding-top: 15px;
}
.link-button {
display: inline-flex;
align-items: center;
padding: 8px 16px;
background: linear-gradient(135deg, #1976d2, #1565c0);
color: white !important;
text-decoration: none;
border-radius: 6px;
font-size: 0.9em;
font-weight: 500;
transition: all 0.3s ease;
}
.link-button:hover {
transform: translateY(-1px);
box-shadow: 0 4px 12px rgba(25, 118, 210, 0.3);
text-decoration: none;
color: white !important;
}
/* Error styling */
.error-message {
text-align: center;
padding: 40px;
color: #d32f2f;
background: var(--error-background, #ffebee);
border-radius: 8px;
border: 1px solid var(--error-border, #ffcdd2);
}
.error-message h3 {
margin: 0 0 15px 0;
}
/* Dark mode specific overrides */
.dark .gallery-container {
background-color: var(--background-fill-primary, #0b0f19);
}
.dark .eval-card {
background: var(--background-fill-secondary, #1f2937);
color: var(--body-text-color, #f3f4f6);
border: 1px solid var(--border-color-primary, #374151);
box-shadow: 0 4px 20px rgba(0,0,0,0.3);
}
.dark .eval-card:hover {
box-shadow: 0 8px 30px rgba(0,0,0,0.4);
}
.dark .card-title {
color: var(--color-accent, #60a5fa);
}
.dark .card-summary {
color: var(--body-text-color, #f3f4f6);
}
.dark .card-meta p {
background: var(--input-background-fill, #374151);
color: var(--body-text-color, #f3f4f6);
}
.dark .coverage-section {
border-top: 1px solid var(--border-color-primary, #4b5563);
}
.dark .coverage-section h4 {
color: var(--color-accent, #60a5fa);
}
.dark .coverage-label {
color: var(--body-text-color, #f3f4f6);
}
.dark .coverage-bar {
background: var(--neutral-700, #4b5563);
}
.dark .coverage-text {
color: var(--body-text-color-subdued, #9ca3af);
}
.dark .card-links {
border-top: 1px solid var(--border-color-primary, #4b5563);
}
.dark .error-message {
background: var(--error-background, #7f1d1d);
border: 1px solid var(--error-border, #991b1b);
color: #fca5a5;
}
/* Alternative dark mode detection via the data-theme attribute */
[data-theme="dark"] .eval-card,
html[data-theme="dark"] .eval-card {
background: #1f2937;
color: #f3f4f6;
border: 1px solid #374151;
}
[data-theme="dark"] .card-title,
html[data-theme="dark"] .card-title {
color: #60a5fa;
}
[data-theme="dark"] .card-meta p,
html[data-theme="dark"] .card-meta p {
background: #374151;
color: #f3f4f6;
}
[data-theme="dark"] .coverage-section,
html[data-theme="dark"] .coverage-section {
border-top: 1px solid #4b5563;
}
[data-theme="dark"] .coverage-section h4,
html[data-theme="dark"] .coverage-section h4 {
color: #60a5fa;
}
[data-theme="dark"] .coverage-label,
html[data-theme="dark"] .coverage-label {
color: #f3f4f6;
}
[data-theme="dark"] .coverage-bar,
html[data-theme="dark"] .coverage-bar {
background: #4b5563;
}
[data-theme="dark"] .coverage-text,
html[data-theme="dark"] .coverage-text {
color: #9ca3af;
}
[data-theme="dark"] .card-links,
html[data-theme="dark"] .card-links {
border-top: 1px solid #4b5563;
}
/* Responsive design */
@media (max-width: 768px) {
.gallery-container {
padding: 15px;
}
.eval-card {
padding: 20px;
}
.card-header {
flex-direction: column;
align-items: flex-start;
}
.coverage-badge {
align-self: flex-start;
}
.coverage-item {
grid-template-columns: 1fr;
gap: 8px;
}
.coverage-text {
text-align: left;
}
.card-meta {
grid-template-columns: 1fr;
}
}
/* Fallback via prefers-color-scheme, forcing Gradio's dark variables with !important */
@media (prefers-color-scheme: dark) {
.eval-card {
background: var(--background-fill-secondary, #1f2937) !important;
color: var(--body-text-color, #f3f4f6) !important;
border: 1px solid var(--border-color-primary, #374151) !important;
}
.card-title {
color: var(--color-accent, #60a5fa) !important;
}
.card-summary {
color: var(--body-text-color, #f3f4f6) !important;
}
.card-meta p {
background: var(--input-background-fill, #374151) !important;
color: var(--body-text-color, #f3f4f6) !important;
}
.coverage-section {
border-top: 1px solid var(--border-color-primary, #4b5563) !important;
}
.coverage-section h4 {
color: var(--color-accent, #60a5fa) !important;
}
.coverage-label {
color: var(--body-text-color, #f3f4f6) !important;
}
.coverage-bar {
background: var(--neutral-700, #4b5563) !important;
}
.coverage-text {
color: var(--body-text-color-subdued, #9ca3af) !important;
}
.card-links {
border-top: 1px solid var(--border-color-primary, #4b5563) !important;
}
}
"""
# Create the interface
with gr.Blocks(
title="Evaluation Cards Gallery",
theme=gr.themes.Soft(),
css=enhanced_css,
) as demo:
gr.Markdown(f"""
# πŸ“Š Evaluation Cards for Machine Learning in Materials Science
Upload your evaluation card in YAML format, get feedback from an LLM, and submit it to the gallery.
Data is persistently stored in the HF dataset: [{DATASET_NAME}](https://huggingface.co/datasets/{DATASET_NAME})
    Check out the [GitHub repository](https://github.com/lamalab-org/eval-cards) for more information.
""")
with gr.Tabs():
with gr.TabItem("πŸ“ Upload & Review"):
with gr.Row():
with gr.Column():
gr.Markdown("### ✏️ Create Evaluation Card")
yaml_editor = gr.Textbox(
label="YAML Content",
lines=15,
placeholder="Paste your YAML content or click 'Load Template'...",
)
template_btn = gr.Button("πŸ“ Load Template")
paper_url = gr.Textbox(
label="πŸ“„ Paper URL (Optional)",
placeholder="https://arxiv.org/abs/...",
)
repo_url = gr.Textbox(
label="πŸ’» Repository URL (Optional)",
placeholder="https://github.com/...",
)
with gr.Row():
feedback_btn = gr.Button("πŸ€– Get LLM Feedback")
submit_btn = gr.Button(
"πŸš€ Submit Evaluation Card", variant="primary"
)
with gr.Column():
gr.Markdown("### πŸ’‘ LLM Feedback")
feedback_box = gr.Textbox(
label="AI Feedback", lines=10, interactive=False
)
gr.Markdown("### πŸ“€ Submission Result")
result_box = gr.Textbox(label="Result", lines=8, interactive=False)
with gr.TabItem("πŸ›οΈ Gallery"):
refresh_btn = gr.Button("πŸ”„ Refresh Gallery")
with gr.Tabs():
with gr.TabItem("πŸ“‹ Card View"):
gallery_cards = gr.HTML(value="Loading gallery...")
with gr.TabItem("πŸ“Š Table View"):
gallery_table = gr.Textbox(
label="Evaluation Cards Table",
lines=25,
interactive=False,
value="Loading table...",
)
# Event handlers
template_btn.click(load_template_text, outputs=[yaml_editor])
feedback_btn.click(get_feedback, inputs=[yaml_editor], outputs=[feedback_box])
submit_btn.click(
submit_card, inputs=[yaml_editor, paper_url, repo_url], outputs=[result_box]
)
refresh_btn.click(refresh_gallery_cards, outputs=[gallery_cards])
refresh_btn.click(refresh_gallery_table, outputs=[gallery_table])
# Load gallery on startup
demo.load(refresh_gallery_cards, outputs=[gallery_cards])
demo.load(refresh_gallery_table, outputs=[gallery_table])
if __name__ == "__main__":
demo.launch()