"use client";

import React, { useState, useEffect } from "react";
import { Button } from "@/components/ui/button";
import { Input } from "@/components/ui/input";
import { Badge } from "@/components/ui/badge";
import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
import { Tabs, TabsList, TabsTrigger, TabsContent } from "@/components/ui/tabs";
import { Label } from "@/components/ui/label";
import {
  Dialog,
  DialogTrigger,
  DialogContent,
  DialogHeader,
  DialogTitle,
  DialogDescription,
  DialogClose,
} from "@/components/ui/dialog";
import { toast } from "sonner";
import { InvokeLLM } from "@/types/entities";
import { motion, AnimatePresence } from "framer-motion";
import {
  Search,
  Filter,
  Sparkles,
  TrendingUp,
  Brain,
  Target,
  Zap,
  ArrowRight,
  Lightbulb,
} from "lucide-react";
import { Textarea } from "@/components/ui/textarea";
import { adviseForDataset, DatasetAdvisorInput } from "@/ai/flows/dataset-advisor";
import AIAdvisor from "@/components/portal/AIAdvisor";
import DatasetCard from "@/components/portal/DatasetCard";
import FilterPanel from "@/components/portal/FilterPanel";
import RecommendationCard from "@/components/portal/RecommendationCard";
import SearchBar from "@/components/portal/SearchBar";

/** Base URL for the `/datasets/search` and `/datasets/download` endpoints. */
const SEARCH_API_BASE = "http://localhost:8000";

/** Base URL for the `/download-hf`, `/download-kaggle` and `/download-github` endpoints. */
const DOWNLOAD_API_BASE = "http://localhost:9001";

/** Delay between the last keystroke and the search request, in milliseconds. */
const SEARCH_DEBOUNCE_MS = 300;

/** Maximum number of suggestions offered for a partially typed query. */
const MAX_SUGGESTIONS = 8;

/** Maximum number of past queries kept in the search history. */
const MAX_SEARCH_HISTORY = 10;

/** Hugging Face downloads estimated above this size (in MB) require confirmation first. */
const LARGE_DOWNLOAD_THRESHOLD_MB = 500;

/** Static list of queries used as the source for search suggestions. */
const POPULAR_SEARCHES: readonly string[] = [
  "drone datasets",
  "aerial imagery",
  "computer vision",
  "image classification",
  "natural language processing",
  "medical imaging",
  "covid detection",
  "oscilloscope data",
  "signal processing",
  "mnist handwritten digits",
  "cifar object recognition",
  "fashion mnist",
  "speech recognition",
  "satellite imagery",
  "autonomous driving",
  "robotics datasets",
];

// Dataset / download-info objects arrive as untyped JSON and their full shape is
// not declared in this file, so they are deliberately left loose here.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
type LooseRecord = any;

/**
 * Opens `url` in a new browsing context.
 *
 * `noopener,noreferrer` prevents the opened page from reaching back into this
 * one through `window.opener`; the URLs can come from search results.
 */
function openInNewTab(url: string): void {
  window.open(url, "_blank", "noopener,noreferrer");
}

/**
 * Dataset discovery page: holds the state and handlers for searching datasets,
 * suggesting queries, filtering results by source and starting downloads.
 */
export default function DataPortalPage() {
  // Discovery logic
  const [datasets, setDatasets] = useState<LooseRecord[]>([]);
  const [filteredDatasets, setFilteredDatasets] = useState<LooseRecord[]>([]);
  const [activeSource, setActiveSource] = useState("all");
  const [searchQuery, setSearchQuery] = useState("");
  const [filters, setFilters] = useState({ category: "all", size: "all", rating: "all" });
  const [isLoading, setIsLoading] = useState(true);
  const [aiRecommendations, setAiRecommendations] = useState<LooseRecord[]>([]);
  const [showFilters, setShowFilters] = useState(false);
  const [downloadStatus, setDownloadStatus] = useState({});
  const [aiLoading, setAiLoading] = useState(false);
  const [cloudStatus, setCloudStatus] = useState({});

  // Form-based logic
  const [source, setSource] = useState("kaggle");
  const [identifier, setIdentifier] = useState("");
  const [destDir, setDestDir] = useState("datasets/");
  const [kaggleUsername, setKaggleUsername] = useState("");
  const [kaggleKey, setKaggleKey] = useState("");
  const [split, setSplit] = useState("train");
  const [status, setStatus] = useState<string | null>(null);
  const [loading, setLoading] = useState(false);
  const [searchOpen, setSearchOpen] = useState(false);
  const [searchResults, setSearchResults] = useState<LooseRecord[]>([]);
  const [advisorInput, setAdvisorInput] = useState("");
  const [advisorResponse, setAdvisorResponse] = useState("");

  // Advisor state
  const [advisorOpen, setAdvisorOpen] = useState(false);
  const [projectGoal, setProjectGoal] = useState("");
  const [currentData, setCurrentData] = useState("");

  // Google-level search features
  const [searchSuggestions, setSearchSuggestions] = useState<string[]>([]);
  const [showSuggestions, setShowSuggestions] = useState(false);
  const [searchHistory, setSearchHistory] = useState<string[]>([]);
  const [isSearching, setIsSearching] = useState(false);
  // `time` stays a two-decimal string (e.g. "0.50") so the initial and updated
  // values have the same type and formatting.
  const [searchStats, setSearchStats] = useState<{ total: number; time: string }>({
    total: 0,
    time: "0.00",
  });
  const [error, setError] = useState<string | null>(null);

  // Download confirmation dialog state
  const [showConfirmDialog, setShowConfirmDialog] = useState(false);
  const [downloadInfo, setDownloadInfo] = useState<LooseRecord | null>(null);
  const [pendingDownload, setPendingDownload] = useState("");

  // Id of the most recently started search. Responses belonging to an older id
  // are ignored so a slow request can never overwrite newer results.
  const latestSearchId = React.useRef(0);

  // Google-style search suggestions
  const popularSearches = POPULAR_SEARCHES;

  // Real-time search with debouncing (Google-style)
  useEffect(() => {
    const delayedSearch = setTimeout(() => {
      if (searchQuery.trim()) {
        performGoogleLevelSearch(searchQuery);
        generateSearchSuggestions(searchQuery);
      } else {
        // Invalidate any in-flight request so a late response cannot
        // repopulate the view after the query has been cleared.
        latestSearchId.current += 1;
        setIsSearching(false);
        setSearchResults([]);
        setSearchSuggestions([]);
        setShowSuggestions(false);
      }
    }, SEARCH_DEBOUNCE_MS);
    return () => clearTimeout(delayedSearch);
  }, [searchQuery]);

  /**
   * Updates the suggestion list with the popular searches that contain `query`
   * (case-insensitive). Queries shorter than two characters clear the list.
   */
  const generateSearchSuggestions = (query: string) => {
    if (query.length < 2) {
      setSearchSuggestions([]);
      return;
    }
    const needle = query.toLowerCase();
    const suggestions = popularSearches
      .filter(search => search.toLowerCase().includes(needle))
      .slice(0, MAX_SUGGESTIONS);
    setSearchSuggestions(suggestions);
    setShowSuggestions(suggestions.length > 0);
  };

  /**
   * Queries the search endpoint for `query` and stores the results, the result
   * count / elapsed time, and the query in the search history.
   *
   * On failure the results are cleared and a user-facing error message is set.
   * If a newer search has started in the meantime, this call's outcome is
   * discarded entirely.
   */
  const performGoogleLevelSearch = async (query: string) => {
    const requestId = ++latestSearchId.current;
    const isStale = () => requestId !== latestSearchId.current;

    setIsSearching(true);
    setError(null);
    const startTime = performance.now();

    try {
      const response = await fetch(
        `${SEARCH_API_BASE}/datasets/search?query=${encodeURIComponent(query)}`,
        {
          method: 'GET',
          headers: {
            'Content-Type': 'application/json',
          },
        }
      );

      if (!response.ok) {
        throw new Error(`Search failed: ${response.status}`);
      }

      const data = await response.json();
      if (isStale()) return;

      const searchTime = ((performance.now() - startTime) / 1000).toFixed(2);

      // Guard against a payload whose `results` is missing or not a list.
      const results: LooseRecord[] = Array.isArray(data?.results) ? data.results : [];
      setSearchResults(results);
      setSearchStats({
        total: data?.total || results.length,
        time: searchTime,
      });

      // Update search history, newest first, without duplicates. The check runs
      // inside the updater so it always sees the current history.
      setSearchHistory(prev =>
        prev.includes(query) ? prev : [query, ...prev].slice(0, MAX_SEARCH_HISTORY)
      );

      console.log('🔍 Google-Level Search Results:', data);
    } catch (err) {
      if (isStale()) return;
      console.error('Search error:', err);
      setError('Search temporarily unavailable. Please try again.');
      setSearchResults([]);
    } finally {
      // Only the latest request is allowed to switch the spinner off.
      if (!isStale()) {
        setIsSearching(false);
      }
    }
  };

  /** Form submit handler: runs the search immediately and hides the suggestions. */
  const handleSearchSubmit = (e: React.FormEvent) => {
    e.preventDefault();
    if (searchQuery.trim()) {
      performGoogleLevelSearch(searchQuery);
      setShowSuggestions(false);
    }
  };

  /** Puts `suggestion` into the search box and searches for it right away. */
  const selectSuggestion = (suggestion: string) => {
    setSearchQuery(suggestion);
    setShowSuggestions(false);
    performGoogleLevelSearch(suggestion);
  };

  useEffect(() => {
    // Don't automatically load datasets - only show when user searches
    // generateAIRecommendations();
  }, []);

  // Apply filters to search results (Google-style filtering)
  useEffect(() => {
    const visible =
      activeSource === "all"
        ? [...searchResults]
        : searchResults.filter(dataset => dataset.source === activeSource);
    setFilteredDatasets(visible);
  }, [searchResults, activeSource]);

  // Legacy function compatibility
  const loadDatasets = async () => {
    // Replaced by loadTrendingDatasets
  };

  const applyFilters = () => {
    // Now handled by the useEffect above
  };

  const handleSearch = () => {
    // Now handled by performGoogleLevelSearch
  };

  /**
   * Asks the LLM for three trending dataset recommendations and stores them.
   * Failures are logged and leave the current recommendations unchanged.
   */
  const generateAIRecommendations = async () => {
    setAiLoading(true);
    try {
      const response = await InvokeLLM({
        prompt: `Generate 3 trending dataset recommendations for machine learning projects. Focus on popular, high-quality datasets across different domains like computer vision, NLP, and structured data. For each recommendation, provide: name, brief description, and why it's trending.`,
        response_json_schema: {
          type: "object",
          properties: {
            recommendations: {
              type: "array",
              items: {
                type: "object",
                properties: {
                  name: { type: "string" },
                  description: { type: "string" },
                  reason: { type: "string" },
                  category: { type: "string" }
                }
              }
            }
          }
        }
      });
      setAiRecommendations(response.recommendations || []);
    } catch (err) {
      console.error("Error generating AI recommendations:", err);
    } finally {
      setAiLoading(false);
    }
  };

  /** Flips the `is_favorited` flag of the dataset whose `id` equals `datasetId`. */
  const toggleFavorite = async (datasetId: any) => {
    setDatasets(ds =>
      ds.map(d => (d.id === datasetId ? { ...d, is_favorited: !d.is_favorited } : d))
    );
  };

  /**
   * Starts a download for `dataset` based on its `source`.
   *
   * Hugging Face datasets are first probed with `info=true`; when the response
   * carries a warning or an estimated size above the threshold, the
   * confirmation dialog is shown instead of downloading. Unknown sources fall
   * back to opening `dataset.url`.
   */
  const handleDirectDownload = async (dataset: LooseRecord) => {
    try {
      setIsLoading(true);
      // Named differently from the `identifier` form state to avoid shadowing it.
      const datasetRef: string = dataset.identifier || dataset.name.replace('🤗 ', '');
      const encodedRef = encodeURIComponent(datasetRef);

      if (dataset.source === 'huggingface') {
        // First, get dataset size info
        const infoResponse = await fetch(
          `${DOWNLOAD_API_BASE}/download-hf?dataset=${encodedRef}&info=true`
        );
        if (infoResponse.ok) {
          const info = await infoResponse.json();
          if (info.warning || info.estimatedSizeMB > LARGE_DOWNLOAD_THRESHOLD_MB) {
            // Show size warning dialog; the `finally` below resets the loading flag.
            setDownloadInfo(info);
            setPendingDownload(datasetRef);
            setShowConfirmDialog(true);
            return;
          }
        }
        // Small dataset - download directly
        openInNewTab(`${DOWNLOAD_API_BASE}/download-hf?dataset=${encodedRef}&confirm=true`);
      } else if (dataset.source === 'kaggle') {
        openInNewTab(`${DOWNLOAD_API_BASE}/download-kaggle?dataset=${encodedRef}`);
      } else if (dataset.source === 'github') {
        openInNewTab(`${DOWNLOAD_API_BASE}/download-github?repo=${encodedRef}`);
      } else if (dataset.url) {
        openInNewTab(dataset.url);
      } else {
        // Without a URL there is nothing to open; avoid spawning a blank tab.
        console.error('Download error:', new Error('Dataset has no download URL'));
      }
    } catch (err) {
      console.error('Download error:', err);
    } finally {
      setIsLoading(false);
    }
  };

  /** Resets the confirmation dialog and the pending download it refers to. */
  const resetDownloadDialog = () => {
    setShowConfirmDialog(false);
    setDownloadInfo(null);
    setPendingDownload("");
  };

  /** Starts the pending Hugging Face download (if any) and closes the dialog. */
  const confirmDownload = () => {
    if (pendingDownload) {
      openInNewTab(
        `${DOWNLOAD_API_BASE}/download-hf?dataset=${encodeURIComponent(pendingDownload)}&confirm=true`
      );
    }
    resetDownloadDialog();
  };

  /** Closes the confirmation dialog without downloading. */
  const cancelDownload = () => {
    resetDownloadDialog();
  };

  // --- Form-based logic (from your original page) ---

  /** Placeholder search for the form flow: fills the results with two example entries. */
  const handleFormSearch = async () => {
    setSearchResults([
      { name: "Example Dataset 1", id: "example1" },
      { name: "Example Dataset 2", id: "example2" },
    ]);
  };

  /** Placeholder advisor: sets a fixed suggestion as the advisor response. */
  const handleAdvisor = async () => {
    setAdvisorResponse("Try 'mnist' for digit recognition, or 'imagenet' for general vision tasks.");
  };

  /** Copies the chosen result's id (or name) into the identifier field and closes the picker. */
  const handleSelectResult = (result: any) => {
    setIdentifier(result.id || result.name);
    setSearchOpen(false);
  };

  /**
   * Submits the download form as JSON to `/datasets/download` and shows the
   * response message (or the raw response) as the status. Kaggle credentials
   * and the Hugging Face split are only sent for their respective source.
   */
  const handleSubmit = async (e: React.FormEvent) => {
    e.preventDefault();
    setLoading(true);
    setStatus(null);

    const payload: Record<string, string> = { source, identifier, dest_dir: destDir };
    if (source === "kaggle") {
      if (kaggleUsername) payload.kaggle_username = kaggleUsername;
      if (kaggleKey) payload.kaggle_key = kaggleKey;
    }
    if (source === "huggingface") {
      payload.split = split;
    }

    try {
      const res = await fetch(`${SEARCH_API_BASE}/datasets/download`, {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify(payload),
      });
      const data = await res.json();
      setStatus(data.message || JSON.stringify(data));
    } catch (err: unknown) {
      const message = err instanceof Error ? err.message : String(err);
      setStatus("Error: " + message);
    } finally {
      setLoading(false);
    }
  };

  // --- UI ---
  return (
    <>
{/* Header */}

Discover Datasets

Find the perfect data for your next AI project

{/* Main Content - Scrollable */}
{/* Search and Filters */} {/* Google-Level Search Interface */}
{ setSearchQuery(e.target.value); setShowSuggestions(true); }} onFocus={() => setShowSuggestions(searchSuggestions.length > 0)} onBlur={() => setTimeout(() => setShowSuggestions(false), 200)} placeholder="Search datasets... (try 'drones', 'mnist', 'covid')" className="w-full pl-10 pr-12 py-3 bg-white dark:bg-gray-800 border border-gray-300 dark:border-gray-600 rounded-lg focus:outline-none focus:ring-2 focus:ring-blue-500 focus:border-blue-500 shadow-lg text-lg text-black dark:text-white" />
{/* Google-Style Search Suggestions */} {showSuggestions && searchSuggestions.length > 0 && (
{searchSuggestions.map((suggestion, index) => ( ))}
)} {/* Google-Style Search Stats */} {searchResults.length > 0 && (
About {searchStats.total.toLocaleString()} results ({searchStats.time} seconds) {searchQuery && ( AI-Enhanced Search )}
)}
All Sources Kaggle Hugging Face GitHub
{/* Results - Scrollable Content */} {/* Search Results or Trending Datasets */} {error && (
⚠️ Search Error
{error}
)} {isSearching && (
Searching with AI intelligence...
)} {!isSearching && !error && filteredDatasets.length === 0 && searchQuery && (
🔍
No datasets found
Try different keywords or check the suggestions above
{popularSearches.slice(0, 6).map(suggestion => ( ))}
)} {!isSearching && !error && filteredDatasets.length > 0 && (

{searchQuery ? `Search Results for "${searchQuery}"` : 'Trending Datasets'}

📊 Sorted by relevance
{/* Scrollable Results Container */}
{filteredDatasets.map((dataset, index) => ( ))}
)}
{/* Size Warning Dialog */} {showConfirmDialog && downloadInfo && (

Dataset Download Warning

Dataset: {downloadInfo.dataset}

Files: {downloadInfo.totalFiles.toLocaleString()}

Estimated Size: {downloadInfo.estimatedSizeMB > 1024 ? `${(downloadInfo.estimatedSizeMB/1024).toFixed(1)}GB` : `${downloadInfo.estimatedSizeMB}MB`}

{downloadInfo.warning && (

{downloadInfo.warning}

)} {downloadInfo.largeFiles?.length > 0 && (

Large files included:

    {downloadInfo.largeFiles.map((file: string, index: number) => (
  • • {file}
  • ))}
)}
)} ); }