Redeploy all files after cleaning space
- Dockerfile +39 -0
- README.md +30 -0
- app.py +957 -0
- config.yaml +9 -0
- helpers_HF.py +123 -0
- helpers_LOCAL.py +165 -0
- helpers_SHARED.py +407 -0
- requirements.txt +17 -0
Dockerfile
ADDED
@@ -0,0 +1,39 @@
# Use Python 3.12 slim image
FROM python:3.12-slim

# Set environment variables
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1
ENV GRADIO_SERVER_NAME=0.0.0.0
ENV GRADIO_SERVER_PORT=7860

# Set working directory
WORKDIR /app

# Install system dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    git \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first for better caching
COPY requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY app.py .
COPY helpers_SHARED.py .
COPY helpers_HF.py .
COPY helpers_LOCAL.py .

# Create data directories
RUN mkdir -p data/embeddings

# Expose port
EXPOSE 7860

# Run the application
CMD ["python", "app.py"]
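The Dockerfile above bakes the Gradio host and port into the image via GRADIO_SERVER_NAME and GRADIO_SERVER_PORT. As a rough illustration only (not part of this commit; app.py instead takes host and port from its CONFIG dictionary), a launch call can read those variables with explicit fallbacks:

```python
# Illustrative sketch, not in this repo: reading the env vars set in the Dockerfile.
import os
import gradio as gr

host = os.getenv("GRADIO_SERVER_NAME", "0.0.0.0")
port = int(os.getenv("GRADIO_SERVER_PORT", "7860"))

with gr.Blocks() as demo:
    gr.Markdown("Hello from the container")

demo.launch(server_name=host, server_port=port)
```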
README.md
ADDED
@@ -0,0 +1,30 @@
---
title: Agentic RAG 4
emoji: π
colorFrom: indigo
colorTo: purple
sdk: gradio
sdk_version: "4.12.0"
app_file: app.py
pinned: false
---
---

# π€ Agentic RAG 3

An intelligent RAG (Retrieval-Augmented Generation) agent with document analysis capabilities.

## Features
- Upload and process PDF documents
- Intelligent document search using FAISS
- Agent-powered question answering
- Multiple tool support (list, count, search documents)

## Usage
1. Upload your PDF documents
2. Ask questions about your documents
3. Get intelligent, context-aware answers

## Environment
- Local: Uses Ollama (llama3.2)
- HF Space: Uses Llama-3.2-3B-Instruct API
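The README's Environment section relies on the app telling these two runtimes apart. Spaces inject a SPACE_ID environment variable, and app.py below checks it with os.getenv("SPACE_ID"); a minimal sketch of that detection pattern (the real flag, IS_HF_SPACE, lives in helpers_SHARED.py, which is not shown in this diff, so the exact definition is an assumption):

```python
# Minimal sketch of the environment check this project relies on
# (assumed shape; the actual IS_HF_SPACE constant is defined in helpers_SHARED.py).
import os

IS_HF_SPACE = os.getenv("SPACE_ID") is not None

if IS_HF_SPACE:
    print("Running on a Hugging Face Space: use the hosted Llama-3.2-3B-Instruct API")
else:
    print("Running locally: use Ollama (llama3.2)")
```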
app.py
ADDED
@@ -0,0 +1,957 @@
"""
uv init
uv venv --python 3.12
source .venv/bin/activate
uv pip install -r requirements.txt
"""

# Note: HuggingFace Spaces reads configuration from the README.md frontmatter, not from a separate YAML file.
# The config.yaml is for your reference/organization, but the actual Space config must remain in README.md.

# The Space was created with Docker SDK and README.md frontmatter specifies sdk: docker:
# huggingface-cli repo create Agentic_Rag3_dep_space --type space --space_sdk docker

# Without Docker, we use the Gradio SDK option in README.md frontmatter:
# ---
# sdk: gradio
# sdk_version: "6.0.1"
# python_version: "3.12"
# app_file: app.py
# ---

# Or:
# huggingface-cli repo create Agentic_Rag3_dep_space --type space --space_sdk gradio

# AGENT DEPLOYMENT NOTES:
# =====================
# - Local Environment: Uses Ollama (llama3.2) for development
# - HF Space Environment: Uses Llama-3.2-3B-Instruct (cloud API) for production
# - Environment Auto-Detection: Via SPACE_ID environment variable
# - Agent Tools Available: Document listing, counting, RAG search
# - Storage: Temporary (files lost on restart) or persistent (paid plans)
# - UI Features: Tool-powered sample questions, environment indicators
# - Security: Token stored as Space secret (HF_token), not in code
# - Space URL: https://huggingface.co/spaces/irajkoohi/Agentic_Rag3_dep_space

# A) If you want to run app.py locally:
"""
cd /Users/ik/UVcodes/Deployed_Agents_4 && clear && lsof -ti:7860 | xargs kill -9 2>/dev/null; sleep 2 && source .venv/bin/activate && python app.py
"""

# B) If you want to run app.py on Hugging Face Space:
"""
https://huggingface.co/spaces/irajkoohi/Agentic_Rag4_dep_space
"""

# Create and Upload RAG Agent to HF Space Agentic_Rag4_dep_space (Docker SDK)
"""
# huggingface-cli repo create Agentic_Rag4_dep_space --type space --space_sdk docker 2>&1
Create new token with Write role at: https://huggingface.co/settings/tokens
Add token to Space secrets at: https://huggingface.co/spaces/irajkoohi/Agentic_Rag4_dep_space/settings

clear
rm -rf Agentic_Rag4_dep_space && git clone https://huggingface.co/spaces/irajkoohi/Agentic_Rag4_dep_space
cd /Users/ik/UVcodes/Deployed_Agents_4/Agentic_Rag4_dep_space && cp ../app.py . && cp ../helpers_SHARED.py . && cp ../helpers_HF.py . && cp ../helpers_LOCAL.py . && cp ../requirements.txt . && cp ../README.md . && cp ../Dockerfile . && cp ../config.yaml .
mkdir -p data/embeddings
git add . && git commit -m "Deploy RAG Agent with Dockerfile to HF space"
git push --force
https://huggingface.co/spaces/irajkoohi/Agentic_Rag4_dep_space
"""

# if you want to upload all files:
"""
clear
cd /Users/ik/UVcodes/Deployed_Agents_4/Agentic_Rag4_dep_space
cp ../app.py .
cp ../helpers_SHARED.py .
cp ../helpers_HF.py .
cp ../helpers_LOCAL.py .
cp ../requirements.txt .
# cp ../README.md .
cp ../Dockerfile .
cp ../config.yaml .
git add .
git commit -m "Update all files"
git push
"""

# If you want to delete all files on HF space
"""
cd /Users/ik/UVcodes/Deployed_Agents_4
rm -rf Agentic_Rag4_dep_space && git clone https://huggingface.co/spaces/irajkoohi/Agentic_Rag4_dep_space
cd Agentic_Rag4_dep_space && find . -maxdepth 1 -not -name '.git' -not -name '.' -delete
rm -rf data embeddings
git add -A && git commit -m "Remove all files to clean the space"
git push
ls -la && pwd
"""

# If you want to delete a space on HF website
"""
1. Go to: https://huggingface.co/spaces/irajkoohi/Agentic_Rag4_dep_space/settings
2. Scroll down to "Delete this Space"
3. Type: irajkoohi/Agentic_Rag4_dep_space
4. Click "Delete"
"""

# if you want to sync changes of some files (like app.py and helpers_SHARED.py):
"""
cp ../app.py . && cp ../helpers_SHARED.py .
git add app.py helpers_SHARED.py
git commit -m "Sync app.py and helpers_SHARED.py with latest changes" && git push
"""

#%%
import os
import shutil
import warnings
from datetime import datetime
from fastapi import FastAPI, UploadFile, File, HTTPException
from pydantic import BaseModel
import gradio as gr

# Suppress warnings for cleaner output on HF Spaces
warnings.filterwarnings("ignore", category=UserWarning)

# Fix event loop issues on HF Spaces
if os.getenv("SPACE_ID") is not None:
    try:
        import nest_asyncio
        nest_asyncio.apply()
    except ImportError:
        pass

# ============================================================================
# IMPORT FROM HELPER MODULES
# ============================================================================

from helpers_SHARED import (
    # Configuration
    CONFIG, IS_HF_SPACE, DATA_DIR, EMBEDDINGS_DIR,
    HAS_PERSISTENT_STORAGE, STORAGE_WARNING,
    # Memory functions
    add_to_memory, get_memory_context, search_memory, clear_memory,
    # Utility functions
    get_timestamp, create_elapsed_timer, format_progress_bar,
    # PDF helpers
    get_pdf_list, get_pdf_list_ui, make_pdf_dropdown,
    # Vectorstore
    build_vectorstore, get_vectorstore, set_vectorstore, embeddings,
    # Agent tools
    AGENT_TOOLS, list_documents, count_documents, search_documents,
    # Sample questions
    SAMPLE_Q1, SAMPLE_Q2, SAMPLE_Q3, SAMPLE_Q4, SAMPLE_Q5, SAMPLE_Q6, SAMPLE_Q7,
)
from helpers_SHARED import floating_progress_bar_html

# Import environment-specific helpers
if IS_HF_SPACE:
    from helpers_HF import (
        init_hf_llm, hf_generate_chat_response, hf_generate_text_response,
        get_hf_client, get_hf_llm_name
    )
    # Initialize HF LLM (default model from config)
    hf_client, LLM_NAME = init_hf_llm(CONFIG["hf_model"] if "hf_model" in CONFIG else None)
    ollama_llm = None
    agent_executor = None
else:
    from helpers_LOCAL import (
        init_ollama_llm, ollama_generate_response, run_agent,
        create_langchain_agent, get_ollama_llm, get_local_llm_name, get_agent_executor
    )
    # Initialize Ollama LLM
    ollama_llm, LLM_NAME = init_ollama_llm()
    hf_client = None

# Create directories
os.makedirs(DATA_DIR, exist_ok=True)
os.makedirs(EMBEDDINGS_DIR, exist_ok=True)

# Build initial vectorstore
vs = build_vectorstore()

# Create agent (local only)
if not IS_HF_SPACE and ollama_llm is not None:
    agent_executor = create_langchain_agent()
else:
    agent_executor = None

# Debug: Print initial state
print(f"π DEBUG: Initial vectorstore state: {vs is not None}")
print(f"π DEBUG: IS_HF_SPACE: {IS_HF_SPACE}")
print(f"π DEBUG: DATA_DIR: {DATA_DIR}")
print(f"π DEBUG: EMBEDDINGS_DIR: {EMBEDDINGS_DIR}")
if IS_HF_SPACE:
    print(f"π DEBUG: /data exists: {os.path.exists('/data')}")
    print(f"π DEBUG: HF token available: {os.getenv('HF_token') is not None}")
print(f"π DEBUG: LLM available: {(hf_client is not None) if IS_HF_SPACE else (ollama_llm is not None)}")

# ============================================================================
# FASTAPI APP (FastAPI is only used for local runs, not on HuggingFace Spaces)
# ============================================================================

app = FastAPI(title="RAG Chatbot API")

class Prompt(BaseModel):
    prompt: str

@app.get("/pdfs")
def list_pdfs():
    return {"pdfs": get_pdf_list()}

@app.post("/upload")
async def upload_pdf(file: UploadFile = File(...)):
    if not file.filename or not file.filename.endswith(".pdf"):
        raise HTTPException(status_code=400, detail="Only PDFs allowed.")

    filepath = os.path.join(DATA_DIR, file.filename)
    with open(filepath, "wb") as f:
        f.write(await file.read())

    build_vectorstore(force_rebuild=True)
    return {"message": f"Added {file.filename}. Embeddings updated."}

@app.delete("/delete/{filename}")
def delete_pdf(filename: str):
    if filename not in get_pdf_list():
        raise HTTPException(status_code=404, detail="PDF not found.")

    filepath = os.path.join(DATA_DIR, filename)
    os.remove(filepath)
    build_vectorstore(force_rebuild=True)
    return {"message": f"Deleted {filename}. Embeddings updated."}

@app.post("/generate")
def generate_response(prompt: Prompt):
    global vs
    vs = get_vectorstore()

    if vs is None:
        raise HTTPException(status_code=400, detail="No PDFs loaded.")

    # Retrieve relevant docs (limit context size)
    retriever = vs.as_retriever(search_kwargs={"k": CONFIG["search_k"]})
    docs = retriever.invoke(prompt.prompt)
    # Use all retrieved chunks
    context = "\n\n".join([doc.page_content for doc in docs])

    # Augment prompt
    full_prompt = (
        "Answer the following question based ONLY on the context provided below.\n"
        "If the answer is not present in the context, reply exactly with: 'I don't know.'\n"
        "Do NOT make up or guess any information that is not explicitly in the context.\n\n"
        "Your answer MUST be a concise summary, listing the main topics or key points found in the context.\n"
        "If the question asks for a list, provide a bulleted or numbered list.\n\n"
        f"Context:\n{context}\n\n"
        f"Question: {prompt.prompt}\n\n"
        "Answer:"
    )

    try:
        if IS_HF_SPACE and hf_client is not None:
            response = hf_generate_text_response(full_prompt, context, hf_client)
            return {"response": response}
        elif not IS_HF_SPACE and ollama_llm is not None:
            print(f"Generating response with Ollama ({LLM_NAME})...")
            try:
                response = ollama_llm.invoke(full_prompt)
                print(f"β Success! Response generated.")
                return {"response": response}
            except Exception as ollama_error:
                print(f"β Ollama error: {str(ollama_error)}")
                return {"response": f"I found relevant information in your documents:\n\n{context[:CONFIG['search_content_limit']]}..."}
        else:
            return {"response": f"I found relevant information in your documents:\n\n{context[:CONFIG['search_content_limit']]}..."}
    except Exception as e:
        print(f"LLM failed: {str(e)}")
        return {"response": f"I found relevant information in your documents:\n\n{context[:CONFIG['search_content_limit']]}..."}

@app.get("/refresh")
def refresh_embeddings():
    build_vectorstore(force_rebuild=True)
    return {"message": "Embeddings refreshed."}

# ============================================================================
# GRADIO UI FUNCTIONS
# ============================================================================

def add_pdf(files):
    if files is None or len(files) == 0:
        return (
            make_pdf_dropdown(),
            "No files selected.",
            "",
            "\n".join(get_pdf_list())
        )

    start_time = datetime.now()
    get_elapsed = create_elapsed_timer(start_time)
    results = []
    total_files = len(files)
    upload_log = []

    upload_log.append(f"[{get_timestamp()}] Starting upload process for {total_files} file(s)")

    for i, file_obj in enumerate(files, 1):
        filename = os.path.basename(file_obj.name)

        progress_percent = int((i * 2 - 1) / (total_files * 2) * 100)
        status_msg = f"π€ Uploading {i}/{total_files}: {filename}..."
        progress_display = format_progress_bar(get_elapsed(), progress_percent, status_msg)

        upload_log.append(f"[{get_timestamp()}] Uploading file {i}: {filename}")

        # Show current embedded files (before upload)
        yield (
            make_pdf_dropdown(),
            "\n".join(results) if results else "Starting upload...",
            progress_display,
            "\n".join(get_pdf_list())
        )

        try:
            dest_path = os.path.join(DATA_DIR, filename)
            shutil.copy2(file_obj.name, dest_path)
            results.append(f"β {filename} uploaded")

            upload_log.append(f"[{get_timestamp()}] Uploading file {i} completed")

            progress_percent = int(((i * 2) - 1) / (total_files * 2) * 100)
            status_msg = f"π§ Creating embeddings for {filename}..."
            progress_display = format_progress_bar(get_elapsed(), progress_percent, status_msg)
            upload_log.append(f"[{get_timestamp()}] Embedding file {i}: {filename}")

            yield (
                make_pdf_dropdown(),
                "\n".join(results),
                progress_display,
                "\n".join(get_pdf_list())
            )

            try:
                build_vectorstore(force_rebuild=True)
                results[-1] = f"β {filename} (uploaded & embedded)"
                upload_log.append(f"[{get_timestamp()}] Embedding file {i} completed")
                upload_log.append("")
                # Show progress bar after embedding completes
                progress_percent = int((i * 2) / (total_files * 2) * 100)
                status_msg = f"β Embedded {i}/{total_files}: {filename}"
                progress_display = format_progress_bar(get_elapsed(), progress_percent, status_msg)
                # Update embedded files to show the new file
                yield (
                    make_pdf_dropdown(),
                    "\n".join(results),
                    progress_display,
                    "\n".join(get_pdf_list())
                )
            except Exception as embed_error:
                results[-1] = f"β οΈ {filename} (uploaded, embedding error: {str(embed_error)})"
                upload_log.append(f"[{get_timestamp()}] Embedding file {i} failed")
                upload_log.append("")

                completed_progress = int((i * 2) / (total_files * 2) * 100)
                status_msg = f"β οΈ File {i}/{total_files} completed with error: {filename}"
                progress_display = format_progress_bar(get_elapsed(), completed_progress, status_msg)

                yield (
                    make_pdf_dropdown(),
                    "\n".join(results),
                    progress_display,
                    "\n".join(get_pdf_list())
                )
        except Exception as e:
            results.append(f"β {filename}: {str(e)}")
            upload_log.append(f"[{get_timestamp()}] Uploading file {i} failed")

    final_message = "\n".join(results)
    final_progress = format_progress_bar(get_elapsed(), 100, f"π All done! Processed {len(files)} file(s) successfully")
    upload_log.append(f"[{get_timestamp()}] All {len(files)} file(s) completed")

    # Only show fully embedded files in the Available Embedded Files window
    # Reset the progress bar to its original empty state after completion (like delete)
    yield (
        make_pdf_dropdown(),
        final_message,
        "",
        "\n".join(get_pdf_list())
    )

def delete_pdf_ui(selected_pdf):
    import time
    if not selected_pdf:
        # Hide overlay if nothing to delete
        yield make_pdf_dropdown(), "\n".join(get_pdf_list()), ""
        return
    # Show progress bar immediately on click
    bar = format_progress_bar("", 0, "Preparing to delete files...")
    yield make_pdf_dropdown(), "\n".join(get_pdf_list()), bar

    # Support both single and multiple selection
    if isinstance(selected_pdf, str):
        selected_files = [selected_pdf]
    else:
        selected_files = list(selected_pdf)

    total_files = len(selected_files)
    for idx, file_name in enumerate(selected_files, 1):
        file_path = os.path.join(DATA_DIR, file_name)
        # Remove file and all leftovers (e.g., embeddings) before advancing progress
        deleted = False
        leftovers_removed = False
        # Remove file
        if os.path.exists(file_path):
            try:
                os.remove(file_path)
                deleted = True
            except Exception:
                deleted = False
        # Remove leftovers (add your per-file embedding removal logic here if needed)
        # Example: remove embedding file if it exists (customize as needed)
        embedding_path = os.path.join(EMBEDDINGS_DIR, file_name + ".embedding")
        if os.path.exists(embedding_path):
            try:
                os.remove(embedding_path)
                leftovers_removed = True
            except Exception:
                leftovers_removed = False
        else:
            leftovers_removed = True  # No leftovers to remove
        # Only advance progress bar after both file and leftovers are deleted
        if deleted and leftovers_removed:
            build_vectorstore(force_rebuild=True)
            percent = int(idx / total_files * 100) if total_files else 100
            bar = format_progress_bar("", percent, f"Deleted {idx}/{total_files}: {file_name}")
            yield make_pdf_dropdown(), "\n".join(get_pdf_list()), bar
        else:
            bar = format_progress_bar("", int(idx / total_files * 100) if total_files else 100, f"β οΈ Error deleting {file_name}")
            yield make_pdf_dropdown(), "\n".join(get_pdf_list()), bar
        time.sleep(0.2)
    # Clear progress bar after all deletions
    yield make_pdf_dropdown(), "\n".join(get_pdf_list()), ""

def toggle_delete_all_btn():
    # Check if there is at least one file in Available Embedded Files
    files = get_pdf_list()
    return gr.update(interactive=bool(files))

def delete_all_files():
    import time
    all_files = get_pdf_list()
    if not all_files:
        bar = format_progress_bar("", 0, "No files to delete.")
        yield make_pdf_dropdown(), "\n".join(get_pdf_list()), bar
        return
    bar = format_progress_bar("", 0, "Preparing to delete all files...")
    yield make_pdf_dropdown(), "\n".join(get_pdf_list()), bar
    total_files = len(all_files)
    for idx, file_name in enumerate(all_files, 1):
        file_path = os.path.join(DATA_DIR, file_name)
        deleted = False
        leftovers_removed = False
        if os.path.exists(file_path):
            try:
                os.remove(file_path)
                deleted = True
            except Exception:
                deleted = False
        embedding_path = os.path.join(EMBEDDINGS_DIR, file_name + ".embedding")
        if os.path.exists(embedding_path):
            try:
                os.remove(embedding_path)
                leftovers_removed = True
            except Exception:
                leftovers_removed = False
        else:
            leftovers_removed = True
        if deleted and leftovers_removed:
            build_vectorstore(force_rebuild=True)
            percent = int(idx / total_files * 100) if total_files else 100
            bar = format_progress_bar("", percent, f"Deleted {idx}/{total_files}: {file_name}")
            yield make_pdf_dropdown(), "\n".join(get_pdf_list()), bar
        else:
            bar = format_progress_bar("", int(idx / total_files * 100) if total_files else 100, f"β οΈ Error deleting {file_name}")
            yield make_pdf_dropdown(), "\n".join(get_pdf_list()), bar
        time.sleep(0.2)
    yield make_pdf_dropdown(), "\n".join(get_pdf_list()), ""

def show_delete_all_warning():
    return (
        gr.Markdown("**β οΈ Are you sure you want to delete ALL files? This cannot be undone. Click 'Confirm Delete All' to proceed.**", visible=True),
        gr.update(interactive=True, visible=True)
    )

def hide_delete_all_warning():
    return (
        gr.Markdown(visible=False),
        gr.update(interactive=False, visible=False)
    )
def analyze_query_and_use_tools(query: str) -> str:
    """Analyze query and use appropriate tools to gather information."""
    query_lower = query.lower()
    results = []

    # Check for memory-related queries first
    memory_keywords = ["remember", "earlier", "before", "previous", "last time", "we discussed",
                       "you said", "i asked", "conversation", "history", "recall", "what did we"]
    if any(word in query_lower for word in memory_keywords):
        print(f"π§ Memory query detected, fetching conversation history...")
        memory_result = get_memory_context(last_n=10)
        if memory_result and "No previous conversation" not in memory_result:
            results.append(f"π **Conversation History:**\n{memory_result}")
        search_result = search_memory(query)
        if search_result and "No conversation history" not in search_result:
            results.append(f"π **Relevant Past Discussions:**\n{search_result}")

        if results:
            return "\n\n".join(results)

    # Try using LangGraph agent (local only)
    if not IS_HF_SPACE and agent_executor is not None:
        agent_result = run_agent(query)
        if agent_result:
            return agent_result

    # Fallback: Manual tool routing
    try:
        if any(word in query_lower for word in ["what documents", "list documents", "available documents", "what files", "documents do i have"]):
            results.append(list_documents.invoke({}))

        if any(word in query_lower for word in ["how many", "count", "number of documents"]):
            results.append(count_documents.invoke({}))

        results.append(search_documents.invoke({"query": query}))

        return "\n\n".join(results) if results else "No relevant information found."

    except Exception as e:
        return f"Error analyzing query: {str(e)}"

def chat_response(message, history):
    """Agent-enhanced chat response function with visual progress tracking."""
    global vs
    if not message:
        return history, "", "π¬ Ready for your question"

    start_time = datetime.now()
    get_elapsed = create_elapsed_timer(start_time)

    if not isinstance(history, list):
        history = []

    history.append({"role": "user", "content": str(message)})
    add_to_memory("user", message)

    try:
        yield (history, "", format_progress_bar(get_elapsed(), 33, "π Analyzing your question...", bar_length=15))
        print(f"π€ Agent analyzing query: {message}")

        try:
            pdf_files = get_pdf_list()
            print(f"π DEBUG: PDF files available: {len(pdf_files)} - {pdf_files}")
            print(f"π DEBUG: Global vectorstore state: {get_vectorstore() is not None}")
        except Exception as debug_error:
            print(f"π DEBUG ERROR: {str(debug_error)}")

        try:
            tool_results = analyze_query_and_use_tools(message)
            print(f"π§ Tool results: {tool_results[:100]}...")
        except Exception as tool_error:
            error_msg = f"β Tool execution failed: {str(tool_error)}"
            print(error_msg)
            history.append({"role": "assistant", "content": error_msg})
            yield (history, "", f"{get_elapsed()} | [100%] β Error during tool execution")
            return

        yield (history, "", format_progress_bar(get_elapsed(), 66, "π§ Generating intelligent response...", bar_length=15))

        try:
            memory_context = get_memory_context(last_n=5)
            llm_prompt = f"""
You are a helpful assistant with memory of past conversations. Answer the question based on the document excerpts and conversation history.

Recent Conversation History:
{memory_context}

Document excerpts:
{tool_results}

Question: {message}

Answer concisely and accurately. If the user refers to previous discussions, use the conversation history to provide context:
"""

            if IS_HF_SPACE and hf_client is not None:
                result = hf_generate_chat_response(llm_prompt, hf_client)
                if result is None:
                    result = tool_results
            elif not IS_HF_SPACE and ollama_llm is not None:
                result = ollama_generate_response(llm_prompt, ollama_llm)
                if result is None:
                    result = tool_results
            else:
                result = tool_results
                print("βΉοΈ No LLM available, returning tool results")
        except Exception as llm_error:
            print(f"β LLM processing error: {str(llm_error)}")
            result = tool_results

        result_str = str(result.content) if hasattr(result, 'content') else str(result)
        history.append({"role": "assistant", "content": result_str})
        add_to_memory("assistant", result_str)

        yield (history, "", format_progress_bar(get_elapsed(), 100, "β Response generated successfully!", bar_length=15))
        # Reset AI Processing Progress to original state
        yield (history, "", "π¬ Ready for your question")

    except Exception as e:
        error_msg = f"π« System error: {str(e)}\n\nPlease try again or upload your documents again."
        print(f"π₯ CRITICAL ERROR: {str(e)}")
        import traceback
        traceback.print_exc()
        history.append({"role": "assistant", "content": error_msg})
        yield (history, "", f"{get_elapsed()} | [100%] β System error occurred")

def refresh_embeddings_ui():
    """Refresh embeddings directly"""
    try:
        build_vectorstore(force_rebuild=True)
        return make_pdf_dropdown(), "Embeddings refreshed."
    except Exception as e:
        return make_pdf_dropdown(), f"Error refreshing embeddings: {str(e)}"

def clear_chat_and_memory():
    """Clear chat history and conversation memory."""
    clear_memory()
    return [], "", "π¬ Chat and memory cleared. Ready for your question"

# ============================================================================
# GRADIO UI
# ============================================================================

ENV_NAME = "π HuggingFace Space" if IS_HF_SPACE else "π» Local Environment"
ENV_COLOR = "#FF6B6B" if IS_HF_SPACE else "#4ECDC4"

with gr.Blocks(title="RAG Agent Chatbot") as demo:
    gr.Markdown(f"# π€ RAG Agent - AI Assistant with Tools\nUpload PDFs and interact with an intelligent agent that can search, analyze, and answer questions about your documents.")
    if not IS_HF_SPACE:
        from helpers_LOCAL import get_installed_llms, init_ollama_llm, create_langchain_agent
        llm_choices = get_installed_llms()
        if llm_choices:
            llm_dropdown = gr.Dropdown(
                label="Select Local LLM",
                choices=llm_choices,
                value=LLM_NAME if LLM_NAME in llm_choices else (llm_choices[0] if llm_choices else None),
                interactive=True,
                visible=True
            )
            current_llm_display = gr.Markdown(f"**Current LLM:** {LLM_NAME if LLM_NAME else ''}", elem_id="current-llm-display", visible=True)
            top_banner = gr.Markdown(
                f"<div style='background-color: {ENV_COLOR}; padding: 10px; border-radius: 5px; text-align: center; color: white; font-weight: bold;'>Running on: {ENV_NAME} | LLM: <span id='llm-name'>{LLM_NAME if LLM_NAME else 'None'}</span> | Agent: β Active</div>",
                elem_id="top-llm-banner"
            )

            def update_llm(selected_label):
                global ollama_llm, LLM_NAME, agent_executor
                if selected_label:
                    try:
                        ollama_llm, LLM_NAME = init_ollama_llm(selected_label)
                        agent_executor = create_langchain_agent()
                        banner_html = f"<div style='background-color: {ENV_COLOR}; padding: 10px; border-radius: 5px; text-align: center; color: white; font-weight: bold;'>Running on: {ENV_NAME} | LLM: <span id='llm-name'>{selected_label}</span> | Agent: β Active</div>"
                        return (
                            gr.Markdown(f"**Current LLM:** {selected_label}", elem_id="current-llm-display"),
                            banner_html
                        )
                    except Exception as e:
                        ollama_llm = None
                        LLM_NAME = None
                        agent_executor = None
                        banner_html = f"<div style='background-color: {ENV_COLOR}; padding: 10px; border-radius: 5px; text-align: center; color: white; font-weight: bold;'>Running on: {ENV_NAME} | LLM: <span id='llm-name'>None</span> | Agent: β Inactive</div>"
                        return (
                            gr.Markdown(f"**Current LLM:** (Error initializing {selected_label})", elem_id="current-llm-display"),
                            banner_html
                        )
                banner_html = f"<div style='background-color: {ENV_COLOR}; padding: 10px; border-radius: 5px; text-align: center; color: white; font-weight: bold;'>Running on: {ENV_NAME} | LLM: <span id='llm-name'>None</span> | Agent: β Inactive</div>"
                return gr.Markdown("", elem_id="current-llm-display"), banner_html

            llm_dropdown.change(
                fn=lambda label: update_llm(label),
                inputs=[llm_dropdown],
                outputs=[current_llm_display, top_banner]
            )
        else:
            gr.Markdown(
                "<div style='background-color: #ffcccc; padding: 10px; border-radius: 5px; text-align: center; color: #b30000; font-weight: bold;'>⚠ <b>No local LLMs are installed.</b> Please install an Ollama model to enable LLM selection and chat capabilities.</div>"
            )
            llm_dropdown = gr.Dropdown(
                label="Select Local LLM",
                choices=[],
                value=None,
                interactive=False,
                visible=True
            )
            current_llm_display = gr.Markdown(f"**Current LLM:** None", elem_id="current-llm-display", visible=True)
            gr.Markdown(f"<div style='background-color: {ENV_COLOR}; padding: 10px; border-radius: 5px; text-align: center; color: white; font-weight: bold;'>Running on: {ENV_NAME} | LLM: <span id='llm-name'>None</span> | Agent: β Inactive</div>")
    else:
        # --- Hugging Face Space: dynamic LLM selection ---
        # Static list of free, popular LLMs on HF Inference API (can be expanded)
        hf_llm_choices = [
            "Llama-3.2-3B-Instruct",
            "meta-llama/Meta-Llama-3-8B-Instruct",
            "mistralai/Mistral-7B-Instruct-v0.2",
            "google/gemma-7b-it",
            "HuggingFaceH4/zephyr-7b-beta",
            "Qwen/Qwen1.5-7B-Chat",
            "tiiuae/falcon-7b-instruct"
        ]
        default_llm = "Llama-3.2-3B-Instruct"
        llm_dropdown = gr.Dropdown(
            label="Select HF LLM",
            choices=hf_llm_choices,
            value=LLM_NAME if LLM_NAME in hf_llm_choices else default_llm,
            interactive=True,
            visible=True
        )
        current_llm_display = gr.Markdown(f"**Current LLM:** {LLM_NAME if LLM_NAME else default_llm}", elem_id="current-llm-display", visible=True)
        top_banner = gr.Markdown(
            f"<div style='background-color: {ENV_COLOR}; padding: 10px; border-radius: 5px; text-align: center; color: white; font-weight: bold;'>Running on: {ENV_NAME} | LLM: <span id='llm-name'>{LLM_NAME if LLM_NAME else default_llm}</span> | Agent: β Active</div>",
            elem_id="top-llm-banner"
        )

        def update_hf_llm(selected_label):
            global hf_client, LLM_NAME
            from helpers_HF import init_hf_llm
            if selected_label:
                try:
                    hf_client, LLM_NAME = init_hf_llm(selected_label)
                    banner_html = f"<div style='background-color: {ENV_COLOR}; padding: 10px; border-radius: 5px; text-align: center; color: white; font-weight: bold;'>Running on: {ENV_NAME} | LLM: <span id='llm-name'>{selected_label}</span> | Agent: β Active</div>"
                    return (
                        gr.Markdown(f"**Current LLM:** {selected_label}", elem_id="current-llm-display"),
                        banner_html
                    )
                except Exception as e:
                    hf_client = None
                    LLM_NAME = None
                    banner_html = f"<div style='background-color: {ENV_COLOR}; padding: 10px; border-radius: 5px; text-align: center; color: white; font-weight: bold;'>Running on: {ENV_NAME} | LLM: <span id='llm-name'>None</span> | Agent: β Inactive</div>"
                    return (
                        gr.Markdown(f"**Current LLM:** (Error initializing {selected_label})", elem_id="current-llm-display"),
                        banner_html
                    )
            banner_html = f"<div style='background-color: {ENV_COLOR}; padding: 10px; border-radius: 5px; text-align: center; color: white; font-weight: bold;'>Running on: {ENV_NAME} | LLM: <span id='llm-name'>None</span> | Agent: β Inactive</div>"
            return gr.Markdown("", elem_id="current-llm-display"), banner_html

        llm_dropdown.change(
            fn=lambda label: update_hf_llm(label),
            inputs=[llm_dropdown],
            outputs=[current_llm_display, top_banner]
        )

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### π File Management")
            if IS_HF_SPACE and STORAGE_WARNING:
                gr.Markdown(f"**Storage Status:** {STORAGE_WARNING}")
            file_upload = gr.File(
                label="Upload Files (Multiple files supported)",
                file_types=[".pdf"],
                file_count="multiple"
            )

            upload_status = gr.Textbox(
                label="Upload Status",
                value="",
                interactive=False,
                lines=8,
                max_lines=8,
                autoscroll=True
            )
            with gr.Row():
                progress_bar = gr.Textbox(
                    label="Uploading Progress",
                    value="",
                    interactive=False,
                    lines=1,
                    max_lines=1,
                    autoscroll=True
                )
                delete_progress_bar = gr.Textbox(
                    label="Deleting Progress",
                    value="",
                    interactive=False,
                    lines=1,
                    max_lines=1,
                    autoscroll=True
                )
            embedded_files = gr.Textbox(
                label="Available Embedded Files",
                value="\n".join(get_pdf_list()),
                interactive=False,
                lines=8,
                max_lines=8,
                autoscroll=True
            )

            with gr.Row():
                pdf_dropdown = gr.Dropdown(
                    label="Select Files to Delete",
                    choices=get_pdf_list_ui(),
                    interactive=True,
                    allow_custom_value=False,
                    multiselect=True
                )
            delete_btn = gr.Button("ποΈ Delete Files", variant="stop", interactive=False)
            delete_all_btn = gr.Button("ποΈ Delete All", variant="stop", interactive=bool(get_pdf_list()))
            delete_all_warning = gr.Markdown(visible=False)
            confirm_delete_all_btn = gr.Button("Confirm Delete All", variant="stop", interactive=True, visible=False)
            delete_progress_overlay = gr.HTML(floating_progress_bar_html())

        with gr.Column(scale=4):
            gr.Markdown("### π€ AI Agent Chat")
            gr.Markdown("**Agent Capabilities:** Search documents, list files, count documents, intelligent reasoning")
            chatbot = gr.Chatbot(height=CONFIG["chatbot_height"], layout="bubble")

            if IS_HF_SPACE and not HAS_PERSISTENT_STORAGE:
                gr.Markdown("β οΈ **Storage Notice:** Files are temporary and will be lost when Space restarts. To enable persistent storage, upgrade to a paid plan in Settings β Hardware.")
            gr.Markdown("**π οΈ Agent Commands - Try these tool-powered queries:**")
            with gr.Row():
                sample1 = gr.Button(f"π {SAMPLE_Q1}", size="sm")
                sample2 = gr.Button(f"π {SAMPLE_Q2}", size="sm")
                sample3 = gr.Button(f"π {SAMPLE_Q3}", size="sm")
            with gr.Row():
                sample4 = gr.Button(f"π§ {SAMPLE_Q4}", size="sm")
                sample5 = gr.Button(f"π³ {SAMPLE_Q5}", size="sm")
                sample6 = gr.Button(f"π§ {SAMPLE_Q6}", size="sm")
                sample7 = gr.Button(f"π {SAMPLE_Q7}", size="sm")

            msg_input = gr.Textbox(
                placeholder="Ask a question about your PDFs...",
                label="Ask about your PDFs",
                show_label=False
            )

            ai_status = gr.Textbox(
                label="π AI Processing Progress",
                value="π¬ Ready for your question",
                interactive=False,
                placeholder="AI processing status with progress tracking..."
            )

            with gr.Row():
                submit_btn = gr.Button("Send", variant="primary", scale=1, interactive=False)
                clear_btn = gr.Button("Clear", scale=1, interactive=False)

    # Event handlers
    file_upload.upload(
        fn=add_pdf,
        inputs=[file_upload],
        outputs=[pdf_dropdown, upload_status, progress_bar, embedded_files]
    )
    delete_btn.click(
        fn=delete_pdf_ui,
        inputs=[pdf_dropdown],
        outputs=[pdf_dropdown, embedded_files, delete_progress_bar]
    )
    delete_all_btn.click(
        fn=show_delete_all_warning,
        inputs=[],
        outputs=[delete_all_warning, confirm_delete_all_btn]
    )
    # Update Delete All button state when files change
    demo.load(fn=toggle_delete_all_btn, outputs=[delete_all_btn])
    pdf_dropdown.change(fn=toggle_delete_all_btn, outputs=[delete_all_btn])
    embedded_files.change(fn=toggle_delete_all_btn, outputs=[delete_all_btn])
    confirm_delete_all_btn.click(
        fn=delete_all_files,
        inputs=[],
        outputs=[pdf_dropdown, embedded_files, delete_progress_bar]
    ).then(
        fn=hide_delete_all_warning,
        inputs=[],
        outputs=[delete_all_warning, confirm_delete_all_btn]
    )
    # Enable/disable delete button based on selection
    def toggle_delete_btn(selected):
        return gr.update(interactive=bool(selected))
    pdf_dropdown.change(
        fn=toggle_delete_btn,
        inputs=[pdf_dropdown],
        outputs=[delete_btn]
    )
    demo.load(fn=lambda: "\n".join(get_pdf_list()), outputs=[embedded_files])
    # Ensure embedded_files is updated on app start
    demo.load(fn=lambda: "\n".join(get_pdf_list()), outputs=[embedded_files])


    # Sample question handlers
    sample_buttons = [sample1, sample2, sample3, sample4, sample5, sample6, sample7]
    sample_questions = [SAMPLE_Q1, SAMPLE_Q2, SAMPLE_Q3, SAMPLE_Q4, SAMPLE_Q5, SAMPLE_Q6, SAMPLE_Q7]
    for btn, question in zip(sample_buttons, sample_questions):
        btn.click(fn=lambda q=question: q, outputs=[msg_input])

    msg_input.submit(
        fn=chat_response,
        inputs=[msg_input, chatbot],
        outputs=[chatbot, msg_input, ai_status]
    )
    submit_btn.click(
        fn=chat_response,
        inputs=[msg_input, chatbot],
        outputs=[chatbot, msg_input, ai_status]
    )
    # Enable/disable send button based on input
    def toggle_send_btn(text):
        return gr.update(interactive=bool(text and text.strip()))
    msg_input.change(
        fn=toggle_send_btn,
        inputs=[msg_input],
        outputs=[submit_btn]
    )

    clear_btn.click(
        fn=clear_chat_and_memory,
        outputs=[chatbot, msg_input, ai_status]
    )
    # Enable/disable clear button based on input or chat
    def toggle_clear_btn(text, chat):
        return gr.update(interactive=bool((text and text.strip()) or (chat and len(chat) > 0)))
    msg_input.change(
        fn=lambda text: toggle_clear_btn(text, chatbot.value if hasattr(chatbot, 'value') else []),
        inputs=[msg_input],
        outputs=[clear_btn]
    )
    chatbot.change(
        fn=lambda chat: toggle_clear_btn(msg_input.value if hasattr(msg_input, 'value') else '', chat),
        inputs=[chatbot],
        outputs=[clear_btn]
    )

    demo.load(fn=make_pdf_dropdown, outputs=[pdf_dropdown])

# ============================================================================
# LAUNCH application
# ============================================================================

if IS_HF_SPACE:
    try:
        demo.launch(
            server_name=CONFIG["server_host"],
            server_port=CONFIG["server_port"],
            share=True,
            show_error=True,
            quiet=False
        )
    except Exception as launch_error:
        print(f"Launch error: {launch_error}")
        demo.launch(server_name=CONFIG["server_host"], server_port=CONFIG["server_port"])
else:
    app_with_gradio = gr.mount_gradio_app(app, demo, path="/")

    if __name__ == "__main__":
        import uvicorn
        import webbrowser
        from threading import Timer

        Timer(3, lambda: webbrowser.open(f"http://127.0.0.1:{CONFIG['server_port']}")).start()

        print("Starting server... Browser will open automatically in 3 seconds.")
        uvicorn.run(app_with_gradio, host=CONFIG["server_host"], port=CONFIG["server_port"])
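For local runs, app.py mounts the Gradio UI onto the FastAPI app, so the REST endpoints defined above (/pdfs, /upload, /delete/{filename}, /generate, /refresh) stay reachable. A hedged usage sketch against a local instance (assumes the server is listening on 127.0.0.1:7860 and that the requests package is installed; neither is pinned by this commit):

```python
# Illustrative client for the FastAPI endpoints above (local runs only).
# Assumptions: server at http://127.0.0.1:7860, `requests` installed, example.pdf exists.
import requests

BASE = "http://127.0.0.1:7860"

# Upload a PDF so it gets embedded into the vectorstore
with open("example.pdf", "rb") as f:
    r = requests.post(f"{BASE}/upload", files={"file": ("example.pdf", f, "application/pdf")})
    print(r.json())

# List the PDFs the server currently knows about
print(requests.get(f"{BASE}/pdfs").json())

# Ask a question grounded in the uploaded documents
r = requests.post(f"{BASE}/generate", json={"prompt": "What topics do my documents cover?"})
print(r.json()["response"])
```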
config.yaml
ADDED
@@ -0,0 +1,9 @@
title: Agentic RAG 3
emoji: π€
colorFrom: blue
colorTo: purple
sdk: docker
app_file: app.py
pinned: false
license: mit
short_description: RAG Agent with document analysis
helpers_HF.py
ADDED
@@ -0,0 +1,123 @@
| 1 |
+
"""
|
| 2 |
+
HuggingFace Space specific helpers for LLM operations.
|
| 3 |
+
Contains: HF InferenceClient initialization and response generation.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import os
|
| 7 |
+
from typing import Optional
|
| 8 |
+
from huggingface_hub import InferenceClient
|
| 9 |
+
|
| 10 |
+
from helpers_SHARED import CONFIG, IS_HF_SPACE
|
| 11 |
+
|
| 12 |
+
# ============================================================================
|
| 13 |
+
# HUGGINGFACE LLM INITIALIZATION
|
| 14 |
+
# ============================================================================
|
| 15 |
+
|
| 16 |
+
hf_client = None
|
| 17 |
+
LLM_NAME = None
|
| 18 |
+
|
| 19 |
+
def init_hf_llm(model_name=None):
|
| 20 |
+
"""Initialize HuggingFace InferenceClient for cloud LLM. Optionally specify model_name."""
|
| 21 |
+
global hf_client, LLM_NAME
|
| 22 |
+
|
| 23 |
+
if not IS_HF_SPACE:
|
| 24 |
+
print("βΉοΈ Not running on HF Space, skipping HF client initialization")
|
| 25 |
+
return None, None
|
| 26 |
+
|
| 27 |
+
# Use provided model_name or fallback to config
|
| 28 |
+
if model_name is not None:
|
| 29 |
+
LLM_NAME = model_name.split("/")[-1]
|
| 30 |
+
# Patch CONFIG for downstream use
|
| 31 |
+
CONFIG["hf_model"] = model_name
|
| 32 |
+
else:
|
| 33 |
+
LLM_NAME = CONFIG["hf_model"].split("/")[-1]
|
| 34 |
+
try:
|
| 35 |
+
hf_client = InferenceClient(token=os.getenv("HF_token"))
|
| 36 |
+
print(f"β HuggingFace InferenceClient initialized successfully for {LLM_NAME}")
|
| 37 |
+
return hf_client, LLM_NAME
|
| 38 |
+
except Exception as e:
|
| 39 |
+
print(f"β Warning: HuggingFace InferenceClient not available: {e}")
|
| 40 |
+
hf_client = None
|
| 41 |
+
return None, LLM_NAME
|
| 42 |
+
|
| 43 |
+
# ============================================================================
|
| 44 |
+
# HUGGINGFACE LLM RESPONSE GENERATION
|
| 45 |
+
# ============================================================================
|
| 46 |
+
|
| 47 |
+
def hf_generate_chat_response(prompt: str, hf_client_instance=None) -> Optional[str]:
|
| 48 |
+
"""Generate a chat response using HuggingFace InferenceClient.
|
| 49 |
+
|
| 50 |
+
Args:
|
| 51 |
+
prompt: The prompt to send to the model
|
| 52 |
+
hf_client_instance: Optional HF client instance, uses global if not provided
|
| 53 |
+
|
| 54 |
+
Returns:
|
| 55 |
+
Generated response string or None if failed
|
| 56 |
+
"""
|
| 57 |
+
client = hf_client_instance or hf_client
|
| 58 |
+
|
| 59 |
+
if client is None:
|
| 60 |
+
print("β HF client not available")
|
| 61 |
+
return None
|
| 62 |
+
|
| 63 |
+
try:
|
| 64 |
+
print(f"π§ Generating response with {LLM_NAME}...")
|
| 65 |
+
response = client.chat_completion(
|
| 66 |
+
messages=[{"role": "user", "content": prompt}],
|
| 67 |
+
model=CONFIG["hf_model"],
|
| 68 |
+
max_tokens=CONFIG["max_tokens"],
|
| 69 |
+
temperature=CONFIG["temperature"]
|
| 70 |
+
)
|
| 71 |
+
result = response.choices[0].message.content
|
| 72 |
+
if result:
|
| 73 |
+
result = result.strip()
|
| 74 |
+
print(f"β LLM response: {result[:100]}...")
|
| 75 |
+
return result
|
| 76 |
+
else:
|
| 77 |
+
print(f"β οΈ Empty LLM response")
|
| 78 |
+
return None
|
| 79 |
+
except Exception as hf_error:
|
| 80 |
+
print(f"β HF chat_completion error: {type(hf_error).__name__}: {str(hf_error)}")
|
| 81 |
+
return None
|
| 82 |
+
|
| 83 |
+
def hf_generate_text_response(prompt: str, context: str, hf_client_instance=None) -> str:
|
| 84 |
+
"""Generate a text response using HuggingFace text_generation API.
|
| 85 |
+
|
| 86 |
+
Used as fallback for simpler generation tasks.
|
| 87 |
+
|
| 88 |
+
Args:
|
| 89 |
+
prompt: The full prompt to send
|
| 90 |
+
context: Document context for fallback response
|
| 91 |
+
hf_client_instance: Optional HF client instance
|
| 92 |
+
|
| 93 |
+
Returns:
|
| 94 |
+
Generated response string
|
| 95 |
+
"""
|
| 96 |
+
client = hf_client_instance or hf_client
|
| 97 |
+
|
| 98 |
+
if client is None:
|
| 99 |
+
print("β HF client not available")
|
| 100 |
+
return f"I found relevant information in your documents:\n\n{context[:CONFIG['search_content_limit']]}..."
|
| 101 |
+
|
| 102 |
+
try:
|
| 103 |
+
print(f"Generating response with {LLM_NAME}...")
|
| 104 |
+
response = client.text_generation(
|
| 105 |
+
prompt,
|
| 106 |
+
model=CONFIG["hf_model"],
|
| 107 |
+
max_new_tokens=CONFIG["max_tokens"],
|
| 108 |
+
temperature=CONFIG["temperature_fallback"],
|
| 109 |
+
return_full_text=False
|
| 110 |
+
)
|
| 111 |
+
print(f"β Success! Response generated.")
|
| 112 |
+
return response
|
| 113 |
+
except Exception as hf_error:
|
| 114 |
+
print(f"β HF API error: {str(hf_error)}")
|
| 115 |
+
return f"I found relevant information in your documents:\n\n{context[:CONFIG['search_content_limit']]}..."
|
| 116 |
+
|
| 117 |
+
def get_hf_client():
|
| 118 |
+
"""Get the HF client instance."""
|
| 119 |
+
return hf_client
|
| 120 |
+
|
| 121 |
+
def get_hf_llm_name():
|
| 122 |
+
"""Get the HF LLM name."""
|
| 123 |
+
return LLM_NAME
|
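The module's public surface is small: init_hf_llm() wires up the InferenceClient when running on a Space, hf_generate_chat_response() wraps the chat endpoint, and hf_generate_text_response() is the plainer fallback. A minimal usage sketch (illustrative only; it assumes the Space's HF_token secret is set and the code runs on a Space):

# Illustrative sketch of how app.py is expected to drive these helpers; not actual app code.
from helpers_HF import init_hf_llm, hf_generate_chat_response, hf_generate_text_response

client, llm_name = init_hf_llm()  # defaults to CONFIG["hf_model"]
if client is not None:
    answer = hf_generate_chat_response("Summarize the uploaded documents.")
    if answer is None:
        # chat_completion failed; fall back to plain text generation with empty context
        answer = hf_generate_text_response("Summarize the uploaded documents.", context="")
    print(answer)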
helpers_LOCAL.py
ADDED
|
@@ -0,0 +1,165 @@
|
| 1 |
+
# Utility: Get installed Ollama LLMs as a list (for UI)
|
| 2 |
+
def get_installed_llms():
|
| 3 |
+
"""Returns a list of locally installed Ollama LLMs (model names)."""
|
| 4 |
+
import subprocess
|
| 5 |
+
try:
|
| 6 |
+
result = subprocess.run(["ollama", "list"], capture_output=True, text=True)
|
| 7 |
+
lines = result.stdout.splitlines()
|
| 8 |
+
models = []
|
| 9 |
+
for line in lines:
|
| 10 |
+
if line.strip() and not line.startswith("NAME"):
|
| 11 |
+
name = line.split()[0]
|
| 12 |
+
models.append(name)
|
| 13 |
+
return models
|
| 14 |
+
except Exception as e:
|
| 15 |
+
print(f"Error listing local LLMs: {e}")
|
| 16 |
+
return []
|
| 17 |
+
|
| 18 |
+
# Utility: Display installed Ollama LLMs in terminal (for CLI use)
|
| 19 |
+
def display_installed_llms():
|
| 20 |
+
"""Prints a list of locally installed Ollama LLMs to the terminal."""
|
| 21 |
+
models = get_installed_llms()
|
| 22 |
+
if models:
|
| 23 |
+
print("Available local LLMs:")
|
| 24 |
+
for m in models:
|
| 25 |
+
print(f"- {m}")
|
| 26 |
+
else:
|
| 27 |
+
print("No local LLMs found.")
|
| 28 |
+
"""
|
| 29 |
+
Local environment (Ollama) specific helpers for LLM operations.
|
| 30 |
+
Contains: Ollama/ChatOllama initialization, agent creation, and response generation.
|
| 31 |
+
"""
|
| 32 |
+
|
| 33 |
+
from helpers_SHARED import CONFIG, IS_HF_SPACE, AGENT_TOOLS
|
| 34 |
+
|
| 35 |
+
# ============================================================================
|
| 36 |
+
# OLLAMA LLM INITIALIZATION
|
| 37 |
+
# ============================================================================
|
| 38 |
+
|
| 39 |
+
ollama_llm = None
|
| 40 |
+
LLM_NAME = None
|
| 41 |
+
|
| 42 |
+
def init_ollama_llm(model_name=None):
|
| 43 |
+
"""Initialize Ollama (ChatOllama) for local LLM. Accepts a model name for dynamic selection."""
|
| 44 |
+
global ollama_llm, LLM_NAME
|
| 45 |
+
|
| 46 |
+
if IS_HF_SPACE:
|
| 47 |
+
print("βΉοΈ Running on HF Space, skipping Ollama initialization")
|
| 48 |
+
return None, None
|
| 49 |
+
|
| 50 |
+
from langchain_ollama import ChatOllama
|
| 51 |
+
|
| 52 |
+
if model_name is None:
|
| 53 |
+
model_name = CONFIG["ollama_model"]
|
| 54 |
+
LLM_NAME = model_name
|
| 55 |
+
try:
|
| 56 |
+
ollama_llm = ChatOllama(model=model_name, base_url=CONFIG["ollama_base_url"])
|
| 57 |
+
print(f"β Ollama (ChatOllama) initialized successfully with {LLM_NAME}")
|
| 58 |
+
return ollama_llm, LLM_NAME
|
| 59 |
+
except Exception as e:
|
| 60 |
+
print(f"β Warning: Ollama not available: {e}")
|
| 61 |
+
ollama_llm = None
|
| 62 |
+
return None, LLM_NAME
|
| 63 |
+
|
| 64 |
+
# ============================================================================
|
| 65 |
+
# LANGCHAIN AGENT (LOCAL ONLY)
|
| 66 |
+
# ============================================================================
|
| 67 |
+
|
| 68 |
+
agent_executor = None
|
| 69 |
+
|
| 70 |
+
def create_langchain_agent():
|
| 71 |
+
"""Create a LangGraph ReAct agent with the available tools.
|
| 72 |
+
|
| 73 |
+
Only works in local environment with Ollama.
|
| 74 |
+
Returns None on HF Spaces.
|
| 75 |
+
"""
|
| 76 |
+
global agent_executor, ollama_llm
|
| 77 |
+
|
| 78 |
+
if IS_HF_SPACE:
|
| 79 |
+
print("βΉοΈ HF Space detected - using manual tool routing (HF InferenceClient doesn't support LangChain agents)")
|
| 80 |
+
return None
|
| 81 |
+
|
| 82 |
+
if ollama_llm is None:
|
| 83 |
+
print("β Ollama LLM not initialized, cannot create agent")
|
| 84 |
+
return None
|
| 85 |
+
|
| 86 |
+
from langgraph.prebuilt import create_react_agent # type: ignore
|
| 87 |
+
|
| 88 |
+
try:
|
| 89 |
+
agent_executor = create_react_agent(
|
| 90 |
+
model=ollama_llm,
|
| 91 |
+
tools=AGENT_TOOLS,
|
| 92 |
+
)
|
| 93 |
+
print("β
LangGraph ReAct Agent created successfully with Ollama")
|
| 94 |
+
return agent_executor
|
| 95 |
+
except Exception as e:
|
| 96 |
+
print(f"β Failed to create LangGraph agent: {e}")
|
| 97 |
+
return None
|
| 98 |
+
|
| 99 |
+
# ============================================================================
|
| 100 |
+
# OLLAMA RESPONSE GENERATION
|
| 101 |
+
# ============================================================================
|
| 102 |
+
|
| 103 |
+
def ollama_generate_response(prompt: str, ollama_instance=None) -> str | None:
|
| 104 |
+
"""Generate a response using Ollama.
|
| 105 |
+
|
| 106 |
+
Args:
|
| 107 |
+
prompt: The prompt to send to the model
|
| 108 |
+
ollama_instance: Optional Ollama instance, uses global if not provided
|
| 109 |
+
|
| 110 |
+
Returns:
|
| 111 |
+
Generated response string or None if failed
|
| 112 |
+
"""
|
| 113 |
+
llm = ollama_instance or ollama_llm
|
| 114 |
+
|
| 115 |
+
if llm is None:
|
| 116 |
+
print("β Ollama not available")
|
| 117 |
+
return None
|
| 118 |
+
|
| 119 |
+
try:
|
| 120 |
+
print(f"π§ Generating response with Ollama ({LLM_NAME})...")
|
| 121 |
+
response = llm.invoke(prompt)
|
| 122 |
+
print(f"β Agent response generated.")
|
| 123 |
+
return response
|
| 124 |
+
except Exception as ollama_error:
|
| 125 |
+
print(f"β Ollama error: {str(ollama_error)}")
|
| 126 |
+
return None
|
| 127 |
+
|
| 128 |
+
def run_agent(query: str) -> str | None:
|
| 129 |
+
"""Run the LangGraph agent on a query.
|
| 130 |
+
|
| 131 |
+
Args:
|
| 132 |
+
query: User query to process
|
| 133 |
+
|
| 134 |
+
Returns:
|
| 135 |
+
Agent response string or None if agent unavailable
|
| 136 |
+
"""
|
| 137 |
+
global agent_executor
|
| 138 |
+
|
| 139 |
+
if agent_executor is None:
|
| 140 |
+
return None
|
| 141 |
+
|
| 142 |
+
try:
|
| 143 |
+
print(f"π€ Using LangGraph Agent to process: {query}")
|
| 144 |
+
result = agent_executor.invoke({"messages": [{"role": "user", "content": query}]})
|
| 145 |
+
# Extract the last AI message
|
| 146 |
+
messages = result.get("messages", [])
|
| 147 |
+
for msg in reversed(messages):
|
| 148 |
+
if hasattr(msg, 'content') and msg.type == "ai":
|
| 149 |
+
return msg.content
|
| 150 |
+
return "No response from agent"
|
| 151 |
+
except Exception as e:
|
| 152 |
+
print(f"β Agent execution failed: {e}, falling back to manual routing")
|
| 153 |
+
return None
|
| 154 |
+
|
| 155 |
+
def get_ollama_llm():
|
| 156 |
+
"""Get the Ollama LLM instance."""
|
| 157 |
+
return ollama_llm
|
| 158 |
+
|
| 159 |
+
def get_local_llm_name():
|
| 160 |
+
"""Get the local LLM name."""
|
| 161 |
+
return LLM_NAME
|
| 162 |
+
|
| 163 |
+
def get_agent_executor():
|
| 164 |
+
"""Get the agent executor instance."""
|
| 165 |
+
return agent_executor
|
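Locally, the intended flow is: initialize Ollama, build the LangGraph ReAct agent over AGENT_TOOLS, and fall back to a direct LLM call when the agent is unavailable. A minimal sketch (illustrative only; assumes an Ollama server is running at the configured base URL):

# Illustrative sketch of the local (Ollama) path; not part of the module.
from helpers_LOCAL import (
    init_ollama_llm, create_langchain_agent, run_agent, ollama_generate_response,
)

llm, llm_name = init_ollama_llm()      # defaults to CONFIG["ollama_model"]
agent = create_langchain_agent()       # LangGraph ReAct agent over AGENT_TOOLS (local only)

answer = run_agent("How many documents are loaded?") if agent else None
if answer is None:
    # agent missing or failed -> direct LLM call (manual routing)
    answer = ollama_generate_response("How many documents are loaded?")
print(answer)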
helpers_SHARED.py
ADDED
|
@@ -0,0 +1,407 @@
|
| 1 |
+
"""
|
| 2 |
+
Shared helper functions used by both HuggingFace Space and Local environments.
|
| 3 |
+
Contains: configuration, memory management, vectorstore operations, PDF helpers, and UI utilities.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import os
|
| 7 |
+
from typing import List, Optional
|
| 8 |
+
from datetime import datetime
|
| 9 |
+
from collections import deque
|
| 10 |
+
import gradio as gr
|
| 11 |
+
from langchain_community.document_loaders import PyPDFLoader
|
| 12 |
+
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 13 |
+
from langchain_huggingface import HuggingFaceEmbeddings
|
| 14 |
+
from langchain_community.vectorstores import FAISS
|
| 15 |
+
from langchain_core.documents import Document
|
| 16 |
+
from langchain_core.tools import tool
|
| 17 |
+
|
| 18 |
+
# ============================================================================
|
| 19 |
+
# CONFIGURATION - All settings in one place
|
| 20 |
+
# ============================================================================
|
| 21 |
+
|
| 22 |
+
def setup():
|
| 23 |
+
"""
|
| 24 |
+
Central configuration for the RAG Agent application.
|
| 25 |
+
Modify these values to customize the application behavior.
|
| 26 |
+
Returns a config dictionary with all settings.
|
| 27 |
+
"""
|
| 28 |
+
return {
|
| 29 |
+
# Model Configuration
|
| 30 |
+
"embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
|
| 31 |
+
"ollama_model": "qwen2m:latest", # Local Ollama model
|
| 32 |
+
"hf_model": "Llama-3.2-3B-Instruct", # HuggingFace cloud model
|
| 33 |
+
"ollama_base_url": "http://localhost:11434",
|
| 34 |
+
|
| 35 |
+
# Text Splitting Configuration
|
| 36 |
+
"chunk_size": 1000,
|
| 37 |
+
"chunk_overlap": 200,
|
| 38 |
+
|
| 39 |
+
# Search Configuration
|
| 40 |
+
"search_k": 5, # Number of documents to retrieve
|
| 41 |
+
"search_content_limit": 500, # Max chars to show per chunk
|
| 42 |
+
|
| 43 |
+
# LLM Generation Configuration
|
| 44 |
+
"max_tokens": 512,
|
| 45 |
+
"temperature": 0.1, # Lower = more deterministic
|
| 46 |
+
"temperature_fallback": 0.7, # For text_generation fallback
|
| 47 |
+
|
| 48 |
+
# Memory Configuration
|
| 49 |
+
"max_memory_turns": 50, # Max conversation turns to store
|
| 50 |
+
"memory_context_limit": 500, # Max chars per memory entry
|
| 51 |
+
|
| 52 |
+
# Server Configuration
|
| 53 |
+
"server_port": 7860,
|
| 54 |
+
"server_host": "0.0.0.0",
|
| 55 |
+
|
| 56 |
+
# UI Configuration
|
| 57 |
+
"chatbot_height": 600,
|
| 58 |
+
"progress_bar_length": 20,
|
| 59 |
+
"chat_progress_bar_length": 15,
|
| 60 |
+
}
|
| 61 |
+
|
| 62 |
+
# Initialize configuration
|
| 63 |
+
CONFIG = setup()
|
| 64 |
+
|
| 65 |
+
# ============================================================================
|
| 66 |
+
# ENVIRONMENT DETECTION
|
| 67 |
+
# ============================================================================
|
| 68 |
+
|
| 69 |
+
IS_HF_SPACE = os.getenv("SPACE_ID") is not None
|
| 70 |
+
|
| 71 |
+
# Directories - use persistent storage on HF Spaces if available
|
| 72 |
+
DATA_DIR = "/data" if (IS_HF_SPACE and os.path.exists("/data")) else "data"
|
| 73 |
+
EMBEDDINGS_DIR = os.path.join(DATA_DIR, "embeddings")
|
| 74 |
+
|
| 75 |
+
# Check storage persistence status
|
| 76 |
+
HAS_PERSISTENT_STORAGE = IS_HF_SPACE and os.path.exists("/data")
|
| 77 |
+
STORAGE_WARNING = "" if not IS_HF_SPACE else (
|
| 78 |
+
"β
Persistent storage enabled - files will survive restarts" if HAS_PERSISTENT_STORAGE else
|
| 79 |
+
"β οΈ Temporary storage - uploaded files will be lost when Space restarts"
|
| 80 |
+
)
|
| 81 |
+
|
| 82 |
+
# Initialize embeddings (shared across environments)
|
| 83 |
+
embeddings = HuggingFaceEmbeddings(model_name=CONFIG["embedding_model"])
|
| 84 |
+
|
| 85 |
+
# Global vectorstore (will be set by build_vectorstore)
|
| 86 |
+
vs = None
|
| 87 |
+
|
| 88 |
+
# ============================================================================
|
| 89 |
+
# CONVERSATION MEMORY
|
| 90 |
+
# ============================================================================
|
| 91 |
+
|
| 92 |
+
conversation_memory: deque = deque(maxlen=CONFIG["max_memory_turns"])
|
| 93 |
+
|
| 94 |
+
def add_to_memory(role: str, content: str):
|
| 95 |
+
"""Add a message to conversation memory with timestamp."""
|
| 96 |
+
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
| 97 |
+
conversation_memory.append({
|
| 98 |
+
"timestamp": timestamp,
|
| 99 |
+
"role": role,
|
| 100 |
+
"content": content
|
| 101 |
+
})
|
| 102 |
+
print(f"πΎ Memory updated: {role} message added (total: {len(conversation_memory)} turns)")
|
| 103 |
+
|
| 104 |
+
def get_memory_context(last_n: int = 10) -> str:
|
| 105 |
+
"""Get the last N conversation turns as context."""
|
| 106 |
+
if not conversation_memory:
|
| 107 |
+
return "No previous conversation history."
|
| 108 |
+
|
| 109 |
+
recent = list(conversation_memory)[-last_n:]
|
| 110 |
+
context_parts = []
|
| 111 |
+
for msg in recent:
|
| 112 |
+
role_emoji = "π€" if msg["role"] == "user" else "π€"
|
| 113 |
+
context_parts.append(f"[{msg['timestamp']}] {role_emoji} {msg['role'].upper()}: {msg['content'][:CONFIG['memory_context_limit']]}")
|
| 114 |
+
|
| 115 |
+
return "\n\n".join(context_parts)
|
| 116 |
+
|
| 117 |
+
def search_memory(query: str) -> str:
|
| 118 |
+
"""Search conversation memory for relevant past discussions."""
|
| 119 |
+
if not conversation_memory:
|
| 120 |
+
return "No conversation history to search."
|
| 121 |
+
|
| 122 |
+
query_lower = query.lower()
|
| 123 |
+
matches = []
|
| 124 |
+
|
| 125 |
+
for msg in conversation_memory:
|
| 126 |
+
content_lower = msg["content"].lower()
|
| 127 |
+
# Simple keyword matching
|
| 128 |
+
if any(word in content_lower for word in query_lower.split()):
|
| 129 |
+
role_emoji = "π€" if msg["role"] == "user" else "π€"
|
| 130 |
+
matches.append(f"[{msg['timestamp']}] {role_emoji} {msg['role'].upper()}: {msg['content'][:CONFIG['memory_context_limit'] - 200]}...")
|
| 131 |
+
|
| 132 |
+
if matches:
|
| 133 |
+
return f"Found {len(matches)} relevant conversation(s):\n\n" + "\n\n---\n\n".join(matches[:5])
|
| 134 |
+
else:
|
| 135 |
+
return f"No conversations found matching '{query}'."
|
| 136 |
+
|
| 137 |
+
def clear_memory():
|
| 138 |
+
"""Clear all conversation memory."""
|
| 139 |
+
conversation_memory.clear()
|
| 140 |
+
print("π§Ή Conversation memory cleared")
|
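# --- Illustrative usage of the memory helpers above (comment sketch, not module code) ---
# add_to_memory("user", "What is the attention mechanism?")
# add_to_memory("assistant", "Attention weighs pairwise token interactions ...")
# print(get_memory_context(last_n=2))   # last two turns, timestamped
# print(search_memory("attention"))     # keyword search over stored turns
# clear_memory()                        # deque is bounded by CONFIG["max_memory_turns"]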
| 141 |
+
|
| 142 |
+
# ============================================================================
|
| 143 |
+
# UTILITY FUNCTIONS
|
| 144 |
+
# ============================================================================
|
| 145 |
+
|
| 146 |
+
def get_timestamp() -> str:
|
| 147 |
+
"""Get current timestamp in HH:MM:SS format."""
|
| 148 |
+
return datetime.now().strftime("%H:%M:%S")
|
| 149 |
+
|
| 150 |
+
def create_elapsed_timer(start_time: datetime):
|
| 151 |
+
"""Create an elapsed time function for tracking duration."""
|
| 152 |
+
def get_elapsed() -> str:
|
| 153 |
+
elapsed = datetime.now() - start_time
|
| 154 |
+
return f"β±οΈ {elapsed.total_seconds():.1f}s"
|
| 155 |
+
return get_elapsed
|
| 156 |
+
|
| 157 |
+
def format_progress_bar(elapsed_time: str, percentage: int, message: str, bar_length: int = 20) -> str:
|
| 158 |
+
"""Format progress with visual progress bar using Unicode blocks."""
|
| 159 |
+
filled_length = int(bar_length * percentage / 100)
|
| 160 |
+
bar = 'β' * filled_length + 'β' * (bar_length - filled_length)
|
| 161 |
+
return f"{elapsed_time} | [{percentage:3d}%] {bar} {message}"
|
| 162 |
+
|
| 163 |
+
# =========================================================================
|
| 164 |
+
# FLOATING PROGRESS BAR HTML/JS (for Gradio UI)
|
| 165 |
+
# =========================================================================
|
| 166 |
+
def floating_progress_bar_html():
|
| 167 |
+
"""Return HTML+JS for a floating, borderless, fit-content progress bar overlay."""
|
| 168 |
+
return '''
|
| 169 |
+
<div id="floating-progress" style="
|
| 170 |
+
display: none;
|
| 171 |
+
position: fixed;
|
| 172 |
+
top: 20px; left: 50%; transform: translateX(-50%);
|
| 173 |
+
background: #222; color: #fff; padding: 8px 0; border-radius: 8px; z-index: 9999;
|
| 174 |
+
font-family: monospace; font-size: 1.2em; box-shadow: none; border: none;
|
| 175 |
+
width: fit-content; min-width: 0; max-width: none;
|
| 176 |
+
">
|
| 177 |
+
[....................................................................................................]
|
| 178 |
+
</div>
|
| 179 |
+
<script>
|
| 180 |
+
function showProgressBar(barText) {
|
| 181 |
+
var el = document.getElementById('floating-progress');
|
| 182 |
+
el.innerText = barText;
|
| 183 |
+
el.style.display = 'block';
|
| 184 |
+
}
|
| 185 |
+
function hideProgressBar() {
|
| 186 |
+
document.getElementById('floating-progress').style.display = 'none';
|
| 187 |
+
}
|
| 188 |
+
// Example usage (remove or replace with Python/Gradio event):
|
| 189 |
+
// showProgressBar('[|||||||||||||.............]');
|
| 190 |
+
// setTimeout(hideProgressBar, 2000);
|
| 191 |
+
</script>
|
| 192 |
+
'''
|
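# --- Illustrative: the overlay above is meant to be injected once into the Blocks UI, e.g. ---
#   with gr.Blocks() as demo:
#       gr.HTML(floating_progress_bar_html())
#   and then driven from JS via showProgressBar(...) / hideProgressBar().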
| 193 |
+
|
| 194 |
+
# ============================================================================
|
| 195 |
+
# PDF HELPERS
|
| 196 |
+
# ============================================================================
|
| 197 |
+
|
| 198 |
+
def get_pdf_list() -> List[str]:
|
| 199 |
+
"""Get list of PDF files in data folder."""
|
| 200 |
+
return [f for f in os.listdir(DATA_DIR) if f.endswith(".pdf")]
|
| 201 |
+
|
| 202 |
+
def get_pdf_list_ui() -> List[str]:
|
| 203 |
+
"""Get PDF list for UI dropdown (with error handling)."""
|
| 204 |
+
try:
|
| 205 |
+
return get_pdf_list()
|
| 206 |
+
except Exception as e:
|
| 207 |
+
print(f"Error getting PDF list: {e}")
|
| 208 |
+
return []
|
| 209 |
+
|
| 210 |
+
def make_pdf_dropdown(value=None):
|
| 211 |
+
"""Create a PDF dropdown with current file list."""
|
| 212 |
+
return gr.Dropdown(choices=get_pdf_list_ui(), value=value)
|
| 213 |
+
|
| 214 |
+
# ============================================================================
|
| 215 |
+
# VECTORSTORE OPERATIONS
|
| 216 |
+
# ============================================================================
|
| 217 |
+
|
| 218 |
+
def build_vectorstore(force_rebuild: bool = False) -> Optional[FAISS]:
|
| 219 |
+
"""Build or load FAISS vectorstore from PDFs.
|
| 220 |
+
|
| 221 |
+
Args:
|
| 222 |
+
force_rebuild: If True, rebuild from scratch even if existing vectorstore found
|
| 223 |
+
"""
|
| 224 |
+
global vs
|
| 225 |
+
|
| 226 |
+
# Check if we should load existing vectorstore
|
| 227 |
+
if not force_rebuild and os.path.exists(os.path.join(EMBEDDINGS_DIR, "index.faiss")):
|
| 228 |
+
try:
|
| 229 |
+
print("π Loading existing vectorstore...")
|
| 230 |
+
vectorstore = FAISS.load_local(EMBEDDINGS_DIR, embeddings, allow_dangerous_deserialization=True)
|
| 231 |
+
print("β
Vectorstore loaded successfully")
|
| 232 |
+
vs = vectorstore
|
| 233 |
+
return vectorstore
|
| 234 |
+
except Exception as e:
|
| 235 |
+
print(f"β Error loading vectorstore: {e}, rebuilding...")
|
| 236 |
+
|
| 237 |
+
# Build new vectorstore from PDFs
|
| 238 |
+
pdf_files = get_pdf_list()
|
| 239 |
+
if not pdf_files:
|
| 240 |
+
print("No PDF files found to build embeddings")
|
| 241 |
+
vs = None
|
| 242 |
+
return None
|
| 243 |
+
|
| 244 |
+
print(f"π¨ Building vectorstore from {len(pdf_files)} PDF(s): {pdf_files}")
|
| 245 |
+
|
| 246 |
+
docs: List[Document] = []
|
| 247 |
+
for filename in pdf_files:
|
| 248 |
+
try:
|
| 249 |
+
filepath = os.path.join(DATA_DIR, filename)
|
| 250 |
+
print(f"π Loading {filename}...")
|
| 251 |
+
loader = PyPDFLoader(filepath)
|
| 252 |
+
file_docs = loader.load()
|
| 253 |
+
docs.extend(file_docs)
|
| 254 |
+
print(f"β
Loaded {len(file_docs)} pages from {filename}")
|
| 255 |
+
except Exception as e:
|
| 256 |
+
print(f"β Error loading {filename}: {e}")
|
| 257 |
+
continue
|
| 258 |
+
|
| 259 |
+
if not docs:
|
| 260 |
+
print("β οΈ No documents could be loaded")
|
| 261 |
+
vs = None
|
| 262 |
+
return None
|
| 263 |
+
|
| 264 |
+
print(f"βοΈ Splitting {len(docs)} pages into chunks...")
|
| 265 |
+
splitter = RecursiveCharacterTextSplitter(
|
| 266 |
+
chunk_size=CONFIG["chunk_size"],
|
| 267 |
+
chunk_overlap=CONFIG["chunk_overlap"]
|
| 268 |
+
)
|
| 269 |
+
splits = splitter.split_documents(docs)
|
| 270 |
+
print(f"π§© Created {len(splits)} text chunks")
|
| 271 |
+
|
| 272 |
+
print("π€ Creating FAISS embeddings...")
|
| 273 |
+
try:
|
| 274 |
+
vectorstore = FAISS.from_documents(splits, embeddings)
|
| 275 |
+
|
| 276 |
+
print(f"πΎ Saving vectorstore to {EMBEDDINGS_DIR}...")
|
| 277 |
+
vectorstore.save_local(EMBEDDINGS_DIR)
|
| 278 |
+
|
| 279 |
+
vs = vectorstore
|
| 280 |
+
print("β
Vectorstore built and saved successfully")
|
| 281 |
+
return vectorstore
|
| 282 |
+
|
| 283 |
+
except Exception as e:
|
| 284 |
+
print(f"β Failed to build vectorstore: {e}")
|
| 285 |
+
import traceback
|
| 286 |
+
traceback.print_exc()
|
| 287 |
+
vs = None
|
| 288 |
+
return None
|
| 289 |
+
|
| 290 |
+
def get_vectorstore():
|
| 291 |
+
"""Get the current vectorstore instance."""
|
| 292 |
+
global vs
|
| 293 |
+
return vs
|
| 294 |
+
|
| 295 |
+
def set_vectorstore(vectorstore):
|
| 296 |
+
"""Set the vectorstore instance."""
|
| 297 |
+
global vs
|
| 298 |
+
vs = vectorstore
|
| 299 |
+
|
| 300 |
+
# ============================================================================
|
| 301 |
+
# RAG AGENT TOOLS (LangChain @tool decorator pattern)
|
| 302 |
+
# ============================================================================
|
| 303 |
+
|
| 304 |
+
@tool
|
| 305 |
+
def list_documents() -> str:
|
| 306 |
+
"""List all available PDF documents in the system. Use this tool when the user asks what documents are available, what files they have, or wants to see the document list."""
|
| 307 |
+
pdfs = get_pdf_list()
|
| 308 |
+
if pdfs:
|
| 309 |
+
return f"π Available documents: {', '.join(pdfs)}"
|
| 310 |
+
else:
|
| 311 |
+
return "π No documents are currently uploaded."
|
| 312 |
+
|
| 313 |
+
@tool
|
| 314 |
+
def count_documents() -> str:
|
| 315 |
+
"""Count the total number of uploaded PDF documents. Use this tool when the user asks how many documents they have or wants a document count."""
|
| 316 |
+
count = len(get_pdf_list())
|
| 317 |
+
return f"π Total documents: {count}"
|
| 318 |
+
|
| 319 |
+
@tool
|
| 320 |
+
def search_documents(query: str) -> str:
|
| 321 |
+
"""Search document content using RAG (Retrieval Augmented Generation). Use this tool to find information within the uploaded PDF documents based on a search query."""
|
| 322 |
+
global vs
|
| 323 |
+
|
| 324 |
+
# Check if we have any PDF files first
|
| 325 |
+
pdf_files = get_pdf_list()
|
| 326 |
+
if not pdf_files:
|
| 327 |
+
return "π No documents are currently uploaded. Please upload PDF files first."
|
| 328 |
+
|
| 329 |
+
# Force reload vectorstore from disk if files exist
|
| 330 |
+
print(f"π Checking vectorstore for {len(pdf_files)} PDF files...")
|
| 331 |
+
|
| 332 |
+
# Check if FAISS files exist on disk
|
| 333 |
+
faiss_path = os.path.join(EMBEDDINGS_DIR, "index.faiss")
|
| 334 |
+
pkl_path = os.path.join(EMBEDDINGS_DIR, "index.pkl")
|
| 335 |
+
|
| 336 |
+
if os.path.exists(faiss_path) and os.path.exists(pkl_path):
|
| 337 |
+
print(f"π Found vectorstore files, loading...")
|
| 338 |
+
try:
|
| 339 |
+
# Force reload from disk
|
| 340 |
+
vs = FAISS.load_local(EMBEDDINGS_DIR, embeddings, allow_dangerous_deserialization=True)
|
| 341 |
+
print(f"β
Vectorstore loaded successfully from disk")
|
| 342 |
+
except Exception as e:
|
| 343 |
+
print(f"β Error loading vectorstore: {e}")
|
| 344 |
+
vs = None
|
| 345 |
+
else:
|
| 346 |
+
print(f"π No vectorstore files found, attempting to build...")
|
| 347 |
+
vs = build_vectorstore()
|
| 348 |
+
|
| 349 |
+
if vs is None:
|
| 350 |
+
return f"π Found {len(pdf_files)} document(s) but search index could not be created. Please try re-uploading your files."
|
| 351 |
+
|
| 352 |
+
try:
|
| 353 |
+
# Extract key search terms from query (remove common words)
|
| 354 |
+
search_query = query
|
| 355 |
+
print(f"π Searching vectorstore for: {search_query}")
|
| 356 |
+
|
| 357 |
+
# Use similarity_search_with_score to filter by relevance
|
| 358 |
+
docs_with_scores = vs.similarity_search_with_score(search_query, k=CONFIG["search_k"])
|
| 359 |
+
|
| 360 |
+
if docs_with_scores:
|
| 361 |
+
# Filter by score (lower is better for L2 distance) - adjust threshold as needed
|
| 362 |
+
# Show more content from each chunk for better context
|
| 363 |
+
context_parts = []
|
| 364 |
+
for doc, score in docs_with_scores:
|
| 365 |
+
# Get source file from metadata
|
| 366 |
+
source = doc.metadata.get('source', 'Unknown').split('/')[-1]
|
| 367 |
+
page = doc.metadata.get('page', '?')
|
| 368 |
+
# Include score and source in debug output
|
| 369 |
+
print(f" π Score: {score:.3f} | Source: {source} pg{page} - {doc.page_content[:50]}...")
|
| 370 |
+
# Show more content with source info
|
| 371 |
+
context_parts.append(f"[Source: {source}, Page: {page}, Relevance: {score:.2f}]\n{doc.page_content[:CONFIG['search_content_limit']]}")
|
| 372 |
+
|
| 373 |
+
context = "\n\n---\n\n".join(context_parts)
|
| 374 |
+
print(f"β Found {len(docs_with_scores)} document chunks")
|
| 375 |
+
return f"π Search results for '{query}':\n\n{context}"
|
| 376 |
+
else:
|
| 377 |
+
print(f"β οΈ No relevant documents found for query: {query}")
|
| 378 |
+
return f"π No relevant information found for '{query}' in your {len(pdf_files)} document(s). Try different keywords or check if your documents contain relevant content."
|
| 379 |
+
|
| 380 |
+
except Exception as e:
|
| 381 |
+
error_msg = f"π Search error: {str(e)}. You have {len(pdf_files)} documents available."
|
| 382 |
+
print(f"β Search error: {str(e)}")
|
| 383 |
+
import traceback
|
| 384 |
+
traceback.print_exc()
|
| 385 |
+
return error_msg
|
| 386 |
+
|
| 387 |
+
@tool
|
| 388 |
+
def search_conversation_history(query: str) -> str:
|
| 389 |
+
"""Search through previous conversation history to find past discussions. Use this tool when the user asks about something they discussed before, wants to recall previous answers, or references past conversations."""
|
| 390 |
+
return search_memory(query)
|
| 391 |
+
|
| 392 |
+
@tool
|
| 393 |
+
def get_recent_conversation(turns: int = 5) -> str:
|
| 394 |
+
"""Get the most recent conversation turns. Use this tool when the user asks what they were discussing, wants a summary of recent chat, or needs context from earlier in the conversation."""
|
| 395 |
+
return get_memory_context(last_n=turns)
|
| 396 |
+
|
| 397 |
+
# List of all available tools
|
| 398 |
+
AGENT_TOOLS = [list_documents, count_documents, search_documents, search_conversation_history, get_recent_conversation]
|
| 399 |
+
|
| 400 |
+
# Sample question texts - Enhanced for agent capabilities
|
| 401 |
+
SAMPLE_Q1 = "How many documents are loaded? List their names and types?"
|
| 402 |
+
SAMPLE_Q2 = "Summarize the key points of each document in 5 bullet points."
|
| 403 |
+
SAMPLE_Q3 = "What is the attention mechanism? list the main topics."
|
| 404 |
+
SAMPLE_Q4 = "How can I cook chicken breast with Phillips air fryer recipes?"
|
| 405 |
+
SAMPLE_Q5 = "Summarize each document in max 10 bullet points."
|
| 406 |
+
SAMPLE_Q6 = "What did we discuss earlier?"
|
| 407 |
+
SAMPLE_Q7 = "Summarize it in 50 words."
|
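Because the document and memory tools above are wrapped with LangChain's @tool decorator, they are tool objects rather than plain functions and are called through the tool interface. A short sketch of exercising the shared layer directly (illustrative only; assumes PDFs are already in the data folder):

# Illustrative sketch: build the index, then call two of the agent tools directly.
from helpers_SHARED import build_vectorstore, AGENT_TOOLS, list_documents, search_documents

build_vectorstore()                                  # load or build the FAISS index from data/*.pdf

print(list_documents.invoke({}))                     # @tool objects are invoked via .invoke(...)
print(search_documents.invoke({"query": "attention mechanism"}))

print([t.name for t in AGENT_TOOLS])                 # tool names the ReAct agent sees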
requirements.txt
ADDED
|
@@ -0,0 +1,17 @@
|
| 1 |
+
fastapi>=0.104.0
|
| 2 |
+
uvicorn>=0.24.0
|
| 3 |
+
langchain>=0.1.0
|
| 4 |
+
langchain-community>=0.0.10
|
| 5 |
+
langchain-core>=0.1.0
|
| 6 |
+
langchain-text-splitters>=0.0.1
|
| 7 |
+
langchain-huggingface>=0.0.1
|
| 8 |
+
langchain-ollama>=0.1.0
|
| 9 |
+
langgraph>=0.2.0
|
| 10 |
+
huggingface_hub>=0.19.0
|
| 11 |
+
gradio==6.0.1
|
| 12 |
+
pypdf>=3.17.0
|
| 13 |
+
sentence-transformers>=2.2.2
|
| 14 |
+
faiss-cpu>=1.7.4
|
| 15 |
+
requests>=2.31.0
|
| 16 |
+
pydantic>=2.4.0
|
| 17 |
+
python-multipart>=0.0.6
|