Commit 327d382 (parent: 30db61e)
Add Modal deployment with vLLM support

Files changed:
- modal_app.py  (+101, -0)
- modal_vllm.py  (+105, -0)
- tools/llama_agent.py  (+198, -42)
modal_app.py (ADDED)
@@ -0,0 +1,101 @@

"""Modal deployment for Spice Bae AI Advisor.

Deploy with: modal deploy modal_app.py
Test locally: modal serve modal_app.py
"""

import modal

app = modal.App("spice-bae")

image = (
    modal.Image.debian_slim(python_version="3.11")
    .pip_install(
        "gradio[mcp]==5.50.0",
        "fastapi[standard]",
        "neo4j",
        "python-dotenv",
        "requests",
        "beautifulsoup4>=4.12.0",
    )
    .add_local_file("app.py", "/root/app.py")
    .add_local_dir("tools", "/root/tools")
    .add_local_dir("data", "/root/data")
)


@app.function(
    image=image,
    secrets=[modal.Secret.from_name("spice-bae-secrets")],
    max_containers=1,
    timeout=600,
)
@modal.web_server(port=7860, startup_timeout=120)
def serve():
    """Serve the Spice Bae Gradio app."""
    import sys
    import os
    import subprocess

    # Add mounted code directory to Python path
    sys.path.insert(0, "/root")
    os.chdir("/root")

    # Launch Gradio app directly with its built-in server
    subprocess.Popen(
        [
            sys.executable,
            "-c",
            """
import sys
sys.path.insert(0, "/root")
import os
os.chdir("/root")
from app import demo
demo.launch(
    server_name="0.0.0.0",
    server_port=7860,
    mcp_server=True,
    share=False,
    ssr_mode=False
)
""",
        ],
        env={**os.environ},
    )


# Instructions for deployment:
#
# 1. Install Modal CLI:
#      pip install modal
#      modal setup
#
# 2. Deploy vLLM first (for open-source LLM):
#      modal deploy modal_vllm.py
#    Note the URL: https://YOUR_USERNAME--spice-bae-llm-serve.modal.run
#
# 3. Create secrets (using vLLM endpoint):
#      modal secret create spice-bae-secrets \
#        NEO4J_URI="neo4j+s://xxx.databases.neo4j.io" \
#        NEO4J_USERNAME="neo4j" \
#        NEO4J_PASSWORD="your_password" \
#        OPENAI_API_BASE="https://YOUR_USERNAME--spice-bae-llm-serve.modal.run/v1" \
#        OPENAI_API_KEY="not-needed" \
#        OPENAI_MODEL="Qwen/Qwen2.5-7B-Instruct"
#
#    OR use Anthropic instead:
#      modal secret create spice-bae-secrets \
#        NEO4J_URI="neo4j+s://xxx.databases.neo4j.io" \
#        NEO4J_USERNAME="neo4j" \
#        NEO4J_PASSWORD="your_password" \
#        ANTHROPIC_API_KEY="sk-ant-xxx"
#
# 4. Deploy:
#      modal deploy modal_app.py
#
# 5. Your app will be available at:
#      https://your-username--spice-bae-serve.modal.run
#
#    MCP endpoint:
#      https://your-username--spice-bae-serve.modal.run/gradio_api/mcp/sse
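Once deployed, a plain HTTP check confirms the web endpoint is up. A minimal sketch with requests (already a dependency of the image); the hostname follows the URL pattern noted above, and the username placeholder is yours to fill in:

# smoke_test_app.py - check that the deployed Gradio app answers over HTTP.
import requests

BASE_URL = "https://your-username--spice-bae-serve.modal.run"  # placeholder username

resp = requests.get(BASE_URL, timeout=60)
print(resp.status_code)  # expect 200 once the container has finished starting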
|
modal_vllm.py
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Modal vLLM deployment for Spice Bae LLM inference.
|
| 2 |
+
|
| 3 |
+
This deploys an open-source LLM (Llama 3.1 8B) using vLLM on Modal,
|
| 4 |
+
providing an OpenAI-compatible API endpoint that Spice Bae can use
|
| 5 |
+
instead of Claude API.
|
| 6 |
+
|
| 7 |
+
Deploy with: modal deploy modal_vllm.py
|
| 8 |
+
Test locally: modal serve modal_vllm.py
|
| 9 |
+
|
| 10 |
+
Uses Modal's $30/month free credits instead of paid API keys.
|
| 11 |
+
"""
|
| 12 |
+
|
| 13 |
+
import modal
|
| 14 |
+
|
| 15 |
+
MODELS_DIR = "/llm-models"
|
| 16 |
+
MODEL_NAME = "Qwen/Qwen2.5-7B-Instruct"
|
| 17 |
+
|
| 18 |
+
def download_model_to_image(model_dir: str, model_name: str):
|
| 19 |
+
"""Download model during image build."""
|
| 20 |
+
from huggingface_hub import snapshot_download
|
| 21 |
+
|
| 22 |
+
snapshot_download(
|
| 23 |
+
model_name,
|
| 24 |
+
local_dir=model_dir,
|
| 25 |
+
ignore_patterns=["*.pt", "*.bin"],
|
| 26 |
+
)
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
image = (
|
| 30 |
+
modal.Image.debian_slim(python_version="3.11")
|
| 31 |
+
.pip_install(
|
| 32 |
+
"vllm==0.6.4.post1",
|
| 33 |
+
"huggingface_hub",
|
| 34 |
+
"hf_transfer",
|
| 35 |
+
)
|
| 36 |
+
.env({"HF_HUB_ENABLE_HF_TRANSFER": "1"})
|
| 37 |
+
.run_function(
|
| 38 |
+
download_model_to_image,
|
| 39 |
+
kwargs={"model_dir": MODELS_DIR, "model_name": MODEL_NAME},
|
| 40 |
+
secrets=[modal.Secret.from_name("huggingface-token")],
|
| 41 |
+
timeout=60 * 20,
|
| 42 |
+
)
|
| 43 |
+
)
|
| 44 |
+
|
| 45 |
+
app = modal.App("spice-bae-llm")
|
| 46 |
+
|
| 47 |
+
N_GPU = 1
|
| 48 |
+
MINUTES = 60
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
@app.function(
|
| 52 |
+
image=image,
|
| 53 |
+
gpu="A10G",
|
| 54 |
+
scaledown_window=5 * MINUTES,
|
| 55 |
+
timeout=20 * MINUTES,
|
| 56 |
+
max_containers=1,
|
| 57 |
+
)
|
| 58 |
+
@modal.web_server(port=8000, startup_timeout=300)
|
| 59 |
+
def serve():
|
| 60 |
+
"""Serve vLLM OpenAI-compatible API using built-in server."""
|
| 61 |
+
import subprocess
|
| 62 |
+
|
| 63 |
+
cmd = [
|
| 64 |
+
"python", "-m", "vllm.entrypoints.openai.api_server",
|
| 65 |
+
"--model", MODELS_DIR,
|
| 66 |
+
"--served-model-name", MODEL_NAME,
|
| 67 |
+
"--host", "0.0.0.0",
|
| 68 |
+
"--port", "8000",
|
| 69 |
+
"--gpu-memory-utilization", "0.90",
|
| 70 |
+
"--max-model-len", "4096",
|
| 71 |
+
]
|
| 72 |
+
|
| 73 |
+
subprocess.Popen(cmd)
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
# =============================================================================
|
| 77 |
+
# DEPLOYMENT INSTRUCTIONS
|
| 78 |
+
# =============================================================================
|
| 79 |
+
#
|
| 80 |
+
# 1. Install Modal CLI:
|
| 81 |
+
# pip install modal
|
| 82 |
+
# modal setup
|
| 83 |
+
#
|
| 84 |
+
# 2. Create HuggingFace token secret (for gated models like Llama):
|
| 85 |
+
# - Get token from https://huggingface.co/settings/tokens
|
| 86 |
+
# - Accept Llama license at https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct
|
| 87 |
+
# modal secret create huggingface-token HF_TOKEN=hf_xxx
|
| 88 |
+
#
|
| 89 |
+
# 3. Deploy:
|
| 90 |
+
# modal deploy modal_vllm.py
|
| 91 |
+
#
|
| 92 |
+
# 4. Your API will be at:
|
| 93 |
+
# https://YOUR_USERNAME--spice-bae-llm-serve.modal.run
|
| 94 |
+
#
|
| 95 |
+
# 5. Test it:
|
| 96 |
+
# curl https://YOUR_USERNAME--spice-bae-llm-serve.modal.run/v1/chat/completions \
|
| 97 |
+
# -H "Content-Type: application/json" \
|
| 98 |
+
# -d '{"model": "meta-llama/Llama-3.1-8B-Instruct", "messages": [{"role": "user", "content": "Hello!"}]}'
|
| 99 |
+
#
|
| 100 |
+
# 6. Set environment variable for Spice Bae:
|
| 101 |
+
# OPENAI_API_BASE=https://YOUR_USERNAME--spice-bae-llm-serve.modal.run/v1
|
| 102 |
+
# OPENAI_API_KEY=not-needed
|
| 103 |
+
# USE_OPENAI_COMPATIBLE=true
|
| 104 |
+
#
|
| 105 |
+
# =============================================================================
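The curl test in step 5 has a straightforward Python equivalent. A minimal sketch with requests; the URL pattern and model name come from the notes above, and the username placeholder must be replaced with your Modal username:

# test_vllm_endpoint.py - exercise the OpenAI-compatible chat endpoint.
import requests

url = "https://YOUR_USERNAME--spice-bae-llm-serve.modal.run/v1/chat/completions"
payload = {
    "model": "Qwen/Qwen2.5-7B-Instruct",  # must match --served-model-name
    "messages": [{"role": "user", "content": "Hello!"}],
}

resp = requests.post(url, json=payload, timeout=120)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])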
tools/llama_agent.py (CHANGED)

@@ -1,7 +1,11 @@
 """LlamaIndex Agent for Spice Bae conversational interface.
 
 This module wraps the existing spice database tools as LlamaIndex FunctionTools,
-enabling a conversational AI interface powered by Claude.
+enabling a conversational AI interface powered by Claude or OpenAI-compatible endpoints.
+
+Supports:
+- Anthropic Claude (via ANTHROPIC_API_KEY) - uses function calling
+- OpenAI-compatible APIs like vLLM (via OPENAI_API_BASE) - uses ReAct prompting
 
 Architecture:
     User Question -> Guardrails -> LlamaIndex AgentWorkflow -> Tool Selection -> Neo4j Query -> Guardrails -> Response
@@ -9,10 +13,10 @@ Architecture:
 
 import asyncio
 import os
-from typing import Optional
-from llama_index.core.tools import FunctionTool
-from llama_index.core.agent.workflow import AgentWorkflow
-from llama_index.llms.anthropic import Anthropic
+import re
+import json
+import requests
+from typing import Optional, Tuple, Any, Dict
 
 from tools.neo4j_queries import SpiceDatabase
 from tools.guardrails import (
@@ -27,14 +31,14 @@ class SpiceAgent:
     """Conversational agent for medicinal spice queries.
 
     Wraps the SpiceDatabase functions as LlamaIndex tools and uses
-    Claude for natural language understanding.
-    guardrails for safety, cost control, and compliance.
+    Claude or OpenAI-compatible LLMs for natural language understanding.
 
     Attributes:
         db: SpiceDatabase instance for Neo4j queries
-        llm:
-        workflow: LlamaIndex AgentWorkflow for tool orchestration
+        llm: LLM instance for processing queries
+        workflow: LlamaIndex AgentWorkflow for tool orchestration (Anthropic only)
         guardrails: GuardrailManager for safety checks
+        provider: LLM provider type ('anthropic' or 'openai')
     """
 
     DEFAULT_MODEL = "claude-sonnet-4-20250514"
@@ -50,17 +54,32 @@ class SpiceAgent:
         """Initialize the spice agent.
 
         Args:
-            api_key:
-            model: Model name to use.
+            api_key: API key. If None, reads from env vars.
+            model: Model name to use.
             enable_guardrails: Whether to enable safety guardrails.
             daily_cost_limit: Maximum daily spend in USD (default: $1/day).
             strict_topic_filter: Whether to strictly block off-topic queries.
         """
         self.db = SpiceDatabase()
-        self.api_key = api_key or os.getenv("ANTHROPIC_API_KEY")
-        self.model = model or self.DEFAULT_MODEL
         self.llm = None
         self.workflow = None
+        self.provider = None
+        self.openai_base = os.getenv("OPENAI_API_BASE")
+        self.openai_key = os.getenv("OPENAI_API_KEY", "not-needed")
+        self.anthropic_key = api_key or os.getenv("ANTHROPIC_API_KEY")
+
+        # Determine provider and model
+        if self.openai_base:
+            self.provider = "openai"
+            self.model = model or os.getenv("OPENAI_MODEL", "Qwen/Qwen2.5-7B-Instruct")
+            print(f"[LLM] Using OpenAI-compatible endpoint: {self.openai_base}")
+            print(f"[LLM] Model: {self.model}")
+        elif self.anthropic_key:
+            self.provider = "anthropic"
+            self.model = model or self.DEFAULT_MODEL
+            print(f"[LLM] Using Anthropic Claude: {self.model}")
+        else:
+            print("[LLM] No API key configured")
 
         if enable_guardrails:
             self.guardrails = create_default_guardrails(
@@ -73,14 +92,20 @@ class SpiceAgent:
             self.guardrails = None
             print("[GUARDRAILS] Disabled")
 
-        if self.api_key:
-            self._initialize_agent()
+        if self.provider == "anthropic" and self.anthropic_key:
+            self._initialize_anthropic_agent()
+        elif self.provider == "openai" and self.openai_base:
+            self._tools = self._build_tool_registry()
 
-    def _initialize_agent(self) -> None:
-        """Initialize LLM and agent workflow with tools."""
+    def _initialize_anthropic_agent(self) -> None:
+        """Initialize Anthropic LLM and agent workflow with tools."""
+        from llama_index.core.tools import FunctionTool
+        from llama_index.core.agent.workflow import AgentWorkflow
+        from llama_index.llms.anthropic import Anthropic
+
         self.llm = Anthropic(
             model=self.model,
-            api_key=self.api_key,
+            api_key=self.anthropic_key,
         )
 
         tools = self._create_tools()
@@ -88,7 +113,12 @@ class SpiceAgent:
         self.workflow = AgentWorkflow.from_tools_or_functions(
             tools_or_functions=tools,
             llm=self.llm,
-            system_prompt="""You are a helpful medicinal cuisine advisor that helps users learn about spices, their nutritional content, and health benefits.
+            system_prompt=self._get_system_prompt(),
+        )
+
+    def _get_system_prompt(self) -> str:
+        """Get the system prompt for the agent."""
+        return """You are a helpful medicinal cuisine advisor that helps users learn about spices, their nutritional content, and health benefits.
 
 You have access to a database of 88+ spices with:
 - Nutritional data from USDA FoodData Central
@@ -102,8 +132,44 @@ When answering questions:
 2. Provide clear, helpful responses
 3. Include source attribution (USDA or NCCIH)
 4. Mention relevant safety information when discussing health benefits
-"""
-        )
+"""
+
+    def _build_tool_registry(self) -> Dict[str, callable]:
+        """Build a registry of available tools for ReAct agent."""
+        return {
+            "get_spice_information": self._get_spice_info,
+            "list_available_spices": self._list_spices,
+            "get_nutrient_content": self._get_nutrient,
+            "find_spice_substitutes": self._find_substitutes,
+            "get_health_benefits": self._get_health_benefits,
+            "find_spices_for_benefit": self._find_by_benefit,
+            "get_safety_information": self._get_safety_info,
+            "find_medicinal_substitutes": self._find_medicinal_substitutes,
+        }
+
+    def _get_react_system_prompt(self) -> str:
+        """Get ReAct-style system prompt for OpenAI-compatible endpoints."""
+        return """You are a helpful medicinal cuisine advisor. You help users learn about spices, their nutritional content, and health benefits.
+
+You have access to a database of 88+ spices. To answer questions, you MUST use the available tools.
+
+AVAILABLE TOOLS:
+- get_spice_information(spice_name): Get comprehensive information about a spice
+- list_available_spices(): List all spices in the database
+- get_nutrient_content(spice_name, nutrient_name): Get specific nutrient content
+- find_spice_substitutes(spice_name): Find substitute spices
+- get_health_benefits(spice_name): Get health benefits of a spice
+- find_spices_for_benefit(benefit_keyword): Find spices for a health condition
+- get_safety_information(spice_name): Get safety info and cautions
+- find_medicinal_substitutes(spice_name): Find substitutes with similar health benefits
+
+TO USE A TOOL, respond with EXACTLY this format:
+TOOL: tool_name
+ARGS: {"param1": "value1", "param2": "value2"}
+
+After receiving tool results, provide a helpful response to the user.
+
+IMPORTANT: This information is for educational purposes only, not medical advice."""
 
     def _create_tools(self) -> list:
         """Create LlamaIndex FunctionTools from database methods.
@@ -111,6 +177,8 @@ When answering questions:
         Returns:
             List of FunctionTool objects for the agent.
         """
+        from llama_index.core.tools import FunctionTool
+
         tools = [
             FunctionTool.from_defaults(
                 fn=self._get_spice_info,
@@ -205,8 +273,12 @@ When answering questions:
         Returns:
             Agent's response string.
         """
-        if not self.workflow:
-            return
+        if not self.is_ready():
+            return (
+                "Error: Agent not initialized. Please set either:\n"
+                "- ANTHROPIC_API_KEY for Claude, or\n"
+                "- OPENAI_API_BASE for OpenAI-compatible endpoints"
+            )
 
         if self.guardrails:
             should_proceed, block_message, context = self.guardrails.check_input(
@@ -216,26 +288,106 @@ When answering questions:
             return block_message
 
         try:
-            loop = asyncio.new_event_loop()
-            asyncio.set_event_loop(loop)
-            try:
-                result = loop.run_until_complete(self._async_chat(message))
-
-                if self.guardrails:
-                    result = self.guardrails.check_output(result)
-
-                    usage_tracker = self.guardrails.get_guardrail("usage_tracking")
-                    if usage_tracker and isinstance(usage_tracker, UsageTrackingGuardrail):
-                        input_tokens = len(message) // 4
-                        output_tokens = len(result) // 4
-                        usage_tracker.record_usage(input_tokens, output_tokens, session_id)
-
-                return result
-            finally:
-                loop.close()
+            if self.provider == "anthropic":
+                result = self._chat_anthropic(message)
+            else:
+                result = self._chat_openai(message)
+
+            if self.guardrails:
+                result = self.guardrails.check_output(result)
+
+                usage_tracker = self.guardrails.get_guardrail("usage_tracking")
+                if usage_tracker and isinstance(usage_tracker, UsageTrackingGuardrail):
+                    input_tokens = len(message) // 4
+                    output_tokens = len(result) // 4
+                    usage_tracker.record_usage(input_tokens, output_tokens, session_id)
+
+            return result
         except Exception as e:
             return f"Error processing request: {str(e)}"
 
+    def _chat_anthropic(self, message: str) -> str:
+        """Process chat using Anthropic/LlamaIndex workflow."""
+        loop = asyncio.new_event_loop()
+        asyncio.set_event_loop(loop)
+        try:
+            return loop.run_until_complete(self._async_chat(message))
+        finally:
+            loop.close()
+
+    def _chat_openai(self, message: str) -> str:
+        """Process chat using OpenAI-compatible endpoint with ReAct prompting."""
+        messages = [
+            {"role": "system", "content": self._get_react_system_prompt()},
+            {"role": "user", "content": message},
+        ]
+
+        max_iterations = 5
+        for _ in range(max_iterations):
+            response = self._call_openai_api(messages)
+
+            if not response:
+                return "Error: Failed to get response from LLM"
+
+            content = response.get("choices", [{}])[0].get("message", {}).get("content", "")
+
+            tool_match = re.search(r"TOOL:\s*(\w+)\s*\nARGS:\s*(\{.*?\})", content, re.DOTALL)
+
+            if tool_match:
+                tool_name = tool_match.group(1)
+                try:
+                    args = json.loads(tool_match.group(2))
+                except json.JSONDecodeError:
+                    args = {}
+
+                tool_result = self._execute_tool(tool_name, args)
+
+                messages.append({"role": "assistant", "content": content})
+                messages.append({"role": "user", "content": f"TOOL RESULT:\n{tool_result}"})
+            else:
+                return content
+
+        return content
+
+    def _call_openai_api(self, messages: list) -> Optional[dict]:
+        """Make a request to OpenAI-compatible API."""
+        url = f"{self.openai_base.rstrip('/')}/chat/completions"
+
+        headers = {
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {self.openai_key}",
+        }
+
+        data = {
+            "model": self.model,
+            "messages": messages,
+            "temperature": 0.7,
+            "max_tokens": 2048,
+        }
+
+        try:
+            response = requests.post(url, headers=headers, json=data, timeout=60)
+            response.raise_for_status()
+            return response.json()
+        except Exception as e:
+            print(f"[API] Error calling OpenAI API: {e}")
+            return None
+
+    def _execute_tool(self, tool_name: str, args: dict) -> str:
+        """Execute a tool by name with given arguments."""
+        if not hasattr(self, "_tools"):
+            return "Error: Tool registry not initialized"
+
+        tool_fn = self._tools.get(tool_name)
+        if not tool_fn:
+            return f"Error: Unknown tool '{tool_name}'"
+
+        try:
+            print(f"[TOOL] {tool_name} called with: {args}")
+            return tool_fn(**args)
+        except Exception as e:
+            return f"Error executing {tool_name}: {str(e)}"
+
     async def _async_chat(self, message: str) -> str:
         """Async chat handler for the workflow.
 
@@ -276,9 +428,13 @@ When answering questions:
         """Check if agent is ready to process queries.
 
         Returns:
-            True if
+            True if agent is properly initialized, False otherwise.
         """
-        return self.workflow is not None
+        if self.provider == "anthropic":
+            return self.workflow is not None
+        elif self.provider == "openai":
+            return hasattr(self, "_tools") and self._tools is not None
+        return False
 
 
 def create_agent(api_key: Optional[str] = None) -> SpiceAgent: