Spaces:

syntaxhacker
/

developer-portfolio-rag

Sleeping

App Files Files Community

rohit committed on Jul 13

Commit

cfa1426

1 Parent(s): 1d87783

Create self-contained app.py with dynamic imports to fix module loading issues

Browse files

Files changed (2) hide show

Dockerfile +1 -1
main.py → app.py +13 -26

Dockerfile CHANGED Viewed

@@ -21,4 +21,4 @@ RUN chmod +x start.sh
 EXPOSE 7860
 # Run the FastAPI application
-CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]

 EXPOSE 7860
 # Run the FastAPI application
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

main.py → app.py RENAMED Viewed

@@ -3,16 +3,6 @@ from pydantic import BaseModel
 import os
 import logging
 import sys
-from app.config import DATASET_CONFIGS
-# Lazy imports to avoid blocking startup
-# from .pipeline import RAGPipeline  # Will import when needed
-# import umap  # Will import when needed for visualization
-# import plotly.express as px  # Will import when needed for visualization
-# import plotly.graph_objects as go  # Will import when needed for visualization
-# from plotly.subplots import make_subplots  # Will import when needed for visualization
-# import numpy as np  # Will import when needed for visualization
-# from sklearn.preprocessing import normalize  # Will import when needed for visualization
-# import pandas as pd  # Will import when needed for visualization
 import json
 # Configure logging
@@ -32,20 +22,14 @@ pipelines = {}
 google_api_key = os.getenv("GOOGLE_API_KEY")
 logger.info(f"Starting RAG Pipeline API")
-logger.info(f"Port from env: {os.getenv('PORT', 'Not set - will use 8000')}")
 logger.info(f"Google API Key present: {'Yes' if google_api_key else 'No'}")
-logger.info(f"Available datasets: {list(DATASET_CONFIGS.keys())}")
 # Don't load datasets during startup - do it asynchronously after server starts
 logger.info("RAG Pipeline API is ready to serve requests - datasets will load in background")
-# Visualization function disabled to speed up startup
-# def create_3d_visualization(pipeline):
-#     ... (commented out for faster startup)
 class Question(BaseModel):
     text: str
-    dataset: str = "developer-portfolio"  # Default dataset
 @app.post("/answer")
 async def get_answer(question: Question):
@@ -77,11 +61,15 @@ async def load_datasets_background():
     """Load datasets in background after server starts"""
     global pipelines
     if google_api_key:
-        # Import RAGPipeline only when needed
-        from app.pipeline import RAGPipeline
-        # Only load developer-portfolio to save memory
-        dataset_name = "developer-portfolio"
         try:
             logger.info(f"Loading dataset: {dataset_name}")
             pipeline = RAGPipeline.from_preset(
                 google_api_key=google_api_key,
@@ -90,7 +78,7 @@ async def load_datasets_background():
             pipelines[dataset_name] = pipeline
             logger.info(f"Successfully loaded {dataset_name}")
         except Exception as e:
-            logger.error(f"Failed to load {dataset_name}: {e}")
         logger.info(f"Background loading complete - {len(pipelines)} datasets loaded")
     else:
         logger.warning("No Google API key provided - running in demo mode without datasets")
@@ -98,7 +86,7 @@ async def load_datasets_background():
 @app.on_event("startup")
 async def startup_event():
     logger.info("FastAPI application startup complete")
-    logger.info(f"Server should be running on port: {os.getenv('PORT', '8000')}")
     # Start loading datasets in background (non-blocking)
     import asyncio
@@ -123,6 +111,5 @@ async def health_check():
         "datasets_loaded": len(pipelines),
         "total_datasets": 1,  # Only loading developer-portfolio
         "loading_status": loading_status,
-        "port": os.getenv('PORT', '8000')
-    }

 import os
 import logging
 import sys
 import json
 # Configure logging
 google_api_key = os.getenv("GOOGLE_API_KEY")
 logger.info(f"Starting RAG Pipeline API")
 logger.info(f"Google API Key present: {'Yes' if google_api_key else 'No'}")
 # Don't load datasets during startup - do it asynchronously after server starts
 logger.info("RAG Pipeline API is ready to serve requests - datasets will load in background")
 class Question(BaseModel):
     text: str
+    dataset: str = "developer-portfolio"
 @app.post("/answer")
 async def get_answer(question: Question):
     """Load datasets in background after server starts"""
     global pipelines
     if google_api_key:
         try:
+            # Import modules only when needed
+            import sys
+            sys.path.append('/app')
+            from app.pipeline import RAGPipeline
+            from app.config import DATASET_CONFIGS
+            # Only load developer-portfolio to save memory
+            dataset_name = "developer-portfolio"
             logger.info(f"Loading dataset: {dataset_name}")
             pipeline = RAGPipeline.from_preset(
                 google_api_key=google_api_key,
             pipelines[dataset_name] = pipeline
             logger.info(f"Successfully loaded {dataset_name}")
         except Exception as e:
+            logger.error(f"Failed to load dataset: {e}")
         logger.info(f"Background loading complete - {len(pipelines)} datasets loaded")
     else:
         logger.warning("No Google API key provided - running in demo mode without datasets")
 @app.on_event("startup")
 async def startup_event():
     logger.info("FastAPI application startup complete")
+    logger.info(f"Server should be running on port: 7860")
     # Start loading datasets in background (non-blocking)
     import asyncio
         "datasets_loaded": len(pipelines),
         "total_datasets": 1,  # Only loading developer-portfolio
         "loading_status": loading_status,
+        "port": "7860"
+    }