Spaces:

WanIrfan
/

Atlas

Sleeping

File size: 4,053 Bytes

8ada0c0

import logging
import json
import re
from src.doc_qa import AgenticQA
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_google_genai import ChatGoogleGenerativeAI

logger = logging.getLogger(__name__)

def load_rag_system(collection_name, domain):
    """
    Build an AgenticQA agent bound to an existing vector-store collection.

    The agent is configured with the given collection name, the
    "chroma_db" persist directory and the given domain.
    NOTE(review): the original docstring states this only connects to a
    persistent store and does not re-process documents; that depends on
    AgenticQA, which is not visible here. Confirm.

    Args:
        collection_name: Name of the collection the retriever should use.
        domain: Domain value forwarded to AgenticQA in its config.

    Returns:
        The AgenticQA instance, or None when construction raised or the
        resulting agent has a falsy ``agent_executor``. Failures are logged.
    """
    logger.info(f"Loading RAG system for collection: '{collection_name}'  (Domain: {domain})...")
    retriever_settings = {
        "collection_name": collection_name,
        "persist_directory": "chroma_db",
    }
    try:
        rag_agent = AgenticQA(
            config={"retriever": retriever_settings, "domain": domain}
        )
        # A falsy executor is treated like any other construction failure:
        # raising here routes it through the shared logging path below.
        if not rag_agent.agent_executor:
            raise Exception("Agent Executor was not created. Check logs for errors.")

        logger.info(f"✅ System for '{collection_name}' loaded successfully.")
        return rag_agent
    except Exception as err:
        logger.error(f"❌ Failed to load RAG system for '{collection_name}': {err}")
        logger.warning("Did you run the ingest.py script first?")
        return None

def markdown_bold_to_html(text: str):
    """Return *text* with each ``**bold**`` span rewritten as ``<strong>bold</strong>``.

    Matching is non-greedy and does not cross line breaks, so a ``**`` pair
    split over two lines is left as-is.
    """
    bold_span = re.compile(r"\*\*(.*?)\*\*")
    return bold_span.sub(r"<strong>\1</strong>", text)

def standardize_query(query):
    """Normalize a user query by trimming whitespace and lower-casing it.

    Args:
        query: The raw query string, or a falsy value (``None``, ``""``).

    Returns:
        The stripped, lower-cased query, or ``None`` when there is no usable
        query. That covers a falsy input and also a query made up only of
        whitespace, so callers see a single "no query" value.
    """
    if not query:
        return None
    normalized = query.strip().lower()
    # A whitespace-only query is empty after stripping; report it the same
    # way as any other empty query instead of returning "".
    return normalized or None

def get_standalone_question(input_question, chat_history, llm):
    """Ask the LLM to rephrase a question so it stands alone without the chat history.

    Args:
        input_question: The latest user question.
        chat_history: Prior messages; when falsy the question is returned
            unchanged and the LLM is not called.
        llm: Object piped after the prompt template; the result of invoking
            the chain must expose a ``content`` attribute.

    Returns:
        ``input_question`` itself when there is no history, otherwise the
        ``content`` of the LLM response.
    """
    if not chat_history:
        return input_question

    system_instruction = (
        "Given a chat history and the latest user question which might reference context in the chat history, "
        "formulate a standalone question which can be understood without the chat history. "
        "IMPORTANT: DO NOT PROVIDE ANY ANSWERS. ONLY REPHRASE THE QUESTION IF NEEDED. "
        "If the question is already clear and standalone, return it exactly as is. "
        "Output ONLY the reformulated question, nothing else."
    )
    prompt_messages = [
        ("system", system_instruction),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
    rephrase_chain = ChatPromptTemplate.from_messages(prompt_messages) | llm

    chain_inputs = {"chat_history": chat_history, "input": input_question}
    reply = rephrase_chain.invoke(chain_inputs)
    return reply.content

def parse_agent_response(response_dict):
    """Unpack an AgenticQA response dict into ``(answer, thoughts, validation, source)``.

    Missing keys fall back to placeholder values. The answer has its markdown
    bold converted to HTML. A validation whose second element is
    "Validation skipped for insurance domain." is replaced by
    ``(True, "Factual Answer")``.
    """
    answer = markdown_bold_to_html(
        response_dict.get('answer', 'Error: No answer found.')
    )
    thoughts = response_dict.get('thoughts', 'No thought process available.')
    source = response_dict.get('source', 'Unknown')
    validation = response_dict.get('validation', (False, 'Validation failed.'))

    # Only index into the validation when it is truthy (non-empty / not None).
    if validation:
        verdict_message = validation[1]
        if verdict_message == "Validation skipped for insurance domain.":
            validation = (True, "Factual Answer")

    return answer, thoughts, validation, source
    
def extract_json_from_string(text: str) -> dict:
    """
    Finds and parses the first valid JSON object embedded in a string.

    Each "{" is tried in order as the possible start of an object and the
    first one that decodes is returned. Text or additional braces after the
    object therefore do not prevent extraction, unlike a single greedy
    "first { to last }" match.

    Args:
        text: Arbitrary text that may contain a JSON object.

    Returns:
        The decoded JSON object as a dict. On failure an error dict is
        returned instead (never an empty dict), of the form
        ``{"error": <message>, "raw_text": <text>}``. ``raw_text`` is the
        span from the first "{" to the last "}" when such a span exists but
        nothing in it decodes, or the whole input when there is no such span.
    """
    first_brace = text.find("{")
    last_brace = text.rfind("}")
    if first_brace == -1 or last_brace < first_brace:
        # No "{ ... }" span at all in the string
        return {"error": "No JSON object found in the string", "raw_text": text}

    decoder = json.JSONDecoder()
    start = first_brace
    # A "{" at or after the last "}" cannot open a complete object.
    while start != -1 and start < last_brace:
        try:
            # raw_decode stops at the end of the object and ignores whatever
            # follows it, so trailing text is harmless.
            parsed, _end = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            start = text.find("{", start + 1)
        else:
            return parsed

    # Braces were present but no candidate decoded as JSON
    return {
        "error": "Failed to parse extracted JSON",
        "raw_text": text[first_brace:last_brace + 1],
    }