|
|
import logging
|
|
|
import json
|
|
|
import re
|
|
|
from src.doc_qa import AgenticQA
|
|
|
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
|
|
|
from langchain_google_genai import ChatGoogleGenerativeAI
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def load_rag_system(collection_name, domain):
    """
    Loads an existing RAG system by connecting to the persistent vector store.

    This is fast and does not re-process any documents.

    Args:
        collection_name: Name of the persisted Chroma collection to attach to.
        domain: Domain label forwarded to the agent config (e.g. "insurance").

    Returns:
        A ready AgenticQA instance, or None if loading failed (the error is
        logged rather than raised so callers can degrade gracefully).
    """
    logger.info(f"Loading RAG system for collection: '{collection_name}' (Domain: {domain})...")
    try:
        agent = AgenticQA(
            config={
                "retriever": {
                    "collection_name": collection_name,
                    # Must match the directory used by the ingestion script.
                    "persist_directory": "chroma_db"
                },
                "domain": domain
            }
        )

        # AgenticQA swallows construction errors internally; surface them here.
        if not agent.agent_executor:
            raise Exception("Agent Executor was not created. Check logs for errors.")

        # Fixed: this message was previously a broken multi-line f-string with a
        # mojibake'd status glyph ("β"); keep it on a single line.
        logger.info(f"✅ System for '{collection_name}' loaded successfully.")
        return agent
    except Exception as e:
        logger.error(f"❌ Failed to load RAG system for '{collection_name}': {e}")
        logger.warning("Did you run the ingest.py script first?")
        return None
|
|
|
|
|
|
def markdown_bold_to_html(text: str):
    """Replace each markdown **bold** span in *text* with HTML <strong> tags."""
    bold_span = re.compile(r"\*\*(.*?)\*\*")
    return bold_span.sub(r"<strong>\1</strong>", text)
|
|
|
|
|
|
def standardize_query(query):
    """Normalize a user query: trim whitespace and lowercase it.

    Falsy input (None or empty string) maps to None.
    """
    if query:
        return query.strip().lower()
    return None
|
|
|
|
|
|
def get_standalone_question(input_question, chat_history, llm):
    """Uses LLM to rewrite the latest question so it stands alone.

    With no history there is nothing to contextualize, so the question
    passes through unchanged and no LLM call is made.
    """
    if not chat_history:
        return input_question

    system_text = (
        "Given a chat history and the latest user question which might reference context in the chat history, "
        "formulate a standalone question which can be understood without the chat history. "
        "IMPORTANT: DO NOT PROVIDE ANY ANSWERS. ONLY REPHRASE THE QUESTION IF NEEDED. "
        "If the question is already clear and standalone, return it exactly as is. "
        "Output ONLY the reformulated question, nothing else."
    )
    rewrite_prompt = ChatPromptTemplate.from_messages([
        ("system", system_text),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ])

    rewriter = rewrite_prompt | llm
    result = rewriter.invoke({"chat_history": chat_history, "input": input_question})
    return result.content
|
|
|
|
|
|
def parse_agent_response(response_dict):
    """A robust helper to parse the dictionary from an AgenticQA agent.

    Returns a (answer, thoughts, validation, source) tuple, substituting
    safe fallbacks for any missing keys.
    """
    raw_answer = response_dict.get('answer', 'Error: No answer found.')
    answer = markdown_bold_to_html(raw_answer)
    thoughts = response_dict.get('thoughts', 'No thought process available.')
    validation = response_dict.get('validation', (False, 'Validation failed.'))
    source = response_dict.get('source', 'Unknown')

    # The insurance domain skips validation; present that as a positive result.
    if validation and validation[1] == "Validation skipped for insurance domain.":
        validation = (True, "Factual Answer")

    return answer, thoughts, validation, source
|
|
|
|
|
|
def extract_json_from_string(text: str) -> dict:
    """
    Finds and parses the first valid JSON object within a string.

    Returns a dictionary, or an error-describing dict when no JSON object
    is present or the extracted span fails to parse.
    """
    # Greedy match from the first '{' to the last '}', spanning newlines.
    candidate = re.search(r'\{.*\}', text, re.DOTALL)
    if candidate is None:
        return {"error": "No JSON object found in the string", "raw_text": text}

    snippet = candidate.group(0)
    try:
        return json.loads(snippet)
    except json.JSONDecodeError:
        return {"error": "Failed to parse extracted JSON", "raw_text": snippet}