Spaces:
Sleeping
Sleeping
| import os | |
| from dotenv import load_dotenv | |
| from langchain_community.vectorstores import Chroma | |
| from langchain_core.messages import HumanMessage, SystemMessage, ToolMessage | |
| from langchain_huggingface import (ChatHuggingFace, HuggingFaceEmbeddings, | |
| HuggingFaceEndpoint) | |
| from langgraph.graph import START, MessagesState, StateGraph | |
| from langgraph.prebuilt import ToolNode, tools_condition | |
| from tools import (absolute, add, analyze_csv_file, analyze_excel_file, | |
| arvix_search, audio_transcription, compound_interest, | |
| convert_temperature, divide, exponential, extract_text, | |
| factorial, floor_divide, get_current_time_in_timezone, | |
| greatest_common_divisor, is_prime, least_common_multiple, | |
| logarithm, modulus, multiply, percentage_calculator, power, | |
| python_code_parser, reverse_sentence, | |
| roman_calculator_converter, square_root, subtract, | |
| web_search, wiki_search) | |
# --- Environment and credentials ---
# Pull variables from a local .env file (if any) into the process environment
# before reading the API keys below.
load_dotenv()
HUGGINGFACEHUB_API_TOKEN = os.environ.get("HUGGINGFACEHUB_API_TOKEN")
# Not used by any live code in the visible part of this file.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")

# --- Tool registry ---
# Every tool handed to the LLM (via bind_tools) and to the ToolNode.
# The original ordering is kept as-is.
tools = [
    multiply,
    add,
    subtract,
    power,
    divide,
    modulus,
    square_root,
    floor_divide,
    absolute,
    logarithm,
    exponential,
    web_search,
    roman_calculator_converter,
    get_current_time_in_timezone,
    compound_interest,
    convert_temperature,
    factorial,
    greatest_common_divisor,
    is_prime,
    least_common_multiple,
    percentage_calculator,
    wiki_search,
    analyze_excel_file,
    arvix_search,
    audio_transcription,
    python_code_parser,
    analyze_csv_file,
    extract_text,
    reverse_sentence,
]

# --- System prompt ---
# Instructs the model to end its reply with "FINAL ANSWER: ..." and spells out
# the formatting rules for numbers, strings and comma separated lists.
system_prompt = """
You are a general AI assistant. I will ask you a question.
Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER].
YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
If you are asked for a number, don't use comma to write your number neither use units
such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles,
neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
If you are asked for a comma separated list, apply the above rules depending of whether the element to be
put in the list is a number or a string.
"""

# Wrapped once here; the assistant node prepends it to every model call.
sys_msg = SystemMessage(content=system_prompt)
def get_vector_store(persist_directory="chroma_db"):
    """
    Return a Chroma vector store backed by ``persist_directory``.

    When the directory already exists and is non-empty, the store is opened
    from it. Otherwise the directory is created and a new store is built from
    a handful of example texts, persisted at that location.

    Args:
        persist_directory: Folder where Chroma keeps its data.

    Returns:
        The loaded or newly created Chroma vector store.
    """
    embedder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

    already_populated = os.path.exists(persist_directory) and os.listdir(persist_directory)
    if not already_populated:
        print("Creating new vector store...")
        os.makedirs(persist_directory, exist_ok=True)

        # Seed texts so a fresh store has something to retrieve.
        seed_texts = [
            "The Principle of Double Effect is an ethical theory that distinguishes between the intended and foreseen consequences of an action.",
            "St. Thomas Aquinas is often associated with the development of the Principle of Double Effect.",
            "LangGraph is a library for building stateful, multi-actor applications with LLMs.",
            "Chroma is a vector database used for storing and retrieving embeddings.",
        ]
        # from_texts is given persist_directory, so no explicit persist() call
        # is made here.
        return Chroma.from_texts(
            texts=seed_texts,
            embedding=embedder,
            persist_directory=persist_directory,
        )

    print("Loading existing vector store...")
    return Chroma(
        persist_directory=persist_directory,
        embedding_function=embedder,
    )
# --- Vector store and retriever (created at import time) ---
vector_store = get_vector_store()

# Retriever over the store above: Maximum Marginal Relevance search for
# diverse results, returning 2 documents per query.
retriever_component = vector_store.as_retriever(
    search_type="mmr",
    search_kwargs={"k": 2, "lambda_mult": 0.5},
)
def build_graph():
    """Build and compile the retrieval-augmented, tool-calling agent graph.

    Graph layout::

        START -> retriever -> assistant -> (tools -> assistant)* -> END

    * ``retriever`` looks up context in the module-level
      ``retriever_component`` and appends it to the conversation.
    * ``assistant`` calls the tool-enabled chat model with the system prompt
      prepended.
    * ``tools`` executes whatever tool calls the assistant emitted;
      ``tools_condition`` decides whether to route there or finish.

    Returns:
        The compiled graph (result of ``StateGraph.compile()``).
    """
    # Raw text-generation endpoint on the HuggingFace Hub.
    llm_endpoint = HuggingFaceEndpoint(
        repo_id="Qwen/Qwen2.5-Coder-32B-Instruct",
        huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN,
        temperature=0.3,
        max_new_tokens=512,
        timeout=60,
    )
    # Wrap the endpoint to get chat-model functionality, then bind the tools.
    llm = ChatHuggingFace(llm=llm_endpoint)
    llm_with_tools = llm.bind_tools(tools)

    # --- Nodes ---
    def assistant(state: MessagesState):
        """Call the tool-enabled LLM on the conversation so far."""
        # The system prompt is prepended on every call rather than stored in
        # the state.
        messages_with_system_prompt = [sys_msg] + state["messages"]
        return {"messages": [llm_with_tools.invoke(messages_with_system_prompt)]}

    def retriever_node(state: MessagesState):
        """Retrieve documents relevant to the latest human message.

        Returns a state update holding one ToolMessage with the retrieved
        context, or an empty update when there is nothing to search for or
        nothing was found.
        """
        messages = state["messages"]
        if not messages:
            # Nothing to build a query from.
            return {"messages": []}

        # Prefer the most recent human turn; if the history contains none,
        # fall back to the last message (the previous behaviour).
        query_source = next(
            (msg for msg in reversed(messages) if isinstance(msg, HumanMessage)),
            messages[-1],
        )
        query = query_source.content
        # Content may be a list of content blocks or blank text; the retriever
        # is only queried with a non-empty string.
        if not isinstance(query, str) or not query.strip():
            return {"messages": []}

        retrieved_docs = retriever_component.invoke(query)
        if not retrieved_docs:
            return {"messages": []}

        retrieved_context = "\n\n".join(doc.page_content for doc in retrieved_docs)
        # NOTE(review): this ToolMessage is not preceded by an AI message with
        # a matching tool call; some chat backends may reject such a history -
        # confirm the endpoint accepts it.
        context_message = ToolMessage(
            content=f"Retrieved context from vector store:\n\n{retrieved_context}",
            tool_call_id="retriever",  # A descriptive ID, not a real tool call
        )
        return {"messages": [context_message]}

    # --- Graph definition ---
    builder = StateGraph(MessagesState)
    builder.add_node("retriever", retriever_node)
    builder.add_node("assistant", assistant)
    builder.add_node("tools", ToolNode(tools))

    builder.add_edge(START, "retriever")
    builder.add_edge("retriever", "assistant")
    builder.add_conditional_edges("assistant", tools_condition)
    builder.add_edge("tools", "assistant")

    return builder.compile()
# Manual smoke test: run one question through the graph and print each step.
if __name__ == "__main__":
    question = "When was a picture of St. Thomas Aquinas first added to the Wikipedia page on the Principle of double effect?"

    graph = build_graph()

    # The initial state for the graph: a single human message.
    messages = [HumanMessage(content=question)]
    initial_state = {"messages": messages}

    # Stream the state after every step and show the newest message each time.
    for s in graph.stream(initial_state, stream_mode="values"):
        message = s["messages"][-1]
        # Only the retriever node's synthetic message (tool_call_id
        # "retriever") is retrieved context; outputs of real tools are also
        # ToolMessages and must not be printed under that banner.
        is_retrieved_context = (
            isinstance(message, ToolMessage)
            and message.tool_call_id == "retriever"
        )
        if is_retrieved_context:
            print("---RETRIEVED CONTEXT---")
            print(message.content)
            print("-----------------------")
        else:
            message.pretty_print()