Spaces:
Runtime error
Runtime error
| import os | |
| import asyncio | |
| import streamlit as st | |
| from crawl4ai import AsyncWebCrawler | |
| from crawl4ai.async_configs import BrowserConfig, CrawlerRunConfig | |
| from langchain_core.documents import Document | |
| from langchain_text_splitters import RecursiveCharacterTextSplitter | |
| from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint | |
| from langchain_huggingface.embeddings import HuggingFaceEmbeddings | |
| from langchain_community.vectorstores import Chroma | |
# Load the Hugging Face token from Space Secrets and expose it through the
# environment so the Hugging Face client libraries can pick it up.
_hf_token = st.secrets["hf"]
# Kept for backward compatibility with anything that reads the original name.
os.environ["HUGGINGFACEHUB_API_KEY"] = _hf_token
# The variable name documented by langchain-huggingface / huggingface_hub is
# *_API_TOKEN, not *_API_KEY.
os.environ["HUGGINGFACEHUB_API_TOKEN"] = _hf_token
os.environ["HF_TOKEN"] = _hf_token
async def run_pipeline(url: str, query: str):
    """Crawl *url*, index its text, and answer *query* with Llama 3.1.

    The page is fetched with crawl4ai, its markdown is split into
    overlapping chunks, the chunks are embedded into a throw-away Chroma
    collection, and the chunks most similar to *query* are passed as
    context to the chat model.

    Args:
        url: Address of the page to crawl.
        query: Question to answer from the page content.

    Returns:
        The ``content`` of the chat model's response.

    Raises:
        RuntimeError: If the crawl reports failure, or the page yields no
            text to index.
    """
    # Local import: only needed to build the per-run collection name.
    import uuid

    # 1. Crawl the page with default browser and run settings.
    browser_config = BrowserConfig()
    run_config = CrawlerRunConfig()
    async with AsyncWebCrawler(config=browser_config) as crawler:
        result = await crawler.arun(url=url, config=run_config)

    # Fail with a clear message instead of an AttributeError further down.
    if not result.success:
        raise RuntimeError(f"Failed to crawl {url}: {result.error_message}")
    markdown = result.markdown
    raw_markdown = markdown.raw_markdown if markdown is not None else ""

    # 2. Wrap the markdown in a LangChain document and split it into chunks.
    doc = Document(page_content=raw_markdown)
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    chunks = text_splitter.split_documents([doc])
    if not chunks:
        raise RuntimeError(f"No text content could be extracted from {url}")

    # 3. Embed the chunks into a collection that is unique to this call.
    # NOTE(review): Chroma's default in-process client appears to share the
    # default collection between instances, so without a unique name
    # repeated runs in one Streamlit process would mix chunks from
    # different pages -- confirm against the installed chromadb version.
    emb = HuggingFaceEmbeddings(model="avsolatorio/GIST-small-Embedding-v0")
    cb = Chroma(
        collection_name=f"crawl-{uuid.uuid4().hex}",
        embedding_function=emb,
    )
    try:
        cb.add_documents(chunks)
        # 4. Retrieve the chunks most similar to the question.
        docs = cb.similarity_search(query, k=3)
    finally:
        # Drop the temporary collection so runs do not accumulate in memory.
        cb.delete_collection()

    # 5. Llama 3.1 served through the Nebius inference provider.
    llama_model = HuggingFaceEndpoint(
        repo_id="meta-llama/Llama-3.1-8B-Instruct",
        provider="nebius",
        temperature=0.7,
        max_new_tokens=300,
        task="conversational",
    )
    # NOTE(review): the keyword arguments below duplicate the endpoint
    # configuration; they are kept exactly as in the original call.
    llama = ChatHuggingFace(
        llm=llama_model,
        repo_id="meta-llama/Llama-3.1-8B-Instruct",
        provider="nebius",
        temperature=0.7,
        max_new_tokens=300,
        task="conversational",
    )

    # Use every retrieved chunk as context, not only the top hit.
    context = "\n\n".join(d.page_content for d in docs)
    response = llama.invoke(f"Context: {context}\n\nQuestion: {query}")
    return response.content
# Streamlit UI: collect a URL and a question, run the pipeline on demand,
# and show either the answer or a readable error.
# NOTE(review): the emoji in the UI strings below look mis-encoded
# (mojibake); they are reproduced unchanged here -- restore the intended
# characters from the original UTF-8 file.
st.title("ππ Ask Any Website with Llama3")
st.write("Enter a URL and your question β this app crawls the site and answers using Llama3!")
url = st.text_input("π Website URL", placeholder="https://www.example.com")
query = st.text_input("π¬ Your Question", placeholder="What is this website about?")

if st.button("π Crawl & Answer"):
    # Strip once so validation and the pipeline see the same values.
    clean_url = url.strip()
    clean_query = query.strip()
    if not clean_url or not clean_query:
        st.warning("β Please enter both a URL and a question.")
    else:
        try:
            with st.spinner("πΈοΈ Crawling website and generating answer..."):
                result = asyncio.run(run_pipeline(clean_url, clean_query))
        except Exception as exc:
            # UI boundary: crawling, embedding, and inference can each fail
            # for reasons outside the app's control; report instead of
            # crashing the script run.
            st.error(f"Could not answer the question: {exc}")
        else:
            st.success(f"β **Answer:** {result}")