Spaces:

Tonic
/

YiJina

Build error

App Files Files Community

Tonic committed on Jul 13, 2024

Commit

5362be0

1 Parent(s): ebe18c1

use in memory chroma client

Browse files

Files changed (2) hide show

.gitignore +2 -1
app.py +14 -42

.gitignore CHANGED Viewed

@@ -2,4 +2,5 @@
 chroma_data/
 __pycache__/
 chroma.log
-.venv/

 chroma_data/
 __pycache__/
 chroma.log
+.venv/
+pad.py

app.py CHANGED Viewed

@@ -38,7 +38,9 @@ hf_token, yi_token = load_env_variables()
 def clear_cuda_cache():
     torch.cuda.empty_cache()
-client = OpenAI(api_key=yi_token, base_url=API_BASE)
 class EmbeddingGenerator:
     def __init__(self, model_name: str, token: str, intention_client):
@@ -125,59 +127,29 @@ def load_documents(file_path: str, mode: str = "elements"):
     docs = loader.load()
     return [doc.page_content for doc in docs]
-def wait_for_chroma_server(client, retries=10, delay=0.5):
-    for _ in range(retries):
-        try:
-            client.heartbeat()
-            print("Chroma server is up and running!")
-            return True
-        except Exception as e:
-            print(f"Attempt to connect to Chroma server failed: {e}")
-        time.sleep(delay)
-    print("Failed to connect to Chroma server after multiple attempts.")
-    return False
 def initialize_chroma(collection_name: str, embedding_function: MyEmbeddingFunction):
-    host = 'localhost'
-    port = 8000
-    client = HttpClient(host=host, port=port, settings=Settings(allow_reset=True, anonymized_telemetry=False))
-    if not wait_for_chroma_server(client):
-        raise ConnectionError("Could not connect to Chroma server. Ensure it is running.")
-    client.reset()  # Empties and completely resets the database
-    collection = client.create_collection(collection_name)
-    return client, collection
-def add_documents_to_chroma(client, collection, documents: list, embedding_function: MyEmbeddingFunction):
     for doc in documents:
         embeddings, metadata = embedding_function.embedding_generator.compute_embeddings(doc)
         for embedding, meta in zip(embeddings, metadata):
-            collection.add(
                 ids=[str(uuid.uuid1())],
                 documents=[doc],
                 embeddings=[embedding],
                 metadatas=[meta]
             )
-def query_chroma(client, collection_name: str, query_text: str, embedding_function: MyEmbeddingFunction):
-    # Compute query embeddings and metadata
-    query_embeddings, query_metadata = embedding_function.embedding_generator.compute_embeddings(query_text)
-    # Initialize Chroma with the collection
-    db = Chroma(client=client, collection_name=collection_name, embedding_function=embedding_function)
-    # Perform similarity search using the query embeddings and metadata
-    result_docs = db.similarity_search(
-        query_embeddings=query_embeddings,
-        query_metadata=query_metadata
     )
     return result_docs
 # Initialize clients
 intention_client = OpenAI(api_key=yi_token, base_url=API_BASE)
 embedding_generator = EmbeddingGenerator(model_name=model_name, token=hf_token, intention_client=intention_client)
@@ -246,5 +218,5 @@ with gr.Blocks() as demo:
             query_button.click(query_documents, inputs=query_input, outputs=query_output)
 if __name__ == "__main__":
-    os.system("chroma run --host localhost --port 8000 &")
     demo.launch()

 def clear_cuda_cache():
     torch.cuda.empty_cache()
+client = OpenAI(api_key=yi_token, base_url=API_BASE)
+chroma_client = HttpClient(host="localhost", port=8000)
+chroma_collection = chroma_client.create_collection("all-my-documents")
 class EmbeddingGenerator:
     def __init__(self, model_name: str, token: str, intention_client):
     docs = loader.load()
     return [doc.page_content for doc in docs]
 def initialize_chroma(collection_name: str, embedding_function: MyEmbeddingFunction):
+    db = Chroma(client=chroma_client, collection_name=collection_name, embedding_function=embedding_function)
+    return db
+def add_documents_to_chroma(documents: list, embedding_function: MyEmbeddingFunction):
     for doc in documents:
         embeddings, metadata = embedding_function.embedding_generator.compute_embeddings(doc)
         for embedding, meta in zip(embeddings, metadata):
+            chroma_collection.add(
                 ids=[str(uuid.uuid1())],
                 documents=[doc],
                 embeddings=[embedding],
                 metadatas=[meta]
             )
+def query_chroma(query_text: str, embedding_function: MyEmbeddingFunction):
+    query_embeddings, query_metadata = embedding_function.embedding_generator.compute_embeddings(query_text)
+    result_docs = chroma_collection.query(
+        query_texts=[query_text],
+        n_results=2
     )
     return result_docs
 # Initialize clients
 intention_client = OpenAI(api_key=yi_token, base_url=API_BASE)
 embedding_generator = EmbeddingGenerator(model_name=model_name, token=hf_token, intention_client=intention_client)
             query_button.click(query_documents, inputs=query_input, outputs=query_output)
 if __name__ == "__main__":
+    # os.system("chroma run --host localhost --port 8000 &")
     demo.launch()