Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -62,16 +62,18 @@ def add_text_to_db(text):
|
|
| 62 |
|
| 63 |
return f"Text added successfully with ID: {doc_id}"
|
| 64 |
|
| 65 |
-
def search_similar_texts(query, k):
|
| 66 |
"""
|
| 67 |
Search for the top k similar texts in the vector database and rerank them.
|
|
|
|
| 68 |
|
| 69 |
Args:
|
| 70 |
query (str): The search query.
|
| 71 |
k (int): Number of results to return.
|
|
|
|
| 72 |
|
| 73 |
Returns:
|
| 74 |
-
str: Formatted search results
|
| 75 |
"""
|
| 76 |
if not query or not query.strip():
|
| 77 |
return "Error: Query cannot be empty."
|
|
@@ -79,21 +81,29 @@ def search_similar_texts(query, k):
|
|
| 79 |
if not isinstance(k, int) or k < 1:
|
| 80 |
return "Error: k must be a positive integer."
|
| 81 |
|
|
|
|
|
|
|
|
|
|
| 82 |
# Retrieve and rerank
|
| 83 |
retriever.search_kwargs["k"] = max(k * 2, 10) # Retrieve 2k or at least 10
|
| 84 |
compressor.top_n = k # Rerank to top k
|
| 85 |
docs = compression_retriever.get_relevant_documents(query)
|
| 86 |
|
| 87 |
if not docs:
|
| 88 |
-
return "No
|
| 89 |
|
| 90 |
-
#
|
| 91 |
results = []
|
| 92 |
-
for i, doc in enumerate(docs[:k]): # Ensure
|
| 93 |
text = doc.metadata.get("text", "No text available")
|
| 94 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
|
| 96 |
-
return "\n".join(results)
|
| 97 |
|
| 98 |
# Gradio interface
|
| 99 |
with gr.Blocks() as demo:
|
|
@@ -110,6 +120,7 @@ with gr.Blocks() as demo:
|
|
| 110 |
gr.Markdown("## Search Similar Texts")
|
| 111 |
query_input = gr.Textbox(label="Enter search query")
|
| 112 |
k_input = gr.Number(label="Number of results (k)", value=5, precision=0)
|
|
|
|
| 113 |
search_button = gr.Button("Search")
|
| 114 |
search_output = gr.Textbox(label="Search Results")
|
| 115 |
|
|
@@ -121,7 +132,7 @@ with gr.Blocks() as demo:
|
|
| 121 |
)
|
| 122 |
search_button.click(
|
| 123 |
fn=search_similar_texts,
|
| 124 |
-
inputs=[query_input, k_input],
|
| 125 |
outputs=search_output
|
| 126 |
)
|
| 127 |
|
|
|
|
| 62 |
|
| 63 |
return f"Text added successfully with ID: {doc_id}"
|
| 64 |
|
| 65 |
+
def search_similar_texts(query, k, threshold=0.0):
    """
    Search for the top k similar texts in the vector database and rerank them.

    Only results whose similarity score is at or above the threshold are kept.

    Args:
        query (str): The search query. Must contain non-whitespace text.
        k (int): Number of results to return. Must be a positive integer.
        threshold (float): Minimum similarity score (0 to 1). Defaults to
            0.0 so the earlier two-argument call form keeps working.

    Returns:
        str: Formatted search results with similarity scores,
        "No such record." when nothing qualifies, or an "Error: ..."
        message when an argument is invalid.
    """
    # A missing, non-string, or whitespace-only query is reported the same way.
    if not isinstance(query, str) or not query.strip():
        return "Error: Query cannot be empty."

    # bool is a subclass of int, so True/False must be excluded explicitly.
    if isinstance(k, bool) or not isinstance(k, int) or k < 1:
        return "Error: k must be a positive integer."

    # The chained comparison is False for NaN, so NaN is rejected as well.
    if (
        isinstance(threshold, bool)
        or not isinstance(threshold, (int, float))
        or not 0 <= threshold <= 1
    ):
        return "Error: Threshold must be a number between 0 and 1."

    # Retrieve and rerank
    retriever.search_kwargs["k"] = max(k * 2, 10)  # Retrieve 2k or at least 10
    compressor.top_n = k  # Rerank to top k
    docs = compression_retriever.get_relevant_documents(query)

    if not docs:
        return "No such record."

    # Filter results by threshold
    results = []
    # "Result N" is the position in the reranked list, so numbers may skip
    # when an earlier document falls below the threshold.
    for i, doc in enumerate(docs[:k]):  # Ensure at most k results
        text = doc.metadata.get("text", "No text available")
        # NOTE(review): presumably the reranker stores its score under
        # metadata["score"] - confirm the key name against the compressor.
        try:
            score = float(doc.metadata.get("score", 0.0))
        except (TypeError, ValueError):
            # A None or non-numeric score is treated like a missing one.
            score = 0.0
        if score >= threshold:
            results.append(f"Result {i+1}:\nText: {text}\nScore: {score:.4f}\n")

    if not results:
        return "No such record."

    return "\n".join(results)
|
| 107 |
|
| 108 |
# Gradio interface
|
| 109 |
with gr.Blocks() as demo:
|
|
|
|
| 120 |
gr.Markdown("## Search Similar Texts")
|
| 121 |
query_input = gr.Textbox(label="Enter search query")
|
| 122 |
k_input = gr.Number(label="Number of results (k)", value=5, precision=0)
|
| 123 |
+
threshold_input = gr.Number(label="Similarity threshold (0 to 1)", value=0.5, minimum=0, maximum=1)
|
| 124 |
search_button = gr.Button("Search")
|
| 125 |
search_output = gr.Textbox(label="Search Results")
|
| 126 |
|
|
|
|
| 132 |
)
|
| 133 |
search_button.click(
|
| 134 |
fn=search_similar_texts,
|
| 135 |
+
inputs=[query_input, k_input, threshold_input],
|
| 136 |
outputs=search_output
|
| 137 |
)
|
| 138 |
|