Spaces:

Presidentlin
/

Aidan-Bench

Runtime error

App Files Files Community

Presidentlin committed on Aug 13, 2024

Commit

8bbf037

1 Parent(s): 76ed6d2

x

Browse files

Files changed (2) hide show

__pycache__/main.cpython-310.pyc +0 -0
main.py +46 -38

__pycache__/main.cpython-310.pyc CHANGED Viewed

Binary files a/__pycache__/main.cpython-310.pyc and b/__pycache__/main.cpython-310.pyc differ

main.py CHANGED Viewed

@@ -5,6 +5,7 @@ import time
 from concurrent.futures import ThreadPoolExecutor, as_completed
 import threading
 import streamlit as st  # Import Streamlit
 def generate_answer(question, previous_answers, model_name, open_router_key, openai_api_key):
@@ -35,9 +36,8 @@ def evaluate_answer(question, new_answer, open_router_key, openai_api_key):
         return None
-def process_question(question, model_name, open_router_key, openai_api_key, progress_lock, completed_questions, total_questions, progress):
     start_time = time.time()
-    st.write(f"<span style='color:red'>{question}</span>", unsafe_allow_html=True)
     previous_answers = []
     question_novelty = 0
@@ -52,39 +52,38 @@ def process_question(question, model_name, open_router_key, openai_api_key, prog
                 break
             if coherence_score <= 3:
-                st.write("<span style='color:yellow'>Output is incoherent. Moving to next question.</span>",
-                         unsafe_allow_html=True)
                 break
             novelty_score = get_novelty_score(new_answer, previous_answers, openai_api_key)
             if novelty_score < 0.1:
-                st.write("<span style='color:yellow'>Output is redundant. Moving to next question.</span>",
-                         unsafe_allow_html=True)
                 break
-            st.write(f"**New Answer:**\n{new_answer}")
-            st.write(f"<span style='color:green'>Coherence Score: {coherence_score}</span>",
-                     unsafe_allow_html=True)
-            st.write(f"**Novelty Score:** {novelty_score}")
             previous_answers.append(new_answer)
             question_novelty += novelty_score
     except Exception as e:
-        st.write(f"<span style='color:red'>Unexpected error processing question: {str(e)}</span>",
-                 unsafe_allow_html=True)
     time_taken = time.time() - start_time
-    st.write(f"<span style='color:blue'>Total novelty score for this question: {question_novelty}</span>",
-             unsafe_allow_html=True)
-    st.write(f"<span style='color:blue'>Time taken: {time_taken} seconds</span>",
-             unsafe_allow_html=True)
-    # Update progress
-    with progress_lock:
-        completed_questions += 1
-        progress = completed_questions / total_questions
     return question_novelty, [
         {
@@ -117,11 +116,10 @@ def get_novelty_score(new_answer: str, previous_answers: list, openai_api_key):
     return novelty
-def benchmark_model_multithreaded(model_name, questions, open_router_key, openai_api_key, max_threads=None, progress=0, progress_lock=None):
     novelty_score = 0
-    print_lock = threading.Lock()  # Lock for thread-safe printing
     results = []
-    completed_questions = 0  # Shared variable to track progress
     # Use max_threads if provided, otherwise default to the number of questions
     if max_threads is None:
@@ -130,23 +128,33 @@ def benchmark_model_multithreaded(model_name, questions, open_router_key, openai
         max_workers = max_threads
     with ThreadPoolExecutor(max_workers=max_workers) as executor:
-        future_to_question = {executor.submit(
-            process_question, question, model_name, open_router_key, openai_api_key, progress_lock, completed_questions, len(questions), progress): question for question in questions}
-        for future in as_completed(future_to_question):
-            question = future_to_question[future]
             try:
-                question_novelty, question_results = future.result()
-                with print_lock:
-                    novelty_score += question_novelty
-                    results.extend(question_results)
-                    st.write(
-                        f"<span style='color:yellow'>Total novelty score across all questions (so far): {novelty_score}</span>",
-                        unsafe_allow_html=True)
-            except Exception as e:
-                with print_lock:
-                    st.write(f"<span style='color:red'>Error in thread: {str(e)}</span>", unsafe_allow_html=True)
     st.write(f"<span style='color:yellow'>Final total novelty score across all questions: {novelty_score}</span>",
              unsafe_allow_html=True)

 from concurrent.futures import ThreadPoolExecutor, as_completed
 import threading
 import streamlit as st  # Import Streamlit
+import queue
 def generate_answer(question, previous_answers, model_name, open_router_key, openai_api_key):
         return None
+def process_question(question, model_name, open_router_key, openai_api_key, result_queue):
     start_time = time.time()
     previous_answers = []
     question_novelty = 0
                 break
             if coherence_score <= 3:
                 break
             novelty_score = get_novelty_score(new_answer, previous_answers, openai_api_key)
             if novelty_score < 0.1:
                 break
+            # Append results to the queue instead of using st.write
+            result_queue.put({
+                "type": "answer",
+                "question": question,
+                "answer": new_answer,
+                "coherence_score": coherence_score,
+                "novelty_score": novelty_score
+            })
             previous_answers.append(new_answer)
             question_novelty += novelty_score
     except Exception as e:
+        result_queue.put({"type": "error", "message": str(e)})
     time_taken = time.time() - start_time
+    result_queue.put({
+        "type": "summary",
+        "question": question,
+        "total_novelty": question_novelty,
+        "time_taken": time_taken
+    })
     return question_novelty, [
         {
     return novelty
+def benchmark_model_multithreaded(model_name, questions, open_router_key, openai_api_key, max_threads=None):
     novelty_score = 0
     results = []
+    result_queue = queue.Queue()  # Create a queue for communication
     # Use max_threads if provided, otherwise default to the number of questions
     if max_threads is None:
         max_workers = max_threads
     with ThreadPoolExecutor(max_workers=max_workers) as executor:
+        # Submit tasks to the thread pool
+        future_to_question = {
+            executor.submit(process_question, question, model_name, open_router_key, openai_api_key, result_queue): question
+            for question in questions
+        }
+        # Process results from the queue in the main thread
+        while True:
             try:
+                result = result_queue.get_nowait()
+                if result["type"] == "answer":
+                    st.write(f"**Question:** {result['question']}")
+                    st.write(f"**New Answer:**\n{result['answer']}")
+                    st.write(f"<span style='color:green'>Coherence Score: {result['coherence_score']}</span>",
+                             unsafe_allow_html=True)
+                    st.write(f"**Novelty Score:** {result['novelty_score']}")
+                elif result["type"] == "summary":
+                    st.write(f"<span style='color:blue'>Total novelty score for question '{result['question']}': {result['total_novelty']}</span>",
+                             unsafe_allow_html=True)
+                    st.write(f"<span style='color:blue'>Time taken: {result['time_taken']} seconds</span>",
+                             unsafe_allow_html=True)
+                elif result["type"] == "error":
+                    st.write(f"<span style='color:red'>Error in thread: {result['message']}</span>",
+                             unsafe_allow_html=True)
+            except queue.Empty:
+                if not any(future.running() for future in future_to_question.keys()):
+                    break  # All tasks are done
     st.write(f"<span style='color:yellow'>Final total novelty score across all questions: {novelty_score}</span>",
              unsafe_allow_html=True)