Commit 9aa9fbb · Parent(s): 243ccd3
main.py CHANGED
@@ -7,7 +7,6 @@ import threading
 import streamlit as st  # Import Streamlit
 import queue
 
-
 def generate_answer(question, previous_answers, model_name, open_router_key, openai_api_key):
     """Generates an answer to a question using the specified language model."""
     gen_prompt = create_gen_prompt(question, previous_answers)
@@ -16,8 +15,7 @@ def generate_answer(question, previous_answers, model_name, open_router_key, ope
                                      openai_api_key=openai_api_key)
         return new_answer
     except Exception as e:
-        st.
-                 unsafe_allow_html=True)
+        st.error(f"Error generating answer: {str(e)}")  # Use st.error
         return None
 
 
@@ -31,8 +29,7 @@ def evaluate_answer(question, new_answer, open_router_key, openai_api_key, judge
         coherence_score = int(judge_response.split("<coherence_score>")[1].split("</coherence_score>")[0])
         return coherence_score
     except Exception as e:
-        st.
-                 unsafe_allow_html=True)
+        st.error(f"Error getting judge response: {str(e)}")  # Use st.error
         return None
 
 
@@ -153,21 +150,17 @@ def benchmark_model_multithreaded(model_name, questions, open_router_key, openai
         if result["type"] == "answer":
             st.write(f"**Question:** {result['question']}")
             st.write(f"**New Answer:**\n{result['answer']}")
-            st.write(f"
-                     unsafe_allow_html=True)
+            st.write(f"Coherence Score: {result['coherence_score']}")  # st.success for coherence
             st.write(f"**Novelty Score:** {result['novelty_score']}")
-            results.extend(result["results"])
-            novelty_score += result["novelty_score"]
-            st.
+            results.extend(result["results"])
+            novelty_score += result["novelty_score"]
+            st.info(f"Total novelty score across all questions (so far): {novelty_score}")  # st.info for running total
 
         elif result["type"] == "summary":
-            st.
-
-            st.write(f"<span style='color:blue'>Time taken: {result['time_taken']} seconds</span>",
-                     unsafe_allow_html=True)
+            st.info(f"Total novelty score for question '{result['question']}': {result['total_novelty']}")  # st.info for summary
+            st.info(f"Time taken: {result['time_taken']} seconds")  # st.info for summary
         elif result["type"] == "error":
-            st.
-                     unsafe_allow_html=True)
+            st.error(f"Error in thread: {result['message']}")  # st.error for errors
 
     # Process remaining results in the queue (if any)
     while not result_queue.empty():
@@ -175,24 +168,20 @@ def benchmark_model_multithreaded(model_name, questions, open_router_key, openai
         if result["type"] == "answer":
             st.write(f"**Question:** {result['question']}")
             st.write(f"**New Answer:**\n{result['answer']}")
-            st.
-                 unsafe_allow_html=True)
+            st.success(f"Coherence Score: {result['coherence_score']}")  # st.success for coherence
             st.write(f"**Novelty Score:** {result['novelty_score']}")
             results.extend(result["results"])  # Add results here
             novelty_score += result["novelty_score"]  # Update novelty score
             st.warning(f"Total novelty score across all questions (so far): {novelty_score}")
 
         elif result["type"] == "summary":
-            st.
-
-            st.write(f"<span style='color:blue'>Time taken: {result['time_taken']} seconds</span>",
-                     unsafe_allow_html=True)
+            st.info(f"Total novelty score for question '{result['question']}': {result['total_novelty']}")  # st.info for summary
+            st.info(f"Time taken: {result['time_taken']} seconds")  # st.info for summary
         elif result["type"] == "error":
-            st.
-                     unsafe_allow_html=True)
+            st.error(f"Error in thread: {result['message']}")  # st.error for errors
 
 
-    st.
+    st.info(f"Final total novelty score across all questions: {novelty_score}")
     return results
 
 
@@ -205,24 +194,18 @@ def benchmark_model_sequential(model_name, questions, open_router_key, openai_ap
         if result["type"] == "answer":
             st.write(f"**Question:** {result['question']}")
             st.write(f"**New Answer:**\n{result['answer']}")
-            st.
-                 unsafe_allow_html=True)
+            st.success(f"Coherence Score: {result['coherence_score']}")  # st.success for coherence
            st.write(f"**Novelty Score:** {result['novelty_score']}")
             results.extend(result["results"])
             novelty_score += result["novelty_score"]  # Add to novelty score
-            st.
-                f"Total novelty score across processed questions: {novelty_score}")
+            st.success(f"Coherence Score: {result['coherence_score']}")  # st.success for coherence
 
         elif result["type"] == "summary":
-            st.
-
-            st.write(f"<span style='color:blue'>Time taken: {result['time_taken']} seconds</span>",
-                     unsafe_allow_html=True)
-
+            st.info(f"Total novelty score for question '{result['question']}': {result['total_novelty']}")  # st.info for summary
+            st.info(f"Time taken: {result['time_taken']} seconds")  # st.info for summary
         elif result["type"] == "error":
-            st.
-                     unsafe_allow_html=True)
+            st.error(f"Error in thread: {result['message']}")  # st.error for errors
 
-    st.
+    st.info(f"Final total novelty score across all questions: {novelty_score}")
 
     return results
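
The pattern behind this commit: the old code rendered status text as raw HTML spans via st.write(..., unsafe_allow_html=True) (for example, the removed blue "Time taken" span), and the new code uses Streamlit's built-in status elements instead. Below is a minimal sketch of that pattern; the helper name render_result and the sample result dict are illustrative assumptions, not code from this repository.

    import streamlit as st  # Streamlit status elements used by this commit

    # Hypothetical helper for illustration only; the name `render_result` and
    # the shape of `result` are assumptions, not part of this repository.
    def render_result(result: dict) -> None:
        if result["type"] == "answer":
            st.write(f"**Question:** {result['question']}")
            st.write(f"**New Answer:**\n{result['answer']}")
            st.success(f"Coherence Score: {result['coherence_score']}")
            st.write(f"**Novelty Score:** {result['novelty_score']}")
        elif result["type"] == "summary":
            # Before this commit (visible in the removed lines above):
            #   st.write(f"<span style='color:blue'>Time taken: ...</span>", unsafe_allow_html=True)
            # After: a built-in status element, no raw HTML or unsafe_allow_html needed.
            st.info(f"Time taken: {result['time_taken']} seconds")
        elif result["type"] == "error":
            st.error(f"Error in thread: {result['message']}")

    # Example usage with a stub result dict (illustrative values only):
    render_result({"type": "error", "message": "judge model timed out"})

Using st.success / st.info / st.error / st.warning keeps the color coding the HTML spans were providing while avoiding unsafe_allow_html entirely.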