Commit 9aa9fbb · Parent(s): 243ccd3
main.py CHANGED
@@ -7,7 +7,6 @@ import threading
 import streamlit as st  # Import Streamlit
 import queue
 
-
 def generate_answer(question, previous_answers, model_name, open_router_key, openai_api_key):
     """Generates an answer to a question using the specified language model."""
     gen_prompt = create_gen_prompt(question, previous_answers)
@@ -16,8 +15,7 @@ def generate_answer(question, previous_answers, model_name, open_router_key, ope
                                      openai_api_key=openai_api_key)
         return new_answer
     except Exception as e:
-        st.
-                 unsafe_allow_html=True)
+        st.error(f"Error generating answer: {str(e)}")  # Use st.error
         return None
 
 
@@ -31,8 +29,7 @@ def evaluate_answer(question, new_answer, open_router_key, openai_api_key, judge
         coherence_score = int(judge_response.split("<coherence_score>")[1].split("</coherence_score>")[0])
         return coherence_score
     except Exception as e:
-        st.
-                 unsafe_allow_html=True)
+        st.error(f"Error getting judge response: {str(e)}")  # Use st.error
         return None
 
 
@@ -153,21 +150,17 @@ def benchmark_model_multithreaded(model_name, questions, open_router_key, openai
         if result["type"] == "answer":
             st.write(f"**Question:** {result['question']}")
             st.write(f"**New Answer:**\n{result['answer']}")
-            st.write(f"
-                     unsafe_allow_html=True)
+            st.write(f"Coherence Score: {result['coherence_score']}")  # st.success for coherence
             st.write(f"**Novelty Score:** {result['novelty_score']}")
-            results.extend(result["results"])
-            novelty_score += result["novelty_score"]
-            st.
+            results.extend(result["results"])
+            novelty_score += result["novelty_score"]
+            st.info(f"Total novelty score across all questions (so far): {novelty_score}")  # st.info for running total
 
         elif result["type"] == "summary":
-            st.
-
-            st.write(f"<span style='color:blue'>Time taken: {result['time_taken']} seconds</span>",
-                     unsafe_allow_html=True)
+            st.info(f"Total novelty score for question '{result['question']}': {result['total_novelty']}")  # st.info for summary
+            st.info(f"Time taken: {result['time_taken']} seconds")  # st.info for summary
         elif result["type"] == "error":
-            st.
-                     unsafe_allow_html=True)
+            st.error(f"Error in thread: {result['message']}")  # st.error for errors
 
     # Process remaining results in the queue (if any)
     while not result_queue.empty():
@@ -175,24 +168,20 @@ def benchmark_model_multithreaded(model_name, questions, open_router_key, openai
         if result["type"] == "answer":
             st.write(f"**Question:** {result['question']}")
             st.write(f"**New Answer:**\n{result['answer']}")
-            st.
-                 unsafe_allow_html=True)
+            st.success(f"Coherence Score: {result['coherence_score']}")  # st.success for coherence
             st.write(f"**Novelty Score:** {result['novelty_score']}")
             results.extend(result["results"])  # Add results here
             novelty_score += result["novelty_score"]  # Update novelty score
             st.warning(f"Total novelty score across all questions (so far): {novelty_score}")
 
         elif result["type"] == "summary":
-            st.
-
-            st.write(f"<span style='color:blue'>Time taken: {result['time_taken']} seconds</span>",
-                     unsafe_allow_html=True)
+            st.info(f"Total novelty score for question '{result['question']}': {result['total_novelty']}")  # st.info for summary
+            st.info(f"Time taken: {result['time_taken']} seconds")  # st.info for summary
         elif result["type"] == "error":
-            st.
-                     unsafe_allow_html=True)
+            st.error(f"Error in thread: {result['message']}")  # st.error for errors
 
 
-    st.
+    st.info(f"Final total novelty score across all questions: {novelty_score}")
     return results
 
 
@@ -205,24 +194,18 @@ def benchmark_model_sequential(model_name, questions, open_router_key, openai_ap
         if result["type"] == "answer":
             st.write(f"**Question:** {result['question']}")
             st.write(f"**New Answer:**\n{result['answer']}")
-            st.
-                 unsafe_allow_html=True)
+            st.success(f"Coherence Score: {result['coherence_score']}")  # st.success for coherence
            st.write(f"**Novelty Score:** {result['novelty_score']}")
             results.extend(result["results"])
             novelty_score += result["novelty_score"]  # Add to novelty score
-            st.
-                f"Total novelty score across processed questions: {novelty_score}")
+            st.success(f"Coherence Score: {result['coherence_score']}")  # st.success for coherence
 
         elif result["type"] == "summary":
-            st.
-
-            st.write(f"<span style='color:blue'>Time taken: {result['time_taken']} seconds</span>",
-                     unsafe_allow_html=True)
-
+            st.info(f"Total novelty score for question '{result['question']}': {result['total_novelty']}")  # st.info for summary
+            st.info(f"Time taken: {result['time_taken']} seconds")  # st.info for summary
         elif result["type"] == "error":
-            st.
-                     unsafe_allow_html=True)
+            st.error(f"Error in thread: {result['message']}")  # st.error for errors
 
-    st.
+    st.info(f"Final total novelty score across all questions: {novelty_score}")
 
     return results
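
The pattern behind this commit: the old code rendered status text as raw HTML spans via st.write(..., unsafe_allow_html=True) (for example, the removed blue "Time taken" span), and the new code uses Streamlit's built-in status elements instead. Below is a minimal sketch of that pattern; the helper name render_result and the sample result dict are illustrative assumptions, not code from this repository.

    import streamlit as st  # Streamlit status elements used by this commit

    # Hypothetical helper for illustration only; the name `render_result` and
    # the shape of `result` are assumptions, not part of this repository.
    def render_result(result: dict) -> None:
        if result["type"] == "answer":
            st.write(f"**Question:** {result['question']}")
            st.write(f"**New Answer:**\n{result['answer']}")
            st.success(f"Coherence Score: {result['coherence_score']}")
            st.write(f"**Novelty Score:** {result['novelty_score']}")
        elif result["type"] == "summary":
            # Before this commit (visible in the removed lines above):
            #   st.write(f"<span style='color:blue'>Time taken: ...</span>", unsafe_allow_html=True)
            # After: a built-in status element, no raw HTML or unsafe_allow_html needed.
            st.info(f"Time taken: {result['time_taken']} seconds")
        elif result["type"] == "error":
            st.error(f"Error in thread: {result['message']}")

    # Example usage with a stub result dict (illustrative values only):
    render_result({"type": "error", "message": "judge model timed out"})

Using st.success / st.info / st.error / st.warning keeps the color coding the HTML spans were providing while avoiding unsafe_allow_html entirely.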