Spaces:
Running
on
L40S
Running
on
L40S
Commit
·
fe47661
1
Parent(s):
3d9ea22
:clown_face: clown
Browse files
- knowledge_cutoff_demo.py +31 -27
knowledge_cutoff_demo.py
CHANGED
|
@@ -185,39 +185,43 @@ def generate_single_response(model_name, input_question):
|
|
| 185 |
@spaces.GPU
|
| 186 |
def process_all_models_parallel(input_question):
|
| 187 |
"""Process all models in parallel for maximum speed"""
|
| 188 |
-
if not input_question.strip():
|
| 189 |
-
|
| 190 |
|
| 191 |
-
start_time = time.time()
|
| 192 |
|
| 193 |
-
# Use ThreadPoolExecutor for parallel processing
|
| 194 |
-
with ThreadPoolExecutor(max_workers=3) as executor:
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
|
| 211 |
-
total_time = time.time() - start_time
|
| 212 |
|
| 213 |
-
# Add total timing to first response
|
| 214 |
-
llama1_response = results.get("Llama-1 7B", "β Error")
|
| 215 |
|
| 216 |
-
return (
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 221 |
|
| 222 |
# def benchmark_models():
|
| 223 |
# """Benchmark all models with a test question"""
|
|
|
|
@spaces.GPU
def process_all_models_parallel(input_question):
    """Generate responses from all three Llama models for *input_question*.

    NOTE(review): despite the name, the models are now queried
    sequentially — the previous ThreadPoolExecutor-based parallel
    implementation was disabled in this commit (it remains recoverable
    from version control, so the commented-out copy is removed here).
    The function name is kept unchanged for backward compatibility with
    existing callers (e.g. the Gradio event wiring).

    Args:
        input_question: the user's question; a blank/whitespace-only
            string short-circuits with an error message per output.

    Returns:
        tuple of three strings: responses from "Llama-1 7B",
        "Llama-2 7B Chat", and "Llama-3.2 3B", in that order.
    """
    # Restore the empty-input guard that was lost when the old body was
    # commented out; without it a blank question is sent to the models.
    if not input_question.strip():
        # message text reproduced exactly as it appears in this file
        msg = "β Please enter a question"
        return msg, msg, msg

    llama1_response = generate_single_response("Llama-1 7B", input_question)
    llama2_response = generate_single_response("Llama-2 7B Chat", input_question)
    llama3_response = generate_single_response("Llama-3.2 3B", input_question)
    return llama1_response, llama2_response, llama3_response
|
| 225 |
|
| 226 |
# def benchmark_models():
|
| 227 |
# """Benchmark all models with a test question"""
|