Delete ranking.py
Browse files- ranking.py +0 -154
ranking.py
DELETED
|
@@ -1,154 +0,0 @@
|
|
| 1 |
-
import gradio as gr
|
| 2 |
-
import json
|
| 3 |
-
import os
|
| 4 |
-
import zipfile
|
| 5 |
-
|
| 6 |
-
def load_data(filepath):
    """Load and return the deserialized contents of a JSON file.

    Args:
        filepath: Path to a JSON file (for this app, a list of dicts with
            "prompt", "finetuned_output" and "base_output" keys).

    Returns:
        The deserialized JSON object.
    """
    # Explicit encoding avoids platform-dependent defaults (e.g. cp1252 on
    # Windows) breaking non-ASCII lyrics.
    with open(filepath, 'r', encoding='utf-8') as f:
        return json.load(f)
|
| 11 |
-
|
| 12 |
-
def create_comparison_app(file_paths):
    """Build a Gradio app for side-by-side ranking of two models' outputs.

    Walks through each JSON file in ``file_paths`` prompt by prompt, shows the
    finetuned ("Model A") and base ("Model B") outputs next to each other, and
    records the user's preference. When every file is finished, a zip archive
    of the annotated results is offered for download in the browser.

    Args:
        file_paths: Ordered list of JSON file paths produced by `load_data`'s
            expected format.

    Returns:
        The assembled ``gr.Blocks`` interface (not yet launched).
    """

    all_data = {}             # pristine data per file, keyed by filepath
    current_file_index = 0    # index into file_paths
    current_prompt_index = 0  # index into the current file's records
    current_filepath = ""
    results_data = {}         # per-file working copies that accumulate choices

    def initialize_data(filepath):
        """Load *filepath* (once) and reset the prompt cursor to its start."""
        nonlocal all_data, current_prompt_index, current_filepath, results_data
        if filepath not in all_data:
            all_data[filepath] = load_data(filepath)
            # BUG FIX: copy each record, not just the list — a bare
            # list(...) shares the item dicts, so writing "choice" below
            # would mutate the pristine data in all_data as well.
            results_data[filepath] = [dict(item) for item in all_data[filepath]]
        current_filepath = filepath
        current_prompt_index = 0

    def get_progress_text():
        """Return a one-line progress summary for the progress markdown."""
        nonlocal current_file_index, file_paths, current_prompt_index
        files_left = len(file_paths) - (current_file_index + 1)
        if current_filepath:
            data = results_data[current_filepath]
            prompts_left = len(data) - (current_prompt_index + 1) if current_prompt_index < len(data) else 0
            # prompts_left excludes the prompt on screen; +1 counts it back in.
            return f"File {current_file_index + 1}/{len(file_paths)} - {prompts_left + 1} prompts left in this file, {files_left} files remaining."
        else:
            return "No file loaded."

    def display_prompt_and_responses(filepath, index):
        """Return the 5-tuple of component values for prompt *index* of *filepath*.

        Tuple layout: (prompt, finetuned output, base output, progress text,
        download-button update). The download slot is None except at the very
        end of the run.
        """
        if not filepath or filepath not in results_data:
            return "No file loaded.", "", "", get_progress_text(), None

        data = results_data[filepath]
        if 0 <= index < len(data):
            item = data[index]
            prompt_text = item.get("prompt", "No prompt available")
            finetuned_output_text = item.get("finetuned_output", "No finetuned output")
            base_output_text = item.get("base_output", "No base output")
            return prompt_text, finetuned_output_text, base_output_text, get_progress_text(), None
        else:
            return "File finished! Please proceed to the next file.", "", "", get_progress_text(), None

    def advance_to_next_file_or_finish():
        """Move on to the next file, or build the results zip when all are done."""
        nonlocal current_file_index
        if current_file_index < len(file_paths) - 1:
            current_file_index += 1
            initialize_data(file_paths[current_file_index])
            return display_prompt_and_responses(current_filepath, current_prompt_index)
        # All files finished: expose the zip archive via the download button.
        zip_filepath = create_zip_archive(results_data)
        return (
            "Comparison finished for all files! Please download the results ('Download results' button).",
            "",
            "",
            "Comparison finished for all files!",
            gr.update(visible=True, value=zip_filepath, label="Download results"),
        )

    def record_choice(choice):
        """Record the user's choice, then advance the prompt/file cursors.

        Returns the same 5-tuple shape as `display_prompt_and_responses`.
        """
        nonlocal current_prompt_index

        if not current_filepath:
            return "No file loaded.", "", "", get_progress_text(), None

        data = results_data[current_filepath]
        if 0 <= current_prompt_index < len(data):
            if choice == "finetuned":
                data[current_prompt_index]["choice"] = "finetuned"
            elif choice == "base":
                # NOTE(review): "base_output" is asymmetric with "finetuned"
                # above; preserved as-is for compatibility with any downstream
                # consumer of the results files — confirm before normalizing.
                data[current_prompt_index]["choice"] = "base_output"

            current_prompt_index += 1
            if current_prompt_index < len(data):
                # BUG FIX: the display tuple already carries the download
                # slot; the original appended an extra (None,), yielding six
                # values for the five Gradio output components.
                return display_prompt_and_responses(current_filepath, current_prompt_index)
        # Current file exhausted (or record_choice fired after completion).
        return advance_to_next_file_or_finish()

    def create_zip_archive(results_data):
        """Write every annotated result list into /tmp/results.zip and return its path."""
        zip_filepath = "/tmp/results.zip"
        with zipfile.ZipFile(zip_filepath, 'w') as zipf:
            for filepath, data in results_data.items():
                results_filename = os.path.basename(filepath).replace(".json", "_results.json")
                # Serialize straight into the archive; no temp files needed.
                zipf.writestr(results_filename, json.dumps(data, indent=2))
        return zip_filepath

    with gr.Blocks() as iface:
        progress_markdown = gr.Markdown(get_progress_text())  # progress indication at the top
        gr.Markdown("# LLM song lyrics generation ranking")
        # Derive the file count from file_paths instead of hard-coding "5".
        gr.Markdown(f"There are {len(file_paths)} files (each with 50 prompts) to compare. For each prompt, choose the better lyrics between Model A and Model B. After you complete all files and prompts, you can download the results.")
        prompt_output = gr.Textbox(label="Lyrics description", lines=3, interactive=False, max_lines=3)

        with gr.Row():  # side-by-side model outputs
            with gr.Column():
                finetuned_output_box = gr.Textbox(label="Model A", lines=10, interactive=False, max_lines=10)
            with gr.Column():
                base_output_box = gr.Textbox(label="Model B", lines=10, interactive=False, max_lines=10)

        with gr.Row():  # choice buttons
            finetuned_button = gr.Button("Model A is better")
            base_button = gr.Button("Model B is better")

        # Hidden until the final zip archive is ready.
        file_download_output = gr.DownloadButton(label="Download results", visible=False)

        def load_initial_file(files):
            """Load the first file and return the initial component values."""
            if files:
                initialize_data(files[0])
                # BUG FIX: no extra (None,) — display already returns 5 values.
                return display_prompt_and_responses(current_filepath, current_prompt_index)
            return "No file loaded.", "", "", get_progress_text(), None

        outputs = [prompt_output, finetuned_output_box, base_output_box, progress_markdown, file_download_output]

        # Initial display: load the first file on app start.
        iface.load(
            load_initial_file,
            inputs=[gr.State(file_paths)],  # pass the list of filepaths as state
            outputs=outputs,
        )

        finetuned_button.click(
            fn=record_choice,
            inputs=[gr.State("finetuned")],
            outputs=outputs,
            api_name="choose_finetuned"
        )
        base_button.click(
            fn=record_choice,
            inputs=[gr.State("base")],
            outputs=outputs,
            api_name="choose_base"
        )

    return iface
|
| 150 |
-
|
| 151 |
-
if __name__ == '__main__':
    # One response file per model under comparison.
    response_files = [
        "./Qwen2.5-0.5B.json",
        "./SmolLM135.json",
        "./SmolLM135-instruct.json",
        "./SmolLM360.json",
        "./SmolLM360-instruct.json",
    ]
    ranking_app = create_comparison_app(response_files)
    ranking_app.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|