Peiran committed on
Commit
7d7268b
·
1 Parent(s): 8ad599c

Add Scene Composition & Object Insertion evaluation UI

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.jpg filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ AGENTS.md
app.py ADDED
@@ -0,0 +1,365 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import csv
2
+ import itertools
3
+ import os
4
+ from datetime import datetime
5
+ from typing import Dict, List, Tuple
6
+
7
+ import gradio as gr
8
+
9
+
10
# Directory that contains this script. Task folders and CSV files are resolved
# relative to it. It is made absolute so that lookups keep working even when
# ``__file__`` is relative or the process changes its working directory.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))

# Registry of evaluation tasks, keyed by the name shown in the task dropdown.
#   "folder":       sub-directory of BASE_DIR holding the task's CSVs and images.
#   "score_fields": (score key, bilingual display label) pairs for the task.
TASK_CONFIG = {
    "Scene Composition & Object Insertion": {
        "folder": "scene_composition_and_object_insertion",
        "score_fields": [
            ("physical_interaction_fidelity_score", "物理交互保真度 (Physical Interaction Fidelity)"),
            ("optical_effect_accuracy_score", "光学效应准确度 (Optical Effect Accuracy)"),
            ("semantic_functional_alignment_score", "语义/功能对齐度 (Semantic/Functional Alignment)"),
            ("overall_photorealism_score", "整体真实感 (Overall Photorealism)"),
        ],
    },
}
22
+
23
+
24
def _csv_path_for_task(task_name: str, filename: str) -> str:
    """Return the path of *filename* inside the data folder of *task_name*.

    The folder name is looked up in ``TASK_CONFIG`` and joined onto
    ``BASE_DIR``.

    Raises:
        KeyError: if *task_name* is not a key of ``TASK_CONFIG``.
    """
    task_folder = TASK_CONFIG[task_name]["folder"]
    path_parts = (BASE_DIR, task_folder, filename)
    return os.path.join(*path_parts)
27
+
28
+
29
+ def _resolve_image_path(path: str) -> str:
30
+ return path if os.path.isabs(path) else os.path.join(BASE_DIR, path)
31
+
32
+
33
def _load_task_rows(task_name: str) -> List[Dict[str, str]]:
    """Read every row of the task's ``results.csv`` as a dict keyed by header.

    Args:
        task_name: key of ``TASK_CONFIG`` identifying the task.

    Returns:
        One dict per CSV data row, in file order.

    Raises:
        FileNotFoundError: if the task's ``results.csv`` does not exist.
    """
    csv_path = _csv_path_for_task(task_name, "results.csv")
    if not os.path.exists(csv_path):
        raise FileNotFoundError(f"未找到任务 {task_name} 的结果文件: {csv_path}")

    # "utf-8-sig" drops a leading byte-order mark (as written by some
    # spreadsheet tools) so the first header stays "test_id" and not
    # "\ufefftest_id". BOM-less UTF-8 files are read exactly as before.
    with open(csv_path, newline="", encoding="utf-8-sig") as csv_file:
        return list(csv.DictReader(csv_file))
41
+
42
+
43
def _build_image_pairs(rows: List[Dict[str, str]], task_name: str) -> List[Dict[str, str]]:
    """Build every pairwise model comparison for each (test_id, org_img) group.

    Rows that share the same ``test_id`` and ``org_img`` are grouped; within a
    group every 2-combination of rows whose ``model_name`` differs becomes one
    pair. Image paths in the result are prefixed with the task folder.

    Args:
        rows: result rows; each must provide ``test_id``, ``org_img``,
            ``model_name``, ``res`` and ``path``.
        task_name: key of ``TASK_CONFIG`` identifying the task.

    Returns:
        Pair dicts sorted by test id, then by the two model names. Numeric
        test ids come first in numeric order, followed by non-numeric ids in
        lexicographic order.

    Raises:
        KeyError: if a row lacks a required column or *task_name* is unknown.
    """
    grouped: Dict[Tuple[str, str], List[Dict[str, str]]] = {}
    for row in rows:
        key = (row["test_id"], row["org_img"])
        grouped.setdefault(key, []).append(row)

    folder = TASK_CONFIG[task_name]["folder"]
    pairs: List[Dict[str, str]] = []

    for (test_id, org_img), entries in grouped.items():
        for model_a, model_b in itertools.combinations(entries, 2):
            # Duplicate rows for the same model are not a comparison.
            if model_a["model_name"] == model_b["model_name"]:
                continue

            pairs.append(
                {
                    "test_id": test_id,
                    "org_img": os.path.join(folder, org_img),
                    "model1_name": model_a["model_name"],
                    "model1_res": model_a["res"],
                    "model1_path": os.path.join(folder, model_a["path"]),
                    "model2_name": model_b["model_name"],
                    "model2_res": model_b["res"],
                    "model2_path": os.path.join(folder, model_b["path"]),
                }
            )

    def sort_key(item: Dict[str, str]):
        test_id = item["test_id"]
        # The key must have the same shape for numeric and non-numeric ids:
        # comparing a bare int with a bare str raises TypeError inside sort().
        # The leading rank keeps the two kinds apart, so within each kind the
        # ordering is the same as a plain int / str comparison.
        try:
            id_key = (0, int(test_id), "")
        except ValueError:
            id_key = (1, 0, test_id)
        return (id_key, item["model1_name"], item["model2_name"])

    pairs.sort(key=sort_key)
    return pairs
79
+
80
+
81
def load_task(task_name: str):
    """Load the result rows of *task_name* and return its image pairs.

    Args:
        task_name: key of ``TASK_CONFIG``; an empty value is rejected.

    Returns:
        The non-empty list of pair dicts built by ``_build_image_pairs``.

    Raises:
        gr.Error: if no task is selected, the task's results file is missing,
            or no pair could be built from it.
    """
    if not task_name:
        raise gr.Error("请先选择任务。")

    try:
        rows = _load_task_rows(task_name)
    except FileNotFoundError as exc:
        # Report a missing data file the same way as the other failures in
        # this function, carrying the loader's message.
        raise gr.Error(str(exc)) from exc

    pairs = _build_image_pairs(rows, task_name)
    if not pairs:
        raise gr.Error("没有找到可评测的图片对,请检查数据文件。")

    return pairs
91
+
92
+
93
+ def _format_pair_header(pair: Dict[str, str]) -> str:
94
+ return (
95
+ f"**Test ID:** {pair['test_id']} \n"
96
+ f"**Model A:** {pair['model1_name']} ({pair['model1_res']}) \n"
97
+ f"**Model B:** {pair['model2_name']} ({pair['model2_res']})"
98
+ )
99
+
100
+
101
def _append_evaluation(task_name: str, pair: Dict[str, str], scores: Dict[str, int]) -> None:
    """Append one scored image pair to the task's ``evaluation_results.csv``.

    The file and its parent directory are created on first use. A header row
    is written whenever the file is missing or empty.

    Args:
        task_name: key of ``TASK_CONFIG`` identifying the task.
        pair: pair dict providing ``test_id``, ``org_img`` and the
            ``model1_*`` / ``model2_*`` name, res and path entries.
        scores: mapping from score column name to the integer rating; its
            keys must be among the score columns listed in ``fieldnames``.

    Raises:
        KeyError: if *pair* lacks one of the recorded columns.
        ValueError: if *scores* contains a key that is not a known column
            (raised by ``csv.DictWriter``).
    """
    from datetime import timezone  # local import: only this function needs it

    csv_path = _csv_path_for_task(task_name, "evaluation_results.csv")
    os.makedirs(os.path.dirname(csv_path), exist_ok=True)
    # A zero-byte file still needs its header, so check the size as well as
    # the existence of the file.
    needs_header = not os.path.exists(csv_path) or os.path.getsize(csv_path) == 0

    pair_columns = [
        "test_id",
        "model1_name",
        "model2_name",
        "org_img",
        "model1_res",
        "model2_res",
        "model1_path",
        "model2_path",
    ]
    fieldnames = [
        "eval_date",
        *pair_columns,
        "physical_interaction_fidelity_score",
        "optical_effect_accuracy_score",
        "semantic_functional_alignment_score",
        "overall_photorealism_score",
    ]

    # Build the row before touching the file so that a malformed pair cannot
    # leave behind a file holding only a header.
    # The timestamp is UTC without an offset suffix, the same string that the
    # deprecated datetime.utcnow().isoformat() produced.
    row = {"eval_date": datetime.now(timezone.utc).replace(tzinfo=None).isoformat()}
    for column in pair_columns:
        row[column] = pair[column]
    row.update(scores)

    with open(csv_path, "a", newline="", encoding="utf-8") as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        if needs_header:
            writer.writeheader()
        writer.writerow(row)
140
+
141
+
142
def on_task_change(task_name: str, _state_pairs: List[Dict[str, str]]):
    """Load *task_name* and return the UI updates that show its first pair.

    ``_state_pairs`` is accepted to match the event's inputs but is not read.

    Returns, in order: the list of pairs, the index-slider update (range reset
    to the new pair count, hidden when there is only one pair), the header
    update, the three resolved image paths (original, model A, model B), four
    default scores of 3, and the feedback update reporting the pair count.
    """
    pairs = load_task(task_name)
    total = len(pairs)
    first = pairs[0]

    header_update = gr.update(value=_format_pair_header(first))
    slider_update = gr.update(value=0, minimum=0, maximum=total - 1, visible=(total > 1))
    image_paths = [
        _resolve_image_path(first[key])
        for key in ("org_img", "model1_path", "model2_path")
    ]

    return (
        pairs,
        slider_update,
        header_update,
        *image_paths,
        3,
        3,
        3,
        3,
        gr.update(value=f"共 {total} 个待评测的图片对。"),
    )
157
+
158
+
159
def on_pair_navigate(index: int, pairs: List[Dict[str, str]]):
    """Show the pair at *index*, clamped to the valid range.

    Returns, in order: the slider update carrying the clamped index, the
    header update, the three resolved image paths (original, model A,
    model B), and four default scores of 3.

    Raises:
        gr.Error: if no pairs are loaded.
    """
    if not pairs:
        raise gr.Error("请先选择任务。")

    last = len(pairs) - 1
    position = min(max(int(index), 0), last)
    current = pairs[position]

    image_paths = [
        _resolve_image_path(current[key])
        for key in ("org_img", "model1_path", "model2_path")
    ]
    default_scores = (3, 3, 3, 3)

    return (
        gr.update(value=position),
        gr.update(value=_format_pair_header(current)),
        *image_paths,
        *default_scores,
    )
177
+
178
+
179
def on_submit(
    task_name: str,
    index: int,
    pairs: List[Dict[str, str]],
    physical_score: int,
    optical_score: int,
    semantic_score: int,
    overall_score: int,
):
    """Save the scores for the current pair and advance to the next one.

    Args:
        task_name: selected task; must be non-empty.
        index: position of the pair being scored, as reported by the slider.
            It is cast to ``int`` and clamped to the valid range, as
            ``on_pair_navigate`` does.
        pairs: pairs loaded for the task; must be non-empty.
        physical_score, optical_score, semantic_score, overall_score: the
            four ratings; each is cast to ``int`` before being stored.

    Returns, in order: slider update, header update, three image values, four
    default scores of 3, and the feedback update. On the last pair the first
    five entries are no-op updates, so the display stays where it is.

    Raises:
        gr.Error: if no task is selected or no pairs are loaded.
    """
    if not task_name:
        raise gr.Error("请先选择任务。")

    if not pairs:
        raise gr.Error("当前任务没有加载任何图片对。")

    last_index = len(pairs) - 1
    # The slider may deliver a float; a float cannot index a list.
    index = max(0, min(int(index), last_index))

    pair = pairs[index]
    score_map = {
        "physical_interaction_fidelity_score": int(physical_score),
        "optical_effect_accuracy_score": int(optical_score),
        "semantic_functional_alignment_score": int(semantic_score),
        "overall_photorealism_score": int(overall_score),
    }
    _append_evaluation(task_name, pair, score_map)

    info = f"已保存 Test ID {pair['test_id']} 的评价结果。"
    default_scores = (3, 3, 3, 3)

    if index == last_index:
        # Nothing to advance to: leave slider, header and images untouched.
        return (
            gr.update(),
            gr.update(),
            gr.update(),
            gr.update(),
            gr.update(),
            *default_scores,
            gr.update(value=info + " 已经是最后一组。"),
        )

    next_index = index + 1
    next_pair = pairs[next_index]
    return (
        gr.update(value=next_index),
        gr.update(value=_format_pair_header(next_pair)),
        _resolve_image_path(next_pair["org_img"]),
        _resolve_image_path(next_pair["model1_path"]),
        _resolve_image_path(next_pair["model2_path"]),
        *default_scores,
        gr.update(value=info + f" 自动跳转到下一组({next_index + 1}/{len(pairs)})。"),
    )
234
+
235
+
236
# Build the evaluation page. Components are created in the same order as
# before, because creation order inside each layout context is what places
# them on screen.
with gr.Blocks(title="VisArena Human Evaluation") as demo:
    gr.Markdown(
        """
# VisArena Human Evaluation
请选择任务并对模型生成的图像进行评分。每项评分范围为 **1(效果极差)** 到 **5(效果极佳)**。
"""
    )

    # Task picker and pair navigation. The slider starts hidden with an empty
    # range; on_task_change sets its range once pairs are loaded.
    with gr.Row():
        task_selector = gr.Dropdown(
            choices=list(TASK_CONFIG),
            value="Scene Composition & Object Insertion",
            label="Task",
            interactive=True,
        )
        index_slider = gr.Slider(
            minimum=0,
            maximum=0,
            value=0,
            step=1,
            label="Pair Index",
            interactive=True,
            visible=False,
        )

    # Holds the list of pairs loaded for the selected task.
    pair_state = gr.State([])

    pair_header = gr.Markdown("")

    # Original image next to the two model outputs.
    with gr.Row():
        with gr.Column(scale=1):
            orig_image = gr.Image(label="原图 Original", type="filepath", interactive=False)
        with gr.Column(scale=1):
            model1_image = gr.Image(label="模型 A 输出", type="filepath", interactive=False)
        with gr.Column(scale=1):
            model2_image = gr.Image(label="模型 B 输出", type="filepath", interactive=False)

    # Four 1-5 rating sliders, two per column.
    with gr.Row():
        with gr.Column():
            physical_input = gr.Slider(
                minimum=1, maximum=5, value=3, step=1,
                label="物理交互保真度 (Physical Interaction Fidelity)",
            )
            optical_input = gr.Slider(
                minimum=1, maximum=5, value=3, step=1,
                label="光学效应准确度 (Optical Effect Accuracy)",
            )
        with gr.Column():
            semantic_input = gr.Slider(
                minimum=1, maximum=5, value=3, step=1,
                label="语义/功能对齐度 (Semantic/Functional Alignment)",
            )
            overall_input = gr.Slider(
                minimum=1, maximum=5, value=3, step=1,
                label="整体真实感 (Overall Photorealism)",
            )

    submit_button = gr.Button("Submit Evaluation", variant="primary")
    feedback_box = gr.Markdown("")

    # Component groups shared by the event bindings below. Their order must
    # match the order of the values returned by the handlers.
    _score_sliders = [physical_input, optical_input, semantic_input, overall_input]
    _pair_view = [
        index_slider,
        pair_header,
        orig_image,
        model1_image,
        model2_image,
        *_score_sliders,
    ]

    # Event bindings
    task_selector.change(
        fn=on_task_change,
        inputs=[task_selector, pair_state],
        outputs=[pair_state, *_pair_view, feedback_box],
    )

    index_slider.release(
        fn=on_pair_navigate,
        inputs=[index_slider, pair_state],
        outputs=list(_pair_view),
    )

    submit_button.click(
        fn=on_submit,
        inputs=[task_selector, index_slider, pair_state, *_score_sliders],
        outputs=[*_pair_view, feedback_box],
    )

    # Auto-load default task on startup
    demo.load(
        fn=on_task_change,
        inputs=[task_selector, pair_state],
        outputs=[pair_state, *_pair_view, feedback_box],
    )


if __name__ == "__main__":
    demo.queue().launch()
scene_composition_and_object_insertion/dall-e-2/1-dall-e-2.jpg ADDED

Git LFS Details

  • SHA256: 26dbf6f793b07cba0fc19b54abfde6d78cf29555bb4580ae621f5ee7b03b171d
  • Pointer size: 131 Bytes
  • Size of remote file: 236 kB
scene_composition_and_object_insertion/dall-e-3/1-dall-e-3.jpg ADDED

Git LFS Details

  • SHA256: ee09d5733e735214f7ffdb07fd10f22d7a73d7632909431ad82d76e25b638e41
  • Pointer size: 132 Bytes
  • Size of remote file: 3.31 MB
scene_composition_and_object_insertion/org/1.jpg ADDED

Git LFS Details

  • SHA256: 113cb3d0e39d908b856c383d411bf27e0df80651b30d6bb77d946d4fccf975fc
  • Pointer size: 131 Bytes
  • Size of remote file: 140 kB
scene_composition_and_object_insertion/results.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ test_id,model_name,res,path,org_img
2
+ 1,dall-e-2,1024x1024,dall-e-2/1-dall-e-2.jpg,org/1.jpg
3
+ 1,dall-e-3,1024x1024,dall-e-3/1-dall-e-3.jpg,org/1.jpg