jasonshaoshun committed · commit e27c948 · 1 parent: a90efab

debug

Files changed:
- app.py (+70, -18)
- old-requirements.txt (+17, -0)
- requirements.txt (+1, -1)
- src/about.py (+12, -1)
app.py CHANGED

@@ -167,34 +167,85 @@ from src.about import TasksMib_Subgraph
 
 
 
+# def init_leaderboard_mib_subgraph(dataframe, track):
+#     if dataframe is None or dataframe.empty:
+#         raise ValueError("Leaderboard DataFrame is empty or None.")
+
+#     # Add filter columns to display
+#     dataframe['Task'] = dataframe.apply(
+#         lambda row: [task.value.benchmark for task in TasksMib_Subgraph
+#                      if any(f"{task.value.benchmark}_{model}" in row.index
+#                             for model in task.value.models)][0],
+#         axis=1
+#     )
+
+#     dataframe['Model'] = dataframe.apply(
+#         lambda row: [model for task in TasksMib_Subgraph
+#                      for model in task.value.models
+#                      if f"{task.value.benchmark}_{model}" in row.index][0],
+#         axis=1
+#     )
+
+#     return Leaderboard(
+#         value=dataframe,
+#         datatype=[c.type for c in fields(AutoEvalColumn_mib_subgraph)],
+#         select_columns=SelectColumns(
+#             default_selection=[c.name for c in fields(AutoEvalColumn_mib_subgraph) if c.displayed_by_default],
+#             cant_deselect=[c.name for c in fields(AutoEvalColumn_mib_subgraph) if c.never_hidden],
+#             label="Select Columns to Display:",
+#         ),
+#         search_columns=["Method", "Task", "Model"],  # Add Task and Model to searchable columns
+#         hide_columns=[c.name for c in fields(AutoEvalColumn_mib_subgraph) if c.hidden],
+#         bool_checkboxgroup_label="Hide models",
+#         interactive=False,
+#     )
+
 def init_leaderboard_mib_subgraph(dataframe, track):
+    """Initialize the subgraph leaderboard with grouped column selection."""
     if dataframe is None or dataframe.empty:
         raise ValueError("Leaderboard DataFrame is empty or None.")
-
-    # Add filter columns to display
-    dataframe['Task'] = dataframe.apply(
-        lambda row: [task.value.benchmark for task in TasksMib_Subgraph
-                     if any(f"{task.value.benchmark}_{model}" in row.index
-                            for model in task.value.models)][0],
-        axis=1
-    )
-
-    dataframe['Model'] = dataframe.apply(
-        lambda row: [model for task in TasksMib_Subgraph
-                     for model in task.value.models
-                     if f"{task.value.benchmark}_{model}" in row.index][0],
-        axis=1
-    )
+
+    # Get tasks and models using the new class methods
+    tasks = TasksMib_Subgraph.get_all_tasks()
+    models = TasksMib_Subgraph.get_all_models()
+
+    # Create a mapping from selection to actual column names
+    selection_map = {}
+
+    # Add task mappings - when a task is selected, show all its columns
+    for task in tasks:
+        # For each task, find all valid task_model combinations
+        valid_combos = []
+        for model in models:
+            col_name = f"{task}_{model}"
+            if col_name in dataframe.columns:
+                valid_combos.append(col_name)
+        if valid_combos:
+            selection_map[task] = valid_combos
+
+    # Add model mappings - when a model is selected, show all its columns
+    for model in models:
+        # For each model, find all valid task_model combinations
+        valid_combos = []
+        for task in tasks:
+            col_name = f"{task}_{model}"
+            if col_name in dataframe.columns:
+                valid_combos.append(col_name)
+        if valid_combos:
+            selection_map[model] = valid_combos
+
     return Leaderboard(
         value=dataframe,
         datatype=[c.type for c in fields(AutoEvalColumn_mib_subgraph)],
         select_columns=SelectColumns(
-            default_selection=[c.name for c in fields(AutoEvalColumn_mib_subgraph) if c.displayed_by_default],
-            cant_deselect=[c.name for c in fields(AutoEvalColumn_mib_subgraph) if c.never_hidden],
-            label="Select Columns to Display:",
+            choices=[tasks, models],              # Two groups of choices
+            labels=["Tasks", "Models"],           # Labels for each group
+            default_selection=[*tasks, *models],  # Show everything by default
+            cant_deselect=["Method"],             # Method column always visible
+            label="Filter by Tasks or Models:",
+            selection_map=selection_map           # Map selections to actual columns
         ),
-        search_columns=["Method", "Task", "Model"],  # Add Task and Model to searchable columns
+        search_columns=["Method"],
         hide_columns=[c.name for c in fields(AutoEvalColumn_mib_subgraph) if c.hidden],
         bool_checkboxgroup_label="Hide models",
         interactive=False,

@@ -211,6 +262,7 @@ def init_leaderboard_mib_subgraph(dataframe, track):
 
 
 
+
 def init_leaderboard_mib_causalgraph(dataframe, track):
     # print("Debugging column issues:")
     # print("\nActual DataFrame columns:")
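Note on the rewritten function: the selection_map construction is plain pandas and can be sanity-checked outside Gradio. Below is a minimal, runnable sketch of that grouping logic, using the task and model names defined in src/about.py with made-up scores. It deliberately avoids gradio_leaderboard, since whether the pinned version's SelectColumns accepts the choices/labels/selection_map keywords used above is not verified here.

# Standalone sketch of the selection_map grouping (pandas only).
# Task/model names come from TasksMib_Subgraph; the scores are illustrative.
import pandas as pd

tasks = ["arc_easy", "arc_challenge"]
models = ["gemma2", "llama3"]

df = pd.DataFrame({
    "Method": ["EAP", "ACDC"],
    "arc_easy_gemma2": [0.71, 0.64],
    "arc_easy_llama3": [0.75, 0.69],
    "arc_challenge_llama3": [0.58, 0.52],  # no arc_challenge_gemma2 column
})

selection_map = {}
for group in (tasks, models):
    for key in group:
        # A column belongs to `key` if key is its task prefix or model suffix,
        # and the column actually exists in the DataFrame.
        combos = [f"{t}_{m}" for t in tasks for m in models
                  if key in (t, m) and f"{t}_{m}" in df.columns]
        if combos:
            selection_map[key] = combos

print(selection_map)
# {'arc_easy': ['arc_easy_gemma2', 'arc_easy_llama3'],
#  'arc_challenge': ['arc_challenge_llama3'],
#  'gemma2': ['arc_easy_gemma2'],
#  'llama3': ['arc_easy_llama3', 'arc_challenge_llama3']}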
old-requirements.txt ADDED

@@ -0,0 +1,17 @@
+APScheduler
+black
+datasets
+fastapi==0.112.2
+gradio
+gradio[oauth]
+gradio_leaderboard==0.0.13
+gradio_client
+huggingface-hub>=0.18.0
+matplotlib
+numpy
+pandas
+python-dateutil
+tqdm
+transformers
+tokenizers>=0.15.0
+sentencepiece
requirements.txt CHANGED

@@ -4,7 +4,7 @@ datasets
 fastapi==0.112.2
 gradio
 gradio[oauth]
-gradio_leaderboard==0.0.13
+gradio_leaderboard==0.0.15
 gradio_client
 huggingface-hub>=0.18.0
 matplotlib
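old-requirements.txt preserves the previous pin set (gradio_leaderboard==0.0.13) while requirements.txt moves to 0.0.15. A quick sketch for confirming which version the Space actually resolved at runtime, using only the standard library (Python 3.8+):

# Minimal sketch: check the installed gradio_leaderboard pin at runtime.
from importlib.metadata import version, PackageNotFoundError

try:
    print(version("gradio_leaderboard"))  # expected "0.0.15" after this commit
except PackageNotFoundError:
    print("gradio_leaderboard is not installed")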
src/about.py CHANGED

@@ -47,7 +47,18 @@ class TasksMib_Subgraph(Enum):
     task4 = TaskMIB_Subgraph("arc_easy", ["gemma2", "llama3"], "arc_easy", ["edge_counts", "faithfulness"])
     task5 = TaskMIB_Subgraph("arc_challenge", ["llama3"], "arc_challenge", ["edge_counts", "faithfulness"])
 
-
+    @classmethod
+    def get_all_tasks(cls):
+        """Returns a list of all task benchmarks"""
+        return [task.value.benchmark for task in cls]
+
+    @classmethod
+    def get_all_models(cls):
+        """Returns a list of all unique models across all tasks"""
+        models = set()
+        for task in cls:
+            models.update(task.value.models)
+        return sorted(list(models))
 
 
 # @dataclass