Commit cd66ed8 · Parent(s): 0b8a8d2
show models with some results missing
Files changed:
- README.md (+2 -2)
- src/display/formatting.py (+2 -0)
- src/populate.py (+2 -2)
README.md
CHANGED

@@ -1,5 +1,5 @@
 ---
-title:
+title: mSTEB Leaderboard
 emoji: 🥇
 colorFrom: green
 colorTo: indigo
@@ -7,7 +7,7 @@ sdk: gradio
 app_file: app.py
 pinned: true
 license: apache-2.0
-short_description:
+short_description: Leaderboard for mSTEB benchmark
 sdk_version: 5.19.0
 ---
src/display/formatting.py
CHANGED

@@ -22,6 +22,8 @@ def styled_message(message):
 def has_no_nan_values(df, columns):
     return df[columns].notna().all(axis=1)
 
+def has_at_least_one_benchmark(df, columns):
+    return df[columns].notna().any(axis=1)
 
 def has_nan_values(df, columns):
     return df[columns].isna().any(axis=1)
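For context on the new helper: pandas' notna() returns a boolean frame, and .all(axis=1) versus .any(axis=1) decides whether a row needs every benchmark or just one. A minimal sketch of the difference on a toy frame (the column names here are illustrative, not from the repo):

import pandas as pd

df = pd.DataFrame({
    "model": ["a", "b", "c"],
    "bench1": [0.9, None, None],
    "bench2": [0.8, 0.7, None],   # model "c" has no results at all
})
cols = ["bench1", "bench2"]

# has_no_nan_values: keep rows where every benchmark is present -> only "a"
complete = df[df[cols].notna().all(axis=1)]

# has_at_least_one_benchmark: keep rows with at least one result -> "a" and "b"
partial = df[df[cols].notna().any(axis=1)]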
src/populate.py
CHANGED

@@ -3,7 +3,7 @@ import os
 
 import pandas as pd
 
-from src.display.formatting import has_no_nan_values, make_clickable_model
+from src.display.formatting import has_no_nan_values, make_clickable_model, has_at_least_one_benchmark
 from src.display.utils import AutoEvalColumn, EvalQueueColumn
 from src.leaderboard.read_evals import get_raw_eval_results
 
@@ -19,7 +19,7 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
     df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
     df = df[cols].round(decimals=2)
     # filter out if any of the benchmarks have not been produced
-    df = df[has_no_nan_values(df, benchmark_cols)]
+    df = df[has_at_least_one_benchmark(df, benchmark_cols)]
     return df
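The net effect, matching the commit message: a model now stays on the leaderboard as long as at least one benchmark result exists, and only rows missing every benchmark are dropped. A before/after sketch of the filter line (the surrounding function body is not shown in full in this diff):

# before: any missing benchmark dropped the whole row
df = df[has_no_nan_values(df, benchmark_cols)]

# after: a row survives with partial results; absent scores stay NaN,
# which presumably render as empty cells in the leaderboard table
df = df[has_at_least_one_benchmark(df, benchmark_cols)]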