Spaces:
Sleeping
Sleeping
felix
committed on
Commit
·
e254e41
1
Parent(s):
8b8ceb9
updates
Browse files
- README.md +3 -1
- app.py +32 -35
- requirements.txt +1 -0
README.md
CHANGED
|
@@ -4,7 +4,7 @@ emoji: 🏆
|
|
| 4 |
colorFrom: yellow
|
| 5 |
colorTo: gray
|
| 6 |
sdk: streamlit
|
| 7 |
-
sdk_version: 1.
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
license: apache-2.0
|
|
@@ -13,4 +13,6 @@ license: apache-2.0
|
|
| 13 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
| 14 |
|
| 15 |
TODO:
|
|
|
|
|
|
|
| 16 |
|
|
|
|
| 4 |
colorFrom: yellow
|
| 5 |
colorTo: gray
|
| 6 |
sdk: streamlit
|
| 7 |
+
sdk_version: 1.26.0
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
license: apache-2.0
|
|
|
|
| 13 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
| 14 |
|
| 15 |
TODO:
|
| 16 |
+
Add feedback form on the bottom?
|
| 17 |
+
|
| 18 |
|
app.py
CHANGED
|
@@ -11,8 +11,6 @@ st.title('Meta Open LLM leaderboard')
|
|
| 11 |
|
| 12 |
directories = os.listdir("./data")
|
| 13 |
|
| 14 |
-
#data_dir = directories[0]
|
| 15 |
-
|
| 16 |
def format_dir_date(data_dir):
|
| 17 |
# Extracting date and time information from the path
|
| 18 |
parsed_date = datetime.strptime(data_dir, "%Y%m%d_%H%M")
|
|
@@ -20,12 +18,15 @@ def format_dir_date(data_dir):
|
|
| 20 |
# Formatting the parsed date
|
| 21 |
return parsed_date.strftime("%b %d, %Y %H:%M")
|
| 22 |
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
|
|
|
|
|
|
|
|
|
| 29 |
|
| 30 |
captions_map = {
|
| 31 |
"hg_average_to_agentbench_compare.png": "HF to AgentBench compare",
|
|
@@ -34,14 +35,13 @@ captions_map = {
|
|
| 34 |
"hg_average_to_mosaic_compare.png": "HF to MosaicML compare",
|
| 35 |
"hg_average_to_alpacaeval_compare.png": "HF to AlpacaEval compare"
|
| 36 |
}
|
|
|
|
|
|
|
| 37 |
|
| 38 |
-
st.write("Generated on: <b>" + format_dir_date(data_dir) + "</b>", unsafe_allow_html=True)
|
| 39 |
-
st.divider()
|
| 40 |
|
| 41 |
data_path = './data/' + data_dir
|
| 42 |
|
| 43 |
imgs = glob.glob(os.path.join(data_path, '*.png'))
|
| 44 |
-
white_image = './white_image.png'
|
| 45 |
|
| 46 |
# Extracting images that start with "hf_llm_diagram"
|
| 47 |
hf_llm_diagrams = [img for img in imgs if 'hf_llm_diagram' in os.path.basename(img)]
|
|
@@ -93,38 +93,45 @@ def print_model_list(file_name, st, split_into_two=False):
|
|
| 93 |
st.write(final_html, unsafe_allow_html=True)
|
| 94 |
|
| 95 |
|
|
|
|
| 96 |
cols = st.columns(2)
|
| 97 |
|
| 98 |
-
cols[0].
|
| 99 |
-
cols[0].image(hf_llm_diagrams[0], use_column_width="auto")
|
| 100 |
-
cols[1].write("<nbsp/>", unsafe_allow_html=True)
|
| 101 |
-
cols[1].image(white_image, use_column_width="auto")
|
| 102 |
|
| 103 |
print_model_list(hf_llm_diagrams[0],st, True)
|
| 104 |
-
st.
|
| 105 |
|
| 106 |
cols = st.columns(2)
|
| 107 |
-
cols[0].image(hf_llm_diagrams[1],caption="Other or commercially permissive licenses only", use_column_width="auto")
|
| 108 |
|
|
|
|
| 109 |
print_model_list(hf_llm_diagrams[1],cols[0])
|
| 110 |
|
| 111 |
cols[1].image(hf_llm_diagrams[2],caption="Commercially permissive license only", use_column_width="auto")
|
| 112 |
-
|
| 113 |
print_model_list(hf_llm_diagrams[2],cols[1])
|
| 114 |
|
| 115 |
-
st.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
|
| 117 |
-
st.
|
| 118 |
cols = st.columns(2)
|
| 119 |
cols[0].image(bigcode_diagrams[0], use_column_width="auto")
|
| 120 |
-
|
| 121 |
-
cols[1].image(white_image, use_column_width="auto")
|
| 122 |
|
| 123 |
print_model_list(bigcode_diagrams[0],st,True)
|
| 124 |
|
| 125 |
-
|
| 126 |
-
|
|
|
|
| 127 |
st.caption("Only models evaluated on both leaderboards are included.")
|
|
|
|
| 128 |
cols = st.columns(2)
|
| 129 |
|
| 130 |
for i, img in enumerate(remaining_imgs):
|
|
@@ -151,17 +158,7 @@ st.write(
|
|
| 151 |
</ul>
|
| 152 |
""", unsafe_allow_html=True
|
| 153 |
)
|
| 154 |
-
st.divider()
|
| 155 |
|
| 156 |
-
cols = st.columns(2)
|
| 157 |
-
cols[0].write("TruthfulQA at 10% for HuggingFace Open LLM leaderboard by Model Size")
|
| 158 |
-
cols[0].image(hf_llm_diagrams[3],use_column_width="auto")
|
| 159 |
-
print_model_list(hf_llm_diagrams[3],cols[0],False)
|
| 160 |
-
|
| 161 |
-
cols[1].write("ARC at 50% and MMLU at 50% for HuggingFace Open LLM leaderboard by Model Size")
|
| 162 |
-
cols[1].image(hf_llm_diagrams[4],use_column_width="auto")
|
| 163 |
-
print_model_list(hf_llm_diagrams[4],cols[1],False)
|
| 164 |
|
| 165 |
-
st.divider
|
| 166 |
-
st.subheader('About')
|
| 167 |
st.write('This meta leaderboard is built and maintained by Felix Zaslavskiy. For feedback, correction, suggestions please reach out on X at <a href="https://twitter.com/FZaslavskiy" >@FZaslavskiy</a> or here via community discussions.', unsafe_allow_html=True)
|
|
|
|
| 11 |
|
| 12 |
directories = os.listdir("./data")
|
| 13 |
|
|
|
|
|
|
|
| 14 |
def format_dir_date(data_dir):
|
| 15 |
# Extracting date and time information from the path
|
| 16 |
parsed_date = datetime.strptime(data_dir, "%Y%m%d_%H%M")
|
|
|
|
| 18 |
# Formatting the parsed date
|
| 19 |
return parsed_date.strftime("%b %d, %Y %H:%M")
|
| 20 |
|
| 21 |
+
col1, col2 = st.columns(2)
|
| 22 |
+
|
| 23 |
+
with col1:
|
| 24 |
+
data_dir = st.selectbox(
|
| 25 |
+
'Select different data generation date',
|
| 26 |
+
directories,
|
| 27 |
+
format_func=format_dir_date,
|
| 28 |
+
index=len(directories)-1,
|
| 29 |
+
)
|
| 30 |
|
| 31 |
captions_map = {
|
| 32 |
"hg_average_to_agentbench_compare.png": "HF to AgentBench compare",
|
|
|
|
| 35 |
"hg_average_to_mosaic_compare.png": "HF to MosaicML compare",
|
| 36 |
"hg_average_to_alpacaeval_compare.png": "HF to AlpacaEval compare"
|
| 37 |
}
|
| 38 |
+
with col2:
|
| 39 |
+
st.write("<div style=\"text-align: center\" >Generated on: <b>" + format_dir_date(data_dir) + "</b></div>", unsafe_allow_html=True)
|
| 40 |
|
|
|
|
|
|
|
| 41 |
|
| 42 |
data_path = './data/' + data_dir
|
| 43 |
|
| 44 |
imgs = glob.glob(os.path.join(data_path, '*.png'))
|
|
|
|
| 45 |
|
| 46 |
# Extracting images that start with "hf_llm_diagram"
|
| 47 |
hf_llm_diagrams = [img for img in imgs if 'hf_llm_diagram' in os.path.basename(img)]
|
|
|
|
| 93 |
st.write(final_html, unsafe_allow_html=True)
|
| 94 |
|
| 95 |
|
| 96 |
+
st.header("HuggingFace Open LLM leaderboard by Model Size", divider=True)
|
| 97 |
cols = st.columns(2)
|
| 98 |
|
| 99 |
+
cols[0].image(hf_llm_diagrams[0], caption="Main chart using all the models", use_column_width="auto")
|
|
|
|
|
|
|
|
|
|
| 100 |
|
| 101 |
print_model_list(hf_llm_diagrams[0],st, True)
|
| 102 |
+
st.write("<nbsp/>", unsafe_allow_html=True)
|
| 103 |
|
| 104 |
cols = st.columns(2)
|
|
|
|
| 105 |
|
| 106 |
+
cols[0].image(hf_llm_diagrams[1],caption="Other or commercially permissive licenses only", use_column_width="auto")
|
| 107 |
print_model_list(hf_llm_diagrams[1],cols[0])
|
| 108 |
|
| 109 |
cols[1].image(hf_llm_diagrams[2],caption="Commercially permissive license only", use_column_width="auto")
|
|
|
|
| 110 |
print_model_list(hf_llm_diagrams[2],cols[1])
|
| 111 |
|
| 112 |
+
st.write("<nbsp/>", unsafe_allow_html=True)
|
| 113 |
+
|
| 114 |
+
cols = st.columns(2)
|
| 115 |
+
cols[0].image(hf_llm_diagrams[3],caption="TruthfulQA at 10% for HuggingFace Open LLM leaderboard by Model Size", use_column_width="auto")
|
| 116 |
+
print_model_list(hf_llm_diagrams[3],cols[0],False)
|
| 117 |
+
|
| 118 |
+
cols[1].image(hf_llm_diagrams[4],caption="ARC at 50% and MMLU at 50% for HuggingFace Open LLM leaderboard by Model Size", use_column_width="auto")
|
| 119 |
+
print_model_list(hf_llm_diagrams[4],cols[1],False)
|
| 120 |
+
|
| 121 |
+
|
| 122 |
|
| 123 |
+
st.header("Big Code Models Leaderboard", divider=True)
|
| 124 |
cols = st.columns(2)
|
| 125 |
cols[0].image(bigcode_diagrams[0], use_column_width="auto")
|
| 126 |
+
|
|
|
|
| 127 |
|
| 128 |
print_model_list(bigcode_diagrams[0],st,True)
|
| 129 |
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
st.header("HuggingFace and Other Leaderboards: A Comparative Model Evaluation", divider=True)
|
| 133 |
st.caption("Only models evaluated on both leaderboards are included.")
|
| 134 |
+
|
| 135 |
cols = st.columns(2)
|
| 136 |
|
| 137 |
for i, img in enumerate(remaining_imgs):
|
|
|
|
| 158 |
</ul>
|
| 159 |
""", unsafe_allow_html=True
|
| 160 |
)
|
|
|
|
| 161 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 162 |
|
| 163 |
+
st.header('About', divider=True)
|
|
|
|
| 164 |
st.write('This meta leaderboard is built and maintained by Felix Zaslavskiy. For feedback, correction, suggestions please reach out on X at <a href="https://twitter.com/FZaslavskiy" >@FZaslavskiy</a> or here via community discussions.', unsafe_allow_html=True)
|
requirements.txt
CHANGED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
streamlit==1.26.0
|