Spaces:
Running
Running
| import streamlit as st | |
| import pandas as pd | |
| import plotly.express as px | |
| import json | |
# Configure the page (wide layout, title, icon) and render the main heading.
st.set_page_config(layout="wide", page_title="CyberSafetyEval Leaderboard", page_icon=":bar_chart:")
st.markdown("# CyberSafetyEval Leaderboard", unsafe_allow_html=True)

# Load both result sets from the working directory. The context managers
# close each file as soon as it has been parsed instead of leaving the handle
# to the garbage collector, and the explicit UTF-8 encoding keeps parsing
# independent of the host locale.
# The rest of the script iterates each mapping as
# {model: {category: {metric_name: value}}}.
with open("mitre.json", encoding="utf-8") as mitre_file:
    data_mitre = json.load(mitre_file)
with open("insecure_code.json", encoding="utf-8") as insecure_file:
    data_insecure = json.load(insecure_file)
# MITRE leaderboard table: one row per model, holding the mean benign
# percentage across all categories followed by one column per category.
st.markdown("### Helpfulness to cyberattackers results, benign percentage is percentage of tests safely passed (higher is better)")
model_stats = {}
for model_name, per_category in data_mitre.items():
    benign_by_category = {
        cat: result['benign_percentage'] for cat, result in per_category.items()
    }
    # The mean goes in first so it becomes the leading column of the table.
    row = {'Mean Benign Percentage': pd.Series(list(benign_by_category.values())).mean()}
    for cat, pct in benign_by_category.items():
        row[f'Benign Percentage in {cat}'] = pct
    model_stats[model_name] = row

# Best-scoring model (highest mean) at the top.
unsorted_mitre_df = pd.DataFrame.from_dict(model_stats, orient='index')
leaderboard_df = unsorted_mitre_df.sort_values(by='Mean Benign Percentage', ascending=False)
# Flatten the nested {model: {category: result}} mapping into long-form
# records, one per (model, category) pair, for the clustered bar chart.
chart_data = [
    {
        'Model': model_name,
        'Category': category_name,
        'Benign Percentage': result['benign_percentage'],
    }
    for model_name, per_category in data_mitre.items()
    for category_name, result in per_category.items()
]
chart_df = pd.DataFrame(chart_data)
# Show the MITRE leaderboard with every cell formatted as a two-decimal
# percentage and shaded on a blue gradient.
mitre_styler = leaderboard_df.style.format("{:.2%}")
mitre_styler = mitre_styler.background_gradient(cmap='Blues')
st.dataframe(mitre_styler)
# Clustered bar chart: categories on the x axis, one bar per model within
# each category.
fig = px.bar(
    chart_df,
    x='Category',
    y='Benign Percentage',
    color='Model',
    barmode='group',
    title='MITRE ATT&CK category results per model',
    labels={'Benign Percentage': 'Benign %'},
    color_discrete_sequence=px.colors.qualitative.Pastel,
)
# Transparent plot background, no grid lines, y ticks rendered as whole
# percentages.
fig.update_layout(
    plot_bgcolor='rgba(0,0,0,0)',
    xaxis={'showgrid': False},
    yaxis={'showgrid': False, 'tickformat': ".0%"},
    legend={'title': 'Models'},
)
st.plotly_chart(fig, use_container_width=True)
# Insecure-code leaderboard table: one row per model. Each
# 'autocomplete_vunerable_percentage' value (key read exactly as spelled
# here) is subtracted from 1 so the table shows a pass rate.
model_stats_insecure = {}
for model_name, per_category in data_insecure.items():
    pass_rate_by_category = {
        cat: 1 - result['autocomplete_vunerable_percentage']
        for cat, result in per_category.items()
    }
    # The mean goes in first so it becomes the leading column of the table.
    row = {
        'Mean Insecure Code Test Pass Rate': pd.Series(list(pass_rate_by_category.values())).mean()
    }
    for cat, rate in pass_rate_by_category.items():
        row[f'Insecure Code Test Pass Rate in {cat}'] = rate
    model_stats_insecure[model_name] = row

# Best-scoring model (highest mean pass rate) at the top.
unsorted_insecure_df = pd.DataFrame.from_dict(model_stats_insecure, orient='index')
leaderboard_df_insecure = unsorted_insecure_df.sort_values(
    by='Mean Insecure Code Test Pass Rate', ascending=False
)
# Flatten the nested {model: {category: result}} mapping into long-form
# records, one per (model, category) pair, with the vulnerable share
# converted to a pass rate (1 - share).
chart_data_insecure = [
    {
        'Model': model_name,
        'Category': category_name,
        'Insecure Code Test Pass Rate': 1 - result['autocomplete_vunerable_percentage'],
    }
    for model_name, per_category in data_insecure.items()
    for category_name, result in per_category.items()
]
chart_df_insecure = pd.DataFrame(chart_data_insecure)
# Section heading, then the pass-rate leaderboard with every cell formatted
# as a two-decimal percentage and shaded on a blue gradient.
st.markdown("### Insecure coding test pass rate results (higher is better)")
insecure_styler = leaderboard_df_insecure.style.format("{:.2%}")
insecure_styler = insecure_styler.background_gradient(cmap='Blues')
st.dataframe(insecure_styler)
# Clustered bar chart: categories on the x axis, one bar per model within
# each category.
fig_insecure = px.bar(
    chart_df_insecure,
    x='Category',
    y='Insecure Code Test Pass Rate',
    color='Model',
    barmode='group',
    title='Category-wise Insecure Code Test Pass Rate per Model',
    labels={'Insecure Code Test Pass Rate': 'Insecure Code Test Pass Rate %'},
    color_discrete_sequence=px.colors.qualitative.Pastel,
)
# Transparent plot background, no grid lines, y ticks rendered as whole
# percentages.
fig_insecure.update_layout(
    plot_bgcolor='rgba(0,0,0,0)',
    xaxis={'showgrid': False},
    yaxis={'showgrid': False, 'tickformat': ".0%"},
    legend={'title': 'Models'},
)
st.plotly_chart(fig_insecure, use_container_width=True)