{ "results": { "average_score": 7.729787234042553, "speed": 2.5724615453224007, "contamination_score": 0.0, "execution_time": 7123.527282, "errors": [], "scores_by_category": [ { "category": "Reading Comprehension", "average_score": 10.0, "count": 17 }, { "category": "Function Calling", "average_score": 10.0, "count": 3 }, { "category": "Sentiment Analysis", "average_score": 10.0, "count": 9 }, { "category": "Coding", "average_score": 10.0, "count": 3 }, { "category": "Paraphrasing", "average_score": 9.833333333333334, "count": 6 }, { "category": "Entity Extraction", "average_score": 8.8, "count": 5 }, { "category": "Summarization", "average_score": 8.75, "count": 8 }, { "category": "MMLU", "average_score": 8.677685950413224, "count": 121 }, { "category": "General Knowledge", "average_score": 8.571428571428571, "count": 63 }, { "category": "Trust & Safety", "average_score": 8.333333333333334, "count": 30 }, { "category": "Long Context", "average_score": 8.0, "count": 4 }, { "category": "Transliteration", "average_score": 7.666666666666667, "count": 6 }, { "category": "Instruction Following", "average_score": 7.571428571428571, "count": 7 }, { "category": "Reasoning & Math", "average_score": 7.023255813953488, "count": 43 }, { "category": "Translation (incl Dialects)", "average_score": 6.333333333333333, "count": 36 }, { "category": "Structuring", "average_score": 6.333333333333333, "count": 3 }, { "category": "RAG QA", "average_score": 6.2926829268292686, "count": 41 }, { "category": "Writing (incl Dialects)", "average_score": 6.181818181818182, "count": 22 }, { "category": "Dialect Detection", "average_score": 6.0, "count": 11 }, { "category": "Arabic Language & Grammar", "average_score": 5.352941176470588, "count": 17 }, { "category": "Diacritization", "average_score": 4.916666666666667, "count": 12 }, { "category": "Hallucination", "average_score": 3.3333333333333335, "count": 3 } ], "scores_by_format": [ { "format": "Short Answer", "average_score": 10.0, "count": 5 }, { "format": "MCQ", "average_score": 8.54585152838428, "count": 229 }, { "format": "Generation", "average_score": 6.956140350877193, "count": 228 }, { "format": "Fill-in-the-blank", "average_score": 5.0, "count": 8 } ] }, "config": { "model": "arcee-ai/Virtuoso-Small", "model_sha": "4d6aadc7c9ea2843dccd03c8bec54efa61475dcc", "submitted_time": "2025-06-02T19:37:37Z", "likes": 71, "params": 14.77, "license": "apache-2.0", "model_source": "Hugging Face", "model_category": "Medium" } }