{ "results": { "average_score": 7.176595744680851, "speed": 15.572252842874866, "contamination_score": 0, "execution_time": 806.94811, "errors": [], "scores_by_category": [ { "category": "Paraphrasing", "average_score": 10.0, "count": 6 }, { "category": "Sentiment Analysis", "average_score": 10.0, "count": 9 }, { "category": "Reading Comprehension", "average_score": 10.0, "count": 17 }, { "category": "Coding", "average_score": 9.333333333333334, "count": 3 }, { "category": "MMLU", "average_score": 8.264462809917354, "count": 121 }, { "category": "Summarization", "average_score": 8.25, "count": 8 }, { "category": "Entity Extraction", "average_score": 8.0, "count": 5 }, { "category": "Function Calling", "average_score": 8.0, "count": 3 }, { "category": "General Knowledge", "average_score": 7.857142857142857, "count": 63 }, { "category": "Trust & Safety", "average_score": 7.666666666666667, "count": 30 }, { "category": "Writing (incl Dialects)", "average_score": 7.090909090909091, "count": 22 }, { "category": "Translation (incl Dialects)", "average_score": 6.638888888888889, "count": 36 }, { "category": "Reasoning & Math", "average_score": 6.186046511627907, "count": 43 }, { "category": "Dialect Detection", "average_score": 6.090909090909091, "count": 11 }, { "category": "Instruction Following", "average_score": 5.857142857142857, "count": 7 }, { "category": "Arabic Language & Grammar", "average_score": 5.764705882352941, "count": 17 }, { "category": "Structuring", "average_score": 5.333333333333333, "count": 3 }, { "category": "RAG QA", "average_score": 4.609756097560975, "count": 41 }, { "category": "Diacritization", "average_score": 4.416666666666667, "count": 12 }, { "category": "Long Context", "average_score": 4.0, "count": 4 }, { "category": "Hallucination", "average_score": 3.3333333333333335, "count": 3 }, { "category": "Transliteration", "average_score": 3.1666666666666665, "count": 6 } ], "scores_by_format": [ { "format": "Short Answer", "average_score": 10.0, "count": 5 }, { "format": "MCQ", "average_score": 8.08296943231441, "count": 229 }, { "format": "Generation", "average_score": 6.324561403508772, "count": 228 }, { "format": "Fill-in-the-blank", "average_score": 3.75, "count": 8 } ] }, "config": { "model": "ALLaM-AI/ALLaM-7B-Instruct-preview", "model_sha": "09279909eca600d150b3e6c83a4ef1400abfcdb6", "submitted_time": "2025-05-10T14:56:04Z", "likes": 111, "params": 7.001, "license": "apache-2.0", "model_source": "Hugging Face", "model_category": "Small" } }