Spaces:
Running
Running
Update data/leaderboard_json/afrobench_lite.json
Browse files
### Add N-ATLAS-LLM AfroBench-LITE scores to leaderboard
This PR adds N-ATLAS-LLM results from an AfroBench-LITE evaluation run to the leaderboard.
#### Scores added (N-ATLAS-LLM)
- **NLI (AfriXNLI):** 47.7
- **Intent (InjongoIntent):** 70.0
- **MT (FLORES en→xx):** 50.1
- **MMLU (AfriMMLU):** 37.8
- **Math (AfriMGSM):** 40.4
- **Topic (SIB):** 80.5
- **RC (Belebele):** 51.4
#### Reference
- https://huggingface.co/blog/seun-ajayi/n-atlas-evaluation-report
data/leaderboard_json/afrobench_lite.json
CHANGED
|
@@ -24,7 +24,8 @@
|
|
| 24 |
"Gemini-2.5 Flash": 69.3,
|
| 25 |
"Gemini-2.5 Pro": 72.5,
|
| 26 |
"GPT-5 (Aug)": 83.3,
|
| 27 |
-
"Gemini 3 Pro": 77.4
|
|
|
|
| 28 |
}
|
| 29 |
},
|
| 30 |
"Intent": {
|
|
@@ -52,7 +53,8 @@
|
|
| 52 |
"Gemini-2.5 Flash": 87.4,
|
| 53 |
"Gemini-2.5 Pro": 88.0,
|
| 54 |
"GPT-5 (Aug)": 87.3,
|
| 55 |
-
"Gemini 3 Pro": 88.8
|
|
|
|
| 56 |
}
|
| 57 |
},
|
| 58 |
"MT(en/fr-xx)": {
|
|
@@ -80,7 +82,8 @@
|
|
| 80 |
"Gemini-2.5 Flash": 45.3,
|
| 81 |
"Gemini-2.5 Pro": 46.3,
|
| 82 |
"GPT-5 (Aug)": 44.8,
|
| 83 |
-
"Gemini 3 Pro": 47.3
|
|
|
|
| 84 |
}
|
| 85 |
},
|
| 86 |
"MMLU": {
|
|
@@ -108,7 +111,8 @@
|
|
| 108 |
"Gemini-2.5 Flash": 67.3,
|
| 109 |
"Gemini-2.5 Pro": 77.4,
|
| 110 |
"GPT-5 (Aug)": 83.3,
|
| 111 |
-
"Gemini 3 Pro": 86.1
|
|
|
|
| 112 |
}
|
| 113 |
},
|
| 114 |
"Math": {
|
|
@@ -136,7 +140,8 @@
|
|
| 136 |
"Gemini-2.5 Flash": 69.3,
|
| 137 |
"Gemini-2.5 Pro": 73.2,
|
| 138 |
"GPT-5 (Aug)": 73.7,
|
| 139 |
-
"Gemini 3 Pro": 73.4
|
|
|
|
| 140 |
}
|
| 141 |
},
|
| 142 |
"Topic": {
|
|
@@ -164,7 +169,8 @@
|
|
| 164 |
"Gemini-2.5 Flash": 86.8,
|
| 165 |
"Gemini-2.5 Pro": 87.9,
|
| 166 |
"GPT-5 (Aug)": 88.5,
|
| 167 |
-
"Gemini 3 Pro": 87.9
|
|
|
|
| 168 |
}
|
| 169 |
},
|
| 170 |
"RC": {
|
|
@@ -192,7 +198,8 @@
|
|
| 192 |
"Gemini-2.5 Flash": 41.6,
|
| 193 |
"Gemini-2.5 Pro": 76.4,
|
| 194 |
"GPT-5 (Aug)": 83.3,
|
| 195 |
-
"Gemini 3 Pro": 71.2
|
|
|
|
| 196 |
}
|
| 197 |
}
|
| 198 |
}
|
|
|
|
| 24 |
"Gemini-2.5 Flash": 69.3,
|
| 25 |
"Gemini-2.5 Pro": 72.5,
|
| 26 |
"GPT-5 (Aug)": 83.3,
|
| 27 |
+
"Gemini 3 Pro": 77.4,
|
| 28 |
+
"N-ATLAS-LLM": 47.7
|
| 29 |
}
|
| 30 |
},
|
| 31 |
"Intent": {
|
|
|
|
| 53 |
"Gemini-2.5 Flash": 87.4,
|
| 54 |
"Gemini-2.5 Pro": 88.0,
|
| 55 |
"GPT-5 (Aug)": 87.3,
|
| 56 |
+
"Gemini 3 Pro": 88.8,
|
| 57 |
+
"N-ATLAS-LLM": 70.0
|
| 58 |
}
|
| 59 |
},
|
| 60 |
"MT(en/fr-xx)": {
|
|
|
|
| 82 |
"Gemini-2.5 Flash": 45.3,
|
| 83 |
"Gemini-2.5 Pro": 46.3,
|
| 84 |
"GPT-5 (Aug)": 44.8,
|
| 85 |
+
"Gemini 3 Pro": 47.3,
|
| 86 |
+
"N-ATLAS-LLM": 50.1
|
| 87 |
}
|
| 88 |
},
|
| 89 |
"MMLU": {
|
|
|
|
| 111 |
"Gemini-2.5 Flash": 67.3,
|
| 112 |
"Gemini-2.5 Pro": 77.4,
|
| 113 |
"GPT-5 (Aug)": 83.3,
|
| 114 |
+
"Gemini 3 Pro": 86.1,
|
| 115 |
+
"N-ATLAS-LLM": 37.8
|
| 116 |
}
|
| 117 |
},
|
| 118 |
"Math": {
|
|
|
|
| 140 |
"Gemini-2.5 Flash": 69.3,
|
| 141 |
"Gemini-2.5 Pro": 73.2,
|
| 142 |
"GPT-5 (Aug)": 73.7,
|
| 143 |
+
"Gemini 3 Pro": 73.4,
|
| 144 |
+
"N-ATLAS-LLM": 40.4
|
| 145 |
}
|
| 146 |
},
|
| 147 |
"Topic": {
|
|
|
|
| 169 |
"Gemini-2.5 Flash": 86.8,
|
| 170 |
"Gemini-2.5 Pro": 87.9,
|
| 171 |
"GPT-5 (Aug)": 88.5,
|
| 172 |
+
"Gemini 3 Pro": 87.9,
|
| 173 |
+
"N-ATLAS-LLM": 80.5
|
| 174 |
}
|
| 175 |
},
|
| 176 |
"RC": {
|
|
|
|
| 198 |
"Gemini-2.5 Flash": 41.6,
|
| 199 |
"Gemini-2.5 Pro": 76.4,
|
| 200 |
"GPT-5 (Aug)": 83.3,
|
| 201 |
+
"Gemini 3 Pro": 71.2,
|
| 202 |
+
"N-ATLAS-LLM": 51.4
|
| 203 |
}
|
| 204 |
}
|
| 205 |
}
|