Update README.md
Browse files
README.md
CHANGED
|
@@ -297,31 +297,6 @@ This model was evaluated on the well-known OpenLLM v1, OpenLLM v2 and HumanEval_
|
|
| 297 |
<td>91.13</td>
|
| 298 |
<td>97.34</td>
|
| 299 |
</tr>
|
| 300 |
-
<tr>
|
| 301 |
-
<td rowspan="4"><b>Reasoning</b></td>
|
| 302 |
-
<td>AIME24 (0-shot)</td>
|
| 303 |
-
<td>β</td>
|
| 304 |
-
<td>β</td>
|
| 305 |
-
<td>β</td>
|
| 306 |
-
</tr>
|
| 307 |
-
<tr>
|
| 308 |
-
<td>AIME25 (0-shot)</td>
|
| 309 |
-
<td>β</td>
|
| 310 |
-
<td>β</td>
|
| 311 |
-
<td>β</td>
|
| 312 |
-
</tr>
|
| 313 |
-
<tr>
|
| 314 |
-
<td>GPQA (Diamond, 0-shot)</td>
|
| 315 |
-
<td>β</td>
|
| 316 |
-
<td>β</td>
|
| 317 |
-
<td>β</td>
|
| 318 |
-
</tr>
|
| 319 |
-
<tr>
|
| 320 |
-
<td><b>Average</b></td>
|
| 321 |
-
<td><b>β</b></td>
|
| 322 |
-
<td><b>β</b></td>
|
| 323 |
-
<td><b>β</b></td>
|
| 324 |
-
</tr>
|
| 325 |
</tbody>
|
| 326 |
</table>
|
| 327 |
|
|
@@ -366,30 +341,4 @@ lm_eval \
|
|
| 366 |
--tasks humaneval_64_instruct \
|
| 367 |
--batch_size auto
|
| 368 |
```
|
| 369 |
-
|
| 370 |
-
#### LightEval
|
| 371 |
-
```
|
| 372 |
-
# --- model_args.yaml ---
|
| 373 |
-
cat > model_args.yaml <<'YAML'
|
| 374 |
-
model_parameters:
|
| 375 |
-
model_name: "RedHatAI/Qwen3-30B-A3B-NVFP4"
|
| 376 |
-
dtype: auto
|
| 377 |
-
gpu_memory_utilization: 0.9
|
| 378 |
-
tensor_parallel_size: 2
|
| 379 |
-
max_model_length: 40960
|
| 380 |
-
generation_parameters:
|
| 381 |
-
seed: 42
|
| 382 |
-
temperature: 0.6
|
| 383 |
-
top_k: 20
|
| 384 |
-
top_p: 0.95
|
| 385 |
-
min_p: 0.0
|
| 386 |
-
max_new_tokens: 32768
|
| 387 |
-
YAML
|
| 388 |
-
|
| 389 |
-
lighteval vllm model_args.yaml \
|
| 390 |
-
"lighteval|aime24|0,lighteval|aime25|0,lighteval|gpqa:diamond|0" \
|
| 391 |
-
--max-samples -1 \
|
| 392 |
-
--output-dir out_dir
|
| 393 |
-
|
| 394 |
-
```
|
| 395 |
</details>
|
|
|
|
| 297 |
<td>91.13</td>
|
| 298 |
<td>97.34</td>
|
| 299 |
</tr>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 300 |
</tbody>
|
| 301 |
</table>
|
| 302 |
|
|
|
|
| 341 |
--tasks humaneval_64_instruct \
|
| 342 |
--batch_size auto
|
| 343 |
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 344 |
</details>
|