nm-research committed on
Commit
a4df9f0
·
verified ·
1 Parent(s): 833c3df

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +0 -51
README.md CHANGED
@@ -297,31 +297,6 @@ This model was evaluated on the well-known OpenLLM v1, OpenLLM v2 and HumanEval_
297
  <td>91.13</td>
298
  <td>97.34</td>
299
  </tr>
300
- <tr>
301
- <td rowspan="4"><b>Reasoning</b></td>
302
- <td>AIME24 (0-shot)</td>
303
- <td>—</td>
304
- <td>—</td>
305
- <td>—</td>
306
- </tr>
307
- <tr>
308
- <td>AIME25 (0-shot)</td>
309
- <td>—</td>
310
- <td>—</td>
311
- <td>—</td>
312
- </tr>
313
- <tr>
314
- <td>GPQA (Diamond, 0-shot)</td>
315
- <td>—</td>
316
- <td>—</td>
317
- <td>—</td>
318
- </tr>
319
- <tr>
320
- <td><b>Average</b></td>
321
- <td><b>—</b></td>
322
- <td><b>—</b></td>
323
- <td><b>—</b></td>
324
- </tr>
325
  </tbody>
326
  </table>
327
 
@@ -366,30 +341,4 @@ lm_eval \
366
  --tasks humaneval_64_instruct \
367
  --batch_size auto
368
  ```
369
-
370
- #### LightEval
371
- ```
372
- # --- model_args.yaml ---
373
- cat > model_args.yaml <<'YAML'
374
- model_parameters:
375
- model_name: "RedHatAI/Qwen3-30B-A3B-NVFP4"
376
- dtype: auto
377
- gpu_memory_utilization: 0.9
378
- tensor_parallel_size: 2
379
- max_model_length: 40960
380
- generation_parameters:
381
- seed: 42
382
- temperature: 0.6
383
- top_k: 20
384
- top_p: 0.95
385
- min_p: 0.0
386
- max_new_tokens: 32768
387
- YAML
388
-
389
- lighteval vllm model_args.yaml \
390
- "lighteval|aime24|0,lighteval|aime25|0,lighteval|gpqa:diamond|0" \
391
- --max-samples -1 \
392
- --output-dir out_dir
393
-
394
- ```
395
  </details>
 
297
  <td>91.13</td>
298
  <td>97.34</td>
299
  </tr>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
300
  </tbody>
301
  </table>
302
 
 
341
  --tasks humaneval_64_instruct \
342
  --batch_size auto
343
  ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
344
  </details>