From 7daa976d0c5c4f25f6f513094ae567d74bf11818 Mon Sep 17 00:00:00 2001 From: Nathan Habib Date: Mon, 2 Mar 2026 13:35:24 +0000 Subject: [PATCH] Add GPQA Diamond evaluation result --- .eval_results/gpqa.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 .eval_results/gpqa.yaml diff --git a/.eval_results/gpqa.yaml b/.eval_results/gpqa.yaml new file mode 100644 index 0000000..a0fac2a --- /dev/null +++ b/.eval_results/gpqa.yaml @@ -0,0 +1,8 @@ +- dataset: + id: Idavidrein/gpqa + task_id: diamond + value: 81.7 + source: + url: https://huggingface.co/Qwen/Qwen3.5-9B + name: Model Card + user: SaylorTwift