From 847547f1586e0cf2bed5b20a5566b85f286d52dc Mon Sep 17 00:00:00 2001 From: Open LLM Leaderboard PR Bot Date: Wed, 12 Jun 2024 19:03:00 +0000 Subject: [PATCH] Adding Evaluation Results This is an automated PR created with https://huggingface.co/spaces/Weyaxi/open-llm-leaderboard-results-pr The purpose of this PR is to add evaluation results from the Open LLM Leaderboard to your model card. If you encounter any issues, please report them to https://huggingface.co/spaces/Weyaxi/open-llm-leaderboard-results-pr/discussions --- README.md | 124 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 120 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index b2d85ca..1e92ab4 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,115 @@ --- -license: mit -license_link: https://huggingface.co/microsoft/phi-1_5/resolve/main/LICENSE language: - en -pipeline_tag: text-generation +license: mit tags: - nlp - code +license_link: https://huggingface.co/microsoft/phi-1_5/resolve/main/LICENSE +pipeline_tag: text-generation +model-index: +- name: phi-1_5 + results: + - task: + type: text-generation + name: Text Generation + dataset: + name: AI2 Reasoning Challenge (25-Shot) + type: ai2_arc + config: ARC-Challenge + split: test + args: + num_few_shot: 25 + metrics: + - type: acc_norm + value: 52.9 + name: normalized accuracy + source: + url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=microsoft/phi-1_5 + name: Open LLM Leaderboard + - task: + type: text-generation + name: Text Generation + dataset: + name: HellaSwag (10-Shot) + type: hellaswag + split: validation + args: + num_few_shot: 10 + metrics: + - type: acc_norm + value: 63.79 + name: normalized accuracy + source: + url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=microsoft/phi-1_5 + name: Open LLM Leaderboard + - task: + type: text-generation + name: Text Generation + dataset: + name: MMLU (5-Shot) + type: cais/mmlu + config: all + 
split: test + args: + num_few_shot: 5 + metrics: + - type: acc + value: 43.89 + name: accuracy + source: + url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=microsoft/phi-1_5 + name: Open LLM Leaderboard + - task: + type: text-generation + name: Text Generation + dataset: + name: TruthfulQA (0-shot) + type: truthful_qa + config: multiple_choice + split: validation + args: + num_few_shot: 0 + metrics: + - type: mc2 + value: 40.89 + source: + url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=microsoft/phi-1_5 + name: Open LLM Leaderboard + - task: + type: text-generation + name: Text Generation + dataset: + name: Winogrande (5-shot) + type: winogrande + config: winogrande_xl + split: validation + args: + num_few_shot: 5 + metrics: + - type: acc + value: 72.22 + name: accuracy + source: + url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=microsoft/phi-1_5 + name: Open LLM Leaderboard + - task: + type: text-generation + name: Text Generation + dataset: + name: GSM8k (5-shot) + type: gsm8k + config: main + split: test + args: + num_few_shot: 5 + metrics: + - type: acc + value: 12.43 + name: accuracy + source: + url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=microsoft/phi-1_5 + name: Open LLM Leaderboard --- ## Model Summary @@ -158,4 +261,17 @@ You can find the paper at https://arxiv.org/abs/2309.05463. Please cite as: ## Trademarks -This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft trademarks or logos is subject to and must follow [Microsoft’s Trademark & Brand Guidelines](https://www.microsoft.com/en-us/legal/intellectualproperty/trademarks). Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship. Any use of third-party trademarks or logos are subject to those third-party’s policies. 
\ No newline at end of file +This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft trademarks or logos is subject to and must follow [Microsoft’s Trademark & Brand Guidelines](https://www.microsoft.com/en-us/legal/intellectualproperty/trademarks). Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship. Any use of third-party trademarks or logos is subject to those third-party’s policies. +# [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard) +Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_microsoft__phi-1_5) + +| Metric |Value| +|---------------------------------|----:| +|Avg. |47.69| +|AI2 Reasoning Challenge (25-Shot)|52.90| +|HellaSwag (10-Shot) |63.79| +|MMLU (5-Shot) |43.89| +|TruthfulQA (0-shot) |40.89| +|Winogrande (5-shot) |72.22| +|GSM8k (5-shot) |12.43| +