Compare commits


No commits in common. "5f0b02c75b57c5855da9ae460ce51323ea669d8a" and "1448453bdb895762499deb4176c1dd83b145fac1" have entirely different histories.

5 changed files with 20 additions and 98 deletions

README.md

@@ -8,8 +8,9 @@ tags:
 - pytorch
 - llama
 - llama-3
-license: llama3
-new_version: meta-llama/Llama-3.1-8B-Instruct
+license: other
+license_name: llama3
+license_link: LICENSE
 extra_gated_prompt: >-
   ### META LLAMA 3 COMMUNITY LICENSE AGREEMENT
@@ -184,29 +185,6 @@ extra_gated_fields:
   By clicking Submit below I accept the terms of the license and acknowledge that the information I provide will be collected stored processed and shared in accordance with the Meta Privacy Policy: checkbox
 extra_gated_description: The information you provide will be collected, stored, processed and shared in accordance with the [Meta Privacy Policy](https://www.facebook.com/privacy/policy/).
 extra_gated_button_content: Submit
-widget:
-  - example_title: Hello
-    messages:
-      - role: user
-        content: Hey my name is Julien! How are you?
-  - example_title: Winter holidays
-    messages:
-      - role: system
-        content: You are a helpful and honest assistant. Please, respond concisely and truthfully.
-      - role: user
-        content: Can you recommend a good destination for Winter holidays?
-  - example_title: Programming assistant
-    messages:
-      - role: system
-        content: You are a helpful and honest code and programming assistant. Please, respond concisely and truthfully.
-      - role: user
-        content: Write a function that computes the nth fibonacci number.
-inference:
-  parameters:
-    max_new_tokens: 300
-    stop:
-      - <|end_of_text|>
-      - <|eot_id|>
 ---
 
 ## Model Details
@@ -309,7 +287,7 @@ pipeline = transformers.pipeline(
     "text-generation",
     model=model_id,
     model_kwargs={"torch_dtype": torch.bfloat16},
-    device_map="auto",
+    device="auto",
 )
 
 messages = [
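
A note on the hunk above: `device="auto"` is not a valid value for the pipeline's `device` argument (it expects a device index or `torch.device`), while `device_map="auto"` hands weight placement to Accelerate, so the removed side is the working spelling. A minimal sketch of that call, reusing the card's `model_id`:

```python
import torch
import transformers

model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

# device_map="auto" places (and, if needed, shards) the weights via
# Accelerate; device="auto" is typically rejected, since `device`
# expects an integer index or a torch.device.
pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={"torch_dtype": torch.bfloat16},
    device_map="auto",
)
```
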
@@ -317,20 +295,26 @@ messages = [
     {"role": "system", "content": "You are a pirate chatbot who always responds in pirate speak!"},
     {"role": "user", "content": "Who are you?"},
 ]
 
+prompt = pipeline.tokenizer.apply_chat_template(
+    messages,
+    tokenize=False,
+    add_generation_prompt=True
+)
+
 terminators = [
     pipeline.tokenizer.eos_token_id,
     pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>")
 ]
 
 outputs = pipeline(
-    messages,
+    prompt,
     max_new_tokens=256,
     eos_token_id=terminators,
     do_sample=True,
     temperature=0.6,
     top_p=0.9,
 )
-print(outputs[0]["generated_text"][-1])
+print(outputs[0]["generated_text"][len(prompt):])
 ```
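
Two API generations of the same example meet in this hunk: recent transformers chat pipelines accept the `messages` list directly and return the continued conversation, so `outputs[0]["generated_text"][-1]` is the new assistant turn, while the older string-based flow renders the prompt with `apply_chat_template` and slices it off the decoded output. A sketch of the newer flow, reusing `pipeline` and `messages` from the example above and assuming a transformers release with chat-pipeline support:

```python
# Chat-style usage: hand the messages list straight to the pipeline.
outputs = pipeline(
    messages,
    max_new_tokens=256,
    do_sample=True,
    temperature=0.6,
    top_p=0.9,
)
# generated_text is the full conversation; the last entry is the newly
# generated assistant message, e.g. {"role": "assistant", "content": "..."}.
print(outputs[0]["generated_text"][-1])
```
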
 #### Transformers AutoModelForCausalLM
@@ -448,7 +432,7 @@ For Hugging Face support, we recommend using transformers or TGI, but a similar
 
 **Overview** Llama 3 was pretrained on over 15 trillion tokens of data from publicly available sources. The fine-tuning data includes publicly available instruction datasets, as well as over 10M human-annotated examples. Neither the pretraining nor the fine-tuning datasets include Meta user data.
 
-**Data Freshness** The pretraining data has a cutoff of March 2023 for the 8B and December 2023 for the 70B models respectively.
+**Data Freshness** The pretraining data has a cutoff of March 2023 for the 7B and December 2023 for the 70B models respectively.
 
 ## Benchmarks

config.json

@@ -5,7 +5,7 @@
   "attention_bias": false,
   "attention_dropout": 0.0,
   "bos_token_id": 128000,
-  "eos_token_id": 128009,
+  "eos_token_id": 128001,
   "hidden_act": "silu",
   "hidden_size": 4096,
   "initializer_range": 0.02,

generation_config.json

@@ -1,9 +1,6 @@
 {
+  "_from_model_config": true,
   "bos_token_id": 128000,
-  "eos_token_id": [128001, 128009],
-  "do_sample": true,
-  "temperature": 0.6,
-  "max_length": 4096,
-  "top_p": 0.9,
+  "eos_token_id": 128001,
   "transformers_version": "4.40.0.dev0"
 }
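
`eos_token_id` in a generation config may be a single id or a list; with `[128001, 128009]`, `generate()` stops on whichever of `<|end_of_text|>` or `<|eot_id|>` comes first. A sketch of passing the same stops explicitly at call time, with values taken from the hunk above:

```python
from transformers import GenerationConfig

# A list-valued eos_token_id stops generation on either token.
gen_config = GenerationConfig(
    bos_token_id=128000,
    eos_token_id=[128001, 128009],  # <|end_of_text|>, <|eot_id|>
    do_sample=True,
    temperature=0.6,
    top_p=0.9,
    max_length=4096,
)
print(gen_config.eos_token_id)  # [128001, 128009]
```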

tokenizer.json

@@ -2329,70 +2329,11 @@
     ]
   },
   "post_processor": {
-    "type": "Sequence",
-    "processors": [
-      {
-        "type": "ByteLevel",
-        "add_prefix_space": true,
-        "trim_offsets": false,
-        "use_regex": true
-      },
-      {
-        "type": "TemplateProcessing",
-        "single": [
-          {
-            "SpecialToken": {
-              "id": "<|begin_of_text|>",
-              "type_id": 0
-            }
-          },
-          {
-            "Sequence": {
-              "id": "A",
-              "type_id": 0
-            }
-          }
-        ],
-        "pair": [
-          {
-            "SpecialToken": {
-              "id": "<|begin_of_text|>",
-              "type_id": 0
-            }
-          },
-          {
-            "Sequence": {
-              "id": "A",
-              "type_id": 0
-            }
-          },
-          {
-            "SpecialToken": {
-              "id": "<|begin_of_text|>",
-              "type_id": 1
-            }
-          },
-          {
-            "Sequence": {
-              "id": "B",
-              "type_id": 1
-            }
-          }
-        ],
-        "special_tokens": {
-          "<|begin_of_text|>": {
-            "id": "<|begin_of_text|>",
-            "ids": [
-              128000
-            ],
-            "tokens": [
-              "<|begin_of_text|>"
-            ]
-          }
-        }
-      }
-    ]
+    "type": "ByteLevel",
+    "add_prefix_space": true,
+    "trim_offsets": false,
+    "use_regex": true
   },
   "decoder": {
     "type": "ByteLevel",
     "add_prefix_space": true,
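
The deleted `TemplateProcessing` step is what prepends `<|begin_of_text|>` (id 128000) during encoding; with only the `ByteLevel` post-processor left, no BOS token is added automatically and callers must prepend it themselves. A quick way to see which behavior a given checkout has, assuming the repo's fast tokenizer:

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")

ids = tok("Hello")["input_ids"]
# With TemplateProcessing present, the encoding starts with 128000
# (<|begin_of_text|>); with ByteLevel alone, it starts with the raw text.
print(ids[0] == 128000)
```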

tokenizer_config.json

@@ -2052,7 +2052,7 @@
   "bos_token": "<|begin_of_text|>",
   "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
   "clean_up_tokenization_spaces": true,
-  "eos_token": "<|eot_id|>",
+  "eos_token": "<|end_of_text|>",
   "model_input_names": [
     "input_ids",
     "attention_mask"