From ca8476f4df74475f1809d85359c7e50010b6a886 Mon Sep 17 00:00:00 2001 From: Omar Sanseviero Date: Wed, 12 Mar 2025 06:31:03 +0000 Subject: [PATCH] Update chat_template (#4) - Update chat_template (44bf2153ffe7ee35988c1096b2cb3cd9690158bb) Co-authored-by: Joshua --- README.md | 5 ++--- tokenizer.json | 4 ++-- tokenizer_config.json | 7 ++++--- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 78c8fa1..7b667e8 100644 --- a/README.md +++ b/README.md @@ -3,9 +3,8 @@ license: gemma library_name: transformers pipeline_tag: text-generation extra_gated_heading: Access Gemma on Hugging Face -extra_gated_prompt: >- - To access Gemma on Hugging Face, you’re required to review and agree to - Google’s usage license. To do this, please ensure you’re logged in to Hugging +extra_gated_prompt: To access Gemma on Hugging Face, you’re required to review and + agree to Google’s usage license. To do this, please ensure you’re logged in to Hugging Face and click below. Requests are processed immediately. extra_gated_button_content: Acknowledge license base_model: google/gemma-3-1b-pt diff --git a/tokenizer.json b/tokenizer.json index 8d5bbd1..29401f9 100644 --- a/tokenizer.json +++ b/tokenizer.json @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7d4046bf0505a327dd5a0abbb427ecd4fc82f99c2ceaa170bc61ecde12809b0c -size 33384570 +oid sha256:4667f2089529e8e7657cfb6d1c19910ae71ff5f28aa7ab2ff2763330affad795 +size 33384568 diff --git a/tokenizer_config.json b/tokenizer_config.json index 397cb58..7bdd14f 100644 --- a/tokenizer_config.json +++ b/tokenizer_config.json @@ -2160,7 +2160,7 @@ "normalized": false, "rstrip": false, "single_word": false, - "special": false + "special": true }, "256000": { "content": "", @@ -2168,7 +2168,7 @@ "normalized": false, "rstrip": false, "single_word": false, - "special": false + "special": true }, "256001": { "content": "", @@ -51325,7 +51325,7 @@ }, "boi_token": "", "bos_token": "", - "chat_template": "{{ bos_token }}\n{%- if messages[0]['role'] == 'system' -%}\n {%- set first_user_prefix = messages[0]['content'][0]['text'] + '\n\n' -%}\n {%- set loop_messages = messages[1:] -%}\n{%- else -%}\n {%- set first_user_prefix = \"\" -%}\n {%- set loop_messages = messages -%}\n{%- endif -%}\n{%- for message in loop_messages -%}\n {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}\n {{ raise_exception(\"Conversation roles must alternate user/assistant/user/assistant/...\") }}\n {%- endif -%}\n {%- if (message['role'] == 'assistant') -%}\n {%- set role = \"model\" -%}\n {%- else -%}\n {%- set role = message['role'] -%}\n {%- endif -%}\n {{ '' + role + '\n' + (first_user_prefix if loop.first else \"\") }}\n {%- if message['content'] is string -%}\n {{ message['content'] | trim }}\n {%- elif message['content'] is iterable -%}\n {%- for item in message['content'] -%}\n {%- if item['type'] == 'image' -%}\n {{ '' }}\n {%- elif item['type'] == 'text' -%}\n {{ item['text'] | trim }}\n {%- endif -%}\n {%- endfor -%}\n {%- else -%}\n {{ raise_exception(\"Invalid content type\") }}\n {%- endif -%}\n {{ '\n' }}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{'model\n'}}\n{%- endif -%}\n", + "chat_template": "{{ bos_token }}\n{%- if messages[0]['role'] == 'system' -%}\n {%- if messages[0]['content'] is string -%}\n {%- set first_user_prefix = messages[0]['content'] + '\n\n' -%}\n {%- else -%}\n {%- set first_user_prefix = messages[0]['content'][0]['text'] + '\n\n' -%}\n {%- endif -%}\n {%- set loop_messages = messages[1:] -%}\n{%- else -%}\n {%- set first_user_prefix = \"\" -%}\n {%- set loop_messages = messages -%}\n{%- endif -%}\n{%- for message in loop_messages -%}\n {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}\n {{ raise_exception(\"Conversation roles must alternate user/assistant/user/assistant/...\") }}\n {%- endif -%}\n {%- if (message['role'] == 'assistant') -%}\n {%- set role = \"model\" -%}\n {%- else -%}\n {%- set role = message['role'] -%}\n {%- endif -%}\n {{ '' + role + '\n' + (first_user_prefix if loop.first else \"\") }}\n {%- if message['content'] is string -%}\n {{ message['content'] | trim }}\n {%- elif message['content'] is iterable -%}\n {%- for item in message['content'] -%}\n {%- if item['type'] == 'image' -%}\n {{ '' }}\n {%- elif item['type'] == 'text' -%}\n {{ item['text'] | trim }}\n {%- endif -%}\n {%- endfor -%}\n {%- else -%}\n {{ raise_exception(\"Invalid content type\") }}\n {%- endif -%}\n {{ '\n' }}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{'model\n'}}\n{%- endif -%}\n", "clean_up_tokenization_spaces": false, "eoi_token": "", "eos_token": "", @@ -51337,6 +51337,7 @@ "image_token": "", "model_max_length": 1000000000000000019884624838656, "pad_token": "", + "processor_class": "Gemma3Processor", "sp_model_kwargs": null, "spaces_between_special_tokens": false, "tokenizer_class": "GemmaTokenizer",