Adding ONNX file of this model

Beep boop I am the [ONNX export bot 🤖🏎️](https://huggingface.co/spaces/onnx/export). On behalf of [WRCREX](https://huggingface.co/WRCREX), I would like to add to this repository the model converted to ONNX.

What is ONNX? It stands for "Open Neural Network Exchange", and is the most commonly used open standard for machine learning interoperability. You can find out more at [onnx.ai](https://onnx.ai/)!

The exported ONNX model can then be consumed by various backends such as TensorRT or TVM, or simply be used in a few lines with 🤗 Optimum through ONNX Runtime, check out how [here](https://huggingface.co/docs/optimum/main/en/onnxruntime/usage_guides/models)!
This commit is contained in:
Ryan Nelson 2025-03-10 19:26:12 +00:00 committed by system
parent fe8a4ea1ff
commit a19aa7c77d
No known key found for this signature in database
GPG Key ID: 6A528E38E0733467
11 changed files with 93515 additions and 0 deletions

3
.gitattributes vendored

@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
onnx/_model_layers.0_self_attn_rotary_emb_Constant_attr__value filter=lfs diff=lfs merge=lfs -text
onnx/_model_layers.0_self_attn_rotary_emb_Constant_5_attr__value filter=lfs diff=lfs merge=lfs -text
onnx/model.onnx_data filter=lfs diff=lfs merge=lfs -text

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d078cc99f779ceff367f4bef2518b4954067e43a8153c83a46321c819d43738c
size 524288

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d50a5df57529925b572ba0190fc9b1f94d72b26e08c42b2af896eb572e8c8948
size 524288

27
onnx/config.json Normal file

@ -0,0 +1,27 @@
{
"_name_or_path": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
"architectures": [
"LlamaForCausalLM"
],
"attention_bias": false,
"attention_dropout": 0.0,
"bos_token_id": 1,
"eos_token_id": 2,
"hidden_act": "silu",
"hidden_size": 2048,
"initializer_range": 0.02,
"intermediate_size": 5632,
"max_position_embeddings": 2048,
"model_type": "llama",
"num_attention_heads": 32,
"num_hidden_layers": 22,
"num_key_value_heads": 4,
"pretraining_tp": 1,
"rms_norm_eps": 1e-05,
"rope_scaling": null,
"rope_theta": 10000.0,
"tie_word_embeddings": false,
"transformers_version": "4.37.2",
"use_cache": true,
"vocab_size": 32000
}

@ -0,0 +1,7 @@
{
"bos_token_id": 1,
"eos_token_id": 2,
"max_length": 2048,
"pad_token_id": 0,
"transformers_version": "4.37.2"
}

3
onnx/model.onnx Normal file

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5cfccd25f134394a9f51a5328b7a257d5de053a17b316cd39208509e3763839c
size 2134755

3
onnx/model.onnx_data Normal file

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6b08b7ce45cdef33d82b2b05ad1b01b0785594fd1999a37a3389120f396efe87
size 4400193536

@ -0,0 +1,30 @@
{
"bos_token": {
"content": "<s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"eos_token": {
"content": "</s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"pad_token": {
"content": "</s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"unk_token": {
"content": "<unk>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
}
}

93391
onnx/tokenizer.json Normal file

File diff suppressed because it is too large Load Diff

BIN
onnx/tokenizer.model (Stored with Git LFS) Normal file

Binary file not shown.

@ -0,0 +1,42 @@
{
"add_bos_token": true,
"add_eos_token": false,
"added_tokens_decoder": {
"0": {
"content": "<unk>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"1": {
"content": "<s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"2": {
"content": "</s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
}
},
"bos_token": "<s>",
"chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
"clean_up_tokenization_spaces": false,
"eos_token": "</s>",
"legacy": false,
"model_max_length": 2048,
"pad_token": "</s>",
"padding_side": "right",
"sp_model_kwargs": {},
"tokenizer_class": "LlamaTokenizer",
"unk_token": "<unk>",
"use_default_system_prompt": false
}