From babcf60cae0a1f438d7ade582983d4ba462303c2 Mon Sep 17 00:00:00 2001 From: Xiao Date: Sat, 13 Apr 2024 13:01:41 +0000 Subject: [PATCH] Adding ONNX file of this model (#44) - Adding ONNX file of this model (6a3fd5fa10d7c4e4fabeace29e36b2bfa76d45d5) Co-authored-by: Yash --- .gitattributes | 1 + onnx/Constant_7_attr__value | Bin 0 -> 65552 bytes onnx/config.json | 28 ++++++++++++++++++ onnx/model.onnx | 3 ++ onnx/model.onnx_data | 3 ++ onnx/sentencepiece.bpe.model | 3 ++ onnx/special_tokens_map.json | 51 ++++++++++++++++++++++++++++++++ onnx/tokenizer.json | 3 ++ onnx/tokenizer_config.json | 55 +++++++++++++++++++++++++++++++++++ 9 files changed, 147 insertions(+) create mode 100644 onnx/Constant_7_attr__value create mode 100644 onnx/config.json create mode 100644 onnx/model.onnx create mode 100644 onnx/model.onnx_data create mode 100644 onnx/sentencepiece.bpe.model create mode 100644 onnx/special_tokens_map.json create mode 100644 onnx/tokenizer.json create mode 100644 onnx/tokenizer_config.json diff --git a/.gitattributes b/.gitattributes index 52373fe..3fc40c7 100644 --- a/.gitattributes +++ b/.gitattributes @@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text tokenizer.json filter=lfs diff=lfs merge=lfs -text +onnx/model.onnx_data filter=lfs diff=lfs merge=lfs -text diff --git a/onnx/Constant_7_attr__value b/onnx/Constant_7_attr__value new file mode 100644 index 0000000000000000000000000000000000000000..3cdc05e5f550d8bd136f28efcea0ad5e6b4169c8 GIT binary patch literal 65552 zcmeIufdBvi0K=g9Qy<_8g-~I@fB^#r3>YwAz<>b*1`HT5V8DO@0|pEjFkrxd0RsjM z7%*VKfB^#r3>YwAz<>b*1`HT5V8DO@0|pEjFkrxd0RsjM7%*VKfB^#r3>YwAz<>b* z1`HT5V8DO@0|pEjFkrxd0RsjM7%*VKfB^#r3>YwAz<>b*1`HT5V8DO@0|pEjFkrxd z0RsjM7%*VKfB^#r3>YwAz<>b*1`HT5V8DO@0|pEjFkrxd0RsjM7%*VKfB^#r3>YwA zz<>b*1`HT5V8DO@0|pEjFkrxd0RsjM7%*VKfB^#r3>YwAz<>b*1`HT5V8DO@0|pEj wFkrxd0RsjM7%*VKfB^#r3>YwAz<>b*1`HT5V8DO@0|pEjFkrxd0RskpU;rNg0RR91 literal 0 HcmV?d00001 diff --git a/onnx/config.json b/onnx/config.json new file mode 100644 index 0000000..aadef53 --- /dev/null +++ b/onnx/config.json @@ -0,0 +1,28 @@ +{ + "_name_or_path": "BAAI/bge-m3", + "architectures": [ + "XLMRobertaModel" + ], + "attention_probs_dropout_prob": 0.1, + "bos_token_id": 0, + "classifier_dropout": null, + "eos_token_id": 2, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 4096, + "layer_norm_eps": 1e-05, + "max_position_embeddings": 8194, + "model_type": "xlm-roberta", + "num_attention_heads": 16, + "num_hidden_layers": 24, + "output_past": true, + "pad_token_id": 1, + "position_embedding_type": "absolute", + "torch_dtype": "float32", + "transformers_version": "4.37.2", + "type_vocab_size": 1, + "use_cache": true, + "vocab_size": 250002 +} diff --git a/onnx/model.onnx b/onnx/model.onnx new file mode 100644 index 0000000..8e9ce19 --- /dev/null +++ b/onnx/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f84251230831afb359ab26d9fd37d5936d4d9bb5d1d5410e66442f630f24435b +size 724923 diff --git a/onnx/model.onnx_data b/onnx/model.onnx_data new file mode 100644 index 0000000..cd06d39 --- /dev/null +++ b/onnx/model.onnx_data @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1eebfb28493f67bba03ce0ef64bfdc7fc5a3bd9d7493f818bb1d78cd798416b4 +size 2266820608 diff --git a/onnx/sentencepiece.bpe.model b/onnx/sentencepiece.bpe.model new file mode 100644 index 0000000..7a3f40a --- /dev/null +++ b/onnx/sentencepiece.bpe.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865 +size 5069051 diff --git a/onnx/special_tokens_map.json b/onnx/special_tokens_map.json new file mode 100644 index 0000000..b1879d7 --- /dev/null +++ b/onnx/special_tokens_map.json @@ -0,0 +1,51 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "cls_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "mask_token": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "sep_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/onnx/tokenizer.json b/onnx/tokenizer.json new file mode 100644 index 0000000..708f9f8 --- /dev/null +++ b/onnx/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6710678b12670bc442b99edc952c4d996ae309a7020c1fa0096dd245c2faf790 +size 17082821 diff --git a/onnx/tokenizer_config.json b/onnx/tokenizer_config.json new file mode 100644 index 0000000..328a00a --- /dev/null +++ b/onnx/tokenizer_config.json @@ -0,0 +1,55 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250001": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "", + "clean_up_tokenization_spaces": true, + "cls_token": "", + "eos_token": "", + "mask_token": "", + "model_max_length": 8192, + "pad_token": "", + "sep_token": "", + "sp_model_kwargs": {}, + "tokenizer_class": "XLMRobertaTokenizer", + "unk_token": "" +}