Update post-processor to add bos (#42)

- Update post-processor to add bos (4d3ac242e1d717fbebaa94154be38077f4e1623b)
This commit is contained in:
Pedro Cuenca 2024-04-23 11:33:49 +00:00 committed by system
parent 74fedae5f7
commit 339ce92d05
No known key found for this signature in database
GPG Key ID: 6A528E38E0733467

@@ -2329,11 +2329,70 @@
] ]
}, },
"post_processor": { "post_processor": {
"type": "Sequence",
"processors": [
{
"type": "ByteLevel", "type": "ByteLevel",
"add_prefix_space": true, "add_prefix_space": true,
"trim_offsets": false, "trim_offsets": false,
"use_regex": true "use_regex": true
}, },
{
"type": "TemplateProcessing",
"single": [
{
"SpecialToken": {
"id": "<|begin_of_text|>",
"type_id": 0
}
},
{
"Sequence": {
"id": "A",
"type_id": 0
}
}
],
"pair": [
{
"SpecialToken": {
"id": "<|begin_of_text|>",
"type_id": 0
}
},
{
"Sequence": {
"id": "A",
"type_id": 0
}
},
{
"SpecialToken": {
"id": "<|begin_of_text|>",
"type_id": 1
}
},
{
"Sequence": {
"id": "B",
"type_id": 1
}
}
],
"special_tokens": {
"<|begin_of_text|>": {
"id": "<|begin_of_text|>",
"ids": [
128000
],
"tokens": [
"<|begin_of_text|>"
]
}
}
}
]
},
"decoder": { "decoder": {
"type": "ByteLevel", "type": "ByteLevel",
"add_prefix_space": true, "add_prefix_space": true,