fix(tokenizer): expose errors

This commit is contained in:
Jonathan Tow 2024-01-25 16:17:34 +00:00 committed by system
parent 810b45c00e
commit 21ee10d32c
No known key found for this signature in database
GPG Key ID: 6A528E38E0733467

@@ -111,6 +111,8 @@ class Arcade100kTokenizer(PreTrainedTokenizer):
**kwargs,
):
super().__init__(errors=errors, **kwargs)
self.errors = errors
self._tiktoken_config = _arcade100k(vocab_file)
self.tokenizer = tiktoken.Encoding(**self._tiktoken_config)