fix(tokenizer): expose errors
This commit is contained in:
parent
810b45c00e
commit
21ee10d32c
@ -111,6 +111,8 @@ class Arcade100kTokenizer(PreTrainedTokenizer):
|
|||||||
**kwargs,
|
**kwargs,
|
||||||
):
|
):
|
||||||
super().__init__(errors=errors, **kwargs)
|
super().__init__(errors=errors, **kwargs)
|
||||||
|
self.errors = errors
|
||||||
|
|
||||||
self._tiktoken_config = _arcade100k(vocab_file)
|
self._tiktoken_config = _arcade100k(vocab_file)
|
||||||
self.tokenizer = tiktoken.Encoding(**self._tiktoken_config)
|
self.tokenizer = tiktoken.Encoding(**self._tiktoken_config)
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user