diff --git a/tokenization_arcade100k.py b/tokenization_arcade100k.py index ddbfa46..7b36a36 100644 --- a/tokenization_arcade100k.py +++ b/tokenization_arcade100k.py @@ -111,6 +111,8 @@ class Arcade100kTokenizer(PreTrainedTokenizer): **kwargs, ): super().__init__(errors=errors, **kwargs) + self.errors = errors + self._tiktoken_config = _arcade100k(vocab_file) self.tokenizer = tiktoken.Encoding(**self._tiktoken_config)