From 21ee10d32c8185d9d4da61ed758d5fcedd60089d Mon Sep 17 00:00:00 2001
From: Jonathan Tow
Date: Thu, 25 Jan 2024 16:17:34 +0000
Subject: [PATCH] fix(tokenizer): expose `errors`

---
 tokenization_arcade100k.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tokenization_arcade100k.py b/tokenization_arcade100k.py
index ddbfa46..7b36a36 100644
--- a/tokenization_arcade100k.py
+++ b/tokenization_arcade100k.py
@@ -111,6 +111,8 @@ class Arcade100kTokenizer(PreTrainedTokenizer):
         **kwargs,
     ):
         super().__init__(errors=errors, **kwargs)
+        self.errors = errors
+
         self._tiktoken_config = _arcade100k(vocab_file)
         self.tokenizer = tiktoken.Encoding(**self._tiktoken_config)