feat(tokenizer): expose merge ranks and special tokens for GGUF

This commit is contained in:
Jonathan Tow 2024-01-19 18:22:13 +00:00 committed by system
parent 3aeae29673
commit a8f2f2862b
No known key found for this signature in database
GPG Key ID: 6A528E38E0733467

@@ -126,6 +126,9 @@ class Arcade100kTokenizer(PreTrainedTokenizer):
self.decoder.update({i: n for n, i in self.tokenizer._special_tokens.items()})
self.eos_token = self.decoder[self.tokenizer.eot_token]
self.pad_token = self.decoder[self.tokenizer.eot_token]
# Expose for convenience
self.mergeable_ranks = self.tokenizer._mergeable_ranks
self.special_tokens = self.tokenizer._special_tokens
def __len__(self):
return self.tokenizer.n_vocab