feat(tokenizer): expose merge ranks and special tokens for GGUF

This commit is contained in:
Jonathan Tow 2024-01-19 18:22:13 +00:00 committed by system
parent 3aeae29673
commit a8f2f2862b
No known key found for this signature in database
GPG Key ID: 6A528E38E0733467

@ -126,6 +126,9 @@ class Arcade100kTokenizer(PreTrainedTokenizer):
self.decoder.update({i: n for n, i in self.tokenizer._special_tokens.items()})
self.eos_token = self.decoder[self.tokenizer.eot_token]
self.pad_token = self.decoder[self.tokenizer.eot_token]
# Expose the merge ranks and special tokens as public attributes for convenience (e.g. GGUF conversion)
self.mergeable_ranks = self.tokenizer._mergeable_ranks
self.special_tokens = self.tokenizer._special_tokens
def __len__(self):
    """Return the ``n_vocab`` value of the underlying ``self.tokenizer``."""
    vocab_size = self.tokenizer.n_vocab
    return vocab_size
@ -270,4 +273,4 @@ class Arcade100kTokenizer(PreTrainedTokenizer):
token_ids = [token_ids]
if skip_special_tokens:
token_ids = [i for i in token_ids if i < self.tokenizer.eot_token]
return self.tokenizer.decode(token_ids)
return self.tokenizer.decode(token_ids)