feat(tokenizer): expose merge ranks and special tokens for GGUF
This commit is contained in: parent 3aeae29673, commit a8f2f2862b
@@ -126,6 +126,9 @@ class Arcade100kTokenizer(PreTrainedTokenizer):
self.decoder.update({i: n for n, i in self.tokenizer._special_tokens.items()})
self.eos_token = self.decoder[self.tokenizer.eot_token]
self.pad_token = self.decoder[self.tokenizer.eot_token]
# Expose for convenience
self.mergeable_ranks = self.tokenizer._mergeable_ranks
self.special_tokens = self.tokenizer._special_tokens
def __len__(self):
    """Return the tokenizer's vocabulary size (``n_vocab`` of the wrapped tokenizer)."""
    return self.tokenizer.n_vocab