feat(tokenizer): expose merge ranks and special tokens for GGUF
commit a8f2f2862b
parent 3aeae29673
@@ -126,6 +126,9 @@ class Arcade100kTokenizer(PreTrainedTokenizer):
         self.decoder.update({i: n for n, i in self.tokenizer._special_tokens.items()})
         self.eos_token = self.decoder[self.tokenizer.eot_token]
         self.pad_token = self.decoder[self.tokenizer.eot_token]
+        # Expose the merge ranks and special tokens for GGUF conversion.
+        self.mergeable_ranks = self.tokenizer._mergeable_ranks
+        self.special_tokens = self.tokenizer._special_tokens
 
     def __len__(self):
         return self.tokenizer.n_vocab
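The exposed attributes let downstream conversion code read the BPE vocabulary without reaching into tiktoken's private fields. A minimal sketch of how a GGUF converter might consume them; the helper name and the sorting step are illustrative assumptions, not part of this commit:

def extract_vocab(tokenizer):
    # Hypothetical helper, not part of this commit: gather the data a GGUF
    # converter needs from a constructed Arcade100kTokenizer instance.
    # mergeable_ranks maps UTF-8 byte sequences to BPE merge ranks; a lower
    # rank means the merge was learned earlier, so sort by rank to recover
    # the merge order.
    tokens = sorted(tokenizer.mergeable_ranks, key=tokenizer.mergeable_ranks.get)
    # special_tokens maps special-token strings to their token ids.
    specials = dict(tokenizer.special_tokens)
    return tokens, specials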
@@ -270,4 +273,4 @@ class Arcade100kTokenizer(PreTrainedTokenizer):
             token_ids = [token_ids]
         if skip_special_tokens:
             token_ids = [i for i in token_ids if i < self.tokenizer.eot_token]
-        return self.tokenizer.decode(token_ids)
+        return self.tokenizer.decode(token_ids)
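For reference, the decode path keeps only ids strictly below eot_token when skip_special_tokens is set, which drops the EOT id and any special tokens numbered after it. A usage sketch, assuming a constructed tokenizer instance named tok (the variable name and input string are illustrative):

ids = tok.encode("Hello world")
ids.append(tok.tokenizer.eot_token)                 # append the EOT special id
print(tok.decode(ids, skip_special_tokens=True))    # EOT is filtered out
print(tok.decode(ids, skip_special_tokens=False))   # EOT marker is kept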