feat(tokenizer): expose merge ranks and special tokens for GGUF
This commit is contained in:
parent
3aeae29673
commit
a8f2f2862b
@ -126,6 +126,9 @@ class Arcade100kTokenizer(PreTrainedTokenizer):
|
|||||||
self.decoder.update({i: n for n, i in self.tokenizer._special_tokens.items()})
|
self.decoder.update({i: n for n, i in self.tokenizer._special_tokens.items()})
|
||||||
self.eos_token = self.decoder[self.tokenizer.eot_token]
|
self.eos_token = self.decoder[self.tokenizer.eot_token]
|
||||||
self.pad_token = self.decoder[self.tokenizer.eot_token]
|
self.pad_token = self.decoder[self.tokenizer.eot_token]
|
||||||
|
# Expose for convenience
|
||||||
|
self.mergeable_ranks = self.tokenizer._mergeable_ranks
|
||||||
|
self.special_tokens = self.tokenizer._special_tokens
|
||||||
|
|
||||||
def __len__(self):
    """Return the size of the vocabulary.

    Delegates to the wrapped tiktoken encoding's ``n_vocab``, which counts
    both the mergeable ranks and the registered special tokens.
    """
    return self.tokenizer.n_vocab
|
||||||
@ -270,4 +273,4 @@ class Arcade100kTokenizer(PreTrainedTokenizer):
|
|||||||
token_ids = [token_ids]
|
token_ids = [token_ids]
|
||||||
if skip_special_tokens:
|
if skip_special_tokens:
|
||||||
token_ids = [i for i in token_ids if i < self.tokenizer.eot_token]
|
token_ids = [i for i in token_ids if i < self.tokenizer.eot_token]
|
||||||
return self.tokenizer.decode(token_ids)
|
return self.tokenizer.decode(token_ids)
|
||||||
Loading…
Reference in New Issue
Block a user