# Wrap your own tokenizer
from transformers import PreTrainedTokenizerFast

wrapped_tokenizer = PreTrainedTokenizerFast(
    tokenizer_file="tokenizer.json",  # you can load directly from the tokenizer file
    unk_token="[UNK]",
    pad_token="[PAD]",
    cls_token="[CLS]",
    sep_token="[SEP]",
    mask_token="[MASK]",
)

# Finally, save your own pretrained tokenizer
wrapped_tokenizer.save_pretrained("my-tokenizer")
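
# A minimal round-trip sketch: the saved directory can be reloaded with
# AutoTokenizer like any other pretrained tokenizer. The sample sentence is
# illustrative, and whether [CLS]/[SEP] are actually inserted depends on the
# post-processor stored in tokenizer.json.
from transformers import AutoTokenizer

reloaded_tokenizer = AutoTokenizer.from_pretrained("my-tokenizer")

encoding = reloaded_tokenizer("Hello, how are you?")
print(encoding.tokens())   # string tokens produced for the sentence
print(encoding.input_ids)  # the corresponding integer ids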