# Wrap a tokenizer trained with the `tokenizers` library in a
# transformers-compatible fast tokenizer
from transformers import PreTrainedTokenizerFast

wrapped_tokenizer = PreTrainedTokenizerFast(
    tokenizer_file="tokenizer.json",  # load from the serialized tokenizer file
    unk_token="[UNK]",
    pad_token="[PAD]",
    cls_token="[CLS]",
    sep_token="[SEP]",
    mask_token="[MASK]",
)
# Finally, save the wrapped tokenizer so it can be reloaded later
wrapped_tokenizer.save_pretrained("my-tokenizer")
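
# A minimal sanity check, sketched under the assumption that the
# "my-tokenizer" directory saved above exists; the sample sentence is
# only an illustration. The saved tokenizer can be reloaded with
# PreTrainedTokenizerFast.from_pretrained (AutoTokenizer.from_pretrained
# also works in recent transformers versions) and used like any other
# pretrained tokenizer:
reloaded = PreTrainedTokenizerFast.from_pretrained("my-tokenizer")
encoded = reloaded("Hello, world!")  # BatchEncoding with input_ids, attention_mask, ...
print(encoded.tokens())              # inspect the produced tokens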