File size: 432 Bytes
b31f748 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 |
"""Helper script to dump/freeze the current tokenizer"""
from tokenizers import Tokenizer
from pathlib import Path
# Absolute path of the directory that contains this script.
HERE = Path(__file__).absolute().parent
def main():
    """Load the pretrained tokenizer and write it to a JSON file.

    The tokenizer is obtained with ``Tokenizer.from_pretrained`` and saved
    to ``iscc_sct/tokenizer.json`` under the parent of this script's
    directory, without pretty-printing.
    """
    model_id = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
    frozen = Tokenizer.from_pretrained(model_id)
    # The save call receives the destination as a POSIX-style string.
    target = HERE.parent / "iscc_sct/tokenizer.json"
    frozen.save(target.as_posix(), pretty=False)
# Run the dump only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|