"""Helper script to dump/freeze the current tokenizer."""

from pathlib import Path

from tokenizers import Tokenizer

# Absolute path of the directory that contains this script.
HERE = Path(__file__).parent.absolute()


def main():
    """Load the pretrained tokenizer and save it as ``iscc_sct/tokenizer.json``.

    The tokenizer is obtained via ``Tokenizer.from_pretrained`` using a fixed
    model identifier (presumably resolved against the Hugging Face Hub, which
    would require network access or a local cache -- confirm against the
    ``tokenizers`` documentation). The result is written to
    ``iscc_sct/tokenizer.json`` relative to the parent of this script's
    directory, with pretty-printing disabled.
    """
    model_id = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
    frozen = Tokenizer.from_pretrained(model_id)

    # Tokenizer.save is handed a plain string path, in POSIX form.
    target = HERE.parent / "iscc_sct/tokenizer.json"
    frozen.save(target.as_posix(), pretty=False)


if __name__ == "__main__":
    main()