zhiqu22
committed on
Commit
·
07b10ca
1
Parent(s):
36d930a
update code
Browse files
- tokenization_mitre.py +2 -0
- tokenizer_config.json +6 -1
tokenization_mitre.py
CHANGED
@@ -207,3 +207,5 @@ def load_json(path: str) -> Union[Dict, List]:
|
|
207 |
def save_json(data, path: str) -> None:
|
208 |
with open(path, "w") as f:
|
209 |
json.dump(data, f, indent=2)
|
|
|
|
|
|
207 |
def save_json(data, path: str) -> None:
|
208 |
with open(path, "w") as f:
|
209 |
json.dump(data, f, indent=2)
|
210 |
+
|
211 |
+
MitreTokenizer.register_for_auto_class("AutoTokenizer")
|
tokenizer_config.json
CHANGED
@@ -6,5 +6,10 @@
|
|
6 |
"sep_token": "</s>",
|
7 |
"unk_token": "<unk>",
|
8 |
"pad_token": "<pad>",
|
9 |
-
"model_max_length": 256
|
|
|
|
|
|
|
|
|
|
|
10 |
}
|
|
|
6 |
"sep_token": "</s>",
|
7 |
"unk_token": "<unk>",
|
8 |
"pad_token": "<pad>",
|
9 |
+
"model_max_length": 256,
|
10 |
+
"name_or_path": "naist-nlp/mitre_466m",
|
11 |
+
"tokenizer_class": "MitreTokenizer",
|
12 |
+
"auto_map": {
|
13 |
+
"AutoTokenizer": "tokenization_mitre.MitreTokenizer"
|
14 |
+
}
|
15 |
}
|