Mila committed on
Commit
3139db4
·
1 Parent(s): 33e257e

This time for sure x4

Browse files
Files changed (39) hide show
  1. app_context.py +253 -257
  2. flan-t5-train.py +234 -301
  3. results/checkpoint-16000/added_tokens.json +102 -0
  4. results/checkpoint-16000/config.json +62 -0
  5. results/checkpoint-16000/generation_config.json +6 -0
  6. results/checkpoint-16000/model.safetensors +3 -0
  7. results/checkpoint-16000/optimizer.pt +3 -0
  8. results/checkpoint-16000/rng_state.pth +3 -0
  9. results/checkpoint-16000/scheduler.pt +3 -0
  10. results/checkpoint-16000/special_tokens_map.json +125 -0
  11. results/checkpoint-16000/spiece.model +3 -0
  12. results/checkpoint-16000/tokenizer_config.json +939 -0
  13. results/checkpoint-16000/trainer_state.json +319 -0
  14. results/checkpoint-16000/training_args.bin +3 -0
  15. results/checkpoint-16500/added_tokens.json +102 -0
  16. results/checkpoint-16500/config.json +62 -0
  17. results/checkpoint-16500/generation_config.json +6 -0
  18. results/checkpoint-16500/model.safetensors +3 -0
  19. results/checkpoint-16500/optimizer.pt +3 -0
  20. results/checkpoint-16500/rng_state.pth +3 -0
  21. results/checkpoint-16500/scheduler.pt +3 -0
  22. results/checkpoint-16500/special_tokens_map.json +125 -0
  23. results/checkpoint-16500/spiece.model +3 -0
  24. results/checkpoint-16500/tokenizer_config.json +939 -0
  25. results/checkpoint-16500/trainer_state.json +325 -0
  26. results/checkpoint-16500/training_args.bin +3 -0
  27. results/checkpoint-17000/added_tokens.json +102 -0
  28. results/checkpoint-17000/config.json +62 -0
  29. results/checkpoint-17000/generation_config.json +6 -0
  30. results/checkpoint-17000/model.safetensors +3 -0
  31. results/checkpoint-17000/optimizer.pt +3 -0
  32. results/checkpoint-17000/rng_state.pth +3 -0
  33. results/checkpoint-17000/scheduler.pt +3 -0
  34. results/checkpoint-17000/special_tokens_map.json +125 -0
  35. results/checkpoint-17000/spiece.model +3 -0
  36. results/checkpoint-17000/tokenizer_config.json +939 -0
  37. results/checkpoint-17000/trainer_state.json +331 -0
  38. results/checkpoint-17000/training_args.bin +3 -0
  39. word_embedding.py +619 -0
app_context.py CHANGED
@@ -1,258 +1,254 @@
1
- import gradio as gr
2
- import math
3
- import spacy
4
- from datasets import load_dataset
5
- from sentence_transformers import SentenceTransformer
6
- from sentence_transformers import InputExample
7
- from sentence_transformers import losses
8
- from sentence_transformers import util
9
- from transformers import pipeline, T5Tokenizer
10
- from transformers import AutoTokenizer, AutoModel, AutoModelForSequenceClassification
11
- from transformers import TrainingArguments, Trainer, T5ForConditionalGeneration
12
- import torch
13
- import torch.nn.functional as F
14
- from torch.utils.data import DataLoader
15
- import numpy as np
16
- import evaluate
17
- import nltk
18
- from nltk.corpus import stopwords
19
- import subprocess
20
- import sys
21
- import random
22
- from textwrap import fill
23
-
24
- # !pip install https://huggingface.co/spacy/en_core_web_sm/resolve/main/en_core_web_sm-any-py3-none-any.whl
25
- subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'https://huggingface.co/spacy/en_core_web_sm/resolve/main/en_core_web_sm-any-py3-none-any.whl'])
26
- # tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')
27
- model_base = "results/checkpoint-17000"
28
- nltk.download('stopwords')
29
- nlp = spacy.load("en_core_web_sm")
30
- stops = stopwords.words("english")
31
- ROMAN_CONSTANTS = (
32
- ( "", "I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX" ),
33
- ( "", "X", "XX", "XXX", "XL", "L", "LX", "LXX", "LXXX", "XC" ),
34
- ( "", "C", "CC", "CCC", "CD", "D", "DC", "DCC", "DCCC", "CM" ),
35
- ( "", "M", "MM", "MMM", "", "", "-", "", "", "" ),
36
- ( "", "i", "ii", "iii", "iv", "v", "vi", "vii", "viii", "ix" ),
37
- ( "", "x", "xx", "xxx", "xl", "l", "lx", "lxx", "lxxx", "xc" ),
38
- ( "", "c", "cc", "ccc", "cd", "d", "dc", "dcc", "dccc", "cm" ),
39
- ( "", "m", "mm", "mmm", "", "", "-", "", "", "" ),
40
- )
41
-
42
- # answer = "Pizza"
43
- guesses = []
44
- return_guesses = []
45
- answer = "Moon"
46
- word1 = "Black"
47
- word2 = "White"
48
- word3 = "Sun"
49
- base_prompts = ["Sun is to Moon as ", "Black is to White as ", "Atom is to Element as",
50
- "Athens is to Greece as ", "Cat is to Dog as ", "Robin is to Bird as",
51
- "Hunger is to Ambition as "]
52
-
53
-
54
- #Mean Pooling - Take attention mask into account for correct averaging
55
- def mean_pooling(model_output, attention_mask):
56
- token_embeddings = model_output['token_embeddings'] #First element of model_output contains all token embeddings
57
- input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
58
- return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)
59
-
60
-
61
- def normalize(comment, lowercase, remove_stopwords):
62
- if lowercase:
63
- comment = comment.lower()
64
- comment = nlp(comment)
65
- lemmatized = list()
66
- for word in comment:
67
- lemma = word.lemma_.strip()
68
- if lemma:
69
- if not remove_stopwords or (remove_stopwords and lemma not in stops):
70
- lemmatized.append(lemma)
71
- return " ".join(lemmatized)
72
-
73
-
74
- # def tokenize_function(examples):
75
- # return tokenizer(examples["text"])
76
-
77
-
78
- def compute_metrics(eval_pred):
79
- logits, labels = eval_pred
80
- predictions = np.argmax(logits, axis=-1)
81
- metric = evaluate.load("accuracy")
82
- return metric.compute(predictions=predictions, references=labels)
83
-
84
-
85
- def get_model():
86
- global model_base
87
- # last_checkpoint = "./results/checkpoint-22500"
88
-
89
- finetuned_model = T5ForConditionalGeneration.from_pretrained(model_base)
90
- tokenizer = T5Tokenizer.from_pretrained(model_base)
91
- # model = SentenceTransformer(model_base)
92
- gpu_available = torch.cuda.is_available()
93
- device = torch.device("cuda" if gpu_available else "cpu")
94
- finetuned_model = finetuned_model.to(device)
95
- return finetuned_model, tokenizer
96
-
97
-
98
- def cosine_scores(model, sentence):
99
- global word1
100
- global word2
101
- global word3
102
- # sentence1 = f"{word1} is to {word2} as"
103
- embeddings1 = model.encode(sentence, convert_to_tensor=True)
104
-
105
- def embeddings(model, sentences, tokenizer):
106
- global word1
107
- global word2
108
- global word3
109
- global model_base
110
- gpu_available = torch.cuda.is_available()
111
- device = torch.device("cuda" if gpu_available else "cpu")
112
- # device = torch.device('cuda:0')
113
- # embeddings = model.encode(sentences)
114
- question = "Please answer to this question: " + sentences
115
-
116
- inputs = tokenizer(question, return_tensors="pt")
117
-
118
- print(inputs)
119
- # print(inputs.device)
120
- print(model.device)
121
- print(inputs['input_ids'].device)
122
- print(inputs['attention_mask'].device)
123
-
124
- inputs['attention_mask'] = inputs['attention_mask'].to(device)
125
- inputs['input_ids'] = inputs['input_ids'].to(device)
126
-
127
- outputs = model.generate(**inputs)
128
- answer = tokenizer.decode(outputs[0])
129
- answer = answer[6:-4]
130
- # print(fill(answer, width=80))
131
-
132
- print("ANSWER IS", answer)
133
-
134
- return answer
135
-
136
-
137
- def random_word(model, tokenizer):
138
- global model_base
139
- vocab = tokenizer.get_vocab()
140
- # with open(model_base + '/vocab.txt', 'r') as file:
141
- line = ""
142
- # content = file.readlines()
143
- length = tokenizer.vocab_size
144
- # print(vocab)
145
- while line == "":
146
- rand_line = random.randrange(0, length)
147
- # print("TRYING TO FIND", rand_line, "OUT OF", length, "WITH VOCAB OF TYPE", type(vocab))
148
- for word, id in vocab.items():
149
- if id == rand_line and word[0].isalpha() and word not in stops and word not in ROMAN_CONSTANTS:
150
- # if vocab[rand_line][0].isalpha() and vocab[rand_line][:-1] not in stops and vocab[rand_line][:-1] not in ROMAN_CONSTANTS:
151
- line = word
152
- elif id == rand_line:
153
- print(f"{word} is not alpha or is a stop word")
154
- # for num, aline in enumerate(file, 1997):
155
- # if random.randrange(num) and aline.isalpha():
156
- # continue
157
- # # elif not aline.isalpha():
158
-
159
- # line = aline
160
- print(line)
161
- return line
162
-
163
-
164
- def generate_prompt(model, tokenizer):
165
- global word1
166
- global word2
167
- global word3
168
- global answer
169
- global base_prompts
170
- word1 = random_word(model, tokenizer)
171
- # word2 = random_word()
172
-
173
- word2 = embeddings(model, f"{base_prompts[random.randint(0, len(base_prompts) - 1)]}{word1} is to ___.", tokenizer)
174
- word3 = random_word(model, tokenizer)
175
- sentence = f"{word1} is to {word2} as {word3} is to ___."
176
- print(sentence)
177
- answer = embeddings(model, sentence, tokenizer)
178
- print("ANSWER IS", answer)
179
- return f"# {word1} is to {word2} as {word3} is to ___."
180
- # cosine_scores(model, sentence)
181
-
182
-
183
- def greet(name):
184
- return "Hello " + name + "!!"
185
-
186
- def check_answer(guess:str):
187
- global guesses
188
- global answer
189
- global return_guesses
190
- global word1
191
- global word2
192
- global word3
193
-
194
- model, tokenizer = get_model()
195
- output = ""
196
- protected_guess = guess
197
- sentence = f"{word1} is to {word2} as [MASK] is to {guess}."
198
-
199
- other_word = embeddings(model, sentence, tokenizer)
200
- guesses.append(guess)
201
-
202
-
203
-
204
- for guess in return_guesses:
205
- output += ("- " + guess + "<br>")
206
-
207
- # output = output[:-1]
208
- prompt = f"{word1} is to {word2} as {word3} is to ___."
209
- # print("IS", protected_guess, "EQUAL TO", answer, ":", protected_guess.lower() == answer.lower())
210
-
211
- if protected_guess.lower() == answer.lower():
212
- return_guesses.append(f"{protected_guess}: {word1} is to {word2} as {word3} is to {protected_guess}.")
213
- output += f"<span style='color:green'>- {return_guesses[-1]}</span><br>"
214
- new_prompt = generate_prompt(model, tokenizer)
215
- return new_prompt, "Correct!", output
216
- else:
217
- return_guess = f"{protected_guess}: {word1} is to {word2} as {other_word} is to {protected_guess}."
218
- return_guesses.append(return_guess)
219
- output += ("- " + return_guess + " <br>")
220
- return prompt, "Try again!", output
221
-
222
- def main():
223
- global word1
224
- global word2
225
- global word3
226
- global answer
227
- # answer = "Moon"
228
- global guesses
229
-
230
-
231
- # num_rows, data_type, value, example, embeddings = training()
232
- # sent_embeddings = embeddings()
233
- model, tokenizer = get_model()
234
- generate_prompt(model, tokenizer)
235
-
236
- prompt = f"{word1} is to {word2} as {word3} is to ____"
237
- print(prompt)
238
- print("TESTING EMBEDDINGS")
239
- with gr.Blocks() as iface:
240
- mark_question = gr.Markdown(prompt)
241
- with gr.Tab("Guess"):
242
- text_input = gr.Textbox()
243
- text_output = gr.Textbox()
244
- text_button = gr.Button("Submit")
245
- with gr.Accordion("Open for previous guesses"):
246
- text_guesses = gr.Markdown()
247
- # with gr.Tab("Testing"):
248
- # gr.Markdown(f"""The Embeddings are {sent_embeddings}.""")
249
- text_button.click(check_answer, inputs=[text_input], outputs=[mark_question, text_output, text_guesses])
250
- # iface = gr.Interface(fn=greet, inputs="text", outputs="text")
251
- iface.launch()
252
-
253
-
254
-
255
-
256
-
257
- if __name__ == "__main__":
258
  main()
 
1
+ import gradio as gr
2
+ import math
3
+ import spacy
4
+ from datasets import load_dataset
5
+ from transformers import pipeline, T5Tokenizer
6
+ from transformers import AutoTokenizer, AutoModel, AutoModelForSequenceClassification
7
+ from transformers import TrainingArguments, Trainer, T5ForConditionalGeneration
8
+ import torch
9
+ import torch.nn.functional as F
10
+ from torch.utils.data import DataLoader
11
+ import numpy as np
12
+ import evaluate
13
+ import nltk
14
+ from nltk.corpus import stopwords
15
+ import subprocess
16
+ import sys
17
+ import random
18
+ from textwrap import fill
19
+
20
+ # !pip install https://huggingface.co/spacy/en_core_web_sm/resolve/main/en_core_web_sm-any-py3-none-any.whl
21
+ subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'https://huggingface.co/spacy/en_core_web_sm/resolve/main/en_core_web_sm-any-py3-none-any.whl'])
22
+ # tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')
23
+ model_base = "results/checkpoint-17000"
24
+ nltk.download('stopwords')
25
+ nlp = spacy.load("en_core_web_sm")
26
+ stops = stopwords.words("english")
27
+ ROMAN_CONSTANTS = (
28
+ ( "", "I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX" ),
29
+ ( "", "X", "XX", "XXX", "XL", "L", "LX", "LXX", "LXXX", "XC" ),
30
+ ( "", "C", "CC", "CCC", "CD", "D", "DC", "DCC", "DCCC", "CM" ),
31
+ ( "", "M", "MM", "MMM", "", "", "-", "", "", "" ),
32
+ ( "", "i", "ii", "iii", "iv", "v", "vi", "vii", "viii", "ix" ),
33
+ ( "", "x", "xx", "xxx", "xl", "l", "lx", "lxx", "lxxx", "xc" ),
34
+ ( "", "c", "cc", "ccc", "cd", "d", "dc", "dcc", "dccc", "cm" ),
35
+ ( "", "m", "mm", "mmm", "", "", "-", "", "", "" ),
36
+ )
37
+
38
+ # answer = "Pizza"
39
+ guesses = []
40
+ return_guesses = []
41
+ answer = "Moon"
42
+ word1 = "Black"
43
+ word2 = "White"
44
+ word3 = "Sun"
45
+ base_prompts = ["Sun is to Moon as ", "Black is to White as ", "Atom is to Element as",
46
+ "Athens is to Greece as ", "Cat is to Dog as ", "Robin is to Bird as",
47
+ "Hunger is to Ambition as "]
48
+
49
+
50
+ #Mean Pooling - Take attention mask into account for correct averaging
51
+ def mean_pooling(model_output, attention_mask):
52
+ token_embeddings = model_output['token_embeddings'] #First element of model_output contains all token embeddings
53
+ input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
54
+ return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)
55
+
56
+
57
+ def normalize(comment, lowercase, remove_stopwords):
58
+ if lowercase:
59
+ comment = comment.lower()
60
+ comment = nlp(comment)
61
+ lemmatized = list()
62
+ for word in comment:
63
+ lemma = word.lemma_.strip()
64
+ if lemma:
65
+ if not remove_stopwords or (remove_stopwords and lemma not in stops):
66
+ lemmatized.append(lemma)
67
+ return " ".join(lemmatized)
68
+
69
+
70
+ # def tokenize_function(examples):
71
+ # return tokenizer(examples["text"])
72
+
73
+
74
+ def compute_metrics(eval_pred):
75
+ logits, labels = eval_pred
76
+ predictions = np.argmax(logits, axis=-1)
77
+ metric = evaluate.load("accuracy")
78
+ return metric.compute(predictions=predictions, references=labels)
79
+
80
+
81
+ def get_model():
82
+ global model_base
83
+ # last_checkpoint = "./results/checkpoint-22500"
84
+
85
+ finetuned_model = T5ForConditionalGeneration.from_pretrained(model_base)
86
+ tokenizer = T5Tokenizer.from_pretrained(model_base)
87
+ # model = SentenceTransformer(model_base)
88
+ gpu_available = torch.cuda.is_available()
89
+ device = torch.device("cuda" if gpu_available else "cpu")
90
+ finetuned_model = finetuned_model.to(device)
91
+ return finetuned_model, tokenizer
92
+
93
+
94
+ def cosine_scores(model, sentence):
95
+ global word1
96
+ global word2
97
+ global word3
98
+ # sentence1 = f"{word1} is to {word2} as"
99
+ embeddings1 = model.encode(sentence, convert_to_tensor=True)
100
+
101
+ def embeddings(model, sentences, tokenizer):
102
+ global word1
103
+ global word2
104
+ global word3
105
+ global model_base
106
+ gpu_available = torch.cuda.is_available()
107
+ device = torch.device("cuda" if gpu_available else "cpu")
108
+ # device = torch.device('cuda:0')
109
+ # embeddings = model.encode(sentences)
110
+ question = "Please answer to this question: " + sentences
111
+
112
+ inputs = tokenizer(question, return_tensors="pt")
113
+
114
+ print(inputs)
115
+ # print(inputs.device)
116
+ print(model.device)
117
+ print(inputs['input_ids'].device)
118
+ print(inputs['attention_mask'].device)
119
+
120
+ inputs['attention_mask'] = inputs['attention_mask'].to(device)
121
+ inputs['input_ids'] = inputs['input_ids'].to(device)
122
+
123
+ outputs = model.generate(**inputs)
124
+ answer = tokenizer.decode(outputs[0])
125
+ answer = answer[6:-4]
126
+ # print(fill(answer, width=80))
127
+
128
+ print("ANSWER IS", answer)
129
+
130
+ return answer
131
+
132
+
133
+ def random_word(model, tokenizer):
134
+ global model_base
135
+ vocab = tokenizer.get_vocab()
136
+ # with open(model_base + '/vocab.txt', 'r') as file:
137
+ line = ""
138
+ # content = file.readlines()
139
+ length = tokenizer.vocab_size
140
+ # print(vocab)
141
+ while line == "":
142
+ rand_line = random.randrange(0, length)
143
+ # print("TRYING TO FIND", rand_line, "OUT OF", length, "WITH VOCAB OF TYPE", type(vocab))
144
+ for word, id in vocab.items():
145
+ if id == rand_line and word[0].isalpha() and word not in stops and word not in ROMAN_CONSTANTS:
146
+ # if vocab[rand_line][0].isalpha() and vocab[rand_line][:-1] not in stops and vocab[rand_line][:-1] not in ROMAN_CONSTANTS:
147
+ line = word
148
+ elif id == rand_line:
149
+ print(f"{word} is not alpha or is a stop word")
150
+ # for num, aline in enumerate(file, 1997):
151
+ # if random.randrange(num) and aline.isalpha():
152
+ # continue
153
+ # # elif not aline.isalpha():
154
+
155
+ # line = aline
156
+ print(line)
157
+ return line
158
+
159
+
160
+ def generate_prompt(model, tokenizer):
161
+ global word1
162
+ global word2
163
+ global word3
164
+ global answer
165
+ global base_prompts
166
+ word1 = random_word(model, tokenizer)
167
+ # word2 = random_word()
168
+
169
+ word2 = embeddings(model, f"{base_prompts[random.randint(0, len(base_prompts) - 1)]}{word1} is to ___.", tokenizer)
170
+ word3 = random_word(model, tokenizer)
171
+ sentence = f"{word1} is to {word2} as {word3} is to ___."
172
+ print(sentence)
173
+ answer = embeddings(model, sentence, tokenizer)
174
+ print("ANSWER IS", answer)
175
+ return f"# {word1} is to {word2} as {word3} is to ___."
176
+ # cosine_scores(model, sentence)
177
+
178
+
179
+ def greet(name):
180
+ return "Hello " + name + "!!"
181
+
182
+ def check_answer(guess:str):
183
+ global guesses
184
+ global answer
185
+ global return_guesses
186
+ global word1
187
+ global word2
188
+ global word3
189
+
190
+ model, tokenizer = get_model()
191
+ output = ""
192
+ protected_guess = guess
193
+ sentence = f"{word1} is to {word2} as [MASK] is to {guess}."
194
+
195
+ other_word = embeddings(model, sentence, tokenizer)
196
+ guesses.append(guess)
197
+
198
+
199
+
200
+ for guess in return_guesses:
201
+ output += ("- " + guess + "<br>")
202
+
203
+ # output = output[:-1]
204
+ prompt = f"{word1} is to {word2} as {word3} is to ___."
205
+ # print("IS", protected_guess, "EQUAL TO", answer, ":", protected_guess.lower() == answer.lower())
206
+
207
+ if protected_guess.lower() == answer.lower():
208
+ return_guesses.append(f"{protected_guess}: {word1} is to {word2} as {word3} is to {protected_guess}.")
209
+ output += f"<span style='color:green'>- {return_guesses[-1]}</span><br>"
210
+ new_prompt = generate_prompt(model, tokenizer)
211
+ return new_prompt, "Correct!", output
212
+ else:
213
+ return_guess = f"{protected_guess}: {word1} is to {word2} as {other_word} is to {protected_guess}."
214
+ return_guesses.append(return_guess)
215
+ output += ("- " + return_guess + " <br>")
216
+ return prompt, "Try again!", output
217
+
218
+ def main():
219
+ global word1
220
+ global word2
221
+ global word3
222
+ global answer
223
+ # answer = "Moon"
224
+ global guesses
225
+
226
+
227
+ # num_rows, data_type, value, example, embeddings = training()
228
+ # sent_embeddings = embeddings()
229
+ model, tokenizer = get_model()
230
+ generate_prompt(model, tokenizer)
231
+
232
+ prompt = f"{word1} is to {word2} as {word3} is to ____"
233
+ print(prompt)
234
+ print("TESTING EMBEDDINGS")
235
+ with gr.Blocks() as iface:
236
+ mark_question = gr.Markdown(prompt)
237
+ with gr.Tab("Guess"):
238
+ text_input = gr.Textbox()
239
+ text_output = gr.Textbox()
240
+ text_button = gr.Button("Submit")
241
+ with gr.Accordion("Open for previous guesses"):
242
+ text_guesses = gr.Markdown()
243
+ # with gr.Tab("Testing"):
244
+ # gr.Markdown(f"""The Embeddings are {sent_embeddings}.""")
245
+ text_button.click(check_answer, inputs=[text_input], outputs=[mark_question, text_output, text_guesses])
246
+ # iface = gr.Interface(fn=greet, inputs="text", outputs="text")
247
+ iface.launch()
248
+
249
+
250
+
251
+
252
+
253
+ if __name__ == "__main__":
 
 
 
 
254
  main()
flan-t5-train.py CHANGED
@@ -1,302 +1,235 @@
1
- import gradio as gr
2
- import math
3
- from datasets import load_dataset
4
- from transformers import AutoTokenizer, AutoModel, AutoModelForSequenceClassification
5
- from transformers import TrainingArguments, Trainer
6
- from transformers import T5Tokenizer, T5ForConditionalGeneration
7
- import torch
8
- import torch.nn.functional as F
9
- from torch.utils.data import DataLoader
10
- import numpy as np
11
- import evaluate
12
- import nltk
13
- from nltk.corpus import stopwords
14
- import subprocess
15
- import sys
16
- from transformers import T5Tokenizer, DataCollatorForSeq2Seq
17
- from transformers import T5ForConditionalGeneration, Seq2SeqTrainingArguments, Seq2SeqTrainer
18
- from transformers import DataCollatorWithPadding, DistilBertTokenizerFast
19
- from transformers import TrainingArguments
20
- from transformers import (
21
- BertModel,
22
- BertTokenizerFast,
23
- Trainer,
24
- EvalPrediction
25
- )
26
-
27
- # !pip install https://huggingface.co/spacy/en_core_web_sm/resolve/main/en_core_web_sm-any-py3-none-any.whl
28
- # subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'https://huggingface.co/spacy/en_core_web_sm/resolve/main/en_core_web_sm-any-py3-none-any.whl'])
29
- # tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')
30
- # data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
31
- # nltk.download('stopwords')
32
- # nlp = spacy.load("en_core_web_sm")
33
- # stops = stopwords.words("english")
34
- nltk.download("punkt", quiet=True)
35
- metric = evaluate.load("rouge")
36
-
37
- # Global Parameters
38
- L_RATE = 3e-4
39
- BATCH_SIZE = 8
40
- PER_DEVICE_EVAL_BATCH = 4
41
- WEIGHT_DECAY = 0.01
42
- SAVE_TOTAL_LIM = 3
43
- NUM_EPOCHS = 10
44
-
45
- # Set up training arguments
46
- training_args = Seq2SeqTrainingArguments(
47
- output_dir="./results",
48
- evaluation_strategy="epoch",
49
- learning_rate=L_RATE,
50
- per_device_train_batch_size=BATCH_SIZE,
51
- per_device_eval_batch_size=PER_DEVICE_EVAL_BATCH,
52
- weight_decay=WEIGHT_DECAY,
53
- save_total_limit=SAVE_TOTAL_LIM,
54
- num_train_epochs=NUM_EPOCHS,
55
- predict_with_generate=True,
56
- push_to_hub=False
57
- )
58
-
59
- model_id = "google/flan-t5-base"
60
- tokenizer = T5Tokenizer.from_pretrained(model_id)
61
- # tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')
62
- # metric = evaluate.load("accuracy")
63
-
64
- def tokenize_function(examples):
65
- return tokenizer(examples["stem"], padding="max_length", truncation=True)
66
-
67
-
68
- #Mean Pooling - Take attention mask into account for correct averaging
69
- def mean_pooling(model_output, attention_mask):
70
- token_embeddings = model_output[0] #First element of model_output contains all token embeddings
71
- input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
72
- return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)
73
-
74
-
75
- # def compute_metrics(eval_pred):
76
- # logits, labels = eval_pred
77
- # predictions = np.argmax(logits, axis=-1)
78
- # metric = evaluate.load("accuracy")
79
- # return metric.compute(predictions=predictions, references=labels)
80
-
81
- def compute_metrics(eval_preds):
82
- preds, labels = eval_preds
83
-
84
- # decode preds and labels
85
- labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
86
- decoded_preds = tokenizer.batch_decode(preds, skip_special_tokens=True)
87
- decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)
88
-
89
- # rougeLSum expects newline after each sentence
90
- decoded_preds = ["\n".join(nltk.sent_tokenize(pred.strip())) for pred in decoded_preds]
91
- decoded_labels = ["\n".join(nltk.sent_tokenize(label.strip())) for label in decoded_labels]
92
-
93
- result = metric.compute(predictions=decoded_preds, references=decoded_labels, use_stemmer=True)
94
-
95
- return result
96
-
97
-
98
- def training():
99
- dataset_id = "tomasmcz/word2vec_analogy"
100
- # dataset_id = "relbert/scientific_and_creative_analogy"
101
- # dataset_sub = "Quadruples_Kmiecik_random_split"
102
- print("GETTING DATASET")
103
- dataset = load_dataset(dataset_id)
104
- # dataset = dataset["train"]
105
- # tokenized_datasets = dataset.map(tokenize_function, batched=True)
106
-
107
- print(dataset)
108
- print(f"- The {dataset_id} dataset has {dataset['train'].num_rows} examples.")
109
- print(f"- Each example is a {type(dataset['train'][0])} with a {type(dataset['train'][0])} as value.")
110
- print(f"- Examples look like this: {dataset['train'][0]}")
111
-
112
- # for i in dataset["train"]:
113
- # print(i["AB"], "to", i["CD"], "is", i["label"])
114
-
115
- dataset = dataset["train"].train_test_split(test_size=0.3)
116
-
117
- # We prefix our tasks with "answer the question"
118
- prefix = "Please answer this question: "
119
-
120
- # Define the preprocessing function
121
-
122
- # def preprocess_function(examples):
123
- # """Add prefix to the sentences, tokenize the text, and set the labels"""
124
- # # The "inputs" are the tokenized answer:
125
- # inputs = []
126
- # # print(examples)
127
- # # inputs = [prefix + doc for doc in examples["question"]]
128
- # for doc in examples['source']:
129
- # # print("THE DOC IS:", doc)
130
- # # print("THE DOC IS:", examples[i]['AB'], examples[i]['CD'], examples[i]['label'])
131
- # prompt = f"{prefix}map "
132
- # for item in doc:
133
- # prompt += f"{item}, and "
134
- # prompt = prompt[:-6]
135
- # inputs.append(prompt)
136
- # # inputs = [prefix + doc for doc in examples["question"]]
137
- # for indx, doc in enumerate(examples["target_random"]):
138
- # prompt = f" to "
139
- # for item in doc:
140
- # prompt += f"{item}, and "
141
- # prompt = prompt[:-6] + "."
142
- # inputs[indx] += prompt
143
- # model_inputs = tokenizer(inputs, max_length=128, truncation=True)
144
-
145
- def preprocess_function(examples):
146
- """Add prefix to the sentences, tokenize the text, and set the labels"""
147
- # The "inputs" are the tokenized answer:
148
- inputs = []
149
- # print(examples)
150
- # inputs = [prefix + doc for doc in examples["question"]]
151
- for doc in examples['word_a']:
152
- # print("THE DOC IS:", doc)
153
- # print("THE DOC IS:", examples[i]['AB'], examples[i]['CD'], examples[i]['label'])
154
- prompt = f"{prefix}{doc} is to "
155
- inputs.append(prompt)
156
- # inputs = [prefix + doc for doc in examples["question"]]
157
- for indx, doc in enumerate(examples["word_b"]):
158
- prompt = f"{doc} as "
159
- inputs[indx] += prompt
160
-
161
- for indx, doc in enumerate(examples["word_c"]):
162
- prompt = f"{doc} is to ___."
163
- inputs[indx] += prompt
164
- model_inputs = tokenizer(inputs, max_length=128, truncation=True)
165
-
166
- # print(examples["label"], type(examples["label"]))
167
-
168
- # The "labels" are the tokenized outputs:
169
- labels = tokenizer(text_target=examples["word_d"],
170
- max_length=512,
171
- truncation=True)
172
-
173
- model_inputs["labels"] = labels["input_ids"]
174
- return model_inputs
175
-
176
-
177
-
178
- # Map the preprocessing function across our dataset
179
- tokenized_dataset = dataset.map(preprocess_function, batched=True)
180
- # train_examples = []
181
- # train_data = dataset["test"]
182
- # # For agility we only 1/2 of our available data
183
- # n_examples = dataset["test"].num_rows // 2
184
-
185
- # for i in range(n_examples):
186
- # example = train_data[i]
187
- # temp_word_1 = example["stem"][0]
188
- # temp_word_2 = example["stem"][1]
189
- # temp_word_3 = example["choice"][example["answer"]][0]
190
- # temp_word_4 = example["choice"][example["answer"]][1]
191
- # comp1 = f"{temp_word_1} to {temp_word_2}"
192
- # comp2 = f"{temp_word_3} to {temp_word_4}"
193
- # # example_opposite = dataset_clean[-(i)]
194
- # # print(example["text"])
195
- # train_examples.append(InputExample(texts=[comp1, comp2]))
196
-
197
-
198
- # train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=25)
199
-
200
- print("END DATALOADER")
201
-
202
- # print(train_examples)
203
-
204
- embeddings = finetune(tokenized_dataset)
205
-
206
- return 0
207
-
208
-
209
- def finetune(dataset):
210
- # model = AutoModelForSequenceClassification.from_pretrained("bert-base-cased", num_labels=5)
211
- # model_id = "sentence-transformers/all-MiniLM-L6-v2"
212
- model_id = "google/flan-t5-base"
213
- # model_id = "distilbert-base-uncased"
214
- # tokenizer = DistilBertTokenizerFast.from_pretrained(model_id)
215
- tokenizer = T5Tokenizer.from_pretrained(model_id)
216
- model = T5ForConditionalGeneration.from_pretrained(model_id)
217
- data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)
218
- device = torch.device('cuda:0')
219
- model = model.to(device)
220
-
221
- # training_args = TrainingArguments(output_dir="test_trainer")
222
-
223
- # USE THIS LINK
224
- # https://huggingface.co/blog/how-to-train-sentence-transformers
225
-
226
- # train_loss = losses.MegaBatchMarginLoss(model=model)
227
- # ds_train, ds_valid = dataset.train_test_split(test_size=0.2, seed=42)
228
-
229
- print("BEGIN FIT")
230
-
231
- trainer = Seq2SeqTrainer(
232
- model=model,
233
- args=training_args,
234
- train_dataset=dataset["train"],
235
- eval_dataset=dataset["test"],
236
- # evaluation_strategy="no"
237
- tokenizer=tokenizer,
238
- data_collator=data_collator,
239
- compute_metrics=compute_metrics
240
- )
241
-
242
- # model.fit(train_objectives=[(train_dataloader, train_loss)], epochs=10)
243
-
244
- trainer.train()
245
-
246
- # model.save("flan-analogies")
247
-
248
- # model.save_to_hub("smhavens/bert-base-analogies")
249
- # accuracy = compute_metrics(eval, metric)
250
- return 0
251
-
252
- def greet(name):
253
- return "Hello " + name + "!!"
254
-
255
- def check_answer(guess:str):
256
- global guesses
257
- global answer
258
- guesses.append(guess)
259
- output = ""
260
- for guess in guesses:
261
- output += ("- " + guess + "\n")
262
- output = output[:-1]
263
-
264
- if guess.lower() == answer.lower():
265
- return "Correct!", output
266
- else:
267
- return "Try again!", output
268
-
269
- def main():
270
- print("BEGIN")
271
- word1 = "Black"
272
- word2 = "White"
273
- word3 = "Sun"
274
- global answer
275
- answer = "Moon"
276
- global guesses
277
-
278
- training()
279
-
280
- # prompt = f"{word1} is to {word2} as {word3} is to ____"
281
- # with gr.Blocks() as iface:
282
- # gr.Markdown(prompt)
283
- # with gr.Tab("Guess"):
284
- # text_input = gr.Textbox()
285
- # text_output = gr.Textbox()
286
- # text_button = gr.Button("Submit")
287
- # with gr.Accordion("Open for previous guesses"):
288
- # text_guesses = gr.Textbox()
289
- # with gr.Tab("Testing"):
290
- # gr.Markdown(f"""Number of rows in dataset is {num_rows}, with each having type {data_type} and value {value}.
291
- # An example is {example}.
292
- # The Embeddings are {embeddings}.""")
293
- # text_button.click(check_answer, inputs=[text_input], outputs=[text_output, text_guesses])
294
- # # iface = gr.Interface(fn=greet, inputs="text", outputs="text")
295
- # iface.launch()
296
-
297
-
298
-
299
-
300
-
301
- if __name__ == "__main__":
302
  main()
 
1
+ import gradio as gr
2
+ import math
3
+ from datasets import load_dataset
4
+ from transformers import AutoTokenizer, AutoModel, AutoModelForSequenceClassification
5
+ from transformers import TrainingArguments, Trainer
6
+ from transformers import T5Tokenizer, T5ForConditionalGeneration
7
+ import torch
8
+ import torch.nn.functional as F
9
+ from torch.utils.data import DataLoader
10
+ import numpy as np
11
+ import evaluate
12
+ import nltk
13
+ from nltk.corpus import stopwords
14
+ import subprocess
15
+ import sys
16
+ from transformers import T5Tokenizer, DataCollatorForSeq2Seq
17
+ from transformers import T5ForConditionalGeneration, Seq2SeqTrainingArguments, Seq2SeqTrainer
18
+ from transformers import DataCollatorWithPadding, DistilBertTokenizerFast
19
+ from transformers import TrainingArguments
20
+ from transformers import (
21
+ BertModel,
22
+ BertTokenizerFast,
23
+ Trainer,
24
+ EvalPrediction
25
+ )
26
+
27
# Fetch the NLTK sentence-tokenizer data and load the ROUGE scorer once at
# import time; compute_metrics relies on both.
nltk.download("punkt", quiet=True)
metric = evaluate.load("rouge")

# Training hyperparameters.
L_RATE = 3e-4
WEIGHT_DECAY = 0.01
NUM_EPOCHS = 10
BATCH_SIZE = 8
PER_DEVICE_EVAL_BATCH = 4
SAVE_TOTAL_LIM = 3

# Trainer configuration: evaluate once per epoch and score generated text
# (predict_with_generate) rather than raw logits.
training_args = Seq2SeqTrainingArguments(
    output_dir="./results",
    num_train_epochs=NUM_EPOCHS,
    learning_rate=L_RATE,
    weight_decay=WEIGHT_DECAY,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=PER_DEVICE_EVAL_BATCH,
    evaluation_strategy="epoch",
    save_total_limit=SAVE_TOTAL_LIM,
    predict_with_generate=True,
    push_to_hub=False,
)

# Base checkpoint and the module-level tokenizer shared by the helpers below.
model_id = "google/flan-t5-base"
tokenizer = T5Tokenizer.from_pretrained(model_id)
56
+
57
def tokenize_function(examples):
    """Tokenize the ``"stem"`` column of a batch with the module-level tokenizer.

    Sequences are padded to the tokenizer's maximum length and truncated.
    """
    stems = examples["stem"]
    encoded = tokenizer(stems, truncation=True, padding="max_length")
    return encoded
59
+
60
+
61
+ #Mean Pooling - Take attention mask into account for correct averaging
62
def mean_pooling(model_output, attention_mask):
    """Average token embeddings along dimension 1, weighted by the attention mask.

    Args:
        model_output: Sequence whose first element holds the token embeddings
            (presumably shaped (batch, seq, dim) -- confirm against callers).
        attention_mask: Mask with one entry per token; it is expanded over the
            embedding axis so masked tokens contribute nothing to the sum.

    Returns:
        The masked sum of embeddings divided by the number of unmasked
        tokens; the divisor is clamped to 1e-9 to avoid division by zero.
    """
    embeddings = model_output[0]
    mask = attention_mask.unsqueeze(-1).expand(embeddings.size()).float()
    weighted_sum = (embeddings * mask).sum(1)
    token_counts = mask.sum(1).clamp(min=1e-9)
    return weighted_sum / token_counts
66
+
67
+
68
+ # def compute_metrics(eval_pred):
69
+ # logits, labels = eval_pred
70
+ # predictions = np.argmax(logits, axis=-1)
71
+ # metric = evaluate.load("accuracy")
72
+ # return metric.compute(predictions=predictions, references=labels)
73
+
74
def compute_metrics(eval_preds):
    """Decode predicted and reference token ids and score them with ROUGE.

    Args:
        eval_preds: A ``(predictions, labels)`` pair of token-id arrays.
            ``predictions`` may also be a tuple, in which case its first
            element is used.

    Returns:
        Whatever ``metric.compute`` returns for the decoded texts (the
        module-level metric is loaded as ROUGE, with stemming enabled).
    """
    preds, labels = eval_preds
    if isinstance(preds, tuple):
        preds = preds[0]

    # -100 marks ignored positions and is not a valid vocabulary id, so it is
    # replaced by the pad token before decoding. Predictions get the same
    # treatment as labels because generated batches can be padded with -100.
    pad_id = tokenizer.pad_token_id
    preds = np.where(preds != -100, preds, pad_id)
    labels = np.where(labels != -100, labels, pad_id)

    decoded_preds = tokenizer.batch_decode(preds, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    # rougeLSum expects a newline after each sentence.
    decoded_preds = [
        "\n".join(nltk.sent_tokenize(pred.strip())) for pred in decoded_preds
    ]
    decoded_labels = [
        "\n".join(nltk.sent_tokenize(label.strip())) for label in decoded_labels
    ]

    return metric.compute(
        predictions=decoded_preds,
        references=decoded_labels,
        use_stemmer=True,
    )
89
+
90
+
91
def training():
    """Load the analogy dataset, build prompt/answer pairs, and fine-tune on them.

    The ``train`` split of ``tomasmcz/word2vec_analogy`` is re-split 70/30
    into train/test, each row is rendered as
    ``"<prefix><word_a> is to <word_b> as <word_c> is to ___."`` with
    ``word_d`` as the target, and the tokenized result is handed to
    ``finetune``.

    Returns:
        Always 0.
    """
    dataset_id = "tomasmcz/word2vec_analogy"
    print("GETTING DATASET")
    dataset = load_dataset(dataset_id)

    print(dataset)
    first_example = dataset["train"][0]
    # Report the type of a field value, not the type of the row a second time.
    first_value = next(iter(first_example.values()))
    print(f"- The {dataset_id} dataset has {dataset['train'].num_rows} examples.")
    print(f"- Each example is a {type(first_example)} with a {type(first_value)} as value.")
    print(f"- Examples look like this: {first_example}")

    dataset = dataset["train"].train_test_split(test_size=0.3)

    # Every prompt is prefixed with an instruction for the model.
    prefix = "Please answer this question: "

    def preprocess_function(examples):
        """Render a batch as prompts, tokenize them, and attach tokenized labels."""
        inputs = [
            f"{prefix}{word_a} is to {word_b} as {word_c} is to ___."
            for word_a, word_b, word_c in zip(
                examples["word_a"], examples["word_b"], examples["word_c"]
            )
        ]
        model_inputs = tokenizer(inputs, max_length=128, truncation=True)

        # The labels are the tokenized fourth word of each analogy.
        labels = tokenizer(
            text_target=examples["word_d"],
            max_length=512,
            truncation=True,
        )
        model_inputs["labels"] = labels["input_ids"]
        return model_inputs

    # Map the preprocessing function across both splits.
    tokenized_dataset = dataset.map(preprocess_function, batched=True)

    print("END DATALOADER")

    finetune(tokenized_dataset)

    return 0
157
+
158
+
159
def finetune(dataset):
    """Fine-tune ``google/flan-t5-base`` on a tokenized train/test dataset.

    Args:
        dataset: Object indexable by ``"train"`` and ``"test"``; those two
            entries are passed to the trainer as training and evaluation sets.

    Returns:
        Always 0. Checkpoints are written by the trainer according to the
        module-level ``training_args``.
    """
    model_id = "google/flan-t5-base"
    tokenizer = T5Tokenizer.from_pretrained(model_id)
    model = T5ForConditionalGeneration.from_pretrained(model_id)
    data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)

    # Fall back to the CPU instead of crashing when no CUDA device is present.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = model.to(device)

    print("BEGIN FIT")

    trainer = Seq2SeqTrainer(
        model=model,
        args=training_args,
        train_dataset=dataset["train"],
        eval_dataset=dataset["test"],
        tokenizer=tokenizer,
        data_collator=data_collator,
        compute_metrics=compute_metrics,
    )

    trainer.train()

    return 0
201
+
202
def greet(name):
    """Return the greeting ``"Hello <name>!!"`` for the given name string."""
    pieces = ("Hello ", name, "!!")
    return "".join(pieces)
204
+
205
def check_answer(guess: str):
    """Record a guess and report whether it matches the current answer.

    Reads the module-level ``answer`` and appends ``guess`` to the
    module-level ``guesses`` list; neither name is rebound here.

    Args:
        guess: The word submitted by the player.

    Returns:
        A ``(verdict, history)`` tuple. ``verdict`` is ``"Correct!"`` when the
        guess equals the answer ignoring case, otherwise ``"Try again!"``.
        ``history`` lists every guess so far, one ``"- <guess>"`` line each,
        with no trailing newline.
    """
    guesses.append(guess)
    # A distinct loop name keeps the `guess` parameter intact for the
    # comparison below.
    output = "\n".join(f"- {previous}" for previous in guesses)

    if guess.lower() == answer.lower():
        return "Correct!", output
    return "Try again!", output
218
+
219
def main():
    """Initialise the guessing-game globals and run the training pipeline."""
    global answer
    global guesses

    print("BEGIN")
    answer = "Moon"
    # `guesses` was declared global but never bound anywhere; check_answer
    # appends to it, so it must exist as a list.
    guesses = []

    training()


if __name__ == "__main__":
    main()
results/checkpoint-16000/added_tokens.json ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "<extra_id_0>": 32099,
3
+ "<extra_id_10>": 32089,
4
+ "<extra_id_11>": 32088,
5
+ "<extra_id_12>": 32087,
6
+ "<extra_id_13>": 32086,
7
+ "<extra_id_14>": 32085,
8
+ "<extra_id_15>": 32084,
9
+ "<extra_id_16>": 32083,
10
+ "<extra_id_17>": 32082,
11
+ "<extra_id_18>": 32081,
12
+ "<extra_id_19>": 32080,
13
+ "<extra_id_1>": 32098,
14
+ "<extra_id_20>": 32079,
15
+ "<extra_id_21>": 32078,
16
+ "<extra_id_22>": 32077,
17
+ "<extra_id_23>": 32076,
18
+ "<extra_id_24>": 32075,
19
+ "<extra_id_25>": 32074,
20
+ "<extra_id_26>": 32073,
21
+ "<extra_id_27>": 32072,
22
+ "<extra_id_28>": 32071,
23
+ "<extra_id_29>": 32070,
24
+ "<extra_id_2>": 32097,
25
+ "<extra_id_30>": 32069,
26
+ "<extra_id_31>": 32068,
27
+ "<extra_id_32>": 32067,
28
+ "<extra_id_33>": 32066,
29
+ "<extra_id_34>": 32065,
30
+ "<extra_id_35>": 32064,
31
+ "<extra_id_36>": 32063,
32
+ "<extra_id_37>": 32062,
33
+ "<extra_id_38>": 32061,
34
+ "<extra_id_39>": 32060,
35
+ "<extra_id_3>": 32096,
36
+ "<extra_id_40>": 32059,
37
+ "<extra_id_41>": 32058,
38
+ "<extra_id_42>": 32057,
39
+ "<extra_id_43>": 32056,
40
+ "<extra_id_44>": 32055,
41
+ "<extra_id_45>": 32054,
42
+ "<extra_id_46>": 32053,
43
+ "<extra_id_47>": 32052,
44
+ "<extra_id_48>": 32051,
45
+ "<extra_id_49>": 32050,
46
+ "<extra_id_4>": 32095,
47
+ "<extra_id_50>": 32049,
48
+ "<extra_id_51>": 32048,
49
+ "<extra_id_52>": 32047,
50
+ "<extra_id_53>": 32046,
51
+ "<extra_id_54>": 32045,
52
+ "<extra_id_55>": 32044,
53
+ "<extra_id_56>": 32043,
54
+ "<extra_id_57>": 32042,
55
+ "<extra_id_58>": 32041,
56
+ "<extra_id_59>": 32040,
57
+ "<extra_id_5>": 32094,
58
+ "<extra_id_60>": 32039,
59
+ "<extra_id_61>": 32038,
60
+ "<extra_id_62>": 32037,
61
+ "<extra_id_63>": 32036,
62
+ "<extra_id_64>": 32035,
63
+ "<extra_id_65>": 32034,
64
+ "<extra_id_66>": 32033,
65
+ "<extra_id_67>": 32032,
66
+ "<extra_id_68>": 32031,
67
+ "<extra_id_69>": 32030,
68
+ "<extra_id_6>": 32093,
69
+ "<extra_id_70>": 32029,
70
+ "<extra_id_71>": 32028,
71
+ "<extra_id_72>": 32027,
72
+ "<extra_id_73>": 32026,
73
+ "<extra_id_74>": 32025,
74
+ "<extra_id_75>": 32024,
75
+ "<extra_id_76>": 32023,
76
+ "<extra_id_77>": 32022,
77
+ "<extra_id_78>": 32021,
78
+ "<extra_id_79>": 32020,
79
+ "<extra_id_7>": 32092,
80
+ "<extra_id_80>": 32019,
81
+ "<extra_id_81>": 32018,
82
+ "<extra_id_82>": 32017,
83
+ "<extra_id_83>": 32016,
84
+ "<extra_id_84>": 32015,
85
+ "<extra_id_85>": 32014,
86
+ "<extra_id_86>": 32013,
87
+ "<extra_id_87>": 32012,
88
+ "<extra_id_88>": 32011,
89
+ "<extra_id_89>": 32010,
90
+ "<extra_id_8>": 32091,
91
+ "<extra_id_90>": 32009,
92
+ "<extra_id_91>": 32008,
93
+ "<extra_id_92>": 32007,
94
+ "<extra_id_93>": 32006,
95
+ "<extra_id_94>": 32005,
96
+ "<extra_id_95>": 32004,
97
+ "<extra_id_96>": 32003,
98
+ "<extra_id_97>": 32002,
99
+ "<extra_id_98>": 32001,
100
+ "<extra_id_99>": 32000,
101
+ "<extra_id_9>": 32090
102
+ }
results/checkpoint-16000/config.json ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/flan-t5-base",
3
+ "architectures": [
4
+ "T5ForConditionalGeneration"
5
+ ],
6
+ "classifier_dropout": 0.0,
7
+ "d_ff": 2048,
8
+ "d_kv": 64,
9
+ "d_model": 768,
10
+ "decoder_start_token_id": 0,
11
+ "dense_act_fn": "gelu_new",
12
+ "dropout_rate": 0.1,
13
+ "eos_token_id": 1,
14
+ "feed_forward_proj": "gated-gelu",
15
+ "initializer_factor": 1.0,
16
+ "is_encoder_decoder": true,
17
+ "is_gated_act": true,
18
+ "layer_norm_epsilon": 1e-06,
19
+ "model_type": "t5",
20
+ "n_positions": 512,
21
+ "num_decoder_layers": 12,
22
+ "num_heads": 12,
23
+ "num_layers": 12,
24
+ "output_past": true,
25
+ "pad_token_id": 0,
26
+ "relative_attention_max_distance": 128,
27
+ "relative_attention_num_buckets": 32,
28
+ "task_specific_params": {
29
+ "summarization": {
30
+ "early_stopping": true,
31
+ "length_penalty": 2.0,
32
+ "max_length": 200,
33
+ "min_length": 30,
34
+ "no_repeat_ngram_size": 3,
35
+ "num_beams": 4,
36
+ "prefix": "summarize: "
37
+ },
38
+ "translation_en_to_de": {
39
+ "early_stopping": true,
40
+ "max_length": 300,
41
+ "num_beams": 4,
42
+ "prefix": "translate English to German: "
43
+ },
44
+ "translation_en_to_fr": {
45
+ "early_stopping": true,
46
+ "max_length": 300,
47
+ "num_beams": 4,
48
+ "prefix": "translate English to French: "
49
+ },
50
+ "translation_en_to_ro": {
51
+ "early_stopping": true,
52
+ "max_length": 300,
53
+ "num_beams": 4,
54
+ "prefix": "translate English to Romanian: "
55
+ }
56
+ },
57
+ "tie_word_embeddings": false,
58
+ "torch_dtype": "float32",
59
+ "transformers_version": "4.35.2",
60
+ "use_cache": true,
61
+ "vocab_size": 32128
62
+ }
results/checkpoint-16000/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "decoder_start_token_id": 0,
3
+ "eos_token_id": 1,
4
+ "pad_token_id": 0,
5
+ "transformers_version": "4.35.2"
6
+ }
results/checkpoint-16000/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd7f96db75733e18d6af8488ab51eea991be641c6c22b24fa5ab3b45101c3398
3
+ size 990345064
results/checkpoint-16000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31aa07bcfc63b03b9dbfb77536457e4d0591b64d537e2f4834f5b81c6bd2ab21
3
+ size 1980860410
results/checkpoint-16000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc296e1811c88d4548bfa74b8cf96485e58c41652ba8a0db69b6e3a9762f9be0
3
+ size 14244
results/checkpoint-16000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c77d751bb87ca04afd8f823ee9102cffea6221900b1a056c2f31d9044f1a0ce
3
+ size 1064
results/checkpoint-16000/special_tokens_map.json ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<extra_id_0>",
4
+ "<extra_id_1>",
5
+ "<extra_id_2>",
6
+ "<extra_id_3>",
7
+ "<extra_id_4>",
8
+ "<extra_id_5>",
9
+ "<extra_id_6>",
10
+ "<extra_id_7>",
11
+ "<extra_id_8>",
12
+ "<extra_id_9>",
13
+ "<extra_id_10>",
14
+ "<extra_id_11>",
15
+ "<extra_id_12>",
16
+ "<extra_id_13>",
17
+ "<extra_id_14>",
18
+ "<extra_id_15>",
19
+ "<extra_id_16>",
20
+ "<extra_id_17>",
21
+ "<extra_id_18>",
22
+ "<extra_id_19>",
23
+ "<extra_id_20>",
24
+ "<extra_id_21>",
25
+ "<extra_id_22>",
26
+ "<extra_id_23>",
27
+ "<extra_id_24>",
28
+ "<extra_id_25>",
29
+ "<extra_id_26>",
30
+ "<extra_id_27>",
31
+ "<extra_id_28>",
32
+ "<extra_id_29>",
33
+ "<extra_id_30>",
34
+ "<extra_id_31>",
35
+ "<extra_id_32>",
36
+ "<extra_id_33>",
37
+ "<extra_id_34>",
38
+ "<extra_id_35>",
39
+ "<extra_id_36>",
40
+ "<extra_id_37>",
41
+ "<extra_id_38>",
42
+ "<extra_id_39>",
43
+ "<extra_id_40>",
44
+ "<extra_id_41>",
45
+ "<extra_id_42>",
46
+ "<extra_id_43>",
47
+ "<extra_id_44>",
48
+ "<extra_id_45>",
49
+ "<extra_id_46>",
50
+ "<extra_id_47>",
51
+ "<extra_id_48>",
52
+ "<extra_id_49>",
53
+ "<extra_id_50>",
54
+ "<extra_id_51>",
55
+ "<extra_id_52>",
56
+ "<extra_id_53>",
57
+ "<extra_id_54>",
58
+ "<extra_id_55>",
59
+ "<extra_id_56>",
60
+ "<extra_id_57>",
61
+ "<extra_id_58>",
62
+ "<extra_id_59>",
63
+ "<extra_id_60>",
64
+ "<extra_id_61>",
65
+ "<extra_id_62>",
66
+ "<extra_id_63>",
67
+ "<extra_id_64>",
68
+ "<extra_id_65>",
69
+ "<extra_id_66>",
70
+ "<extra_id_67>",
71
+ "<extra_id_68>",
72
+ "<extra_id_69>",
73
+ "<extra_id_70>",
74
+ "<extra_id_71>",
75
+ "<extra_id_72>",
76
+ "<extra_id_73>",
77
+ "<extra_id_74>",
78
+ "<extra_id_75>",
79
+ "<extra_id_76>",
80
+ "<extra_id_77>",
81
+ "<extra_id_78>",
82
+ "<extra_id_79>",
83
+ "<extra_id_80>",
84
+ "<extra_id_81>",
85
+ "<extra_id_82>",
86
+ "<extra_id_83>",
87
+ "<extra_id_84>",
88
+ "<extra_id_85>",
89
+ "<extra_id_86>",
90
+ "<extra_id_87>",
91
+ "<extra_id_88>",
92
+ "<extra_id_89>",
93
+ "<extra_id_90>",
94
+ "<extra_id_91>",
95
+ "<extra_id_92>",
96
+ "<extra_id_93>",
97
+ "<extra_id_94>",
98
+ "<extra_id_95>",
99
+ "<extra_id_96>",
100
+ "<extra_id_97>",
101
+ "<extra_id_98>",
102
+ "<extra_id_99>"
103
+ ],
104
+ "eos_token": {
105
+ "content": "</s>",
106
+ "lstrip": false,
107
+ "normalized": false,
108
+ "rstrip": false,
109
+ "single_word": false
110
+ },
111
+ "pad_token": {
112
+ "content": "<pad>",
113
+ "lstrip": false,
114
+ "normalized": false,
115
+ "rstrip": false,
116
+ "single_word": false
117
+ },
118
+ "unk_token": {
119
+ "content": "<unk>",
120
+ "lstrip": false,
121
+ "normalized": false,
122
+ "rstrip": false,
123
+ "single_word": false
124
+ }
125
+ }
results/checkpoint-16000/spiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
3
+ size 791656
results/checkpoint-16000/tokenizer_config.json ADDED
@@ -0,0 +1,939 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<pad>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "</s>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "<unk>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "32000": {
28
+ "content": "<extra_id_99>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "32001": {
36
+ "content": "<extra_id_98>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "32002": {
44
+ "content": "<extra_id_97>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "32003": {
52
+ "content": "<extra_id_96>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "32004": {
60
+ "content": "<extra_id_95>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "32005": {
68
+ "content": "<extra_id_94>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "32006": {
76
+ "content": "<extra_id_93>",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "32007": {
84
+ "content": "<extra_id_92>",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "32008": {
92
+ "content": "<extra_id_91>",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "32009": {
100
+ "content": "<extra_id_90>",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "32010": {
108
+ "content": "<extra_id_89>",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "32011": {
116
+ "content": "<extra_id_88>",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "32012": {
124
+ "content": "<extra_id_87>",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": true
130
+ },
131
+ "32013": {
132
+ "content": "<extra_id_86>",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": true
138
+ },
139
+ "32014": {
140
+ "content": "<extra_id_85>",
141
+ "lstrip": false,
142
+ "normalized": false,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": true
146
+ },
147
+ "32015": {
148
+ "content": "<extra_id_84>",
149
+ "lstrip": false,
150
+ "normalized": false,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": true
154
+ },
155
+ "32016": {
156
+ "content": "<extra_id_83>",
157
+ "lstrip": false,
158
+ "normalized": false,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": true
162
+ },
163
+ "32017": {
164
+ "content": "<extra_id_82>",
165
+ "lstrip": false,
166
+ "normalized": false,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": true
170
+ },
171
+ "32018": {
172
+ "content": "<extra_id_81>",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": true
178
+ },
179
+ "32019": {
180
+ "content": "<extra_id_80>",
181
+ "lstrip": false,
182
+ "normalized": false,
183
+ "rstrip": false,
184
+ "single_word": false,
185
+ "special": true
186
+ },
187
+ "32020": {
188
+ "content": "<extra_id_79>",
189
+ "lstrip": false,
190
+ "normalized": false,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": true
194
+ },
195
+ "32021": {
196
+ "content": "<extra_id_78>",
197
+ "lstrip": false,
198
+ "normalized": false,
199
+ "rstrip": false,
200
+ "single_word": false,
201
+ "special": true
202
+ },
203
+ "32022": {
204
+ "content": "<extra_id_77>",
205
+ "lstrip": false,
206
+ "normalized": false,
207
+ "rstrip": false,
208
+ "single_word": false,
209
+ "special": true
210
+ },
211
+ "32023": {
212
+ "content": "<extra_id_76>",
213
+ "lstrip": false,
214
+ "normalized": false,
215
+ "rstrip": false,
216
+ "single_word": false,
217
+ "special": true
218
+ },
219
+ "32024": {
220
+ "content": "<extra_id_75>",
221
+ "lstrip": false,
222
+ "normalized": false,
223
+ "rstrip": false,
224
+ "single_word": false,
225
+ "special": true
226
+ },
227
+ "32025": {
228
+ "content": "<extra_id_74>",
229
+ "lstrip": false,
230
+ "normalized": false,
231
+ "rstrip": false,
232
+ "single_word": false,
233
+ "special": true
234
+ },
235
+ "32026": {
236
+ "content": "<extra_id_73>",
237
+ "lstrip": false,
238
+ "normalized": false,
239
+ "rstrip": false,
240
+ "single_word": false,
241
+ "special": true
242
+ },
243
+ "32027": {
244
+ "content": "<extra_id_72>",
245
+ "lstrip": false,
246
+ "normalized": false,
247
+ "rstrip": false,
248
+ "single_word": false,
249
+ "special": true
250
+ },
251
+ "32028": {
252
+ "content": "<extra_id_71>",
253
+ "lstrip": false,
254
+ "normalized": false,
255
+ "rstrip": false,
256
+ "single_word": false,
257
+ "special": true
258
+ },
259
+ "32029": {
260
+ "content": "<extra_id_70>",
261
+ "lstrip": false,
262
+ "normalized": false,
263
+ "rstrip": false,
264
+ "single_word": false,
265
+ "special": true
266
+ },
267
+ "32030": {
268
+ "content": "<extra_id_69>",
269
+ "lstrip": false,
270
+ "normalized": false,
271
+ "rstrip": false,
272
+ "single_word": false,
273
+ "special": true
274
+ },
275
+ "32031": {
276
+ "content": "<extra_id_68>",
277
+ "lstrip": false,
278
+ "normalized": false,
279
+ "rstrip": false,
280
+ "single_word": false,
281
+ "special": true
282
+ },
283
+ "32032": {
284
+ "content": "<extra_id_67>",
285
+ "lstrip": false,
286
+ "normalized": false,
287
+ "rstrip": false,
288
+ "single_word": false,
289
+ "special": true
290
+ },
291
+ "32033": {
292
+ "content": "<extra_id_66>",
293
+ "lstrip": false,
294
+ "normalized": false,
295
+ "rstrip": false,
296
+ "single_word": false,
297
+ "special": true
298
+ },
299
+ "32034": {
300
+ "content": "<extra_id_65>",
301
+ "lstrip": false,
302
+ "normalized": false,
303
+ "rstrip": false,
304
+ "single_word": false,
305
+ "special": true
306
+ },
307
+ "32035": {
308
+ "content": "<extra_id_64>",
309
+ "lstrip": false,
310
+ "normalized": false,
311
+ "rstrip": false,
312
+ "single_word": false,
313
+ "special": true
314
+ },
315
+ "32036": {
316
+ "content": "<extra_id_63>",
317
+ "lstrip": false,
318
+ "normalized": false,
319
+ "rstrip": false,
320
+ "single_word": false,
321
+ "special": true
322
+ },
323
+ "32037": {
324
+ "content": "<extra_id_62>",
325
+ "lstrip": false,
326
+ "normalized": false,
327
+ "rstrip": false,
328
+ "single_word": false,
329
+ "special": true
330
+ },
331
+ "32038": {
332
+ "content": "<extra_id_61>",
333
+ "lstrip": false,
334
+ "normalized": false,
335
+ "rstrip": false,
336
+ "single_word": false,
337
+ "special": true
338
+ },
339
+ "32039": {
340
+ "content": "<extra_id_60>",
341
+ "lstrip": false,
342
+ "normalized": false,
343
+ "rstrip": false,
344
+ "single_word": false,
345
+ "special": true
346
+ },
347
+ "32040": {
348
+ "content": "<extra_id_59>",
349
+ "lstrip": false,
350
+ "normalized": false,
351
+ "rstrip": false,
352
+ "single_word": false,
353
+ "special": true
354
+ },
355
+ "32041": {
356
+ "content": "<extra_id_58>",
357
+ "lstrip": false,
358
+ "normalized": false,
359
+ "rstrip": false,
360
+ "single_word": false,
361
+ "special": true
362
+ },
363
+ "32042": {
364
+ "content": "<extra_id_57>",
365
+ "lstrip": false,
366
+ "normalized": false,
367
+ "rstrip": false,
368
+ "single_word": false,
369
+ "special": true
370
+ },
371
+ "32043": {
372
+ "content": "<extra_id_56>",
373
+ "lstrip": false,
374
+ "normalized": false,
375
+ "rstrip": false,
376
+ "single_word": false,
377
+ "special": true
378
+ },
379
+ "32044": {
380
+ "content": "<extra_id_55>",
381
+ "lstrip": false,
382
+ "normalized": false,
383
+ "rstrip": false,
384
+ "single_word": false,
385
+ "special": true
386
+ },
387
+ "32045": {
388
+ "content": "<extra_id_54>",
389
+ "lstrip": false,
390
+ "normalized": false,
391
+ "rstrip": false,
392
+ "single_word": false,
393
+ "special": true
394
+ },
395
+ "32046": {
396
+ "content": "<extra_id_53>",
397
+ "lstrip": false,
398
+ "normalized": false,
399
+ "rstrip": false,
400
+ "single_word": false,
401
+ "special": true
402
+ },
403
+ "32047": {
404
+ "content": "<extra_id_52>",
405
+ "lstrip": false,
406
+ "normalized": false,
407
+ "rstrip": false,
408
+ "single_word": false,
409
+ "special": true
410
+ },
411
+ "32048": {
412
+ "content": "<extra_id_51>",
413
+ "lstrip": false,
414
+ "normalized": false,
415
+ "rstrip": false,
416
+ "single_word": false,
417
+ "special": true
418
+ },
419
+ "32049": {
420
+ "content": "<extra_id_50>",
421
+ "lstrip": false,
422
+ "normalized": false,
423
+ "rstrip": false,
424
+ "single_word": false,
425
+ "special": true
426
+ },
427
+ "32050": {
428
+ "content": "<extra_id_49>",
429
+ "lstrip": false,
430
+ "normalized": false,
431
+ "rstrip": false,
432
+ "single_word": false,
433
+ "special": true
434
+ },
435
+ "32051": {
436
+ "content": "<extra_id_48>",
437
+ "lstrip": false,
438
+ "normalized": false,
439
+ "rstrip": false,
440
+ "single_word": false,
441
+ "special": true
442
+ },
443
+ "32052": {
444
+ "content": "<extra_id_47>",
445
+ "lstrip": false,
446
+ "normalized": false,
447
+ "rstrip": false,
448
+ "single_word": false,
449
+ "special": true
450
+ },
451
+ "32053": {
452
+ "content": "<extra_id_46>",
453
+ "lstrip": false,
454
+ "normalized": false,
455
+ "rstrip": false,
456
+ "single_word": false,
457
+ "special": true
458
+ },
459
+ "32054": {
460
+ "content": "<extra_id_45>",
461
+ "lstrip": false,
462
+ "normalized": false,
463
+ "rstrip": false,
464
+ "single_word": false,
465
+ "special": true
466
+ },
467
+ "32055": {
468
+ "content": "<extra_id_44>",
469
+ "lstrip": false,
470
+ "normalized": false,
471
+ "rstrip": false,
472
+ "single_word": false,
473
+ "special": true
474
+ },
475
+ "32056": {
476
+ "content": "<extra_id_43>",
477
+ "lstrip": false,
478
+ "normalized": false,
479
+ "rstrip": false,
480
+ "single_word": false,
481
+ "special": true
482
+ },
483
+ "32057": {
484
+ "content": "<extra_id_42>",
485
+ "lstrip": false,
486
+ "normalized": false,
487
+ "rstrip": false,
488
+ "single_word": false,
489
+ "special": true
490
+ },
491
+ "32058": {
492
+ "content": "<extra_id_41>",
493
+ "lstrip": false,
494
+ "normalized": false,
495
+ "rstrip": false,
496
+ "single_word": false,
497
+ "special": true
498
+ },
499
+ "32059": {
500
+ "content": "<extra_id_40>",
501
+ "lstrip": false,
502
+ "normalized": false,
503
+ "rstrip": false,
504
+ "single_word": false,
505
+ "special": true
506
+ },
507
+ "32060": {
508
+ "content": "<extra_id_39>",
509
+ "lstrip": false,
510
+ "normalized": false,
511
+ "rstrip": false,
512
+ "single_word": false,
513
+ "special": true
514
+ },
515
+ "32061": {
516
+ "content": "<extra_id_38>",
517
+ "lstrip": false,
518
+ "normalized": false,
519
+ "rstrip": false,
520
+ "single_word": false,
521
+ "special": true
522
+ },
523
+ "32062": {
524
+ "content": "<extra_id_37>",
525
+ "lstrip": false,
526
+ "normalized": false,
527
+ "rstrip": false,
528
+ "single_word": false,
529
+ "special": true
530
+ },
531
+ "32063": {
532
+ "content": "<extra_id_36>",
533
+ "lstrip": false,
534
+ "normalized": false,
535
+ "rstrip": false,
536
+ "single_word": false,
537
+ "special": true
538
+ },
539
+ "32064": {
540
+ "content": "<extra_id_35>",
541
+ "lstrip": false,
542
+ "normalized": false,
543
+ "rstrip": false,
544
+ "single_word": false,
545
+ "special": true
546
+ },
547
+ "32065": {
548
+ "content": "<extra_id_34>",
549
+ "lstrip": false,
550
+ "normalized": false,
551
+ "rstrip": false,
552
+ "single_word": false,
553
+ "special": true
554
+ },
555
+ "32066": {
556
+ "content": "<extra_id_33>",
557
+ "lstrip": false,
558
+ "normalized": false,
559
+ "rstrip": false,
560
+ "single_word": false,
561
+ "special": true
562
+ },
563
+ "32067": {
564
+ "content": "<extra_id_32>",
565
+ "lstrip": false,
566
+ "normalized": false,
567
+ "rstrip": false,
568
+ "single_word": false,
569
+ "special": true
570
+ },
571
+ "32068": {
572
+ "content": "<extra_id_31>",
573
+ "lstrip": false,
574
+ "normalized": false,
575
+ "rstrip": false,
576
+ "single_word": false,
577
+ "special": true
578
+ },
579
+ "32069": {
580
+ "content": "<extra_id_30>",
581
+ "lstrip": false,
582
+ "normalized": false,
583
+ "rstrip": false,
584
+ "single_word": false,
585
+ "special": true
586
+ },
587
+ "32070": {
588
+ "content": "<extra_id_29>",
589
+ "lstrip": false,
590
+ "normalized": false,
591
+ "rstrip": false,
592
+ "single_word": false,
593
+ "special": true
594
+ },
595
+ "32071": {
596
+ "content": "<extra_id_28>",
597
+ "lstrip": false,
598
+ "normalized": false,
599
+ "rstrip": false,
600
+ "single_word": false,
601
+ "special": true
602
+ },
603
+ "32072": {
604
+ "content": "<extra_id_27>",
605
+ "lstrip": false,
606
+ "normalized": false,
607
+ "rstrip": false,
608
+ "single_word": false,
609
+ "special": true
610
+ },
611
+ "32073": {
612
+ "content": "<extra_id_26>",
613
+ "lstrip": false,
614
+ "normalized": false,
615
+ "rstrip": false,
616
+ "single_word": false,
617
+ "special": true
618
+ },
619
+ "32074": {
620
+ "content": "<extra_id_25>",
621
+ "lstrip": false,
622
+ "normalized": false,
623
+ "rstrip": false,
624
+ "single_word": false,
625
+ "special": true
626
+ },
627
+ "32075": {
628
+ "content": "<extra_id_24>",
629
+ "lstrip": false,
630
+ "normalized": false,
631
+ "rstrip": false,
632
+ "single_word": false,
633
+ "special": true
634
+ },
635
+ "32076": {
636
+ "content": "<extra_id_23>",
637
+ "lstrip": false,
638
+ "normalized": false,
639
+ "rstrip": false,
640
+ "single_word": false,
641
+ "special": true
642
+ },
643
+ "32077": {
644
+ "content": "<extra_id_22>",
645
+ "lstrip": false,
646
+ "normalized": false,
647
+ "rstrip": false,
648
+ "single_word": false,
649
+ "special": true
650
+ },
651
+ "32078": {
652
+ "content": "<extra_id_21>",
653
+ "lstrip": false,
654
+ "normalized": false,
655
+ "rstrip": false,
656
+ "single_word": false,
657
+ "special": true
658
+ },
659
+ "32079": {
660
+ "content": "<extra_id_20>",
661
+ "lstrip": false,
662
+ "normalized": false,
663
+ "rstrip": false,
664
+ "single_word": false,
665
+ "special": true
666
+ },
667
+ "32080": {
668
+ "content": "<extra_id_19>",
669
+ "lstrip": false,
670
+ "normalized": false,
671
+ "rstrip": false,
672
+ "single_word": false,
673
+ "special": true
674
+ },
675
+ "32081": {
676
+ "content": "<extra_id_18>",
677
+ "lstrip": false,
678
+ "normalized": false,
679
+ "rstrip": false,
680
+ "single_word": false,
681
+ "special": true
682
+ },
683
+ "32082": {
684
+ "content": "<extra_id_17>",
685
+ "lstrip": false,
686
+ "normalized": false,
687
+ "rstrip": false,
688
+ "single_word": false,
689
+ "special": true
690
+ },
691
+ "32083": {
692
+ "content": "<extra_id_16>",
693
+ "lstrip": false,
694
+ "normalized": false,
695
+ "rstrip": false,
696
+ "single_word": false,
697
+ "special": true
698
+ },
699
+ "32084": {
700
+ "content": "<extra_id_15>",
701
+ "lstrip": false,
702
+ "normalized": false,
703
+ "rstrip": false,
704
+ "single_word": false,
705
+ "special": true
706
+ },
707
+ "32085": {
708
+ "content": "<extra_id_14>",
709
+ "lstrip": false,
710
+ "normalized": false,
711
+ "rstrip": false,
712
+ "single_word": false,
713
+ "special": true
714
+ },
715
+ "32086": {
716
+ "content": "<extra_id_13>",
717
+ "lstrip": false,
718
+ "normalized": false,
719
+ "rstrip": false,
720
+ "single_word": false,
721
+ "special": true
722
+ },
723
+ "32087": {
724
+ "content": "<extra_id_12>",
725
+ "lstrip": false,
726
+ "normalized": false,
727
+ "rstrip": false,
728
+ "single_word": false,
729
+ "special": true
730
+ },
731
+ "32088": {
732
+ "content": "<extra_id_11>",
733
+ "lstrip": false,
734
+ "normalized": false,
735
+ "rstrip": false,
736
+ "single_word": false,
737
+ "special": true
738
+ },
739
+ "32089": {
740
+ "content": "<extra_id_10>",
741
+ "lstrip": false,
742
+ "normalized": false,
743
+ "rstrip": false,
744
+ "single_word": false,
745
+ "special": true
746
+ },
747
+ "32090": {
748
+ "content": "<extra_id_9>",
749
+ "lstrip": false,
750
+ "normalized": false,
751
+ "rstrip": false,
752
+ "single_word": false,
753
+ "special": true
754
+ },
755
+ "32091": {
756
+ "content": "<extra_id_8>",
757
+ "lstrip": false,
758
+ "normalized": false,
759
+ "rstrip": false,
760
+ "single_word": false,
761
+ "special": true
762
+ },
763
+ "32092": {
764
+ "content": "<extra_id_7>",
765
+ "lstrip": false,
766
+ "normalized": false,
767
+ "rstrip": false,
768
+ "single_word": false,
769
+ "special": true
770
+ },
771
+ "32093": {
772
+ "content": "<extra_id_6>",
773
+ "lstrip": false,
774
+ "normalized": false,
775
+ "rstrip": false,
776
+ "single_word": false,
777
+ "special": true
778
+ },
779
+ "32094": {
780
+ "content": "<extra_id_5>",
781
+ "lstrip": false,
782
+ "normalized": false,
783
+ "rstrip": false,
784
+ "single_word": false,
785
+ "special": true
786
+ },
787
+ "32095": {
788
+ "content": "<extra_id_4>",
789
+ "lstrip": false,
790
+ "normalized": false,
791
+ "rstrip": false,
792
+ "single_word": false,
793
+ "special": true
794
+ },
795
+ "32096": {
796
+ "content": "<extra_id_3>",
797
+ "lstrip": false,
798
+ "normalized": false,
799
+ "rstrip": false,
800
+ "single_word": false,
801
+ "special": true
802
+ },
803
+ "32097": {
804
+ "content": "<extra_id_2>",
805
+ "lstrip": false,
806
+ "normalized": false,
807
+ "rstrip": false,
808
+ "single_word": false,
809
+ "special": true
810
+ },
811
+ "32098": {
812
+ "content": "<extra_id_1>",
813
+ "lstrip": false,
814
+ "normalized": false,
815
+ "rstrip": false,
816
+ "single_word": false,
817
+ "special": true
818
+ },
819
+ "32099": {
820
+ "content": "<extra_id_0>",
821
+ "lstrip": false,
822
+ "normalized": false,
823
+ "rstrip": false,
824
+ "single_word": false,
825
+ "special": true
826
+ }
827
+ },
828
+ "additional_special_tokens": [
829
+ "<extra_id_0>",
830
+ "<extra_id_1>",
831
+ "<extra_id_2>",
832
+ "<extra_id_3>",
833
+ "<extra_id_4>",
834
+ "<extra_id_5>",
835
+ "<extra_id_6>",
836
+ "<extra_id_7>",
837
+ "<extra_id_8>",
838
+ "<extra_id_9>",
839
+ "<extra_id_10>",
840
+ "<extra_id_11>",
841
+ "<extra_id_12>",
842
+ "<extra_id_13>",
843
+ "<extra_id_14>",
844
+ "<extra_id_15>",
845
+ "<extra_id_16>",
846
+ "<extra_id_17>",
847
+ "<extra_id_18>",
848
+ "<extra_id_19>",
849
+ "<extra_id_20>",
850
+ "<extra_id_21>",
851
+ "<extra_id_22>",
852
+ "<extra_id_23>",
853
+ "<extra_id_24>",
854
+ "<extra_id_25>",
855
+ "<extra_id_26>",
856
+ "<extra_id_27>",
857
+ "<extra_id_28>",
858
+ "<extra_id_29>",
859
+ "<extra_id_30>",
860
+ "<extra_id_31>",
861
+ "<extra_id_32>",
862
+ "<extra_id_33>",
863
+ "<extra_id_34>",
864
+ "<extra_id_35>",
865
+ "<extra_id_36>",
866
+ "<extra_id_37>",
867
+ "<extra_id_38>",
868
+ "<extra_id_39>",
869
+ "<extra_id_40>",
870
+ "<extra_id_41>",
871
+ "<extra_id_42>",
872
+ "<extra_id_43>",
873
+ "<extra_id_44>",
874
+ "<extra_id_45>",
875
+ "<extra_id_46>",
876
+ "<extra_id_47>",
877
+ "<extra_id_48>",
878
+ "<extra_id_49>",
879
+ "<extra_id_50>",
880
+ "<extra_id_51>",
881
+ "<extra_id_52>",
882
+ "<extra_id_53>",
883
+ "<extra_id_54>",
884
+ "<extra_id_55>",
885
+ "<extra_id_56>",
886
+ "<extra_id_57>",
887
+ "<extra_id_58>",
888
+ "<extra_id_59>",
889
+ "<extra_id_60>",
890
+ "<extra_id_61>",
891
+ "<extra_id_62>",
892
+ "<extra_id_63>",
893
+ "<extra_id_64>",
894
+ "<extra_id_65>",
895
+ "<extra_id_66>",
896
+ "<extra_id_67>",
897
+ "<extra_id_68>",
898
+ "<extra_id_69>",
899
+ "<extra_id_70>",
900
+ "<extra_id_71>",
901
+ "<extra_id_72>",
902
+ "<extra_id_73>",
903
+ "<extra_id_74>",
904
+ "<extra_id_75>",
905
+ "<extra_id_76>",
906
+ "<extra_id_77>",
907
+ "<extra_id_78>",
908
+ "<extra_id_79>",
909
+ "<extra_id_80>",
910
+ "<extra_id_81>",
911
+ "<extra_id_82>",
912
+ "<extra_id_83>",
913
+ "<extra_id_84>",
914
+ "<extra_id_85>",
915
+ "<extra_id_86>",
916
+ "<extra_id_87>",
917
+ "<extra_id_88>",
918
+ "<extra_id_89>",
919
+ "<extra_id_90>",
920
+ "<extra_id_91>",
921
+ "<extra_id_92>",
922
+ "<extra_id_93>",
923
+ "<extra_id_94>",
924
+ "<extra_id_95>",
925
+ "<extra_id_96>",
926
+ "<extra_id_97>",
927
+ "<extra_id_98>",
928
+ "<extra_id_99>"
929
+ ],
930
+ "clean_up_tokenization_spaces": true,
931
+ "eos_token": "</s>",
932
+ "extra_ids": 100,
933
+ "legacy": true,
934
+ "model_max_length": 512,
935
+ "pad_token": "<pad>",
936
+ "sp_model_kwargs": {},
937
+ "tokenizer_class": "T5Tokenizer",
938
+ "unk_token": "<unk>"
939
+ }
results/checkpoint-16000/trainer_state.json ADDED
@@ -0,0 +1,319 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 9.35672514619883,
5
+ "eval_steps": 500,
6
+ "global_step": 16000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.29,
13
+ "learning_rate": 0.0002912280701754386,
14
+ "loss": 0.3858,
15
+ "step": 500
16
+ },
17
+ {
18
+ "epoch": 0.58,
19
+ "learning_rate": 0.0002824561403508772,
20
+ "loss": 0.0819,
21
+ "step": 1000
22
+ },
23
+ {
24
+ "epoch": 0.88,
25
+ "learning_rate": 0.00027368421052631573,
26
+ "loss": 0.046,
27
+ "step": 1500
28
+ },
29
+ {
30
+ "epoch": 1.0,
31
+ "eval_loss": 0.006535602733492851,
32
+ "eval_rouge1": 0.9974420190995907,
33
+ "eval_rouge2": 0.0,
34
+ "eval_rougeL": 0.9974420190995907,
35
+ "eval_rougeLsum": 0.9974420190995907,
36
+ "eval_runtime": 155.5569,
37
+ "eval_samples_per_second": 37.697,
38
+ "eval_steps_per_second": 9.424,
39
+ "step": 1710
40
+ },
41
+ {
42
+ "epoch": 1.17,
43
+ "learning_rate": 0.0002649122807017544,
44
+ "loss": 0.0317,
45
+ "step": 2000
46
+ },
47
+ {
48
+ "epoch": 1.46,
49
+ "learning_rate": 0.00025614035087719294,
50
+ "loss": 0.0132,
51
+ "step": 2500
52
+ },
53
+ {
54
+ "epoch": 1.75,
55
+ "learning_rate": 0.00024736842105263154,
56
+ "loss": 0.0103,
57
+ "step": 3000
58
+ },
59
+ {
60
+ "epoch": 2.0,
61
+ "eval_loss": 0.00893484242260456,
62
+ "eval_rouge1": 0.997612551159618,
63
+ "eval_rouge2": 0.0,
64
+ "eval_rougeL": 0.997612551159618,
65
+ "eval_rougeLsum": 0.997612551159618,
66
+ "eval_runtime": 159.0943,
67
+ "eval_samples_per_second": 36.859,
68
+ "eval_steps_per_second": 9.215,
69
+ "step": 3420
70
+ },
71
+ {
72
+ "epoch": 2.05,
73
+ "learning_rate": 0.00023859649122807015,
74
+ "loss": 0.015,
75
+ "step": 3500
76
+ },
77
+ {
78
+ "epoch": 2.34,
79
+ "learning_rate": 0.00022982456140350875,
80
+ "loss": 0.0078,
81
+ "step": 4000
82
+ },
83
+ {
84
+ "epoch": 2.63,
85
+ "learning_rate": 0.00022105263157894733,
86
+ "loss": 0.0075,
87
+ "step": 4500
88
+ },
89
+ {
90
+ "epoch": 2.92,
91
+ "learning_rate": 0.00021228070175438596,
92
+ "loss": 0.0105,
93
+ "step": 5000
94
+ },
95
+ {
96
+ "epoch": 3.0,
97
+ "eval_loss": 0.002929441863670945,
98
+ "eval_rouge1": 0.9982946793997272,
99
+ "eval_rouge2": 0.0,
100
+ "eval_rougeL": 0.9982946793997272,
101
+ "eval_rougeLsum": 0.9982946793997272,
102
+ "eval_runtime": 155.9353,
103
+ "eval_samples_per_second": 37.605,
104
+ "eval_steps_per_second": 9.401,
105
+ "step": 5130
106
+ },
107
+ {
108
+ "epoch": 3.22,
109
+ "learning_rate": 0.00020350877192982454,
110
+ "loss": 0.0159,
111
+ "step": 5500
112
+ },
113
+ {
114
+ "epoch": 3.51,
115
+ "learning_rate": 0.00019473684210526314,
116
+ "loss": 0.0121,
117
+ "step": 6000
118
+ },
119
+ {
120
+ "epoch": 3.8,
121
+ "learning_rate": 0.00018596491228070172,
122
+ "loss": 0.0105,
123
+ "step": 6500
124
+ },
125
+ {
126
+ "epoch": 4.0,
127
+ "eval_loss": 0.0014885533601045609,
128
+ "eval_rouge1": 0.9986357435197817,
129
+ "eval_rouge2": 0.0,
130
+ "eval_rougeL": 0.9986357435197817,
131
+ "eval_rougeLsum": 0.9986357435197817,
132
+ "eval_runtime": 159.1866,
133
+ "eval_samples_per_second": 36.837,
134
+ "eval_steps_per_second": 9.209,
135
+ "step": 6840
136
+ },
137
+ {
138
+ "epoch": 4.09,
139
+ "learning_rate": 0.00017719298245614035,
140
+ "loss": 0.0136,
141
+ "step": 7000
142
+ },
143
+ {
144
+ "epoch": 4.39,
145
+ "learning_rate": 0.00016842105263157892,
146
+ "loss": 0.0067,
147
+ "step": 7500
148
+ },
149
+ {
150
+ "epoch": 4.68,
151
+ "learning_rate": 0.00015964912280701753,
152
+ "loss": 0.0052,
153
+ "step": 8000
154
+ },
155
+ {
156
+ "epoch": 4.97,
157
+ "learning_rate": 0.00015087719298245613,
158
+ "loss": 0.0032,
159
+ "step": 8500
160
+ },
161
+ {
162
+ "epoch": 5.0,
163
+ "eval_loss": 0.002013931516557932,
164
+ "eval_rouge1": 0.9982946793997272,
165
+ "eval_rouge2": 0.0,
166
+ "eval_rougeL": 0.9982946793997272,
167
+ "eval_rougeLsum": 0.9982946793997272,
168
+ "eval_runtime": 158.9827,
169
+ "eval_samples_per_second": 36.885,
170
+ "eval_steps_per_second": 9.221,
171
+ "step": 8550
172
+ },
173
+ {
174
+ "epoch": 5.26,
175
+ "learning_rate": 0.0001421052631578947,
176
+ "loss": 0.0036,
177
+ "step": 9000
178
+ },
179
+ {
180
+ "epoch": 5.56,
181
+ "learning_rate": 0.0001333333333333333,
182
+ "loss": 0.0028,
183
+ "step": 9500
184
+ },
185
+ {
186
+ "epoch": 5.85,
187
+ "learning_rate": 0.00012456140350877192,
188
+ "loss": 0.0023,
189
+ "step": 10000
190
+ },
191
+ {
192
+ "epoch": 6.0,
193
+ "eval_loss": 0.008263664320111275,
194
+ "eval_rouge1": 0.997612551159618,
195
+ "eval_rouge2": 0.0,
196
+ "eval_rougeL": 0.997612551159618,
197
+ "eval_rougeLsum": 0.997612551159618,
198
+ "eval_runtime": 165.6672,
199
+ "eval_samples_per_second": 35.396,
200
+ "eval_steps_per_second": 8.849,
201
+ "step": 10260
202
+ },
203
+ {
204
+ "epoch": 6.14,
205
+ "learning_rate": 0.00011578947368421051,
206
+ "loss": 0.0045,
207
+ "step": 10500
208
+ },
209
+ {
210
+ "epoch": 6.43,
211
+ "learning_rate": 0.00010701754385964911,
212
+ "loss": 0.0031,
213
+ "step": 11000
214
+ },
215
+ {
216
+ "epoch": 6.73,
217
+ "learning_rate": 9.82456140350877e-05,
218
+ "loss": 0.0013,
219
+ "step": 11500
220
+ },
221
+ {
222
+ "epoch": 7.0,
223
+ "eval_loss": 0.003634733846411109,
224
+ "eval_rouge1": 0.9982946793997272,
225
+ "eval_rouge2": 0.0,
226
+ "eval_rougeL": 0.9982946793997272,
227
+ "eval_rougeLsum": 0.9982946793997272,
228
+ "eval_runtime": 165.5178,
229
+ "eval_samples_per_second": 35.428,
230
+ "eval_steps_per_second": 8.857,
231
+ "step": 11970
232
+ },
233
+ {
234
+ "epoch": 7.02,
235
+ "learning_rate": 8.94736842105263e-05,
236
+ "loss": 0.0017,
237
+ "step": 12000
238
+ },
239
+ {
240
+ "epoch": 7.31,
241
+ "learning_rate": 8.07017543859649e-05,
242
+ "loss": 0.0008,
243
+ "step": 12500
244
+ },
245
+ {
246
+ "epoch": 7.6,
247
+ "learning_rate": 7.19298245614035e-05,
248
+ "loss": 0.0017,
249
+ "step": 13000
250
+ },
251
+ {
252
+ "epoch": 7.89,
253
+ "learning_rate": 6.315789473684209e-05,
254
+ "loss": 0.0012,
255
+ "step": 13500
256
+ },
257
+ {
258
+ "epoch": 8.0,
259
+ "eval_loss": 0.0013940236531198025,
260
+ "eval_rouge1": 0.9982946793997272,
261
+ "eval_rouge2": 0.0,
262
+ "eval_rougeL": 0.9982946793997272,
263
+ "eval_rougeLsum": 0.9982946793997272,
264
+ "eval_runtime": 166.5345,
265
+ "eval_samples_per_second": 35.212,
266
+ "eval_steps_per_second": 8.803,
267
+ "step": 13680
268
+ },
269
+ {
270
+ "epoch": 8.19,
271
+ "learning_rate": 5.4385964912280694e-05,
272
+ "loss": 0.0024,
273
+ "step": 14000
274
+ },
275
+ {
276
+ "epoch": 8.48,
277
+ "learning_rate": 4.561403508771929e-05,
278
+ "loss": 0.0015,
279
+ "step": 14500
280
+ },
281
+ {
282
+ "epoch": 8.77,
283
+ "learning_rate": 3.684210526315789e-05,
284
+ "loss": 0.0012,
285
+ "step": 15000
286
+ },
287
+ {
288
+ "epoch": 9.0,
289
+ "eval_loss": 0.0021317724604159594,
290
+ "eval_rouge1": 0.9982946793997272,
291
+ "eval_rouge2": 0.0,
292
+ "eval_rougeL": 0.9982946793997272,
293
+ "eval_rougeLsum": 0.9982946793997272,
294
+ "eval_runtime": 166.3607,
295
+ "eval_samples_per_second": 35.249,
296
+ "eval_steps_per_second": 8.812,
297
+ "step": 15390
298
+ },
299
+ {
300
+ "epoch": 9.06,
301
+ "learning_rate": 2.807017543859649e-05,
302
+ "loss": 0.0008,
303
+ "step": 15500
304
+ },
305
+ {
306
+ "epoch": 9.36,
307
+ "learning_rate": 1.9298245614035086e-05,
308
+ "loss": 0.0004,
309
+ "step": 16000
310
+ }
311
+ ],
312
+ "logging_steps": 500,
313
+ "max_steps": 17100,
314
+ "num_train_epochs": 10,
315
+ "save_steps": 500,
316
+ "total_flos": 4109164676038656.0,
317
+ "trial_name": null,
318
+ "trial_params": null
319
+ }
results/checkpoint-16000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2e8c3aef9cfe94a083e4e678683065ab146cef97e8d157c2108eb635736de7c
3
+ size 4664
results/checkpoint-16500/added_tokens.json ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "<extra_id_0>": 32099,
3
+ "<extra_id_10>": 32089,
4
+ "<extra_id_11>": 32088,
5
+ "<extra_id_12>": 32087,
6
+ "<extra_id_13>": 32086,
7
+ "<extra_id_14>": 32085,
8
+ "<extra_id_15>": 32084,
9
+ "<extra_id_16>": 32083,
10
+ "<extra_id_17>": 32082,
11
+ "<extra_id_18>": 32081,
12
+ "<extra_id_19>": 32080,
13
+ "<extra_id_1>": 32098,
14
+ "<extra_id_20>": 32079,
15
+ "<extra_id_21>": 32078,
16
+ "<extra_id_22>": 32077,
17
+ "<extra_id_23>": 32076,
18
+ "<extra_id_24>": 32075,
19
+ "<extra_id_25>": 32074,
20
+ "<extra_id_26>": 32073,
21
+ "<extra_id_27>": 32072,
22
+ "<extra_id_28>": 32071,
23
+ "<extra_id_29>": 32070,
24
+ "<extra_id_2>": 32097,
25
+ "<extra_id_30>": 32069,
26
+ "<extra_id_31>": 32068,
27
+ "<extra_id_32>": 32067,
28
+ "<extra_id_33>": 32066,
29
+ "<extra_id_34>": 32065,
30
+ "<extra_id_35>": 32064,
31
+ "<extra_id_36>": 32063,
32
+ "<extra_id_37>": 32062,
33
+ "<extra_id_38>": 32061,
34
+ "<extra_id_39>": 32060,
35
+ "<extra_id_3>": 32096,
36
+ "<extra_id_40>": 32059,
37
+ "<extra_id_41>": 32058,
38
+ "<extra_id_42>": 32057,
39
+ "<extra_id_43>": 32056,
40
+ "<extra_id_44>": 32055,
41
+ "<extra_id_45>": 32054,
42
+ "<extra_id_46>": 32053,
43
+ "<extra_id_47>": 32052,
44
+ "<extra_id_48>": 32051,
45
+ "<extra_id_49>": 32050,
46
+ "<extra_id_4>": 32095,
47
+ "<extra_id_50>": 32049,
48
+ "<extra_id_51>": 32048,
49
+ "<extra_id_52>": 32047,
50
+ "<extra_id_53>": 32046,
51
+ "<extra_id_54>": 32045,
52
+ "<extra_id_55>": 32044,
53
+ "<extra_id_56>": 32043,
54
+ "<extra_id_57>": 32042,
55
+ "<extra_id_58>": 32041,
56
+ "<extra_id_59>": 32040,
57
+ "<extra_id_5>": 32094,
58
+ "<extra_id_60>": 32039,
59
+ "<extra_id_61>": 32038,
60
+ "<extra_id_62>": 32037,
61
+ "<extra_id_63>": 32036,
62
+ "<extra_id_64>": 32035,
63
+ "<extra_id_65>": 32034,
64
+ "<extra_id_66>": 32033,
65
+ "<extra_id_67>": 32032,
66
+ "<extra_id_68>": 32031,
67
+ "<extra_id_69>": 32030,
68
+ "<extra_id_6>": 32093,
69
+ "<extra_id_70>": 32029,
70
+ "<extra_id_71>": 32028,
71
+ "<extra_id_72>": 32027,
72
+ "<extra_id_73>": 32026,
73
+ "<extra_id_74>": 32025,
74
+ "<extra_id_75>": 32024,
75
+ "<extra_id_76>": 32023,
76
+ "<extra_id_77>": 32022,
77
+ "<extra_id_78>": 32021,
78
+ "<extra_id_79>": 32020,
79
+ "<extra_id_7>": 32092,
80
+ "<extra_id_80>": 32019,
81
+ "<extra_id_81>": 32018,
82
+ "<extra_id_82>": 32017,
83
+ "<extra_id_83>": 32016,
84
+ "<extra_id_84>": 32015,
85
+ "<extra_id_85>": 32014,
86
+ "<extra_id_86>": 32013,
87
+ "<extra_id_87>": 32012,
88
+ "<extra_id_88>": 32011,
89
+ "<extra_id_89>": 32010,
90
+ "<extra_id_8>": 32091,
91
+ "<extra_id_90>": 32009,
92
+ "<extra_id_91>": 32008,
93
+ "<extra_id_92>": 32007,
94
+ "<extra_id_93>": 32006,
95
+ "<extra_id_94>": 32005,
96
+ "<extra_id_95>": 32004,
97
+ "<extra_id_96>": 32003,
98
+ "<extra_id_97>": 32002,
99
+ "<extra_id_98>": 32001,
100
+ "<extra_id_99>": 32000,
101
+ "<extra_id_9>": 32090
102
+ }
results/checkpoint-16500/config.json ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/flan-t5-base",
3
+ "architectures": [
4
+ "T5ForConditionalGeneration"
5
+ ],
6
+ "classifier_dropout": 0.0,
7
+ "d_ff": 2048,
8
+ "d_kv": 64,
9
+ "d_model": 768,
10
+ "decoder_start_token_id": 0,
11
+ "dense_act_fn": "gelu_new",
12
+ "dropout_rate": 0.1,
13
+ "eos_token_id": 1,
14
+ "feed_forward_proj": "gated-gelu",
15
+ "initializer_factor": 1.0,
16
+ "is_encoder_decoder": true,
17
+ "is_gated_act": true,
18
+ "layer_norm_epsilon": 1e-06,
19
+ "model_type": "t5",
20
+ "n_positions": 512,
21
+ "num_decoder_layers": 12,
22
+ "num_heads": 12,
23
+ "num_layers": 12,
24
+ "output_past": true,
25
+ "pad_token_id": 0,
26
+ "relative_attention_max_distance": 128,
27
+ "relative_attention_num_buckets": 32,
28
+ "task_specific_params": {
29
+ "summarization": {
30
+ "early_stopping": true,
31
+ "length_penalty": 2.0,
32
+ "max_length": 200,
33
+ "min_length": 30,
34
+ "no_repeat_ngram_size": 3,
35
+ "num_beams": 4,
36
+ "prefix": "summarize: "
37
+ },
38
+ "translation_en_to_de": {
39
+ "early_stopping": true,
40
+ "max_length": 300,
41
+ "num_beams": 4,
42
+ "prefix": "translate English to German: "
43
+ },
44
+ "translation_en_to_fr": {
45
+ "early_stopping": true,
46
+ "max_length": 300,
47
+ "num_beams": 4,
48
+ "prefix": "translate English to French: "
49
+ },
50
+ "translation_en_to_ro": {
51
+ "early_stopping": true,
52
+ "max_length": 300,
53
+ "num_beams": 4,
54
+ "prefix": "translate English to Romanian: "
55
+ }
56
+ },
57
+ "tie_word_embeddings": false,
58
+ "torch_dtype": "float32",
59
+ "transformers_version": "4.35.2",
60
+ "use_cache": true,
61
+ "vocab_size": 32128
62
+ }
results/checkpoint-16500/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "decoder_start_token_id": 0,
3
+ "eos_token_id": 1,
4
+ "pad_token_id": 0,
5
+ "transformers_version": "4.35.2"
6
+ }
results/checkpoint-16500/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7a9d6ae7bd3422210038d1e4c5f886f38a28b91a2226cee03cf256bea0339da
3
+ size 990345064
results/checkpoint-16500/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dae4b122e5d07511dc6ed1a937d0886c266881e00214d69729a696f37485fcdd
3
+ size 1980860410
results/checkpoint-16500/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50ee6c6cc0809088382943141afb0c6017d470447b3a88779f4e8e80f7985298
3
+ size 14244
results/checkpoint-16500/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4bf5bd3fa8e4f5d9ee10872958f6f1ff7a2d039e46912441d75c700ac706191
3
+ size 1064
results/checkpoint-16500/special_tokens_map.json ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<extra_id_0>",
4
+ "<extra_id_1>",
5
+ "<extra_id_2>",
6
+ "<extra_id_3>",
7
+ "<extra_id_4>",
8
+ "<extra_id_5>",
9
+ "<extra_id_6>",
10
+ "<extra_id_7>",
11
+ "<extra_id_8>",
12
+ "<extra_id_9>",
13
+ "<extra_id_10>",
14
+ "<extra_id_11>",
15
+ "<extra_id_12>",
16
+ "<extra_id_13>",
17
+ "<extra_id_14>",
18
+ "<extra_id_15>",
19
+ "<extra_id_16>",
20
+ "<extra_id_17>",
21
+ "<extra_id_18>",
22
+ "<extra_id_19>",
23
+ "<extra_id_20>",
24
+ "<extra_id_21>",
25
+ "<extra_id_22>",
26
+ "<extra_id_23>",
27
+ "<extra_id_24>",
28
+ "<extra_id_25>",
29
+ "<extra_id_26>",
30
+ "<extra_id_27>",
31
+ "<extra_id_28>",
32
+ "<extra_id_29>",
33
+ "<extra_id_30>",
34
+ "<extra_id_31>",
35
+ "<extra_id_32>",
36
+ "<extra_id_33>",
37
+ "<extra_id_34>",
38
+ "<extra_id_35>",
39
+ "<extra_id_36>",
40
+ "<extra_id_37>",
41
+ "<extra_id_38>",
42
+ "<extra_id_39>",
43
+ "<extra_id_40>",
44
+ "<extra_id_41>",
45
+ "<extra_id_42>",
46
+ "<extra_id_43>",
47
+ "<extra_id_44>",
48
+ "<extra_id_45>",
49
+ "<extra_id_46>",
50
+ "<extra_id_47>",
51
+ "<extra_id_48>",
52
+ "<extra_id_49>",
53
+ "<extra_id_50>",
54
+ "<extra_id_51>",
55
+ "<extra_id_52>",
56
+ "<extra_id_53>",
57
+ "<extra_id_54>",
58
+ "<extra_id_55>",
59
+ "<extra_id_56>",
60
+ "<extra_id_57>",
61
+ "<extra_id_58>",
62
+ "<extra_id_59>",
63
+ "<extra_id_60>",
64
+ "<extra_id_61>",
65
+ "<extra_id_62>",
66
+ "<extra_id_63>",
67
+ "<extra_id_64>",
68
+ "<extra_id_65>",
69
+ "<extra_id_66>",
70
+ "<extra_id_67>",
71
+ "<extra_id_68>",
72
+ "<extra_id_69>",
73
+ "<extra_id_70>",
74
+ "<extra_id_71>",
75
+ "<extra_id_72>",
76
+ "<extra_id_73>",
77
+ "<extra_id_74>",
78
+ "<extra_id_75>",
79
+ "<extra_id_76>",
80
+ "<extra_id_77>",
81
+ "<extra_id_78>",
82
+ "<extra_id_79>",
83
+ "<extra_id_80>",
84
+ "<extra_id_81>",
85
+ "<extra_id_82>",
86
+ "<extra_id_83>",
87
+ "<extra_id_84>",
88
+ "<extra_id_85>",
89
+ "<extra_id_86>",
90
+ "<extra_id_87>",
91
+ "<extra_id_88>",
92
+ "<extra_id_89>",
93
+ "<extra_id_90>",
94
+ "<extra_id_91>",
95
+ "<extra_id_92>",
96
+ "<extra_id_93>",
97
+ "<extra_id_94>",
98
+ "<extra_id_95>",
99
+ "<extra_id_96>",
100
+ "<extra_id_97>",
101
+ "<extra_id_98>",
102
+ "<extra_id_99>"
103
+ ],
104
+ "eos_token": {
105
+ "content": "</s>",
106
+ "lstrip": false,
107
+ "normalized": false,
108
+ "rstrip": false,
109
+ "single_word": false
110
+ },
111
+ "pad_token": {
112
+ "content": "<pad>",
113
+ "lstrip": false,
114
+ "normalized": false,
115
+ "rstrip": false,
116
+ "single_word": false
117
+ },
118
+ "unk_token": {
119
+ "content": "<unk>",
120
+ "lstrip": false,
121
+ "normalized": false,
122
+ "rstrip": false,
123
+ "single_word": false
124
+ }
125
+ }
results/checkpoint-16500/spiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
3
+ size 791656
results/checkpoint-16500/tokenizer_config.json ADDED
@@ -0,0 +1,939 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<pad>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "</s>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "<unk>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "32000": {
28
+ "content": "<extra_id_99>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "32001": {
36
+ "content": "<extra_id_98>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "32002": {
44
+ "content": "<extra_id_97>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "32003": {
52
+ "content": "<extra_id_96>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "32004": {
60
+ "content": "<extra_id_95>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "32005": {
68
+ "content": "<extra_id_94>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "32006": {
76
+ "content": "<extra_id_93>",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "32007": {
84
+ "content": "<extra_id_92>",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "32008": {
92
+ "content": "<extra_id_91>",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "32009": {
100
+ "content": "<extra_id_90>",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "32010": {
108
+ "content": "<extra_id_89>",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "32011": {
116
+ "content": "<extra_id_88>",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "32012": {
124
+ "content": "<extra_id_87>",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": true
130
+ },
131
+ "32013": {
132
+ "content": "<extra_id_86>",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": true
138
+ },
139
+ "32014": {
140
+ "content": "<extra_id_85>",
141
+ "lstrip": false,
142
+ "normalized": false,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": true
146
+ },
147
+ "32015": {
148
+ "content": "<extra_id_84>",
149
+ "lstrip": false,
150
+ "normalized": false,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": true
154
+ },
155
+ "32016": {
156
+ "content": "<extra_id_83>",
157
+ "lstrip": false,
158
+ "normalized": false,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": true
162
+ },
163
+ "32017": {
164
+ "content": "<extra_id_82>",
165
+ "lstrip": false,
166
+ "normalized": false,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": true
170
+ },
171
+ "32018": {
172
+ "content": "<extra_id_81>",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": true
178
+ },
179
+ "32019": {
180
+ "content": "<extra_id_80>",
181
+ "lstrip": false,
182
+ "normalized": false,
183
+ "rstrip": false,
184
+ "single_word": false,
185
+ "special": true
186
+ },
187
+ "32020": {
188
+ "content": "<extra_id_79>",
189
+ "lstrip": false,
190
+ "normalized": false,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": true
194
+ },
195
+ "32021": {
196
+ "content": "<extra_id_78>",
197
+ "lstrip": false,
198
+ "normalized": false,
199
+ "rstrip": false,
200
+ "single_word": false,
201
+ "special": true
202
+ },
203
+ "32022": {
204
+ "content": "<extra_id_77>",
205
+ "lstrip": false,
206
+ "normalized": false,
207
+ "rstrip": false,
208
+ "single_word": false,
209
+ "special": true
210
+ },
211
+ "32023": {
212
+ "content": "<extra_id_76>",
213
+ "lstrip": false,
214
+ "normalized": false,
215
+ "rstrip": false,
216
+ "single_word": false,
217
+ "special": true
218
+ },
219
+ "32024": {
220
+ "content": "<extra_id_75>",
221
+ "lstrip": false,
222
+ "normalized": false,
223
+ "rstrip": false,
224
+ "single_word": false,
225
+ "special": true
226
+ },
227
+ "32025": {
228
+ "content": "<extra_id_74>",
229
+ "lstrip": false,
230
+ "normalized": false,
231
+ "rstrip": false,
232
+ "single_word": false,
233
+ "special": true
234
+ },
235
+ "32026": {
236
+ "content": "<extra_id_73>",
237
+ "lstrip": false,
238
+ "normalized": false,
239
+ "rstrip": false,
240
+ "single_word": false,
241
+ "special": true
242
+ },
243
+ "32027": {
244
+ "content": "<extra_id_72>",
245
+ "lstrip": false,
246
+ "normalized": false,
247
+ "rstrip": false,
248
+ "single_word": false,
249
+ "special": true
250
+ },
251
+ "32028": {
252
+ "content": "<extra_id_71>",
253
+ "lstrip": false,
254
+ "normalized": false,
255
+ "rstrip": false,
256
+ "single_word": false,
257
+ "special": true
258
+ },
259
+ "32029": {
260
+ "content": "<extra_id_70>",
261
+ "lstrip": false,
262
+ "normalized": false,
263
+ "rstrip": false,
264
+ "single_word": false,
265
+ "special": true
266
+ },
267
+ "32030": {
268
+ "content": "<extra_id_69>",
269
+ "lstrip": false,
270
+ "normalized": false,
271
+ "rstrip": false,
272
+ "single_word": false,
273
+ "special": true
274
+ },
275
+ "32031": {
276
+ "content": "<extra_id_68>",
277
+ "lstrip": false,
278
+ "normalized": false,
279
+ "rstrip": false,
280
+ "single_word": false,
281
+ "special": true
282
+ },
283
+ "32032": {
284
+ "content": "<extra_id_67>",
285
+ "lstrip": false,
286
+ "normalized": false,
287
+ "rstrip": false,
288
+ "single_word": false,
289
+ "special": true
290
+ },
291
+ "32033": {
292
+ "content": "<extra_id_66>",
293
+ "lstrip": false,
294
+ "normalized": false,
295
+ "rstrip": false,
296
+ "single_word": false,
297
+ "special": true
298
+ },
299
+ "32034": {
300
+ "content": "<extra_id_65>",
301
+ "lstrip": false,
302
+ "normalized": false,
303
+ "rstrip": false,
304
+ "single_word": false,
305
+ "special": true
306
+ },
307
+ "32035": {
308
+ "content": "<extra_id_64>",
309
+ "lstrip": false,
310
+ "normalized": false,
311
+ "rstrip": false,
312
+ "single_word": false,
313
+ "special": true
314
+ },
315
+ "32036": {
316
+ "content": "<extra_id_63>",
317
+ "lstrip": false,
318
+ "normalized": false,
319
+ "rstrip": false,
320
+ "single_word": false,
321
+ "special": true
322
+ },
323
+ "32037": {
324
+ "content": "<extra_id_62>",
325
+ "lstrip": false,
326
+ "normalized": false,
327
+ "rstrip": false,
328
+ "single_word": false,
329
+ "special": true
330
+ },
331
+ "32038": {
332
+ "content": "<extra_id_61>",
333
+ "lstrip": false,
334
+ "normalized": false,
335
+ "rstrip": false,
336
+ "single_word": false,
337
+ "special": true
338
+ },
339
+ "32039": {
340
+ "content": "<extra_id_60>",
341
+ "lstrip": false,
342
+ "normalized": false,
343
+ "rstrip": false,
344
+ "single_word": false,
345
+ "special": true
346
+ },
347
+ "32040": {
348
+ "content": "<extra_id_59>",
349
+ "lstrip": false,
350
+ "normalized": false,
351
+ "rstrip": false,
352
+ "single_word": false,
353
+ "special": true
354
+ },
355
+ "32041": {
356
+ "content": "<extra_id_58>",
357
+ "lstrip": false,
358
+ "normalized": false,
359
+ "rstrip": false,
360
+ "single_word": false,
361
+ "special": true
362
+ },
363
+ "32042": {
364
+ "content": "<extra_id_57>",
365
+ "lstrip": false,
366
+ "normalized": false,
367
+ "rstrip": false,
368
+ "single_word": false,
369
+ "special": true
370
+ },
371
+ "32043": {
372
+ "content": "<extra_id_56>",
373
+ "lstrip": false,
374
+ "normalized": false,
375
+ "rstrip": false,
376
+ "single_word": false,
377
+ "special": true
378
+ },
379
+ "32044": {
380
+ "content": "<extra_id_55>",
381
+ "lstrip": false,
382
+ "normalized": false,
383
+ "rstrip": false,
384
+ "single_word": false,
385
+ "special": true
386
+ },
387
+ "32045": {
388
+ "content": "<extra_id_54>",
389
+ "lstrip": false,
390
+ "normalized": false,
391
+ "rstrip": false,
392
+ "single_word": false,
393
+ "special": true
394
+ },
395
+ "32046": {
396
+ "content": "<extra_id_53>",
397
+ "lstrip": false,
398
+ "normalized": false,
399
+ "rstrip": false,
400
+ "single_word": false,
401
+ "special": true
402
+ },
403
+ "32047": {
404
+ "content": "<extra_id_52>",
405
+ "lstrip": false,
406
+ "normalized": false,
407
+ "rstrip": false,
408
+ "single_word": false,
409
+ "special": true
410
+ },
411
+ "32048": {
412
+ "content": "<extra_id_51>",
413
+ "lstrip": false,
414
+ "normalized": false,
415
+ "rstrip": false,
416
+ "single_word": false,
417
+ "special": true
418
+ },
419
+ "32049": {
420
+ "content": "<extra_id_50>",
421
+ "lstrip": false,
422
+ "normalized": false,
423
+ "rstrip": false,
424
+ "single_word": false,
425
+ "special": true
426
+ },
427
+ "32050": {
428
+ "content": "<extra_id_49>",
429
+ "lstrip": false,
430
+ "normalized": false,
431
+ "rstrip": false,
432
+ "single_word": false,
433
+ "special": true
434
+ },
435
+ "32051": {
436
+ "content": "<extra_id_48>",
437
+ "lstrip": false,
438
+ "normalized": false,
439
+ "rstrip": false,
440
+ "single_word": false,
441
+ "special": true
442
+ },
443
+ "32052": {
444
+ "content": "<extra_id_47>",
445
+ "lstrip": false,
446
+ "normalized": false,
447
+ "rstrip": false,
448
+ "single_word": false,
449
+ "special": true
450
+ },
451
+ "32053": {
452
+ "content": "<extra_id_46>",
453
+ "lstrip": false,
454
+ "normalized": false,
455
+ "rstrip": false,
456
+ "single_word": false,
457
+ "special": true
458
+ },
459
+ "32054": {
460
+ "content": "<extra_id_45>",
461
+ "lstrip": false,
462
+ "normalized": false,
463
+ "rstrip": false,
464
+ "single_word": false,
465
+ "special": true
466
+ },
467
+ "32055": {
468
+ "content": "<extra_id_44>",
469
+ "lstrip": false,
470
+ "normalized": false,
471
+ "rstrip": false,
472
+ "single_word": false,
473
+ "special": true
474
+ },
475
+ "32056": {
476
+ "content": "<extra_id_43>",
477
+ "lstrip": false,
478
+ "normalized": false,
479
+ "rstrip": false,
480
+ "single_word": false,
481
+ "special": true
482
+ },
483
+ "32057": {
484
+ "content": "<extra_id_42>",
485
+ "lstrip": false,
486
+ "normalized": false,
487
+ "rstrip": false,
488
+ "single_word": false,
489
+ "special": true
490
+ },
491
+ "32058": {
492
+ "content": "<extra_id_41>",
493
+ "lstrip": false,
494
+ "normalized": false,
495
+ "rstrip": false,
496
+ "single_word": false,
497
+ "special": true
498
+ },
499
+ "32059": {
500
+ "content": "<extra_id_40>",
501
+ "lstrip": false,
502
+ "normalized": false,
503
+ "rstrip": false,
504
+ "single_word": false,
505
+ "special": true
506
+ },
507
+ "32060": {
508
+ "content": "<extra_id_39>",
509
+ "lstrip": false,
510
+ "normalized": false,
511
+ "rstrip": false,
512
+ "single_word": false,
513
+ "special": true
514
+ },
515
+ "32061": {
516
+ "content": "<extra_id_38>",
517
+ "lstrip": false,
518
+ "normalized": false,
519
+ "rstrip": false,
520
+ "single_word": false,
521
+ "special": true
522
+ },
523
+ "32062": {
524
+ "content": "<extra_id_37>",
525
+ "lstrip": false,
526
+ "normalized": false,
527
+ "rstrip": false,
528
+ "single_word": false,
529
+ "special": true
530
+ },
531
+ "32063": {
532
+ "content": "<extra_id_36>",
533
+ "lstrip": false,
534
+ "normalized": false,
535
+ "rstrip": false,
536
+ "single_word": false,
537
+ "special": true
538
+ },
539
+ "32064": {
540
+ "content": "<extra_id_35>",
541
+ "lstrip": false,
542
+ "normalized": false,
543
+ "rstrip": false,
544
+ "single_word": false,
545
+ "special": true
546
+ },
547
+ "32065": {
548
+ "content": "<extra_id_34>",
549
+ "lstrip": false,
550
+ "normalized": false,
551
+ "rstrip": false,
552
+ "single_word": false,
553
+ "special": true
554
+ },
555
+ "32066": {
556
+ "content": "<extra_id_33>",
557
+ "lstrip": false,
558
+ "normalized": false,
559
+ "rstrip": false,
560
+ "single_word": false,
561
+ "special": true
562
+ },
563
+ "32067": {
564
+ "content": "<extra_id_32>",
565
+ "lstrip": false,
566
+ "normalized": false,
567
+ "rstrip": false,
568
+ "single_word": false,
569
+ "special": true
570
+ },
571
+ "32068": {
572
+ "content": "<extra_id_31>",
573
+ "lstrip": false,
574
+ "normalized": false,
575
+ "rstrip": false,
576
+ "single_word": false,
577
+ "special": true
578
+ },
579
+ "32069": {
580
+ "content": "<extra_id_30>",
581
+ "lstrip": false,
582
+ "normalized": false,
583
+ "rstrip": false,
584
+ "single_word": false,
585
+ "special": true
586
+ },
587
+ "32070": {
588
+ "content": "<extra_id_29>",
589
+ "lstrip": false,
590
+ "normalized": false,
591
+ "rstrip": false,
592
+ "single_word": false,
593
+ "special": true
594
+ },
595
+ "32071": {
596
+ "content": "<extra_id_28>",
597
+ "lstrip": false,
598
+ "normalized": false,
599
+ "rstrip": false,
600
+ "single_word": false,
601
+ "special": true
602
+ },
603
+ "32072": {
604
+ "content": "<extra_id_27>",
605
+ "lstrip": false,
606
+ "normalized": false,
607
+ "rstrip": false,
608
+ "single_word": false,
609
+ "special": true
610
+ },
611
+ "32073": {
612
+ "content": "<extra_id_26>",
613
+ "lstrip": false,
614
+ "normalized": false,
615
+ "rstrip": false,
616
+ "single_word": false,
617
+ "special": true
618
+ },
619
+ "32074": {
620
+ "content": "<extra_id_25>",
621
+ "lstrip": false,
622
+ "normalized": false,
623
+ "rstrip": false,
624
+ "single_word": false,
625
+ "special": true
626
+ },
627
+ "32075": {
628
+ "content": "<extra_id_24>",
629
+ "lstrip": false,
630
+ "normalized": false,
631
+ "rstrip": false,
632
+ "single_word": false,
633
+ "special": true
634
+ },
635
+ "32076": {
636
+ "content": "<extra_id_23>",
637
+ "lstrip": false,
638
+ "normalized": false,
639
+ "rstrip": false,
640
+ "single_word": false,
641
+ "special": true
642
+ },
643
+ "32077": {
644
+ "content": "<extra_id_22>",
645
+ "lstrip": false,
646
+ "normalized": false,
647
+ "rstrip": false,
648
+ "single_word": false,
649
+ "special": true
650
+ },
651
+ "32078": {
652
+ "content": "<extra_id_21>",
653
+ "lstrip": false,
654
+ "normalized": false,
655
+ "rstrip": false,
656
+ "single_word": false,
657
+ "special": true
658
+ },
659
+ "32079": {
660
+ "content": "<extra_id_20>",
661
+ "lstrip": false,
662
+ "normalized": false,
663
+ "rstrip": false,
664
+ "single_word": false,
665
+ "special": true
666
+ },
667
+ "32080": {
668
+ "content": "<extra_id_19>",
669
+ "lstrip": false,
670
+ "normalized": false,
671
+ "rstrip": false,
672
+ "single_word": false,
673
+ "special": true
674
+ },
675
+ "32081": {
676
+ "content": "<extra_id_18>",
677
+ "lstrip": false,
678
+ "normalized": false,
679
+ "rstrip": false,
680
+ "single_word": false,
681
+ "special": true
682
+ },
683
+ "32082": {
684
+ "content": "<extra_id_17>",
685
+ "lstrip": false,
686
+ "normalized": false,
687
+ "rstrip": false,
688
+ "single_word": false,
689
+ "special": true
690
+ },
691
+ "32083": {
692
+ "content": "<extra_id_16>",
693
+ "lstrip": false,
694
+ "normalized": false,
695
+ "rstrip": false,
696
+ "single_word": false,
697
+ "special": true
698
+ },
699
+ "32084": {
700
+ "content": "<extra_id_15>",
701
+ "lstrip": false,
702
+ "normalized": false,
703
+ "rstrip": false,
704
+ "single_word": false,
705
+ "special": true
706
+ },
707
+ "32085": {
708
+ "content": "<extra_id_14>",
709
+ "lstrip": false,
710
+ "normalized": false,
711
+ "rstrip": false,
712
+ "single_word": false,
713
+ "special": true
714
+ },
715
+ "32086": {
716
+ "content": "<extra_id_13>",
717
+ "lstrip": false,
718
+ "normalized": false,
719
+ "rstrip": false,
720
+ "single_word": false,
721
+ "special": true
722
+ },
723
+ "32087": {
724
+ "content": "<extra_id_12>",
725
+ "lstrip": false,
726
+ "normalized": false,
727
+ "rstrip": false,
728
+ "single_word": false,
729
+ "special": true
730
+ },
731
+ "32088": {
732
+ "content": "<extra_id_11>",
733
+ "lstrip": false,
734
+ "normalized": false,
735
+ "rstrip": false,
736
+ "single_word": false,
737
+ "special": true
738
+ },
739
+ "32089": {
740
+ "content": "<extra_id_10>",
741
+ "lstrip": false,
742
+ "normalized": false,
743
+ "rstrip": false,
744
+ "single_word": false,
745
+ "special": true
746
+ },
747
+ "32090": {
748
+ "content": "<extra_id_9>",
749
+ "lstrip": false,
750
+ "normalized": false,
751
+ "rstrip": false,
752
+ "single_word": false,
753
+ "special": true
754
+ },
755
+ "32091": {
756
+ "content": "<extra_id_8>",
757
+ "lstrip": false,
758
+ "normalized": false,
759
+ "rstrip": false,
760
+ "single_word": false,
761
+ "special": true
762
+ },
763
+ "32092": {
764
+ "content": "<extra_id_7>",
765
+ "lstrip": false,
766
+ "normalized": false,
767
+ "rstrip": false,
768
+ "single_word": false,
769
+ "special": true
770
+ },
771
+ "32093": {
772
+ "content": "<extra_id_6>",
773
+ "lstrip": false,
774
+ "normalized": false,
775
+ "rstrip": false,
776
+ "single_word": false,
777
+ "special": true
778
+ },
779
+ "32094": {
780
+ "content": "<extra_id_5>",
781
+ "lstrip": false,
782
+ "normalized": false,
783
+ "rstrip": false,
784
+ "single_word": false,
785
+ "special": true
786
+ },
787
+ "32095": {
788
+ "content": "<extra_id_4>",
789
+ "lstrip": false,
790
+ "normalized": false,
791
+ "rstrip": false,
792
+ "single_word": false,
793
+ "special": true
794
+ },
795
+ "32096": {
796
+ "content": "<extra_id_3>",
797
+ "lstrip": false,
798
+ "normalized": false,
799
+ "rstrip": false,
800
+ "single_word": false,
801
+ "special": true
802
+ },
803
+ "32097": {
804
+ "content": "<extra_id_2>",
805
+ "lstrip": false,
806
+ "normalized": false,
807
+ "rstrip": false,
808
+ "single_word": false,
809
+ "special": true
810
+ },
811
+ "32098": {
812
+ "content": "<extra_id_1>",
813
+ "lstrip": false,
814
+ "normalized": false,
815
+ "rstrip": false,
816
+ "single_word": false,
817
+ "special": true
818
+ },
819
+ "32099": {
820
+ "content": "<extra_id_0>",
821
+ "lstrip": false,
822
+ "normalized": false,
823
+ "rstrip": false,
824
+ "single_word": false,
825
+ "special": true
826
+ }
827
+ },
828
+ "additional_special_tokens": [
829
+ "<extra_id_0>",
830
+ "<extra_id_1>",
831
+ "<extra_id_2>",
832
+ "<extra_id_3>",
833
+ "<extra_id_4>",
834
+ "<extra_id_5>",
835
+ "<extra_id_6>",
836
+ "<extra_id_7>",
837
+ "<extra_id_8>",
838
+ "<extra_id_9>",
839
+ "<extra_id_10>",
840
+ "<extra_id_11>",
841
+ "<extra_id_12>",
842
+ "<extra_id_13>",
843
+ "<extra_id_14>",
844
+ "<extra_id_15>",
845
+ "<extra_id_16>",
846
+ "<extra_id_17>",
847
+ "<extra_id_18>",
848
+ "<extra_id_19>",
849
+ "<extra_id_20>",
850
+ "<extra_id_21>",
851
+ "<extra_id_22>",
852
+ "<extra_id_23>",
853
+ "<extra_id_24>",
854
+ "<extra_id_25>",
855
+ "<extra_id_26>",
856
+ "<extra_id_27>",
857
+ "<extra_id_28>",
858
+ "<extra_id_29>",
859
+ "<extra_id_30>",
860
+ "<extra_id_31>",
861
+ "<extra_id_32>",
862
+ "<extra_id_33>",
863
+ "<extra_id_34>",
864
+ "<extra_id_35>",
865
+ "<extra_id_36>",
866
+ "<extra_id_37>",
867
+ "<extra_id_38>",
868
+ "<extra_id_39>",
869
+ "<extra_id_40>",
870
+ "<extra_id_41>",
871
+ "<extra_id_42>",
872
+ "<extra_id_43>",
873
+ "<extra_id_44>",
874
+ "<extra_id_45>",
875
+ "<extra_id_46>",
876
+ "<extra_id_47>",
877
+ "<extra_id_48>",
878
+ "<extra_id_49>",
879
+ "<extra_id_50>",
880
+ "<extra_id_51>",
881
+ "<extra_id_52>",
882
+ "<extra_id_53>",
883
+ "<extra_id_54>",
884
+ "<extra_id_55>",
885
+ "<extra_id_56>",
886
+ "<extra_id_57>",
887
+ "<extra_id_58>",
888
+ "<extra_id_59>",
889
+ "<extra_id_60>",
890
+ "<extra_id_61>",
891
+ "<extra_id_62>",
892
+ "<extra_id_63>",
893
+ "<extra_id_64>",
894
+ "<extra_id_65>",
895
+ "<extra_id_66>",
896
+ "<extra_id_67>",
897
+ "<extra_id_68>",
898
+ "<extra_id_69>",
899
+ "<extra_id_70>",
900
+ "<extra_id_71>",
901
+ "<extra_id_72>",
902
+ "<extra_id_73>",
903
+ "<extra_id_74>",
904
+ "<extra_id_75>",
905
+ "<extra_id_76>",
906
+ "<extra_id_77>",
907
+ "<extra_id_78>",
908
+ "<extra_id_79>",
909
+ "<extra_id_80>",
910
+ "<extra_id_81>",
911
+ "<extra_id_82>",
912
+ "<extra_id_83>",
913
+ "<extra_id_84>",
914
+ "<extra_id_85>",
915
+ "<extra_id_86>",
916
+ "<extra_id_87>",
917
+ "<extra_id_88>",
918
+ "<extra_id_89>",
919
+ "<extra_id_90>",
920
+ "<extra_id_91>",
921
+ "<extra_id_92>",
922
+ "<extra_id_93>",
923
+ "<extra_id_94>",
924
+ "<extra_id_95>",
925
+ "<extra_id_96>",
926
+ "<extra_id_97>",
927
+ "<extra_id_98>",
928
+ "<extra_id_99>"
929
+ ],
930
+ "clean_up_tokenization_spaces": true,
931
+ "eos_token": "</s>",
932
+ "extra_ids": 100,
933
+ "legacy": true,
934
+ "model_max_length": 512,
935
+ "pad_token": "<pad>",
936
+ "sp_model_kwargs": {},
937
+ "tokenizer_class": "T5Tokenizer",
938
+ "unk_token": "<unk>"
939
+ }
results/checkpoint-16500/trainer_state.json ADDED
@@ -0,0 +1,325 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 9.649122807017545,
5
+ "eval_steps": 500,
6
+ "global_step": 16500,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.29,
13
+ "learning_rate": 0.0002912280701754386,
14
+ "loss": 0.3858,
15
+ "step": 500
16
+ },
17
+ {
18
+ "epoch": 0.58,
19
+ "learning_rate": 0.0002824561403508772,
20
+ "loss": 0.0819,
21
+ "step": 1000
22
+ },
23
+ {
24
+ "epoch": 0.88,
25
+ "learning_rate": 0.00027368421052631573,
26
+ "loss": 0.046,
27
+ "step": 1500
28
+ },
29
+ {
30
+ "epoch": 1.0,
31
+ "eval_loss": 0.006535602733492851,
32
+ "eval_rouge1": 0.9974420190995907,
33
+ "eval_rouge2": 0.0,
34
+ "eval_rougeL": 0.9974420190995907,
35
+ "eval_rougeLsum": 0.9974420190995907,
36
+ "eval_runtime": 155.5569,
37
+ "eval_samples_per_second": 37.697,
38
+ "eval_steps_per_second": 9.424,
39
+ "step": 1710
40
+ },
41
+ {
42
+ "epoch": 1.17,
43
+ "learning_rate": 0.0002649122807017544,
44
+ "loss": 0.0317,
45
+ "step": 2000
46
+ },
47
+ {
48
+ "epoch": 1.46,
49
+ "learning_rate": 0.00025614035087719294,
50
+ "loss": 0.0132,
51
+ "step": 2500
52
+ },
53
+ {
54
+ "epoch": 1.75,
55
+ "learning_rate": 0.00024736842105263154,
56
+ "loss": 0.0103,
57
+ "step": 3000
58
+ },
59
+ {
60
+ "epoch": 2.0,
61
+ "eval_loss": 0.00893484242260456,
62
+ "eval_rouge1": 0.997612551159618,
63
+ "eval_rouge2": 0.0,
64
+ "eval_rougeL": 0.997612551159618,
65
+ "eval_rougeLsum": 0.997612551159618,
66
+ "eval_runtime": 159.0943,
67
+ "eval_samples_per_second": 36.859,
68
+ "eval_steps_per_second": 9.215,
69
+ "step": 3420
70
+ },
71
+ {
72
+ "epoch": 2.05,
73
+ "learning_rate": 0.00023859649122807015,
74
+ "loss": 0.015,
75
+ "step": 3500
76
+ },
77
+ {
78
+ "epoch": 2.34,
79
+ "learning_rate": 0.00022982456140350875,
80
+ "loss": 0.0078,
81
+ "step": 4000
82
+ },
83
+ {
84
+ "epoch": 2.63,
85
+ "learning_rate": 0.00022105263157894733,
86
+ "loss": 0.0075,
87
+ "step": 4500
88
+ },
89
+ {
90
+ "epoch": 2.92,
91
+ "learning_rate": 0.00021228070175438596,
92
+ "loss": 0.0105,
93
+ "step": 5000
94
+ },
95
+ {
96
+ "epoch": 3.0,
97
+ "eval_loss": 0.002929441863670945,
98
+ "eval_rouge1": 0.9982946793997272,
99
+ "eval_rouge2": 0.0,
100
+ "eval_rougeL": 0.9982946793997272,
101
+ "eval_rougeLsum": 0.9982946793997272,
102
+ "eval_runtime": 155.9353,
103
+ "eval_samples_per_second": 37.605,
104
+ "eval_steps_per_second": 9.401,
105
+ "step": 5130
106
+ },
107
+ {
108
+ "epoch": 3.22,
109
+ "learning_rate": 0.00020350877192982454,
110
+ "loss": 0.0159,
111
+ "step": 5500
112
+ },
113
+ {
114
+ "epoch": 3.51,
115
+ "learning_rate": 0.00019473684210526314,
116
+ "loss": 0.0121,
117
+ "step": 6000
118
+ },
119
+ {
120
+ "epoch": 3.8,
121
+ "learning_rate": 0.00018596491228070172,
122
+ "loss": 0.0105,
123
+ "step": 6500
124
+ },
125
+ {
126
+ "epoch": 4.0,
127
+ "eval_loss": 0.0014885533601045609,
128
+ "eval_rouge1": 0.9986357435197817,
129
+ "eval_rouge2": 0.0,
130
+ "eval_rougeL": 0.9986357435197817,
131
+ "eval_rougeLsum": 0.9986357435197817,
132
+ "eval_runtime": 159.1866,
133
+ "eval_samples_per_second": 36.837,
134
+ "eval_steps_per_second": 9.209,
135
+ "step": 6840
136
+ },
137
+ {
138
+ "epoch": 4.09,
139
+ "learning_rate": 0.00017719298245614035,
140
+ "loss": 0.0136,
141
+ "step": 7000
142
+ },
143
+ {
144
+ "epoch": 4.39,
145
+ "learning_rate": 0.00016842105263157892,
146
+ "loss": 0.0067,
147
+ "step": 7500
148
+ },
149
+ {
150
+ "epoch": 4.68,
151
+ "learning_rate": 0.00015964912280701753,
152
+ "loss": 0.0052,
153
+ "step": 8000
154
+ },
155
+ {
156
+ "epoch": 4.97,
157
+ "learning_rate": 0.00015087719298245613,
158
+ "loss": 0.0032,
159
+ "step": 8500
160
+ },
161
+ {
162
+ "epoch": 5.0,
163
+ "eval_loss": 0.002013931516557932,
164
+ "eval_rouge1": 0.9982946793997272,
165
+ "eval_rouge2": 0.0,
166
+ "eval_rougeL": 0.9982946793997272,
167
+ "eval_rougeLsum": 0.9982946793997272,
168
+ "eval_runtime": 158.9827,
169
+ "eval_samples_per_second": 36.885,
170
+ "eval_steps_per_second": 9.221,
171
+ "step": 8550
172
+ },
173
+ {
174
+ "epoch": 5.26,
175
+ "learning_rate": 0.0001421052631578947,
176
+ "loss": 0.0036,
177
+ "step": 9000
178
+ },
179
+ {
180
+ "epoch": 5.56,
181
+ "learning_rate": 0.0001333333333333333,
182
+ "loss": 0.0028,
183
+ "step": 9500
184
+ },
185
+ {
186
+ "epoch": 5.85,
187
+ "learning_rate": 0.00012456140350877192,
188
+ "loss": 0.0023,
189
+ "step": 10000
190
+ },
191
+ {
192
+ "epoch": 6.0,
193
+ "eval_loss": 0.008263664320111275,
194
+ "eval_rouge1": 0.997612551159618,
195
+ "eval_rouge2": 0.0,
196
+ "eval_rougeL": 0.997612551159618,
197
+ "eval_rougeLsum": 0.997612551159618,
198
+ "eval_runtime": 165.6672,
199
+ "eval_samples_per_second": 35.396,
200
+ "eval_steps_per_second": 8.849,
201
+ "step": 10260
202
+ },
203
+ {
204
+ "epoch": 6.14,
205
+ "learning_rate": 0.00011578947368421051,
206
+ "loss": 0.0045,
207
+ "step": 10500
208
+ },
209
+ {
210
+ "epoch": 6.43,
211
+ "learning_rate": 0.00010701754385964911,
212
+ "loss": 0.0031,
213
+ "step": 11000
214
+ },
215
+ {
216
+ "epoch": 6.73,
217
+ "learning_rate": 9.82456140350877e-05,
218
+ "loss": 0.0013,
219
+ "step": 11500
220
+ },
221
+ {
222
+ "epoch": 7.0,
223
+ "eval_loss": 0.003634733846411109,
224
+ "eval_rouge1": 0.9982946793997272,
225
+ "eval_rouge2": 0.0,
226
+ "eval_rougeL": 0.9982946793997272,
227
+ "eval_rougeLsum": 0.9982946793997272,
228
+ "eval_runtime": 165.5178,
229
+ "eval_samples_per_second": 35.428,
230
+ "eval_steps_per_second": 8.857,
231
+ "step": 11970
232
+ },
233
+ {
234
+ "epoch": 7.02,
235
+ "learning_rate": 8.94736842105263e-05,
236
+ "loss": 0.0017,
237
+ "step": 12000
238
+ },
239
+ {
240
+ "epoch": 7.31,
241
+ "learning_rate": 8.07017543859649e-05,
242
+ "loss": 0.0008,
243
+ "step": 12500
244
+ },
245
+ {
246
+ "epoch": 7.6,
247
+ "learning_rate": 7.19298245614035e-05,
248
+ "loss": 0.0017,
249
+ "step": 13000
250
+ },
251
+ {
252
+ "epoch": 7.89,
253
+ "learning_rate": 6.315789473684209e-05,
254
+ "loss": 0.0012,
255
+ "step": 13500
256
+ },
257
+ {
258
+ "epoch": 8.0,
259
+ "eval_loss": 0.0013940236531198025,
260
+ "eval_rouge1": 0.9982946793997272,
261
+ "eval_rouge2": 0.0,
262
+ "eval_rougeL": 0.9982946793997272,
263
+ "eval_rougeLsum": 0.9982946793997272,
264
+ "eval_runtime": 166.5345,
265
+ "eval_samples_per_second": 35.212,
266
+ "eval_steps_per_second": 8.803,
267
+ "step": 13680
268
+ },
269
+ {
270
+ "epoch": 8.19,
271
+ "learning_rate": 5.4385964912280694e-05,
272
+ "loss": 0.0024,
273
+ "step": 14000
274
+ },
275
+ {
276
+ "epoch": 8.48,
277
+ "learning_rate": 4.561403508771929e-05,
278
+ "loss": 0.0015,
279
+ "step": 14500
280
+ },
281
+ {
282
+ "epoch": 8.77,
283
+ "learning_rate": 3.684210526315789e-05,
284
+ "loss": 0.0012,
285
+ "step": 15000
286
+ },
287
+ {
288
+ "epoch": 9.0,
289
+ "eval_loss": 0.0021317724604159594,
290
+ "eval_rouge1": 0.9982946793997272,
291
+ "eval_rouge2": 0.0,
292
+ "eval_rougeL": 0.9982946793997272,
293
+ "eval_rougeLsum": 0.9982946793997272,
294
+ "eval_runtime": 166.3607,
295
+ "eval_samples_per_second": 35.249,
296
+ "eval_steps_per_second": 8.812,
297
+ "step": 15390
298
+ },
299
+ {
300
+ "epoch": 9.06,
301
+ "learning_rate": 2.807017543859649e-05,
302
+ "loss": 0.0008,
303
+ "step": 15500
304
+ },
305
+ {
306
+ "epoch": 9.36,
307
+ "learning_rate": 1.9298245614035086e-05,
308
+ "loss": 0.0004,
309
+ "step": 16000
310
+ },
311
+ {
312
+ "epoch": 9.65,
313
+ "learning_rate": 1.0526315789473683e-05,
314
+ "loss": 0.0006,
315
+ "step": 16500
316
+ }
317
+ ],
318
+ "logging_steps": 500,
319
+ "max_steps": 17100,
320
+ "num_train_epochs": 10,
321
+ "save_steps": 500,
322
+ "total_flos": 4237085908942848.0,
323
+ "trial_name": null,
324
+ "trial_params": null
325
+ }
results/checkpoint-16500/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2e8c3aef9cfe94a083e4e678683065ab146cef97e8d157c2108eb635736de7c
3
+ size 4664
results/checkpoint-17000/added_tokens.json ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "<extra_id_0>": 32099,
3
+ "<extra_id_10>": 32089,
4
+ "<extra_id_11>": 32088,
5
+ "<extra_id_12>": 32087,
6
+ "<extra_id_13>": 32086,
7
+ "<extra_id_14>": 32085,
8
+ "<extra_id_15>": 32084,
9
+ "<extra_id_16>": 32083,
10
+ "<extra_id_17>": 32082,
11
+ "<extra_id_18>": 32081,
12
+ "<extra_id_19>": 32080,
13
+ "<extra_id_1>": 32098,
14
+ "<extra_id_20>": 32079,
15
+ "<extra_id_21>": 32078,
16
+ "<extra_id_22>": 32077,
17
+ "<extra_id_23>": 32076,
18
+ "<extra_id_24>": 32075,
19
+ "<extra_id_25>": 32074,
20
+ "<extra_id_26>": 32073,
21
+ "<extra_id_27>": 32072,
22
+ "<extra_id_28>": 32071,
23
+ "<extra_id_29>": 32070,
24
+ "<extra_id_2>": 32097,
25
+ "<extra_id_30>": 32069,
26
+ "<extra_id_31>": 32068,
27
+ "<extra_id_32>": 32067,
28
+ "<extra_id_33>": 32066,
29
+ "<extra_id_34>": 32065,
30
+ "<extra_id_35>": 32064,
31
+ "<extra_id_36>": 32063,
32
+ "<extra_id_37>": 32062,
33
+ "<extra_id_38>": 32061,
34
+ "<extra_id_39>": 32060,
35
+ "<extra_id_3>": 32096,
36
+ "<extra_id_40>": 32059,
37
+ "<extra_id_41>": 32058,
38
+ "<extra_id_42>": 32057,
39
+ "<extra_id_43>": 32056,
40
+ "<extra_id_44>": 32055,
41
+ "<extra_id_45>": 32054,
42
+ "<extra_id_46>": 32053,
43
+ "<extra_id_47>": 32052,
44
+ "<extra_id_48>": 32051,
45
+ "<extra_id_49>": 32050,
46
+ "<extra_id_4>": 32095,
47
+ "<extra_id_50>": 32049,
48
+ "<extra_id_51>": 32048,
49
+ "<extra_id_52>": 32047,
50
+ "<extra_id_53>": 32046,
51
+ "<extra_id_54>": 32045,
52
+ "<extra_id_55>": 32044,
53
+ "<extra_id_56>": 32043,
54
+ "<extra_id_57>": 32042,
55
+ "<extra_id_58>": 32041,
56
+ "<extra_id_59>": 32040,
57
+ "<extra_id_5>": 32094,
58
+ "<extra_id_60>": 32039,
59
+ "<extra_id_61>": 32038,
60
+ "<extra_id_62>": 32037,
61
+ "<extra_id_63>": 32036,
62
+ "<extra_id_64>": 32035,
63
+ "<extra_id_65>": 32034,
64
+ "<extra_id_66>": 32033,
65
+ "<extra_id_67>": 32032,
66
+ "<extra_id_68>": 32031,
67
+ "<extra_id_69>": 32030,
68
+ "<extra_id_6>": 32093,
69
+ "<extra_id_70>": 32029,
70
+ "<extra_id_71>": 32028,
71
+ "<extra_id_72>": 32027,
72
+ "<extra_id_73>": 32026,
73
+ "<extra_id_74>": 32025,
74
+ "<extra_id_75>": 32024,
75
+ "<extra_id_76>": 32023,
76
+ "<extra_id_77>": 32022,
77
+ "<extra_id_78>": 32021,
78
+ "<extra_id_79>": 32020,
79
+ "<extra_id_7>": 32092,
80
+ "<extra_id_80>": 32019,
81
+ "<extra_id_81>": 32018,
82
+ "<extra_id_82>": 32017,
83
+ "<extra_id_83>": 32016,
84
+ "<extra_id_84>": 32015,
85
+ "<extra_id_85>": 32014,
86
+ "<extra_id_86>": 32013,
87
+ "<extra_id_87>": 32012,
88
+ "<extra_id_88>": 32011,
89
+ "<extra_id_89>": 32010,
90
+ "<extra_id_8>": 32091,
91
+ "<extra_id_90>": 32009,
92
+ "<extra_id_91>": 32008,
93
+ "<extra_id_92>": 32007,
94
+ "<extra_id_93>": 32006,
95
+ "<extra_id_94>": 32005,
96
+ "<extra_id_95>": 32004,
97
+ "<extra_id_96>": 32003,
98
+ "<extra_id_97>": 32002,
99
+ "<extra_id_98>": 32001,
100
+ "<extra_id_99>": 32000,
101
+ "<extra_id_9>": 32090
102
+ }
results/checkpoint-17000/config.json ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/flan-t5-base",
3
+ "architectures": [
4
+ "T5ForConditionalGeneration"
5
+ ],
6
+ "classifier_dropout": 0.0,
7
+ "d_ff": 2048,
8
+ "d_kv": 64,
9
+ "d_model": 768,
10
+ "decoder_start_token_id": 0,
11
+ "dense_act_fn": "gelu_new",
12
+ "dropout_rate": 0.1,
13
+ "eos_token_id": 1,
14
+ "feed_forward_proj": "gated-gelu",
15
+ "initializer_factor": 1.0,
16
+ "is_encoder_decoder": true,
17
+ "is_gated_act": true,
18
+ "layer_norm_epsilon": 1e-06,
19
+ "model_type": "t5",
20
+ "n_positions": 512,
21
+ "num_decoder_layers": 12,
22
+ "num_heads": 12,
23
+ "num_layers": 12,
24
+ "output_past": true,
25
+ "pad_token_id": 0,
26
+ "relative_attention_max_distance": 128,
27
+ "relative_attention_num_buckets": 32,
28
+ "task_specific_params": {
29
+ "summarization": {
30
+ "early_stopping": true,
31
+ "length_penalty": 2.0,
32
+ "max_length": 200,
33
+ "min_length": 30,
34
+ "no_repeat_ngram_size": 3,
35
+ "num_beams": 4,
36
+ "prefix": "summarize: "
37
+ },
38
+ "translation_en_to_de": {
39
+ "early_stopping": true,
40
+ "max_length": 300,
41
+ "num_beams": 4,
42
+ "prefix": "translate English to German: "
43
+ },
44
+ "translation_en_to_fr": {
45
+ "early_stopping": true,
46
+ "max_length": 300,
47
+ "num_beams": 4,
48
+ "prefix": "translate English to French: "
49
+ },
50
+ "translation_en_to_ro": {
51
+ "early_stopping": true,
52
+ "max_length": 300,
53
+ "num_beams": 4,
54
+ "prefix": "translate English to Romanian: "
55
+ }
56
+ },
57
+ "tie_word_embeddings": false,
58
+ "torch_dtype": "float32",
59
+ "transformers_version": "4.35.2",
60
+ "use_cache": true,
61
+ "vocab_size": 32128
62
+ }
results/checkpoint-17000/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "decoder_start_token_id": 0,
3
+ "eos_token_id": 1,
4
+ "pad_token_id": 0,
5
+ "transformers_version": "4.35.2"
6
+ }
results/checkpoint-17000/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92fb7ee142103a1cb7adb1d571589e7d21d7239f2e1cb7ca9a6b33c506c487ea
3
+ size 990345064
results/checkpoint-17000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fe36b8f5c0d0cd2fb3db9f24cd099ee0a5ac33700d73b159bcfb743c7fb4257
3
+ size 1980860410
results/checkpoint-17000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1276a1a4eea6d9d0454dcea4e04dda05b3562ae9183eaf21b7cce953d6a88e2
3
+ size 14244
results/checkpoint-17000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3992cc3c175d24af106b82e4a70c8b2654ca5720363a954f8a160d3ed6a680f
3
+ size 1064
results/checkpoint-17000/special_tokens_map.json ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<extra_id_0>",
4
+ "<extra_id_1>",
5
+ "<extra_id_2>",
6
+ "<extra_id_3>",
7
+ "<extra_id_4>",
8
+ "<extra_id_5>",
9
+ "<extra_id_6>",
10
+ "<extra_id_7>",
11
+ "<extra_id_8>",
12
+ "<extra_id_9>",
13
+ "<extra_id_10>",
14
+ "<extra_id_11>",
15
+ "<extra_id_12>",
16
+ "<extra_id_13>",
17
+ "<extra_id_14>",
18
+ "<extra_id_15>",
19
+ "<extra_id_16>",
20
+ "<extra_id_17>",
21
+ "<extra_id_18>",
22
+ "<extra_id_19>",
23
+ "<extra_id_20>",
24
+ "<extra_id_21>",
25
+ "<extra_id_22>",
26
+ "<extra_id_23>",
27
+ "<extra_id_24>",
28
+ "<extra_id_25>",
29
+ "<extra_id_26>",
30
+ "<extra_id_27>",
31
+ "<extra_id_28>",
32
+ "<extra_id_29>",
33
+ "<extra_id_30>",
34
+ "<extra_id_31>",
35
+ "<extra_id_32>",
36
+ "<extra_id_33>",
37
+ "<extra_id_34>",
38
+ "<extra_id_35>",
39
+ "<extra_id_36>",
40
+ "<extra_id_37>",
41
+ "<extra_id_38>",
42
+ "<extra_id_39>",
43
+ "<extra_id_40>",
44
+ "<extra_id_41>",
45
+ "<extra_id_42>",
46
+ "<extra_id_43>",
47
+ "<extra_id_44>",
48
+ "<extra_id_45>",
49
+ "<extra_id_46>",
50
+ "<extra_id_47>",
51
+ "<extra_id_48>",
52
+ "<extra_id_49>",
53
+ "<extra_id_50>",
54
+ "<extra_id_51>",
55
+ "<extra_id_52>",
56
+ "<extra_id_53>",
57
+ "<extra_id_54>",
58
+ "<extra_id_55>",
59
+ "<extra_id_56>",
60
+ "<extra_id_57>",
61
+ "<extra_id_58>",
62
+ "<extra_id_59>",
63
+ "<extra_id_60>",
64
+ "<extra_id_61>",
65
+ "<extra_id_62>",
66
+ "<extra_id_63>",
67
+ "<extra_id_64>",
68
+ "<extra_id_65>",
69
+ "<extra_id_66>",
70
+ "<extra_id_67>",
71
+ "<extra_id_68>",
72
+ "<extra_id_69>",
73
+ "<extra_id_70>",
74
+ "<extra_id_71>",
75
+ "<extra_id_72>",
76
+ "<extra_id_73>",
77
+ "<extra_id_74>",
78
+ "<extra_id_75>",
79
+ "<extra_id_76>",
80
+ "<extra_id_77>",
81
+ "<extra_id_78>",
82
+ "<extra_id_79>",
83
+ "<extra_id_80>",
84
+ "<extra_id_81>",
85
+ "<extra_id_82>",
86
+ "<extra_id_83>",
87
+ "<extra_id_84>",
88
+ "<extra_id_85>",
89
+ "<extra_id_86>",
90
+ "<extra_id_87>",
91
+ "<extra_id_88>",
92
+ "<extra_id_89>",
93
+ "<extra_id_90>",
94
+ "<extra_id_91>",
95
+ "<extra_id_92>",
96
+ "<extra_id_93>",
97
+ "<extra_id_94>",
98
+ "<extra_id_95>",
99
+ "<extra_id_96>",
100
+ "<extra_id_97>",
101
+ "<extra_id_98>",
102
+ "<extra_id_99>"
103
+ ],
104
+ "eos_token": {
105
+ "content": "</s>",
106
+ "lstrip": false,
107
+ "normalized": false,
108
+ "rstrip": false,
109
+ "single_word": false
110
+ },
111
+ "pad_token": {
112
+ "content": "<pad>",
113
+ "lstrip": false,
114
+ "normalized": false,
115
+ "rstrip": false,
116
+ "single_word": false
117
+ },
118
+ "unk_token": {
119
+ "content": "<unk>",
120
+ "lstrip": false,
121
+ "normalized": false,
122
+ "rstrip": false,
123
+ "single_word": false
124
+ }
125
+ }
results/checkpoint-17000/spiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
3
+ size 791656
results/checkpoint-17000/tokenizer_config.json ADDED
@@ -0,0 +1,939 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<pad>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "</s>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "<unk>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "32000": {
28
+ "content": "<extra_id_99>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "32001": {
36
+ "content": "<extra_id_98>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "32002": {
44
+ "content": "<extra_id_97>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "32003": {
52
+ "content": "<extra_id_96>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "32004": {
60
+ "content": "<extra_id_95>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "32005": {
68
+ "content": "<extra_id_94>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "32006": {
76
+ "content": "<extra_id_93>",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "32007": {
84
+ "content": "<extra_id_92>",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "32008": {
92
+ "content": "<extra_id_91>",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "32009": {
100
+ "content": "<extra_id_90>",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "32010": {
108
+ "content": "<extra_id_89>",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "32011": {
116
+ "content": "<extra_id_88>",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "32012": {
124
+ "content": "<extra_id_87>",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": true
130
+ },
131
+ "32013": {
132
+ "content": "<extra_id_86>",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": true
138
+ },
139
+ "32014": {
140
+ "content": "<extra_id_85>",
141
+ "lstrip": false,
142
+ "normalized": false,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": true
146
+ },
147
+ "32015": {
148
+ "content": "<extra_id_84>",
149
+ "lstrip": false,
150
+ "normalized": false,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": true
154
+ },
155
+ "32016": {
156
+ "content": "<extra_id_83>",
157
+ "lstrip": false,
158
+ "normalized": false,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": true
162
+ },
163
+ "32017": {
164
+ "content": "<extra_id_82>",
165
+ "lstrip": false,
166
+ "normalized": false,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": true
170
+ },
171
+ "32018": {
172
+ "content": "<extra_id_81>",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": true
178
+ },
179
+ "32019": {
180
+ "content": "<extra_id_80>",
181
+ "lstrip": false,
182
+ "normalized": false,
183
+ "rstrip": false,
184
+ "single_word": false,
185
+ "special": true
186
+ },
187
+ "32020": {
188
+ "content": "<extra_id_79>",
189
+ "lstrip": false,
190
+ "normalized": false,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": true
194
+ },
195
+ "32021": {
196
+ "content": "<extra_id_78>",
197
+ "lstrip": false,
198
+ "normalized": false,
199
+ "rstrip": false,
200
+ "single_word": false,
201
+ "special": true
202
+ },
203
+ "32022": {
204
+ "content": "<extra_id_77>",
205
+ "lstrip": false,
206
+ "normalized": false,
207
+ "rstrip": false,
208
+ "single_word": false,
209
+ "special": true
210
+ },
211
+ "32023": {
212
+ "content": "<extra_id_76>",
213
+ "lstrip": false,
214
+ "normalized": false,
215
+ "rstrip": false,
216
+ "single_word": false,
217
+ "special": true
218
+ },
219
+ "32024": {
220
+ "content": "<extra_id_75>",
221
+ "lstrip": false,
222
+ "normalized": false,
223
+ "rstrip": false,
224
+ "single_word": false,
225
+ "special": true
226
+ },
227
+ "32025": {
228
+ "content": "<extra_id_74>",
229
+ "lstrip": false,
230
+ "normalized": false,
231
+ "rstrip": false,
232
+ "single_word": false,
233
+ "special": true
234
+ },
235
+ "32026": {
236
+ "content": "<extra_id_73>",
237
+ "lstrip": false,
238
+ "normalized": false,
239
+ "rstrip": false,
240
+ "single_word": false,
241
+ "special": true
242
+ },
243
+ "32027": {
244
+ "content": "<extra_id_72>",
245
+ "lstrip": false,
246
+ "normalized": false,
247
+ "rstrip": false,
248
+ "single_word": false,
249
+ "special": true
250
+ },
251
+ "32028": {
252
+ "content": "<extra_id_71>",
253
+ "lstrip": false,
254
+ "normalized": false,
255
+ "rstrip": false,
256
+ "single_word": false,
257
+ "special": true
258
+ },
259
+ "32029": {
260
+ "content": "<extra_id_70>",
261
+ "lstrip": false,
262
+ "normalized": false,
263
+ "rstrip": false,
264
+ "single_word": false,
265
+ "special": true
266
+ },
267
+ "32030": {
268
+ "content": "<extra_id_69>",
269
+ "lstrip": false,
270
+ "normalized": false,
271
+ "rstrip": false,
272
+ "single_word": false,
273
+ "special": true
274
+ },
275
+ "32031": {
276
+ "content": "<extra_id_68>",
277
+ "lstrip": false,
278
+ "normalized": false,
279
+ "rstrip": false,
280
+ "single_word": false,
281
+ "special": true
282
+ },
283
+ "32032": {
284
+ "content": "<extra_id_67>",
285
+ "lstrip": false,
286
+ "normalized": false,
287
+ "rstrip": false,
288
+ "single_word": false,
289
+ "special": true
290
+ },
291
+ "32033": {
292
+ "content": "<extra_id_66>",
293
+ "lstrip": false,
294
+ "normalized": false,
295
+ "rstrip": false,
296
+ "single_word": false,
297
+ "special": true
298
+ },
299
+ "32034": {
300
+ "content": "<extra_id_65>",
301
+ "lstrip": false,
302
+ "normalized": false,
303
+ "rstrip": false,
304
+ "single_word": false,
305
+ "special": true
306
+ },
307
+ "32035": {
308
+ "content": "<extra_id_64>",
309
+ "lstrip": false,
310
+ "normalized": false,
311
+ "rstrip": false,
312
+ "single_word": false,
313
+ "special": true
314
+ },
315
+ "32036": {
316
+ "content": "<extra_id_63>",
317
+ "lstrip": false,
318
+ "normalized": false,
319
+ "rstrip": false,
320
+ "single_word": false,
321
+ "special": true
322
+ },
323
+ "32037": {
324
+ "content": "<extra_id_62>",
325
+ "lstrip": false,
326
+ "normalized": false,
327
+ "rstrip": false,
328
+ "single_word": false,
329
+ "special": true
330
+ },
331
+ "32038": {
332
+ "content": "<extra_id_61>",
333
+ "lstrip": false,
334
+ "normalized": false,
335
+ "rstrip": false,
336
+ "single_word": false,
337
+ "special": true
338
+ },
339
+ "32039": {
340
+ "content": "<extra_id_60>",
341
+ "lstrip": false,
342
+ "normalized": false,
343
+ "rstrip": false,
344
+ "single_word": false,
345
+ "special": true
346
+ },
347
+ "32040": {
348
+ "content": "<extra_id_59>",
349
+ "lstrip": false,
350
+ "normalized": false,
351
+ "rstrip": false,
352
+ "single_word": false,
353
+ "special": true
354
+ },
355
+ "32041": {
356
+ "content": "<extra_id_58>",
357
+ "lstrip": false,
358
+ "normalized": false,
359
+ "rstrip": false,
360
+ "single_word": false,
361
+ "special": true
362
+ },
363
+ "32042": {
364
+ "content": "<extra_id_57>",
365
+ "lstrip": false,
366
+ "normalized": false,
367
+ "rstrip": false,
368
+ "single_word": false,
369
+ "special": true
370
+ },
371
+ "32043": {
372
+ "content": "<extra_id_56>",
373
+ "lstrip": false,
374
+ "normalized": false,
375
+ "rstrip": false,
376
+ "single_word": false,
377
+ "special": true
378
+ },
379
+ "32044": {
380
+ "content": "<extra_id_55>",
381
+ "lstrip": false,
382
+ "normalized": false,
383
+ "rstrip": false,
384
+ "single_word": false,
385
+ "special": true
386
+ },
387
+ "32045": {
388
+ "content": "<extra_id_54>",
389
+ "lstrip": false,
390
+ "normalized": false,
391
+ "rstrip": false,
392
+ "single_word": false,
393
+ "special": true
394
+ },
395
+ "32046": {
396
+ "content": "<extra_id_53>",
397
+ "lstrip": false,
398
+ "normalized": false,
399
+ "rstrip": false,
400
+ "single_word": false,
401
+ "special": true
402
+ },
403
+ "32047": {
404
+ "content": "<extra_id_52>",
405
+ "lstrip": false,
406
+ "normalized": false,
407
+ "rstrip": false,
408
+ "single_word": false,
409
+ "special": true
410
+ },
411
+ "32048": {
412
+ "content": "<extra_id_51>",
413
+ "lstrip": false,
414
+ "normalized": false,
415
+ "rstrip": false,
416
+ "single_word": false,
417
+ "special": true
418
+ },
419
+ "32049": {
420
+ "content": "<extra_id_50>",
421
+ "lstrip": false,
422
+ "normalized": false,
423
+ "rstrip": false,
424
+ "single_word": false,
425
+ "special": true
426
+ },
427
+ "32050": {
428
+ "content": "<extra_id_49>",
429
+ "lstrip": false,
430
+ "normalized": false,
431
+ "rstrip": false,
432
+ "single_word": false,
433
+ "special": true
434
+ },
435
+ "32051": {
436
+ "content": "<extra_id_48>",
437
+ "lstrip": false,
438
+ "normalized": false,
439
+ "rstrip": false,
440
+ "single_word": false,
441
+ "special": true
442
+ },
443
+ "32052": {
444
+ "content": "<extra_id_47>",
445
+ "lstrip": false,
446
+ "normalized": false,
447
+ "rstrip": false,
448
+ "single_word": false,
449
+ "special": true
450
+ },
451
+ "32053": {
452
+ "content": "<extra_id_46>",
453
+ "lstrip": false,
454
+ "normalized": false,
455
+ "rstrip": false,
456
+ "single_word": false,
457
+ "special": true
458
+ },
459
+ "32054": {
460
+ "content": "<extra_id_45>",
461
+ "lstrip": false,
462
+ "normalized": false,
463
+ "rstrip": false,
464
+ "single_word": false,
465
+ "special": true
466
+ },
467
+ "32055": {
468
+ "content": "<extra_id_44>",
469
+ "lstrip": false,
470
+ "normalized": false,
471
+ "rstrip": false,
472
+ "single_word": false,
473
+ "special": true
474
+ },
475
+ "32056": {
476
+ "content": "<extra_id_43>",
477
+ "lstrip": false,
478
+ "normalized": false,
479
+ "rstrip": false,
480
+ "single_word": false,
481
+ "special": true
482
+ },
483
+ "32057": {
484
+ "content": "<extra_id_42>",
485
+ "lstrip": false,
486
+ "normalized": false,
487
+ "rstrip": false,
488
+ "single_word": false,
489
+ "special": true
490
+ },
491
+ "32058": {
492
+ "content": "<extra_id_41>",
493
+ "lstrip": false,
494
+ "normalized": false,
495
+ "rstrip": false,
496
+ "single_word": false,
497
+ "special": true
498
+ },
499
+ "32059": {
500
+ "content": "<extra_id_40>",
501
+ "lstrip": false,
502
+ "normalized": false,
503
+ "rstrip": false,
504
+ "single_word": false,
505
+ "special": true
506
+ },
507
+ "32060": {
508
+ "content": "<extra_id_39>",
509
+ "lstrip": false,
510
+ "normalized": false,
511
+ "rstrip": false,
512
+ "single_word": false,
513
+ "special": true
514
+ },
515
+ "32061": {
516
+ "content": "<extra_id_38>",
517
+ "lstrip": false,
518
+ "normalized": false,
519
+ "rstrip": false,
520
+ "single_word": false,
521
+ "special": true
522
+ },
523
+ "32062": {
524
+ "content": "<extra_id_37>",
525
+ "lstrip": false,
526
+ "normalized": false,
527
+ "rstrip": false,
528
+ "single_word": false,
529
+ "special": true
530
+ },
531
+ "32063": {
532
+ "content": "<extra_id_36>",
533
+ "lstrip": false,
534
+ "normalized": false,
535
+ "rstrip": false,
536
+ "single_word": false,
537
+ "special": true
538
+ },
539
+ "32064": {
540
+ "content": "<extra_id_35>",
541
+ "lstrip": false,
542
+ "normalized": false,
543
+ "rstrip": false,
544
+ "single_word": false,
545
+ "special": true
546
+ },
547
+ "32065": {
548
+ "content": "<extra_id_34>",
549
+ "lstrip": false,
550
+ "normalized": false,
551
+ "rstrip": false,
552
+ "single_word": false,
553
+ "special": true
554
+ },
555
+ "32066": {
556
+ "content": "<extra_id_33>",
557
+ "lstrip": false,
558
+ "normalized": false,
559
+ "rstrip": false,
560
+ "single_word": false,
561
+ "special": true
562
+ },
563
+ "32067": {
564
+ "content": "<extra_id_32>",
565
+ "lstrip": false,
566
+ "normalized": false,
567
+ "rstrip": false,
568
+ "single_word": false,
569
+ "special": true
570
+ },
571
+ "32068": {
572
+ "content": "<extra_id_31>",
573
+ "lstrip": false,
574
+ "normalized": false,
575
+ "rstrip": false,
576
+ "single_word": false,
577
+ "special": true
578
+ },
579
+ "32069": {
580
+ "content": "<extra_id_30>",
581
+ "lstrip": false,
582
+ "normalized": false,
583
+ "rstrip": false,
584
+ "single_word": false,
585
+ "special": true
586
+ },
587
+ "32070": {
588
+ "content": "<extra_id_29>",
589
+ "lstrip": false,
590
+ "normalized": false,
591
+ "rstrip": false,
592
+ "single_word": false,
593
+ "special": true
594
+ },
595
+ "32071": {
596
+ "content": "<extra_id_28>",
597
+ "lstrip": false,
598
+ "normalized": false,
599
+ "rstrip": false,
600
+ "single_word": false,
601
+ "special": true
602
+ },
603
+ "32072": {
604
+ "content": "<extra_id_27>",
605
+ "lstrip": false,
606
+ "normalized": false,
607
+ "rstrip": false,
608
+ "single_word": false,
609
+ "special": true
610
+ },
611
+ "32073": {
612
+ "content": "<extra_id_26>",
613
+ "lstrip": false,
614
+ "normalized": false,
615
+ "rstrip": false,
616
+ "single_word": false,
617
+ "special": true
618
+ },
619
+ "32074": {
620
+ "content": "<extra_id_25>",
621
+ "lstrip": false,
622
+ "normalized": false,
623
+ "rstrip": false,
624
+ "single_word": false,
625
+ "special": true
626
+ },
627
+ "32075": {
628
+ "content": "<extra_id_24>",
629
+ "lstrip": false,
630
+ "normalized": false,
631
+ "rstrip": false,
632
+ "single_word": false,
633
+ "special": true
634
+ },
635
+ "32076": {
636
+ "content": "<extra_id_23>",
637
+ "lstrip": false,
638
+ "normalized": false,
639
+ "rstrip": false,
640
+ "single_word": false,
641
+ "special": true
642
+ },
643
+ "32077": {
644
+ "content": "<extra_id_22>",
645
+ "lstrip": false,
646
+ "normalized": false,
647
+ "rstrip": false,
648
+ "single_word": false,
649
+ "special": true
650
+ },
651
+ "32078": {
652
+ "content": "<extra_id_21>",
653
+ "lstrip": false,
654
+ "normalized": false,
655
+ "rstrip": false,
656
+ "single_word": false,
657
+ "special": true
658
+ },
659
+ "32079": {
660
+ "content": "<extra_id_20>",
661
+ "lstrip": false,
662
+ "normalized": false,
663
+ "rstrip": false,
664
+ "single_word": false,
665
+ "special": true
666
+ },
667
+ "32080": {
668
+ "content": "<extra_id_19>",
669
+ "lstrip": false,
670
+ "normalized": false,
671
+ "rstrip": false,
672
+ "single_word": false,
673
+ "special": true
674
+ },
675
+ "32081": {
676
+ "content": "<extra_id_18>",
677
+ "lstrip": false,
678
+ "normalized": false,
679
+ "rstrip": false,
680
+ "single_word": false,
681
+ "special": true
682
+ },
683
+ "32082": {
684
+ "content": "<extra_id_17>",
685
+ "lstrip": false,
686
+ "normalized": false,
687
+ "rstrip": false,
688
+ "single_word": false,
689
+ "special": true
690
+ },
691
+ "32083": {
692
+ "content": "<extra_id_16>",
693
+ "lstrip": false,
694
+ "normalized": false,
695
+ "rstrip": false,
696
+ "single_word": false,
697
+ "special": true
698
+ },
699
+ "32084": {
700
+ "content": "<extra_id_15>",
701
+ "lstrip": false,
702
+ "normalized": false,
703
+ "rstrip": false,
704
+ "single_word": false,
705
+ "special": true
706
+ },
707
+ "32085": {
708
+ "content": "<extra_id_14>",
709
+ "lstrip": false,
710
+ "normalized": false,
711
+ "rstrip": false,
712
+ "single_word": false,
713
+ "special": true
714
+ },
715
+ "32086": {
716
+ "content": "<extra_id_13>",
717
+ "lstrip": false,
718
+ "normalized": false,
719
+ "rstrip": false,
720
+ "single_word": false,
721
+ "special": true
722
+ },
723
+ "32087": {
724
+ "content": "<extra_id_12>",
725
+ "lstrip": false,
726
+ "normalized": false,
727
+ "rstrip": false,
728
+ "single_word": false,
729
+ "special": true
730
+ },
731
+ "32088": {
732
+ "content": "<extra_id_11>",
733
+ "lstrip": false,
734
+ "normalized": false,
735
+ "rstrip": false,
736
+ "single_word": false,
737
+ "special": true
738
+ },
739
+ "32089": {
740
+ "content": "<extra_id_10>",
741
+ "lstrip": false,
742
+ "normalized": false,
743
+ "rstrip": false,
744
+ "single_word": false,
745
+ "special": true
746
+ },
747
+ "32090": {
748
+ "content": "<extra_id_9>",
749
+ "lstrip": false,
750
+ "normalized": false,
751
+ "rstrip": false,
752
+ "single_word": false,
753
+ "special": true
754
+ },
755
+ "32091": {
756
+ "content": "<extra_id_8>",
757
+ "lstrip": false,
758
+ "normalized": false,
759
+ "rstrip": false,
760
+ "single_word": false,
761
+ "special": true
762
+ },
763
+ "32092": {
764
+ "content": "<extra_id_7>",
765
+ "lstrip": false,
766
+ "normalized": false,
767
+ "rstrip": false,
768
+ "single_word": false,
769
+ "special": true
770
+ },
771
+ "32093": {
772
+ "content": "<extra_id_6>",
773
+ "lstrip": false,
774
+ "normalized": false,
775
+ "rstrip": false,
776
+ "single_word": false,
777
+ "special": true
778
+ },
779
+ "32094": {
780
+ "content": "<extra_id_5>",
781
+ "lstrip": false,
782
+ "normalized": false,
783
+ "rstrip": false,
784
+ "single_word": false,
785
+ "special": true
786
+ },
787
+ "32095": {
788
+ "content": "<extra_id_4>",
789
+ "lstrip": false,
790
+ "normalized": false,
791
+ "rstrip": false,
792
+ "single_word": false,
793
+ "special": true
794
+ },
795
+ "32096": {
796
+ "content": "<extra_id_3>",
797
+ "lstrip": false,
798
+ "normalized": false,
799
+ "rstrip": false,
800
+ "single_word": false,
801
+ "special": true
802
+ },
803
+ "32097": {
804
+ "content": "<extra_id_2>",
805
+ "lstrip": false,
806
+ "normalized": false,
807
+ "rstrip": false,
808
+ "single_word": false,
809
+ "special": true
810
+ },
811
+ "32098": {
812
+ "content": "<extra_id_1>",
813
+ "lstrip": false,
814
+ "normalized": false,
815
+ "rstrip": false,
816
+ "single_word": false,
817
+ "special": true
818
+ },
819
+ "32099": {
820
+ "content": "<extra_id_0>",
821
+ "lstrip": false,
822
+ "normalized": false,
823
+ "rstrip": false,
824
+ "single_word": false,
825
+ "special": true
826
+ }
827
+ },
828
+ "additional_special_tokens": [
829
+ "<extra_id_0>",
830
+ "<extra_id_1>",
831
+ "<extra_id_2>",
832
+ "<extra_id_3>",
833
+ "<extra_id_4>",
834
+ "<extra_id_5>",
835
+ "<extra_id_6>",
836
+ "<extra_id_7>",
837
+ "<extra_id_8>",
838
+ "<extra_id_9>",
839
+ "<extra_id_10>",
840
+ "<extra_id_11>",
841
+ "<extra_id_12>",
842
+ "<extra_id_13>",
843
+ "<extra_id_14>",
844
+ "<extra_id_15>",
845
+ "<extra_id_16>",
846
+ "<extra_id_17>",
847
+ "<extra_id_18>",
848
+ "<extra_id_19>",
849
+ "<extra_id_20>",
850
+ "<extra_id_21>",
851
+ "<extra_id_22>",
852
+ "<extra_id_23>",
853
+ "<extra_id_24>",
854
+ "<extra_id_25>",
855
+ "<extra_id_26>",
856
+ "<extra_id_27>",
857
+ "<extra_id_28>",
858
+ "<extra_id_29>",
859
+ "<extra_id_30>",
860
+ "<extra_id_31>",
861
+ "<extra_id_32>",
862
+ "<extra_id_33>",
863
+ "<extra_id_34>",
864
+ "<extra_id_35>",
865
+ "<extra_id_36>",
866
+ "<extra_id_37>",
867
+ "<extra_id_38>",
868
+ "<extra_id_39>",
869
+ "<extra_id_40>",
870
+ "<extra_id_41>",
871
+ "<extra_id_42>",
872
+ "<extra_id_43>",
873
+ "<extra_id_44>",
874
+ "<extra_id_45>",
875
+ "<extra_id_46>",
876
+ "<extra_id_47>",
877
+ "<extra_id_48>",
878
+ "<extra_id_49>",
879
+ "<extra_id_50>",
880
+ "<extra_id_51>",
881
+ "<extra_id_52>",
882
+ "<extra_id_53>",
883
+ "<extra_id_54>",
884
+ "<extra_id_55>",
885
+ "<extra_id_56>",
886
+ "<extra_id_57>",
887
+ "<extra_id_58>",
888
+ "<extra_id_59>",
889
+ "<extra_id_60>",
890
+ "<extra_id_61>",
891
+ "<extra_id_62>",
892
+ "<extra_id_63>",
893
+ "<extra_id_64>",
894
+ "<extra_id_65>",
895
+ "<extra_id_66>",
896
+ "<extra_id_67>",
897
+ "<extra_id_68>",
898
+ "<extra_id_69>",
899
+ "<extra_id_70>",
900
+ "<extra_id_71>",
901
+ "<extra_id_72>",
902
+ "<extra_id_73>",
903
+ "<extra_id_74>",
904
+ "<extra_id_75>",
905
+ "<extra_id_76>",
906
+ "<extra_id_77>",
907
+ "<extra_id_78>",
908
+ "<extra_id_79>",
909
+ "<extra_id_80>",
910
+ "<extra_id_81>",
911
+ "<extra_id_82>",
912
+ "<extra_id_83>",
913
+ "<extra_id_84>",
914
+ "<extra_id_85>",
915
+ "<extra_id_86>",
916
+ "<extra_id_87>",
917
+ "<extra_id_88>",
918
+ "<extra_id_89>",
919
+ "<extra_id_90>",
920
+ "<extra_id_91>",
921
+ "<extra_id_92>",
922
+ "<extra_id_93>",
923
+ "<extra_id_94>",
924
+ "<extra_id_95>",
925
+ "<extra_id_96>",
926
+ "<extra_id_97>",
927
+ "<extra_id_98>",
928
+ "<extra_id_99>"
929
+ ],
930
+ "clean_up_tokenization_spaces": true,
931
+ "eos_token": "</s>",
932
+ "extra_ids": 100,
933
+ "legacy": true,
934
+ "model_max_length": 512,
935
+ "pad_token": "<pad>",
936
+ "sp_model_kwargs": {},
937
+ "tokenizer_class": "T5Tokenizer",
938
+ "unk_token": "<unk>"
939
+ }
results/checkpoint-17000/trainer_state.json ADDED
@@ -0,0 +1,331 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 9.941520467836257,
5
+ "eval_steps": 500,
6
+ "global_step": 17000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.29,
13
+ "learning_rate": 0.0002912280701754386,
14
+ "loss": 0.3858,
15
+ "step": 500
16
+ },
17
+ {
18
+ "epoch": 0.58,
19
+ "learning_rate": 0.0002824561403508772,
20
+ "loss": 0.0819,
21
+ "step": 1000
22
+ },
23
+ {
24
+ "epoch": 0.88,
25
+ "learning_rate": 0.00027368421052631573,
26
+ "loss": 0.046,
27
+ "step": 1500
28
+ },
29
+ {
30
+ "epoch": 1.0,
31
+ "eval_loss": 0.006535602733492851,
32
+ "eval_rouge1": 0.9974420190995907,
33
+ "eval_rouge2": 0.0,
34
+ "eval_rougeL": 0.9974420190995907,
35
+ "eval_rougeLsum": 0.9974420190995907,
36
+ "eval_runtime": 155.5569,
37
+ "eval_samples_per_second": 37.697,
38
+ "eval_steps_per_second": 9.424,
39
+ "step": 1710
40
+ },
41
+ {
42
+ "epoch": 1.17,
43
+ "learning_rate": 0.0002649122807017544,
44
+ "loss": 0.0317,
45
+ "step": 2000
46
+ },
47
+ {
48
+ "epoch": 1.46,
49
+ "learning_rate": 0.00025614035087719294,
50
+ "loss": 0.0132,
51
+ "step": 2500
52
+ },
53
+ {
54
+ "epoch": 1.75,
55
+ "learning_rate": 0.00024736842105263154,
56
+ "loss": 0.0103,
57
+ "step": 3000
58
+ },
59
+ {
60
+ "epoch": 2.0,
61
+ "eval_loss": 0.00893484242260456,
62
+ "eval_rouge1": 0.997612551159618,
63
+ "eval_rouge2": 0.0,
64
+ "eval_rougeL": 0.997612551159618,
65
+ "eval_rougeLsum": 0.997612551159618,
66
+ "eval_runtime": 159.0943,
67
+ "eval_samples_per_second": 36.859,
68
+ "eval_steps_per_second": 9.215,
69
+ "step": 3420
70
+ },
71
+ {
72
+ "epoch": 2.05,
73
+ "learning_rate": 0.00023859649122807015,
74
+ "loss": 0.015,
75
+ "step": 3500
76
+ },
77
+ {
78
+ "epoch": 2.34,
79
+ "learning_rate": 0.00022982456140350875,
80
+ "loss": 0.0078,
81
+ "step": 4000
82
+ },
83
+ {
84
+ "epoch": 2.63,
85
+ "learning_rate": 0.00022105263157894733,
86
+ "loss": 0.0075,
87
+ "step": 4500
88
+ },
89
+ {
90
+ "epoch": 2.92,
91
+ "learning_rate": 0.00021228070175438596,
92
+ "loss": 0.0105,
93
+ "step": 5000
94
+ },
95
+ {
96
+ "epoch": 3.0,
97
+ "eval_loss": 0.002929441863670945,
98
+ "eval_rouge1": 0.9982946793997272,
99
+ "eval_rouge2": 0.0,
100
+ "eval_rougeL": 0.9982946793997272,
101
+ "eval_rougeLsum": 0.9982946793997272,
102
+ "eval_runtime": 155.9353,
103
+ "eval_samples_per_second": 37.605,
104
+ "eval_steps_per_second": 9.401,
105
+ "step": 5130
106
+ },
107
+ {
108
+ "epoch": 3.22,
109
+ "learning_rate": 0.00020350877192982454,
110
+ "loss": 0.0159,
111
+ "step": 5500
112
+ },
113
+ {
114
+ "epoch": 3.51,
115
+ "learning_rate": 0.00019473684210526314,
116
+ "loss": 0.0121,
117
+ "step": 6000
118
+ },
119
+ {
120
+ "epoch": 3.8,
121
+ "learning_rate": 0.00018596491228070172,
122
+ "loss": 0.0105,
123
+ "step": 6500
124
+ },
125
+ {
126
+ "epoch": 4.0,
127
+ "eval_loss": 0.0014885533601045609,
128
+ "eval_rouge1": 0.9986357435197817,
129
+ "eval_rouge2": 0.0,
130
+ "eval_rougeL": 0.9986357435197817,
131
+ "eval_rougeLsum": 0.9986357435197817,
132
+ "eval_runtime": 159.1866,
133
+ "eval_samples_per_second": 36.837,
134
+ "eval_steps_per_second": 9.209,
135
+ "step": 6840
136
+ },
137
+ {
138
+ "epoch": 4.09,
139
+ "learning_rate": 0.00017719298245614035,
140
+ "loss": 0.0136,
141
+ "step": 7000
142
+ },
143
+ {
144
+ "epoch": 4.39,
145
+ "learning_rate": 0.00016842105263157892,
146
+ "loss": 0.0067,
147
+ "step": 7500
148
+ },
149
+ {
150
+ "epoch": 4.68,
151
+ "learning_rate": 0.00015964912280701753,
152
+ "loss": 0.0052,
153
+ "step": 8000
154
+ },
155
+ {
156
+ "epoch": 4.97,
157
+ "learning_rate": 0.00015087719298245613,
158
+ "loss": 0.0032,
159
+ "step": 8500
160
+ },
161
+ {
162
+ "epoch": 5.0,
163
+ "eval_loss": 0.002013931516557932,
164
+ "eval_rouge1": 0.9982946793997272,
165
+ "eval_rouge2": 0.0,
166
+ "eval_rougeL": 0.9982946793997272,
167
+ "eval_rougeLsum": 0.9982946793997272,
168
+ "eval_runtime": 158.9827,
169
+ "eval_samples_per_second": 36.885,
170
+ "eval_steps_per_second": 9.221,
171
+ "step": 8550
172
+ },
173
+ {
174
+ "epoch": 5.26,
175
+ "learning_rate": 0.0001421052631578947,
176
+ "loss": 0.0036,
177
+ "step": 9000
178
+ },
179
+ {
180
+ "epoch": 5.56,
181
+ "learning_rate": 0.0001333333333333333,
182
+ "loss": 0.0028,
183
+ "step": 9500
184
+ },
185
+ {
186
+ "epoch": 5.85,
187
+ "learning_rate": 0.00012456140350877192,
188
+ "loss": 0.0023,
189
+ "step": 10000
190
+ },
191
+ {
192
+ "epoch": 6.0,
193
+ "eval_loss": 0.008263664320111275,
194
+ "eval_rouge1": 0.997612551159618,
195
+ "eval_rouge2": 0.0,
196
+ "eval_rougeL": 0.997612551159618,
197
+ "eval_rougeLsum": 0.997612551159618,
198
+ "eval_runtime": 165.6672,
199
+ "eval_samples_per_second": 35.396,
200
+ "eval_steps_per_second": 8.849,
201
+ "step": 10260
202
+ },
203
+ {
204
+ "epoch": 6.14,
205
+ "learning_rate": 0.00011578947368421051,
206
+ "loss": 0.0045,
207
+ "step": 10500
208
+ },
209
+ {
210
+ "epoch": 6.43,
211
+ "learning_rate": 0.00010701754385964911,
212
+ "loss": 0.0031,
213
+ "step": 11000
214
+ },
215
+ {
216
+ "epoch": 6.73,
217
+ "learning_rate": 9.82456140350877e-05,
218
+ "loss": 0.0013,
219
+ "step": 11500
220
+ },
221
+ {
222
+ "epoch": 7.0,
223
+ "eval_loss": 0.003634733846411109,
224
+ "eval_rouge1": 0.9982946793997272,
225
+ "eval_rouge2": 0.0,
226
+ "eval_rougeL": 0.9982946793997272,
227
+ "eval_rougeLsum": 0.9982946793997272,
228
+ "eval_runtime": 165.5178,
229
+ "eval_samples_per_second": 35.428,
230
+ "eval_steps_per_second": 8.857,
231
+ "step": 11970
232
+ },
233
+ {
234
+ "epoch": 7.02,
235
+ "learning_rate": 8.94736842105263e-05,
236
+ "loss": 0.0017,
237
+ "step": 12000
238
+ },
239
+ {
240
+ "epoch": 7.31,
241
+ "learning_rate": 8.07017543859649e-05,
242
+ "loss": 0.0008,
243
+ "step": 12500
244
+ },
245
+ {
246
+ "epoch": 7.6,
247
+ "learning_rate": 7.19298245614035e-05,
248
+ "loss": 0.0017,
249
+ "step": 13000
250
+ },
251
+ {
252
+ "epoch": 7.89,
253
+ "learning_rate": 6.315789473684209e-05,
254
+ "loss": 0.0012,
255
+ "step": 13500
256
+ },
257
+ {
258
+ "epoch": 8.0,
259
+ "eval_loss": 0.0013940236531198025,
260
+ "eval_rouge1": 0.9982946793997272,
261
+ "eval_rouge2": 0.0,
262
+ "eval_rougeL": 0.9982946793997272,
263
+ "eval_rougeLsum": 0.9982946793997272,
264
+ "eval_runtime": 166.5345,
265
+ "eval_samples_per_second": 35.212,
266
+ "eval_steps_per_second": 8.803,
267
+ "step": 13680
268
+ },
269
+ {
270
+ "epoch": 8.19,
271
+ "learning_rate": 5.4385964912280694e-05,
272
+ "loss": 0.0024,
273
+ "step": 14000
274
+ },
275
+ {
276
+ "epoch": 8.48,
277
+ "learning_rate": 4.561403508771929e-05,
278
+ "loss": 0.0015,
279
+ "step": 14500
280
+ },
281
+ {
282
+ "epoch": 8.77,
283
+ "learning_rate": 3.684210526315789e-05,
284
+ "loss": 0.0012,
285
+ "step": 15000
286
+ },
287
+ {
288
+ "epoch": 9.0,
289
+ "eval_loss": 0.0021317724604159594,
290
+ "eval_rouge1": 0.9982946793997272,
291
+ "eval_rouge2": 0.0,
292
+ "eval_rougeL": 0.9982946793997272,
293
+ "eval_rougeLsum": 0.9982946793997272,
294
+ "eval_runtime": 166.3607,
295
+ "eval_samples_per_second": 35.249,
296
+ "eval_steps_per_second": 8.812,
297
+ "step": 15390
298
+ },
299
+ {
300
+ "epoch": 9.06,
301
+ "learning_rate": 2.807017543859649e-05,
302
+ "loss": 0.0008,
303
+ "step": 15500
304
+ },
305
+ {
306
+ "epoch": 9.36,
307
+ "learning_rate": 1.9298245614035086e-05,
308
+ "loss": 0.0004,
309
+ "step": 16000
310
+ },
311
+ {
312
+ "epoch": 9.65,
313
+ "learning_rate": 1.0526315789473683e-05,
314
+ "loss": 0.0006,
315
+ "step": 16500
316
+ },
317
+ {
318
+ "epoch": 9.94,
319
+ "learning_rate": 1.7543859649122805e-06,
320
+ "loss": 0.0011,
321
+ "step": 17000
322
+ }
323
+ ],
324
+ "logging_steps": 500,
325
+ "max_steps": 17100,
326
+ "num_train_epochs": 10,
327
+ "save_steps": 500,
328
+ "total_flos": 4365210429186048.0,
329
+ "trial_name": null,
330
+ "trial_params": null
331
+ }
results/checkpoint-17000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2e8c3aef9cfe94a083e4e678683065ab146cef97e8d157c2108eb635736de7c
3
+ size 4664
word_embedding.py CHANGED
@@ -1,3 +1,4 @@
 
1
  from datasets import load_dataset
2
  import shutil
3
  import json
@@ -614,4 +615,622 @@ def main():
614
 
615
 
616
  if __name__ == "__main__":
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
617
  main()
 
1
+ <<<<<<< HEAD
2
  from datasets import load_dataset
3
  import shutil
4
  import json
 
615
 
616
 
617
  if __name__ == "__main__":
618
+ =======
619
+ from datasets import load_dataset
620
+ import shutil
621
+ import json
622
+ from collections import defaultdict
623
+ import multiprocessing
624
+ import gensim
625
+ from sklearn.metrics import classification_report
626
+ from gensim import corpora
627
+ from gensim.test.utils import common_texts
628
+ from gensim.models import Word2Vec
629
+ from gensim.models import KeyedVectors
630
+ from gensim.models import fasttext
631
+ from gensim.test.utils import datapath
632
+ from wefe.datasets import load_bingliu
633
+ from wefe.metrics import RNSB
634
+ from wefe.query import Query
635
+ from wefe.word_embedding_model import WordEmbeddingModel
636
+ from wefe.utils import plot_queries_results, run_queries
637
+ import pandas as pd
638
+ import gensim.downloader as api
639
+ import glob
640
+ from sklearn.feature_extraction.text import TfidfVectorizer
641
+ from sklearn.ensemble import RandomForestClassifier
642
+ from wefe.metrics import WEAT
643
+ from wefe.datasets import load_weat
644
+ from wefe.utils import run_queries
645
+ from wefe.utils import plot_queries_results
646
+ import random
647
+ from scipy.special import expit
648
+ import math
649
+ import sys
650
+ import os
651
+ import argparse
652
+ import nltk
653
+ import scipy.sparse
654
+ import numpy as np
655
+ import string
656
+ import io
657
+ from sklearn.model_selection import train_test_split
658
+
659
+
660
+ '''STEPS FOR CODE:
661
+ 1. Train word embeddings on Simple English Wikipedia;
662
+ 2. Compare these to other pre-trained embeddings;
663
+ 3. Quantify biases that exist in these word embeddings;
664
+ 4. Use your word embeddings as features in a simple text classifier;
665
+ '''
666
+
667
+
668
def load_vectors(fname):
    """Load word vectors from a text ``.vec`` file into a dictionary.

    The first line of the file is a header holding two integers; each
    following line is a token followed by its space-separated float
    components.

    Args:
        fname: Path of the vector file to read.

    Returns:
        dict mapping each token to a list of floats.

    Raises:
        ValueError: If the header is not exactly two integers, or a vector
            component cannot be parsed as a float.
    """
    data = {}
    # The context manager guarantees the handle is closed even on a parse error.
    with io.open(fname, 'r', encoding='utf-8', newline='\n', errors='ignore') as fin:
        # The header values are not needed afterwards; parsing them validates
        # the file format and consumes the line.
        _n, _d = map(int, fin.readline().split())
        for line in fin:
            tokens = line.rstrip().split(' ')
            if not tokens[0]:
                # Blank line or line without a token: nothing to store.
                continue
            # Materialise the vector: a bare map() object could be read only once.
            data[tokens[0]] = list(map(float, tokens[1:]))
    return data
680
+
681
+
682
def train_embeddings():
    """Train CBOW and skip-gram Word2Vec models on Simple English Wikipedia.

    The tokenised sentences come from ``get_data()``. Both models are
    trained with 10 epochs, a window of 10 and 50-dimensional vectors, and
    each is saved to disk ("word2vec.model" and "skip2vec.model") so later
    runs can load them instead of retraining.

    Returns:
        tuple: ``(cbow_model, skip_model)`` as trained ``Word2Vec`` objects.
    """
    # Reuse the shared tokeniser instead of duplicating it here; the sentence
    # count it also returns is not needed for training.
    sentences, _ = get_data()

    embeddings_model = Word2Vec(
        sentences,
        epochs=10,
        window=10,
        vector_size=50)
    embeddings_model.save("word2vec.model")

    # sg=1 selects the skip-gram training algorithm.
    skip_model = Word2Vec(
        sentences,
        epochs=10,
        window=10,
        vector_size=50,
        sg=1)
    skip_model.save("skip2vec.model")

    return embeddings_model, skip_model
762
+
763
+
764
def get_data():
    """Load Simple English Wikipedia and tokenise it into sentences.

    Every page's text has newlines turned into sentence breaks and is then
    split on ".". Within each fragment only alphanumeric and whitespace
    characters are kept, and the remaining words are lower-cased.

    Returns:
        tuple: ``(data, num_sents)`` where ``data`` is a list of non-empty
        token lists (one per sentence) and ``num_sents`` is the number of
        "."-delimited fragments seen, including those that produced no
        tokens.
    """
    dataset = load_dataset("wikipedia", "20220301.simple")

    data = []
    num_sents = 0
    for page in dataset["train"]:
        # Treat line breaks as sentence boundaries too.
        document = page["text"].replace("\n", ". ")
        for sent in document.split("."):
            num_sents += 1
            clean_sent = "".join(
                ch for ch in sent if ch.isalnum() or ch.isspace()
            )
            # split() with no argument breaks on any whitespace run, so tabs
            # and similar characters never end up inside (or as) a token.
            new_sent = [word.lower() for word in clean_sent.split()]
            if new_sent:
                data.append(new_sent)

    return data, num_sents
800
+
801
+
802
def compare_embeddings(cbow, skip, urban, fasttext):
    """Print the two nearest neighbours of several queries for each embedding.

    ``cbow`` and ``skip`` are queried through their ``.wv`` attribute, while
    ``urban`` and ``fasttext`` are queried directly.
    """
    # (label, embedding, whether the vectors live under ``.wv``)
    sources = (
        ("cbow", cbow, True),
        ("skip", skip, True),
        ("urban", urban, False),
        ("fasttext", fasttext, False),
    )
    # (heading, words added to the query, words subtracted from it)
    analogies = (
        ("Most Similar to dog", ['dog'], []),
        ("\nMost Similar to Pizza - Pepperoni + Pretzel",
         ['pizza', 'pretzel'], ['pepperoni']),
        ("\nMost Similar to witch - woman + man", ['witch', 'man'], ['woman']),
        ("\nMost Similar to mayor - town + country",
         ['mayor', 'country'], ['town']),
        ("\nMost Similar to death", ['death'], []),
    )

    for heading, plus_words, minus_words in analogies:
        print(heading)
        for label, embedding, has_wv in sources:
            vectors = embedding.wv if has_wv else embedding
            # Hand each call its own list objects, as the literal calls did.
            neighbours = vectors.most_similar(
                positive=list(plus_words), negative=list(minus_words), topn=2
            )
            print(label, neighbours)
833
+
834
+
835
def quantify_bias(cbow, skip, urban, fasttext):
    """Quantify bias in the four embeddings with WEFE's WEAT metric.

    Two queries are run against every model, the rounded results table is
    printed, and the plot returned by ``plot_queries_results`` is shown.

    Args:
        cbow: Trained Word2Vec model; its ``.wv`` vectors are evaluated.
        skip: Trained Word2Vec model; its ``.wv`` vectors are evaluated.
        urban: Keyed vectors labelled "urban dictionary" in the results.
        fasttext: Keyed vectors labelled "fasttext" in the results.
    """
    weat_wordset = load_weat()

    models = [WordEmbeddingModel(cbow.wv, "CBOW"),
              WordEmbeddingModel(skip.wv, "skip-gram"),
              WordEmbeddingModel(urban, "urban dictionary"),
              WordEmbeddingModel(fasttext, "fasttext")]

    # Custom word set used alongside the stock WEAT word sets.
    religions = ['christianity',
                 'catholicism',
                 'islam',
                 'judaism',
                 'hinduism',
                 'buddhism',
                 'mormonism',
                 'scientology',
                 'taoism',
                 'atheism']
    queries = [
        # Religion vs Arts with respect to Career and Family
        Query([religions, weat_wordset['arts']],
              [weat_wordset['career'], weat_wordset['family']],
              ['Religion', 'Art'], ['Career', 'Family']),

        # Religion vs Weapons with respect to Male and Female terms
        Query([religions, weat_wordset['weapons']],
              [weat_wordset['male_terms'], weat_wordset['female_terms']],
              ['Religion', 'Weapons'], ['Male terms', 'Female terms']),
    ]

    wefe_results = run_queries(WEAT,
                               queries,
                               models,
                               metric_params={
                                   'preprocessors': [
                                       {},
                                       {'lowercase': True}
                                   ]
                               },
                               warn_not_found_words=True
                               ).T.round(2)

    print(wefe_results)
    plot_queries_results(wefe_results).show()
895
+
896
+
897
def text_classifier(cbow):
    """Train and report a logistic-regression sentiment classifier.

    Each review file under ``aclImdb/train/pos`` and ``aclImdb/train/neg``
    (at most 1000 per class) is represented by the average of the embeddings
    of its in-vocabulary words, so the feature size is the embedding
    dimension rather than the vocabulary size. The model is trained with
    ``sgd_for_lr_with_ce`` and a classification report on the training data
    is printed.

    Args:
        cbow: Trained Word2Vec model supplying the word embeddings.

    Raises:
        ValueError: If no review yields at least one in-vocabulary word
            (for example when the dataset directory is missing).
    """
    pos_train_files = glob.glob('aclImdb/train/pos/*')
    neg_train_files = glob.glob('aclImdb/train/neg/*')

    num_files_per_class = 1000
    labelled_files = (
        [(path, 1) for path in pos_train_files[:num_files_per_class]]
        + [(path, 0) for path in neg_train_files[:num_files_per_class]]
    )

    # Features and labels are collected together so that skipping a document
    # with no usable words can never push the two out of alignment.
    features = []
    labels = []
    vocab = set()  # required by avg_embeddings; not used further here
    for path, label in labelled_files:
        doc_vector = avg_embeddings(path, cbow, vocab)
        if len(doc_vector) > 0:
            features.append(doc_vector)
            labels.append(label)

    if not features:
        raise ValueError(
            "no usable training documents found under aclImdb/train"
        )

    X = np.vstack(features)
    y = labels

    w, b = sgd_for_lr_with_ce(X, y, num_passes=10)
    y_pred = predict_y_lr(w, b, X)
    # predict_y_lr returns a column vector; flatten it for the report.
    print(classification_report(y, np.ravel(y_pred)))
959
+
960
+ # compute for dev set
961
+ # pos_dev_files = glob.glob('aclImdb/test/pos/*')
962
+ # neg_dev_files = glob.glob('aclImdb/test/neg/*')
963
+ # num_dev_files_per_class = 100
964
+ # all_dev_files = pos_dev_files[:num_dev_files_per_class] + neg_dev_files[:num_dev_files_per_class]
965
+ # # use the same vectorizer from before! otherwise features won't line up
966
+ # # don't fit it again, just use it to transform!
967
+ # X_dev = vectorizer.transform(all_dev_files)
968
+ # y_dev = [1]* num_dev_files_per_class + [0]* num_dev_files_per_class
969
+ # # don't need new w and b, these are from out existing model
970
+ # y_dev_pred = predict_y_lr(w,b,X_dev)
971
+ # print(classification_report(y_dev, y_dev_pred))
972
+
973
+
974
def avg_embeddings(doc, model, vocab: set):
    """Return the mean embedding of the in-vocabulary words of a file.

    Args:
        doc: Path of a text file; it is split on whitespace into words.
        model: Object whose ``.wv`` provides ``key_to_index`` and vector
            lookup for a list of words.
        vocab: Set updated in place with every word read from the file,
            whether or not the model knows it.

    Returns:
        The element-wise mean of the embeddings of the known words, or an
        empty list when the file holds no known word.
    """
    # NOTE(review): words are used exactly as written in the file (case and
    # punctuation kept); confirm this matches how the model was tokenised.
    words = []
    with open(doc, "r", encoding="utf-8") as file:
        for line in file:
            words.extend(line.split())
    vocab.update(words)

    # Dict membership is O(1); scanning the index_to_key list is O(V) per word.
    known = model.wv.key_to_index
    words = [word for word in words if word in known]
    if words:
        return np.mean(model.wv[words], axis=0)
    return []
987
+
988
+
989
+
990
def sent_vec(sent, cbow):
    """Return the mean embedding of the words in ``sent`` known to ``cbow``.

    Args:
        sent: Iterable of word tokens.
        cbow: Model whose ``.wv`` supports ``in``, item lookup and
            ``vector_size``.

    Returns:
        A vector of length ``cbow.wv.vector_size``: the average of the
        embeddings of the known words, or all zeros when none is known.
    """
    wv = cbow.wv
    total = np.zeros(wv.vector_size)
    count = 0
    for w in sent:
        if w in wv:
            total += wv[w]
            count += 1
    if count == 0:
        # Nothing to average; avoid dividing by zero.
        return total
    # Divide by the true number of summed vectors (not count + 1).
    return total / count
1001
+
1002
+
1003
def spacy_tokenizer(sentence):
    """Placeholder tokenizer: ignores ``sentence`` and always returns 0.

    None of the steps sketched below is implemented yet.
    """
    # Planned pipeline (kept as notes only):
    #   1. doc = nlp(sentence)  -- build the annotated document
    #   2. lemmatise every token, lower-case and strip it
    #   3. drop stop words and punctuation
    #   4. return the preprocessed list of tokens
    return 0
1022
+
1023
+
1024
def cbow_classifier(cbow, data, num_sentances):
    """Build averaged-embedding features for a list of tokenised sentences.

    Each sentence is represented by the mean of the embeddings of its
    in-vocabulary tokens; a sentence with no known token gets a zero vector.

    The earlier version went on to call ``train_test_split`` with a label
    vector ``y`` that was never defined, so it always failed. No labels are
    passed to this function, so it now stops at the feature matrix and leaves
    training to a caller that has labels.

    Args:
        cbow: Model whose ``.wv`` provides ``key_to_index``, ``vector_size``
            and vector lookup for a list of tokens.
        data: Sequence of sentences, each an iterable of tokens.
        num_sentances: Unused; kept so existing calls keep working.

    Returns:
        numpy array of shape ``(len(data), cbow.wv.vector_size)``.
    """
    wv = cbow.wv
    known = wv.key_to_index

    # Pre-filled with zeros so sentences without known tokens need no
    # special handling and the result is never ragged.
    features = np.zeros((len(data), wv.vector_size))
    for row, sentence in enumerate(data):
        in_vocab = [token for token in sentence if token in known]
        if in_vocab:
            features[row] = np.mean(wv[in_vocab], axis=0)
    return features
1049
+
1050
+ # print(embeddings)
1051
+ # print(vocab_len)
1052
+
1053
+ # X_train_vect_avg = []
1054
+ # for v in X_train_vect:
1055
+ # if v.size:
1056
+ # X_train_vect_avg.append(v.mean(axis=0))
1057
+ # else:
1058
+ # X_train_vect_avg.append(np.zeros(100, dtype=float))
1059
+
1060
+ # X_test_vect_avg = []
1061
+ # for v in X_test_vect:
1062
+ # if v.size:
1063
+ # X_test_vect_avg.append(v.mean(axis=0))
1064
+ # else:
1065
+ # X_test_vect_avg.append(np.zeros(100, dtype=float))
1066
+
1067
+ # # for i, v in enumerate(X_train_vect_avg):
1068
+ # # print(len(data.iloc[i]), len(v))
1069
+
1070
+ # x_0 = X_train_vect_avg[0]
1071
+ # num_files_per_class = 100
1072
+ # y = [1] * num_files_per_class + [0] * num_files_per_class
1073
+ # w = np.zeros(X_train_vect_avg.shape[1])
1074
+ # x_0_dense = x_0.todense()
1075
+ # x_0.dot(w)
1076
+
1077
+ # w,b = sgd_for_lr_with_ce(X_train_vect_avg, y)
1078
+ # w
1079
+
1080
+ # sorted_vocab = sorted([(k,v) for k,v in enumerate(embedding_dict)],key=lambda x:x[1])
1081
+ # sorted_vocab = [a for (a,b) in sorted_vocab]
1082
+
1083
+ # sorted_words_weights = sorted([x for x in zip(sorted_vocab, w)], key=lambda x:x[1])
1084
+ # sorted_words_weights[-50:]
1085
+
1086
+ # preds = predict_y_lr(w,b,X_train_vect_avg)
1087
+
1088
+ # preds
1089
+
1090
+ # w,b = sgd_for_lr_with_ce(X_train_vect_avg, y, num_passes=10)
1091
+ # y_pred = predict_y_lr(w,b,X_train_vect_avg)
1092
+ # print(classification_report(y, y_pred))
1093
+
1094
+ # # compute for dev set
1095
+ # pos_dev_files = glob.glob('aclImdb/test/pos/*')
1096
+ # neg_dev_files = glob.glob('aclImdb/test/neg/*')
1097
+ # num_dev_files_per_class = 100
1098
+ # all_dev_files = pos_dev_files[:num_dev_files_per_class] + neg_dev_files[:num_dev_files_per_class]
1099
+ # # use the same vectorizer from before! otherwise features won't line up
1100
+ # # don't fit it again, just use it to transform!
1101
+ # # X_dev = vectorizer.transform(all_dev_files)
1102
+ # # y_dev = [1]* num_dev_files_per_class + [0]* num_dev_files_per_class
1103
+ # # # don't need new w and b, these are from out existing model
1104
+ # # y_dev_pred = predict_y_lr(w,b,X_dev)
1105
+ # # print(classification_report(y_dev, y_dev_pred))
1106
+
1107
+
1108
def sgd_for_lr_with_ce(X, y, num_passes=5, learning_rate = 0.1):
    """Fit logistic regression by stochastic gradient descent.

    Weights and bias start at zero. On every pass the rows of ``X`` are
    visited once in a freshly shuffled order, and after each row the
    parameters move against the cross-entropy gradient
    ``(sigmoid(w.x + b) - y) * x``.

    Args:
        X: 2-D feature matrix, one row per example.
        y: Labels, indexable by row number.
        num_passes: Number of full passes over the data (the only stopping
            rule used).
        learning_rate: Step size applied to every update.

    Returns:
        tuple: ``(w, b)``, the learned weight vector and bias.
    """
    n_rows = X.shape[0]
    n_cols = X.shape[1]

    weights = np.zeros(n_cols)
    bias = 0.0

    for _ in range(num_passes):
        # New random visiting order for each pass.
        visit_order = list(range(n_rows))
        random.shuffle(visit_order)

        for row in visit_order:
            features = X[row]
            target = y[row]

            # Predicted probability for this example.
            predicted = expit(features.dot(weights) + bias)

            # Shared factor of the weight and bias updates.
            step = learning_rate * (predicted - target)
            weights = weights - step * features
            bias = bias - step

    return weights, bias
1144
+
1145
+
1146
def predict_y_lr(w,b,X,threshold=0.5):
    """Predict binary labels with a logistic-regression model.

    Args:
        w: Weight vector with one entry per feature.
        b: Bias term.
        X: 2-D feature matrix, one row per example.
        threshold: Probability above which an example is labelled 1.

    Returns:
        Integer array of shape ``(n_examples, 1)`` holding 0/1 predictions.
    """
    # w is reshaped to a column vector so the matrix product lines up.
    logits = X.dot(w.reshape((-1, 1))) + b

    # Training optimises sigmoid(w.x + b), so the threshold must be applied
    # to that probability rather than to the raw logit.
    probabilities = expit(logits)

    return np.where(probabilities > threshold, 1, 0)
1157
+
1158
+
1159
def _load_word2vec_models(reuse_saved):
    """Return (cbow, skip-gram) models, loaded from disk or freshly trained."""
    if reuse_saved:
        return Word2Vec.load("word2vec.model"), Word2Vec.load("skip2vec.model")
    return train_embeddings()


def _load_pretrained_vectors(reuse_saved):
    """Return (urban, wiki) keyed vectors from saved models or .vec files."""
    if reuse_saved:
        return (KeyedVectors.load("urban2vec.model"),
                KeyedVectors.load("wiki2vec.model"))
    wiki_model = KeyedVectors.load_word2vec_format("wiki-news-300d-1M-subwords.vec", binary=False)
    ud_model = KeyedVectors.load_word2vec_format("ud_basic.vec", binary=False)
    # Save both so a later --skip run can load them without re-parsing the text files.
    wiki_model.save("wiki2vec.model")
    ud_model.save("urban2vec.model")
    return ud_model, wiki_model


def main():
    """Parse the command line and run the requested experiments.

    ``--compare`` and ``--bias`` need all four embeddings, ``--text`` only
    the CBOW model. Models are prepared once and shared between the
    requested experiments instead of being retrained or reloaded for each
    flag.

    Model sources:
        --skip: load every model from its saved ``.model`` file.
        --extra: load the saved Word2Vec models, but read the pretrained
            vectors from their ``.vec`` files (and save them).
        neither: train the Word2Vec models and read the pretrained vectors
            from their ``.vec`` files (and save them).
    """
    parser = argparse.ArgumentParser(
        prog='word_embedding',
        description='This program will train a word embedding model using simple wikipedia.',
        epilog='To skip training the model and to used the saved model "word2vec.model", use the command --skip or -s.'
    )
    parser.add_argument('-s', '--skip', action='store_true',
                        help='load all saved models instead of training')
    parser.add_argument('-e', '--extra', action='store_true',
                        help='load saved word2vec models, re-read pretrained .vec files')
    parser.add_argument('-b', '--bias', action='store_true',
                        help='quantify bias in the embeddings')
    parser.add_argument('-c', '--compare', action='store_true',
                        help='compare nearest neighbours across embeddings')
    parser.add_argument('-t', '--text', action='store_true',
                        help='train the text classifier on averaged embeddings')

    args = parser.parse_args()
    skip_model = None
    cbow_model = None
    ud_model = None
    wiki_model = None

    if args.compare or args.bias:
        cbow_model, skip_model = _load_word2vec_models(args.skip or args.extra)
        ud_model, wiki_model = _load_pretrained_vectors(args.skip)

    if args.compare:
        compare_embeddings(cbow_model, skip_model, ud_model, wiki_model)
    if args.bias:
        quantify_bias(cbow_model, skip_model, ud_model, wiki_model)
    if args.text:
        if cbow_model is None:
            # Only the CBOW model is needed here; as before, --extra alone
            # does not stop training for the text classifier.
            if args.skip:
                cbow_model = Word2Vec.load("word2vec.model")
            else:
                cbow_model, skip_model = train_embeddings()

        text_classifier(cbow_model)
1228
+ # data, sents = get_data()
1229
+ # cbow_classifier(cbow_model, data, sents)
1230
+
1231
+ # print("No errors?")
1232
+
1233
+
1234
+ if __name__ == "__main__":
1235
+ >>>>>>> 7d5b505 (New in-context model with working UI System)
1236
  main()