Muennighoff
committed on
Commit
•
74d3f8d
1
Parent(s):
ed6e7ab
Better model with bs=1024
Browse files
- README.md +1 -1
- config.json +1 -1
- config_sentence_transformers.json +2 -2
- eval/SGPT-2.7B-weightedmean-nli-bitfit_weighted_mean_layer-1_results_average_precision.json +7 -0
- eval/SGPT-2.7B-weightedmean-nli-bitfit_weighted_mean_layer-1_results_detailed.json +66 -0
- eval/quora.json +1 -0
- eval/similarity_evaluation_sts-dev_results.csv +11 -11
- pytorch_model.bin → pytorch_model-00001-of-00002.bin +2 -2
- pytorch_model-00002-of-00002.bin +3 -0
- pytorch_model.bin.index.json +3 -0
- similarity_evaluation_sts-test_results.csv +0 -2
- tokenizer.json +0 -0
- tokenizer_config.json +1 -1
README.md
CHANGED
@@ -14,7 +14,7 @@ For usage instructions, refer to our codebase: https://github.com/Muennighoff/sg
|
|
14 |
|
15 |
## Evaluation Results
|
16 |
|
17 |
-
For eval results, refer to our paper: https://arxiv.org/abs/2202.08904
|
18 |
|
19 |
## Training
|
20 |
The model was trained with the parameters:
|
|
|
14 |
|
15 |
## Evaluation Results
|
16 |
|
17 |
+
For eval results, refer to the eval folder or our paper: https://arxiv.org/abs/2202.08904
|
18 |
|
19 |
## Training
|
20 |
The model was trained with the parameters:
|
config.json
CHANGED
@@ -75,7 +75,7 @@
|
|
75 |
},
|
76 |
"tokenizer_class": "GPT2Tokenizer",
|
77 |
"torch_dtype": "float32",
|
78 |
-
"transformers_version": "4.
|
79 |
"use_cache": true,
|
80 |
"vocab_size": 50257,
|
81 |
"window_size": 256
|
|
|
75 |
},
|
76 |
"tokenizer_class": "GPT2Tokenizer",
|
77 |
"torch_dtype": "float32",
|
78 |
+
"transformers_version": "4.20.0.dev0",
|
79 |
"use_cache": true,
|
80 |
"vocab_size": 50257,
|
81 |
"window_size": 256
|
config_sentence_transformers.json
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
{
|
2 |
"__version__": {
|
3 |
"sentence_transformers": "2.1.0",
|
4 |
-
"transformers": "4.
|
5 |
-
"pytorch": "1.10.
|
6 |
}
|
7 |
}
|
|
|
1 |
{
|
2 |
"__version__": {
|
3 |
"sentence_transformers": "2.1.0",
|
4 |
+
"transformers": "4.20.0.dev0",
|
5 |
+
"pytorch": "1.10.2"
|
6 |
}
|
7 |
}
|
eval/SGPT-2.7B-weightedmean-nli-bitfit_weighted_mean_layer-1_results_average_precision.json
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"askubuntu": 57.48,
|
3 |
+
"cqadupstack": 14.04,
|
4 |
+
"twitterpara": 73.4,
|
5 |
+
"scidocs": 74.69,
|
6 |
+
"avg": 54.9025
|
7 |
+
}
|
eval/SGPT-2.7B-weightedmean-nli-bitfit_weighted_mean_layer-1_results_detailed.json
ADDED
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"askubuntu": {
|
3 |
+
"map_askubuntu_title": 57.48,
|
4 |
+
"p@1_askubuntu_title": 56.99,
|
5 |
+
"p@5_askubuntu_title": 43.23,
|
6 |
+
"mrr_askubuntu_title": 70.74
|
7 |
+
},
|
8 |
+
"cqadupstack": {
|
9 |
+
"map@100_cqadupstack_unix": 14.82,
|
10 |
+
"ndcg@10_cqadupstack_unix": 16.39,
|
11 |
+
"map@100_cqadupstack_gaming": 26.14,
|
12 |
+
"ndcg@10_cqadupstack_gaming": 28.7,
|
13 |
+
"map@100_cqadupstack_wordpress": 4.64,
|
14 |
+
"ndcg@10_cqadupstack_wordpress": 5.88,
|
15 |
+
"map@100_cqadupstack_stats": 15.42,
|
16 |
+
"ndcg@10_cqadupstack_stats": 16.15,
|
17 |
+
"map@100_cqadupstack_tex": 8.28,
|
18 |
+
"ndcg@10_cqadupstack_tex": 8.96,
|
19 |
+
"map@100_cqadupstack_english": 15.02,
|
20 |
+
"ndcg@10_cqadupstack_english": 16.54,
|
21 |
+
"map@100_cqadupstack_programmers": 13.27,
|
22 |
+
"ndcg@10_cqadupstack_programmers": 14.41,
|
23 |
+
"map@100_cqadupstack_mathematica": 11.74,
|
24 |
+
"ndcg@10_cqadupstack_mathematica": 13.47,
|
25 |
+
"map@100_cqadupstack_physics": 16.81,
|
26 |
+
"ndcg@10_cqadupstack_physics": 18.61,
|
27 |
+
"map@100_cqadupstack_gis": 15.47,
|
28 |
+
"ndcg@10_cqadupstack_gis": 16.67,
|
29 |
+
"map@100_cqadupstack_webmasters": 9.72,
|
30 |
+
"ndcg@10_cqadupstack_webmasters": 10.48,
|
31 |
+
"map@100_cqadupstack_android": 17.12,
|
32 |
+
"ndcg@10_cqadupstack_android": 19.1,
|
33 |
+
"map@100_cqadupstack_avg": 14.04,
|
34 |
+
"ndcg@10_cqadupstack_avg": 15.45
|
35 |
+
},
|
36 |
+
"twitterpara": {
|
37 |
+
"ap_twitter_twitterurl": 75.84,
|
38 |
+
"spearman_twitter_twitterurl": 70.81,
|
39 |
+
"ap_twitter_pit": 70.96,
|
40 |
+
"spearman_twitter_pit": 56.64,
|
41 |
+
"ap_twitter_avg": 73.4,
|
42 |
+
"spearman_twitter_avg": 63.73
|
43 |
+
},
|
44 |
+
"scidocs": {
|
45 |
+
"map_scidocs_cite_euclidean": 72.29,
|
46 |
+
"ndcg_scidocs_cite_euclidean": 86.43,
|
47 |
+
"map_scidocs_cite_cosine": 72.29,
|
48 |
+
"ndcg_scidocs_cite_cosine": 86.43,
|
49 |
+
"map_scidocs_cocite_euclidean": 75.36,
|
50 |
+
"ndcg_scidocs_cocite_euclidean": 88.17,
|
51 |
+
"map_scidocs_cocite_cosine": 75.36,
|
52 |
+
"ndcg_scidocs_cocite_cosine": 88.17,
|
53 |
+
"map_scidocs_coview_euclidean": 76.46,
|
54 |
+
"ndcg_scidocs_coview_euclidean": 87.8,
|
55 |
+
"map_scidocs_coview_cosine": 76.46,
|
56 |
+
"ndcg_scidocs_coview_cosine": 87.8,
|
57 |
+
"map_scidocs_coread_euclidean": 74.65,
|
58 |
+
"ndcg_scidocs_coread_euclidean": 87.0,
|
59 |
+
"map_scidocs_coread_cosine": 74.65,
|
60 |
+
"ndcg_scidocs_coread_cosine": 87.0,
|
61 |
+
"map_scidocs_euclidean_avg": 74.69,
|
62 |
+
"ndcg_scidocs_euclidean_avg": 87.35,
|
63 |
+
"map_scidocs_cosine_avg": 74.69,
|
64 |
+
"ndcg_scidocs_cosine_avg": 87.35
|
65 |
+
}
|
66 |
+
}
|
eval/quora.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"SGPT-2.7B-weightedmean-nli-bitfit": {"quora": {"NDCG@1": 0.7461, "NDCG@3": 0.79099, "NDCG@5": 0.80989, "NDCG@10": 0.82645, "NDCG@100": 0.84542, "NDCG@1000": 0.8478}}}
|
eval/similarity_evaluation_sts-dev_results.csv
CHANGED
@@ -1,12 +1,12 @@
|
|
1 |
epoch,steps,cosine_pearson,cosine_spearman,euclidean_pearson,euclidean_spearman,manhattan_pearson,manhattan_spearman,dot_pearson,dot_spearman
|
2 |
-
0,
|
3 |
-
0,
|
4 |
-
0,
|
5 |
-
0,
|
6 |
-
0,
|
7 |
-
0,
|
8 |
-
0,
|
9 |
-
0,
|
10 |
-
0,
|
11 |
-
0,
|
12 |
-
0,-1,0.
|
|
|
1 |
epoch,steps,cosine_pearson,cosine_spearman,euclidean_pearson,euclidean_spearman,manhattan_pearson,manhattan_spearman,dot_pearson,dot_spearman
|
2 |
+
0,440,0.8547642811744708,0.8604042254467369,0.8564294919751527,0.8580900991676403,0.8592109600985026,0.8606819249266364,0.7727095196030622,0.7702266970220912
|
3 |
+
0,880,0.8605577862452674,0.8664787776815549,0.8594251818914206,0.8613580463896765,0.8625107129203857,0.8646695024116025,0.7777588243182069,0.7747923460803676
|
4 |
+
0,1320,0.8641797068951016,0.8701385809189678,0.8600269510402324,0.8623718727121046,0.8630083477192438,0.8655171844912587,0.7819169473167653,0.7801439440796124
|
5 |
+
0,1760,0.863861676991937,0.8699711768888497,0.8610538565702486,0.8639273844617363,0.8638166020097834,0.8668464202840234,0.7762041862089968,0.7743392283299438
|
6 |
+
0,2200,0.8667562252351253,0.8722428457163393,0.8609922973987619,0.8640152137038429,0.8637144738269167,0.866880911411029,0.7797478785593531,0.7772643228752733
|
7 |
+
0,2640,0.8658548494423817,0.8702345916613825,0.8583041242377912,0.8613687812725296,0.8606700693927242,0.8638960499205391,0.7781537908033099,0.7761412829543439
|
8 |
+
0,3080,0.8643810926871549,0.8692556763950754,0.857059374227981,0.8600999939200575,0.8594169577592663,0.8625821753483399,0.7756975962910497,0.7737551514144106
|
9 |
+
0,3520,0.8661238263202532,0.8701186430665476,0.8576723473616406,0.8614191549727733,0.8600335298933423,0.8639555339473548,0.777608544440925,0.7758958610767906
|
10 |
+
0,3960,0.8659908588458113,0.8699200451809654,0.8570869831042444,0.860628569017929,0.8594235992474281,0.8631523750300969,0.7746604455122261,0.7731850887434243
|
11 |
+
0,4400,0.8662536580670237,0.8702266564863804,0.8571446774934243,0.8608170966035958,0.8594735292127258,0.8633222114462352,0.7763396949906898,0.7751198872316742
|
12 |
+
0,-1,0.8662583413659,0.8702579991508459,0.8571471752177104,0.8608379873119169,0.8594701305965878,0.8633306420570356,0.7763449731290442,0.7751015402439239
|
pytorch_model.bin → pytorch_model-00001-of-00002.bin
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3328d67e21cfc12696ed04be3b111723b5d0e712ecc53de8cacf360745de42c5
|
3 |
+
size 9996965077
|
pytorch_model-00002-of-00002.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:020dafb349b69af0f5e674afb6950199a2b705093f1a618500e9a55809240b52
|
3 |
+
size 742637183
|
pytorch_model.bin.index.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1faacf954a349388fab25a3b20e1f0a1e09d87d1eb840569b3af5e4333b3785a
|
3 |
+
size 32846
|
similarity_evaluation_sts-test_results.csv
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
epoch,steps,cosine_pearson,cosine_spearman,euclidean_pearson,euclidean_spearman,manhattan_pearson,manhattan_spearman,dot_pearson,dot_spearman
|
2 |
-
-1,-1,0.8421195813886659,0.8544705934133663,0.8331975612681435,0.8365254010816424,0.8308796371031103,0.8350842363910842,0.7416086337173262,0.730738158975577
|
|
|
|
|
|
tokenizer.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"unk_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "bos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_prefix_space": false, "model_max_length": 2048, "special_tokens_map_file": null, "name_or_path": "EleutherAI/gpt-neo-2.7B", "errors": "replace", "tokenizer_class": "GPT2Tokenizer"}
|
|
|
1 |
+
{"unk_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "bos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_prefix_space": false, "model_max_length": 2048, "special_tokens_map_file": null, "name_or_path": "EleutherAI/gpt-neo-2.7B", "errors": "replace", "pad_token": null, "add_bos_token": false, "tokenizer_class": "GPT2Tokenizer"}
|