grg committed on
Commit
ce42d0c
·
1 Parent(s): f0948c9

Adding Mistral-Small-2409 and Qwen2.5

Browse files
Files changed (29) hide show
  1. static/leaderboard.csv +25 -20
  2. static/models_data/Mistral-Large-Instruct-2407/model_detail.html +6 -1
  3. static/models_data/Mistral-Small-Instruct-2409/cfa_metrics.csv +10 -0
  4. static/models_data/Mistral-Small-Instruct-2409/matrix.svg +1965 -0
  5. static/models_data/Mistral-Small-Instruct-2409/model_detail.html +5 -0
  6. static/models_data/Mistral-Small-Instruct-2409/ranks.svg +0 -0
  7. static/models_data/Mistral-Small-Instruct-2409/structure.svg +0 -0
  8. static/models_data/Qwen2.5-0.5B-Instruct/cfa_metrics.csv +10 -0
  9. static/models_data/Qwen2.5-0.5B-Instruct/matrix.svg +2004 -0
  10. static/models_data/Qwen2.5-0.5B-Instruct/model_detail.html +7 -0
  11. static/models_data/Qwen2.5-0.5B-Instruct/ranks.svg +0 -0
  12. static/models_data/Qwen2.5-0.5B-Instruct/structure.svg +0 -0
  13. static/models_data/Qwen2.5-32B-Instruct/cfa_metrics.csv +10 -0
  14. static/models_data/Qwen2.5-32B-Instruct/matrix.svg +1963 -0
  15. static/models_data/Qwen2.5-32B-Instruct/model_detail.html +7 -0
  16. static/models_data/Qwen2.5-32B-Instruct/ranks.svg +0 -0
  17. static/models_data/Qwen2.5-32B-Instruct/structure.svg +0 -0
  18. static/models_data/Qwen2.5-72B-Instruct/cfa_metrics.csv +10 -0
  19. static/models_data/Qwen2.5-72B-Instruct/matrix.svg +1967 -0
  20. static/models_data/Qwen2.5-72B-Instruct/model_detail.html +7 -0
  21. static/models_data/Qwen2.5-72B-Instruct/ranks.svg +0 -0
  22. static/models_data/Qwen2.5-72B-Instruct/structure.svg +0 -0
  23. static/models_data/Qwen2.5-7B-Instruct/cfa_metrics.csv +10 -0
  24. static/models_data/Qwen2.5-7B-Instruct/matrix.svg +1964 -0
  25. static/models_data/Qwen2.5-7B-Instruct/model_detail.html +7 -0
  26. static/models_data/Qwen2.5-7B-Instruct/ranks.svg +0 -0
  27. static/models_data/Qwen2.5-7B-Instruct/structure.svg +0 -0
  28. static/models_data/cardinal.svg +590 -352
  29. static/models_data/ordinal.svg +685 -415
static/leaderboard.csv CHANGED
@@ -1,21 +1,26 @@
1
  Model,Ordinal (Win rate),Cardinal (Score),RO Stability,Stress,CFI,SRMR,RMSEA
2
- phi-3-mini-128k-instruct,0.35891812865497075,0.39387631706052895,0.039299993295009855,0.281800547806919,0.7509527777777777,0.25489166666666674,0.22045000000000003
3
- phi-3-medium-128k-instruct,0.3691520467836257,0.4026069526718651,0.09692037989916814,0.2651981204439735,0.6727694444444445,0.2984500000000001,0.2759472222222221
4
- Mistral-7B-Instruct-v0.1,0.21125730994152045,0.30674462188144647,0.027216280472015988,0.2829498135031582,0.500288888888889,0.45314444444444446,0.4191027777777777
5
- Mistral-7B-Instruct-v0.2,0.4130116959064327,0.4028886762146369,0.14417876497818388,0.265188983528973,0.5787944444444445,0.35010277777777776,0.3171083333333333
6
- Mistral-7B-Instruct-v0.3,0.2902046783625731,0.34429493368035685,0.07960539866974455,0.2742399030139009,0.5231444444444444,0.4214972222222223,0.3914694444444443
7
- Mixtral-8x7B-Instruct-v0.1,0.5073099415204678,0.47204265176392696,0.21473356319081474,0.2624402608740656,0.6766166666666665,0.25611666666666666,0.24065277777777772
8
- Mixtral-8x22B-Instruct-v0.1,0.2967836257309941,0.3496962191659786,0.1414001940345544,0.2548838005881672,0.45902777777777776,0.4849916666666666,0.4871833333333333
9
- command_r_plus,0.6228070175438596,0.5698450422762357,0.3429686514651868,0.23811982320641845,0.7772111111111112,0.17755277777777778,0.17465277777777777
10
- llama_3_8b_instruct,0.5314327485380116,0.5066363890459272,0.24527785038654715,0.245806400289881,0.7348277777777779,0.20952222222222228,0.20751944444444437
11
- llama_3_70b_instruct,0.7741228070175438,0.7270613281502669,0.607020698814379,0.18525883672204868,0.8298166666666668,0.10965277777777771,0.14649722222222217
12
- llama_3.1_8b_instruct,0.5957602339181286,0.5599895255443657,0.4295080949846363,0.22060228669473025,0.6379333333333334,0.3225500000000001,0.3328972222222223
13
- llama_3.1_70b_instruct,0.8179824561403508,0.7630277652278956,0.691365862744007,0.1709718847084183,0.8203805555555554,0.14023055555555552,0.17041944444444446
14
- llama_3.1_405b_instruct_4bit,0.7112573099415205,0.6993503239272297,0.7232098126552619,0.1702199925365422,0.6062611111111111,0.3538527777777777,0.38022500000000004
15
- Qwen2-7B-Instruct,0.4780701754385965,0.46812644016430927,0.25108519506513916,0.25776537005719313,0.6248583333333334,0.32358611111111113,0.3028361111111111
16
- Qwen2-72B-Instruct,0.6235380116959064,0.64867678910782,0.6465993243020925,0.20297742879025626,0.5559722222222221,0.3575638888888889,0.39241388888888884
17
- gpt-3.5-turbo-0125,0.2368421052631579,0.328243163867074,0.08240359836763214,0.28728574920060357,0.4998916666666666,0.47583055555555553,0.4404444444444445
18
- gpt-4o-0513,0.7587719298245614,0.6713251724661671,0.5122163952167618,0.19201420113771173,0.7998694444444445,0.14606111111111109,0.1400583333333334
19
- gpt-4o-mini-2024-07-18,0.40058479532163743,0.40825697940501954,0.13575309046266867,0.2707065266105181,0.6141777777777777,0.32648055555555555,0.29394722222222214
20
- Mistral-Large-Instruct-2407,0.8428362573099416,0.7808285247091349,0.7644582301049158,0.16944638941325085,0.7604888888888888,0.18767499999999993,0.21457222222222228
21
- dummy,0.14985380116959063,0.2784036220050126,-0.009004148398032956,0.2928877637010999,0.5076361111111111,0.4973388888888889,0.4541638888888889
 
 
 
 
 
 
1
  Model,Ordinal (Win rate),Cardinal (Score),RO Stability,Stress,CFI,SRMR,RMSEA
2
+ phi-3-mini-128k-instruct,0.34490740740740744,0.39387631706052895,0.039299993295009855,0.281800547806919,0.7509527777777777,0.25489166666666674,0.22045000000000003
3
+ phi-3-medium-128k-instruct,0.34317129629629634,0.4026069526718651,0.09692037989916814,0.2651981204439735,0.6727694444444445,0.2984500000000001,0.2759472222222221
4
+ Mistral-7B-Instruct-v0.1,0.1996527777777778,0.30674462188144647,0.027216280472015988,0.2829498135031582,0.500288888888889,0.45314444444444446,0.4191027777777777
5
+ Mistral-7B-Instruct-v0.2,0.3755787037037038,0.4028886762146369,0.14417876497818388,0.265188983528973,0.5787944444444445,0.35010277777777776,0.3171083333333333
6
+ Mistral-7B-Instruct-v0.3,0.2708333333333333,0.34429493368035685,0.07960539866974455,0.2742399030139009,0.5231444444444444,0.4214972222222223,0.3914694444444443
7
+ Mixtral-8x7B-Instruct-v0.1,0.4496527777777778,0.47204265176392696,0.21473356319081474,0.2624402608740656,0.6766166666666665,0.25611666666666666,0.24065277777777772
8
+ Mixtral-8x22B-Instruct-v0.1,0.26620370370370366,0.3496962191659786,0.1414001940345544,0.2548838005881672,0.45902777777777776,0.4849916666666666,0.4871833333333333
9
+ command_r_plus,0.5815972222222222,0.5698450422762357,0.3429686514651868,0.23811982320641845,0.7772111111111112,0.17755277777777778,0.17465277777777777
10
+ llama_3_8b_instruct,0.48900462962962954,0.5066363890459272,0.24527785038654715,0.245806400289881,0.7348277777777779,0.20952222222222228,0.20751944444444437
11
+ llama_3_70b_instruct,0.7291666666666666,0.7270613281502669,0.607020698814379,0.18525883672204868,0.8298166666666668,0.10965277777777771,0.14649722222222217
12
+ llama_3.1_8b_instruct,0.5434027777777778,0.5599895255443657,0.4295080949846363,0.22060228669473025,0.6379333333333334,0.3225500000000001,0.3328972222222223
13
+ llama_3.1_70b_instruct,0.7847222222222222,0.7630277652278956,0.691365862744007,0.1709718847084183,0.8203805555555554,0.14023055555555552,0.17041944444444446
14
+ llama_3.1_405b_instruct_4bit,0.6886574074074073,0.6993503239272297,0.7232098126552619,0.1702199925365422,0.6062611111111111,0.3538527777777777,0.38022500000000004
15
+ Qwen2-7B-Instruct,0.43287037037037035,0.46812644016430927,0.25108519506513916,0.25776537005719313,0.6248583333333334,0.32358611111111113,0.3028361111111111
16
+ Qwen2-72B-Instruct,0.5810185185185186,0.64867678910782,0.6465993243020925,0.20297742879025626,0.5559722222222221,0.3575638888888889,0.39241388888888884
17
+ Qwen2.5-0.5B-Instruct,0.28877314814814814,0.3796838812739187,0.002970456550606876,0.2928913315666324,0.7497416666666666,0.24648888888888887,0.18477222222222223
18
+ Qwen2.5-7B-Instruct,0.6186342592592592,0.5896473181421169,0.333554494486959,0.2505866550331236,0.8311222222222222,0.10302222222222213,0.09455277777777782
19
+ Qwen2.5-32B-Instruct,0.7442129629629629,0.731635015756055,0.6724190751477237,0.1806656189868978,0.7584111111111111,0.19748055555555544,0.21686111111111106
20
+ Qwen2.5-72B-Instruct,0.7991898148148148,0.754401345305127,0.6974116787371809,0.16176650806326276,0.7859583333333332,0.177875,0.2007527777777779
21
+ gpt-3.5-turbo-0125,0.21643518518518517,0.328243163867074,0.08240359836763214,0.28728574920060357,0.4998916666666666,0.47583055555555553,0.4404444444444445
22
+ gpt-4o-0513,0.7025462962962963,0.6713251724661671,0.5122163952167618,0.19201420113771173,0.7998694444444445,0.14606111111111109,0.1400583333333334
23
+ gpt-4o-mini-2024-07-18,0.3628472222222222,0.40825697940501954,0.13575309046266867,0.2707065266105181,0.6141777777777777,0.32648055555555555,0.29394722222222214
24
+ Mistral-Large-Instruct-2407,0.8217592592592592,0.7808285247091349,0.7644582301049158,0.16944638941325085,0.7604888888888888,0.18767499999999993,0.21457222222222228
25
+ Mistral-Small-Instruct-2409,0.7083333333333335,0.7319149695591499,0.6416815833333804,0.1894343546381,0.7891722222222222,0.1387222222222222,0.17242222222222225
26
+ dummy,0.14872685185185186,0.2784036220050126,-0.009004148398032956,0.2928877637010999,0.5076361111111111,0.4973388888888889,0.4541638888888889
static/models_data/Mistral-Large-Instruct-2407/model_detail.html CHANGED
@@ -1 +1,6 @@
1
- <p>This model was released by Mistral AI</p>
 
 
 
 
 
 
1
+ <p>
2
+ This open-source model was created by <a href="https://mistral.ai/">Mistral AI</a>.
3
+ You can find the release blog post <a href="https://mistral.ai/news/mistral-large-2407/">here</a>.
4
+ The model is available on the huggingface hub: <a href="https://huggingface.co/mistralai/Mistral-Large-Instruct-2407">https://huggingface.co/mistralai/Mistral-Large-Instruct-2407</a>.
5
+ The 123B model supports up to 128K token context windows.
6
+ </p>
static/models_data/Mistral-Small-Instruct-2409/cfa_metrics.csv ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ Context chunk,CFI,TLI,SRMR,RMSEA
2
+ chunk_0,0.789825,0.71305,0.12095,0.167425
3
+ chunk_1,0.862975,0.8278750000000001,0.086675,0.110275
4
+ chunk_2,0.851325,0.796775,0.108275,0.132775
5
+ chunk_3,0.6166499999999999,0.572825,0.33632500000000004,0.355075
6
+ chunk_4,0.830525,0.769525,0.11827499999999999,0.15587499999999999
7
+ chunk_chess_0,0.823325,0.765625,0.10767499999999999,0.147825
8
+ chunk_grammar_1,0.77625,0.696125,0.12664999999999998,0.1973
9
+ chunk_no_conv,0.8255,0.7659750000000001,0.12215000000000001,0.163975
10
+ chunk_svs_no_conv,0.726175,0.6696500000000001,0.121525,0.121275
static/models_data/Mistral-Small-Instruct-2409/matrix.svg ADDED
static/models_data/Mistral-Small-Instruct-2409/model_detail.html ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ <p>
2
+ This open-source model was created by <a href="https://mistral.ai/">Mistral AI</a>.
3
+ The model is available on the huggingface hub: <a href="https://huggingface.co/mistralai/Mistral-Small-Instruct-2409">https://huggingface.co/mistralai/Mistral-Small-Instruct-2409</a>.
4
+ The 22B model supports up to 32K token sequences.
5
+ </p>
static/models_data/Mistral-Small-Instruct-2409/ranks.svg ADDED
static/models_data/Mistral-Small-Instruct-2409/structure.svg ADDED
static/models_data/Qwen2.5-0.5B-Instruct/cfa_metrics.csv ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ Context chunk,CFI,TLI,SRMR,RMSEA
2
+ chunk_0,0.7500249999999999,-2.21615,0.10572500000000001,0.0545
3
+ chunk_1,0.75,1.25775,0.3161,0.25
4
+ chunk_2,0.73175,0.9472249999999999,0.31827500000000003,0.2564
5
+ chunk_3,0.75,1.0417,0.32295,0.25
6
+ chunk_4,0.8639749999999999,0.9359999999999999,0.10375,0.0408
7
+ chunk_chess_0,0.904775,0.28604999999999997,0.09230000000000001,0.020275
8
+ chunk_grammar_1,0.75,5.354475,0.30865,0.25
9
+ chunk_no_conv,0.600475,0.04259999999999997,0.322475,0.27175000000000005
10
+ chunk_svs_no_conv,0.646675,0.6581250000000001,0.328175,0.269225
static/models_data/Qwen2.5-0.5B-Instruct/matrix.svg ADDED
static/models_data/Qwen2.5-0.5B-Instruct/model_detail.html ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ <p>
2
+ This open-source model was created by <a href="https://qwenlm.github.io/">The Qwen Team of Alibaba Cloud</a>.
3
+ You can find the release blog post <a href="https://qwenlm.github.io/blog/qwen2.5/">here</a>.
4
+ The model is available on the huggingface hub: <a href="https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct">https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct</a>.
5
+ The 0.5B model was pretrained on 18 trillion tokens spanning 29 languages.
6
+ It supports up to 32K tokens and can generate up to 8K tokens.
7
+ </p>
static/models_data/Qwen2.5-0.5B-Instruct/ranks.svg ADDED
static/models_data/Qwen2.5-0.5B-Instruct/structure.svg ADDED
static/models_data/Qwen2.5-32B-Instruct/cfa_metrics.csv ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ Context chunk,CFI,TLI,SRMR,RMSEA
2
+ chunk_0,0.407525,0.379375,0.54825,0.5520999999999999
3
+ chunk_1,0.662925,0.6309750000000001,0.324725,0.33095
4
+ chunk_2,0.8544,0.804925,0.09639999999999999,0.1127
5
+ chunk_3,0.910725,0.8782500000000001,0.08855,0.0821
6
+ chunk_4,0.63385,0.5929,0.320975,0.34572499999999995
7
+ chunk_chess_0,0.8669749999999999,0.8261499999999999,0.09075,0.1154
8
+ chunk_grammar_1,0.8252,0.7672,0.090675,0.1505
9
+ chunk_no_conv,0.8589749999999999,0.81545,0.110475,0.13344999999999999
10
+ chunk_svs_no_conv,0.805125,0.782975,0.106525,0.128825
static/models_data/Qwen2.5-32B-Instruct/matrix.svg ADDED
static/models_data/Qwen2.5-32B-Instruct/model_detail.html ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ <p>
2
+ This open-source model was created by <a href="https://qwenlm.github.io/">The Qwen Team of Alibaba Cloud</a>.
3
+ You can find the release blog post <a href="https://qwenlm.github.io/blog/qwen2.5/">here</a>.
4
+ The model is available on the huggingface hub: <a href="https://huggingface.co/Qwen/Qwen2.5-32B-Instruct">https://huggingface.co/Qwen/Qwen2.5-32B-Instruct</a>.
5
+ The 32B model was pretrained on 18 trillion tokens spanning 29 languages.
6
+ It supports up to 128K tokens and can generate up to 8K tokens.
7
+ </p>
static/models_data/Qwen2.5-32B-Instruct/ranks.svg ADDED
static/models_data/Qwen2.5-32B-Instruct/structure.svg ADDED
static/models_data/Qwen2.5-72B-Instruct/cfa_metrics.csv ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ Context chunk,CFI,TLI,SRMR,RMSEA
2
+ chunk_0,0.848975,0.804025,0.102925,0.09962499999999999
3
+ chunk_1,0.87305,0.83225,0.10135,0.122
4
+ chunk_2,0.82955,0.7724,0.106,0.12817499999999998
5
+ chunk_3,0.86325,0.8176,0.11270000000000001,0.13125
6
+ chunk_4,0.615525,0.568375,0.32222500000000004,0.388625
7
+ chunk_chess_0,0.6388,0.59945,0.32495,0.3749
8
+ chunk_grammar_1,0.66355,0.6322749999999999,0.33272500000000005,0.33187500000000003
9
+ chunk_no_conv,0.889775,0.8521249999999999,0.09684999999999999,0.14735
10
+ chunk_svs_no_conv,0.85115,0.8130499999999999,0.10115,0.082975
static/models_data/Qwen2.5-72B-Instruct/matrix.svg ADDED
static/models_data/Qwen2.5-72B-Instruct/model_detail.html ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ <p>
2
+ This open-source model was created by <a href="https://qwenlm.github.io/">The Qwen Team of Alibaba Cloud</a>.
3
+ You can find the release blog post <a href="https://qwenlm.github.io/blog/qwen2.5/">here</a>.
4
+ The model is available on the huggingface hub: <a href="https://huggingface.co/Qwen/Qwen2.5-72B-Instruct">https://huggingface.co/Qwen/Qwen2.5-72B-Instruct</a>.
5
+ The 72B model was pretrained on 18 trillion tokens spanning 29 languages.
6
+ It supports up to 128K tokens and can generate up to 8K tokens.
7
+ </p>
static/models_data/Qwen2.5-72B-Instruct/ranks.svg ADDED
static/models_data/Qwen2.5-72B-Instruct/structure.svg ADDED
static/models_data/Qwen2.5-7B-Instruct/cfa_metrics.csv ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ Context chunk,CFI,TLI,SRMR,RMSEA
2
+ chunk_0,0.7448499999999999,0.6389,0.1069,0.11257499999999998
3
+ chunk_1,0.83535,0.8164499999999999,0.10495,0.064675
4
+ chunk_2,0.8623,-3.591025,0.09625,0.071425
5
+ chunk_3,0.88345,0.886125,0.097575,0.05395
6
+ chunk_4,0.9301250000000001,0.9451499999999999,0.089075,0.049049999999999996
7
+ chunk_chess_0,0.8341,0.764,0.112125,0.122975
8
+ chunk_grammar_1,0.865475,0.877975,0.0904,0.0895
9
+ chunk_no_conv,0.780725,0.695675,0.10905000000000001,0.15252499999999997
10
+ chunk_svs_no_conv,0.743725,0.6745749999999999,0.120875,0.1343
static/models_data/Qwen2.5-7B-Instruct/matrix.svg ADDED
static/models_data/Qwen2.5-7B-Instruct/model_detail.html ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ <p>
2
+ This open-source model was created by <a href="https://qwenlm.github.io/">The Qwen Team of Alibaba Cloud</a>.
3
+ You can find the release blog post <a href="https://qwenlm.github.io/blog/qwen2.5/">here</a>.
4
+ The model is available on the huggingface hub: <a href="https://huggingface.co/Qwen/Qwen2.5-7B-Instruct">https://huggingface.co/Qwen/Qwen2.5-7B-Instruct</a>.
5
+ The 7B model was pretrained on 18 trillion tokens spanning 29 languages.
6
+ It supports up to 128K tokens and can generate up to 8K tokens.
7
+ </p>
static/models_data/Qwen2.5-7B-Instruct/ranks.svg ADDED
static/models_data/Qwen2.5-7B-Instruct/structure.svg ADDED
static/models_data/cardinal.svg CHANGED
static/models_data/ordinal.svg CHANGED