MaziyarPanahi committed
Commit 52538ad · verified · 1 Parent(s): 6ea2490

Update README.md (#2)

- Update README.md (9c171ea698ca3574a9c0c3db01b4546cfc27f764)

Files changed (1)
  1. README.md +385 -0
README.md CHANGED
@@ -75,4 +75,389 @@ Cons of Docker System:
  - Learning curve - It takes time to learn how to use Docker effectively, as there are many commands and concepts involved.
  - Limited customization options - While Docker provides some basic configuration options, more advanced features such as network routing require additional tools.
  - Performance overhead - Running multiple containers on a single host may result in slower performance due to increased memory usage.</s>
+ ```
+
+ ## Eval
+
+ ```python
+ {
+     "all": {
+         "acc": 0.6309850839451187,
+         "acc_stderr": 0.032333688535613636,
+         "acc_norm": 0.6368691004374645,
+         "acc_norm_stderr": 0.03298401757997533,
+         "mc1": 0.29008567931456547,
+         "mc1_stderr": 0.01588623687420952,
+         "mc2": 0.41501661742948026,
+         "mc2_stderr": 0.014285902986671931
+     },
+     "harness|arc:challenge|25": {
+         "acc": 0.5750853242320819,
+         "acc_stderr": 0.014445698968520767,
+         "acc_norm": 0.6092150170648464,
+         "acc_norm_stderr": 0.01425856388051378
+     },
+     "harness|hellaswag|10": {
+         "acc": 0.6221868153754232,
+         "acc_stderr": 0.0048384969668239025,
+         "acc_norm": 0.8212507468631747,
+         "acc_norm_stderr": 0.0038235918141330347
+     },
+     "harness|hendrycksTest-abstract_algebra|5": {
+         "acc": 0.32,
+         "acc_stderr": 0.046882617226215034,
+         "acc_norm": 0.32,
+         "acc_norm_stderr": 0.046882617226215034
+     },
+     "harness|hendrycksTest-anatomy|5": {
+         "acc": 0.6,
+         "acc_stderr": 0.04232073695151589,
+         "acc_norm": 0.6,
+         "acc_norm_stderr": 0.04232073695151589
+     },
+     "harness|hendrycksTest-astronomy|5": {
+         "acc": 0.6447368421052632,
+         "acc_stderr": 0.038947344870133176,
+         "acc_norm": 0.6447368421052632,
+         "acc_norm_stderr": 0.038947344870133176
+     },
+     "harness|hendrycksTest-business_ethics|5": {
+         "acc": 0.57,
+         "acc_stderr": 0.04975698519562428,
+         "acc_norm": 0.57,
+         "acc_norm_stderr": 0.04975698519562428
+     },
+     "harness|hendrycksTest-clinical_knowledge|5": {
+         "acc": 0.6792452830188679,
+         "acc_stderr": 0.02872750295788027,
+         "acc_norm": 0.6792452830188679,
+         "acc_norm_stderr": 0.02872750295788027
+     },
+     "harness|hendrycksTest-college_biology|5": {
+         "acc": 0.7430555555555556,
+         "acc_stderr": 0.03653946969442099,
+         "acc_norm": 0.7430555555555556,
+         "acc_norm_stderr": 0.03653946969442099
+     },
+     "harness|hendrycksTest-college_chemistry|5": {
+         "acc": 0.49,
+         "acc_stderr": 0.05024183937956912,
+         "acc_norm": 0.49,
+         "acc_norm_stderr": 0.05024183937956912
+     },
+     "harness|hendrycksTest-college_computer_science|5": {
+         "acc": 0.56,
+         "acc_stderr": 0.04988876515698589,
+         "acc_norm": 0.56,
+         "acc_norm_stderr": 0.04988876515698589
+     },
+     "harness|hendrycksTest-college_mathematics|5": {
+         "acc": 0.36,
+         "acc_stderr": 0.048241815132442176,
+         "acc_norm": 0.36,
+         "acc_norm_stderr": 0.048241815132442176
+     },
+     "harness|hendrycksTest-college_medicine|5": {
+         "acc": 0.653179190751445,
+         "acc_stderr": 0.036291466701596636,
+         "acc_norm": 0.653179190751445,
+         "acc_norm_stderr": 0.036291466701596636
+     },
+     "harness|hendrycksTest-college_physics|5": {
+         "acc": 0.4019607843137255,
+         "acc_stderr": 0.048786087144669955,
+         "acc_norm": 0.4019607843137255,
+         "acc_norm_stderr": 0.048786087144669955
+     },
+     "harness|hendrycksTest-computer_security|5": {
+         "acc": 0.79,
+         "acc_stderr": 0.04093601807403326,
+         "acc_norm": 0.79,
+         "acc_norm_stderr": 0.04093601807403326
+     },
+     "harness|hendrycksTest-conceptual_physics|5": {
+         "acc": 0.5702127659574469,
+         "acc_stderr": 0.03236214467715564,
+         "acc_norm": 0.5702127659574469,
+         "acc_norm_stderr": 0.03236214467715564
+     },
+     "harness|hendrycksTest-econometrics|5": {
+         "acc": 0.49122807017543857,
+         "acc_stderr": 0.047028804320496165,
+         "acc_norm": 0.49122807017543857,
+         "acc_norm_stderr": 0.047028804320496165
+     },
+     "harness|hendrycksTest-electrical_engineering|5": {
+         "acc": 0.5862068965517241,
+         "acc_stderr": 0.04104269211806232,
+         "acc_norm": 0.5862068965517241,
+         "acc_norm_stderr": 0.04104269211806232
+     },
+     "harness|hendrycksTest-elementary_mathematics|5": {
+         "acc": 0.3915343915343915,
+         "acc_stderr": 0.025138091388851116,
+         "acc_norm": 0.3915343915343915,
+         "acc_norm_stderr": 0.025138091388851116
+     },
+     "harness|hendrycksTest-formal_logic|5": {
+         "acc": 0.4444444444444444,
+         "acc_stderr": 0.04444444444444449,
+         "acc_norm": 0.4444444444444444,
+         "acc_norm_stderr": 0.04444444444444449
+     },
+     "harness|hendrycksTest-global_facts|5": {
+         "acc": 0.32,
+         "acc_stderr": 0.04688261722621504,
+         "acc_norm": 0.32,
+         "acc_norm_stderr": 0.04688261722621504
+     },
+     "harness|hendrycksTest-high_school_biology|5": {
+         "acc": 0.7419354838709677,
+         "acc_stderr": 0.02489246917246283,
+         "acc_norm": 0.7419354838709677,
+         "acc_norm_stderr": 0.02489246917246283
+     },
+     "harness|hendrycksTest-high_school_chemistry|5": {
+         "acc": 0.5024630541871922,
+         "acc_stderr": 0.035179450386910616,
+         "acc_norm": 0.5024630541871922,
+         "acc_norm_stderr": 0.035179450386910616
+     },
+     "harness|hendrycksTest-high_school_computer_science|5": {
+         "acc": 0.67,
+         "acc_stderr": 0.047258156262526066,
+         "acc_norm": 0.67,
+         "acc_norm_stderr": 0.047258156262526066
+     },
+     "harness|hendrycksTest-high_school_european_history|5": {
+         "acc": 0.7575757575757576,
+         "acc_stderr": 0.03346409881055953,
+         "acc_norm": 0.7575757575757576,
+         "acc_norm_stderr": 0.03346409881055953
+     },
+     "harness|hendrycksTest-high_school_geography|5": {
+         "acc": 0.7929292929292929,
+         "acc_stderr": 0.028869778460267042,
+         "acc_norm": 0.7929292929292929,
+         "acc_norm_stderr": 0.028869778460267042
+     },
+     "harness|hendrycksTest-high_school_government_and_politics|5": {
+         "acc": 0.8601036269430051,
+         "acc_stderr": 0.025033870583015184,
+         "acc_norm": 0.8601036269430051,
+         "acc_norm_stderr": 0.025033870583015184
+     },
+     "harness|hendrycksTest-high_school_macroeconomics|5": {
+         "acc": 0.6358974358974359,
+         "acc_stderr": 0.024396672985094764,
+         "acc_norm": 0.6358974358974359,
+         "acc_norm_stderr": 0.024396672985094764
+     },
+     "harness|hendrycksTest-high_school_mathematics|5": {
+         "acc": 0.362962962962963,
+         "acc_stderr": 0.029318203645206865,
+         "acc_norm": 0.362962962962963,
+         "acc_norm_stderr": 0.029318203645206865
+     },
+     "harness|hendrycksTest-high_school_microeconomics|5": {
+         "acc": 0.6218487394957983,
+         "acc_stderr": 0.03149930577784906,
+         "acc_norm": 0.6218487394957983,
+         "acc_norm_stderr": 0.03149930577784906
+     },
+     "harness|hendrycksTest-high_school_physics|5": {
+         "acc": 0.32450331125827814,
+         "acc_stderr": 0.038227469376587525,
+         "acc_norm": 0.32450331125827814,
+         "acc_norm_stderr": 0.038227469376587525
+     },
+     "harness|hendrycksTest-high_school_psychology|5": {
+         "acc": 0.8146788990825689,
+         "acc_stderr": 0.016659279700295838,
+         "acc_norm": 0.8146788990825689,
+         "acc_norm_stderr": 0.016659279700295838
+     },
+     "harness|hendrycksTest-high_school_statistics|5": {
+         "acc": 0.49537037037037035,
+         "acc_stderr": 0.03409825519163572,
+         "acc_norm": 0.49537037037037035,
+         "acc_norm_stderr": 0.03409825519163572
+     },
+     "harness|hendrycksTest-high_school_us_history|5": {
+         "acc": 0.7892156862745098,
+         "acc_stderr": 0.028626547912437406,
+         "acc_norm": 0.7892156862745098,
+         "acc_norm_stderr": 0.028626547912437406
+     },
+     "harness|hendrycksTest-high_school_world_history|5": {
+         "acc": 0.7552742616033755,
+         "acc_stderr": 0.027985699387036423,
+         "acc_norm": 0.7552742616033755,
+         "acc_norm_stderr": 0.027985699387036423
+     },
+     "harness|hendrycksTest-human_aging|5": {
+         "acc": 0.6636771300448431,
+         "acc_stderr": 0.031708824268455,
+         "acc_norm": 0.6636771300448431,
+         "acc_norm_stderr": 0.031708824268455
+     },
+     "harness|hendrycksTest-human_sexuality|5": {
+         "acc": 0.7862595419847328,
+         "acc_stderr": 0.0359546161177469,
+         "acc_norm": 0.7862595419847328,
+         "acc_norm_stderr": 0.0359546161177469
+     },
+     "harness|hendrycksTest-international_law|5": {
+         "acc": 0.7933884297520661,
+         "acc_stderr": 0.03695980128098824,
+         "acc_norm": 0.7933884297520661,
+         "acc_norm_stderr": 0.03695980128098824
+     },
+     "harness|hendrycksTest-jurisprudence|5": {
+         "acc": 0.7592592592592593,
+         "acc_stderr": 0.04133119440243838,
+         "acc_norm": 0.7592592592592593,
+         "acc_norm_stderr": 0.04133119440243838
+     },
+     "harness|hendrycksTest-logical_fallacies|5": {
+         "acc": 0.803680981595092,
+         "acc_stderr": 0.031207970394709218,
+         "acc_norm": 0.803680981595092,
+         "acc_norm_stderr": 0.031207970394709218
+     },
+     "harness|hendrycksTest-machine_learning|5": {
+         "acc": 0.5178571428571429,
+         "acc_stderr": 0.047427623612430116,
+         "acc_norm": 0.5178571428571429,
+         "acc_norm_stderr": 0.047427623612430116
+     },
+     "harness|hendrycksTest-management|5": {
+         "acc": 0.8252427184466019,
+         "acc_stderr": 0.03760178006026621,
+         "acc_norm": 0.8252427184466019,
+         "acc_norm_stderr": 0.03760178006026621
+     },
+     "harness|hendrycksTest-marketing|5": {
+         "acc": 0.8632478632478633,
+         "acc_stderr": 0.022509033937077816,
+         "acc_norm": 0.8632478632478633,
+         "acc_norm_stderr": 0.022509033937077816
+     },
+     "harness|hendrycksTest-medical_genetics|5": {
+         "acc": 0.74,
+         "acc_stderr": 0.04408440022768078,
+         "acc_norm": 0.74,
+         "acc_norm_stderr": 0.04408440022768078
+     },
+     "harness|hendrycksTest-miscellaneous|5": {
+         "acc": 0.8173690932311622,
+         "acc_stderr": 0.013816335389973136,
+         "acc_norm": 0.8173690932311622,
+         "acc_norm_stderr": 0.013816335389973136
+     },
+     "harness|hendrycksTest-moral_disputes|5": {
+         "acc": 0.7023121387283237,
+         "acc_stderr": 0.024617055388677,
+         "acc_norm": 0.7023121387283237,
+         "acc_norm_stderr": 0.024617055388677
+     },
+     "harness|hendrycksTest-moral_scenarios|5": {
+         "acc": 0.2335195530726257,
+         "acc_stderr": 0.014149575348976269,
+         "acc_norm": 0.2335195530726257,
+         "acc_norm_stderr": 0.014149575348976269
+     },
+     "harness|hendrycksTest-nutrition|5": {
+         "acc": 0.7450980392156863,
+         "acc_stderr": 0.024954184324879905,
+         "acc_norm": 0.7450980392156863,
+         "acc_norm_stderr": 0.024954184324879905
+     },
+     "harness|hendrycksTest-philosophy|5": {
+         "acc": 0.7106109324758842,
+         "acc_stderr": 0.025755865922632945,
+         "acc_norm": 0.7106109324758842,
+         "acc_norm_stderr": 0.025755865922632945
+     },
+     "harness|hendrycksTest-prehistory|5": {
+         "acc": 0.7191358024691358,
+         "acc_stderr": 0.025006469755799215,
+         "acc_norm": 0.7191358024691358,
+         "acc_norm_stderr": 0.025006469755799215
+     },
+     "harness|hendrycksTest-professional_accounting|5": {
+         "acc": 0.4716312056737589,
+         "acc_stderr": 0.029779450957303062,
+         "acc_norm": 0.4716312056737589,
+         "acc_norm_stderr": 0.029779450957303062
+     },
+     "harness|hendrycksTest-professional_law|5": {
+         "acc": 0.4498044328552803,
+         "acc_stderr": 0.012705721498565107,
+         "acc_norm": 0.4498044328552803,
+         "acc_norm_stderr": 0.012705721498565107
+     },
+     "harness|hendrycksTest-professional_medicine|5": {
+         "acc": 0.6580882352941176,
+         "acc_stderr": 0.02881472242225418,
+         "acc_norm": 0.6580882352941176,
+         "acc_norm_stderr": 0.02881472242225418
+     },
+     "harness|hendrycksTest-professional_psychology|5": {
+         "acc": 0.6519607843137255,
+         "acc_stderr": 0.019270998708223974,
+         "acc_norm": 0.6519607843137255,
+         "acc_norm_stderr": 0.019270998708223974
+     },
+     "harness|hendrycksTest-public_relations|5": {
+         "acc": 0.6636363636363637,
+         "acc_stderr": 0.04525393596302506,
+         "acc_norm": 0.6636363636363637,
+         "acc_norm_stderr": 0.04525393596302506
+     },
+     "harness|hendrycksTest-security_studies|5": {
+         "acc": 0.7224489795918367,
+         "acc_stderr": 0.028666857790274645,
+         "acc_norm": 0.7224489795918367,
+         "acc_norm_stderr": 0.028666857790274645
+     },
+     "harness|hendrycksTest-sociology|5": {
+         "acc": 0.8557213930348259,
+         "acc_stderr": 0.02484575321230604,
+         "acc_norm": 0.8557213930348259,
+         "acc_norm_stderr": 0.02484575321230604
+     },
+     "harness|hendrycksTest-us_foreign_policy|5": {
+         "acc": 0.86,
+         "acc_stderr": 0.03487350880197771,
+         "acc_norm": 0.86,
+         "acc_norm_stderr": 0.03487350880197771
+     },
+     "harness|hendrycksTest-virology|5": {
+         "acc": 0.5481927710843374,
+         "acc_stderr": 0.03874371556587953,
+         "acc_norm": 0.5481927710843374,
+         "acc_norm_stderr": 0.03874371556587953
+     },
+     "harness|hendrycksTest-world_religions|5": {
+         "acc": 0.8421052631578947,
+         "acc_stderr": 0.027966785859160896,
+         "acc_norm": 0.8421052631578947,
+         "acc_norm_stderr": 0.027966785859160896
+     },
+     "harness|truthfulqa:mc|0": {
+         "mc1": 0.29008567931456547,
+         "mc1_stderr": 0.01588623687420952,
+         "mc2": 0.41501661742948026,
+         "mc2_stderr": 0.014285902986671931
+     },
+     "harness|winogrande|5": {
+         "acc": 0.7734806629834254,
+         "acc_stderr": 0.011764149054698332
+     },
+     "harness|gsm8k|5": {
+         "acc": 0.37452615617892343,
+         "acc_stderr": 0.013331774158491393
+     }
+ }
  ```
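
The JSON added by this commit is raw per-task output from lm-evaluation-harness in Open LLM Leaderboard format. For readers who want to reduce it to a single headline number, here is a minimal sketch; the filename `results.json` is an assumption, and the per-task metric choices (acc_norm for ARC and HellaSwag, mc2 for TruthfulQA, plain acc elsewhere, MMLU as the mean over the hendrycksTest subjects) follow leaderboard convention rather than anything stated in this commit.

```python
import json

# Assumed filename: save the JSON block above as results.json first.
with open("results.json") as f:
    results = json.load(f)

# Leaderboard-convention metric per task (an assumption, not specified
# by this commit). MMLU is the unweighted mean of the 57 subjects.
arc = results["harness|arc:challenge|25"]["acc_norm"]
hellaswag = results["harness|hellaswag|10"]["acc_norm"]
mmlu_scores = [v["acc"] for k, v in results.items() if "hendrycksTest" in k]
mmlu = sum(mmlu_scores) / len(mmlu_scores)
truthfulqa = results["harness|truthfulqa:mc|0"]["mc2"]
winogrande = results["harness|winogrande|5"]["acc"]
gsm8k = results["harness|gsm8k|5"]["acc"]

average = (arc + hellaswag + mmlu + truthfulqa + winogrande + gsm8k) / 6
print(f"leaderboard-style average: {average:.4f}")
```

The same loop over the hendrycksTest entries is also a quick way to inspect the spread across MMLU subjects (for example, moral_scenarios and high_school_mathematics sit well below the aggregate here).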