- 1000
- 1200
- 1400
- 1600
- 200
- 400
- 600
- 800
- checkpoint-1000
- checkpoint-1200
- checkpoint-1400
- checkpoint-1600
- checkpoint-200
- checkpoint-400
- checkpoint-600
- checkpoint-800
- llama2.7b.chat.logiqav2.70b-distil.step.dpo.fix_hack.H100.w4.v1.0.s44.checkpoint-1200.logiqav2.react.train.0shot.sample5.v1.0.cleaned.v1.0
- sft.dev.n5.tem1.0.reclor.rewards.raw_trajectory.product.v1.1
- train.logiqav2.rewards.raw_trajectory.product.step-dpo-v1.0.v1.1
- train.reclor.rewards.raw_trajectory.product.v1.1
- train.rewards.raw_trajectory.product.v1.0
-
8.44 kB
-
4.71 kB