{
  "cache_dir": "/leonardo_work/EUHPC_E03_068/.cache",
  "method": "orpo",
  "dataset": "autoredteam",
  "model": "togethercomputer/RedPajama-INCITE-Base-3B-v1",
  "tokenizer": "togethercomputer/RedPajama-INCITE-Instruct-3B-v1",
  "lr": 0.01,
  "train_batch_size": 5,
  "eval_batch_size": 5,
  "num_epochs": 3,
  "seed": 42,
  "eval_only": false,
  "evaluation_size": 2000,
  "checkpoint_path": null,
  "experiment_name": "RedPajama3b_v1-autoredteam_v2-train-3",
  "experiment_group": "results",
  "reference_model": "togethercomputer/RedPajama-INCITE-Instruct-3B-v1",
  "context_length": 1024,
  "train_summarization": "",
  "dpo_beta": 0.1,
  "orpo_beta": 0.1,
  "kl_coef": 0.0,
  "reward_model": "",
  "bestofn_size": 4,
  "train_reward_model": ""
}