{
  "model_name_or_path": "microsoft/Phi-3-mini-4k-instruct",
  "pooling_mode": "weighted_mean",
  "dataset_name": "E5",
  "dataset_file_path": "cache/echo-data",
  "remove_unused_columns": false,
  "learning_rate": 0.0002,
  "num_train_epochs": 3,
  "warmup_steps": 300,
  "per_device_train_batch_size": 64,
  "per_device_eval_batch_size": 64,
  "gradient_accumulation_steps": 1,
  "do_train": true,
  "disable_tqdm": false,
  "max_seq_length": 512,
  "overwrite_output_dir": true,
  "output_dir": "output/microsoft/Phi-3-mini-4k-instruct",
  "use_adapter": true,
  "percent_prune": [25],
  "autoprune": "small+large",
  "logging_steps": 50,
  "save_steps": 200,
  "save_total_limit": 1,
  "save_only_model": true,
  "stop_after_n_steps": 1000,
  "lora_r": 16,
  "gradient_checkpointing": true,
  "torch_dtype": "bfloat16",
  "attn_implementation": "flash_attention_2",
  "seed": 42
}