# Resolved experiment configuration: algorithm `mbpo` (algorithm.name) on
# environment `gym___Hopper-v2` (overrides.env).
# Keys are kept in alphabetical order at every nesting level.
# `_target_` entries name the class to build for that section, and `???`
# marks a value that is not filled in by this file (presumably the
# Hydra/OmegaConf conventions -- confirm against the loader in use).

# CEM optimizer settings. The numeric values equal the cem_* entries under
# `overrides` below.
action_optimizer:
  _target_: mbrl.planning.CEMOptimizer
  alpha: 0.1
  clipped_normal: false
  device: cpu
  elite_ratio: 0.1
  # Action bounds are not set here and must be supplied elsewhere.
  lower_bound: ???
  num_iterations: 5
  population_size: 350
  return_mean_elites: true
  upper_bound: ???

# Algorithm-level settings, including the SAC agent definition.
algorithm:
  agent:
    _target_: mbrl.third_party.pytorch_sac_pranz24.sac.SAC
    action_space:
      # NOTE(review): gym exposes Box as `gym.spaces.Box`; confirm that
      # `gym.env.Box` is never instantiated recursively from this target.
      _target_: gym.env.Box
      high:
        - 1.0
        - 1.0
        - 1.0
      low:
        - -1.0
        - -1.0
        - -1.0
      shape:
        - 3
    # The values in `args` equal the sac_* entries under `overrides` below.
    args:
      alpha: 0.2
      automatic_entropy_tuning: false
      device: cpu
      gamma: 0.99
      hidden_size: 512
      lr: 0.0003
      policy: Gaussian
      # NOTE(review): presumably unused while automatic_entropy_tuning is
      # false -- confirm.
      target_entropy: 1
      target_update_interval: 4
      tau: 0.005
    num_inputs: 11
  freq_train_model: 250
  initial_exploration_steps: 5000
  learned_rewards: true
  name: mbpo
  normalize: true
  normalize_double_precision: true
  num_eval_episodes: 1
  random_initial_explore: false
  real_data_ratio: 0.0
  sac_samples_action: true
  target_is_delta: true

debug_mode: false
device: cpu

# Dynamics model settings.
dynamics_model:
  _target_: mbrl.models.GaussianMLP
  activation_fn_cfg:
    _target_: torch.nn.SiLU
  deterministic: false
  device: cpu
  ensemble_size: 7
  hid_size: 200
  # NOTE(review): 14 looks like agent.num_inputs (11) + action shape (3),
  # and out_size 12 like 11 + 1 -- confirm against the model wrapper.
  in_size: 14
  learn_logvar_bounds: false
  num_layers: 4
  out_size: 12
  propagation_method: random_model

experiment: default
log_frequency_agent: 1000

# Per-environment overrides. The cem_*, sac_* and freq_train_model values
# here match the corresponding entries in the sections above.
overrides:
  cem_alpha: 0.1
  cem_clipped_normal: false
  cem_elite_ratio: 0.1
  cem_num_iters: 5
  cem_population_size: 350
  effective_model_rollouts_per_step: 400
  env: gym___Hopper-v2
  epoch_length: 1000
  freq_train_model: 250
  model_batch_size: 256
  model_lr: 0.001
  model_wd: 1.0e-05
  num_elites: 5
  num_epochs_to_retain_sac_buffer: 1
  num_sac_updates_per_step: 40
  num_steps: 125000
  patience: 5
  planning_horizon: 15
  rollout_schedule:
    - 20
    - 150
    - 1
    - 15
  sac_alpha: 0.2
  sac_automatic_entropy_tuning: false
  sac_batch_size: 256
  sac_gamma: 0.99
  sac_hidden_size: 512
  sac_lr: 0.0003
  sac_policy: Gaussian
  sac_target_entropy: 1
  sac_target_update_interval: 4
  sac_tau: 0.005
  sac_updates_every_steps: 1
  term_fn: hopper
  validation_ratio: 0.2

root_dir: ./logs
save_video: false
seed: 0