# Constructor settings for the optimizer class named by `_target_`.
# NOTE(review): nesting was rebuilt from a flattened dump (trailing "| |"
# residue removed); confirm against the original run configuration.
action_optimizer:
  _target_: mbrl.planning.CEMOptimizer
  alpha: 0.1
  clipped_normal: false
  device: cpu
  elite_ratio: 0.1
  # `???` is kept verbatim; presumably the "mandatory value, must be supplied
  # before use" marker of the config library -- TODO confirm.
  lower_bound: ???
  num_iterations: 5
  population_size: 350
  return_mean_elites: true
  upper_bound: ???
# Algorithm-level settings (name: mbpo) plus the nested agent definition.
# NOTE(review): nesting was rebuilt from a flattened dump using the
# alphabetical key order of each level; confirm against the original run
# configuration.
algorithm:
  agent:
    _target_: mbrl.third_party.pytorch_sac_pranz24.sac.SAC
    action_space:
      # NOTE(review): the Box space is normally exposed as `gym.spaces.Box`;
      # verify that `gym.env.Box` is resolvable if this node is ever
      # instantiated rather than passed through as plain config.
      _target_: gym.env.Box
      # Three-dimensional action bounds; `high`, `low` and `shape` agree.
      high:
      - 1.0
      - 1.0
      - 1.0
      low:
      - -1.0
      - -1.0
      - -1.0
      shape:
      - 3
    # These values coincide with the `overrides.sac_*` entries of this file.
    args:
      alpha: 0.2
      automatic_entropy_tuning: false
      device: cpu
      gamma: 0.99
      hidden_size: 512
      lr: 0.0003
      policy: Gaussian
      target_entropy: 1
      target_update_interval: 4
      tau: 0.005
    num_inputs: 11
  # Same value as `overrides.freq_train_model`.
  freq_train_model: 250
  initial_exploration_steps: 5000
  learned_rewards: true
  name: mbpo
  normalize: true
  normalize_double_precision: true
  num_eval_episodes: 1
  random_initial_explore: false
  real_data_ratio: 0.0
  sac_samples_action: true
  target_is_delta: true
# Top-level run switches. The same `device` value (cpu) is repeated inside
# the nested component sections of this file.
debug_mode: false
device: cpu
# Constructor settings for the model class named by `_target_`.
# NOTE(review): nesting was rebuilt from a flattened dump; `deterministic`
# and the keys after it are placed on the model, not on the activation
# config -- confirm against the original run configuration.
dynamics_model:
  _target_: mbrl.models.GaussianMLP
  activation_fn_cfg:
    _target_: torch.nn.SiLU
  deterministic: false
  device: cpu
  ensemble_size: 7
  hid_size: 200
  # NOTE(review): 14 equals algorithm.agent.num_inputs (11) plus the 3 action
  # dimensions; presumably observation + action -- confirm.
  in_size: 14
  learn_logvar_bounds: false
  num_layers: 4
  # NOTE(review): 12 is one more than algorithm.agent.num_inputs (11);
  # presumably the extra output is the learned reward
  # (algorithm.learned_rewards is true) -- confirm.
  out_size: 12
  propagation_method: random_model
# Experiment label and agent logging interval.
# NOTE(review): the unit of `log_frequency_agent` is not visible in this
# file -- confirm with the consumer.
experiment: default
log_frequency_agent: 1000
# Environment-specific values. The `cem_*`, `sac_*` and `freq_train_model`
# entries carry the same numbers that appear in `action_optimizer`,
# `algorithm.agent.args` and `algorithm` elsewhere in this file.
# NOTE(review): nesting was rebuilt from a flattened dump; confirm against
# the original run configuration.
overrides:
  cem_alpha: 0.1
  cem_clipped_normal: false
  cem_elite_ratio: 0.1
  cem_num_iters: 5
  cem_population_size: 350
  effective_model_rollouts_per_step: 400
  env: gym___Hopper-v2
  epoch_length: 1000
  freq_train_model: 250
  model_batch_size: 256
  model_lr: 0.001
  model_wd: 1.0e-05
  num_elites: 5
  num_epochs_to_retain_sac_buffer: 1
  num_sac_updates_per_step: 40
  num_steps: 125000
  patience: 5
  planning_horizon: 15
  # Four-element schedule.
  # NOTE(review): presumably [start_epoch, end_epoch, min_length,
  # max_length] -- confirm with the consumer.
  rollout_schedule:
  - 20
  - 150
  - 1
  - 15
  sac_alpha: 0.2
  sac_automatic_entropy_tuning: false
  sac_batch_size: 256
  sac_gamma: 0.99
  sac_hidden_size: 512
  sac_lr: 0.0003
  sac_policy: Gaussian
  sac_target_entropy: 1
  sac_target_update_interval: 4
  sac_tau: 0.005
  sac_updates_every_steps: 1
  term_fn: hopper
  validation_ratio: 0.2
# Output location (a relative path), video capture switch, and random seed.
root_dir: ./logs
save_video: false
seed: 0