# shivakanthsujit's picture
# Upload with huggingface_hub
# d08edee
# Optimizer section. `_target_` is the Hydra convention naming the class to
# instantiate; the sibling keys are handed to it as keyword arguments.
action_optimizer:
  _target_: mbrl.planning.CEMOptimizer
  alpha: 0.1
  clipped_normal: false
  device: cpu
  elite_ratio: 0.1
  # `???` is OmegaConf's marker for a mandatory value that is still unset.
  lower_bound: ???
  num_iterations: 5
  population_size: 350
  return_mean_elites: true
  # Unset as well; see the marker note on `lower_bound`.
  upper_bound: ???
# Algorithm section (`name: mbpo`). Nesting is reconstructed from the
# alphabetical key order at each level of the dump.
algorithm:
  # Agent built through Hydra's `_target_` convention.
  agent:
    _target_: mbrl.third_party.pytorch_sac_pranz24.sac.SAC
    # Three-element bounds with `shape: [3]`, i.e. one entry per dimension.
    action_space:
      # NOTE(review): Box is normally importable as `gym.spaces.Box`; this
      # target string is kept as-is — confirm whether it is ever instantiated.
      _target_: gym.env.Box
      high:
      - 1.0
      - 1.0
      - 1.0
      low:
      - -1.0
      - -1.0
      - -1.0
      shape:
      - 3
    # These values equal the `sac_*` entries listed under `overrides`.
    args:
      alpha: 0.2
      automatic_entropy_tuning: false
      device: cpu
      gamma: 0.99
      hidden_size: 512
      lr: 0.0003
      policy: Gaussian
      target_entropy: 1
      target_update_interval: 4
      tau: 0.005
    num_inputs: 11
  # Same value as `freq_train_model` under `overrides`.
  freq_train_model: 250
  initial_exploration_steps: 5000
  learned_rewards: true
  name: mbpo
  normalize: true
  normalize_double_precision: true
  num_eval_episodes: 1
  random_initial_explore: false
  real_data_ratio: 0.0
  sac_samples_action: true
  target_is_delta: true
# Root-level scalars located between the `algorithm` and `dynamics_model` keys.
# NOTE(review): `device: cpu` is repeated under several other keys in this
# file — presumably those mirror this value; confirm against the consumer.
debug_mode: false
device: cpu
# Dynamics model section, built through Hydra's `_target_` convention.
dynamics_model:
  _target_: mbrl.models.GaussianMLP
  # Nested target: the activation is itself described by a `_target_` entry.
  activation_fn_cfg:
    _target_: torch.nn.SiLU
  deterministic: false
  device: cpu
  ensemble_size: 7
  hid_size: 200
  # NOTE(review): 14 equals agent `num_inputs` (11) plus the action shape (3);
  # presumably observation + action — confirm.
  in_size: 14
  learn_logvar_bounds: false
  num_layers: 4
  out_size: 12
  propagation_method: random_model
# NOTE(review): presumably an experiment label and an agent logging interval
# (units are not shown in this file) — confirm against the consumer.
experiment: default
log_frequency_agent: 1000
# Per-run override values. The `cem_*` entries and most `sac_*` entries equal
# the corresponding optimizer and SAC `args` values elsewhere in this file.
overrides:
  cem_alpha: 0.1
  cem_clipped_normal: false
  cem_elite_ratio: 0.1
  cem_num_iters: 5
  cem_population_size: 350
  effective_model_rollouts_per_step: 400
  env: gym___Hopper-v2
  epoch_length: 1000
  freq_train_model: 250
  model_batch_size: 256
  model_lr: 0.001
  model_wd: 1.0e-05
  num_elites: 5
  num_epochs_to_retain_sac_buffer: 1
  num_sac_updates_per_step: 40
  num_steps: 125000
  patience: 5
  planning_horizon: 15
  # NOTE(review): four-element list; the meaning of each position is not
  # shown in this file — confirm against the consumer.
  rollout_schedule:
  - 20
  - 150
  - 1
  - 15
  sac_alpha: 0.2
  sac_automatic_entropy_tuning: false
  sac_batch_size: 256
  sac_gamma: 0.99
  sac_hidden_size: 512
  sac_lr: 0.0003
  sac_policy: Gaussian
  sac_target_entropy: 1
  sac_target_update_interval: 4
  sac_tau: 0.005
  sac_updates_every_steps: 1
  term_fn: hopper
  validation_ratio: 0.2
# Root-level scalars that follow the `overrides` key.
# NOTE(review): presumably the log output directory, a video-recording toggle,
# and the random seed — confirm against the consumer.
root_dir: ./logs
save_video: false
seed: 0