shivakanthsujit
/

basic-mbrl-continuous-cartpole_colab_model

Reinforcement Learning

mbrl-lib

mbrl-continuous-cartpole

deep-reinforcement-learning

Model card | Files and versions | Community

shivakanthsujit committed on Feb 11, 2023

Commit

8cc1b26

1 Parent(s): 42c1704

Init Commit

Browse files

Files changed (4) hide show

README.md +3 -2
config.yaml +97 -15
model/env_stats.pickle +2 -2
model/model.pth +2 -2

README.md CHANGED Viewed

@@ -6,8 +6,9 @@ tags:
 - reinforcement-learning
 - mbrl-lib
 ---
-# **OneDTransitionRewardModel** Agent playing **mbrl-continuous-cartpole**
-This is a trained model of a **OneDTransitionRewardModel** agent playing **mbrl-continuous-cartpole**
 using [MBRL-Lib](https://github.com/facebookresearch/mbrl-lib).
 ## Usage (with MBRL-Lib)

 - reinforcement-learning
 - mbrl-lib
 ---
+# **OneDTransitionRewardModel w/ SACAgent** Agent playing **mbrl-continuous-cartpole**
+This is a trained model of a **OneDTransitionRewardModel w/ SACAgent** agent
+playing **mbrl-continuous-cartpole**
 using [MBRL-Lib](https://github.com/facebookresearch/mbrl-lib).
 ## Usage (with MBRL-Lib)

config.yaml CHANGED Viewed

@@ -1,20 +1,102 @@
 dynamics_model:
   _target_: mbrl.models.GaussianMLP
-  num_layers: 3
-  ensemble_size: 5
-  device: cpu
   hid_size: 200
   in_size: 5
-  out_size: 4
-  deterministic: false
-  propagation_method: fixed_model
-  activation_fn_cfg:
-    _target_: torch.nn.LeakyReLU
-    negative_slope: 0.01
-algorithm:
-  learned_rewards: false
-  target_is_delta: true
-  normalize: true
 overrides:
-  model_batch_size: 32
-  validation_ratio: 0.05

+action_optimizer:
+  _target_: mbrl.planning.CEMOptimizer
+  alpha: 0.1
+  clipped_normal: false
+  device: cpu:0
+  elite_ratio: 0.1
+  lower_bound: ???
+  num_iterations: 5
+  population_size: 350
+  return_mean_elites: true
+  upper_bound: ???
+algorithm:
+  agent:
+    _target_: mbrl.third_party.pytorch_sac_pranz24.sac.SAC
+    action_space:
+      _target_: gym.env.Box
+      high:
+      - 1.0
+      low:
+      - -1.0
+      shape:
+      - 1
+    args:
+      alpha: 0.2
+      automatic_entropy_tuning: true
+      device: cpu:0
+      gamma: 0.99
+      hidden_size: 256
+      lr: 0.0003
+      policy: Gaussian
+      target_entropy: -0.05
+      target_update_interval: 4
+      tau: 0.005
+    num_inputs: 4
+  freq_train_model: 200
+  initial_exploration_steps: 5000
+  learned_rewards: true
+  name: mbpo
+  normalize: true
+  normalize_double_precision: true
+  num_eval_episodes: 1
+  random_initial_explore: false
+  real_data_ratio: 0.0
+  sac_samples_action: true
+  target_is_delta: true
+debug_mode: false
+device: cpu:0
 dynamics_model:
   _target_: mbrl.models.GaussianMLP
+  activation_fn_cfg:
+    _target_: torch.nn.SiLU
+  deterministic: false
+  device: cpu:0
+  ensemble_size: 7
   hid_size: 200
   in_size: 5
+  learn_logvar_bounds: false
+  num_layers: 4
+  out_size: 5
+  propagation_method: random_model
+experiment: default
+log_frequency_agent: 1000
 overrides:
+  cem_alpha: 0.1
+  cem_clipped_normal: false
+  cem_elite_ratio: 0.1
+  cem_num_iters: 5
+  cem_population_size: 350
+  effective_model_rollouts_per_step: 400
+  env: cartpole_continuous
+  epoch_length: 200
+  freq_train_model: 200
+  model_batch_size: 256
+  model_lr: 0.001
+  model_wd: 5.0e-05
+  num_elites: 5
+  num_epochs_to_retain_sac_buffer: 1
+  num_sac_updates_per_step: 20
+  num_steps: 5000
+  patience: 5
+  planning_horizon: 15
+  rollout_schedule:
+  - 1
+  - 15
+  - 1
+  - 1
+  sac_alpha: 0.2
+  sac_automatic_entropy_tuning: true
+  sac_batch_size: 256
+  sac_gamma: 0.99
+  sac_hidden_size: 256
+  sac_lr: 0.0003
+  sac_policy: Gaussian
+  sac_target_entropy: -0.05
+  sac_target_update_interval: 4
+  sac_tau: 0.005
+  sac_updates_every_steps: 1
+  trial_length: 200
+  validation_ratio: 0.2
+root_dir: ./logs
+save_video: false
+seed: 0

model/env_stats.pickle CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:69554307f755e9042cf296a7c89d744651bea8ae81d226f8e3150b50f8e8ac01
-size 238

 version https://git-lfs.github.com/spec/v1
+oid sha256:41c1ac53edc417a20114e12671aea2434e2e9b5125ebfef999e87c267d9fb5c8
+size 278

model/model.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ae823dd889cf4dc2f9c541ba0811c9f9ccb70575c7b768dfc01f35a5cc1073b9
-size 1667439

 version https://git-lfs.github.com/spec/v1
+oid sha256:a51a85c2bd7f4b05886da8820eb36cc4032084cd2acf14c0ae1d579ccfa9b2dc
+size 3470565