alidenewade committed on
Commit b9dd36b · verified · 1 Parent(s): d3d10ea

Upload folder using huggingface_hub
.summary/0/events.out.tfevents.1731044698.ali ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:848ae94ba298ce109de75afbb1c7e6a1cc2538f598e387e83a0c58299c897ea6
+ size 40
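This file (and replay.mp4 below) is tracked with Git LFS, so the diff only touches the small pointer file (version, oid, size), not the binary payload. As a purely illustrative sketch of that pointer format, the snippet below parses the three key/value fields; the `parse_lfs_pointer` helper and its sample input are hypothetical and not part of this repository.

```python
# Illustrative parser for a Git LFS pointer file (version / oid / size).
# Hypothetical helper; real workflows should rely on git-lfs itself.
def parse_lfs_pointer(text: str) -> dict:
    fields = {}
    for line in text.strip().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

pointer = """version https://git-lfs.github.com/spec/v1
oid sha256:848ae94ba298ce109de75afbb1c7e6a1cc2538f598e387e83a0c58299c897ea6
size 40"""

info = parse_lfs_pointer(pointer)
print(info["oid"], int(info["size"]))  # sha256:848a... 40
```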
README.md CHANGED
@@ -15,7 +15,7 @@ model-index:
   type: doom_health_gathering_supreme
   metrics:
   - type: mean_reward
-  value: 4.07 +/- 0.70
+  value: 4.08 +/- 0.71
   name: mean_reward
   verified: false
 ---
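The only substantive model-card change is the reported evaluation metric, which moves from 4.07 +/- 0.70 to 4.08 +/- 0.71. As a hedged illustration of how such a "mean_reward +/- std" figure is typically computed (mean and standard deviation over evaluation episode returns), the snippet below uses made-up episode rewards; the values and the `episode_rewards` array are illustrative only.

```python
# Illustrative only: how a "mean_reward +/- std" figure is usually derived
# from per-episode returns. The reward values below are made up.
import numpy as np

episode_rewards = np.array([3.6, 4.5, 4.1, 3.9, 4.3])  # hypothetical eval returns
mean_reward = episode_rewards.mean()
std_reward = episode_rewards.std()
print(f"mean_reward: {mean_reward:.2f} +/- {std_reward:.2f}")
```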
config.json CHANGED
@@ -15,8 +15,8 @@
   "worker_num_splits": 2,
   "policy_workers_per_policy": 1,
   "max_policy_lag": 1000,
-  "num_workers": 8,
-  "num_envs_per_worker": 4,
+  "num_workers": 16,
+  "num_envs_per_worker": 8,
   "batch_size": 1024,
   "num_batches_per_epoch": 1,
   "num_epochs": 1,
@@ -65,7 +65,7 @@
   "summaries_use_frameskip": true,
   "heartbeat_interval": 20,
   "heartbeat_reporting_interval": 600,
-  "train_for_env_steps": 1000000000,
+  "train_for_env_steps": 5000,
   "train_for_seconds": 10000000000,
   "save_every_sec": 120,
   "keep_checkpoints": 2,
replay.mp4 CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:b800b24655a949eb854d52221fcae684e142841d25dc693d343231d6d13d9f32
- size 63879708
+ oid sha256:34c3b38abb21f9dfe9a972f3123b53aee70f68f042118197032506f87dcce15a
+ size 63559401
sf_log.txt CHANGED
@@ -36788,3 +36788,204 @@ main_loop: 29019.2188
  [2024-11-08 07:32:14,337][41694] Avg episode rewards: #0: 4.619, true rewards: #0: 4.069
  [2024-11-08 07:32:14,339][41694] Avg episode reward: 4.619, avg true_objective: 4.069
  [2024-11-08 07:34:27,542][41694] Replay video saved to /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/replay.mp4!
+ [2024-11-08 07:34:47,182][41694] The model has been pushed to https://huggingface.co/alidenewade/rl_course_vizdoom_health_gathering_supreme-alid
+ [2024-11-08 07:44:57,962][41694] Environment doom_basic already registered, overwriting...
+ [2024-11-08 07:44:58,190][41694] Environment doom_two_colors_easy already registered, overwriting...
+ [2024-11-08 07:44:58,195][41694] Environment doom_two_colors_hard already registered, overwriting...
+ [2024-11-08 07:44:58,200][41694] Environment doom_dm already registered, overwriting...
+ [2024-11-08 07:44:58,205][41694] Environment doom_dwango5 already registered, overwriting...
+ [2024-11-08 07:44:58,208][41694] Environment doom_my_way_home_flat_actions already registered, overwriting...
+ [2024-11-08 07:44:58,212][41694] Environment doom_defend_the_center_flat_actions already registered, overwriting...
+ [2024-11-08 07:44:58,216][41694] Environment doom_my_way_home already registered, overwriting...
+ [2024-11-08 07:44:58,218][41694] Environment doom_deadly_corridor already registered, overwriting...
+ [2024-11-08 07:44:58,220][41694] Environment doom_defend_the_center already registered, overwriting...
+ [2024-11-08 07:44:58,223][41694] Environment doom_defend_the_line already registered, overwriting...
+ [2024-11-08 07:44:58,225][41694] Environment doom_health_gathering already registered, overwriting...
+ [2024-11-08 07:44:58,227][41694] Environment doom_health_gathering_supreme already registered, overwriting...
+ [2024-11-08 07:44:58,229][41694] Environment doom_battle already registered, overwriting...
+ [2024-11-08 07:44:58,230][41694] Environment doom_battle2 already registered, overwriting...
+ [2024-11-08 07:44:58,234][41694] Environment doom_duel_bots already registered, overwriting...
+ [2024-11-08 07:44:58,236][41694] Environment doom_deathmatch_bots already registered, overwriting...
+ [2024-11-08 07:44:58,238][41694] Environment doom_duel already registered, overwriting...
+ [2024-11-08 07:44:58,241][41694] Environment doom_deathmatch_full already registered, overwriting...
+ [2024-11-08 07:44:58,242][41694] Environment doom_benchmark already registered, overwriting...
+ [2024-11-08 07:44:58,245][41694] register_encoder_factory: <function make_vizdoom_encoder at 0x7f45dd724c10>
+ [2024-11-08 07:44:58,567][41694] Loading existing experiment configuration from /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/config.json
+ [2024-11-08 07:44:58,570][41694] Overriding arg 'num_workers' with value 16 passed from command line
+ [2024-11-08 07:44:58,571][41694] Overriding arg 'num_envs_per_worker' with value 8 passed from command line
+ [2024-11-08 07:44:58,573][41694] Overriding arg 'train_for_env_steps' with value 5000 passed from command line
+ [2024-11-08 07:44:58,582][41694] Experiment dir /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment already exists!
+ [2024-11-08 07:44:58,585][41694] Resuming existing experiment from /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment...
+ [2024-11-08 07:44:58,589][41694] Weights and Biases integration disabled
+ [2024-11-08 07:44:58,655][41694] Environment var CUDA_VISIBLE_DEVICES is 0
+
+ [2024-11-08 07:45:16,919][41694] Starting experiment with the following configuration:
+ help=False
+ algo=APPO
+ env=doom_health_gathering_supreme
+ experiment=default_experiment
+ train_dir=/root/hfRL/ml/LunarLander-v2/train_dir
+ restart_behavior=resume
+ device=gpu
+ seed=None
+ num_policies=1
+ async_rl=True
+ serial_mode=False
+ batched_sampling=False
+ num_batches_to_accumulate=2
+ worker_num_splits=2
+ policy_workers_per_policy=1
+ max_policy_lag=1000
+ num_workers=16
+ num_envs_per_worker=8
+ batch_size=1024
+ num_batches_per_epoch=1
+ num_epochs=1
+ rollout=32
+ recurrence=32
+ shuffle_minibatches=False
+ gamma=0.99
+ reward_scale=1.0
+ reward_clip=1000.0
+ value_bootstrap=False
+ normalize_returns=True
+ exploration_loss_coeff=0.001
+ value_loss_coeff=0.5
+ kl_loss_coeff=0.0
+ exploration_loss=symmetric_kl
+ gae_lambda=0.95
+ ppo_clip_ratio=0.1
+ ppo_clip_value=0.2
+ with_vtrace=False
+ vtrace_rho=1.0
+ vtrace_c=1.0
+ optimizer=adam
+ adam_eps=1e-06
+ adam_beta1=0.9
+ adam_beta2=0.999
+ max_grad_norm=4.0
+ learning_rate=0.0003
+ lr_schedule=constant
+ lr_schedule_kl_threshold=0.008
+ lr_adaptive_min=1e-06
+ lr_adaptive_max=0.01
+ obs_subtract_mean=0.0
+ obs_scale=255.0
+ normalize_input=True
+ normalize_input_keys=None
+ decorrelate_experience_max_seconds=0
+ decorrelate_envs_on_one_worker=True
+ actor_worker_gpus=[]
+ set_workers_cpu_affinity=True
+ force_envs_single_thread=False
+ default_niceness=0
+ log_to_file=True
+ experiment_summaries_interval=10
+ flush_summaries_interval=30
+ stats_avg=100
+ summaries_use_frameskip=True
+ heartbeat_interval=20
+ heartbeat_reporting_interval=600
+ train_for_env_steps=5000
+ train_for_seconds=10000000000
+ save_every_sec=120
+ keep_checkpoints=2
+ load_checkpoint_kind=latest
+ save_milestones_sec=-1
+ save_best_every_sec=5
+ save_best_metric=reward
+ save_best_after=100000
+ benchmark=False
+ encoder_mlp_layers=[512, 512]
+ encoder_conv_architecture=convnet_simple
+ encoder_conv_mlp_layers=[512]
+ use_rnn=True
+ rnn_size=512
+ rnn_type=gru
+ rnn_num_layers=1
+ decoder_mlp_layers=[]
+ nonlinearity=elu
+ policy_initialization=orthogonal
+ policy_init_gain=1.0
+ actor_critic_share_weights=True
+ adaptive_stddev=True
+ continuous_tanh_scale=0.0
+ initial_stddev=1.0
+ use_env_info_cache=False
+ env_gpu_actions=False
+ env_gpu_observations=True
+ env_frameskip=4
+ env_framestack=1
+ pixel_format=CHW
+ use_record_episode_statistics=False
+ with_wandb=False
+ wandb_user=None
+ wandb_project=sample_factory
+ wandb_group=None
+ wandb_job_type=SF
+ wandb_tags=[]
+ with_pbt=False
+ pbt_mix_policies_in_one_env=True
+ pbt_period_env_steps=5000000
+ pbt_start_mutation=20000000
+ pbt_replace_fraction=0.3
+ pbt_mutation_rate=0.15
+ pbt_replace_reward_gap=0.1
+ pbt_replace_reward_gap_absolute=1e-06
+ pbt_optimize_gamma=False
+ pbt_target_objective=true_objective
+ pbt_perturb_min=1.1
+ pbt_perturb_max=1.5
+ num_agents=-1
+ num_humans=0
+ num_bots=-1
+ start_bot_difficulty=None
+ timelimit=None
+ res_w=128
+ res_h=72
+ wide_aspect_ratio=False
+ eval_env_frameskip=1
+ fps=35
+ command_line=--env=doom_health_gathering_supreme --num_workers=8 --num_envs_per_worker=4 --train_for_env_steps=4000000
+ cli_args={'env': 'doom_health_gathering_supreme', 'num_workers': 8, 'num_envs_per_worker': 4, 'train_for_env_steps': 4000000}
+ git_hash=unknown
+ git_repo_name=not a git repository
+ [2024-11-08 07:45:16,921][41694] Saving configuration to /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/config.json...
+ [2024-11-08 07:45:16,924][41694] Rollout worker 0 uses device cpu
+ [2024-11-08 07:45:16,926][41694] Rollout worker 1 uses device cpu
+ [2024-11-08 07:45:16,928][41694] Rollout worker 2 uses device cpu
+ [2024-11-08 07:45:16,930][41694] Rollout worker 3 uses device cpu
+ [2024-11-08 07:45:16,933][41694] Rollout worker 4 uses device cpu
+ [2024-11-08 07:45:16,938][41694] Rollout worker 5 uses device cpu
+ [2024-11-08 07:45:16,940][41694] Rollout worker 6 uses device cpu
+ [2024-11-08 07:45:16,942][41694] Rollout worker 7 uses device cpu
+ [2024-11-08 07:45:16,944][41694] Rollout worker 8 uses device cpu
+ [2024-11-08 07:45:16,947][41694] Rollout worker 9 uses device cpu
+ [2024-11-08 07:45:16,949][41694] Rollout worker 10 uses device cpu
+ [2024-11-08 07:45:16,952][41694] Rollout worker 11 uses device cpu
+ [2024-11-08 07:45:16,954][41694] Rollout worker 12 uses device cpu
+ [2024-11-08 07:45:16,956][41694] Rollout worker 13 uses device cpu
+ [2024-11-08 07:45:16,958][41694] Rollout worker 14 uses device cpu
+ [2024-11-08 07:45:16,960][41694] Rollout worker 15 uses device cpu
+ [2024-11-08 07:45:17,210][41694] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+ [2024-11-08 07:45:17,213][41694] InferenceWorker_p0-w0: min num requests: 5
+ [2024-11-08 07:45:17,322][41694] Starting all processes...
+ [2024-11-08 07:45:17,323][41694] Starting process learner_proc0
+ [2024-11-08 07:45:17,406][41694] Starting all processes...
+ [2024-11-08 07:45:17,418][41694] Starting process inference_proc0-0
+ [2024-11-08 07:45:17,420][41694] Starting process rollout_proc0
+ [2024-11-08 07:45:17,421][41694] Starting process rollout_proc1
+ [2024-11-08 07:45:17,421][41694] Starting process rollout_proc2
+ [2024-11-08 07:45:17,424][41694] Starting process rollout_proc3
+ [2024-11-08 07:45:17,426][41694] Starting process rollout_proc4
+ [2024-11-08 07:45:17,433][41694] Starting process rollout_proc5
+ [2024-11-08 07:45:17,440][41694] Starting process rollout_proc6
+ [2024-11-08 07:45:17,452][41694] Starting process rollout_proc7
+ [2024-11-08 07:45:17,453][41694] Starting process rollout_proc8
+ [2024-11-08 07:45:17,454][41694] Starting process rollout_proc9
+ [2024-11-08 07:45:17,463][41694] Starting process rollout_proc10
+ [2024-11-08 07:45:17,467][41694] Starting process rollout_proc11
+ [2024-11-08 07:45:17,479][41694] Starting process rollout_proc12
+ [2024-11-08 07:45:17,487][41694] Starting process rollout_proc13
+ [2024-11-08 07:45:17,487][41694] Starting process rollout_proc14
+ [2024-11-08 07:45:17,620][41694] Starting process rollout_proc15
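The new log ends with the startup of the resumed run (16 rollout workers), while its first added line records the evaluation and upload step that produced replay.mp4 and pushed the model to https://huggingface.co/alidenewade/rl_course_vizdoom_health_gathering_supreme-alid. A hedged sketch of how that step is typically invoked through sample-factory's enjoy entry point follows; it assumes the same VizDoom registration helpers as the resume sketch above have already run, and flag names may vary across sample-factory versions.

```python
# Sketch only: evaluate the trained policy, record a replay video, and push
# the experiment folder to the Hugging Face Hub. Assumes sample-factory 2.x;
# env/encoder registration from the earlier sketch is assumed to have run,
# and the repository name is taken from the log above.
from sample_factory.cfg.arguments import parse_full_cfg, parse_sf_args
from sample_factory.enjoy import enjoy
from sf_examples.vizdoom.doom.doom_params import add_doom_env_args, doom_override_defaults

argv = [
    "--env=doom_health_gathering_supreme",
    "--train_dir=/root/hfRL/ml/LunarLander-v2/train_dir",
    "--experiment=default_experiment",
    "--num_workers=1",
    "--save_video",        # writes replay.mp4 into the experiment dir
    "--no_render",
    "--max_num_episodes=10",
    "--push_to_hub",
    "--hf_repository=alidenewade/rl_course_vizdoom_health_gathering_supreme-alid",
]

parser, _ = parse_sf_args(argv=argv, evaluation=True)  # adds evaluation-only flags
add_doom_env_args(parser)
doom_override_defaults(parser)
cfg = parse_full_cfg(parser, argv)
status = enjoy(cfg)  # evaluates, saves the replay, and uploads the folder
```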