alidenewade
committed on
Upload folder using huggingface_hub
Browse files
- .summary/0/events.out.tfevents.1731044698.ali +3 -0
- README.md +1 -1
- config.json +3 -3
- replay.mp4 +2 -2
- sf_log.txt +201 -0
.summary/0/events.out.tfevents.1731044698.ali
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:848ae94ba298ce109de75afbb1c7e6a1cc2538f598e387e83a0c58299c897ea6
|
3 |
+
size 40
|
README.md
CHANGED
@@ -15,7 +15,7 @@ model-index:
|
|
15 |
type: doom_health_gathering_supreme
|
16 |
metrics:
|
17 |
- type: mean_reward
|
18 |
-
value: 4.
|
19 |
name: mean_reward
|
20 |
verified: false
|
21 |
---
|
|
|
15 |
type: doom_health_gathering_supreme
|
16 |
metrics:
|
17 |
- type: mean_reward
|
18 |
+
value: 4.08 +/- 0.71
|
19 |
name: mean_reward
|
20 |
verified: false
|
21 |
---
|
config.json
CHANGED
@@ -15,8 +15,8 @@
|
|
15 |
"worker_num_splits": 2,
|
16 |
"policy_workers_per_policy": 1,
|
17 |
"max_policy_lag": 1000,
|
18 |
-
"num_workers":
|
19 |
-
"num_envs_per_worker":
|
20 |
"batch_size": 1024,
|
21 |
"num_batches_per_epoch": 1,
|
22 |
"num_epochs": 1,
|
@@ -65,7 +65,7 @@
|
|
65 |
"summaries_use_frameskip": true,
|
66 |
"heartbeat_interval": 20,
|
67 |
"heartbeat_reporting_interval": 600,
|
68 |
-
"train_for_env_steps":
|
69 |
"train_for_seconds": 10000000000,
|
70 |
"save_every_sec": 120,
|
71 |
"keep_checkpoints": 2,
|
|
|
15 |
"worker_num_splits": 2,
|
16 |
"policy_workers_per_policy": 1,
|
17 |
"max_policy_lag": 1000,
|
18 |
+
"num_workers": 16,
|
19 |
+
"num_envs_per_worker": 8,
|
20 |
"batch_size": 1024,
|
21 |
"num_batches_per_epoch": 1,
|
22 |
"num_epochs": 1,
|
|
|
65 |
"summaries_use_frameskip": true,
|
66 |
"heartbeat_interval": 20,
|
67 |
"heartbeat_reporting_interval": 600,
|
68 |
+
"train_for_env_steps": 5000,
|
69 |
"train_for_seconds": 10000000000,
|
70 |
"save_every_sec": 120,
|
71 |
"keep_checkpoints": 2,
|
replay.mp4
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:34c3b38abb21f9dfe9a972f3123b53aee70f68f042118197032506f87dcce15a
|
3 |
+
size 63559401
|
sf_log.txt
CHANGED
@@ -36788,3 +36788,204 @@ main_loop: 29019.2188
|
|
36788 |
[2024-11-08 07:32:14,337][41694] Avg episode rewards: #0: 4.619, true rewards: #0: 4.069
|
36789 |
[2024-11-08 07:32:14,339][41694] Avg episode reward: 4.619, avg true_objective: 4.069
|
36790 |
[2024-11-08 07:34:27,542][41694] Replay video saved to /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/replay.mp4!
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36788 |
[2024-11-08 07:32:14,337][41694] Avg episode rewards: #0: 4.619, true rewards: #0: 4.069
|
36789 |
[2024-11-08 07:32:14,339][41694] Avg episode reward: 4.619, avg true_objective: 4.069
|
36790 |
[2024-11-08 07:34:27,542][41694] Replay video saved to /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/replay.mp4!
|
36791 |
+
[2024-11-08 07:34:47,182][41694] The model has been pushed to https://huggingface.co/alidenewade/rl_course_vizdoom_health_gathering_supreme-alid
|
36792 |
+
[2024-11-08 07:44:57,962][41694] Environment doom_basic already registered, overwriting...
|
36793 |
+
[2024-11-08 07:44:58,190][41694] Environment doom_two_colors_easy already registered, overwriting...
|
36794 |
+
[2024-11-08 07:44:58,195][41694] Environment doom_two_colors_hard already registered, overwriting...
|
36795 |
+
[2024-11-08 07:44:58,200][41694] Environment doom_dm already registered, overwriting...
|
36796 |
+
[2024-11-08 07:44:58,205][41694] Environment doom_dwango5 already registered, overwriting...
|
36797 |
+
[2024-11-08 07:44:58,208][41694] Environment doom_my_way_home_flat_actions already registered, overwriting...
|
36798 |
+
[2024-11-08 07:44:58,212][41694] Environment doom_defend_the_center_flat_actions already registered, overwriting...
|
36799 |
+
[2024-11-08 07:44:58,216][41694] Environment doom_my_way_home already registered, overwriting...
|
36800 |
+
[2024-11-08 07:44:58,218][41694] Environment doom_deadly_corridor already registered, overwriting...
|
36801 |
+
[2024-11-08 07:44:58,220][41694] Environment doom_defend_the_center already registered, overwriting...
|
36802 |
+
[2024-11-08 07:44:58,223][41694] Environment doom_defend_the_line already registered, overwriting...
|
36803 |
+
[2024-11-08 07:44:58,225][41694] Environment doom_health_gathering already registered, overwriting...
|
36804 |
+
[2024-11-08 07:44:58,227][41694] Environment doom_health_gathering_supreme already registered, overwriting...
|
36805 |
+
[2024-11-08 07:44:58,229][41694] Environment doom_battle already registered, overwriting...
|
36806 |
+
[2024-11-08 07:44:58,230][41694] Environment doom_battle2 already registered, overwriting...
|
36807 |
+
[2024-11-08 07:44:58,234][41694] Environment doom_duel_bots already registered, overwriting...
|
36808 |
+
[2024-11-08 07:44:58,236][41694] Environment doom_deathmatch_bots already registered, overwriting...
|
36809 |
+
[2024-11-08 07:44:58,238][41694] Environment doom_duel already registered, overwriting...
|
36810 |
+
[2024-11-08 07:44:58,241][41694] Environment doom_deathmatch_full already registered, overwriting...
|
36811 |
+
[2024-11-08 07:44:58,242][41694] Environment doom_benchmark already registered, overwriting...
|
36812 |
+
[2024-11-08 07:44:58,245][41694] register_encoder_factory: <function make_vizdoom_encoder at 0x7f45dd724c10>
|
36813 |
+
[2024-11-08 07:44:58,567][41694] Loading existing experiment configuration from /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/config.json
|
36814 |
+
[2024-11-08 07:44:58,570][41694] Overriding arg 'num_workers' with value 16 passed from command line
|
36815 |
+
[2024-11-08 07:44:58,571][41694] Overriding arg 'num_envs_per_worker' with value 8 passed from command line
|
36816 |
+
[2024-11-08 07:44:58,573][41694] Overriding arg 'train_for_env_steps' with value 5000 passed from command line
|
36817 |
+
[2024-11-08 07:44:58,582][41694] Experiment dir /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment already exists!
|
36818 |
+
[2024-11-08 07:44:58,585][41694] Resuming existing experiment from /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment...
|
36819 |
+
[2024-11-08 07:44:58,589][41694] Weights and Biases integration disabled
|
36820 |
+
[2024-11-08 07:44:58,655][41694] Environment var CUDA_VISIBLE_DEVICES is 0
|
36821 |
+
|
36822 |
+
[2024-11-08 07:45:16,919][41694] Starting experiment with the following configuration:
|
36823 |
+
help=False
|
36824 |
+
algo=APPO
|
36825 |
+
env=doom_health_gathering_supreme
|
36826 |
+
experiment=default_experiment
|
36827 |
+
train_dir=/root/hfRL/ml/LunarLander-v2/train_dir
|
36828 |
+
restart_behavior=resume
|
36829 |
+
device=gpu
|
36830 |
+
seed=None
|
36831 |
+
num_policies=1
|
36832 |
+
async_rl=True
|
36833 |
+
serial_mode=False
|
36834 |
+
batched_sampling=False
|
36835 |
+
num_batches_to_accumulate=2
|
36836 |
+
worker_num_splits=2
|
36837 |
+
policy_workers_per_policy=1
|
36838 |
+
max_policy_lag=1000
|
36839 |
+
num_workers=16
|
36840 |
+
num_envs_per_worker=8
|
36841 |
+
batch_size=1024
|
36842 |
+
num_batches_per_epoch=1
|
36843 |
+
num_epochs=1
|
36844 |
+
rollout=32
|
36845 |
+
recurrence=32
|
36846 |
+
shuffle_minibatches=False
|
36847 |
+
gamma=0.99
|
36848 |
+
reward_scale=1.0
|
36849 |
+
reward_clip=1000.0
|
36850 |
+
value_bootstrap=False
|
36851 |
+
normalize_returns=True
|
36852 |
+
exploration_loss_coeff=0.001
|
36853 |
+
value_loss_coeff=0.5
|
36854 |
+
kl_loss_coeff=0.0
|
36855 |
+
exploration_loss=symmetric_kl
|
36856 |
+
gae_lambda=0.95
|
36857 |
+
ppo_clip_ratio=0.1
|
36858 |
+
ppo_clip_value=0.2
|
36859 |
+
with_vtrace=False
|
36860 |
+
vtrace_rho=1.0
|
36861 |
+
vtrace_c=1.0
|
36862 |
+
optimizer=adam
|
36863 |
+
adam_eps=1e-06
|
36864 |
+
adam_beta1=0.9
|
36865 |
+
adam_beta2=0.999
|
36866 |
+
max_grad_norm=4.0
|
36867 |
+
learning_rate=0.0003
|
36868 |
+
lr_schedule=constant
|
36869 |
+
lr_schedule_kl_threshold=0.008
|
36870 |
+
lr_adaptive_min=1e-06
|
36871 |
+
lr_adaptive_max=0.01
|
36872 |
+
obs_subtract_mean=0.0
|
36873 |
+
obs_scale=255.0
|
36874 |
+
normalize_input=True
|
36875 |
+
normalize_input_keys=None
|
36876 |
+
decorrelate_experience_max_seconds=0
|
36877 |
+
decorrelate_envs_on_one_worker=True
|
36878 |
+
actor_worker_gpus=[]
|
36879 |
+
set_workers_cpu_affinity=True
|
36880 |
+
force_envs_single_thread=False
|
36881 |
+
default_niceness=0
|
36882 |
+
log_to_file=True
|
36883 |
+
experiment_summaries_interval=10
|
36884 |
+
flush_summaries_interval=30
|
36885 |
+
stats_avg=100
|
36886 |
+
summaries_use_frameskip=True
|
36887 |
+
heartbeat_interval=20
|
36888 |
+
heartbeat_reporting_interval=600
|
36889 |
+
train_for_env_steps=5000
|
36890 |
+
train_for_seconds=10000000000
|
36891 |
+
save_every_sec=120
|
36892 |
+
keep_checkpoints=2
|
36893 |
+
load_checkpoint_kind=latest
|
36894 |
+
save_milestones_sec=-1
|
36895 |
+
save_best_every_sec=5
|
36896 |
+
save_best_metric=reward
|
36897 |
+
save_best_after=100000
|
36898 |
+
benchmark=False
|
36899 |
+
encoder_mlp_layers=[512, 512]
|
36900 |
+
encoder_conv_architecture=convnet_simple
|
36901 |
+
encoder_conv_mlp_layers=[512]
|
36902 |
+
use_rnn=True
|
36903 |
+
rnn_size=512
|
36904 |
+
rnn_type=gru
|
36905 |
+
rnn_num_layers=1
|
36906 |
+
decoder_mlp_layers=[]
|
36907 |
+
nonlinearity=elu
|
36908 |
+
policy_initialization=orthogonal
|
36909 |
+
policy_init_gain=1.0
|
36910 |
+
actor_critic_share_weights=True
|
36911 |
+
adaptive_stddev=True
|
36912 |
+
continuous_tanh_scale=0.0
|
36913 |
+
initial_stddev=1.0
|
36914 |
+
use_env_info_cache=False
|
36915 |
+
env_gpu_actions=False
|
36916 |
+
env_gpu_observations=True
|
36917 |
+
env_frameskip=4
|
36918 |
+
env_framestack=1
|
36919 |
+
pixel_format=CHW
|
36920 |
+
use_record_episode_statistics=False
|
36921 |
+
with_wandb=False
|
36922 |
+
wandb_user=None
|
36923 |
+
wandb_project=sample_factory
|
36924 |
+
wandb_group=None
|
36925 |
+
wandb_job_type=SF
|
36926 |
+
wandb_tags=[]
|
36927 |
+
with_pbt=False
|
36928 |
+
pbt_mix_policies_in_one_env=True
|
36929 |
+
pbt_period_env_steps=5000000
|
36930 |
+
pbt_start_mutation=20000000
|
36931 |
+
pbt_replace_fraction=0.3
|
36932 |
+
pbt_mutation_rate=0.15
|
36933 |
+
pbt_replace_reward_gap=0.1
|
36934 |
+
pbt_replace_reward_gap_absolute=1e-06
|
36935 |
+
pbt_optimize_gamma=False
|
36936 |
+
pbt_target_objective=true_objective
|
36937 |
+
pbt_perturb_min=1.1
|
36938 |
+
pbt_perturb_max=1.5
|
36939 |
+
num_agents=-1
|
36940 |
+
num_humans=0
|
36941 |
+
num_bots=-1
|
36942 |
+
start_bot_difficulty=None
|
36943 |
+
timelimit=None
|
36944 |
+
res_w=128
|
36945 |
+
res_h=72
|
36946 |
+
wide_aspect_ratio=False
|
36947 |
+
eval_env_frameskip=1
|
36948 |
+
fps=35
|
36949 |
+
command_line=--env=doom_health_gathering_supreme --num_workers=8 --num_envs_per_worker=4 --train_for_env_steps=4000000
|
36950 |
+
cli_args={'env': 'doom_health_gathering_supreme', 'num_workers': 8, 'num_envs_per_worker': 4, 'train_for_env_steps': 4000000}
|
36951 |
+
git_hash=unknown
|
36952 |
+
git_repo_name=not a git repository
|
36953 |
+
[2024-11-08 07:45:16,921][41694] Saving configuration to /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/config.json...
|
36954 |
+
[2024-11-08 07:45:16,924][41694] Rollout worker 0 uses device cpu
|
36955 |
+
[2024-11-08 07:45:16,926][41694] Rollout worker 1 uses device cpu
|
36956 |
+
[2024-11-08 07:45:16,928][41694] Rollout worker 2 uses device cpu
|
36957 |
+
[2024-11-08 07:45:16,930][41694] Rollout worker 3 uses device cpu
|
36958 |
+
[2024-11-08 07:45:16,933][41694] Rollout worker 4 uses device cpu
|
36959 |
+
[2024-11-08 07:45:16,938][41694] Rollout worker 5 uses device cpu
|
36960 |
+
[2024-11-08 07:45:16,940][41694] Rollout worker 6 uses device cpu
|
36961 |
+
[2024-11-08 07:45:16,942][41694] Rollout worker 7 uses device cpu
|
36962 |
+
[2024-11-08 07:45:16,944][41694] Rollout worker 8 uses device cpu
|
36963 |
+
[2024-11-08 07:45:16,947][41694] Rollout worker 9 uses device cpu
|
36964 |
+
[2024-11-08 07:45:16,949][41694] Rollout worker 10 uses device cpu
|
36965 |
+
[2024-11-08 07:45:16,952][41694] Rollout worker 11 uses device cpu
|
36966 |
+
[2024-11-08 07:45:16,954][41694] Rollout worker 12 uses device cpu
|
36967 |
+
[2024-11-08 07:45:16,956][41694] Rollout worker 13 uses device cpu
|
36968 |
+
[2024-11-08 07:45:16,958][41694] Rollout worker 14 uses device cpu
|
36969 |
+
[2024-11-08 07:45:16,960][41694] Rollout worker 15 uses device cpu
|
36970 |
+
[2024-11-08 07:45:17,210][41694] Using GPUs [0] for process 0 (actually maps to GPUs [0])
|
36971 |
+
[2024-11-08 07:45:17,213][41694] InferenceWorker_p0-w0: min num requests: 5
|
36972 |
+
[2024-11-08 07:45:17,322][41694] Starting all processes...
|
36973 |
+
[2024-11-08 07:45:17,323][41694] Starting process learner_proc0
|
36974 |
+
[2024-11-08 07:45:17,406][41694] Starting all processes...
|
36975 |
+
[2024-11-08 07:45:17,418][41694] Starting process inference_proc0-0
|
36976 |
+
[2024-11-08 07:45:17,420][41694] Starting process rollout_proc0
|
36977 |
+
[2024-11-08 07:45:17,421][41694] Starting process rollout_proc1
|
36978 |
+
[2024-11-08 07:45:17,421][41694] Starting process rollout_proc2
|
36979 |
+
[2024-11-08 07:45:17,424][41694] Starting process rollout_proc3
|
36980 |
+
[2024-11-08 07:45:17,426][41694] Starting process rollout_proc4
|
36981 |
+
[2024-11-08 07:45:17,433][41694] Starting process rollout_proc5
|
36982 |
+
[2024-11-08 07:45:17,440][41694] Starting process rollout_proc6
|
36983 |
+
[2024-11-08 07:45:17,452][41694] Starting process rollout_proc7
|
36984 |
+
[2024-11-08 07:45:17,453][41694] Starting process rollout_proc8
|
36985 |
+
[2024-11-08 07:45:17,454][41694] Starting process rollout_proc9
|
36986 |
+
[2024-11-08 07:45:17,463][41694] Starting process rollout_proc10
|
36987 |
+
[2024-11-08 07:45:17,467][41694] Starting process rollout_proc11
|
36988 |
+
[2024-11-08 07:45:17,479][41694] Starting process rollout_proc12
|
36989 |
+
[2024-11-08 07:45:17,487][41694] Starting process rollout_proc13
|
36990 |
+
[2024-11-08 07:45:17,487][41694] Starting process rollout_proc14
|
36991 |
+
[2024-11-08 07:45:17,620][41694] Starting process rollout_proc15
|