HugBot committed on
Commit
8233a46
·
1 Parent(s): 68df3b3
Files changed (1) hide show
  1. README.md +101 -2
README.md CHANGED
@@ -25,13 +25,112 @@ model-index:
25
  This is a trained model of a **PPO** agent playing **LunarLander-v2**
26
  using the [stable-baselines3 library](https://github.com/DLR-RM/stable-baselines3).
27
 
 
 
 
28
  ## Usage (with Stable-baselines3)
29
- TODO: Add your code
30
 
31
 
32
  ```python
33
- from stable_baselines3 import ...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  from huggingface_sb3 import load_from_hub
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
 
 
 
 
36
  ...
37
  ```
 
25
  This is a trained model of a **PPO** agent playing **LunarLander-v2**
26
  using the [stable-baselines3 library](https://github.com/DLR-RM/stable-baselines3).
27
 
28
+ ## Colab
29
+ https://colab.research.google.com/github/huggingface/deep-rl-class/blob/master/notebooks/unit1/unit1.ipynb#scrollTo=PAEVwK-aahfx
30
+
31
  ## Usage (with Stable-baselines3)
 
32
 
33
 
34
  ```python
35
+ import gymnasium
36
+
37
+ from huggingface_sb3 import load_from_hub, package_to_hub
38
+ from huggingface_hub import notebook_login # To log in to our Hugging Face account so that we can upload models to the Hub.
39
+
40
+ from stable_baselines3 import PPO
41
+ from stable_baselines3.common.env_util import make_vec_env
42
+ from stable_baselines3.common.evaluation import evaluate_policy
43
+ from stable_baselines3.common.monitor import Monitor
44
+
45
+ import gymnasium as gym
46
+
47
+ # We create our environment with gym.make("<name_of_the_environment>")
48
+ env = gym.make("LunarLander-v2")
49
+ env.reset()
50
+ print("_____OBSERVATION SPACE_____ \n")
51
+ print("Observation Space Shape", env.observation_space.shape)
52
+ print("Sample observation", env.observation_space.sample()) # Get a random observation
53
+
54
+ print("\n _____ACTION SPACE_____ \n")
55
+ print("Action Space Shape", env.action_space.n)
56
+ print("Action Space Sample", env.action_space.sample()) # Take a random action
57
+
58
+ # Create the environment
59
+ env = make_vec_env('LunarLander-v2', n_envs=16)
60
+ # TODO: Define a PPO MlpPolicy architecture
61
+ # We use a multilayer perceptron (MlpPolicy) because the input is a vector,
62
+ # if we had frames as input we would use CnnPolicy
63
+ model = PPO('MlpPolicy', env, verbose=1)
64
+ # TODO: Train it for 2,000,000 timesteps
65
+ model.learn(total_timesteps=int(2e6))
66
+
67
+ # TODO: Specify file name for model and save the model to file
68
+ model_name = "ppo-LunarLander-v1"
69
+ model.save(model_name)
70
+
71
+ # TODO: Evaluate the agent
72
+ # Create a new environment for evaluation
73
+ eval_env = Monitor(gym.make("LunarLander-v2"))
74
+
75
+ # Evaluate the model with 10 evaluation episodes and deterministic=True
76
+ mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=10, deterministic=True)
77
+
78
+ # Print the results
79
+ print(f"mean_reward={mean_reward:.2f} +/- {std_reward}")
80
+
81
+ import gymnasium as gym
82
+ from stable_baselines3.common.vec_env import DummyVecEnv
83
+ from stable_baselines3.common.env_util import make_vec_env
84
+
85
+ from huggingface_sb3 import package_to_hub
86
+
87
+ ## TODO: Define a repo_id
88
+ ## repo_id is the id of the model repository from the Hugging Face Hub (repo_id = {organization}/{repo_name}, for instance ThomasSimonini/ppo-LunarLander-v2)
89
+ repo_id = "HugBot/ppo-LunarLander-v2"
90
+
91
+ # TODO: Define the name of the environment
92
+ env_id = "LunarLander-v2"
93
+
94
+ # Create the evaluation env and set the render_mode="rgb_array"
95
+ eval_env = DummyVecEnv([lambda: Monitor(gym.make(env_id, render_mode="rgb_array"))])
96
+
97
+
98
+ # TODO: Define the model architecture we used
99
+ model_architecture = "PPO"
100
+
101
+ ## TODO: Define the commit message
102
+ commit_message = "Upload PPO LunarLander-v2 trained agent"
103
+
104
+ # package_to_hub saves and evaluates the model, generates a model card and records a replay video of your agent before pushing the repo to the Hub
105
+ package_to_hub(model=model, # Our trained model
106
+ model_name=model_name, # The name of our trained model
107
+ model_architecture=model_architecture, # The model architecture we used: in our case PPO
108
+ env_id=env_id, # Name of the environment
109
+ eval_env=eval_env, # Evaluation Environment
110
+ repo_id=repo_id, # id of the model repository from the Hugging Face Hub (repo_id = {organization}/{repo_name}, for instance ThomasSimonini/ppo-LunarLander-v2)
111
+ commit_message=commit_message)
112
+
113
  from huggingface_sb3 import load_from_hub
114
+ repo_id = "HugBot/ppo-LunarLander-v2" # The repo_id
115
+ filename = "ppo-LunarLander-v1.zip" # The model filename.zip
116
+
117
+ # When the model was trained on Python 3.8 the pickle protocol is 5
118
+ # But Python 3.6, 3.7 use protocol 4
119
+ # In order to get compatibility we need to:
120
+ # 1. Install pickle5 (we did it at the beginning of the colab)
121
+ # 2. Create a custom empty object we pass as parameter to PPO.load()
122
+ custom_objects = {
123
+ "learning_rate": 0.0,
124
+ "lr_schedule": lambda _: 0.0,
125
+ "clip_range": lambda _: 0.0,
126
+ }
127
+
128
+ checkpoint = load_from_hub(repo_id, filename)
129
+ model = PPO.load(checkpoint, custom_objects=custom_objects, print_system_info=True)
130
 
131
+ #@title
132
+ eval_env = Monitor(gym.make("LunarLander-v2"))
133
+ mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=10, deterministic=True)
134
+ print(f"mean_reward={mean_reward:.2f} +/- {std_reward}")
135
  ...
136
  ```