gabehubner committed
Commit f6f3371 · 1 Parent(s): ec3a146

working attribution mechanism

__pycache__/ddpg.cpython-311.pyc CHANGED
Binary files a/__pycache__/ddpg.cpython-311.pyc and b/__pycache__/ddpg.cpython-311.pyc differ
 
__pycache__/train.cpython-311.pyc CHANGED
Binary files a/__pycache__/train.cpython-311.pyc and b/__pycache__/train.cpython-311.pyc differ
 
ddpg.py CHANGED
@@ -143,7 +143,12 @@ class ActorNetwork(nn.Module):
         self.to(self.device)
 
     def forward(self, state):
-        print(f"State in forward function: {state.shape=}")
+
+        try:
+            assert state.shape == T.Size([8])
+        except AssertionError:
+            raise Exception(f"Wrong shape {state.shape=}")
+
         x = self.fc1(state)
         x = self.bn1(x)
         x = F.relu(x)
@@ -178,21 +183,19 @@ class Agent(object):
         self.noise = OUActionNoise(mu=np.zeros(n_actions))
 
         self.attributions = None
-        self.ig = None
+        self.ig : IntegratedGradients = None
 
         self.update_network_parameters(tau=1)
 
-    def choose_action(self, observation, baseline : T.Tensor=None):
+    def choose_action(self, observation, baseline: T.Tensor = None):
         self.actor.eval()
         observation = T.tensor(observation, dtype=T.float).to(self.actor.device)
-        print(f"Observation: {observation.shape=}")
+        # print(f"Observation: {observation.shape=}")
         mu = self.actor(observation).to(self.actor.device)
 
-        # if attribution is not None:
-        #     if baseline is None:
-        #         baseline = T.zeros(observation.shape)
-        #     attributions = attribution.attribute((observation), baselines=baseline, target=0)
-        #     print('Attributions:', attributions)
+        if self.ig is not None:
+            attribution = self.ig.attribute(observation, baselines=baseline, n_steps=1)
+            print('Attributions:', attribution)
 
 
         mu_prime = mu + T.tensor(self.noise(), dtype=T.float).to(self.actor.device)
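
For reference, a minimal, self-contained sketch of how Captum's IntegratedGradients is typically wired to an actor-style network like the one touched in ddpg.py. The layer sizes, the zero baseline, the batched input, and the explicit target index are illustrative assumptions, not code from this repository (the commit's choose_action passes an unbatched observation and omits target).

```python
# Minimal sketch, not from this repository: standard Captum IntegratedGradients
# usage against a small actor-style network. The 8-dimensional observation, the
# 2-dimensional action head, the zero baseline, and target=0 are assumptions.
import torch
import torch.nn as nn
from captum.attr import IntegratedGradients

actor = nn.Sequential(
    nn.Linear(8, 64),   # 8 = assumed observation size
    nn.ReLU(),
    nn.Linear(64, 2),   # 2 = assumed number of continuous actions
    nn.Tanh(),
)

ig = IntegratedGradients(actor)

obs = torch.rand(1, 8)        # batched observation
baseline = torch.zeros(1, 8)  # zero baseline (Captum's default when baselines=None)

# target picks which action component the attributions explain
attributions = ig.attribute(obs, baselines=baseline, target=0, n_steps=50)
print(attributions.shape)  # torch.Size([1, 8]): one score per observation feature
```

When the forward output has more than one element per example, Captum generally needs a target so it can take the gradient of a single scalar per example; that is why the sketch passes target=0 explicitly.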
train.py CHANGED
@@ -9,12 +9,15 @@ from captum.attr import (IntegratedGradients)
 class TrainingLoop:
     def __init__(self, env_spec, output_path='./output/', seed=0, **kwargs):
         assert env_spec in gym.envs.registry.keys()
+
         defaults = {
             "continuous": True,
             "gravity": -10.0,
             "render_mode": None
         }
 
+        defaults.update(**kwargs)
+
         self.env = gym.make(
             env_spec,
             **defaults
@@ -87,6 +90,7 @@ class TrainingLoop:
 
     def _collect_running_baseline_average(self, num_iterations: int) -> torch.Tensor:
         assert self.agent is not None
+        print("--------- Collecting running baseline average ----------")
 
         self.agent.load_models()
 
@@ -98,16 +102,19 @@ class TrainingLoop:
             obs, _ = self.env.reset()
 
             sum_obs += obs
-            print(f"Baseline on interation #{i}: {obs}")
+            # print(f"Baseline on interation #{i}: {obs}")
 
             while not done:
-                act = self.agent.choose_action(obs, attribution=None, baseline=None)
+                act = self.agent.choose_action(obs, baseline=None)
                 new_state, reward, terminated, truncated, info = self.env.step(act)
                 done = terminated or truncated
                 score += reward
                 obs = new_state
 
+        print(f"Baseline collected: {sum_obs / num_iterations}")
+
         self.env.close()
+
 
         return sum_obs / num_iterations
 
@@ -122,8 +129,12 @@ class TrainingLoop:
 
         baseline = baseline_options[option]
 
+        print("\n\n\n\n--------- Performing Attributions -----------")
+
         self.agent.load_models()
 
+
+        print(self.agent.actor)
         ig = IntegratedGradients(self.agent.actor)
         self.agent.ig = ig
 
@@ -134,7 +145,7 @@ class TrainingLoop:
             score = 0
             obs, _ = self.env.reset()
             while not done:
-                act = self.agent.choose_action(obs, baseline=baseline)
+                act = self.agent.choose_action(observation=obs, baseline=baseline)
                 new_state, reward, terminated, truncated, info = self.env.step(act)
                 done = terminated or truncated
                 score += reward
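
As a usage note, a minimal sketch of the flow this commit enables in train.py: average reset observations into a baseline, hand an IntegratedGradients instance built on the actor to the agent, then attribute a live observation against that baseline. The environment id, the gymnasium-style API, the stand-in linear actor, and the episode count are assumptions for illustration, not the repository's code.

```python
# Minimal sketch, assumptions throughout: gymnasium-style API (reset returns
# (obs, info), step returns a 5-tuple), "LunarLander-v2" as the environment id,
# and a stand-in linear "actor" instead of the repository's trained ActorNetwork.
import gymnasium as gym
import torch
from captum.attr import IntegratedGradients

env = gym.make("LunarLander-v2", continuous=True)
actor = torch.nn.Linear(8, 2)  # placeholder for the trained actor

# 1) Running baseline average: mean of reset observations over a few episodes.
num_iterations = 10
sum_obs = torch.zeros(env.observation_space.shape[0])
for _ in range(num_iterations):
    obs, _ = env.reset()
    sum_obs += torch.tensor(obs, dtype=torch.float)
baseline = (sum_obs / num_iterations).unsqueeze(0)

# 2) Attribute a fresh observation against that baseline.
ig = IntegratedGradients(actor)
obs, _ = env.reset()
obs = torch.tensor(obs, dtype=torch.float).unsqueeze(0)
attributions = ig.attribute(obs, baselines=baseline, target=0, n_steps=50)
print("Attributions:", attributions)

env.close()
```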