# Spaces: Sleeping
import gradio as gr | |
from train import TrainingLoop | |
from scipy.special import softmax | |
import numpy as np | |
# Module-level state shared by the Gradio callbacks defined in this file.
# `train` is assigned by create_training_loop(); `frames` and `attributions`
# are assigned by generate_output().
train = None
frames = attributions = None

# Maps each index 0-7 to the human-readable feature name used as a label
# by display_softmax().
lunar_lander_spec_conversion = dict(
    enumerate(
        (
            "X-coordinate",
            "Y-coordinate",
            "Linear velocity in the X-axis",
            "Linear velocity in the Y-axis",
            "Angle",
            "Angular velocity",
            "Left leg touched the floor",
            "Right leg touched the floor",
        )
    )
)
def create_training_loop(env_spec):
    """Create the shared training loop for ``env_spec`` and return its env spec.

    A new ``TrainingLoop`` is stored in the module-level ``train`` variable,
    its ``create_agent()`` method is called, and the ``spec`` attribute of
    the loop's ``env`` is returned.

    Args:
        env_spec: Environment identifier forwarded to ``TrainingLoop``.

    Returns:
        The ``env.spec`` attribute of the newly created training loop.
    """
    global train
    # Publish the loop globally before building the agent, so the global is
    # set even if agent creation fails.
    train = loop = TrainingLoop(env_spec=env_spec)
    loop.create_agent()
    return loop.env.spec
def display_softmax(inputs):
    """Return a ``{feature name: probability}`` mapping for ``inputs``.

    ``inputs`` is converted to a NumPy array and passed through
    ``scipy.special.softmax``; each resulting value is paired, in order,
    with a name from ``lunar_lander_spec_conversion``. Pairing stops at the
    shorter of the two sequences.

    Args:
        inputs: Sequence of numeric scores, one per feature.

    Returns:
        dict mapping feature name (str) to probability (float).
    """
    scores = softmax(np.array(inputs))
    labelled = {}
    for label, score in zip(lunar_lander_spec_conversion.values(), scores):
        # Plain Python floats rather than NumPy scalars.
        labelled[label] = float(score)
    return labelled
def generate_output(num_iterations, option):
    """Compute frames and attributions with the trained agent and store them.

    Calls ``train.explain_trained`` and saves its two results in the
    module-level ``frames`` and ``attributions`` variables, then sets the
    key-frame slider's maximum to the last valid frame index.

    Args:
        num_iterations: Forwarded to ``explain_trained`` as ``num_iterations``.
        option: Forwarded to ``explain_trained`` as ``option``.

    Raises:
        gr.Error: If no training loop has been created yet (``train`` is
            still ``None``).
    """
    global frames, attributions
    if train is None:
        # Without this guard the call below fails with an opaque
        # AttributeError on None.
        raise gr.Error("Create the environment before requesting attributions.")
    frames, attributions = train.explain_trained(
        num_iterations=num_iterations, option=option
    )
    # Frames are indexed from 0, so the last selectable key frame is
    # len(frames) - 1 (never below the slider minimum of 0).
    # NOTE(review): assigning to the component attribute may not refresh a
    # slider that is already rendered in the browser -- confirm; Gradio's
    # documented route is to return an updated component through the
    # event's `outputs`.
    slider.maximum = max(len(frames) - 1, 0)
def get_frame_and_attribution(slider_value):
    """Return the frame and labelled attributions for the selected key frame.

    Reads the module-level ``frames`` and ``attributions`` filled in by
    ``generate_output``. The slider value is converted to an integer and
    clamped to the valid index range of ``frames``.

    Args:
        slider_value: Position of the key-frame slider.

    Returns:
        A ``(frame, attribution)`` tuple, where ``attribution`` is the dict
        produced by ``display_softmax``. ``(None, None)`` is returned when
        no frames have been generated yet, so that a slider change made
        before any attribution run does not raise.
    """
    if frames is None or attributions is None or len(frames) == 0:
        return None, None
    # Clamp on both sides: the slider's range is not tied to the number of
    # frames, and a float value cannot be used as a sequence index.
    index = max(0, min(int(slider_value), len(frames) - 1))
    return frames[index], display_softmax(attributions[index])
# Page layout for the demo.
# NOTE(review): the nesting below (what sits inside the tab and the row) was
# reconstructed from a copy of this file that had lost its indentation --
# confirm it against the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("# Introspection in Deep Reinforcement Learning")
    # Raw strings keep the trailing backslashes that Markdown reads as hard
    # line breaks.
    gr.Markdown(r"""
\#\# How this space works:
This space was created for trying to apply [Integrated Gradients](https://captum.ai/docs/extension/integrated_gradients\#:~:text=Integrated%20gradients%20is%20a%20simple,and%20feature%20or%20rule%20extraction.) \
into Deep Reinforcement Learning Scenarios. It uses PyTorch's captum library for interpretability, and Gymnasium for the emulator of the continuous lunar lander.
\#\#\# Training algorithm: [DDPG](https://arxiv.org/abs/1509.02971)
This agent was trained with Deep Deterministic Policy Gradients, and outputs an average reward of 260.8 per episode (successful)
\#\#\# Using this space:
- First, select the environment (futurely there will be more environments available)
- Then, select if you want the baseline (see IG paper for more detail) to be \
a torch `tensor` of zeroes, or a running average of the initial frames of a few episodes (selected on the right) \
- Click attribute and wait a few seconds (usually 20-25s) for the attributions to be computed with the trained agent over 10 episodes
- Finally, use the slider to get a key frame that tells the attributions of the agent. They're under a Softmax to fit the component's requirements for a probability distribution.
""")
    with gr.Tab(label="Attribute"):
        # Step 1: choose the environment and build the training loop.
        env_spec = gr.Dropdown(
            choices=["LunarLander-v2"],
            type="value",
            multiselect=False,
            label="Environment Specification (e.g.: LunarLander-v2)",
        )
        env = gr.Interface(
            title="Create the Environment",
            allow_flagging="never",
            inputs=env_spec,
            fn=create_training_loop,
            outputs=gr.JSON(),
        )
        # Step 2: choose the baseline and run the attribution.
        with gr.Row():
            # type="index" makes the callback receive the position of the
            # selected choice rather than its text.
            option = gr.Dropdown(
                choices=["Torch Tensor of 0's", "Running Average"],
                type="index",
            )
            baselines = gr.Slider(
                label="Number of Baseline Iterations",
                interactive=True,
                minimum=0,
                maximum=100,
                value=10,
                step=5,
                info="Baseline inputs to collect for the average",
                render=True,
            )
        gr.Button("ATTRIBUTE").click(fn=generate_output, inputs=[baselines, option])
        # Step 3: browse key frames; live=True re-runs the callback on every
        # slider change.
        slider = gr.Slider(label="Key Frame", minimum=0, maximum=1000, step=1, value=0)
        gr.Interface(
            fn=get_frame_and_attribution,
            inputs=slider,
            live=True,
            outputs=[gr.Image(label="Timestep"), gr.Label(label="Attributions")],
        )
    gr.Markdown(r"""\#\# Local Usage and Packages \
`pip install torch gymnasium 'gymnasium[box2d]'` \
You might need to install Box2D Separately, which requires a swig package to compile code from Python into C/C++, which is the language that Box2d was built in: \
`brew install swig` \
`pip install box2d` \
\#\# Average Score: 164.38 (significant improvement from discrete action spaces) \
For each step, the reward: \
- is increased/decreased the closer/further the lander is to the landing pad. \
- is increased/decreased the slower/faster the lander is moving.\
- is decreased the more the lander is tilted (angle not horizontal). \
- is increased by 10 points for each leg that is in contact with the ground. \
- is decreased by 0.03 points each frame a side engine is firing.\
- is decreased by 0.3 points each frame the main engine is firing. \
The episode receives an additional reward of -100 or +100 points for crashing or landing safely respectively. An episode is considered a solution if it scores at least 200 points.\*\* \
\#\# `train()` and `load_trained()` \
`load_trained()` function loads a pre-trained model that ran through 1000 episodes of training, while `train()` does training from scratch. You can edit which one of the functions is running from the bottom of the main.py file. If you set render_mode=False, the program will train a lot faster.)""")

demo.launch()