from typing import Any, Dict, Tuple
import numpy as np
import gymnasium as gym
import rospy
[docs]
class NormalizeActionWrapper(gym.Wrapper):
    """
    A wrapper that exposes a normalized action space for an environment.

    The wrapper advertises a Box action space with bounds [-1.0, 1.0] and the
    same shape as the wrapped environment's action space. Actions passed to
    ``step`` are linearly rescaled to the wrapped environment's original
    [low, high] bounds before being forwarded.

    Args:
        env (gym.Env): The environment to wrap. Its action space must be a Box.

    Raises:
        ValueError: If the action space of the environment is not of type Box.
    """

    def __init__(self, env: gym.Env) -> None:
        # init the wrapper
        super().__init__(env)

        action_space = env.action_space

        # Validate the space type BEFORE touching ``low``/``high``: non-Box
        # spaces (e.g. Discrete) do not have these attributes, and reading
        # them first would raise AttributeError instead of the documented
        # ValueError.
        if not isinstance(action_space, gym.spaces.Box):
            raise ValueError(f"Expected env.action_space to be of type Box, but got {type(action_space)}")

        # Remember the bounds of the original action space for rescaling
        self.low = action_space.low
        self.high = action_space.high

        # Expose a Box with low=-1.0 and high=1.0 of the same shape
        self.action_space = gym.spaces.Box(low=-1.0, high=1.0, shape=action_space.shape, dtype=np.float32)

    def denormalize_action(self, action: np.ndarray) -> np.ndarray:
        """
        Map a normalized action onto the original action space.

        Linearly rescales ``action`` so that -1.0 maps to ``self.low`` and
        1.0 maps to ``self.high``, then clips the result to
        [``self.low``, ``self.high``].

        Args:
            action (np.ndarray): The normalized action (nominally in [-1.0, 1.0]).

        Returns:
            np.ndarray: The action expressed in the original action space.
        """
        # Affine map [-1, 1] -> [low, high]
        action = self.low + (action + 1.0) * 0.5 * (self.high - self.low)
        # Inputs outside [-1, 1] would land outside the original bounds; clip them
        action = np.clip(action, self.low, self.high)
        return action

    def reverse_action(self, action: np.ndarray) -> np.ndarray:
        """
        Map an action from the original action space to the normalized range.

        This is the inverse of the affine map in ``denormalize_action``:
        ``self.low`` maps to -1.0 and ``self.high`` maps to 1.0. No clipping
        is applied.

        Args:
            action (np.ndarray): An action expressed in the original action space.

        Returns:
            np.ndarray: The corresponding normalized action.
        """
        # NOTE: divides by (high - low); a dimension with high == low yields
        # a non-finite value here.
        action = 2 * (action - self.low) / (self.high - self.low) - 1.0
        return action

    def step(self, action: np.ndarray) -> Tuple[Any, float, bool, bool, Dict[str, Any]]:
        """
        Take a step in the environment using a normalized action.

        Args:
            action (np.ndarray): The normalized action to take.

        Returns:
            The result of the wrapped environment's ``step`` call, unchanged.
            Per the return annotation this is a 5-tuple:

            observation (Any): The observation after the step.
            reward (float): The reward for taking the given action.
            terminated (bool): Whether the episode has terminated.
            truncated (bool): Whether the episode was truncated.
            info (dict): Additional information about the environment.
        """
        # Rescale to the original bounds before delegating to the wrapped env
        action = self.denormalize_action(action)
        return self.env.step(action)