# Source code for multiros.wrappers.normalize_action_wrapper

from typing import Any, Dict, Tuple

import numpy as np
import gymnasium as gym

import rospy



# [docs]
class NormalizeActionWrapper(gym.Wrapper):
    """
    Expose a Box action space rescaled to the range [-1.0, 1.0].

    The wrapper advertises ``Box(low=-1.0, high=1.0)`` with the same shape as
    the wrapped environment's action space. Every action passed to ``step`` is
    mapped linearly back onto the original ``[low, high]`` bounds before it is
    forwarded to the wrapped environment.

    Args:
        env (gym.Env): The environment to wrap. Its action space must be a Box.

    Raises:
        ValueError: If the action space of the environment is not of type Box.

    Note:
        The linear mapping uses ``high - low`` directly, so the original bounds
        are expected to be finite; infinite bounds yield non-finite actions.
    """

    def __init__(self, env: gym.Env) -> None:
        # init the wrapper
        super().__init__(env)

        # get the current action space
        action_space = env.action_space

        # Validate the type BEFORE touching Box-only attributes. Reading
        # `.low` / `.high` first would surface an AttributeError for non-Box
        # spaces instead of the documented ValueError.
        if not isinstance(action_space, gym.spaces.Box):
            raise ValueError(f"Expected env.action_space to be of type Box, but got {type(action_space)}")

        # Keep the bounds of the original action space for (de)normalization
        self.low = action_space.low
        self.high = action_space.high

        # Set the action space of the wrapper to be a Box with low=-1.0 and high=1.0
        self.action_space = gym.spaces.Box(low=-1.0, high=1.0, shape=action_space.shape, dtype=np.float32)

    def denormalize_action(self, action: np.ndarray) -> np.ndarray:
        """
        Map a normalized action from [-1.0, 1.0] to the original action range.

        Args:
            action (np.ndarray): The normalized action, nominally in [-1.0, 1.0].

        Returns:
            np.ndarray: The action expressed in the original ``[low, high]``
            range. Inputs outside [-1.0, 1.0] are clipped to the bounds.
        """
        # Accept array-likes (lists, tuples) as well as ndarrays
        action = np.asarray(action)

        # Linear map: -1.0 -> low, +1.0 -> high
        action = self.low + (action + 1.0) * 0.5 * (self.high - self.low)

        # Clip the action to be within the range of the original action space
        return np.clip(action, self.low, self.high)

    def reverse_action(self, action: np.ndarray) -> np.ndarray:
        """
        Map an action from the original action range to [-1.0, 1.0].

        This is the inverse of ``denormalize_action`` for inputs that lie
        within the original bounds. The result is not clipped.

        Args:
            action (np.ndarray): An action in the original ``[low, high]`` range.

        Returns:
            np.ndarray: The normalized action. Dimensions where
            ``high == low`` involve a division by zero and therefore yield
            non-finite values.
        """
        # Accept array-likes (lists, tuples) as well as ndarrays
        action = np.asarray(action)

        # Linear map: low -> -1.0, high -> +1.0
        return 2 * (action - self.low) / (self.high - self.low) - 1.0

    def step(self, action: np.ndarray) -> Tuple[Any, float, bool, bool, Dict[str, Any]]:
        """
        Take a step in the environment using a normalized action.

        Args:
            action (np.ndarray): The normalized action to take.

        Returns:
            The unmodified result of the wrapped environment's ``step`` called
            with the denormalized action. Per the return annotation this is
            the 5-tuple ``(observation, reward, terminated, truncated, info)``.
        """
        # Denormalize the action before passing it to the underlying environment's step method
        return self.env.step(self.denormalize_action(action))