Gymnasium Wrapper¶

`GymnasiumWrapper` ¶

Bases: Env

Wraps a gyozas Environment as a standard gymnasium.Env.

Since gyozas action sets are variable-size (different branching candidates at each step), this wrapper uses a Discrete action space that is resized to the size of the current action set after every reset and step; actions passed to `step` are positional indices into that set. The action_set attribute holds the valid actions for the current step.

Parameters:

Name	Type	Description	Default
`instance_generator`	`InstanceGenerator`	Iterator yielding PySCIPOpt Model instances.	required
`observation_function`		Gyozas observation function. Defaults to `NodeBipartite`.	`None`
`reward_function`		Gyozas reward function. Defaults to `NNodes`.	`None`
`information_function`		Gyozas information function. Defaults to `Empty`.	`None`
`dynamics`		Gyozas dynamics. Defaults to `BranchingDynamics`.	`None`
`scip_params`		SCIP parameters dict applied at the start of each episode.	`None`
`max_steps`	`int \| None`	If set, truncate episodes after this many steps.	`None`

Source code in gyozas/gymnasium_wrapper.py

class GymnasiumWrapper(gym.Env):
    """Wraps a gyozas ``Environment`` as a standard ``gymnasium.Env``.

    Since gyozas action sets are variable-size (different branching candidates
    at each step), this wrapper exposes a ``Discrete`` action space that is
    resized to the length of the current action set after every ``reset`` and
    ``step``. Actions passed to ``step`` are positional indices into the
    ``action_set`` attribute, which holds the valid actions for the current
    step.

    Parameters
    ----------
    instance_generator
        Iterator yielding PySCIPOpt Model instances.
    observation_function
        Gyozas observation function. Defaults to ``NodeBipartite``.
    reward_function
        Gyozas reward function. Defaults to ``NNodes``.
    information_function
        Gyozas information function. Defaults to ``Empty``.
    dynamics
        Gyozas dynamics. Defaults to ``BranchingDynamics``.
    scip_params
        SCIP parameters dict applied at the start of each episode.
    max_steps
        If set, truncate episodes after this many steps.
    render_mode
        Stored on the wrapper and forwarded unchanged to the underlying
        gyozas ``Environment``.
    """

    metadata = {"render_modes": []}

    def __init__(
        self,
        instance_generator: InstanceGenerator,
        observation_function=None,
        reward_function=None,
        information_function=None,
        dynamics=None,
        scip_params=None,
        max_steps: int | None = None,
        render_mode: str | None = None,
    ) -> None:
        super().__init__()
        self.render_mode = render_mode
        self.env = Environment(
            instance_generator=instance_generator,
            observation_function=observation_function,
            reward_function=reward_function,
            information_function=information_function,
            dynamics=dynamics,
            scip_params=scip_params,
            render_mode=render_mode,
        )
        self.max_steps = max_steps
        self._step_count = 0
        # Valid gyozas actions for the current step; None until reset() is
        # called and again after the episode terminates or is truncated.
        self.action_set: list[int] | None = None

        # Gymnasium spaces -- both are placeholders at construction time.
        # The action space is replaced with Discrete(len(action_set)) on
        # every reset/step; the observation space is left as an empty Dict.
        self.action_space = spaces.Discrete(1)
        self.observation_space = spaces.Dict({})  # placeholder

    def reset(self, *, seed: int | None = None, options: dict | None = None) -> tuple[Any, dict]:
        """Reset the environment and return initial observation and info.

        Parameters
        ----------
        seed
            Random seed for reproducibility. When given, it seeds both the
            Gymnasium-managed ``np_random`` generator and the underlying
            gyozas environment.
        options
            Unused, for Gymnasium API compatibility.

        Returns
        -------
        observation
            The initial observation.
        info
            Dictionary with ``action_set``, the information function output
            under ``gyozas_info``, and the reward value returned by the
            underlying reset under ``reward_offset``.

        Raises
        ------
        RuntimeError
            If the initial instance and 100 subsequent retries all finish
            without requiring any agent decision.
        """
        # Follow the Gymnasium contract: let the base class seed
        # ``self.np_random`` so wrappers/utilities relying on it are
        # reproducible as well.
        super().reset(seed=seed)

        if seed is not None:
            self.env.seed(seed)

        obs, action_set, reward, done, info = self.env.reset()
        self._step_count = 0

        # When the instance is solved at the root node (e.g. by presolving),
        # obs and action_set are None.  Keep generating new instances until
        # we get one that actually requires branching decisions.
        _retries = 0
        while done:
            _retries += 1
            if _retries > 100:
                raise RuntimeError(
                    "GymnasiumWrapper.reset() got 100 consecutive instances that were "
                    "solved without any agent decisions (e.g. solved by presolving). "
                    "Use a harder instance generator."
                )
            obs, action_set, reward, done, info = self.env.reset()

        self.action_set = action_set
        # Resize the action space to the number of currently valid actions.
        self.action_space = spaces.Discrete(len(action_set))

        info_dict = {"action_set": action_set, "gyozas_info": info, "reward_offset": reward}
        return obs, info_dict

    def step(self, action: int) -> tuple[Any, float, bool, bool, dict]:
        """Take a step in the environment.

        Parameters
        ----------
        action
            Index into the current ``action_set``. The wrapper translates
            this positional index to the actual gyozas action.

        Returns
        -------
        observation
            The new observation (None if terminated).
        reward
            The step reward, converted to ``float``.
        terminated
            True if the solver finished.
        truncated
            True if ``max_steps`` was reached before the solver finished.
        info
            Dictionary with ``action_set`` (as returned by the underlying
            step) and the information function output under ``gyozas_info``.

        Raises
        ------
        RuntimeError
            If there is no current action set (``reset`` has not been called,
            or the previous episode already terminated or was truncated).
        ValueError
            If ``action`` is not a valid index into the current action set.
        """
        if self.action_set is None:
            raise RuntimeError("No action set available. Call reset() first.")

        # Map positional index to actual action
        if 0 <= action < len(self.action_set):
            gyozas_action = self.action_set[action]
        else:
            raise ValueError(f"Action {action} out of range [0, {len(self.action_set)})")

        obs, action_set, reward, done, info = self.env.step(gyozas_action)
        self._step_count += 1
        self.action_set = action_set

        # Only resize when a new action set exists; otherwise the previous
        # action space is left in place.
        if action_set is not None:
            self.action_space = spaces.Discrete(len(action_set))

        terminated = done
        truncated = False
        if self.max_steps is not None and self._step_count >= self.max_steps and not done:
            truncated = True
            # Abandon the unfinished episode: close the underlying env and
            # clear the action set so further step() calls raise until the
            # next reset().
            self.env.close()
            self.action_set = None

        # Note: on truncation the info still carries the action set returned
        # by the underlying step, even though ``self.action_set`` was cleared.
        info_dict = {"action_set": action_set, "gyozas_info": info}
        return obs, float(reward), terminated, truncated, info_dict

    def close(self) -> None:
        """Close the underlying gyozas environment."""
        self.env.close()

`reset(*, seed=None, options=None)` ¶

Reset the environment and return initial observation and info.

Parameters:

Name	Type	Description	Default
`seed`	`int \| None`	Random seed for reproducibility.	`None`
`options`	`dict \| None`	Unused, for Gymnasium API compatibility.	`None`

Returns:

Type	Description
`observation`	The initial observation.
`info`	Dictionary with `action_set`, the information function output (`gyozas_info`), and the reward returned by the underlying reset (`reward_offset`).

Source code in gyozas/gymnasium_wrapper.py

def reset(self, *, seed: int | None = None, options: dict | None = None) -> tuple[Any, dict]:
    """Reset the environment and return initial observation and info.

    Parameters
    ----------
    seed
        Random seed for reproducibility.
    options
        Unused, for Gymnasium API compatibility.

    Returns
    -------
    observation
        The initial observation.
    info
        Dictionary with ``action_set`` and any information function output.
    """
    if seed is not None:
        self.env.seed(seed)

    obs, action_set, reward, done, info = self.env.reset()
    self._step_count = 0

    # When the instance is solved at the root node (e.g. by presolving),
    # obs and action_set are None.  Keep generating new instances until
    # we get one that actually requires branching decisions.
    _retries = 0
    while done:
        _retries += 1
        if _retries > 100:
            raise RuntimeError(
                "GymnasiumWrapper.reset() got 100 consecutive instances that were "
                "solved without any agent decisions (e.g. solved by presolving). "
                "Use a harder instance generator."
            )
        obs, action_set, reward, done, info = self.env.reset()

    self.action_set = action_set
    self.action_space = spaces.Discrete(len(action_set))

    info_dict = {"action_set": action_set, "gyozas_info": info, "reward_offset": reward}
    return obs, info_dict

`step(action)` ¶

Take a step in the environment.

Parameters:

Name	Type	Description	Default
`action`	`int`	Index into the current `action_set`. The wrapper translates this positional index to the actual gyozas action.	required

Returns:

Type	Description
`observation`	The new observation (None if terminated).
`reward`	The step reward.
`terminated`	True if the solver finished.
`truncated`	True if `max_steps` was reached.
`info`	Dictionary with `action_set` and any information function output.

Source code in gyozas/gymnasium_wrapper.py

def step(self, action: int) -> tuple[Any, float, bool, bool, dict]:
    """Apply one action and advance the wrapped environment.

    Parameters
    ----------
    action
        Position of the chosen action inside the current ``action_set``;
        it is translated to the gyozas action stored at that position.

    Returns
    -------
    observation
        Observation returned by the underlying step (None if terminated).
    reward
        Reward of this step as a ``float``.
    terminated
        The ``done`` flag reported by the underlying environment.
    truncated
        True when ``max_steps`` was reached and the episode was not done.
    info
        Dictionary with ``action_set`` (as returned by the underlying
        step) and ``gyozas_info`` (the information function output).

    Raises
    ------
    RuntimeError
        If no action set is available (``reset`` must be called first).
    ValueError
        If ``action`` is outside ``[0, len(action_set))``.
    """
    current_set = self.action_set
    if current_set is None:
        raise RuntimeError("No action set available. Call reset() first.")

    # Reject indices that do not address an entry of the current set.
    if not 0 <= action < len(current_set):
        raise ValueError(f"Action {action} out of range [0, {len(current_set)})")

    next_obs, next_set, step_reward, finished, extras = self.env.step(current_set[action])
    self._step_count += 1
    self.action_set = next_set

    # Keep the action space in sync with the new candidates, if any.
    if next_set is not None:
        self.action_space = spaces.Discrete(len(next_set))

    truncated = False
    if not finished and self.max_steps is not None and self._step_count >= self.max_steps:
        # Step budget exhausted on an unfinished episode: shut the
        # underlying env down and require a reset() before stepping again.
        truncated = True
        self.env.close()
        self.action_set = None

    return (
        next_obs,
        float(step_reward),
        finished,
        truncated,
        {"action_set": next_set, "gyozas_info": extras},
    )

`close()` ¶

Close the underlying gyozas environment.

Source code in gyozas/gymnasium_wrapper.py

def close(self) -> None:
    """Shut down the wrapped gyozas environment by delegating to its ``close``."""
    wrapped_env = self.env
    wrapped_env.close()

Gymnasium Wrapper¶