Skip to content

Reward Functions

Protocol

RewardFunction

Bases: Protocol

Source code in gyozas/rewards/__init__.py
@runtime_checkable
class RewardFunction(Protocol):
    """Structural interface for reward functions.

    Any object exposing ``reset(model)`` and ``extract(model, done)`` matches
    this protocol; because it is ``runtime_checkable``, ``isinstance`` checks
    look for the presence of those two methods.
    """

    def reset(self, model: Model) -> None:
        """Prepare the reward function for use with ``model``."""

    def extract(self, model: Model, done: bool) -> float:
        """Return the reward value for ``model``; ``done`` is supplied by the caller."""

Arithmetic Composition

ArithmeticMixin

Mixin that equips a reward function with arithmetic operators.

Concrete reward classes should inherit from this mixin in addition to implementing reset / extract. The resulting objects satisfy the RewardFunction protocol and compose freely, for example:

reward = (NNodes() + LPIterations() * 0.1).cumsum()
Source code in gyozas/rewards/arithmetic.py
class ArithmeticMixin:
    """Mixin that equips a reward function with arithmetic operators.

    Concrete reward classes should inherit from this mixin in addition to
    implementing ``reset`` / ``extract``.  The resulting objects satisfy the
    ``RewardFunction`` protocol and compose freely::

        reward = (NNodes() + LPIterations() * 0.1).cumsum()

    Every operator and helper returns a new wrapper object (``_BinaryOp``,
    ``_UnaryOp`` or ``_CumSum``) built from the operand(s); ``self`` is never
    modified.
    """

    # --- binary operators ---------------------------------------------------
    # Each method builds a ``_BinaryOp`` from the two operands and the matching
    # function.  The reflected (``__r*__``) variants pass ``other`` first so the
    # operand order of the written expression is kept.

    def __add__(self, other: object) -> ArithmeticMixin:
        return _BinaryOp(self, other, _op.add)

    def __radd__(self, other: object) -> ArithmeticMixin:
        return _BinaryOp(other, self, _op.add)

    def __sub__(self, other: object) -> ArithmeticMixin:
        return _BinaryOp(self, other, _op.sub)

    def __rsub__(self, other: object) -> ArithmeticMixin:
        return _BinaryOp(other, self, _op.sub)

    def __mul__(self, other: object) -> ArithmeticMixin:
        return _BinaryOp(self, other, _op.mul)

    def __rmul__(self, other: object) -> ArithmeticMixin:
        return _BinaryOp(other, self, _op.mul)

    def __matmul__(self, other: object) -> ArithmeticMixin:
        return _BinaryOp(self, other, _op.matmul)

    def __rmatmul__(self, other: object) -> ArithmeticMixin:
        return _BinaryOp(other, self, _op.matmul)

    def __truediv__(self, other: object) -> ArithmeticMixin:
        return _BinaryOp(self, other, _op.truediv)

    def __rtruediv__(self, other: object) -> ArithmeticMixin:
        return _BinaryOp(other, self, _op.truediv)

    def __floordiv__(self, other: object) -> ArithmeticMixin:
        return _BinaryOp(self, other, _op.floordiv)

    def __rfloordiv__(self, other: object) -> ArithmeticMixin:
        return _BinaryOp(other, self, _op.floordiv)

    def __mod__(self, other: object) -> ArithmeticMixin:
        return _BinaryOp(self, other, _op.mod)

    def __rmod__(self, other: object) -> ArithmeticMixin:
        return _BinaryOp(other, self, _op.mod)

    def __divmod__(self, other: object) -> ArithmeticMixin:
        return _BinaryOp(self, other, divmod)

    def __rdivmod__(self, other: object) -> ArithmeticMixin:
        return _BinaryOp(other, self, divmod)

    def __pow__(self, other: object) -> ArithmeticMixin:
        return _BinaryOp(self, other, _op.pow)

    def __rpow__(self, other: object) -> ArithmeticMixin:
        return _BinaryOp(other, self, _op.pow)

    def __lshift__(self, other: object) -> ArithmeticMixin:
        return _BinaryOp(self, other, _op.lshift)

    def __rlshift__(self, other: object) -> ArithmeticMixin:
        return _BinaryOp(other, self, _op.lshift)

    def __rshift__(self, other: object) -> ArithmeticMixin:
        return _BinaryOp(self, other, _op.rshift)

    def __rrshift__(self, other: object) -> ArithmeticMixin:
        return _BinaryOp(other, self, _op.rshift)

    def __and__(self, other: object) -> ArithmeticMixin:
        return _BinaryOp(self, other, _op.and_)

    def __rand__(self, other: object) -> ArithmeticMixin:
        return _BinaryOp(other, self, _op.and_)

    def __xor__(self, other: object) -> ArithmeticMixin:
        return _BinaryOp(self, other, _op.xor)

    def __rxor__(self, other: object) -> ArithmeticMixin:
        return _BinaryOp(other, self, _op.xor)

    def __or__(self, other: object) -> ArithmeticMixin:
        return _BinaryOp(self, other, _op.or_)

    def __ror__(self, other: object) -> ArithmeticMixin:
        return _BinaryOp(other, self, _op.or_)

    # --- unary operators ----------------------------------------------------

    def __neg__(self) -> ArithmeticMixin:
        return _UnaryOp(self, _op.neg)

    def __pos__(self) -> ArithmeticMixin:
        return _UnaryOp(self, _op.pos)

    def __abs__(self) -> ArithmeticMixin:
        return _UnaryOp(self, abs)

    def __invert__(self) -> ArithmeticMixin:
        return _UnaryOp(self, _op.invert)

    def __round__(self, ndigits: int | None = None) -> ArithmeticMixin:
        """Support both ``round(reward)`` and ``round(reward, ndigits)``.

        The built-in ``round`` forwards its second argument to ``__round__``,
        so the parameter must be accepted here; without it
        ``round(reward, 2)`` fails with ``TypeError``.
        """
        if ndigits is None:
            return _UnaryOp(self, round)
        return _UnaryOp(self, lambda value: round(value, ndigits))

    def __trunc__(self) -> ArithmeticMixin:
        return _UnaryOp(self, math.trunc)

    def __floor__(self) -> ArithmeticMixin:
        return _UnaryOp(self, math.floor)

    def __ceil__(self) -> ArithmeticMixin:
        return _UnaryOp(self, math.ceil)

    # --- reward interface (must be implemented by concrete subclasses) ------
    # NOTE: ``@abstractmethod`` is only enforced at instantiation time when the
    # class uses an ``ABCMeta`` metaclass.  This mixin declares none, so here
    # the decorators document the contract rather than enforce it.

    @abstractmethod
    def reset(self, model: Model) -> None: ...

    @abstractmethod
    def extract(self, model: Model, done: bool) -> float: ...

    # --- functional combinators ---------------------------------------------

    def apply(self, fn: Callable[[float], float]) -> ArithmeticMixin:
        """Return a reward function whose value is ``fn(self)``."""
        return _UnaryOp(self, fn)

    def cumsum(self) -> ArithmeticMixin:
        """Return a reward function that accumulates the sum over the episode."""
        return _CumSum(self)

    # --- math module wrappers ------------------------------------------------

    def exp(self) -> ArithmeticMixin:
        """Return a reward function whose value is ``math.exp(self)``."""
        return _UnaryOp(self, math.exp)

    def log(self) -> ArithmeticMixin:
        """Return a reward function whose value is ``math.log(self)``."""
        return _UnaryOp(self, math.log)

    def log2(self) -> ArithmeticMixin:
        """Return a reward function whose value is ``math.log2(self)``."""
        return _UnaryOp(self, math.log2)

    def log10(self) -> ArithmeticMixin:
        """Return a reward function whose value is ``math.log10(self)``."""
        return _UnaryOp(self, math.log10)

    def sqrt(self) -> ArithmeticMixin:
        """Return a reward function whose value is ``math.sqrt(self)``."""
        return _UnaryOp(self, math.sqrt)

    def sin(self) -> ArithmeticMixin:
        """Return a reward function whose value is ``math.sin(self)``."""
        return _UnaryOp(self, math.sin)

    def cos(self) -> ArithmeticMixin:
        """Return a reward function whose value is ``math.cos(self)``."""
        return _UnaryOp(self, math.cos)

    def tan(self) -> ArithmeticMixin:
        """Return a reward function whose value is ``math.tan(self)``."""
        return _UnaryOp(self, math.tan)

    def asin(self) -> ArithmeticMixin:
        """Return a reward function whose value is ``math.asin(self)``."""
        return _UnaryOp(self, math.asin)

    def acos(self) -> ArithmeticMixin:
        """Return a reward function whose value is ``math.acos(self)``."""
        return _UnaryOp(self, math.acos)

    def atan(self) -> ArithmeticMixin:
        """Return a reward function whose value is ``math.atan(self)``."""
        return _UnaryOp(self, math.atan)

    def sinh(self) -> ArithmeticMixin:
        """Return a reward function whose value is ``math.sinh(self)``."""
        return _UnaryOp(self, math.sinh)

    def cosh(self) -> ArithmeticMixin:
        """Return a reward function whose value is ``math.cosh(self)``."""
        return _UnaryOp(self, math.cosh)

    def tanh(self) -> ArithmeticMixin:
        """Return a reward function whose value is ``math.tanh(self)``."""
        return _UnaryOp(self, math.tanh)

    def asinh(self) -> ArithmeticMixin:
        """Return a reward function whose value is ``math.asinh(self)``."""
        return _UnaryOp(self, math.asinh)

    def acosh(self) -> ArithmeticMixin:
        """Return a reward function whose value is ``math.acosh(self)``."""
        return _UnaryOp(self, math.acosh)

    def atanh(self) -> ArithmeticMixin:
        """Return a reward function whose value is ``math.atanh(self)``."""
        return _UnaryOp(self, math.atanh)

    def isfinite(self) -> ArithmeticMixin:
        """Return a reward function whose value is ``math.isfinite(self)``."""
        return _UnaryOp(self, math.isfinite)

    def isinf(self) -> ArithmeticMixin:
        """Return a reward function whose value is ``math.isinf(self)``."""
        return _UnaryOp(self, math.isinf)

    def isnan(self) -> ArithmeticMixin:
        """Return a reward function whose value is ``math.isnan(self)``."""
        return _UnaryOp(self, math.isnan)

apply(fn)

Return a reward function whose value is fn(self).

Source code in gyozas/rewards/arithmetic.py
def apply(self, fn: Callable[[float], float]) -> ArithmeticMixin:
    """Derive a reward function that yields ``fn`` of this reward's value."""
    transformed = _UnaryOp(self, fn)
    return transformed

cumsum()

Return a reward function that accumulates the sum over the episode.

Source code in gyozas/rewards/arithmetic.py
def cumsum(self) -> ArithmeticMixin:
    """Derive a reward function that keeps a running sum of this reward over the episode."""
    running_total = _CumSum(self)
    return running_total

exp()

Return a reward function whose value is math.exp(self).

Source code in gyozas/rewards/arithmetic.py
def exp(self) -> ArithmeticMixin:
    """Derive a reward function that yields ``math.exp`` of this reward's value."""
    wrapped = _UnaryOp(self, math.exp)
    return wrapped

log()

Return a reward function whose value is math.log(self).

Source code in gyozas/rewards/arithmetic.py
def log(self) -> ArithmeticMixin:
    """Derive a reward function that yields ``math.log`` of this reward's value."""
    wrapped = _UnaryOp(self, math.log)
    return wrapped

log2()

Return a reward function whose value is math.log2(self).

Source code in gyozas/rewards/arithmetic.py
def log2(self) -> ArithmeticMixin:
    """Derive a reward function that yields ``math.log2`` of this reward's value."""
    wrapped = _UnaryOp(self, math.log2)
    return wrapped

log10()

Return a reward function whose value is math.log10(self).

Source code in gyozas/rewards/arithmetic.py
def log10(self) -> ArithmeticMixin:
    """Derive a reward function that yields ``math.log10`` of this reward's value."""
    wrapped = _UnaryOp(self, math.log10)
    return wrapped

sqrt()

Return a reward function whose value is math.sqrt(self).

Source code in gyozas/rewards/arithmetic.py
def sqrt(self) -> ArithmeticMixin:
    """Derive a reward function that yields ``math.sqrt`` of this reward's value."""
    wrapped = _UnaryOp(self, math.sqrt)
    return wrapped

sin()

Return a reward function whose value is math.sin(self).

Source code in gyozas/rewards/arithmetic.py
def sin(self) -> ArithmeticMixin:
    """Derive a reward function that yields ``math.sin`` of this reward's value."""
    wrapped = _UnaryOp(self, math.sin)
    return wrapped

cos()

Return a reward function whose value is math.cos(self).

Source code in gyozas/rewards/arithmetic.py
def cos(self) -> ArithmeticMixin:
    """Derive a reward function that yields ``math.cos`` of this reward's value."""
    wrapped = _UnaryOp(self, math.cos)
    return wrapped

tan()

Return a reward function whose value is math.tan(self).

Source code in gyozas/rewards/arithmetic.py
def tan(self) -> ArithmeticMixin:
    """Derive a reward function that yields ``math.tan`` of this reward's value."""
    wrapped = _UnaryOp(self, math.tan)
    return wrapped

asin()

Return a reward function whose value is math.asin(self).

Source code in gyozas/rewards/arithmetic.py
def asin(self) -> ArithmeticMixin:
    """Derive a reward function that yields ``math.asin`` of this reward's value."""
    wrapped = _UnaryOp(self, math.asin)
    return wrapped

acos()

Return a reward function whose value is math.acos(self).

Source code in gyozas/rewards/arithmetic.py
def acos(self) -> ArithmeticMixin:
    """Derive a reward function that yields ``math.acos`` of this reward's value."""
    wrapped = _UnaryOp(self, math.acos)
    return wrapped

atan()

Return a reward function whose value is math.atan(self).

Source code in gyozas/rewards/arithmetic.py
def atan(self) -> ArithmeticMixin:
    """Derive a reward function that yields ``math.atan`` of this reward's value."""
    wrapped = _UnaryOp(self, math.atan)
    return wrapped

sinh()

Return a reward function whose value is math.sinh(self).

Source code in gyozas/rewards/arithmetic.py
def sinh(self) -> ArithmeticMixin:
    """Derive a reward function that yields ``math.sinh`` of this reward's value."""
    wrapped = _UnaryOp(self, math.sinh)
    return wrapped

cosh()

Return a reward function whose value is math.cosh(self).

Source code in gyozas/rewards/arithmetic.py
def cosh(self) -> ArithmeticMixin:
    """Derive a reward function that yields ``math.cosh`` of this reward's value."""
    wrapped = _UnaryOp(self, math.cosh)
    return wrapped

tanh()

Return a reward function whose value is math.tanh(self).

Source code in gyozas/rewards/arithmetic.py
def tanh(self) -> ArithmeticMixin:
    """Derive a reward function that yields ``math.tanh`` of this reward's value."""
    wrapped = _UnaryOp(self, math.tanh)
    return wrapped

asinh()

Return a reward function whose value is math.asinh(self).

Source code in gyozas/rewards/arithmetic.py
def asinh(self) -> ArithmeticMixin:
    """Derive a reward function that yields ``math.asinh`` of this reward's value."""
    wrapped = _UnaryOp(self, math.asinh)
    return wrapped

acosh()

Return a reward function whose value is math.acosh(self).

Source code in gyozas/rewards/arithmetic.py
def acosh(self) -> ArithmeticMixin:
    """Derive a reward function that yields ``math.acosh`` of this reward's value."""
    wrapped = _UnaryOp(self, math.acosh)
    return wrapped

atanh()

Return a reward function whose value is math.atanh(self).

Source code in gyozas/rewards/arithmetic.py
def atanh(self) -> ArithmeticMixin:
    """Derive a reward function that yields ``math.atanh`` of this reward's value."""
    wrapped = _UnaryOp(self, math.atanh)
    return wrapped

isfinite()

Return a reward function whose value is math.isfinite(self).

Source code in gyozas/rewards/arithmetic.py
def isfinite(self) -> ArithmeticMixin:
    """Derive a reward function that yields ``math.isfinite`` of this reward's value."""
    wrapped = _UnaryOp(self, math.isfinite)
    return wrapped

isinf()

Return a reward function whose value is math.isinf(self).

Source code in gyozas/rewards/arithmetic.py
def isinf(self) -> ArithmeticMixin:
    """Derive a reward function that yields ``math.isinf`` of this reward's value."""
    wrapped = _UnaryOp(self, math.isinf)
    return wrapped

isnan()

Return a reward function whose value is math.isnan(self).

Source code in gyozas/rewards/arithmetic.py
def isnan(self) -> ArithmeticMixin:
    """Derive a reward function that yields ``math.isnan`` of this reward's value."""
    wrapped = _UnaryOp(self, math.isnan)
    return wrapped

NNodes

NNodes

Bases: ArithmeticMixin

Reward based on the change in number of explored nodes since the last step.

Source code in gyozas/rewards/nnodes.py
class NNodes(ArithmeticMixin):
    """Per-step reward: growth of ``model.getNNodes()`` since the previous ``extract``."""

    def __init__(self) -> None:
        # Node count seen at the most recent ``extract`` call.
        self.last_n_nodes = 0

    def reset(self, model: Model) -> None:
        """Restart counting from zero; ``model`` is not consulted."""
        self.last_n_nodes = 0

    def extract(self, model: Model, done: bool) -> int:
        """Return the node-count increase since the last call and remember the new count."""
        current = model.getNNodes()
        previous, self.last_n_nodes = self.last_n_nodes, current
        return current - previous

SolvingTime

SolvingTime

Bases: ArithmeticMixin

Reward based on the wall-clock solving time elapsed since the last step.

Source code in gyozas/rewards/solving_time.py
class SolvingTime(ArithmeticMixin):
    """Per-step reward: increase of ``model.getSolvingTime()`` since the previous ``extract``."""

    def __init__(self) -> None:
        # Solving time reported at the most recent ``extract`` call.
        self.solving_time = 0

    def reset(self, model: Model) -> None:
        """Restart the elapsed-time baseline at zero; ``model`` is not consulted."""
        self.solving_time = 0

    def extract(self, model: Model, done: bool) -> float:
        """Return the solving time elapsed since the last call and remember the new reading."""
        current = model.getSolvingTime()
        previous, self.solving_time = self.solving_time, current
        return current - previous

LPIterations

LPIterations

Bases: ArithmeticMixin

Reward based on the change in LP iteration count since the last step.

Source code in gyozas/rewards/lp_iterations.py
class LPIterations(ArithmeticMixin):
    """Per-step reward: growth of ``model.getNLPIterations()`` since the previous ``extract``."""

    def __init__(self) -> None:
        # LP iteration count seen at the most recent ``extract`` call.
        self.n_lp_iterations = 0

    def reset(self, model: Model) -> None:
        """Restart counting from zero; ``model`` is not consulted."""
        self.n_lp_iterations = 0

    def extract(self, model: Model, done: bool) -> int:
        """Return the LP-iteration increase since the last call and remember the new count."""
        current = model.getNLPIterations()
        previous, self.n_lp_iterations = self.n_lp_iterations, current
        return current - previous

DualIntegral

DualIntegral

Bases: ArithmeticMixin

Reward based on the change in the dual bound integral over solving time.

Uses a SCIP event handler to track the dual bound at each LP event and computes the trapezoidal integral.

Source code in gyozas/rewards/integral_bound.py
class DualIntegral(ArithmeticMixin):
    """Reward based on the change in the dual bound integral over solving time.

    Uses a SCIP event handler to track the dual bound at each LP event and
    computes the trapezoidal integral.
    """

    def __init__(self) -> None:
        # Integral value returned by the handler at the previous ``extract``.
        self.dual_integral = 0.0
        self.event = DualBoundEventHandler()

    def close(self) -> None:
        """Drop caught events so the SCIP model can be GC'd.

        catchEvent() calls Py_INCREF(model) internally; without a matching
        dropEvent() the model's refcount never reaches zero.

        Safe to call repeatedly, and on an instance whose ``__init__`` did not
        finish (``__del__`` still runs on such objects).
        """
        # ``event`` is absent when construction failed before it was assigned.
        event = getattr(self, "event", None)
        if event is None or event.model is None:
            return
        # Best-effort cleanup: a failure while dropping events must not
        # propagate out of ``reset`` or ``__del__``.
        with contextlib.suppress(Exception):
            event.eventexit()
        event.model = None

    def __del__(self) -> None:
        self.close()

    def reset(self, model: Model) -> None:
        """Detach the previous handler and register a fresh one on ``model``."""
        self.close()
        self.dual_integral = 0.0
        self.event = DualBoundEventHandler()
        # NOTE(review): the handler is always registered under the same name;
        # what happens when ``reset`` is called twice with the same model is
        # not visible here - confirm against the caller.
        model.includeEventhdlr(self.event, "dual_bound_tracker", "tracks dual bound for integral computation")

    def extract(self, model: Model, done: bool) -> float:
        """Return the increase of the handler's integral since the last call."""
        dual_integral = self.event._compute_integral()
        delta = dual_integral - self.dual_integral
        self.dual_integral = dual_integral
        return delta

close()

Drop caught events so the SCIP model can be GC'd.

catchEvent() calls Py_INCREF(model) internally; without a matching dropEvent() the model's refcount never reaches zero.

Source code in gyozas/rewards/integral_bound.py
def close(self) -> None:
    """Drop caught events so the SCIP model can be GC'd.

    catchEvent() calls Py_INCREF(model) internally; without a matching
    dropEvent() the model's refcount never reaches zero.

    Safe to call repeatedly, and on an instance whose ``__init__`` did not
    finish (``__del__`` still runs on such objects).
    """
    # ``event`` is absent when construction failed before it was assigned.
    event = getattr(self, "event", None)
    if event is None or event.model is None:
        return
    # Best-effort cleanup: a failure while dropping events must not propagate.
    with contextlib.suppress(Exception):
        event.eventexit()
    event.model = None

PrimalIntegral

PrimalIntegral

Bases: ArithmeticMixin

Reward based on the change in the primal bound integral over solving time.

Uses a SCIP event handler to track the primal bound at each best-solution-found event and computes the trapezoidal integral.

Source code in gyozas/rewards/integral_bound.py
class PrimalIntegral(ArithmeticMixin):
    """Reward based on the change in the primal bound integral over solving time.

    Uses a SCIP event handler to track the primal bound at each best-solution-found
    event and computes the trapezoidal integral.
    """

    def __init__(self) -> None:
        # Integral value returned by the handler at the previous ``extract``.
        self.primal_integral = 0.0
        self.event = PrimalBoundEventHandler()

    def close(self) -> None:
        """Detach the event handler from its model, if it is attached to one.

        Safe to call repeatedly, and on an instance whose ``__init__`` did not
        finish (``__del__`` still runs on such objects).
        """
        # ``event`` is absent when construction failed before it was assigned.
        event = getattr(self, "event", None)
        if event is None or event.model is None:
            return
        # Best-effort cleanup: a failure while dropping events must not
        # propagate out of ``reset`` or ``__del__``.
        with contextlib.suppress(Exception):
            event.eventexit()
        event.model = None

    def __del__(self) -> None:
        self.close()

    def reset(self, model: Model) -> None:
        """Detach the previous handler and register a fresh one on ``model``."""
        self.close()
        self.primal_integral = 0.0
        self.event = PrimalBoundEventHandler()
        # NOTE(review): the handler is always registered under the same name;
        # what happens when ``reset`` is called twice with the same model is
        # not visible here - confirm against the caller.
        model.includeEventhdlr(self.event, "primal_bound_tracker", "tracks primal bound for integral computation")

    def extract(self, model: Model, done: bool) -> float:
        """Return the increase of the handler's integral since the last call."""
        primal_integral = self.event._compute_integral()
        delta = primal_integral - self.primal_integral
        self.primal_integral = primal_integral
        return delta

PrimalDualIntegral

PrimalDualIntegral

Bases: ArithmeticMixin

Reward that sums the primal and dual bound integral changes.

Source code in gyozas/rewards/integral_bound.py
class PrimalDualIntegral(ArithmeticMixin):
    """Reward that sums the primal and dual bound integral changes.

    Delegates to one ``PrimalIntegral`` and one ``DualIntegral`` instance.
    """

    def __init__(self) -> None:
        self.primal_integral = PrimalIntegral()
        self.dual_integral = DualIntegral()

    def close(self) -> None:
        """Close both wrapped rewards so their event handlers are released.

        Mirrors the ``close`` method of ``PrimalIntegral`` / ``DualIntegral``
        so callers can release this composite explicitly instead of relying
        on the wrapped objects' finalizers.
        """
        self.primal_integral.close()
        self.dual_integral.close()

    def reset(self, model: Model) -> None:
        """Reset both wrapped rewards against ``model``."""
        self.primal_integral.reset(model)
        self.dual_integral.reset(model)

    def extract(self, model: Model, done: bool) -> float:
        """Return the sum of the primal and dual integral changes."""
        primal_delta = self.primal_integral.extract(model, done)
        dual_delta = self.dual_integral.extract(model, done)
        return primal_delta + dual_delta

Done

Done

Bases: ArithmeticMixin

Reward that returns 1 when the solver finds an optimal solution, 0 otherwise.

Source code in gyozas/rewards/done.py
class Done(ArithmeticMixin):
    """Indicator reward: 1.0 while ``model.getStatus()`` equals ``"optimal"``, else 0.0."""

    def reset(self, model: Model) -> None:
        """Nothing to reset: this reward keeps no state."""
        return None

    def extract(self, model: Model, done: bool) -> float:
        """Return 1.0 if the model status is ``"optimal"``; the ``done`` flag is not used."""
        reached_optimum = model.getStatus() == "optimal"
        return 1.0 if reached_optimum else 0.0