# Source code for mwu_max

"""Implements a MWU-max method.

    The MWU implemented here tries to maximize a cost function (gains function, in this case).
    This is done by allocating probabilities in the available dimensions, with the constraint that
    the sum of allocated probabilities is <= 1.
"""
from typing import List
import copy


class MwuMax:
    """Class that implements the MWU-max algorithm.

    Parameters
    ----------
    n_experts : int
        Dimension of the problem cost function. Must be positive.
    eps : float
        Calibration parameter that regulates the step-size of the updates.
        Must lie strictly between ``1e-6`` and ``0.5 - 1e-6``.

    Methods
    -------
    update_weights(expert_gains: List[float])
        Update mwu probabilities based on a new input of the cost function.
    get_probs()
        Get a copy of the currently allocated probabilities.
    get_n_experts()
        Get the number of dimensions used in this instance.

    Notes
    -----
    Usually, the mwu respects the following restriction: :math:`\\sum_{i=1}^d p_i = 1`, where :math:`p_i` is
    the probability or weight attributed to dimension :math:`i`. In order to accommodate the restriction that
    :math:`\\sum_{i=1}^d p_i \\leq 1`, we consider an extra dimension, which always receives gain 0 (its
    weight is never changed by a gain update).

    The weights are only meaningful up to a common positive factor, so they are rescaled (together with
    the weight of the extra dimension) whenever the largest one becomes very large. This keeps long runs
    from overflowing to ``inf`` and producing ``nan`` probabilities.
    """
    _n_experts: int
    _eps: float
    _weights: List[float]
    _probs: List[float]
    # Weight of the implicit extra expert. It starts at 1 and is only ever
    # changed by the common rescaling applied to all weights.
    _extra_weight: float

    # Rescaling uses an exact power of two so that ratios between weights (and
    # therefore the probabilities) are not perturbed by rounding.
    _RESCALE_THRESHOLD = 2.0 ** 512
    _RESCALE_FACTOR = 2.0 ** -512

    def __init__(self, n_experts: int, eps: float):
        assert n_experts > 0, "n_experts must be positive"
        assert eps > 1e-6, "eps must be greater than 1e-6"
        assert eps < 0.5-1e-6, "eps must be smaller than 0.5 - 1e-6"

        self._n_experts = n_experts
        self._eps = eps
        self._weights = [1.0] * n_experts
        self._extra_weight = 1.0
        # here we use n_experts+1 because of the extra dimension to transform the
        # equality constraint into <=.
        uniform_prob = 1 / (n_experts + 1)
        self._probs = [uniform_prob] * n_experts

    def update_weights(self, expert_gains: List[float]) -> List[float]:
        """Update mwu probabilities based on a new input of the cost function.

        Each weight is multiplied by ``1 + eps * gain`` and the probabilities are
        recomputed as the weights divided by the total weight, where the total
        includes the extra expert.

        Parameters
        ----------
        expert_gains : list of float
            A list containing the new input from the cost function. Length must match instance dimension.
            NOTE(review): gains are presumably expected to be bounded (e.g. in [-1, 1]); this is not
            checked here -- confirm against callers.

        Returns
        -------
        list of float
            Updated probabilities of each dimension (a copy of the internal state).
        """
        assert len(expert_gains) == self._n_experts, \
            "expert_gains length must match the number of experts"

        eps = self._eps
        self._weights = [
            weight * (1 + eps * gain)
            for weight, gain in zip(self._weights, expert_gains)
        ]

        # Without rescaling, persistently positive gains make the weights grow
        # geometrically until they overflow to inf, after which inf / inf
        # yields nan probabilities.
        if max(self._weights) > self._RESCALE_THRESHOLD:
            factor = self._RESCALE_FACTOR
            self._weights = [weight * factor for weight in self._weights]
            self._extra_weight *= factor

        # the extra expert's weight is added so that the probabilities sum to <= 1
        total_weight = sum(self._weights) + self._extra_weight
        self._probs = [weight / total_weight for weight in self._weights]
        return copy.copy(self._probs)

    def get_probs(self) -> List[float]:
        """Getter for the current probabilities.

        Returns
        -------
        list of float
            Copy of the current probabilities of each dimension.
        """
        return copy.copy(self._probs)

    def get_n_experts(self) -> int:
        """Getter for the number of dimensions (experts) in this instance.

        Returns
        -------
        int
            The number of experts.
        """
        return self._n_experts