# Source code for cblearn.embedding.wrapper._soe

from typing import Optional, Union

from sklearn.base import BaseEstimator
from sklearn.utils import check_random_state
import numpy as np

from cblearn import utils
from cblearn.embedding._base import TripletEmbeddingMixin
from cblearn.embedding.wrapper._r_base import RWrapperMixin


# Maps the doctest of ``SOE`` to the optional module it needs (rpy2).
# NOTE(review): presumably consumed by the doctest plugin (pytest-doctestplus convention)
# to skip the example when rpy2 is not importable -- confirm against the test configuration.
__doctest_requires__ = {'SOE': ['rpy2']}



# [docs]
class SOE(BaseEstimator, TripletEmbeddingMixin, RWrapperMixin):
    """ A soft ordinal embedding estimator, wrapping an R implementation.

        The wrapped R package is the reference implementation of SOE [1]_.

        The optimization is restarted ``n_init`` times from random initial
        embeddings and the run with the lowest stress is kept.

        Attributes:
            embedding_: Final embedding, shape (n_objects, n_components)
            stress_: Final value of the SOE stress corresponding to the embedding.


        >>> from cblearn import datasets
        >>> import doctest; doctest.ELLIPSIS_MARKER = "-output from R-"
        >>> triplets = datasets.make_random_triplets(np.random.rand(15, 2), result_format='list-order', size=1000)
        >>> triplets.shape, np.unique(triplets).shape
        ((1000, 3), (15,))
        >>> estimator = SOE(verbose=True).fit(triplets) # doctest: +ELLIPSIS
        -output from R-
        >>> estimator.embedding_.shape
        (15, 2)


        References
        ----------
        .. [1] Terada, Y., & Luxburg, U. (2014). Local ordinal embedding.
               International Conference on Machine Learning, 847–855.
        """

    def __init__(self, n_components=2, n_init=10, margin=.1, max_iter=1000, verbose=False,
                 random_state: Union[None, int, np.random.RandomState] = None):
        """
        Args:
            n_components:
                The dimension of the embedding.
            n_init:
                Number of optimization runs, each started from a new random
                initial embedding; the run with the lowest stress is kept.
                A single run is performed if ``init`` is passed to :meth:`fit`.
            margin:
                Scale parameter which only takes strictly positive value.
            max_iter:
                Maximum number of optimization iterations.
            verbose:
                Enable verbose output.
            random_state:
                The seed of the pseudo random number generator used to initialize the optimization.
        """
        self.n_components = n_components
        # Every constructor argument has to be stored under its own name,
        # otherwise BaseEstimator.get_params() (and thus repr/clone) fails.
        self.n_init = n_init
        self.margin = margin
        self.max_iter = max_iter
        self.verbose = verbose
        self.random_state = random_state

    def fit(self, X: utils.Query, y: Optional[np.ndarray] = None, init: Optional[np.ndarray] = None,
            n_objects: Optional[int] = None) -> 'SOE':
        """Computes the embedding.

        Args:
            X: The training input samples, shape (n_samples, 3)
            y: Ignored
            init: Initial embedding for optimization. If given, the optimization
                  is run once from this embedding instead of ``n_init`` random restarts.
            n_objects: Number of objects to embed. If omitted, it is derived from
                       the largest object index that occurs in the triplets.
        Returns:
            self.
        """
        loe = self.import_r_package('loe')
        random_state = check_random_state(self.random_state)
        self.seed_r(random_state)

        triplets = utils.check_query_response(X, y, result_format='list-order')
        quadruplets = triplets[:, [1, 0, 0, 2]]  # type: ignore
        quadruplets = quadruplets.astype(np.int32) + 1  # R is 1-indexed, int32

        if not n_objects:
            # The largest 1-based index is the object count. Counting the unique
            # indices would undercount whenever an object never occurs in a
            # triplet, and R would then be handed indices beyond N.
            n_objects = int(quadruplets.max())

        if init is None:
            init = 'rand'
            n_runs = max(1, self.n_init)
        else:
            # Restarting from the very same initial embedding gains nothing.
            n_runs = 1

        # Without verbose output, report only once at the very end of a run.
        report_every = 100 if self.verbose else self.max_iter

        silence = not self.verbose
        if silence:
            # Import the submodule explicitly instead of relying on it having
            # been loaded as a side effect of another rpy2 import.
            import rpy2.rinterface_lib.callbacks as r_callbacks

            previous_writer = r_callbacks.consolewrite_print
            r_callbacks.consolewrite_print = lambda prompt: None
        try:
            self.stress_ = np.inf
            for _ in range(n_runs):
                soe_result = loe.SOE(CM=quadruplets, N=n_objects, p=self.n_components, c=self.margin,
                                     maxit=self.max_iter, report=report_every, iniX=init,
                                     rnd=quadruplets.shape[0])
                i_stress = soe_result.rx2("str")[0]
                if i_stress < self.stress_:
                    self.stress_ = i_stress
                    self.embedding_ = np.asarray(soe_result.rx2("X"))
        finally:
            if silence:
                # The callback is process-global: put it back so that later
                # R calls (e.g. a subsequent verbose fit) can print again.
                r_callbacks.consolewrite_print = previous_writer

        return self

    def _more_tags(self):
        """Return the estimator tags of the mixin, extended by the expected check failures."""
        return {
            **TripletEmbeddingMixin._more_tags(self),
            'Xfail': [
                'check_transformer_n_iter',  # the R package does not return n_iter
            ]
        }