Source code for cblearn.embedding.wrapper._soe

from typing import Optional, Union

from sklearn.base import BaseEstimator
from sklearn.utils import check_random_state
import numpy as np

from cblearn import utils
from cblearn.embedding._base import TripletEmbeddingMixin
from cblearn.embedding.wrapper._r_base import RWrapperMixin


# Maps doctest targets of this module to the packages they need: the doctest
# of `SOE` is declared to require `rpy2`.
# NOTE(review): presumably consumed by a doctest plugin (e.g. pytest-doctestplus)
# to skip the example when rpy2 is missing - confirm against the test setup.
__doctest_requires__ = {'SOE': ['rpy2']}


class SOE(BaseEstimator, TripletEmbeddingMixin, RWrapperMixin):
    """ A soft ordinal embedding estimator, wrapping an R implementation.

        The wrapped R package is the reference implementation of SOE [1]_.

        Attributes:
            embedding_: Final embedding, shape (n_objects, n_components)
            stress_: Final value of the SOE stress corresponding to the embedding.

        >>> from cblearn import datasets
        >>> import doctest; doctest.ELLIPSIS_MARKER = "-output from R-"
        >>> triplets = datasets.make_random_triplets(np.random.rand(15, 2), result_format='list-order', size=1000)
        >>> triplets.shape, np.unique(triplets).shape
        ((1000, 3), (15,))
        >>> estimator = SOE(verbose=True).fit(triplets) # doctest: +ELLIPSIS
        -output from R-
        >>> estimator.embedding_.shape
        (15, 2)

        References
        ----------
        .. [1] Terada, Y., & Luxburg, U. (2014). Local ordinal embedding.
               International Conference on Machine Learning, 847–855.
        """

    def __init__(self, n_components=2, n_init=10, margin=.1, max_iter=1000, verbose=False,
                 random_state: Union[None, int, np.random.RandomState] = None):
        """
        Args:
            n_components: The dimension of the embedding.
            n_init: Number of times the R optimizer is run. The run with the
                    smallest final stress provides `embedding_` and `stress_`.
            margin: Scale parameter which only takes strictly positive value.
            max_iter: Maximum number of optimization iterations.
            verbose: Enable verbose output.
            random_state: The seed of the pseudo random number generator
                          used to initialize the optimization.
        """
        self.n_components = n_components
        # Every constructor argument has to be stored under its own name,
        # otherwise scikit-learn's get_params() / clone() / repr() fail.
        self.n_init = n_init
        self.margin = margin
        self.max_iter = max_iter
        self.verbose = verbose
        self.random_state = random_state

    def fit(self, X: utils.Query, y: Optional[np.ndarray] = None, init: Optional[np.ndarray] = None,
            n_objects: Optional[int] = None) -> 'SOE':
        """Computes the embedding.

        Args:
            X: The training input samples, shape (n_samples, 3)
            y: Ignored
            init: Initial embedding for optimization. If omitted, the R
                  routine is asked for a random start (``iniX='rand'``).
            n_objects: Number of objects to embed. If omitted, it is taken
                       from the largest object index found in the queries.
        Returns:
            self.
        Raises:
            ValueError: If `n_init` is smaller than 1.
        """
        if self.n_init < 1:
            raise ValueError(f"n_init must be at least 1, got {self.n_init}.")

        loe = self.import_r_package('loe')
        random_state = check_random_state(self.random_state)
        self.seed_r(random_state)

        triplets = utils.check_query_response(X, y, result_format='list-order')
        # Reorder the triplet columns into the four-column comparison layout
        # that is handed to the R routine as `CM`.
        quadruplets = triplets[:, [1, 0, 0, 2]]  # type: ignore
        quadruplets = quadruplets.astype(np.int32) + 1  # R is 1-indexed, int32

        if init is None:
            init = 'rand'
        if not n_objects:
            # The indices are 1-based here, so the largest one is the number of
            # objects. Counting the unique indices instead would under-count
            # whenever an object never occurs in the queries.
            n_objects = int(quadruplets.max()) if quadruplets.size else 0

        # In quiet mode the reporting interval equals max_iter.
        report_every = 100 if self.verbose else self.max_iter

        if self.verbose:
            self._fit_best_of_restarts(loe, quadruplets, n_objects, init, report_every)
            return self

        # Quiet mode: swap rpy2's console-print callback for a no-op, and put
        # the previous callback back afterwards (also on errors) so that other
        # R calls in this process - e.g. a later verbose fit - still print.
        from rpy2.rinterface_lib import callbacks as r_callbacks
        previous_writer = r_callbacks.consolewrite_print
        r_callbacks.consolewrite_print = lambda prompt: None
        try:
            self._fit_best_of_restarts(loe, quadruplets, n_objects, init, report_every)
        finally:
            r_callbacks.consolewrite_print = previous_writer
        return self

    def _fit_best_of_restarts(self, loe, quadruplets, n_objects, init, report_every):
        """Runs the R optimizer `n_init` times and keeps the lowest-stress result."""
        best_stress = np.inf  # np.infty was removed in NumPy 2.0
        best_embedding = None
        for _ in range(self.n_init):
            soe_result = loe.SOE(CM=quadruplets, N=n_objects, p=self.n_components, c=self.margin,
                                 maxit=self.max_iter, report=report_every, iniX=init,
                                 rnd=quadruplets.shape[0])
            run_stress = soe_result.rx2("str")[0]
            # Always accept the first run, so that embedding_ is set even if
            # the reported stress does not compare (e.g. NaN).
            if best_embedding is None or run_stress < best_stress:
                best_stress = run_stress
                best_embedding = np.asarray(soe_result.rx2("X"))
        self.stress_ = best_stress
        self.embedding_ = best_embedding

    def _more_tags(self):
        """Returns the mixin's estimator tags, extended by the expected-failure list."""
        # NOTE(review): scikit-learn's documented tag for expected failures is
        # `_xfail_checks` (a dict of check name -> reason). Confirm that the
        # 'Xfail' key is read by the project's own test harness.
        return {
            **TripletEmbeddingMixin._more_tags(self),
            'Xfail': [
                'check_transformer_n_iter',  # the R package does not return n_iter
            ]
        }