# Source code for cblearn.embedding.wrapper._mlds

from typing import Union

from sklearn.base import BaseEstimator
from sklearn.utils import check_random_state
import numpy as np

from cblearn import utils
from cblearn.embedding._base import TripletEmbeddingMixin
from cblearn.embedding.wrapper._r_base import RWrapperMixin


__doctest_requires__ = {'MLDS': ['rpy2']}



class MLDS(BaseEstimator, TripletEmbeddingMixin, RWrapperMixin):
    """ A maximum-likelihood difference scaling (MLDS) estimator, wrapping the R implementation.

    Note::
        This method assumes that the objects can be embedded in a one-dimensional space
        and that the object indices are consistent with their order in this space.

    This estimator requires the R programming language
    and the R package `MLDS <https://cran.r-project.org/web/packages/MLDS/index.html>`_.
    This R package is the reference implementation of MLDS [1]_.

    Attributes:
        embedding_: array-like. The final embedding, shape (n_objects, 1)
        log_likelihood_: The final log-likelihood of the embedding.
        r_estimator_: The fitted model object returned by the R function ``mlds``.


    >>> from cblearn import datasets
    >>> import doctest; doctest.ELLIPSIS_MARKER = "-output from R-"
    >>> triplets = datasets.make_random_triplets(np.arange(15).reshape(-1, 1), size=400, result_format='list-order')
    >>> triplets.shape, np.unique(triplets).shape
    ((400, 3), (15,))
    >>> estimator = MLDS().fit(triplets); print("...finished fit") # doctest: +ELLIPSIS
    -output from R-...finished fit
    >>> estimator.embedding_.shape
    (15, 1)


    References
    ----------
    .. [1] Knoblauch, K., & Maloney, L. T. (2012). Modeling Psychophysical Data in R.
           Springer New York. https://doi.org/10.1007/978-1-4614-4475-6
    """

    def __init__(self, n_components: int = 1,
                 random_state: Union[None, int, np.random.RandomState] = None,
                 method='glm'):
        """
        Args:
            n_components: Embedding dimension for api compatibility. Only 1 is supported for MLDS.
            random_state: The seed of the pseudo random number generator used to initialize the optimization.
            method: Optimizer method, either 'glm' or 'optim'.

        Raises:
            ValueError: If ``n_components`` is not 1.
        """
        if n_components != 1:
            raise ValueError(f"MLDS expects n_components=1, got {n_components}")
        self.n_components = n_components
        self.random_state = random_state
        self.method = method

    def fit(self, X: utils.Query, y: Union[None, np.ndarray] = None) -> 'MLDS':
        """Computes the embedding.

        Sets the attributes ``r_estimator_``, ``log_likelihood_`` and ``embedding_``.

        Args:
            X: The training input samples, shape (n_samples, 3)
            y: Optional responses, forwarded together with X to
               ``utils.check_query_response``.
        Returns:
            This estimator
        """
        # 'logLik.mlds' is exposed on the Python side as 'logLik_mlds'.
        mlds = self.import_r_package('MLDS', robject_translations={"logLik.mlds": "logLik_mlds"})
        random_state = check_random_state(self.random_state)
        self.seed_r(random_state)

        triplets, answer = utils.check_query_response(X, y, result_format='list-boolean')
        # R indexes from 1, so shift the zero-based object indices.
        triplets = triplets.astype(np.int32) + 1
        # Triplet column 0 is passed as the middle stimulus `s2`, columns 1 and 2
        # as `s1` and `s3`.
        # NOTE(review): presumably column 0 is the triplet's reference object and
        # MLDS expects it in the middle of the triad -- confirm against the R docs.
        r_df = self.robjects.vectors.DataFrame({
            'resp': answer,
            's1': triplets[:, 1],
            's2': triplets[:, 0],
            's3': triplets[:, 2],
        })

        self.r_estimator_ = mlds.mlds(r_df, method=self.method)
        self.log_likelihood_ = mlds.logLik_mlds(self.r_estimator_)[0]
        # 'pscale' is read from the fitted R object and reshaped to a single column.
        self.embedding_ = np.asarray(self.r_estimator_.rx2("pscale")).reshape(-1, 1)

        return self