# Source code for cblearn.embedding.wrapper._mlds
from typing import Union
from sklearn.base import BaseEstimator
from sklearn.utils import check_random_state
import numpy as np
from cblearn import utils
from cblearn.embedding._base import TripletEmbeddingMixin
from cblearn.embedding.wrapper._r_base import RWrapperMixin
# Maps doctest targets in this module to the modules they need.
# Presumably read by pytest-doctestplus to skip the MLDS doctests when rpy2
# is not importable -- confirm against the project's test configuration.
__doctest_requires__ = {'MLDS': ['rpy2']}
[docs]
class MLDS(BaseEstimator, TripletEmbeddingMixin, RWrapperMixin):
    """A maximum-likelihood difference scaling (MLDS) estimator, wrapping the R implementation.

    .. note::
        This method assumes that the objects can be embedded in a one-dimensional space
        and that the object indices are consistent with their order in this space.

    This estimator requires the R programming language
    and the R package `MLDS <https://cran.r-project.org/web/packages/MLDS/index.html>`_.

    This R package is the reference implementation of MLDS [1]_.

    Attributes:
        embedding_: The final embedding, array-like of shape (n_objects, 1).
        log_likelihood_: The final log-likelihood of the embedding.
        r_estimator_: The fitted model object returned by the R function ``mlds``.

    >>> from cblearn import datasets
    >>> import doctest; doctest.ELLIPSIS_MARKER = "-output from R-"
    >>> triplets = datasets.make_random_triplets(np.arange(15).reshape(-1, 1), size=400, result_format='list-order')
    >>> triplets.shape, np.unique(triplets).shape
    ((400, 3), (15,))
    >>> estimator = MLDS().fit(triplets); print("...finished fit") # doctest: +ELLIPSIS
    -output from R-...finished fit
    >>> estimator.embedding_.shape
    (15, 1)

    References
    ----------
    .. [1] Knoblauch, K., & Maloney, L. T. (2012). Modeling Psychophysical Data in R.
           Springer New York. https://doi.org/10.1007/978-1-4614-4475-6
    """

    def __init__(self, n_components: int = 1,
                 random_state: Union[None, int, np.random.RandomState] = None,
                 method='glm'):
        """Store the estimator parameters.

        Args:
            n_components: Embedding dimension for api compatibility. Only 1 is supported for MLDS.
            random_state: The seed of the pseudo random number generator;
                          it is used to seed R before the model is fitted.
            method: Optimizer method, either 'glm' or 'optim'.
                    Passed unchanged to the R function ``mlds``.

        Raises:
            ValueError: If n_components is not 1.
        """
        self._check_n_components(n_components)
        self.n_components = n_components
        self.random_state = random_state
        self.method = method

    @staticmethod
    def _check_n_components(n_components: int) -> None:
        """Raise a ValueError unless the requested embedding dimension is 1."""
        if n_components != 1:
            raise ValueError(f"MLDS expects n_components=1, got {n_components}")

    def fit(self, X: utils.Query, y: Union[None, np.ndarray] = None) -> 'MLDS':
        """Computes the embedding.

        Args:
            X: The training input samples, shape (n_samples, 3)
            y: Optional responses to the queries in X. Forwarded together with X
               to ``utils.check_query_response``.

        Returns:
            This estimator

        Raises:
            ValueError: If n_components is not 1.
        """
        # The attribute can be reassigned after construction (e.g. via set_params),
        # which bypasses the check in __init__, so it is validated again here.
        self._check_n_components(self.n_components)

        mlds = self.import_r_package('MLDS', robject_translations={"logLik.mlds": "logLik_mlds"})
        random_state = check_random_state(self.random_state)
        self.seed_r(random_state)

        triplets, answer = utils.check_query_response(X, y, result_format='list-boolean')
        # Shift the object indices by one; R indexing is 1-based.
        triplets = triplets.astype(np.int32) + 1
        # The first triplet column becomes the middle stimulus 's2'.
        # NOTE(review): presumably because the triplet's reference object has to be
        # the middle element of an MLDS triad -- confirm against the MLDS package docs.
        r_df = self.robjects.vectors.DataFrame({
            'resp': answer,
            's1': triplets[:, 1],
            's2': triplets[:, 0],
            's3': triplets[:, 2],
        })

        self.r_estimator_ = mlds.mlds(r_df, method=self.method)
        self.log_likelihood_ = mlds.logLik_mlds(self.r_estimator_)[0]
        # 'pscale' is read as a flat vector and stored as a single-column embedding.
        self.embedding_ = np.asarray(self.r_estimator_.rx2("pscale")).reshape(-1, 1)
        return self