# Source code for cblearn.datasets._triplet_simulation

""" Functions in this file return sampled triplets with answers, based on an artificial embedding and noise.

    Usually they combine functions from _triplet_indices and _triplet_response
    and are used as a high-level interface to create artificial datasets.
"""
import numpy as np

from typing import Union

from ._triplet_indices import make_all_triplet_indices
from ._triplet_indices import make_random_triplet_indices
from ._triplet_response import noisy_triplet_response
from .. import utils


def make_all_triplets(embedding: np.ndarray, result_format: str, monotonic: bool = False, **kwargs):
    """ Build every possible triplet for the embedding and attach (noisy) answers.

        The triplet indices are generated by :func:`make_all_triplet_indices`;
        the answers are generated by :func:`noisy_triplet_response`.

        Args:
            embedding: Object coordinates or distance matrix. Its length determines
                the number of objects the triplets are built for.
            result_format: Result format, forwarded to :func:`noisy_triplet_response`.
            monotonic: Only triplets (j, i, k), such that j < i < k.
            kwargs: Additional keyword arguments forwarded unchanged to
                :func:`noisy_triplet_response`, e.g. ``random_state`` to seed the noisy answers.
        Returns:
            The triplets and answers, based on format. See :func:`cblearn.utils.check_triplets`.
    """
    n_objects = len(embedding)
    all_indices = make_all_triplet_indices(n_objects, monotonic)
    return noisy_triplet_response(all_indices, embedding, result_format=result_format, **kwargs)


def make_random_triplets(embedding: np.ndarray, result_format: str, size: Union[int, float] = 1.,
                         random_state: Union[None, int, np.random.RandomState] = None,
                         repeat: bool = True, monotonic: bool = False, make_all: int = 10000, **kwargs
                         ) -> utils.Query:
    """ Make random triplets with answers for the provided embedding or distances.

        >>> triplets, answers = make_random_triplets(np.random.rand(12, 2), size=1000, result_format='list-boolean')
        >>> answers.shape, np.unique(answers).tolist()
        ((1000,), [False, True])
        >>> triplets.shape, np.unique(triplets).tolist()
        ((1000, 3), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])

        Args:
            embedding: Object coordinates (n_objects, n_components)
                       or distance matrix (n_objects, n_objects).
            result_format: Result format
            size: Either absolute or relative number of triplets to generate.
            random_state: Seed for triplet sampling and noisy answers
            repeat: Sample triplet indices with repetitions
            monotonic: Sample triplets (j, i, k), such that j < i < k.
            make_all: Choose from all triplets instead of iterative sampling,
                      if the difference between all triplets to the requested number is smaller than this value.
            kwargs: Additional arguments passed to :func:`noisy_triplet_response`
        Returns:
            The triplets and answers, based on format. See :func:`cblearn.utils.check_triplets`.
    """
    # Step 1: sample the triplet indices; only the number of objects is needed here.
    triplets = make_random_triplet_indices(len(embedding), size, random_state, repeat, monotonic, make_all)
    # Step 2: answer the sampled triplets. The very same ``random_state`` value
    # is handed to both the index sampling above and the noisy response here.
    return noisy_triplet_response(triplets, embedding, result_format=result_format,
                                  random_state=random_state, **kwargs)