Small Ordinal Embedding Benchmark

In this example, we generate an artificial set of triplets and fit an ordinal embedding algorithm such as SOE to it. We vary the data dimension and measure the average duration and accuracy of the fit.

samples=10 triplets=100 benchmark-repetitions=10
SOE(n_components=1, n_init=1): time      0.01 (sd: 0.00)
SOE(n_components=1, n_init=1): train acc 0.94 (sd: 0.09)
SOE(n_components=1, n_init=1): test  acc 0.90 (sd: 0.10)
SOE(n_components=3, n_init=1): time      0.01 (sd: 0.00)
SOE(n_components=3, n_init=1): train acc 1.00 (sd: 0.00)
SOE(n_components=3, n_init=1): test  acc 0.90 (sd: 0.02)
SOE(n_components=10, n_init=1): time      0.01 (sd: 0.00)
SOE(n_components=10, n_init=1): train acc 1.00 (sd: 0.00)
SOE(n_components=10, n_init=1): test  acc 0.81 (sd: 0.02)
SOE(n_components=1): time      0.10 (sd: 0.01)
SOE(n_components=1): train acc 1.00 (sd: 0.00)
SOE(n_components=1): test  acc 0.99 (sd: 0.00)
SOE(n_components=3): time      0.07 (sd: 0.01)
SOE(n_components=3): train acc 1.00 (sd: 0.00)
SOE(n_components=3): test  acc 0.84 (sd: 0.03)
SOE(n_components=10): time      0.04 (sd: 0.00)
SOE(n_components=10): train acc 1.00 (sd: 0.00)
SOE(n_components=10): test  acc 0.81 (sd: 0.02)

import time

import numpy as np

from cblearn import embedding
from cblearn.datasets import make_random_triplets


def benchmark_embedding(embedding, n_dims):
    """Repeatedly fit an estimator on random triplets and print time and accuracy.

    Normally distributed data of shape ``(n_samples, n_dims)`` is drawn once.
    From it, ``n_triplets`` training triplets and 10,000 test triplets are
    generated with ``make_random_triplets``. The estimator is then fitted
    ``n_repeat`` times on the training triplets; the mean and standard
    deviation of the fit duration, the training score and the test score are
    printed.

    The module-level settings ``n_samples``, ``n_triplets`` and ``n_repeat``
    are read at call time.

    Parameters
    ----------
    embedding
        Estimator to benchmark. It must provide ``fit(T)`` and ``score(T)``
        and accept assignment to ``n_components``. It is modified in place:
        ``n_components`` is set to ``n_dims`` and the estimator is refitted.
    n_dims : int
        Dimension of the generated data and of the fitted embedding.
    """
    # The parameter name shadows the imported ``embedding`` module; it is kept
    # for keyword callers, but the argument is bound to a clearer local name.
    # Using the argument (rather than a global) makes the function benchmark
    # whatever estimator the caller actually passes in.
    estimator = embedding
    estimator.n_components = n_dims
    X = np.random.normal(size=(n_samples, n_dims))
    T = make_random_triplets(X, size=n_triplets, result_format='list-order')
    T_test = make_random_triplets(X, size=10_000, result_format='list-order')

    delta_times = []
    train_accs = []
    test_accs = []
    for _ in range(n_repeat):
        # perf_counter is monotonic and high-resolution, so short fits are
        # not distorted by wall-clock adjustments or coarse timer ticks.
        start_time = time.perf_counter()
        estimator.fit(T)
        end_time = time.perf_counter()
        delta_times.append(end_time - start_time)
        # Scoring happens outside the timed region so only ``fit`` is measured.
        train_accs.append(estimator.score(T))
        test_accs.append(estimator.score(T_test))

    print(f"{estimator}: time      {np.mean(delta_times):.2f} (sd: {np.std(delta_times):.2f})")
    print(f"{estimator}: train acc {np.mean(train_accs):.2f} (sd: {np.std(train_accs):.2f})")
    print(f"{estimator}: test  acc {np.mean(test_accs):.2f} (sd: {np.std(test_accs):.2f})")


# Benchmark settings; benchmark_embedding reads these module-level names.
n_samples, n_triplets = 10, 100
#  n_samples, n_triplets = 100, 1_000   # uncomment to use 10x more data
n_repeat = 10

print(f"samples={n_samples} triplets={n_triplets} benchmark-repetitions={n_repeat}")
# Run the benchmark for n_init=1 and n_init=10, each over the same set of
# embedding dimensions. One estimator is created per n_init setting and
# reused; benchmark_embedding updates its n_components for every run.
for n_init in (1, 10):
    estimator = embedding.SOE(1, n_init=n_init)
    for n_dims in (1, 3, 10):
        benchmark_embedding(estimator, n_dims=n_dims)

Total running time of the script: (0 minutes 2.952 seconds)

Gallery generated by Sphinx-Gallery