Note
Go to the end to download the full example code.
Small Ordinal Embedding Benchmark
In this example, we generate an artificial set of triplets and use it to fit an ordinal embedding algorithm such as SOE. We vary the data dimension and, over several repetitions, measure the mean and standard deviation of the fit duration and of the train and test accuracy.
samples=10 triplets=100 benchmark-repetitions=10
SOE(n_components=1, n_init=1): time 0.01 (sd: 0.00)
SOE(n_components=1, n_init=1): train acc 0.91 (sd: 0.11)
SOE(n_components=1, n_init=1): test acc 0.88 (sd: 0.13)
SOE(n_components=3, n_init=1): time 0.01 (sd: 0.00)
SOE(n_components=3, n_init=1): train acc 1.00 (sd: 0.00)
SOE(n_components=3, n_init=1): test acc 0.86 (sd: 0.03)
SOE(n_components=10, n_init=1): time 0.00 (sd: 0.00)
SOE(n_components=10, n_init=1): train acc 1.00 (sd: 0.00)
SOE(n_components=10, n_init=1): test acc 0.84 (sd: 0.02)
SOE(n_components=1): time 0.09 (sd: 0.01)
SOE(n_components=1): train acc 1.00 (sd: 0.00)
SOE(n_components=1): test acc 0.97 (sd: 0.01)
SOE(n_components=3): time 0.06 (sd: 0.00)
SOE(n_components=3): train acc 1.00 (sd: 0.00)
SOE(n_components=3): test acc 0.86 (sd: 0.02)
SOE(n_components=10): time 0.04 (sd: 0.00)
SOE(n_components=10): train acc 1.00 (sd: 0.00)
SOE(n_components=10): test acc 0.81 (sd: 0.03)
import time
import numpy as np
from cblearn import embedding
from cblearn.datasets import make_random_triplets
def benchmark_embedding(embedding, n_dims):
    """Repeatedly fit an estimator on random triplets and print summary stats.

    Random normal data with ``n_dims`` columns is drawn, a training set of
    ``n_triplets`` triplets and a test set of 10,000 triplets are generated
    from it, and the estimator is fitted ``n_repeat`` times on the training
    triplets. The mean and standard deviation of the fit time, the training
    score, and the test score are printed.

    Reads the module-level settings ``n_samples``, ``n_triplets`` and
    ``n_repeat``.

    Parameters
    ----------
    embedding
        Estimator object exposing ``n_components``, ``fit`` and ``score``.
        Its ``n_components`` attribute is overwritten with ``n_dims``.
    n_dims
        Number of columns of the generated data; also assigned to the
        estimator's ``n_components``.
    """
    # Use the estimator that was passed in instead of reaching for a global.
    # The parameter name shadows the imported ``embedding`` module inside this
    # function, so bind it to a clearer local name right away.
    estimator = embedding
    estimator.n_components = n_dims

    X = np.random.normal(size=(n_samples, n_dims))
    T = make_random_triplets(X, size=n_triplets, result_format='list-order')
    T_test = make_random_triplets(X, size=10_000, result_format='list-order')

    delta_times = []
    train_accs = []
    test_accs = []
    for _ in range(n_repeat):
        # perf_counter is monotonic and high resolution, which matters for
        # fits that take only a few milliseconds.
        start_time = time.perf_counter()
        estimator.fit(T)
        end_time = time.perf_counter()

        delta_times.append(end_time - start_time)
        train_accs.append(estimator.score(T))
        test_accs.append(estimator.score(T_test))

    print(f"{estimator}: time {np.mean(delta_times):.2f} (sd: {np.std(delta_times):.2f})")
    print(f"{estimator}: train acc {np.mean(train_accs):.2f} (sd: {np.std(train_accs):.2f})")
    print(f"{estimator}: test acc {np.mean(test_accs):.2f} (sd: {np.std(test_accs):.2f})")
# Benchmark configuration, read as module-level globals by benchmark_embedding.
n_samples, n_triplets = 10, 100
# n_samples, n_triplets = 100, 1_000 # uncomment to use 10x more data
n_repeat = 10

print(f"samples={n_samples} triplets={n_triplets} benchmark-repetitions={n_repeat}")

# One estimator per n_init setting, reused across all benchmarked dimensions
# (benchmark_embedding overwrites its n_components on every call).
for n_init in (1, 10):
    estimator = embedding.SOE(1, n_init=n_init)
    for n_dims in (1, 3, 10):
        benchmark_embedding(estimator, n_dims=n_dims)
Total running time of the script: (0 minutes 2.690 seconds)