Source code for cblearn.metrics._triplets
import numpy as np
from sklearn.utils import check_array
from sklearn import metrics
import sparse
import scipy
from cblearn import utils
from .. import datasets
[docs]
def query_accuracy(true_response: utils.Response, pred_response: utils.Response) -> float:
"""Fraction of violated triplet constraints.
For all triplets (i, j, k), count R * (||O(j) - O(i)|| - ||O(k) - O(i)||) > 0
and divide by the number of triplets.
Args:
true_response: Triplet constraints either in array or sparse matrix format
pred_response: Either object coordinates, shape (n_objects, n_features),
or predicted triplet response.
Returns:
Number between 0 and 1, indicating the fraction of triplet constraints which are violated.
"""
if not isinstance(true_response, (sparse.COO, scipy.sparse.spmatrix)) and np.asarray(true_response).ndim == 1:
# Assume only a sequence of responses was passed
true_query = None
true_response = utils.check_response(true_response, result_format='boolean')
else:
true_query, true_response = utils.check_query_response(true_response, result_format='list-boolean')
if not isinstance(pred_response, (sparse.COO, scipy.sparse.spmatrix)) \
and np.asarray(pred_response).ndim == 1:
# Assume only a sequence of answers was passed
pred_query = None
pred_response = utils.check_response(pred_response, result_format='boolean')
elif true_query is not None and isinstance(pred_response, (np.ndarray, list)) \
and len(pred_response) != len(true_query):
# Assume an embedding was passed
embedding = check_array(pred_response, ensure_2d=True)
pred_query, pred_response = datasets.triplet_response(true_query, embedding, distance='euclidean',
result_format='list-boolean')
else:
# Assume a complete triplet query+response was passed
pred_query, pred_response = utils.check_query_response(pred_response, result_format='list-boolean')
# sort both triplet lists
if true_query is not None and pred_query is not None:
true_ix, pred_ix = np.lexsort(true_query.T), np.lexsort(pred_query.T)
true_query, true_response = true_query[true_ix], true_response[true_ix]
pred_query, pred_response = pred_query[pred_ix], pred_response[pred_ix]
if np.any(true_query != pred_query):
raise ValueError("Expects identical queries for true and predicted.")
elif not (true_query is None and pred_query is None):
raise ValueError("Expects either only responses or query-response pairs for both true and predicted. "
"Do not mix these to prevent unexpected behaviour.")
return metrics.accuracy_score(true_response, pred_response)
[docs]
def query_error(true_response: utils.Response, pred_response: utils.Response) -> float:
""" Error measured by 1 - query accuracy.`
See :py:func:`cblearn.metrics.query_accuracy` for more information."""
return 1 - query_accuracy(true_response, pred_response)
[docs]
def query_accuracy_scorer(clf, X, y):
"""Scorer function for query accuracy, compatible with sklearn's scorer API.
See :py:func:`cblearn.metrics.query_accuracy` for more information.
"""
X, y = utils.check_query_response(X, y, result_format='list-count')
y_pred = clf.predict(X, result_format='list-count')
return query_accuracy(y, y_pred)