Source code for evaluation_metrics

'''This module provides functionality to evaluate the performance
of a machine learning model.

Each function compares the predicted data to the true data.
'''

import numpy as np
from src.helperfunctions import norms


def evaluate_accuracy(predicted_output, true_output):
    """Calculate the proportion of labels a classifier correctly predicts.

    Parameters
    ----------
    predicted_output : numpy.ndarray
        The predictions made by the classifier. Any 1D array-like is
        accepted; it is converted with ``np.asarray``.
    true_output : numpy.ndarray
        The true labels, as a 1D array-like of the same length as
        ``predicted_output``.

    Returns
    -------
    accuracy : float
        Proportion correctly predicted, between 0 and 1 (inclusive).
        Two special cases are kept for backward compatibility:
        ``None`` is returned when ``predicted_output`` is None, and the
        string ``"None made"`` is returned when there are no predictions.

    Raises
    ------
    TypeError
        If either input is not one-dimensional, or if the two inputs do
        not have the same length.
    """
    if predicted_output is None:
        return None

    # Convert up front so that lists and tuples work as well as arrays.
    predicted = np.asarray(predicted_output)
    truth = np.asarray(true_output)

    # Reject every non-1D input (including 0-d scalars) here; otherwise a
    # 0-d input would fail later with an opaque IndexError on shape[0].
    if predicted.ndim != 1 or truth.ndim != 1:
        raise TypeError("Must pass in 1D Numpy array")

    if predicted.shape[0] != truth.shape[0]:
        # Build one readable message instead of passing several positional
        # arguments, which would render as a tuple.
        raise TypeError(
            "Predicted and true output must have same size. "
            "Dimensions of predictions: {}. "
            "Dimensions of true output: {}.".format(predicted.shape,
                                                    truth.shape)
        )

    number_predictions = predicted.shape[0]
    if number_predictions == 0:
        # Historical sentinel for "nothing to score"; avoids dividing by 0.
        return "None made"

    correct_predictions = np.count_nonzero(predicted == truth)
    return correct_predictions / number_predictions
def confusion_matrix(number_labels, predicted_output, true_output):
    """Calculate contingency table of predicted labels and true labels.

    If there are L labels, then for 0 <= i, j <= L - 1, the (i, j) entry
    contains the number of times we predicted j when the true class is i.

    Parameters
    ----------
    number_labels : int
        The number of possible labels in the data.
    predicted_output : numpy.ndarray
        The predictions made by the classifier (1D).
    true_output : numpy.ndarray
        The true labels (1D, same length as ``predicted_output``).

    Returns
    -------
    confusion_matrix : numpy.ndarray
        Square [confusion] matrix of size number_labels by number_labels,
        holding integer counts. ``None`` is returned when
        ``predicted_output`` is None.

    Notes
    -----
    The true output and predicted output vectors are stacked side by side
    as columns. This sample_size by 2 numpy array is stored as
    output_combined. To identify how often we predict j when the truth is
    i, we count the number of rows of output_combined equal to [i, j].
    I consulted [1]_ to figure out the lattermost step.

    Label pairs outside 0 .. number_labels - 1 match no (i, j) cell and
    are therefore not counted.

    References
    ----------
    .. [1] https://stackoverflow.com/a/40382459
    """
    if predicted_output is None:
        return None

    # Use a 64-bit integer dtype: with int8 any cell count above 127 would
    # silently wrap around to a negative number.
    counts = np.zeros(shape=(number_labels, number_labels), dtype=np.int64)

    # Each row of output_combined is one sample: [true label, prediction].
    output_combined = np.stack((true_output, predicted_output), axis=1)

    for row_index in range(number_labels):  # i: true label
        for col_index in range(number_labels):  # j: predicted label
            # A sample matches only if both of its columns equal (i, j).
            matches = (output_combined == (row_index, col_index)).all(axis=1)
            counts[row_index, col_index] = matches.sum()

    return counts
def evaluate_regression_error(predicted_output, true_output,
                              norm=norms.euclidean_2):
    """Measure regression error as the squared norm of the residual.

    Parameters
    ----------
    predicted_output : numpy.ndarray
        The values predicted by the regression model.
    true_output : numpy.ndarray
        The true response values.
    norm : func
        The norm used to measure the size of the residual.
        Defaults to ``norms.euclidean_2``.

    Returns
    -------
    error : float
        ``norm(predicted_output - true_output)`` raised to the power 2.
    """
    residual = predicted_output - true_output
    return norm(residual) ** 2