metrics.py

import numpy as np
import sklearn.metrics as mt


# ECE from https://towardsdatascience.com/expected-calibration-error-ece-a-step-by-step-visual-explanation-with-python-code-c3e9aa12937d
def ECE(predicted_labels, confidences, true_labels, M=5):
    # Uniform M bins
    bin_boundaries = np.linspace(0, 1, M + 1)
    bin_lowers = bin_boundaries[:-1]
    bin_uppers = bin_boundaries[1:]
    # get correct/false
    accuracies = predicted_labels == true_labels
    ece = np.zeros(1)
    for bin_lower, bin_upper in zip(bin_lowers, bin_uppers):
        # bin sample
        in_bin = np.logical_and(
            confidences > bin_lower.item(), confidences <= bin_upper.item()
        )
        prob_in_bin = in_bin.mean()
        if prob_in_bin > 0:
            accuracy_in_bin = accuracies[in_bin].mean()
            avg_confid = confidences[in_bin].mean()
            ece += np.abs(avg_confid - accuracy_in_bin) * prob_in_bin
    return ece[0]
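
# Illustrative sanity check (not part of the original module, toy data is assumed):
# with the values below only the bins (0.6, 0.8] and (0.8, 1.0] are occupied, so
#   ECE = 0.5 * |0.75 - 0.5| + 0.5 * |0.925 - 1.0| = 0.1625
def _ece_example():
    predicted = np.array([1, 1, 0, 1])
    true = np.array([1, 0, 0, 1])
    confidences = np.array([0.9, 0.8, 0.7, 0.95])
    assert np.isclose(ECE(predicted, confidences, true, M=5), 0.1625)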

# Maximum Calibration Error - maximum of error per bin
def MCE(predicted_labels, confidences, true_labels, M=5):
    bin_boundaries = np.linspace(0, 1, M + 1)
    bin_lowers = bin_boundaries[:-1]
    bin_uppers = bin_boundaries[1:]
    # get correct/false
    accuracies = predicted_labels == true_labels
    mces = []
    for bin_lower, bin_upper in zip(bin_lowers, bin_uppers):
        # bin sample (<= on the upper edge, consistent with ECE, so confidences
        # of exactly 1.0 are not dropped)
        in_bin = np.logical_and(
            confidences > bin_lower.item(), confidences <= bin_upper.item()
        )
        prob_in_bin = in_bin.mean()
        if prob_in_bin > 0:
            accuracy_in_bin = accuracies[in_bin].mean()
            avg_confid = confidences[in_bin].mean()
            mces.append(np.abs(avg_confid - accuracy_in_bin))
    return max(mces)
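
# Illustrative sanity check (not part of the original module): on the same assumed
# toy data the largest per-bin gap is |0.75 - 0.5| = 0.25, and MCE always bounds
# ECE from above, since ECE is a weighted average of the per-bin gaps.
def _mce_example():
    predicted = np.array([1, 1, 0, 1])
    true = np.array([1, 0, 0, 1])
    confidences = np.array([0.9, 0.8, 0.7, 0.95])
    assert np.isclose(MCE(predicted, confidences, true, M=5), 0.25)
    assert MCE(predicted, confidences, true, M=5) >= ECE(predicted, confidences, true, M=5)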

def F1(predicted_labels, true_labels):
    tp = np.sum(np.logical_and(predicted_labels == 1, true_labels == 1))
    fp = np.sum(np.logical_and(predicted_labels == 1, true_labels == 0))
    fn = np.sum(np.logical_and(predicted_labels == 0, true_labels == 1))
    # Avoid division by zero when there are no true positives
    if tp == 0:
        return 0.0
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    return 2 * (precision * recall) / (precision + recall)
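
# Illustrative cross-check (not part of the original module): on assumed toy labels
# the hand-rolled F1 should match sklearn's f1_score.
def _f1_example():
    predicted = np.array([1, 0, 1, 1, 0])
    true = np.array([1, 1, 1, 0, 0])
    assert np.isclose(F1(predicted, true), mt.f1_score(true, predicted))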

# Uses sklearn's AUC function
# Requires confidences to be the predicted probabilities for the positive class
def AUC(confidences, true_labels):
    fpr, tpr, _ = mt.roc_curve(true_labels, confidences)
    return mt.auc(fpr, tpr)
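
# Illustrative cross-check (not part of the original module): the roc_curve/auc
# combination should agree with sklearn's roc_auc_score on assumed toy scores.
def _auc_example():
    true = np.array([0, 0, 1, 1])
    confidences = np.array([0.1, 0.4, 0.35, 0.8])
    assert np.isclose(AUC(confidences, true), mt.roc_auc_score(true, confidences))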

# Shannon entropy (in nats) of a probability vector; assumes confidences sum to 1
def entropy(confidences):
    return -np.sum(confidences * np.log(confidences))
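
# Illustrative sanity check (not part of the original module): the entropy of a
# uniform distribution over K outcomes is log(K) nats.
def _entropy_example():
    assert np.isclose(entropy(np.ones(4) / 4), np.log(4))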

### Negative Log Likelihood for binary classification
def nll_binary(confidences, true_labels):
    return -np.sum(np.log(confidences[true_labels == 1])) - np.sum(
        np.log(1 - confidences[true_labels == 0])
    )
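
# Illustrative cross-check (not part of the original module): summing the per-sample
# negative log likelihoods should match sklearn's log_loss with normalize=False
# (which returns the sum rather than the mean) on assumed toy probabilities.
def _nll_example():
    true = np.array([1, 0, 1, 1])
    confidences = np.array([0.9, 0.2, 0.6, 0.75])
    assert np.isclose(nll_binary(confidences, true), mt.log_loss(true, confidences, normalize=False))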

### Brier score for binary classification
def brier_binary(confidences, true_labels):
    return np.mean((confidences - true_labels) ** 2)
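
# Illustrative cross-check (not part of the original module): should match sklearn's
# brier_score_loss on assumed toy probabilities of the positive class.
def _brier_example():
    true = np.array([1, 0, 1, 1])
    confidences = np.array([0.9, 0.2, 0.6, 0.75])
    assert np.isclose(brier_binary(confidences, true), mt.brier_score_loss(true, confidences))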