clustering_metric.py 2.8 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091
  1. import numpy as np
  2. from sklearn.metrics import jaccard_score as jaccard_similarity_score
  3. from sklearn.metrics import fowlkes_mallows_score
  4. from sklearn.metrics import adjusted_rand_score
  5. class JaccardSimilarityScore(object):
  6. """
  7. Compute jaccard_similarity_score
  8. """
  9. def compute(self, labels, pred_scores):
  10. return jaccard_similarity_score(labels, pred_scores, average="weighted")
  11. class FowlkesMallowsScore(object):
  12. """
  13. Compute fowlkes_mallows_score, as in FMI
  14. """
  15. def compute(self, labels, pred_scores):
  16. return fowlkes_mallows_score(labels, pred_scores)
  17. class AdjustedRandScore(object):
  18. """
  19. Compute adjusted_rand_score,as in RI
  20. """
  21. def compute(self, labels, pred_scores):
  22. return adjusted_rand_score(labels, pred_scores)
  23. class ContengincyMatrix(object):
  24. """
  25. Compute contengincy_matrix
  26. """
  27. def compute(self, labels, pred_scores):
  28. #total_count = len(labels)
  29. label_predict = list(zip(labels, pred_scores))
  30. predicted_label = list(range(0, max(pred_scores) + 1))
  31. unique_true_label = np.unique(labels)
  32. result_array = np.zeros([len(unique_true_label), max(pred_scores) + 1])
  33. for v1, v2 in label_predict:
  34. result_array[v1][v2] += 1
  35. return result_array, predicted_label, unique_true_label
  36. class DistanceMeasure(object):
  37. """
  38. Compute distance_measure
  39. """
  40. def compute(self, dist_table, inter_cluster_dist, max_radius):
  41. max_radius_result = max_radius
  42. cluster_nearest_result = []
  43. if len(dist_table) == 1:
  44. cluster_nearest_result.append(0)
  45. else:
  46. for j in range(0, len(dist_table)):
  47. arr = inter_cluster_dist[j * (len(dist_table) - 1): (j + 1) * (len(dist_table) - 1)]
  48. smallest_index = list(arr).index(min(arr))
  49. if smallest_index > j:
  50. smallest_index += 1
  51. cluster_nearest_result.append(smallest_index)
  52. distance_measure_result = dict()
  53. for n in range(0, len(dist_table)):
  54. distance_measure_result[n] = [max_radius_result[n], cluster_nearest_result[n]]
  55. return distance_measure_result
  56. class DaviesBouldinIndex(object):
  57. """
  58. Compute dbi,as in dbi
  59. """
  60. def compute(self, dist_table, cluster_dist):
  61. if len(dist_table) == 1:
  62. return np.nan
  63. max_dij_list = []
  64. d = 0
  65. for i in range(0, len(dist_table)):
  66. dij_list = []
  67. for j in range(0, len(dist_table)):
  68. if j != i:
  69. dij_list.append((dist_table[i] + dist_table[j]) / (cluster_dist[d] ** 0.5))
  70. d += 1
  71. max_dij = max(dij_list)
  72. max_dij_list.append(max_dij)
  73. return np.sum(max_dij_list) / len(dist_table)