# feature_histogram_test.py
#
# Copyright 2019 The FATE Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import copy
import random
import unittest

import numpy as np

from fate_arch.session import computing_session as session
from federatedml.ensemble import FeatureHistogram
from federatedml.feature.instance import Instance
from federatedml.feature.sparse_vector import SparseVector
from federatedml.util import consts
  25. class TestFeatureHistogram(unittest.TestCase):
  26. def setUp(self):
  27. self.feature_histogram = FeatureHistogram()
  28. session.init("test_feature_histogram")
  29. data_insts = []
  30. for i in range(1000):
  31. indices = []
  32. data = []
  33. for j in range(10):
  34. x = random.randint(0, 5)
  35. if x != 0:
  36. data.append(x)
  37. indices.append(j)
  38. sparse_vec = SparseVector(indices, data, shape=10)
  39. data_insts.append((Instance(features=sparse_vec), (1, random.randint(0, 3))))
  40. self.node_map = {0: 0, 1: 1, 2: 2, 3: 3}
  41. self.data_insts = data_insts
  42. self.data_bin = session.parallelize(data_insts, include_key=False, partition=16)
  43. self.grad_and_hess_list = [(random.random(), random.random()) for i in range(1000)]
  44. self.grad_and_hess = session.parallelize(self.grad_and_hess_list, include_key=False, partition=16)
  45. bin_split_points = []
  46. for i in range(10):
  47. bin_split_points.append(np.array([i for i in range(6)]))
  48. self.bin_split_points = np.array(bin_split_points)
  49. self.bin_sparse = [0 for i in range(10)]
  50. def test_accumulate_histogram(self):
  51. data = [[[[random.randint(0, 10) for i in range(2)]
  52. for j in range(3)]
  53. for k in range(4)]
  54. for r in range(5)]
  55. histograms = copy.deepcopy(data)
  56. for i in range(len(data)):
  57. for j in range(len(data[i])):
  58. histograms[i][j] = self.feature_histogram._tensor_histogram_cumsum(histograms[i][j])
  59. for k in range(1, len(data[i][j])):
  60. for r in range(len(data[i][j][k])):
  61. data[i][j][k][r] += data[i][j][k - 1][r]
  62. self.assertTrue(data[i][j][k][r] == histograms[i][j][k][r])
  63. def test_calculate_histogram(self):
  64. histograms = self.feature_histogram.calculate_histogram(
  65. self.data_bin, self.grad_and_hess,
  66. self.bin_split_points, self.bin_sparse,
  67. node_map=self.node_map)
  68. his2 = [[[[0 for i in range(3)]
  69. for j in range(6)]
  70. for k in range(10)]
  71. for r in range(4)]
  72. for i in range(1000):
  73. grad, hess = self.grad_and_hess_list[i]
  74. id = self.node_map[self.data_insts[i][1][1]]
  75. for fid, bid in self.data_insts[i][0].features.get_all_data():
  76. his2[id][fid][bid][0] += grad
  77. his2[id][fid][bid][1] += hess
  78. his2[id][fid][bid][2] += 1
  79. for i in range(len(his2)):
  80. for j in range(len(his2[i])):
  81. his2[i][j] = self.feature_histogram._tensor_histogram_cumsum(his2[i][j])
  82. for k in range(len(his2[i][j])):
  83. for r in range(len(his2[i][j][k])):
  84. self.assertTrue(np.fabs(his2[i][j][k][r] - histograms[i][j][k][r]) < consts.FLOAT_ZERO)
  85. def test_aggregate_histogram(self):
  86. fake_fid = 114
  87. data1 = [[random.randint(0, 10) for i in range(2)] for j in range(3)]
  88. data2 = [[random.randint(0, 10) for i in range(2)] for j in range(3)]
  89. fid, agg_histograms = self.feature_histogram._hist_aggregate((fake_fid, data1), (fake_fid, data2))
  90. for i in range(len(data1)):
  91. for j in range(len(data1[i])):
  92. data1[i][j] += data2[i][j]
  93. self.assertTrue(data1[i][j] == agg_histograms[i][j])
  94. def tearDown(self):
  95. session.stop()
  96. if __name__ == '__main__':
  97. unittest.main()