Shellmiao
/
FateWithShellmiaoComment


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778
							#
#  Copyright 2019 The FATE Authors. All Rights Reserved.
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#

import unittest

import numpy as np

from fate_arch.session import computing_session as session
from federatedml.feature.instance import Instance
from federatedml.model_selection import KFold
from federatedml.param.cross_validation_param import CrossValidationParam


class TestKFlod(unittest.TestCase):
    def setUp(self):
        session.init("123")
        self.data_num = 1000
        self.feature_num = 200
        final_result = []
        for i in range(self.data_num):
            tmp = i * np.ones(self.feature_num)
            inst = Instance(inst_id=i, features=tmp, label=0)
            tmp = (str(i), inst)
            final_result.append(tmp)
        table = session.parallelize(final_result,
                                    include_key=True,
                                    partition=3)
        self.table = table

    def test_split(self):
        kfold_obj = KFold()
        kfold_obj.n_splits = 10
        kfold_obj.random_seed = 32

        # print(self.table, self.table.count())
        data_generator = kfold_obj.split(self.table)
        expect_test_data_num = self.data_num / 10
        expect_train_data_num = self.data_num - expect_test_data_num

        key_list = []
        for train_data, test_data in data_generator:
            train_num = train_data.count()
            test_num = test_data.count()
            # print("train_num: {}, test_num: {}".format(train_num, test_num))
            self.assertTrue(0.9 * expect_train_data_num < train_num < 1.1 * expect_train_data_num)
            self.assertTrue(0.9 * expect_test_data_num < test_num < 1.1 * expect_test_data_num)
            first_key = train_data.first()[0]
            key_list.append(first_key)

        # Test random seed work
        kfold_obj2 = KFold()
        kfold_obj2.n_splits = 10
        kfold_obj2.random_seed = 32

        data_generator = kfold_obj.split(self.table)
        n = 0
        for train_data, test_data in data_generator:
            second_key = train_data.first()[0]
            first_key = key_list[n]
            self.assertTrue(first_key == second_key)
            n += 1


if __name__ == '__main__':
    unittest.main()