union_param.py 2.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. #
  4. # Copyright 2019 The FATE Authors. All Rights Reserved.
  5. #
  6. # Licensed under the Apache License, Version 2.0 (the "License");
  7. # you may not use this file except in compliance with the License.
  8. # You may obtain a copy of the License at
  9. #
  10. # http://www.apache.org/licenses/LICENSE-2.0
  11. #
  12. # Unless required by applicable law or agreed to in writing, software
  13. # distributed under the License is distributed on an "AS IS" BASIS,
  14. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. # See the License for the specific language governing permissions and
  16. # limitations under the License.
  17. #
  18. from federatedml.param.base_param import BaseParam
  19. from federatedml.util import LOGGER
  20. class UnionParam(BaseParam):
  21. """
  22. Define the union method for combining multiple dTables and keep entries with the same id
  23. Parameters
  24. ----------
  25. need_run: bool, default True
  26. Indicate if this module needed to be run
  27. allow_missing: bool, default False
  28. Whether allow mismatch between feature length and header length in the result. Note that empty tables will always be skipped regardless of this param setting.
  29. keep_duplicate: bool, default False
  30. Whether to keep entries with duplicated keys. If set to True, a new id will be generated for duplicated entry in the format {id}_{table_name}.
  31. """
  32. def __init__(self, need_run=True, allow_missing=False, keep_duplicate=False):
  33. super().__init__()
  34. self.need_run = need_run
  35. self.allow_missing = allow_missing
  36. self.keep_duplicate = keep_duplicate
  37. def check(self):
  38. descr = "union param's "
  39. if type(self.need_run).__name__ != "bool":
  40. raise ValueError(
  41. descr + "need_run {} not supported, should be bool".format(
  42. self.need_run))
  43. if type(self.allow_missing).__name__ != "bool":
  44. raise ValueError(
  45. descr + "allow_missing {} not supported, should be bool".format(
  46. self.allow_missing))
  47. if type(self.keep_duplicate).__name__ != "bool":
  48. raise ValueError(
  49. descr + "keep_duplicate {} not supported, should be bool".format(
  50. self.keep_duplicate))
  51. LOGGER.info("Finish union parameter check!")
  52. return True