union_param.py 2.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. #
  4. # Copyright 2019 The FATE Authors. All Rights Reserved.
  5. #
  6. # Licensed under the Apache License, Version 2.0 (the "License");
  7. # you may not use this file except in compliance with the License.
  8. # You may obtain a copy of the License at
  9. #
  10. # http://www.apache.org/licenses/LICENSE-2.0
  11. #
  12. # Unless required by applicable law or agreed to in writing, software
  13. # distributed under the License is distributed on an "AS IS" BASIS,
  14. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. # See the License for the specific language governing permissions and
  16. # limitations under the License.
  17. #
  18. from pipeline.param.base_param import BaseParam
  19. class UnionParam(BaseParam):
  20. """
  21. Define the union method for combining multiple dTables and keep entries with the same id
  22. Parameters
  23. ----------
  24. need_run: bool, default True
  25. Indicate if this module needed to be run
  26. allow_missing: bool, default False
  27. Whether allow mismatch between feature length and header length in the result. Note that empty tables will always be skipped regardless of this param setting.
  28. keep_duplicate: bool, default False
  29. Whether to keep entries with duplicated keys. If set to True, a new id will be generated for duplicated entry in the format {id}_{table_name}.
  30. """
  31. def __init__(self, need_run=True, allow_missing=False, keep_duplicate=False):
  32. super().__init__()
  33. self.need_run = need_run
  34. self.allow_missing = allow_missing
  35. self.keep_duplicate = keep_duplicate
  36. def check(self):
  37. descr = "union param's "
  38. if type(self.need_run).__name__ != "bool":
  39. raise ValueError(
  40. descr + "need_run {} not supported, should be bool".format(
  41. self.need_run))
  42. if type(self.allow_missing).__name__ != "bool":
  43. raise ValueError(
  44. descr + "allow_missing {} not supported, should be bool".format(
  45. self.allow_missing))
  46. if type(self.keep_duplicate).__name__ != "bool":
  47. raise ValueError(
  48. descr + "keep_duplicate {} not supported, should be bool".format(
  49. self.keep_duplicate))
  50. return True