feature_imputation_param.py 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. #
  4. # Copyright 2019 The FATE Authors. All Rights Reserved.
  5. #
  6. # Licensed under the Apache License, Version 2.0 (the "License");
  7. # you may not use this file except in compliance with the License.
  8. # You may obtain a copy of the License at
  9. #
  10. # http://www.apache.org/licenses/LICENSE-2.0
  11. #
  12. # Unless required by applicable law or agreed to in writing, software
  13. # distributed under the License is distributed on an "AS IS" BASIS,
  14. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. # See the License for the specific language governing permissions and
  16. # limitations under the License.
  17. #
  18. from pipeline.param.base_param import BaseParam
  19. class FeatureImputationParam(BaseParam):
  20. """
  21. Define feature imputation parameters
  22. Parameters
  23. ----------
  24. default_value : None or single object type or list
  25. the value to replace missing value.
  26. if None, it will use default value defined in federatedml/feature/imputer.py,
  27. if single object, will fill missing value with this object,
  28. if list, it's length should be the same as input data' feature dimension,
  29. means that if some column happens to have missing values, it will replace it
  30. the value by element in the identical position of this list.
  31. missing_fill_method : [None, 'min', 'max', 'mean', 'designated']
  32. the method to replace missing value
  33. col_missing_fill_method: None or dict of (column name, missing_fill_method) pairs
  34. specifies method to replace missing value for each column;
  35. any column not specified will take missing_fill_method,
  36. if missing_fill_method is None, unspecified column will not be imputed;
  37. missing_impute : None or list
  38. element of list can be any type, or auto generated if value is None, define which values to be consider as missing, default: None
  39. need_run: bool, default True
  40. need run or not
  41. """
  42. def __init__(self, default_value=0, missing_fill_method=None, col_missing_fill_method=None,
  43. missing_impute=None, need_run=True):
  44. super(FeatureImputationParam, self).__init__()
  45. self.default_value = default_value
  46. self.missing_fill_method = missing_fill_method
  47. self.col_missing_fill_method = col_missing_fill_method
  48. self.missing_impute = missing_impute
  49. self.need_run = need_run
  50. def check(self):
  51. descr = "feature imputation param's "
  52. self.check_boolean(self.need_run, descr + "need_run")
  53. if self.missing_fill_method is not None:
  54. self.missing_fill_method = self.check_and_change_lower(self.missing_fill_method,
  55. ['min', 'max', 'mean', 'designated'],
  56. f"{descr}missing_fill_method ")
  57. if self.col_missing_fill_method:
  58. if not isinstance(self.col_missing_fill_method, dict):
  59. raise ValueError(f"{descr}col_missing_fill_method should be a dict")
  60. for k, v in self.col_missing_fill_method.items():
  61. if not isinstance(k, str):
  62. raise ValueError(f"{descr}col_missing_fill_method should contain str key(s) only")
  63. v = self.check_and_change_lower(v,
  64. ['min', 'max', 'mean', 'designated'],
  65. f"per column method specified in {descr} col_missing_fill_method dict")
  66. self.col_missing_fill_method[k] = v
  67. if self.missing_impute:
  68. if not isinstance(self.missing_impute, list):
  69. raise ValueError(f"{descr}missing_impute must be None or list.")
  70. return True