linear_regression_param.py 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. #
  4. # Copyright 2019 The FATE Authors. All Rights Reserved.
  5. #
  6. # Licensed under the Apache License, Version 2.0 (the "License");
  7. # you may not use this file except in compliance with the License.
  8. # You may obtain a copy of the License at
  9. #
  10. # http://www.apache.org/licenses/LICENSE-2.0
  11. #
  12. # Unless required by applicable law or agreed to in writing, software
  13. # distributed under the License is distributed on an "AS IS" BASIS,
  14. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. # See the License for the specific language governing permissions and
  16. # limitations under the License.
  17. #
  18. import copy
  19. from pipeline.param.glm_param import LinearModelParam
  20. from pipeline.param.callback_param import CallbackParam
  21. from pipeline.param.encrypt_param import EncryptParam
  22. from pipeline.param.encrypted_mode_calculation_param import EncryptedModeCalculatorParam
  23. from pipeline.param.cross_validation_param import CrossValidationParam
  24. from pipeline.param.init_model_param import InitParam
  25. from pipeline.param.sqn_param import StochasticQuasiNewtonParam
  26. from pipeline.param.stepwise_param import StepwiseParam
  27. from pipeline.param import consts
  class LinearParam(LinearModelParam):
      """
      Parameter container for Linear Regression (LinR).

      Most arguments are handed straight to ``LinearModelParam``; this class
      additionally stores ``sqn_param`` and ``encrypted_mode_calculator_param``
      and adds LinR-specific validation in :meth:`check`.

      Parameters
      ----------
      penalty : {'L2' or 'L1'}, default: 'L2'
          Penalty method used in LinR. Note that 'L1' is not supported when
          the encrypted version is used in HeteroLinR.

      tol : float, default: 1e-4
          The tolerance of convergence.

      alpha : float, default: 1.0
          Regularization strength coefficient.

      optimizer : {'sgd', 'rmsprop', 'adam', 'sqn', 'adagrad'}, default: 'sgd'
          Optimization method.

      batch_size : int, default: -1
          Batch size when updating the model. -1 means all data is used in a
          single batch, i.e. no mini-batch strategy.

      learning_rate : float, default: 0.01
          Learning rate.

      init_param : InitParam object, default: default InitParam object
          Init param method object.

      max_iter : int, default: 20
          The maximum number of training iterations.

      early_stop : {'diff', 'abs', 'weight_diff'}, default: 'diff'
          Method used to judge convergence.
              a) diff: use the difference of loss between two iterations.
              b) abs: use the absolute value of loss, i.e. converged if
                 loss < tol.
              c) weight_diff: use the difference between the weights of two
                 consecutive iterations.

      encrypt_param : EncryptParam object, default: default EncryptParam object
          Encrypt param. Its ``method`` must equal ``consts.PAILLIER``,
          otherwise :meth:`check` raises.

      sqn_param : StochasticQuasiNewtonParam object, default: default StochasticQuasiNewtonParam object
          Stochastic quasi-Newton param. A deep copy is stored on the instance
          and validated by :meth:`check`.

      encrypted_mode_calculator_param : EncryptedModeCalculatorParam object, default: default EncryptedModeCalculatorParam object
          Encrypted mode calculator param. A deep copy is stored on the
          instance.

      cv_param : CrossValidationParam object, default: default CrossValidationParam object
          Cross validation param.

      decay : int or float, default: 1
          Decay rate for the learning rate. With t the iteration number,
          lr = lr0 / (1 + decay * t) if decay_sqrt is False, otherwise
          lr = lr0 / sqrt(1 + decay * t).

      decay_sqrt : bool, default: True
          Selects between the two decay schedules described under ``decay``.

      validation_freqs : int, list, tuple, set, or None, default: None
          Validation frequency during training; required when using early
          stopping. 1 is suggested. A value larger than 1 speeds up training
          by skipping validation rounds; in that case a value that evenly
          divides ``max_iter`` is recommended, otherwise the validation
          scores of the last training iteration are missed.

      early_stopping_rounds : int, default: None
          If a positive number is given, the early stopping criteria are
          checked at every such number of training rounds.
          ``validation_freqs`` must also be set when using early stopping.

      stepwise_param : StepwiseParam object, default: default StepwiseParam object
          Stepwise param; forwarded to the base class unchanged.

      metrics : list or None, default: None
          Metrics used when evaluating during training. If the metrics have
          not improved within ``early_stopping_rounds``, training stops before
          convergence. If left empty, default metrics are used; for regression
          tasks these are ['root_mean_squared_error', 'mean_absolute_error'].

      use_first_metric_only : bool, default: False
          Whether to use only the first metric in ``metrics`` as the criterion
          for the early stopping judgement.

      floating_point_precision : None or integer, default: 23
          If not None, use floating_point_precision-bit to speed up
          calculation, e.g. convert an x to
          round(x * 2**floating_point_precision) during Paillier operation and
          divide the result by 2**floating_point_precision in the end.

      callback_param : CallbackParam object, default: default CallbackParam object
          Callback param.
      """

      def __init__(self, penalty='L2',
                   tol=1e-4, alpha=1.0, optimizer='sgd',
                   batch_size=-1, learning_rate=0.01, init_param=InitParam(),
                   max_iter=20, early_stop='diff',
                   encrypt_param=EncryptParam(), sqn_param=StochasticQuasiNewtonParam(),
                   encrypted_mode_calculator_param=EncryptedModeCalculatorParam(),
                   cv_param=CrossValidationParam(), decay=1, decay_sqrt=True, validation_freqs=None,
                   early_stopping_rounds=None, stepwise_param=StepwiseParam(), metrics=None, use_first_metric_only=False,
                   floating_point_precision=23, callback_param=CallbackParam()):
          # Everything the generic linear-model base class understands is
          # gathered here and forwarded in a single call.
          base_kwargs = dict(
              penalty=penalty,
              tol=tol,
              alpha=alpha,
              optimizer=optimizer,
              batch_size=batch_size,
              learning_rate=learning_rate,
              init_param=init_param,
              max_iter=max_iter,
              early_stop=early_stop,
              encrypt_param=encrypt_param,
              cv_param=cv_param,
              decay=decay,
              decay_sqrt=decay_sqrt,
              validation_freqs=validation_freqs,
              early_stopping_rounds=early_stopping_rounds,
              stepwise_param=stepwise_param,
              metrics=metrics,
              use_first_metric_only=use_first_metric_only,
              floating_point_precision=floating_point_precision,
              callback_param=callback_param,
          )
          super(LinearParam, self).__init__(**base_kwargs)

          # The two params owned by this class are stored as deep copies, so
          # the instance never aliases the caller's object or the default
          # object created once when the signature was evaluated.
          self.sqn_param = copy.deepcopy(sqn_param)
          self.encrypted_mode_calculator_param = copy.deepcopy(
              encrypted_mode_calculator_param)

      def check(self):
          """
          Validate the linear regression parameters.

          Runs the base-class check first, then verifies the optimizer name,
          delegates to ``sqn_param.check()`` and finally verifies the encrypt
          method.

          Returns
          -------
          bool
              True when every check passes.

          Raises
          ------
          ValueError
              If ``optimizer`` is not one of the supported names, or if
              ``encrypt_param.method`` is not ``consts.PAILLIER``. The
              delegated checks may raise as well (not visible from here).
          """
          prefix = "linear_regression_param's "
          super(LinearParam, self).check()

          supported_optimizers = ('sgd', 'rmsprop', 'adam', 'adagrad', 'sqn')
          if self.optimizer not in supported_optimizers:
              raise ValueError(
                  prefix + "optimizer not supported, optimizer should be"
                           " 'sgd', 'rmsprop', 'adam', 'sqn' or 'adagrad'")

          self.sqn_param.check()

          if self.encrypt_param.method != consts.PAILLIER:
              raise ValueError(
                  prefix + "encrypt method supports 'Paillier' only")

          return True