123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350 |
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2019 The FATE Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
- import numpy as np
- from federatedml.linear_model.linear_model_weight import LinearModelWeights
- from federatedml.util import LOGGER, consts, paillier_check, ipcl_operator
class _Optimizer(object):
    """Base class shared by the linear-model optimizers in this module.

    It stores the hyper-parameters (learning rate, regularization strength
    ``alpha``, penalty type, learning-rate decay settings and the proximal
    coefficient ``mu``) and implements the regularized weight update.
    Subclasses only have to provide :meth:`apply_gradients`, which turns a
    raw gradient into the step that is subtracted from the weights.

    Throughout the class, when ``fit_intercept`` is set on the weights the
    last element of a gradient / weight vector is the intercept entry and is
    never regularized.
    """

    def __init__(self, learning_rate, alpha, penalty, decay, decay_sqrt, mu=0):
        self.learning_rate = learning_rate
        # Current iteration index; stays 0 until set_iters() is called.
        self.iters = 0
        self.alpha = alpha
        self.penalty = penalty
        self.decay = decay
        self.decay_sqrt = decay_sqrt
        self.mu = mu

    def decay_learning_rate(self):
        """Return the learning rate decayed according to the current iteration.

        Uses ``lr / sqrt(1 + decay * iters)`` when ``decay_sqrt`` is truthy and
        ``lr / (1 + decay * iters)`` otherwise.
        """
        if self.decay_sqrt:
            lr = self.learning_rate / np.sqrt(1 + self.decay * self.iters)
        else:
            lr = self.learning_rate / (1 + self.decay * self.iters)
        return lr

    @property
    def shrinkage_val(self):
        """Threshold used by the L1 update: ``alpha * lr / sqrt(iters)``.

        The iteration count is clamped to at least 1. ``iters`` is 0 right
        after construction, and dividing by ``sqrt(0)`` would give an infinite
        threshold that zeroes every coefficient in ``_l1_updator``.
        """
        this_step_size = self.learning_rate / np.sqrt(max(self.iters, 1))
        return self.alpha * this_step_size

    def set_iters(self, iters):
        """Record the current iteration index used by the decay formulas."""
        self.iters = iters

    def apply_gradients(self, grad):
        """Convert a gradient into an update step; implemented by subclasses.

        Raises:
            NotImplementedError: always, on the base class.
        """
        raise NotImplementedError("Should not call here")

    def _l1_updator(self, model_weights: LinearModelWeights, gradient):
        """Apply one L1-regularized step and return the new weights.

        The coefficients are moved by ``gradient`` and then soft-thresholded
        by ``shrinkage_val``: ``sign(w - g) * max(0, |w - g| - shrinkage)``.
        The intercept, if present, is only moved by its gradient entry.
        """
        coef_ = model_weights.coef_
        if model_weights.fit_intercept:
            gradient_without_intercept = gradient[: -1]
        else:
            gradient_without_intercept = gradient

        stepped = coef_ - gradient_without_intercept
        new_weights = np.sign(stepped) * np.maximum(0, np.abs(stepped) - self.shrinkage_val)

        if model_weights.fit_intercept:
            # The intercept is not shrunk; it just takes a plain gradient step.
            new_weights = np.append(new_weights, model_weights.intercept_)
            new_weights[-1] -= gradient[-1]
        new_param = LinearModelWeights(new_weights, model_weights.fit_intercept, model_weights.raise_overflow_error)
        return new_param

    def _l2_updator(self, lr_weights: LinearModelWeights, gradient):
        """
        For l2 regularization, the regular term has been added in gradients.
        """
        new_weights = lr_weights.unboxed - gradient
        new_param = LinearModelWeights(new_weights, lr_weights.fit_intercept, lr_weights.raise_overflow_error)
        return new_param

    def add_regular_to_grad(self, grad, lr_weights):
        """Return ``grad`` with the L2 regularization term added.

        For the L2 penalty ``alpha * coef_`` is added to the coefficient part
        of the gradient; the intercept entry is left as is. For any other
        penalty ``grad`` is returned unchanged. The input ``grad`` is never
        modified in place.

        Args:
            grad: gradient vector (intercept entry last when fitted).
            lr_weights: current model weights.
        """
        if self.penalty != consts.L2_PENALTY:
            return grad

        if paillier_check.is_single_ipcl_encrypted_number(lr_weights.unboxed):
            # IPCL path: the gradient is first merged into IPCL form.
            grad_ct = ipcl_operator.merge_encrypted_number_array(grad)
            grad_ct = np.array(grad_ct)

            if lr_weights.fit_intercept:
                # Per-entry alpha with a trailing 0 so the intercept is not penalized.
                alpha = np.append(np.ones(len(grad) - 1) * self.alpha, 0.0)
                # NOTE(review): item(0) is presumably the single packed
                # ciphertext holding all weights - confirm against ipcl_operator.
                new_grad = grad_ct + lr_weights.unboxed.item(0) * alpha
            else:
                new_grad = grad_ct + self.alpha * lr_weights.coef_
        elif lr_weights.fit_intercept:
            # Build a new array instead of using ``+=`` on the ``grad[:-1]``
            # view, which would silently modify the caller's gradient.
            gradient_without_intercept = grad[: -1] + self.alpha * lr_weights.coef_
            new_grad = np.append(gradient_without_intercept, grad[-1])
        else:
            new_grad = grad + self.alpha * lr_weights.coef_
        return new_grad

    def regularization_update(self, model_weights: LinearModelWeights, grad,
                              prev_round_weights: LinearModelWeights = None):
        """Subtract the step ``grad`` from the weights according to the penalty.

        Args:
            model_weights: current weights.
            grad: step to subtract (already scaled by the optimizer).
            prev_round_weights: when given, an additional proximal step
                ``mu * (coef_ - prev_round_coef_)`` is subtracted from the
                coefficients (not from the intercept).

        Returns:
            A new ``LinearModelWeights`` instance.
        """
        if self.penalty == consts.L1_PENALTY:
            model_weights = self._l1_updator(model_weights, grad)
        elif self.penalty == consts.L2_PENALTY:
            model_weights = self._l2_updator(model_weights, grad)
        else:
            new_vars = model_weights.unboxed - grad
            model_weights = LinearModelWeights(new_vars,
                                               model_weights.fit_intercept,
                                               model_weights.raise_overflow_error)

        if prev_round_weights is not None:  # additional proximal term
            coef_ = model_weights.unboxed
            proximal_step = self.mu * (model_weights.coef_ - prev_round_weights.coef_)

            if model_weights.fit_intercept:
                new_coef_ = np.append(coef_[: -1] - proximal_step, coef_[-1])
            else:
                new_coef_ = coef_ - proximal_step
            model_weights = LinearModelWeights(new_coef_,
                                               model_weights.fit_intercept,
                                               model_weights.raise_overflow_error)
        return model_weights

    def __l1_loss_norm(self, model_weights: LinearModelWeights):
        """Return the L1 penalty ``sum(alpha * |coef_|)``."""
        coef_ = model_weights.coef_
        loss_norm = np.sum(self.alpha * np.abs(coef_))
        return loss_norm

    def __l2_loss_norm(self, model_weights: LinearModelWeights):
        """Return the L2 penalty ``0.5 * alpha * <coef_, coef_>``."""
        coef_ = model_weights.coef_
        loss_norm = 0.5 * self.alpha * np.dot(coef_, coef_)
        return loss_norm

    def __add_proximal(self, model_weights, prev_round_weights):
        """Return the proximal penalty ``0.5 * mu * ||coef_ - prev_coef_||^2``."""
        prev_round_coef_ = prev_round_weights.coef_
        coef_ = model_weights.coef_
        diff = coef_ - prev_round_coef_
        loss_norm = self.mu * 0.5 * np.dot(diff, diff)
        return loss_norm

    def loss_norm(self, model_weights: LinearModelWeights, prev_round_weights: LinearModelWeights = None):
        """Return the regularization part of the loss.

        This is the L1 or L2 penalty (depending on ``penalty``) plus the
        proximal term when ``prev_round_weights`` is given. Returns ``None``
        when neither a known penalty nor a proximal term applies.
        """
        proximal_term = None

        if prev_round_weights is not None:
            proximal_term = self.__add_proximal(model_weights, prev_round_weights)

        if self.penalty == consts.L1_PENALTY:
            loss_norm_value = self.__l1_loss_norm(model_weights)
        elif self.penalty == consts.L2_PENALTY:
            loss_norm_value = self.__l2_loss_norm(model_weights)
        else:
            loss_norm_value = None

        # additional proximal term
        if loss_norm_value is None:
            loss_norm_value = proximal_term
        elif proximal_term is not None:
            loss_norm_value += proximal_term
        return loss_norm_value

    def hess_vector_norm(self, delta_s: LinearModelWeights):
        """Return the regularization term's contribution to a Hessian-vector product.

        For the L2 penalty this is ``alpha * delta_s``; for every other
        penalty (including L1) it is a zero vector of the same shape.
        """
        if self.penalty == consts.L2_PENALTY:
            hess_vector = self.alpha * np.array(delta_s.unboxed)
        else:
            hess_vector = np.zeros_like(delta_s.unboxed)
        return LinearModelWeights(hess_vector,
                                  fit_intercept=delta_s.fit_intercept,
                                  raise_overflow_error=delta_s.raise_overflow_error)

    def update_model(self, model_weights: LinearModelWeights, grad, prev_round_weights: LinearModelWeights = None,
                     has_applied=True):
        """Update the weights with ``grad`` and return the new weights.

        Args:
            model_weights: current weights.
            grad: gradient, or the ready-made step when ``has_applied`` is True.
            prev_round_weights: optional weights used for the proximal term.
            has_applied: when False, the L2 term is added to ``grad`` and the
                result is passed through ``apply_gradients`` first; when True,
                ``grad`` is subtracted as given.
        """
        if not has_applied:
            grad = self.add_regular_to_grad(grad, model_weights)
            delta_grad = self.apply_gradients(grad)
        else:
            delta_grad = grad
        model_weights = self.regularization_update(model_weights, delta_grad, prev_round_weights)
        return model_weights
class _SgdOptimizer(_Optimizer):
    """Plain gradient descent: the step is the gradient times the decayed learning rate."""

    def apply_gradients(self, grad):
        """Return ``grad`` scaled by the current decayed learning rate."""
        return self.decay_learning_rate() * grad
class _RMSPropOptimizer(_Optimizer):
    """RMSProp: scales the gradient by a running average of its squared values."""

    def __init__(self, learning_rate, alpha, penalty, decay, decay_sqrt, mu):
        # Forward ``mu``: without it the base class falls back to mu=0 and
        # the proximal term is silently disabled for this optimizer.
        super().__init__(learning_rate, alpha, penalty, decay, decay_sqrt, mu)
        # Decay factor of the running average of squared gradients.
        self.rho = 0.99
        # Running average of squared gradients; created on first use.
        self.opt_m = None

    def apply_gradients(self, grad):
        """Return the RMSProp step for ``grad`` and update the running average."""
        learning_rate = self.decay_learning_rate()

        if self.opt_m is None:
            self.opt_m = np.zeros_like(grad)

        self.opt_m = self.rho * self.opt_m + (1 - self.rho) * np.square(grad)
        # Force float64 so np.sqrt works even if ``grad`` had an object dtype.
        self.opt_m = np.array(self.opt_m, dtype=np.float64)
        # 1e-6 keeps the denominator away from zero.
        delta_grad = learning_rate * grad / np.sqrt(self.opt_m + 1e-6)
        return delta_grad
class _AdaGradOptimizer(_Optimizer):
    """AdaGrad: scales the gradient by the root of the accumulated squared gradients."""

    def __init__(self, learning_rate, alpha, penalty, decay, decay_sqrt, mu):
        # Forward ``mu``: without it the base class falls back to mu=0 and
        # the proximal term is silently disabled for this optimizer.
        super().__init__(learning_rate, alpha, penalty, decay, decay_sqrt, mu)
        # Sum of squared gradients seen so far; created on first use.
        self.opt_m = None

    def apply_gradients(self, grad):
        """Return the AdaGrad step for ``grad`` and update the accumulator."""
        learning_rate = self.decay_learning_rate()

        if self.opt_m is None:
            self.opt_m = np.zeros_like(grad)
        self.opt_m = self.opt_m + np.square(grad)
        # Force float64 so np.sqrt works even if ``grad`` had an object dtype.
        self.opt_m = np.array(self.opt_m, dtype=np.float64)
        # 1e-7 keeps the denominator away from zero.
        delta_grad = learning_rate * grad / (np.sqrt(self.opt_m) + 1e-7)
        return delta_grad
class _NesterovMomentumSGDOpimizer(_Optimizer):
    """SGD with Nesterov momentum (momentum coefficient fixed at 0.9)."""

    def __init__(self, learning_rate, alpha, penalty, decay, decay_sqrt, mu):
        # Forward ``mu``: without it the base class falls back to mu=0 and
        # the proximal term is silently disabled for this optimizer.
        super().__init__(learning_rate, alpha, penalty, decay, decay_sqrt, mu)
        self.nesterov_momentum_coeff = 0.9
        # Velocity from the previous call; created on first use.
        self.opt_m = None

    def apply_gradients(self, grad):
        """Return the Nesterov-momentum step for ``grad`` and update the velocity."""
        learning_rate = self.decay_learning_rate()

        if self.opt_m is None:
            self.opt_m = np.zeros_like(grad)

        # New velocity from the previous one and the current gradient.
        v = self.nesterov_momentum_coeff * self.opt_m - learning_rate * grad
        # The step combines the previous velocity (still in self.opt_m) with
        # the new one, so it must be computed before opt_m is overwritten.
        delta_grad = self.nesterov_momentum_coeff * self.opt_m - (1 + self.nesterov_momentum_coeff) * v
        self.opt_m = v
        return delta_grad
class _AdamOptimizer(_Optimizer):
    """Adam: bias-corrected first and second moment estimates of the gradient."""

    def __init__(self, learning_rate, alpha, penalty, decay, decay_sqrt, mu):
        # Forward ``mu``: without it the base class falls back to mu=0 and
        # the proximal term is silently disabled for this optimizer.
        super().__init__(learning_rate, alpha, penalty, decay, decay_sqrt, mu)
        self.opt_beta1 = 0.9
        self.opt_beta2 = 0.999
        # Running products beta1**t and beta2**t used for bias correction.
        self.opt_beta1_decay = 1.0
        self.opt_beta2_decay = 1.0

        # First and second moment estimates; created on first use.
        self.opt_m = None
        self.opt_v = None

    def apply_gradients(self, grad):
        """Return the Adam step for ``grad`` and update the moment estimates."""
        learning_rate = self.decay_learning_rate()

        if self.opt_m is None:
            self.opt_m = np.zeros_like(grad)

        if self.opt_v is None:
            self.opt_v = np.zeros_like(grad)

        self.opt_beta1_decay = self.opt_beta1_decay * self.opt_beta1
        self.opt_beta2_decay = self.opt_beta2_decay * self.opt_beta2
        self.opt_m = self.opt_beta1 * self.opt_m + (1 - self.opt_beta1) * grad
        self.opt_v = self.opt_beta2 * self.opt_v + (1 - self.opt_beta2) * np.square(grad)
        # Bias-corrected estimates.
        opt_m_hat = self.opt_m / (1 - self.opt_beta1_decay)
        opt_v_hat = self.opt_v / (1 - self.opt_beta2_decay)
        # Force float64 so np.sqrt works even if ``grad`` had an object dtype.
        opt_v_hat = np.array(opt_v_hat, dtype=np.float64)
        # 1e-8 keeps the denominator away from zero.
        delta_grad = learning_rate * opt_m_hat / (np.sqrt(opt_v_hat) + 1e-8)
        return delta_grad
class _StochasticQuansiNewtonOptimizer(_Optimizer):
    """Quasi-Newton optimizer: pre-multiplies the gradient by a supplied matrix.

    Until a matrix is provided through :meth:`set_hess_matrix` it behaves like
    plain gradient descent.
    """

    def __init__(self, learning_rate, alpha, penalty, decay, decay_sqrt, mu):
        # Forward ``mu``: without it the base class falls back to mu=0 and
        # the proximal term is silently disabled for this optimizer.
        super().__init__(learning_rate, alpha, penalty, decay, decay_sqrt, mu)
        self.__opt_hess = None

    def apply_gradients(self, grad):
        """Return ``lr * grad``, or ``lr * hess.dot(grad)`` once a matrix is set."""
        learning_rate = self.decay_learning_rate()
        if self.__opt_hess is None:
            delta_grad = learning_rate * grad
        else:
            delta_grad = learning_rate * self.__opt_hess.dot(grad)
        return delta_grad

    def set_hess_matrix(self, hess_matrix):
        """Store the matrix applied to gradients in ``apply_gradients``."""
        self.__opt_hess = hess_matrix
def optimizer_factory(param):
    """Build the optimizer selected by ``param.optimizer``.

    Args:
        param: object exposing ``optimizer``, ``learning_rate``, ``alpha``,
            ``penalty``, ``decay`` and ``decay_sqrt`` attributes, and
            optionally ``mu`` (defaults to 0.0 when absent).

    Returns:
        An instance of the matching optimizer class.

    Raises:
        AttributeError: if a required attribute is missing on ``param``.
        NotImplementedError: if ``param.optimizer`` is not a known type.
    """
    try:
        optimizer_type = param.optimizer
        learning_rate = param.learning_rate
        alpha = param.alpha
        penalty = param.penalty
        decay = param.decay
        decay_sqrt = param.decay_sqrt
    except AttributeError as err:
        # Chain the original error so the name of the missing attribute is kept.
        raise AttributeError("Optimizer parameters has not been totally set") from err

    # ``mu`` is optional on the parameter object.
    mu = getattr(param, 'mu', 0.0)
    init_params = [learning_rate, alpha, penalty, decay, decay_sqrt, mu]

    # One placeholder per argument, so ``mu`` is logged as well.
    LOGGER.debug("in optimizer_factory, optimizer_type: {}, learning_rate: {}, alpha: {}, penalty: {}, "
                 "decay: {}, decay_sqrt: {}, mu: {}".format(optimizer_type, *init_params))

    if optimizer_type == 'sgd':
        return _SgdOptimizer(*init_params)
    elif optimizer_type == 'nesterov_momentum_sgd':
        return _NesterovMomentumSGDOpimizer(*init_params)
    elif optimizer_type == 'rmsprop':
        return _RMSPropOptimizer(*init_params)
    elif optimizer_type == 'adam':
        return _AdamOptimizer(*init_params)
    elif optimizer_type == 'adagrad':
        return _AdaGradOptimizer(*init_params)
    elif optimizer_type == 'sqn':
        return _StochasticQuansiNewtonOptimizer(*init_params)
    else:
        raise NotImplementedError("Optimize method cannot be recognized: {}".format(optimizer_type))
|