merge_hetero_lr.py 4.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697
  1. import json
  2. import numpy as np
  3. from federatedml.protobuf.generated.lr_model_param_pb2 import LRModelParam
  4. from federatedml.protobuf.generated.lr_model_meta_pb2 import LRModelMeta
  5. from sklearn.linear_model import LogisticRegression
  6. from sklearn2pmml.pipeline import PMMLPipeline
  7. from google.protobuf import json_format
  8. def _get_coef(param_obj):
  9. coefficient = np.empty((1, len(param_obj.header)))
  10. weight_dict = dict(param_obj.weight)
  11. for index in range(len(param_obj.header)):
  12. coefficient[0][index] = weight_dict[param_obj.header[index]]
  13. return coefficient
  14. def _merge_single_model_coef(guest_pb_param, host_pb_param, include_guest_coef):
  15. host_coef = _get_coef(host_pb_param)
  16. if include_guest_coef:
  17. guest_coef = _get_coef(guest_pb_param)
  18. coef = np.concatenate((guest_coef, host_coef), axis=1)
  19. return coef
  20. return host_coef
  21. def _get_model_header(guest_pb_param, host_pb_param, include_guest_coef, include_role=False):
  22. header = list(host_pb_param.header)
  23. if include_guest_coef:
  24. if include_role:
  25. guest_header = [f"guest_{feature}" for feature in guest_pb_param.header]
  26. host_header = [f"host_{feature}" for feature in header]
  27. else:
  28. guest_header = list(guest_pb_param.header)
  29. host_header = header
  30. header = guest_header + host_header
  31. return header
  32. def merge_lr(guest_param: dict, guest_meta: dict, host_params: list, host_metas: list, output_format: str,
  33. include_guest_coef=False):
  34. # check for multi-host
  35. if len(host_params) > 1 or len(host_metas) > 1:
  36. raise ValueError(f"Cannot merge Hetero LR models from multiple hosts. Please check input")
  37. host_param, host_meta = host_params[0], host_metas[0]
  38. pb_meta = json_format.Parse(json.dumps(guest_meta), LRModelMeta())
  39. if pb_meta.reveal_strategy == "encrypted_reveal_in_host":
  40. raise ValueError(f"Cannot merge encrypted LR models. Please check input.")
  41. # set up model
  42. sk_lr_model = LogisticRegression(penalty=pb_meta.penalty.lower(),
  43. tol=pb_meta.tol,
  44. fit_intercept=pb_meta.fit_intercept,
  45. max_iter=pb_meta.max_iter,
  46. multi_class="ovr",
  47. solver="saga")
  48. include_role = False
  49. if output_format in ['pmml']:
  50. include_role = True
  51. if pb_meta.need_one_vs_rest:
  52. guest_pb_param_c = json_format.Parse(json.dumps(guest_param), LRModelParam())
  53. host_pb_param_c = json_format.Parse(json.dumps(host_param), LRModelParam())
  54. sk_lr_model.classes_ = np.array([int(c) for c in guest_pb_param_c.one_vs_rest_result.one_vs_rest_classes])
  55. guest_pb_models = guest_pb_param_c.one_vs_rest_result.completed_models
  56. host_pb_models = host_pb_param_c.one_vs_rest_result.completed_models
  57. coef_list, intercept_list, iters_list, header = [], [], [], []
  58. for guest_single_pb_param, host_single_pb_param in zip(guest_pb_models, host_pb_models):
  59. coef = _merge_single_model_coef(guest_single_pb_param, host_single_pb_param, include_guest_coef)
  60. coef_list.append(coef)
  61. intercept_list.append(guest_single_pb_param.intercept)
  62. iters_list.append(guest_single_pb_param.iters)
  63. header = _get_model_header(guest_single_pb_param, host_single_pb_param, include_guest_coef, include_role)
  64. sk_lr_model.coef_ = np.concatenate(coef_list, axis=0)
  65. sk_lr_model.intercept_ = np.array(intercept_list)
  66. sk_lr_model.n_iter_ = np.array(iters_list)
  67. else:
  68. guest_pb_param = json_format.Parse(json.dumps(guest_param), LRModelParam())
  69. host_pb_param = json_format.Parse(json.dumps(host_param), LRModelParam())
  70. sk_lr_model.classes_ = np.array([0, 1])
  71. sk_lr_model.n_iter_ = np.array([guest_pb_param.iters])
  72. header = _get_model_header(guest_pb_param, host_pb_param, include_guest_coef, include_role)
  73. coef = _merge_single_model_coef(guest_pb_param, host_pb_param, include_guest_coef)
  74. sk_lr_model.coef_ = coef
  75. sk_lr_model.intercept_ = np.array([guest_pb_param.intercept])
  76. sk_lr_model.feature_names_in_ = np.array(header, dtype=str)
  77. sk_lr_model.n_features_in_ = len(header)
  78. if output_format in ['sklearn', 'scikit-learn']:
  79. return sk_lr_model
  80. elif output_format in ['pmml']:
  81. pipeline = PMMLPipeline([("classifier", sk_lr_model)])
  82. pipeline.active_fields = header
  83. return pipeline
  84. else:
  85. raise ValueError('unknown output type {}'.format(output_format))