123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163 |
- import argparse
- import numpy as np
- import os
- import pandas
- from sklearn import metrics
- from pipeline.utils.tools import JobConfig
- import torch as t
- from torch import nn
- from pipeline import fate_torch_hook
- from torch.utils.data import DataLoader, TensorDataset
- from federatedml.nn.backend.utils.common import global_seed
- fate_torch_hook(t)
class HeteroLocalModel(t.nn.Module):
    """Single-process stand-in for a hetero NN: two bottoms, a merge layer, a top.

    The guest and host bottom models each transform their own input, the
    interactive layer combines the two results, and the top model produces
    the final output.
    """

    def __init__(self, guest_btn, host_btn, interactive, top):
        """Store the four sub-modules that make up the model."""
        super().__init__()
        self.guest_btn = guest_btn
        self.host_btn = host_btn
        self.inter = interactive
        self.top = top

    def forward(self, x1, x2):
        """Run ``x1`` through the guest bottom and ``x2`` through the host bottom.

        The two bottom outputs are passed (guest first, host second) to the
        interactive layer, whose result is fed to the top model.
        """
        guest_out = self.guest_btn(x1)
        host_out = self.host_btn(x2)
        merged = self.inter(guest_out, host_out)
        return self.top(merged)
def build(param, shape1, shape2, lr):
    """Assemble the local hetero NN and its Adam optimizer.

    Args:
        param: mapping read for ``bottom_layer_units``,
            ``interactive_layer_units`` and ``top_layer_units``.
        shape1: input width of the guest bottom model.
        shape2: input width of the host bottom model.
        lr: learning rate handed to the Adam optimizer.

    Returns:
        A ``(model, optimizer)`` tuple.
    """
    # Seed first: the layers below are created in a fixed order so that
    # their initial weights are reproducible.
    global_seed(101)

    bottom_units = param["bottom_layer_units"]
    interactive_units = param["interactive_layer_units"]
    top_units = param["top_layer_units"]

    guest_bottom = t.nn.Sequential(nn.Linear(shape1, bottom_units), nn.ReLU())
    host_bottom = t.nn.Sequential(nn.Linear(shape2, bottom_units), nn.ReLU())
    interactive_layer = t.nn.InteractiveLayer(
        guest_dim=bottom_units,
        host_dim=bottom_units,
        host_num=1,
        out_dim=interactive_units,
    )

    # A single output unit gets a sigmoid; anything else gets a row-wise softmax.
    if top_units == 1:
        act = nn.Sigmoid()
    else:
        act = nn.Softmax(dim=1)
    top_layer = t.nn.Sequential(t.nn.Linear(interactive_units, top_units), act)

    model = HeteroLocalModel(guest_bottom, host_bottom, interactive_layer, top_layer)
    return model, t.optim.Adam(model.parameters(), lr=lr)
def fit(epoch, model, optimizer, loss, batch_size, dataset):
    """Train ``model`` on ``dataset`` for ``epoch`` passes, printing the loss.

    Args:
        epoch: number of passes over the dataset.
        model: callable taking the two feature batches of each sample.
        optimizer: optimizer whose gradients are reset and stepped per batch.
        loss: callable ``loss(prediction, label)`` returning a tensor.
        batch_size: batch size given to the ``DataLoader``.
        dataset: dataset yielding ``(xa, xb, label)`` triples.
    """
    print(
        'model is {}, loss is {}, optimizer is {}'.format(
            model, loss, optimizer))
    loader = DataLoader(dataset, batch_size=batch_size)
    for epoch_idx in range(epoch):
        running_loss = 0
        for batch in loader:
            feat_a, feat_b, target = batch
            optimizer.zero_grad()
            batch_loss = loss(model(feat_a, feat_b), target)
            # Accumulate a detached copy so the sum holds no autograd graph.
            running_loss += batch_loss.detach().numpy()
            batch_loss.backward()
            optimizer.step()
        print('epoch is {}, epoch loss is {}'.format(epoch_idx, running_loss))
def predict(model, Xa, Xb):
    """Return the model output for the given inputs as a NumPy array.

    Note the argument order: the model is called with ``Xb`` first and
    ``Xa`` second.
    """
    output = model(Xb, Xa)
    detached = output.detach()
    return detached.numpy()
def main(config="../../config.yaml", param="./hetero_nn_breast_config.yaml"):
    """Train the local hetero NN on the guest/host CSV files and evaluate it.

    Args:
        config: path to a job config file, or an already loaded config
            object exposing ``data_base_dir``.
        param: path to a parameter file, or an already loaded mapping. The
            keys read here are ``data_guest``, ``data_host``, ``idx``,
            ``label_name``, ``loss``, ``learning_rate``, ``epochs``,
            ``batch_size`` and ``metrics``; the mapping is also passed to
            ``build``.

    Returns:
        ``(data_summary, eval_result)`` where ``data_summary`` is an empty
        dict and ``eval_result`` maps ``"auc"`` / ``"accuracy"`` to the
        computed scores for the requested metrics.
    """
    if isinstance(config, str):
        config = JobConfig.load_from_file(config)
        data_base_dir = config["data_base_dir"]
    else:
        data_base_dir = config.data_base_dir
    if isinstance(param, str):
        param = JobConfig.load_from_file(param)

    data_guest = param["data_guest"]
    data_host = param["data_host"]
    idx = param["idx"]
    label_name = param["label_name"]

    # prepare data
    Xb = pandas.read_csv(os.path.join(data_base_dir, data_guest), index_col=idx)
    Xa = pandas.read_csv(os.path.join(data_base_dir, data_host), index_col=idx)
    y = Xb[label_name]
    # Keep only the host rows whose ids appear in the guest table AND put
    # them in the guest's row order: TensorDataset pairs rows by position,
    # so both feature tables and the labels must line up id by id.
    Xa = Xa.loc[Xb.index]
    Xb = Xb.drop(label_name, axis=1)

    Xa = t.Tensor(Xa.values)
    Xb = t.Tensor(Xb.values)
    y = t.Tensor(y.values)

    if param["loss"] == "categorical_crossentropy":
        loss = t.nn.CrossEntropyLoss()
        y = y.type(t.int64).flatten()
    else:
        loss = t.nn.BCELoss()
        y = y.reshape((-1, 1))

    # Guest features (Xb) feed the first bottom model, host features (Xa)
    # the second one.
    model, opt = build(
        param, Xb.shape[1], Xa.shape[1], lr=param['learning_rate'])
    dataset = TensorDataset(Xb, Xa, y)
    fit(epoch=param['epochs'], model=model, optimizer=opt,
        batch_size=param['batch_size'], dataset=dataset, loss=loss)

    # sklearn works on array-likes; convert the labels once up front.
    y_true = y.detach().numpy()
    eval_result = {}
    for metric in param["metrics"]:
        # Match metric names case-insensitively for every supported metric.
        metric_name = metric.lower()
        if metric_name == "auc":
            predict_y = predict(model, Xa, Xb)
            eval_result["auc"] = metrics.roc_auc_score(y_true, predict_y)
        elif metric_name == "accuracy":
            predict_y = np.argmax(predict(model, Xa, Xb), axis=1)
            eval_result["accuracy"] = metrics.accuracy_score(
                y_true=y_true, y_pred=predict_y)

    print(eval_result)
    data_summary = {}
    return data_summary, eval_result
if __name__ == "__main__":
    parser = argparse.ArgumentParser("BENCHMARK-QUALITY SKLEARN JOB")
    parser.add_argument("-config", type=str,
                        help="config file")
    parser.add_argument("-param", type=str,
                        help="config file for params")
    args = parser.parse_args()
    # Forward only the options that were actually given so that main()'s own
    # defaults cover the rest, and run the benchmark exactly once (the job
    # used to be executed a second time with defaults after a configured run).
    overrides = {}
    if args.config is not None:
        overrides["config"] = args.config
    if args.param is not None:
        overrides["param"] = args.param
    main(**overrides)
|