#
#  Copyright 2019 The FATE Authors. All Rights Reserved.
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#
- import argparse
- import torch as t
- from pipeline import fate_torch_hook
- from pipeline.backend.pipeline import PipeLine
- from pipeline.component import DataTransform, HomoNN, Evaluation
- from pipeline.component import Reader
- from pipeline.interface import Data, Model
- from pipeline.utils.tools import load_job_config, JobConfig
- from federatedml.evaluation.metrics import classification_metric
- from fate_test.utils import extract_data, parse_summary_result
- from pipeline.component.nn import TrainerParam, DatasetParam
- fate_torch_hook(t)
- class dataset(object):
- breast = {
- "guest": {"name": "breast_homo_guest", "namespace": "experiment"},
- "host": [
- {"name": "breast_homo_host", "namespace": "experiment"},
- {"name": "breast_homo_host", "namespace": "experiment"}
- ]
- }
- vehicle = {
- "guest": {"name": "vehicle_scale_homo_guest", "namespace": "experiment"},
- "host": [
- {"name": "vehicle_scale_homo_host", "namespace": "experiment"},
- {"name": "vehicle_scale_homo_host", "namespace": "experiment"}
- ]
- }
- def main(config="../../config.yaml", param="param_conf.yaml", namespace=""):
- num_host = 1
- if isinstance(config, str):
- config = load_job_config(config)
- if isinstance(param, str):
- param = JobConfig.load_from_file(param)
- epoch = param["epoch"]
- lr = param["lr"]
- batch_size = param.get("batch_size", -1)
- is_multy = param["is_multy"]
- data = getattr(dataset, param.get("dataset", "vehicle"))
- if is_multy:
- loss = t.nn.CrossEntropyLoss()
- else:
- loss = t.nn.BCELoss()
- input_shape = 18 if is_multy else 30
- output_shape = 4 if is_multy else 1
- out_act = t.nn.Softmax(dim=1) if is_multy else t.nn.Sigmoid()
- model = t.nn.Sequential(
- t.nn.Linear(input_shape, 16),
- t.nn.ReLU(),
- t.nn.Linear(16, output_shape),
- out_act
- )
- optimizer = t.optim.Adam(model.parameters(), lr=lr)
- guest_train_data = data["guest"]
- host_train_data = data["host"][:num_host]
- for d in [guest_train_data, *host_train_data]:
- d["namespace"] = f"{d['namespace']}{namespace}"
- hosts = config.parties.host[:num_host]
- pipeline = PipeLine() .set_initiator(
- role='guest',
- party_id=config.parties.guest[0]) .set_roles(
- guest=config.parties.guest[0],
- host=hosts,
- arbiter=config.parties.arbiter)
- reader_0 = Reader(name="reader_0")
- reader_0.get_party_instance(
- role='guest',
- party_id=config.parties.guest[0]).component_param(
- table=guest_train_data)
- for i in range(num_host):
- reader_0.get_party_instance(role='host', party_id=hosts[i]) \
- .component_param(table=host_train_data[i])
- data_transform_0 = DataTransform(name="data_transform_0", with_label=True)
- data_transform_0.get_party_instance(
- role='guest', party_id=config.parties.guest[0]) .component_param(
- with_label=True, output_format="dense")
- data_transform_0.get_party_instance(
- role='host',
- party_id=hosts).component_param(
- with_label=True)
- if is_multy:
- ds_param = DatasetParam(
- dataset_name='table',
- flatten_label=True,
- label_dtype='long')
- else:
- ds_param = DatasetParam(dataset_name='table')
- homo_nn_0 = HomoNN(
- name="homo_nn_0",
- trainer=TrainerParam(
- trainer_name='fedavg_trainer',
- epochs=epoch,
- batch_size=batch_size,
- ),
- dataset=ds_param,
- torch_seed=100,
- optimizer=optimizer,
- loss=loss,
- model=model)
- homo_nn_1 = HomoNN(name="homo_nn_1")
- if is_multy:
- eval_type = "multi"
- else:
- eval_type = "binary"
- evaluation_0 = Evaluation(
- name='evaluation_0',
- eval_type=eval_type,
- metrics=[
- "accuracy",
- "precision",
- "recall"])
- pipeline.add_component(reader_0)
- pipeline.add_component(
- data_transform_0, data=Data(
- data=reader_0.output.data))
- pipeline.add_component(homo_nn_0, data=Data(
- train_data=data_transform_0.output.data))
- pipeline.add_component(
- homo_nn_1, data=Data(
- test_data=data_transform_0.output.data), model=Model(
- homo_nn_0.output.model))
- pipeline.add_component(evaluation_0, data=Data(data=homo_nn_0.output.data))
- pipeline.compile()
- pipeline.fit()
- metric_summary = parse_summary_result(
- pipeline.get_component("evaluation_0").get_summary())
- nn_0_data = pipeline.get_component("homo_nn_0").get_output_data()
- nn_1_data = pipeline.get_component("homo_nn_1").get_output_data()
- nn_0_score = extract_data(nn_0_data, "predict_result")
- nn_0_label = extract_data(nn_0_data, "label")
- nn_1_score = extract_data(nn_1_data, "predict_result")
- nn_1_label = extract_data(nn_1_data, "label")
- nn_0_score_label = extract_data(nn_0_data, "predict_result", keep_id=True)
- nn_1_score_label = extract_data(nn_1_data, "predict_result", keep_id=True)
- if eval_type == "binary":
- # metric_nn = {
- # "score_diversity_ratio": classification_metric.Distribution.compute(nn_0_score_label, nn_1_score_label),
- # "ks_2samp": classification_metric.KSTest.compute(nn_0_score, nn_1_score),
- # "mAP_D_value": classification_metric.AveragePrecisionScore().compute(nn_0_score, nn_1_score, nn_0_label,
- # nn_1_label)}
- # metric_summary["distribution_metrics"] = {"homo_nn": metric_nn}
- if metric_summary is None:
- metric_summary = {}
- metric_summary["accuracy"] = (
- nn_0_score == nn_0_label).sum() / len(nn_0_label)
- # elif eval_type == "multi":
- # metric_nn = {
- # "score_diversity_ratio": classification_metric.Distribution.compute(nn_0_score_label, nn_1_score_label)}
- # metric_summary["distribution_metrics"] = {"homo_nn": metric_nn}
- data_summary = dict(
- train={"guest": guest_train_data["name"], **{f"host_{i}": host_train_data[i]["name"] for i in range(num_host)}},
- test={"guest": guest_train_data["name"], **{f"host_{i}": host_train_data[i]["name"] for i in range(num_host)}}
- )
- return data_summary, metric_summary
- if __name__ == "__main__":
- parser = argparse.ArgumentParser("BENCHMARK-QUALITY PIPELINE JOB")
- parser.add_argument("-config", type=str,
- help="config file")
- parser.add_argument("-param", type=str,
- help="config file for params")
- args = parser.parse_args()
- if args.config is not None:
- main(args.config, args.param)
- else:
- main(args.param)