123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113 |
- # The unique identifier for each federated learning task
- task_id: ""
- # Provide dataset and federated learning simulation related configuration.
- data:
- # The root directory where datasets are stored.
- root: "./data/"
- # The name of the dataset, support: femnist, shakespeare, cifar10, and cifar100.
- dataset: femnist
- # The data distribution of each client, support: iid, niid (for femnist and shakespeare), and dir and class (for cifar datasets).
- # `iid` means independent and identically distributed data.
- # `niid` means non-independent and identically distributed data for FEMNIST and Shakespeare.
- # `dir` means using Dirichlet process to simulate non-iid data, for CIFAR-10 and CIFAR-100 datasets.
- # `class` means partitioning the dataset by label classes, for datasets like CIFAR-10, CIFAR-100.
- split_type: "iid"
- # The minimal number of samples in each client. It is applicable for LEAF datasets and dir simulation of CIFAR-10 and CIFAR-100.
- min_size: 10
- # The fraction of data sampled for LEAF datasets. e.g., 10% means that only 10% of total dataset size are used.
- data_amount: 0.05
- # The fraction of the number of clients used when the split_type is 'iid'.
- iid_fraction: 0.1
- # Whether partition users of the dataset into train-test groups. Only applicable to femnist and shakespeare datasets.
- # True means partitioning users of the dataset into train-test groups.
- # False means partitioning each users' samples into train-test groups.
- user: False
- # The fraction of data for training; the rest are for testing.
- train_test_split: 0.9
- # The number of classes in each client. Only applicable when the split_type is 'class'.
- class_per_client: 1
- # The targeted number of clients to construct.used in non-leaf dataset, number of clients split into. for leaf dataset, only used when split type class.
- num_of_clients: 100
- # The parameter for Dirichlet distribution simulation, applicable only when split_type is `dir` for CIFAR datasets.
- alpha: 0.5
- # The targeted distribution of quantities to simulate data quantity heterogeneity.
- # The values should sum up to 1. e.g., [0.1, 0.2, 0.7].
- # The `num_of_clients` should be divisible by `len(weights)`.
- # None means clients are simulated with the same data quantity.
- weights: NULL
- # The name of the model for training, support: lenet, rnn, resnet, resnet18, resnet50, vgg9.
- model: lenet
- # How to conduct testing, options: test_in_client or test_in_server.
- # `test_in_client` means that each client has a test set to run testing.
- # `test_in_server` means that server has a test set to run testing for the global model. Use this mode for cifar datasets.
- test_mode: "test_in_client"
- # The way to measure testing performance (accuracy) when test mode is `test_in_client`, support: average or weighted (means weighted average).
- test_method: "average"
- server:
- track: False # Whether track server metrics using the tracking service.
- rounds: 10 # Total training round.
- clients_per_round: 5 # The number of clients to train in each round.
- test_every: 1 # The frequency of testing: conduct testing every N round.
- save_model_every: 10 # The frequency of saving model: save model every N round.
- save_model_path: "" # The path to save model. Default path is root directory of the library.
- batch_size: 32 # The batch size of test_in_server.
- test_all: False # Whether test all clients or only selected clients.
- random_selection: True # Whether select clients to train randomly.
- # The strategy to aggregate client uploaded models, options: FedAvg, equal.
- # FedAvg aggregates models using weighted average, where the weights are data size of clients.
- # equal aggregates model by simple averaging.
- aggregation_strategy: "FedAvg"
- # The content of aggregation, options: all, parameters.
- # all means aggregating models using state_dict, including both model parameters and persistent buffers like BatchNorm stats.
- # parameters means aggregating only model parameters.
- aggregation_content: "all"
- client:
- track: False # Whether track server metrics using the tracking service.
- batch_size: 32 # The batch size of training in client.
- test_batch_size: 5 # The batch size of testing in client.
- local_epoch: 10 # The number of epochs to train in each round.
- optimizer:
- type: "Adam" # The name of the optimizer, options: Adam, SGD.
- lr: 0.001
- momentum: 0.9
- weight_decay: 0
- seed: 0
- local_test: False # Whether test the trained models in clients before uploading them to the server.
- gpu: 0 # The total number of GPUs used in training. 0 means CPU.
- distributed: # The distributed training configurations. It is only applicable when gpu > 1.
- backend: "nccl" # The distributed backend.
- init_method: ""
- world_size: 0
- rank: 0
- local_rank: 0
- tracking: # The configurations for logging and tracking.
- database: "" # The path of local dataset, sqlite3.
- log_file: ""
- log_level: "INFO" # The level of logging.
- metric_file: ""
- save_every: 1
- # The configuration for system heterogeneity simulation.
- resource_heterogeneous:
- simulate: False # Whether simulate system heterogeneity in federated learning.
- # The type of heterogeneity to simulate, support iso, dir, real.
- # iso means that
- hetero_type: "real"
- level: 3 # The level of heterogeneous (0-5), 0 means no heterogeneous among clients.
- sleep_group_num: 1000 # The number of groups with different sleep time. 1 means all clients are the same.
- total_time: 1000 # The total sleep time of all clients, unit: second.
- fraction: 1 # The fraction of clients attending heterogeneous simulation.
- grouping_strategy: "greedy" # The grouping strategy to handle system heterogeneity, support: random, greedy, slowest.
- initial_default_time: 5 # The estimated default training time for each training round, unit: second.
- default_time_momentum: 0.2 # The default momentum for default time update.
- seed: 0 # The random seed.
|