# The unique identifier for each federated learning task.
task_id: ""

# Dataset and federated learning simulation related configurations.
data:
  # The root directory where datasets are stored.
  root: "./data/"
  # The name of the dataset, support: femnist, shakespeare, cifar10, and cifar100.
  dataset: femnist
  # The data distribution of each client, support: iid, niid (for femnist and shakespeare), and dir and class (for cifar datasets).
  # `iid` means independent and identically distributed data.
  # `niid` means non-independent and identically distributed data for FEMNIST and Shakespeare.
  # `dir` means using a Dirichlet process to simulate non-IID data, for the CIFAR-10 and CIFAR-100 datasets.
  # `class` means partitioning the dataset by label classes, for datasets like CIFAR-10 and CIFAR-100.
  split_type: "iid"

  # The minimal number of samples in each client. Applicable to LEAF datasets and the dir simulation of CIFAR-10 and CIFAR-100.
  min_size: 10
  # The fraction of data sampled for LEAF datasets, e.g., 0.1 means that only 10% of the total dataset is used.
  data_amount: 0.05
  # The fraction of the number of clients used when split_type is `iid`.
  iid_fraction: 0.1
  # Whether to partition users of the dataset into train-test groups. Only applicable to the femnist and shakespeare datasets.
  # True means partitioning users of the dataset into train-test groups.
  # False means partitioning each user's samples into train-test groups.
  user: False
  # The fraction of data used for training; the rest is used for testing.
  train_test_split: 0.9

  # The number of classes in each client. Only applicable when split_type is `class`.
  class_per_client: 1
  # The targeted number of clients to construct. For non-LEAF datasets, it is the number of clients the data is split into;
  # for LEAF datasets, it is only used when split_type is `class`.
  num_of_clients: 100

  # The parameter for Dirichlet distribution simulation, applicable only when split_type is `dir` for CIFAR datasets.
  alpha: 0.5

  # The targeted distribution of quantities to simulate data quantity heterogeneity.
  # The values should sum up to 1, e.g., [0.1, 0.2, 0.7].
  # `num_of_clients` should be divisible by `len(weights)`.
  # NULL means clients are simulated with the same data quantity.
  weights: NULL
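# Example (illustrative, not a default): a non-IID CIFAR-10 partition combining a
# Dirichlet label split with data quantity heterogeneity. The values are a sketch;
# a smaller `alpha` yields a more skewed label distribution, and `num_of_clients`
# is set to 99 so that it is divisible by `len(weights)` = 3, as required above.
#
# data:
#   root: "./data/"
#   dataset: cifar10
#   split_type: "dir"
#   alpha: 0.1
#   min_size: 10
#   num_of_clients: 99
#   weights: [0.1, 0.2, 0.7]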
# The name of the model for training, support: lenet, rnn, resnet, resnet18, resnet50, vgg9.
model: lenet
# How to conduct testing, options: test_in_client or test_in_server.
# `test_in_client` means that each client has a test set to run testing.
# `test_in_server` means that the server has a test set to run testing for the global model. Use this mode for CIFAR datasets.
test_mode: "test_in_client"
# How to measure testing performance (accuracy) when test_mode is `test_in_client`, support: average or weighted (weighted average).
test_method: "average"

server:
  track: False  # Whether to track server metrics using the tracking service.
  rounds: 10  # The total number of training rounds.
  clients_per_round: 5  # The number of clients to train in each round.
  test_every: 1  # The frequency of testing: conduct testing every N rounds.
  save_model_every: 10  # The frequency of saving the model: save the model every N rounds.
  save_model_path: ""  # The path to save the model. The default path is the root directory of the library.
  batch_size: 32  # The batch size of test_in_server.
  test_all: False  # Whether to test all clients or only the selected clients.
  random_selection: True  # Whether to select the clients to train randomly.
  # The strategy to aggregate client-uploaded models, options: FedAvg, equal.
  # `FedAvg` aggregates models using a weighted average, where the weights are the data sizes of clients.
  # `equal` aggregates models by simple averaging.
  aggregation_strategy: "FedAvg"
  # The content of aggregation, options: all, parameters.
  # `all` means aggregating models using state_dict, including both model parameters and persistent buffers like BatchNorm statistics.
  # `parameters` means aggregating only model parameters.
  aggregation_content: "all"

client:
  track: False  # Whether to track client metrics using the tracking service.
  batch_size: 32  # The batch size of training in clients.
  test_batch_size: 5  # The batch size of testing in clients.
  local_epoch: 10  # The number of epochs to train in each round.
  optimizer:
    type: "Adam"  # The name of the optimizer, options: Adam, SGD.
    lr: 0.001
    momentum: 0.9
    weight_decay: 0
  seed: 0
  local_test: False  # Whether to test the trained models in clients before uploading them to the server.

gpu: 0  # The total number of GPUs used in training. 0 means CPU.
distributed:  # The distributed training configurations, only applicable when gpu > 1.
  backend: "nccl"  # The distributed backend.
  init_method: ""
  world_size: 0
  rank: 0
  local_rank: 0

tracking:  # The configurations for logging and tracking.
  database: ""  # The path of the local database, sqlite3.
  log_file: ""
  log_level: "INFO"  # The level of logging.
  metric_file: ""
  save_every: 1

# The configurations for system heterogeneity simulation.
resource_heterogeneous:
  simulate: False  # Whether to simulate system heterogeneity in federated learning.
  # The type of heterogeneity to simulate, support: iso, dir, real.
  hetero_type: "real"
  level: 3  # The level of heterogeneity (0-5); 0 means no heterogeneity among clients.
  sleep_group_num: 1000  # The number of groups with different sleep times. 1 means all clients are the same.
  total_time: 1000  # The total sleep time of all clients, unit: second.
  fraction: 1  # The fraction of clients attending the heterogeneity simulation.
  grouping_strategy: "greedy"  # The grouping strategy to handle system heterogeneity, support: random, greedy, slowest.
  initial_default_time: 5  # The estimated default training time for each training round, unit: second.
  default_time_momentum: 0.2  # The momentum for default time updates.
  seed: 0  # The random seed.
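# Example (illustrative, not a default): enabling the system heterogeneity
# simulation so that half of the clients sleep according to real-world traces.
# The values below are a sketch; adapt them to the target deployment.
#
# resource_heterogeneous:
#   simulate: True
#   hetero_type: "real"
#   level: 3
#   total_time: 500
#   fraction: 0.5
#   grouping_strategy: "greedy"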
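# Example (illustrative, not a recommendation): switching clients from Adam to
# SGD. The optimizer fields mirror the usual PyTorch optimizer arguments, so
# momentum is an SGD argument and is typically ignored when type is "Adam".
#
# client:
#   optimizer:
#     type: "SGD"
#     lr: 0.01
#     momentum: 0.9
#     weight_decay: 0.0005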