# Hetero NN: A federated task with guest using image data and host using text data

In this task, we will show you how to build a federated task under Hetero-NN in which the participating parties hold differently structured data: the guest party has image data and labels, the host party has text, and together they complete a binary classification task. The tutorial dataset is built from Flickr 8k; labels 0 and 1 indicate whether the image is in the wilderness or in the city. You can download the processed dataset from the first link below and put it under examples/data. The complete original dataset can be downloaded from the second link below. (Please note that the original dataset is different from the data in this example: the example dataset is a small, annotated portion of the complete dataset, prepared for demonstration purposes.)

## Get the example dataset:

Please download the dataset from:
- https://webank-ai-1251170195.cos.ap-guangzhou.myqcloud.com/fate/examples/data/flicker_toy_data.zip
and put it under the examples/data folder.

This dataset is derived from the Flickr 8k dataset, available from:
- https://www.kaggle.com/datasets/adityajn105/flickr8k

In [2]:
from pipeline.component.nn import save_to_fate

### Guest Bottom Model

In [3]:
%%save_to_fate model guest_bottom_image.py
from torch import nn
import torch as t
from torch.nn import functional as F

class ImgBottomNet(nn.Module):
    """Guest-side bottom model: a small CNN that maps image batches to 8 features.

    The first fully connected layer is hard-coded to 1176 input features.
    That matches 3 x 224 x 224 inputs (the conv/pool stack yields
    6 channels x 14 x 14 = 1176 values per image); any other input size
    requires a different ``in_features``.
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor: conv -> max-pool -> conv -> avg-pool.
        self.seq = t.nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5),
            nn.MaxPool2d(kernel_size=3),
            nn.Conv2d(in_channels=6, out_channels=6, kernel_size=3),
            nn.AvgPool2d(kernel_size=5)
        )

        # Projection head: flattened feature map -> 32 -> 8.
        self.fc = t.nn.Sequential(
            nn.Linear(1176, 32),
            nn.ReLU(),
            nn.Linear(32, 8)
        )

    def forward(self, x):
        """Return an 8-wide feature vector per image in the batch ``x``."""
        x = self.seq(x)
        # Keep the batch axis and flatten everything else for the linear head.
        x = x.flatten(start_dim=1)
        x = self.fc(x)
        return x


### Guest Top Model

In [4]:
%%save_to_fate model guest_top_image.py
from torch import nn
import torch as t
from torch.nn import functional as F

class ImgTopNet(nn.Module):
    """Guest-side top model: maps a 4-wide input to one sigmoid score per sample."""

    def __init__(self):
        super().__init__()

        # Single linear unit followed by a sigmoid, so outputs lie in (0, 1).
        self.fc = t.nn.Sequential(
            nn.Linear(4, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        """Return a flattened 1-D tensor holding one score per input row."""
        x = self.fc(x)
        # Collapse the (batch, 1) output into shape (batch,).
        return x.flatten()

### Host Bottom Model

In [5]:
%%save_to_fate model host_bottom_lstm.py
from torch import nn
import torch as t
from torch.nn import functional as F

class LSTMBottom(nn.Module):
    """Host-side bottom model: embeds token indices, runs an LSTM, emits 8 features.

    Args:
        vocab_size: number of rows in the embedding table.
    """

    def __init__(self, vocab_size):
        super().__init__()
        # Index 0 is the padding index, so it always embeds to a zero vector.
        self.word_embed = nn.Embedding(num_embeddings=vocab_size, embedding_dim=16, padding_idx=0)
        # The LSTM is wrapped in a Sequential; calling it still returns the
        # LSTM's (output, state) tuple, which forward() unpacks.
        self.lstm = t.nn.Sequential(
            nn.LSTM(input_size=16, hidden_size=16, num_layers=2, batch_first=True)
        )
        self.act = nn.ReLU()
        self.linear = nn.Linear(16, 8)

    def forward(self, x):
        """Return a non-negative 8-wide feature vector per sequence in ``x``.

        ``x`` holds integer token indices; with ``batch_first=True`` axis 1 is
        the sequence axis.
        """
        embeddings = self.word_embed(x)
        lstm_fw, _ = self.lstm(embeddings)

        # Sum the LSTM outputs over the sequence axis. Padded positions are not
        # masked out, so they also contribute to the sum.
        return self.act(self.linear(lstm_fw.sum(dim=1)))

### Test the datasets and bottom models locally

In [6]:
from federatedml.nn.dataset.image import ImageDataset
from federatedml.nn.dataset.nlp_tokenizer import TokenizerDataset

2022-12-26 20:45:42.535744: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-12-26 20:45:42.535777: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [7]:
# Flickr images (guest side): center-cropped to 224 x 224, with labels returned
img_ds = ImageDataset(center_crop=True, center_crop_shape=(224, 224), return_label=True) # return_label=True: samples carry their label
img_ds.load('../../../../examples/data/flicker_toy_data/flicker/images/')
# Text (host side): tokenized, no labels returned
txt_ds = TokenizerDataset(return_label=False) 
txt_ds.load('../../../../examples/data/flicker_toy_data/text.csv')

In [8]:
# Inspect the image dataset: size, first sample, class list, and the first ten sample ids
print(len(img_ds))
print(img_ds[0])
print(img_ds.get_classes())
print(img_ds.get_sample_ids()[0: 10])

215
(tensor([[[0.5059, 0.5176, 0.5137, ..., 0.4941, 0.5020, 0.5059],
 [0.4980, 0.5020, 0.4980, ..., 0.4824, 0.5020, 0.5059],
 [0.5059, 0.4863, 0.4902, ..., 0.4980, 0.4980, 0.5137],
 ...,
 [0.7843, 0.7922, 0.7529, ..., 0.1412, 0.2078, 0.2196],
 [0.9922, 0.9922, 0.9647, ..., 0.1176, 0.0941, 0.1333],
 [0.9961, 0.9922, 1.0000, ..., 0.1647, 0.1294, 0.1373]],

 [[0.5765, 0.5882, 0.5843, ..., 0.5490, 0.5569, 0.5608],
 [0.5686, 0.5804, 0.5765, ..., 0.5490, 0.5529, 0.5529],
 [0.5608, 0.5569, 0.5647, ..., 0.5569, 0.5490, 0.5529],
 ...,
 [0.7961, 0.8039, 0.7490, ..., 0.1373, 0.1882, 0.2000],
 [0.9961, 0.9961, 0.9608, ..., 0.1137, 0.1137, 0.1529],
 [0.9922, 0.9922, 1.0000, ..., 0.1608, 0.1059, 0.1216]],

 [[0.6235, 0.6353, 0.6314, ..., 0.5922, 0.6000, 0.6118],
 [0.6078, 0.6235, 0.6196, ..., 0.5804, 0.5882, 0.6000],
 [0.6039, 0.6118, 0.6196, ..., 0.5843, 0.5843, 0.6000],
 ...,
 [0.5882, 0.5961, 0.5686, ..., 0.1216, 0.1765, 0.1882],
 [0.7294, 0.7373, 0.7373, ..., 0.0980, 0.0980, 0.1294],
 [0.8745, 0

In [9]:
# Inspect the text dataset: size, first sample, and vocabulary size
print(len(txt_ds))
print(txt_ds[0]) # token indices of the first sample
print(txt_ds.get_vocab_size()) # vocab size

215
tensor([ 101, 1037, 2158, 1998, 2450, 2729, 2005, 2019, 10527, 2247,
 1996, 2217, 1997, 1037, 2303, 1997, 2300, 1012, 102, 0,
 0, 0, 0, 0, 0, 0])
30522


In [10]:
# Instantiate both bottom models locally; the LSTM's embedding table is sized from the text dataset's vocabulary
img_bottom = ImgBottomNet()
lstm_bottom = LSTMBottom(vocab_size=txt_ds.get_vocab_size())

In [11]:
# Smoke-test the host bottom model on a batch stacked from the first two text samples
lstm_bottom(t.vstack([txt_ds[0], txt_ds[1]])) # test forward

tensor([[0.0000, 1.8284, 0.0000, 0.0000, 2.3009, 0.0626, 0.0678, 0.0000],
 [0.0369, 1.8046, 0.0000, 0.0000, 2.4555, 0.0000, 0.0000, 0.0000]],
 grad_fn=<ReluBackward0>)

In [12]:
# Smoke-test the guest bottom model on a batch of the first two images.
# Each sample is indexed with [0] to take its first element, then given a leading batch axis.
img_bottom(t.vstack([img_ds[0][0].unsqueeze(dim=0), img_ds[1][0].unsqueeze(dim=0)])) 

tensor([[ 0.0987, 0.0808, -0.0140, -0.0718, -0.1381, 0.2642, -0.1874, -0.0494],
 [ 0.0856, 0.0948, -0.0362, -0.0702, -0.0695, 0.2293, -0.1768, -0.0638]],
 grad_fn=<AddmmBackward0>)

### Pipeline

In [13]:
import os
import torch as t
from torch import nn
from pipeline import fate_torch_hook
from pipeline.component import HeteroNN
from pipeline.component.hetero_nn import DatasetParam
from pipeline.backend.pipeline import PipeLine
from pipeline.component import Reader, Evaluation, DataTransform
from pipeline.interface import Data, Model
from pipeline.component.nn import save_to_fate

# Apply FATE's hook to the torch module before any model/optimizer is built below
# (presumably this is what makes nn.CustModel, t.nn.InteractiveLayer and
# parameter-less optimizers available - confirm against the FATE docs).
fate_torch_hook(t)

# Absolute path of the FATE project root, four directories above the working directory
fate_project_path = os.path.abspath('../../../../')
# Party ids of the two participants
guest = 10000
host = 9999

# The guest initiates the job; one guest and one host take part
pipeline_mix = PipeLine().set_initiator(role='guest', party_id=guest).set_roles(guest=guest, host=host)

# Table descriptors handed to the Reader component; they must match the names bound below
guest_data = {"name": "flicker_guest", "namespace": "experiment"}
host_data = {"name": "flicker_host", "namespace": "experiment"}

# Guest data is an image folder, host data is a CSV file of text
guest_data_path = fate_project_path + '/examples/data/flicker_toy_data/flicker/images'
host_data_path = fate_project_path + '/examples/data/flicker_toy_data/text.csv'

# Bind each local path to its table name/namespace
pipeline_mix.bind_table(name='flicker_guest', namespace='experiment', path=guest_data_path)
pipeline_mix.bind_table(name='flicker_host', namespace='experiment', path=host_data_path)

{'namespace': 'experiment', 'table_name': 'flicker_host'}

In [14]:
# One Reader component, configured per party with that party's bound table
reader_0 = Reader(name="reader_0")
reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_data)
reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_data)

In [15]:
# Shared Hetero-NN settings: 5 epochs, batch size 64, classification task
hetero_nn_0 = HeteroNN(name="hetero_nn_0", epochs=5,
 interactive_layer_lr=0.001, batch_size=64, validation_freqs=1, task_type='classification')
# Per-party handles used below to attach each side's models and dataset
guest_nn_0 = hetero_nn_0.get_party_instance(role='guest', party_id=guest)
host_nn_0 = hetero_nn_0.get_party_instance(role='host', party_id=host)

In [16]:

# Guest bottom model: references the ImgBottomNet class in the module saved as guest_bottom_image.py
guest_bottom = t.nn.Sequential(
 nn.CustModel(module_name='guest_bottom_image', class_name='ImgBottomNet')
)

# Guest top model: references the ImgTopNet class in the module saved as guest_top_image.py
guest_top = t.nn.Sequential(
 nn.CustModel(module_name='guest_top_image', class_name='ImgTopNet')
)
# Host bottom model; vocab_size is given as an extra keyword argument
# (presumably forwarded to LSTMBottom's constructor - confirm against the FATE docs)
host_bottom = nn.CustModel(module_name='host_bottom_lstm', class_name='LSTMBottom', vocab_size=txt_ds.get_vocab_size())

# guest_dim/host_dim equal the 8-wide outputs of the two bottom models;
# out_dim equals the 4-wide input expected by ImgTopNet's Linear(4, 1)
interactive_layer = t.nn.InteractiveLayer(out_dim=4, guest_dim=8, host_dim=8, host_num=1)

In [18]:
# Attach the models to their parties: the guest holds top and bottom, the host only a bottom model
guest_nn_0.add_top_model(guest_top)
guest_nn_0.add_bottom_model(guest_bottom)
host_nn_0.add_bottom_model(host_bottom)
# No parameters are passed to Adam here; this presumably relies on
# fate_torch_hook(t) having been applied in an earlier cell - confirm against the FATE docs
optimizer = t.optim.Adam(lr=0.001)
# Binary cross-entropy on the sigmoid output of ImgTopNet
loss = t.nn.BCELoss()

hetero_nn_0.set_interactive_layer(interactive_layer)
hetero_nn_0.compile(optimizer=optimizer, loss=loss)

In [19]:
# Add the datasets: same settings as the local test above, plus float labels on the guest side
guest_nn_0.add_dataset(DatasetParam(dataset_name='image', return_label=True, center_crop=True, center_crop_shape=(224, 224), label_dtype='float'))
host_nn_0.add_dataset(DatasetParam(dataset_name='nlp_tokenizer', return_label=False))

In [20]:
# Assemble the job: the reader's output data is the training data of the Hetero-NN component
pipeline_mix.add_component(reader_0)
pipeline_mix.add_component(hetero_nn_0, data=Data(train_data=reader_0.output.data))
pipeline_mix.compile()



In [22]:
# Run the compiled pipeline
pipeline_mix.fit()