# resnet.py
  1. import math
  2. import torch.nn as nn
  3. import torch.nn.functional as F
  4. from .ozan_rep_fun import ozan_rep_function, trevor_rep_function, OzanRepFunction, TrevorRepFunction
  5. from easyfl.models.model import BaseModel
  6. __all__ = ['resnet18',
  7. 'resnet18_half',
  8. 'resnet18_tripple',
  9. 'resnet34',
  10. 'resnet50',
  11. 'resnet101',
  12. 'resnet152']
  13. def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
  14. """3x3 convolution with padding"""
  15. return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
  16. padding=dilation, groups=groups, bias=False, dilation=dilation)
  17. def conv1x1(in_planes, out_planes, stride=1):
  18. """1x1 convolution"""
  19. return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
  20. class BasicBlock(nn.Module):
  21. expansion = 1
  22. def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
  23. base_width=64, dilation=1, norm_layer=None):
  24. super(BasicBlock, self).__init__()
  25. if norm_layer is None:
  26. norm_layer = nn.BatchNorm2d
  27. if groups != 1 or base_width != 64:
  28. raise ValueError('BasicBlock only supports groups=1 and base_width=64')
  29. if dilation > 1:
  30. raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
  31. # Both self.conv1 and self.downsample layers downsample the input when stride != 1
  32. self.conv1 = conv3x3(inplanes, planes, stride)
  33. self.bn1 = norm_layer(planes)
  34. self.relu = nn.ReLU(inplace=True)
  35. self.conv2 = conv3x3(planes, planes)
  36. self.bn2 = norm_layer(planes)
  37. self.downsample = downsample
  38. self.stride = stride
  39. def forward(self, x):
  40. identity = x
  41. out = self.conv1(x)
  42. out = self.bn1(out)
  43. out = self.relu(out)
  44. out = self.conv2(out)
  45. out = self.bn2(out)
  46. if self.downsample is not None:
  47. identity = self.downsample(x)
  48. out += identity
  49. out = self.relu(out)
  50. return out
  51. class Bottleneck(nn.Module):
  52. expansion = 4
  53. def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
  54. base_width=64, dilation=1, norm_layer=None):
  55. super(Bottleneck, self).__init__()
  56. if norm_layer is None:
  57. norm_layer = nn.BatchNorm2d
  58. width = int(planes * (base_width / 64.)) * groups
  59. # Both self.conv2 and self.downsample layers downsample the input when stride != 1
  60. self.conv1 = conv1x1(inplanes, width)
  61. self.bn1 = norm_layer(width)
  62. self.conv2 = conv3x3(width, width, stride, groups, dilation)
  63. self.bn2 = norm_layer(width)
  64. self.conv3 = conv1x1(width, planes * self.expansion)
  65. self.bn3 = norm_layer(planes * self.expansion)
  66. self.relu = nn.ReLU(inplace=True)
  67. self.downsample = downsample
  68. self.stride = stride
  69. def forward(self, x):
  70. identity = x
  71. out = self.conv1(x)
  72. out = self.bn1(out)
  73. out = self.relu(out)
  74. out = self.conv2(out)
  75. out = self.bn2(out)
  76. out = self.relu(out)
  77. out = self.conv3(out)
  78. out = self.bn3(out)
  79. if self.downsample is not None:
  80. identity = self.downsample(x)
  81. out += identity
  82. out = self.relu(out)
  83. return out
  84. class ResNetEncoder(nn.Module):
  85. def __init__(self, block, layers, widths=[64, 128, 256, 512], num_classes=1000, zero_init_residual=False,
  86. groups=1, width_per_group=64, replace_stride_with_dilation=None,
  87. norm_layer=None):
  88. super(ResNetEncoder, self).__init__()
  89. if norm_layer is None:
  90. norm_layer = nn.BatchNorm2d
  91. self._norm_layer = norm_layer
  92. self.inplanes = 64
  93. self.dilation = 1
  94. if replace_stride_with_dilation is None:
  95. # each element in the tuple indicates if we should replace
  96. # the 2x2 stride with a dilated convolution instead
  97. replace_stride_with_dilation = [False, False, False]
  98. if len(replace_stride_with_dilation) != 3:
  99. raise ValueError("replace_stride_with_dilation should be None "
  100. "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
  101. self.groups = groups
  102. self.base_width = width_per_group
  103. self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3,
  104. bias=False)
  105. self.bn1 = norm_layer(self.inplanes)
  106. self.relu = nn.ReLU(inplace=True)
  107. self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
  108. self.layer1 = self._make_layer(block, widths[0], layers[0])
  109. self.layer2 = self._make_layer(block, widths[1], layers[1], stride=2,
  110. dilate=replace_stride_with_dilation[0])
  111. self.layer3 = self._make_layer(block, widths[2], layers[2], stride=2,
  112. dilate=replace_stride_with_dilation[1])
  113. self.layer4 = self._make_layer(block, widths[3], layers[3], stride=2,
  114. dilate=replace_stride_with_dilation[2])
  115. for m in self.modules():
  116. if isinstance(m, nn.Conv2d):
  117. nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
  118. elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
  119. nn.init.constant_(m.weight, 1)
  120. nn.init.constant_(m.bias, 0)
  121. # Zero-initialize the last BN in each residual branch,
  122. # so that the residual branch starts with zeros, and each residual block behaves like an identity.
  123. # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
  124. if zero_init_residual:
  125. for m in self.modules():
  126. if isinstance(m, Bottleneck):
  127. nn.init.constant_(m.bn3.weight, 0)
  128. elif isinstance(m, BasicBlock):
  129. nn.init.constant_(m.bn2.weight, 0)
  130. def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
  131. norm_layer = self._norm_layer
  132. downsample = None
  133. previous_dilation = self.dilation
  134. if dilate:
  135. self.dilation *= stride
  136. stride = 1
  137. if stride != 1 or self.inplanes != planes * block.expansion:
  138. downsample = nn.Sequential(
  139. conv1x1(self.inplanes, planes * block.expansion, stride),
  140. norm_layer(planes * block.expansion),
  141. )
  142. layers = []
  143. layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
  144. self.base_width, previous_dilation, norm_layer))
  145. self.inplanes = planes * block.expansion
  146. for _ in range(1, blocks):
  147. layers.append(block(self.inplanes, planes, groups=self.groups,
  148. base_width=self.base_width, dilation=self.dilation,
  149. norm_layer=norm_layer))
  150. return nn.Sequential(*layers)
  151. def forward(self, x):
  152. x = self.conv1(x)
  153. x = self.bn1(x)
  154. x = self.relu(x)
  155. x = self.maxpool(x)
  156. x = self.layer1(x)
  157. x = self.layer2(x)
  158. x = self.layer3(x)
  159. x = self.layer4(x)
  160. return x
  161. class Decoder(nn.Module):
  162. def __init__(self, output_channels=32, num_classes=None, base_match=512):
  163. super(Decoder, self).__init__()
  164. self.output_channels = output_channels
  165. self.num_classes = num_classes
  166. self.relu = nn.ReLU(inplace=True)
  167. if num_classes is not None:
  168. self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
  169. self.fc = nn.Linear(512 * block.expansion, num_classes)
  170. else:
  171. self.upconv0 = nn.ConvTranspose2d(base_match, 256, 2, 2)
  172. self.bn_upconv0 = nn.BatchNorm2d(256)
  173. self.conv_decode0 = nn.Conv2d(256, 256, 3, padding=1)
  174. self.bn_decode0 = nn.BatchNorm2d(256)
  175. self.upconv1 = nn.ConvTranspose2d(256, 128, 2, 2)
  176. self.bn_upconv1 = nn.BatchNorm2d(128)
  177. self.conv_decode1 = nn.Conv2d(128, 128, 3, padding=1)
  178. self.bn_decode1 = nn.BatchNorm2d(128)
  179. self.upconv2 = nn.ConvTranspose2d(128, 64, 2, 2)
  180. self.bn_upconv2 = nn.BatchNorm2d(64)
  181. self.conv_decode2 = nn.Conv2d(64, 64, 3, padding=1)
  182. self.bn_decode2 = nn.BatchNorm2d(64)
  183. self.upconv3 = nn.ConvTranspose2d(64, 48, 2, 2)
  184. self.bn_upconv3 = nn.BatchNorm2d(48)
  185. self.conv_decode3 = nn.Conv2d(48, 48, 3, padding=1)
  186. self.bn_decode3 = nn.BatchNorm2d(48)
  187. self.upconv4 = nn.ConvTranspose2d(48, 32, 2, 2)
  188. self.bn_upconv4 = nn.BatchNorm2d(32)
  189. self.conv_decode4 = nn.Conv2d(32, output_channels, 3, padding=1)
  190. def forward(self, representation):
  191. # batch_size=representation.shape[0]
  192. if self.num_classes is None:
  193. # x2 = self.conv_decode_res(representation)
  194. # x2 = self.bn_conv_decode_res(x2)
  195. # x2 = interpolate(x2,size=(256,256))
  196. x = self.upconv0(representation)
  197. x = self.bn_upconv0(x)
  198. x = self.relu(x)
  199. x = self.conv_decode0(x)
  200. x = self.bn_decode0(x)
  201. x = self.relu(x)
  202. x = self.upconv1(x)
  203. x = self.bn_upconv1(x)
  204. x = self.relu(x)
  205. x = self.conv_decode1(x)
  206. x = self.bn_decode1(x)
  207. x = self.relu(x)
  208. x = self.upconv2(x)
  209. x = self.bn_upconv2(x)
  210. x = self.relu(x)
  211. x = self.conv_decode2(x)
  212. x = self.bn_decode2(x)
  213. x = self.relu(x)
  214. x = self.upconv3(x)
  215. x = self.bn_upconv3(x)
  216. x = self.relu(x)
  217. x = self.conv_decode3(x)
  218. x = self.bn_decode3(x)
  219. x = self.relu(x)
  220. x = self.upconv4(x)
  221. x = self.bn_upconv4(x)
  222. # x = torch.cat([x,x2],1)
  223. # print(x.shape,self.static.shape)
  224. # x = torch.cat([x,x2,input,self.static.expand(batch_size,-1,-1,-1)],1)
  225. x = self.relu(x)
  226. x = self.conv_decode4(x)
  227. # z = x[:,19:22,:,:].clone()
  228. # y = (z).norm(2,1,True).clamp(min=1e-12)
  229. # print(y.shape,x[:,21:24,:,:].shape)
  230. # x[:,19:22,:,:]=z/y
  231. else:
  232. x = F.adaptive_avg_pool2d(x, (1, 1))
  233. x = x.view(x.size(0), -1)
  234. x = self.fc(x)
  235. return x
  236. class ResNet(BaseModel):
  237. def __init__(self, block, layers, tasks=None, num_classes=None, ozan=False, size=1, **kwargs):
  238. super(ResNet, self).__init__()
  239. if size == 1:
  240. self.encoder = ResNetEncoder(block, layers, **kwargs)
  241. elif size == 2:
  242. self.encoder = ResNetEncoder(block, layers, [96, 192, 384, 720], **kwargs)
  243. elif size == 3:
  244. self.encoder = ResNetEncoder(block, layers, [112, 224, 448, 880], **kwargs)
  245. elif size == 0.5:
  246. self.encoder = ResNetEncoder(block, layers, [48, 96, 192, 360], **kwargs)
  247. self.tasks = tasks
  248. self.ozan = ozan
  249. self.task_to_decoder = {}
  250. if tasks is not None:
  251. # self.final_conv = nn.Conv2d(728,512,3,1,1)
  252. # self.final_conv_bn = nn.BatchNorm2d(512)
  253. for task in tasks:
  254. if task == 'segment_semantic':
  255. output_channels = 18
  256. if task == 'depth_zbuffer':
  257. output_channels = 1
  258. if task == 'normal':
  259. output_channels = 3
  260. if task == 'edge_occlusion':
  261. output_channels = 1
  262. if task == 'reshading':
  263. output_channels = 3
  264. if task == 'keypoints2d':
  265. output_channels = 1
  266. if task == 'edge_texture':
  267. output_channels = 1
  268. if size == 1:
  269. decoder = Decoder(output_channels)
  270. elif size == 2:
  271. decoder = Decoder(output_channels, base_match=720)
  272. elif size == 3:
  273. decoder = Decoder(output_channels, base_match=880)
  274. elif size == 0.5:
  275. decoder = Decoder(output_channels, base_match=360)
  276. self.task_to_decoder[task] = decoder
  277. else:
  278. self.task_to_decoder['classification'] = Decoder(output_channels=0, num_classes=1000)
  279. self.decoders = nn.ModuleList(self.task_to_decoder.values())
  280. # ------- init weights --------
  281. for m in self.modules():
  282. if isinstance(m, nn.Conv2d):
  283. n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
  284. m.weight.data.normal_(0, math.sqrt(2. / n))
  285. elif isinstance(m, nn.BatchNorm2d):
  286. m.weight.data.fill_(1)
  287. m.bias.data.zero_()
  288. # -----------------------------
  289. def forward(self, input):
  290. rep = self.encoder(input)
  291. if self.tasks is None:
  292. return self.decoders[0](rep)
  293. # rep = self.final_conv(rep)
  294. # rep = self.final_conv_bn(rep)
  295. outputs = {'rep': rep}
  296. if self.ozan:
  297. OzanRepFunction.n = len(self.decoders)
  298. rep = ozan_rep_function(rep)
  299. for i, (task, decoder) in enumerate(zip(self.task_to_decoder.keys(), self.decoders)):
  300. outputs[task] = decoder(rep[i])
  301. else:
  302. TrevorRepFunction.n = len(self.decoders)
  303. rep = trevor_rep_function(rep)
  304. for i, (task, decoder) in enumerate(zip(self.task_to_decoder.keys(), self.decoders)):
  305. outputs[task] = decoder(rep)
  306. return outputs
  307. def _resnet(arch, block, layers, pretrained, **kwargs):
  308. model = ResNet(block=block, layers=layers, **kwargs)
  309. # if pretrained:
  310. # state_dict = load_state_dict_from_url(model_urls[arch],
  311. # progress=progress)
  312. # model.load_state_dict(state_dict)
  313. return model
  314. def resnet18(pretrained=False, **kwargs):
  315. """Constructs a ResNet-18 model.
  316. Args:
  317. pretrained (bool): If True, returns a model pre-trained on ImageNet
  318. progress (bool): If True, displays a progress bar of the download to stderr
  319. """
  320. return _resnet('resnet18', BasicBlock, [2, 2, 2, 2], pretrained,
  321. **kwargs)
  322. def resnet18_tripple(pretrained=False, **kwargs):
  323. """Constructs a ResNet-18 model.
  324. Args:
  325. pretrained (bool): If True, returns a model pre-trained on ImageNet
  326. progress (bool): If True, displays a progress bar of the download to stderr
  327. """
  328. return _resnet('resnet18', BasicBlock, [2, 2, 2, 2], pretrained, size=3,
  329. **kwargs)
  330. def resnet18_half(pretrained=False, **kwargs):
  331. """Constructs a ResNet-18 model.
  332. Args:
  333. pretrained (bool): If True, returns a model pre-trained on ImageNet
  334. progress (bool): If True, displays a progress bar of the download to stderr
  335. """
  336. return _resnet('resnet18', BasicBlock, [2, 2, 2, 2], pretrained, size=0.5,
  337. **kwargs)
  338. def resnet34(pretrained=False, **kwargs):
  339. """Constructs a ResNet-34 model.
  340. Args:
  341. pretrained (bool): If True, returns a model pre-trained on ImageNet
  342. progress (bool): If True, displays a progress bar of the download to stderr
  343. """
  344. return _resnet('resnet34', BasicBlock, [3, 4, 6, 3], pretrained,
  345. **kwargs)
  346. def resnet50(pretrained=False, **kwargs):
  347. """Constructs a ResNet-50 model.
  348. Args:
  349. pretrained (bool): If True, returns a model pre-trained on ImageNet
  350. progress (bool): If True, displays a progress bar of the download to stderr
  351. """
  352. return _resnet('resnet50', Bottleneck, [3, 4, 6, 3], pretrained,
  353. **kwargs)
  354. def resnet101(pretrained=False, **kwargs):
  355. """Constructs a ResNet-101 model.
  356. Args:
  357. pretrained (bool): If True, returns a model pre-trained on ImageNet
  358. progress (bool): If True, displays a progress bar of the download to stderr
  359. """
  360. return _resnet('resnet101', Bottleneck, [3, 4, 23, 3], pretrained,
  361. **kwargs)
  362. def resnet152(pretrained=False, **kwargs):
  363. """Constructs a ResNet-152 model.
  364. Args:
  365. pretrained (bool): If True, returns a model pre-trained on ImageNet
  366. progress (bool): If True, displays a progress bar of the download to stderr
  367. """
  368. return _resnet('resnet152', Bottleneck, [3, 8, 36, 3], pretrained,
  369. **kwargs)