## 二、代码实战

import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn import AvgPool2D, Conv2D

### 2.1 CoordConv类代码实现

class CoordConv(nn.Layer):
    """2-D convolution that also sees the position of every pixel.

    Two extra channels are concatenated to the input before the convolution:
    one holding the x (width) coordinate and one holding the y (height)
    coordinate of each location, both scaled to the range [-1, 1].

    Args:
        in_channels: Number of channels of the incoming feature map. The
            wrapped convolution receives ``in_channels + 2`` channels.
        out_channels: Number of channels produced by the convolution.
        kernel_size: Forwarded unchanged to ``Conv2D``.
        stride: Forwarded unchanged to ``Conv2D``.
        padding: Forwarded unchanged to ``Conv2D``.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride, padding):
        super().__init__()
        # +2 accounts for the x and y coordinate channels added in forward().
        self.conv = Conv2D(
            in_channels + 2, out_channels, kernel_size, stride, padding)

    @staticmethod
    def _axis_coords(size, dtype):
        """Return ``size`` evenly spaced coordinates running from -1 to 1."""
        # A single-element axis would make the divisor ``size - 1`` zero
        # (0 / 0 -> NaN); clamping it maps that lone position to -1 instead.
        span = float(max(size - 1, 1))
        coords = paddle.arange(size, dtype='float32') / span * 2.0 - 1.0
        # Match the input dtype so the concat below never mixes dtypes.
        return coords.astype(dtype)

    def forward(self, x):
        """Append coordinate channels to ``x`` and apply the convolution.

        Args:
            x: Tensor indexed as (batch, channels, height, width).

        Returns:
            The output of the wrapped ``Conv2D``.
        """
        b = x.shape[0]
        h = x.shape[2]
        w = x.shape[3]

        # x coordinate varies along the last axis, y along the height axis;
        # both are broadcast to one channel per sample.
        gx = self._axis_coords(w, x.dtype)
        gx = gx.reshape([1, 1, 1, w]).expand([b, 1, h, w])
        gy = self._axis_coords(h, x.dtype)
        gy = gy.reshape([1, 1, h, 1]).expand([b, 1, h, w])

        y = paddle.concat([x, gx, gy], axis=1)
        return self.conv(y)
class dcn2(nn.Layer):
    """Small CNN whose fourth convolution is a CoordConv layer.

    NOTE(review): the ``class`` header and every layer except ``conv4`` were
    missing from this listing. They are reconstructed here so that the output
    shapes and parameter counts match the summary printed for a
    (64, 3, 32, 32) input. Confirm against the original notebook.

    Args:
        num_classes: Number of outputs of the final linear layer.
    """

    def __init__(self, num_classes=1):
        super().__init__()
        # 3 -> 32 channels, 32x32 kept (896 params).
        self.conv1 = Conv2D(3, 32, (3, 3), 1, 1)
        # 32 -> 64 channels, 32x32 -> 15x15 (18,496 params).
        self.conv2 = Conv2D(32, 64, (3, 3), 2, 0)
        # 64 -> 64 channels, 15x15 -> 7x7 (36,928 params).
        self.conv3 = Conv2D(64, 64, (3, 3), 2, 0)
        # Coordinate-aware convolution, 7x7 -> 4x4.
        self.conv4 = CoordConv(64, 64, (3, 3), 2, 1)
        self.flatten = nn.Flatten()
        # 64 channels * 4 * 4 positions = 1024 flattened features.
        self.linear1 = nn.Linear(1024, 64)
        # presumably num_classes sets the output width (summary shows 1) - confirm
        self.linear2 = nn.Linear(64, num_classes)

    def forward(self, x):
        """Run the four conv+ReLU stages followed by the two-layer head."""
        for conv in (self.conv1, self.conv2, self.conv3, self.conv4):
            x = F.relu(conv(x))
        x = self.flatten(x)
        x = F.relu(self.linear1(x))
        # No activation on the final layer: raw outputs are returned.
        return self.linear2(x)
# Build the CoordConv network and wrap it in the high-level paddle.Model API,
# whose summary() prints the per-layer table and returns the parameter counts.
cnn3 = dcn2()
model3 = paddle.Model(cnn3)
model3.summary((64, 3, 32, 32))
---------------------------------------------------------------------------
Layer (type)     Input Shape          Output Shape         Param #
===========================================================================
Conv2D-26 [[64, 3, 32, 32]] [64, 32, 32, 32] 896
Conv2D-27 [[64, 32, 32, 32]] [64, 64, 15, 15] 18,496
Conv2D-28 [[64, 64, 15, 15]] [64, 64, 7, 7] 36,928
Conv2D-31 [[64, 66, 7, 7]] [64, 64, 4, 4] 38,080
CoordConv-4 [[64, 64, 7, 7]] [64, 64, 4, 4] 0
Flatten-1 [[64, 64, 4, 4]] [64, 1024] 0
Linear-1 [[64, 1024]] [64, 64] 65,600
Linear-2 [[64, 64]] [64, 1] 65
===========================================================================
Total params: 160,065
Trainable params: 160,065
Non-trainable params: 0
---------------------------------------------------------------------------
Input size (MB): 0.75
Forward/backward pass size (MB): 26.09
Params size (MB): 0.61
Estimated Total Size (MB): 27.45
---------------------------------------------------------------------------
{'total_params': 160065, 'trainable_params': 160065}
class MyNet(nn.Layer):
    """Baseline CNN built from plain Conv2D layers only.

    NOTE(review): the ``class`` header and all layer definitions were missing
    from this listing. They are reconstructed here so that the output shapes
    and parameter counts match the summary printed for a (64, 3, 32, 32)
    input. Confirm against the original notebook.

    Args:
        num_classes: Number of outputs of the final linear layer.
    """

    def __init__(self, num_classes=1):
        super().__init__()
        # 3 -> 32 channels, 32x32 kept (896 params).
        self.conv1 = Conv2D(3, 32, (3, 3), 1, 1)
        # 32 -> 64 channels, 32x32 -> 15x15 (18,496 params).
        self.conv2 = Conv2D(32, 64, (3, 3), 2, 0)
        # 64 -> 64 channels, 15x15 -> 7x7 (36,928 params).
        self.conv3 = Conv2D(64, 64, (3, 3), 2, 0)
        # 64 -> 64 channels, 7x7 -> 4x4 (36,928 params).
        self.conv4 = Conv2D(64, 64, (3, 3), 2, 1)
        self.flatten = nn.Flatten()
        # 64 channels * 4 * 4 positions = 1024 flattened features.
        self.linear1 = nn.Linear(1024, 64)
        # presumably num_classes sets the output width (summary shows 1) - confirm
        self.linear2 = nn.Linear(64, num_classes)

    def forward(self, x):
        """Run the four conv+ReLU stages followed by the two-layer head."""
        for conv in (self.conv1, self.conv2, self.conv3, self.conv4):
            x = F.relu(conv(x))
        x = self.flatten(x)
        x = F.relu(self.linear1(x))
        # No activation on the final layer: raw outputs are returned.
        return self.linear2(x)
# Visualize the model: wrap the baseline network in the high-level
# paddle.Model API, whose summary() prints the per-layer table and returns
# the parameter counts.
cnn1 = MyNet()
model1 = paddle.Model(cnn1)
model1.summary((64, 3, 32, 32))
---------------------------------------------------------------------------
Layer (type)     Input Shape          Output Shape         Param #
===========================================================================
Conv2D-1 [[64, 3, 32, 32]] [64, 32, 32, 32] 896
Conv2D-2 [[64, 32, 32, 32]] [64, 64, 15, 15] 18,496
Conv2D-3 [[64, 64, 15, 15]] [64, 64, 7, 7] 36,928
Conv2D-4 [[64, 64, 7, 7]] [64, 64, 4, 4] 36,928
Flatten-1 [[64, 64, 4, 4]] [64, 1024] 0
Linear-1 [[64, 1024]] [64, 64] 65,600
Linear-2 [[64, 64]] [64, 1] 65
===========================================================================
Total params: 158,913
Trainable params: 158,913
Non-trainable params: 0
---------------------------------------------------------------------------
Input size (MB): 0.75
Forward/backward pass size (MB): 25.59
Params size (MB): 0.61
Estimated Total Size (MB): 26.95
---------------------------------------------------------------------------
{'total_params': 158913, 'trainable_params': 158913}

## 总结

### 一些需要注意的点

1. CoordConv的位置在网络中应该尽量靠前
2. 最好的应用方向是姿态估计等对位置高度敏感的CV任务