# 2023-04-15
# YOLOX-CSPLayer

# C3是CSP Bottleneck with 3 convolutions的简称
# CSP 的全称是 Cross Stage Partial，中文翻译为跨阶段部分
class CSPLayer(nn.Module):
    """CSP (Cross Stage Partial) layer built from three convolutions.

    Known as "C3" in YOLOv5. The input is projected by two parallel
    convolutions; one projection is further processed by a stack of
    ``Bottleneck`` modules, the two results are concatenated along the
    channel dimension, and a final convolution maps the concatenation to
    ``out_channels``.
    """

    def __init__(
        self,
        in_channels,
        out_channels,
        n=1,
        shortcut=True,
        expansion=0.5,
        depthwise=False,
        act="silu",
    ):
        """Build the two branch convolutions, the Bottleneck stack and the fusion conv.

        Args:
            in_channels (int): number of channels of the input feature map.
            out_channels (int): number of channels produced by the layer.
            n (int): number of ``Bottleneck`` modules stacked on the first
                branch. Default value: 1.
            shortcut (bool): forwarded to every ``Bottleneck``; presumably
                toggles its residual connection (Bottleneck is defined
                elsewhere -- confirm there). Default value: True.
            expansion (float): each branch has
                ``int(out_channels * expansion)`` channels. Default value: 0.5.
            depthwise (bool): forwarded to every ``Bottleneck``; presumably
                selects depthwise convolutions (confirm against Bottleneck).
                Default value: False.
            act (str): activation identifier forwarded to ``BaseConv`` and
                ``Bottleneck``. Default value: "silu".
        """
        super().__init__()

        # Width shared by both branches; int() truncates towards zero.
        branch_width = int(out_channels * expansion)

        # Two parallel projections of the input (third positional argument 1
        # is presumably the kernel size, i.e. a 1x1 convolution).
        self.conv1 = BaseConv(in_channels, branch_width, 1, stride=1, act=act)
        self.conv2 = BaseConv(in_channels, branch_width, 1, stride=1, act=act)

        # Fusion convolution: consumes both branches stacked on the channel
        # axis, hence 2 * branch_width input channels.
        self.conv3 = BaseConv(2 * branch_width, out_channels, 1, stride=1, act=act)

        # n Bottlenecks that keep the channel count unchanged; the positional
        # 1.0 is passed in Bottleneck's fourth slot (presumably its own
        # expansion ratio -- confirm against Bottleneck's signature).
        self.m = nn.Sequential(
            *(
                Bottleneck(
                    branch_width, branch_width, shortcut, 1.0, depthwise, act=act
                )
                for _ in range(n)
            )
        )

    def forward(self, x):
        """Run both branches on ``x``, concatenate them and fuse with ``conv3``."""
        deep_branch = self.conv1(x)
        skip_branch = self.conv2(x)
        deep_branch = self.m(deep_branch)

        # Concatenate along dim=1 (the channel axis for N x C x H x W input);
        # the Bottleneck branch comes first, the plain conv2 branch second.
        merged = torch.cat((deep_branch, skip_branch), dim=1)
        return self.conv3(merged)
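    # Structure of C3 (data flow):
    # 1. The input feature map has size H x W x C_in.
    # 2. conv1 (1x1 conv) outputs H x W x int(C_out * expansion).
    # 3. conv2 (1x1 conv) outputs H x W x int(C_out * expansion).
    # 4. The Bottleneck stack, applied to the conv1 output from step 2,
    #    outputs H x W x int(C_out * expansion).
    # 5. The outputs of steps 4 and 3 are concatenated along the channel
    #    dimension, giving H x W x 2 * int(C_out * expansion).
    # 6. conv3 (1x1 conv) outputs H x W x C_out.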