# 2023-04-15
# YOLOX-CSPDarknet

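# CSPDarknet is a Darknet-style backbone built from CSP (Cross Stage Partial) layers; the notes describe it as a residual-style network.
# Its stages, in order, are: 1. stem (Focus layer) 2. dark2 3. dark3 4. dark4 5. dark5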
class CSPDarknet(nn.Module):
    """CSPDarknet backbone: a Focus stem followed by four "dark" stages.

    The stages run strictly in the order stem -> dark2 -> dark3 -> dark4 ->
    dark5. Every dark stage opens with a convolution built as
    ``Conv(in_ch, out_ch, 3, 2, act=act)`` that doubles the channel count,
    followed by a ``CSPLayer`` that keeps the channel count. dark5 also
    places an ``SPPBottleneck`` between the convolution and the ``CSPLayer``
    and builds its ``CSPLayer`` with ``shortcut=False``.

    Args:
        dep_mul: Depth multiplier. The base repeat count handed to
            ``CSPLayer`` is ``max(round(dep_mul * 3), 1)``; dark3 and dark4
            use three times that count, dark2 and dark5 use it as is.
        wid_mul: Width multiplier. The stem emits ``int(wid_mul * 64)``
            channels and dark2..dark5 emit 2x, 4x, 8x and 16x that number.
        out_features: Names of the stages whose outputs ``forward`` returns.
            Must be non-empty. Recognised names are "stem", "dark2", "dark3",
            "dark4" and "dark5"; any other name never matches and is ignored.
        depthwise: When true, the stage-entry convolutions use ``DWConv``
            instead of ``BaseConv``; the flag is also forwarded to every
            ``CSPLayer``.
        act: Activation name forwarded unchanged to every layer.
    """

    def __init__(
        self,
        dep_mul,
        wid_mul,
        out_features=("dark3", "dark4", "dark5"),
        depthwise=False,
        act="silu",
    ):
        super().__init__()
        assert out_features, "please provide output features of Darknet"
        self.out_features = out_features

        # Convolution class used for the downsampling conv that opens each stage.
        conv_cls = DWConv if depthwise else BaseConv

        # Channel widths of the stem and of the four dark stages.
        width = int(wid_mul * 64)
        w2, w4, w8, w16 = width * 2, width * 4, width * 8, width * 16

        # Base CSPLayer repeat count; never drops below one.
        repeats = max(round(dep_mul * 3), 1)

        # Keyword arguments shared by every CSPLayer in the backbone.
        csp_kwargs = dict(depthwise=depthwise, act=act)

        # Layers are created in the same order in which they are registered so
        # that parameter naming and initialisation order stay stable.

        # stem: Focus layer taking the 3-channel input to `width` channels.
        # NOTE(review): the original notes say Focus halves height and width;
        # confirm against the Focus implementation.
        self.stem = Focus(3, width, ksize=3, act=act)

        # dark2: width -> 2 * width.
        # NOTE(review): the positional `3, 2` are presumably kernel size and
        # stride, per the original notes; confirm against BaseConv / DWConv.
        dark2_down = conv_cls(width, w2, 3, 2, act=act)
        dark2_csp = CSPLayer(w2, w2, n=repeats, **csp_kwargs)
        self.dark2 = nn.Sequential(dark2_down, dark2_csp)

        # dark3: 2 * width -> 4 * width, three times the base repeat count.
        dark3_down = conv_cls(w2, w4, 3, 2, act=act)
        dark3_csp = CSPLayer(w4, w4, n=repeats * 3, **csp_kwargs)
        self.dark3 = nn.Sequential(dark3_down, dark3_csp)

        # dark4: 4 * width -> 8 * width, three times the base repeat count.
        dark4_down = conv_cls(w4, w8, 3, 2, act=act)
        dark4_csp = CSPLayer(w8, w8, n=repeats * 3, **csp_kwargs)
        self.dark4 = nn.Sequential(dark4_down, dark4_csp)

        # dark5: 8 * width -> 16 * width, with an SPP bottleneck before the
        # CSP layer, which is built with shortcut=False.
        dark5_down = conv_cls(w8, w16, 3, 2, act=act)
        dark5_spp = SPPBottleneck(w16, w16, activation=act)
        dark5_csp = CSPLayer(w16, w16, n=repeats, shortcut=False, **csp_kwargs)
        self.dark5 = nn.Sequential(dark5_down, dark5_spp, dark5_csp)

    def forward(self, x):
        """Run every stage in order and return the requested stage outputs.

        All five stages are always executed, whichever outputs are requested.

        Args:
            x: Input passed to the stem.

        Returns:
            A dict mapping stage name to that stage's output, restricted to
            the names contained in ``self.out_features``. Keys follow stage
            order (stem, dark2, dark3, dark4, dark5), not the order in which
            names appear in ``self.out_features``.
        """
        stem_out = self.stem(x)
        dark2_out = self.dark2(stem_out)
        dark3_out = self.dark3(dark2_out)
        dark4_out = self.dark4(dark3_out)
        dark5_out = self.dark5(dark4_out)

        stage_names = ("stem", "dark2", "dark3", "dark4", "dark5")
        stage_outputs = (stem_out, dark2_out, dark3_out, dark4_out, dark5_out)

        # Keep only the stages the caller asked for, preserving stage order.
        return {
            name: feature
            for name, feature in zip(stage_names, stage_outputs)
            if name in self.out_features
        }
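# CSPDarknet data flow (C = base_channels = int(wid_mul * 64); the input has 3 channels; spatial sizes assume every stage halves height and width):
# W*H*3 -> Focus (stem) -> W/2*H/2*C -> Dark2 -> W/4*H/4*C*2 -> Dark3 -> W/8*H/8*C*4 -> Dark4 -> W/16*H/16*C*8 -> Dark5 -> W/32*H/32*C*16
# Feature maps that CSPDarknet can output:
# 1. stem: W/2*H/2*C 2. dark2: W/4*H/4*C*2 3. dark3: W/8*H/8*C*4 4. dark4: W/16*H/16*C*8 5. dark5: W/32*H/32*C*16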