2023年4月15日
YOLOX-CSPDarknet
class CSPDarknet(nn.Module):
    """CSPDarknet (Cross Stage Partial Network) backbone.

    The backbone is a chain of five stages, each consuming the output of
    the previous one::

        input -> stem (Focus) -> dark2 -> dark3 -> dark4 -> dark5

    Channel widths passed to the layer constructors, with
    ``C = int(wid_mul * 64)``::

        stem: C    dark2: 2C    dark3: 4C    dark4: 8C    dark5: 16C

    NOTE(review): the original notes state that every stage halves the
    spatial size, i.e. the stages run at 1/2, 1/4, 1/8, 1/16 and 1/32 of
    the input height/width. That depends on ``Focus`` and on the padding
    used by ``BaseConv``/``DWConv``, which are defined outside this
    class -- confirm there.

    Args:
        dep_mul: Depth multiplier; scales the number of bottlenecks per
            CSP layer (``base_depth = max(round(dep_mul * 3), 1)``).
        wid_mul: Width multiplier; scales the channel counts
            (``base_channels = int(wid_mul * 64)``).
        out_features: Names of the stages whose outputs ``forward``
            returns. Must be non-empty.
        depthwise: If true, ``DWConv`` (depthwise-separable convolution)
            is used for the down-sampling convolutions instead of
            ``BaseConv``; the flag is also forwarded to every
            ``CSPLayer``.
        act: Activation name forwarded to every layer. The default
            ``"silu"`` is the swish function ``x * sigmoid(x)``.

    Raises:
        AssertionError: If ``out_features`` is empty.
    """

    def __init__(
        self,
        dep_mul,
        wid_mul,
        out_features=("dark3", "dark4", "dark5"),
        depthwise=False,
        act="silu",
    ):
        super().__init__()
        assert out_features, "please provide output features of Darknet"
        self.out_features = out_features

        # Down-sampling convolution type: depthwise-separable or plain.
        Conv = DWConv if depthwise else BaseConv

        # No defaults exist for the multipliers; with wid_mul == 1 and
        # dep_mul == 1 these evaluate to 64 and 3 respectively.
        base_channels = int(wid_mul * 64)
        base_depth = max(round(dep_mul * 3), 1)

        # stem: Focus layer mapping the 3 input channels to base_channels.
        # NOTE(review): per the original notes Focus also halves height
        # and width -- confirm in the Focus implementation.
        self.stem = Focus(3, base_channels, ksize=3, act=act)

        # dark2: 3x3 stride-2 convolution followed by a CSP layer.
        # Convolution output size is floor((W - F + 2P) / S) + 1. Assuming
        # padding P = 1 (TODO confirm in BaseConv), F = 3 and S = 2 give
        # floor((W - 1) / 2) + 1, which equals W / 2 for even W.
        self.dark2 = nn.Sequential(
            Conv(base_channels, base_channels * 2, 3, 2, act=act),
            CSPLayer(
                base_channels * 2,
                base_channels * 2,
                n=base_depth,
                depthwise=depthwise,
                act=act,
            ),
        )

        # dark3: same pattern, 2C -> 4C, with three times the base depth.
        self.dark3 = nn.Sequential(
            Conv(base_channels * 2, base_channels * 4, 3, 2, act=act),
            CSPLayer(
                base_channels * 4,
                base_channels * 4,
                n=base_depth * 3,
                depthwise=depthwise,
                act=act,
            ),
        )

        # dark4: same pattern, 4C -> 8C, with three times the base depth.
        self.dark4 = nn.Sequential(
            Conv(base_channels * 4, base_channels * 8, 3, 2, act=act),
            CSPLayer(
                base_channels * 8,
                base_channels * 8,
                n=base_depth * 3,
                depthwise=depthwise,
                act=act,
            ),
        )

        # dark5: 8C -> 16C, with an SPP bottleneck between the convolution
        # and the CSP layer; this CSP layer is built with shortcut=False.
        self.dark5 = nn.Sequential(
            Conv(base_channels * 8, base_channels * 16, 3, 2, act=act),
            SPPBottleneck(base_channels * 16, base_channels * 16, activation=act),
            CSPLayer(
                base_channels * 16,
                base_channels * 16,
                n=base_depth,
                shortcut=False,
                depthwise=depthwise,
                act=act,
            ),
        )

    def forward(self, x):
        """Run all five stages and return the requested feature maps.

        Args:
            x: Input passed to the stem.

        Returns:
            A dict mapping stage name to that stage's output, restricted
            to the names listed in ``self.out_features`` and ordered by
            stage (stem, dark2, dark3, dark4, dark5). Names in
            ``self.out_features`` that are not stage names are ignored.
        """
        outputs = {}
        x = self.stem(x)
        outputs["stem"] = x
        x = self.dark2(x)
        outputs["dark2"] = x
        x = self.dark3(x)
        outputs["dark3"] = x
        x = self.dark4(x)
        outputs["dark4"] = x
        x = self.dark5(x)
        outputs["dark5"] = x
        # Walk the (name, tensor) pairs and keep only the requested stages.
        return {
            name: feat
            for name, feat in outputs.items()
            if name in self.out_features
        }