- PyTorch 1.7.0
- torchvision 0.8.2
- Python 3.8
- CUDA 10.2 + cuDNN v7.6.5
- Win10 + PyCharm
- GTX 1660, 6 GB
class Block(nn.Module):
    """Two 3x3 conv + BatchNorm layers with an optional residual shortcut.

    Args:
        inchannel: Number of channels of the input feature map.
        outchannel: Number of channels produced by both convolutions.
        res: When true, the shortcut branch is added to the main branch
            before the final ReLU; when false the block is a plain conv stack.
        stride: Stride of the first 3x3 convolution and of the 1x1 projection
            shortcut. The default of 1 keeps the spatial size unchanged.
    """

    def __init__(self, inchannel, outchannel, res=True, stride=1):
        super().__init__()
        self.res = res  # whether to add the residual connection
        self.left = nn.Sequential(
            nn.Conv2d(inchannel, outchannel, kernel_size=3, stride=stride,
                      padding=1, bias=False),
            nn.BatchNorm2d(outchannel),
            nn.ReLU(inplace=True),
            nn.Conv2d(outchannel, outchannel, kernel_size=3, padding=1,
                      bias=False),
            nn.BatchNorm2d(outchannel),
        )
        # `stride` used to be read here without ever being defined, which
        # raised NameError on construction; it is now a real parameter.
        # A 1x1 projection is needed whenever the main branch changes the
        # tensor shape (channel count or spatial size).
        if stride != 1 or inchannel != outchannel:
            self.shortcut = nn.Sequential(
                nn.Conv2d(inchannel, outchannel, kernel_size=1, stride=stride,
                          bias=False),
                nn.BatchNorm2d(outchannel),
            )
        else:
            # An empty Sequential returns its input unchanged.
            self.shortcut = nn.Sequential()
        self.relu = nn.Sequential(
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        """Apply the conv stack, optionally add the shortcut, then ReLU."""
        out = self.left(x)
        if self.res:
            out += self.shortcut(x)
        out = self.relu(out)
        return out


class myModel(nn.Module):
    """Small VGG-style stack of ``Block`` modules with a linear classifier.

    Args:
        cfg: Layer layout. An integer adds a ``Block`` with that many output
            channels; the string ``'M'`` adds a 2x2 max-pool with stride 2.
            Defaults to ``[64, 'M', 128, 'M', 256, 'M', 512, 'M']``.
        res: Forwarded to every ``Block``; enables the residual shortcut.
        num_classes: Number of output logits (10 for CIFAR-10).
        input_size: Height/width of the square input images (32 for CIFAR-10).

    Raises:
        ValueError: If ``cfg`` pools the input down to a spatial size of zero.
    """

    def __init__(self, cfg=None, res=True, num_classes=10, input_size=32):
        super().__init__()
        if cfg is None:
            cfg = [64, 'M', 128, 'M', 256, 'M', 512, 'M']
        self.res = res  # whether blocks use the residual connection
        self.cfg = list(cfg)  # private copy; never share a default list
        self.inchannel = 3  # channels of the network input
        # The attribute keeps its original (misspelt) name so that existing
        # checkpoints, whose keys start with "futures.", still load.
        self.futures = self.make_layer()

        # Every Block preserves height/width (3x3 conv, padding 1, stride 1)
        # and every 'M' halves it with floor division, so the flattened
        # feature width follows directly from cfg.
        num_pools = sum(1 for v in self.cfg if v == 'M')
        side = input_size // (2 ** num_pools)
        if side < 1:
            raise ValueError(
                "cfg contains too many 'M' entries for input_size=%d"
                % input_size
            )
        # make_layer() leaves self.inchannel at the last block's width.
        in_features = self.inchannel * side * side

        # Classifier after the conv stack. The author notes that two fully
        # connected layers performed slightly worse than this single one.
        self.classifier = nn.Sequential(
            nn.Dropout(0.4),
            nn.Linear(in_features, num_classes),
        )

    def make_layer(self):
        """Build the convolutional feature extractor described by ``cfg``."""
        layers = []
        for v in self.cfg:
            if v == 'M':
                layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
            else:
                layers.append(Block(self.inchannel, v, self.res))
                # The next block consumes this block's output channels.
                self.inchannel = v
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return class logits of shape ``(N, num_classes)``."""
        out = self.futures(x)
        # Flatten (N, C, H, W) to (N, C*H*W) for the linear layer.
        out = out.flatten(1)
        out = self.classifier(out)
        return out
优化器采用 momentum=0.9 的 optim.SGD。Adam 在很多情况下能加速收敛,但由于它使用自适应学习率,在训练后期存在不能收敛到全局极值点的问题,所以这里采用可以手动调节学习率的 SGD,现在很多比赛和论文也采用该策略。同时设置 weight_decay=5e-3,即使用较大的 L2 正则来降低过拟合。
# Loss function and optimizer.
# `model` and `LR` are defined elsewhere in the training script.
loss_func = nn.CrossEntropyLoss()
# SGD with momentum; weight_decay=5e-3 is a fairly strong L2 penalty chosen
# to reduce overfitting.
optimizer = optim.SGD(model.parameters(), lr=LR, momentum=0.9, weight_decay=5e-3)
学习率调整采用 optim.lr_scheduler.MultiStepLR,参数设为:milestones=[int(num_epochs * 0.56), int(num_epochs * 0.78)],gamma=0.1,即在训练进行到 0.56 倍和 0.78 倍 num_epochs 时,学习率分别下降为前一阶段的 0.1 倍(num_epochs=200 时即第 112 和第 156 个 epoch)。
# Learning-rate schedule (MultiStep): multiply the learning rate by gamma=0.1
# once 56% and again once 78% of `num_epochs` have elapsed.
# `optimizer` and `num_epochs` are defined elsewhere in the training script.
scheduler = optim.lr_scheduler.MultiStepLR(
    optimizer=optimizer,
    milestones=[int(num_epochs * 0.56), int(num_epochs * 0.78)],
    gamma=0.1,
    last_epoch=-1,
)
# Advance the learning-rate schedule and print the rate now in effect.
# NOTE(review): the milestones are expressed in epochs, so this is presumably
# called once per epoch after that epoch's optimizer updates; confirm.
scheduler.step()
print('\t last_lr:', scheduler.get_last_lr())
# Per-channel normalization statistics (these match the commonly used
# ImageNet RGB values).
norm_mean = [0.485, 0.456, 0.406]  # per-channel mean
norm_std = [0.229, 0.224, 0.225]  # per-channel standard deviation (not variance)

# The lines below are entries of the training transform list, i.e. they go
# inside `transforms.Compose([...])`; the surrounding list is not shown here.
# NOTE(review): RandomErasing only accepts tensors, so it must come after
# ToTensor in the real pipeline; confirm the actual order.
transforms.Normalize(norm_mean, norm_std),  # (x - mean) / std per channel; [0, 1] input maps to about [-2.1, 2.6], not [-1, 1]
transforms.RandomHorizontalFlip(),  # random horizontal mirror
transforms.RandomErasing(scale=(0.04, 0.2), ratio=(0.5, 2)),  # random occlusion of a rectangular patch
transforms.RandomCrop(32, padding=4)  # pad 4 px on each side, then take a random (not centre) 32x32 crop
# Training hyper-parameters.
batch_size = 512  # uses roughly 4 GB of GPU memory
num_epochs = 200  # number of training epochs
LR = 0.01  # initial learning rate
实验结果:best_acc= 94.71%
cfg=[64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M'] 修改为 [64, 'M', 128, 'M', 256, 'M', 512, 'M']
