FCN Model Implementation - PyTorch + Pretrained VGG16

ゝ一纸荒年。 2023-07-15 13:25

The FCN network is much like VGG16, except that the fully connected layers at the end are replaced with convolutional layers. For the full architecture and details, see the paper:

https://people.eecs.berkeley.edu/~jonlong/long_shelhamer_fcn.pdf
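
The key reinterpretation is that a classifier applied densely is just a convolution: a 1×1 convolution over the feature map scores every spatial position at once, so the network accepts inputs of any size and outputs a score map instead of a single label. A minimal sketch of the idea (shapes here are illustrative):

```python
import torch
from torch import nn

# A 1x1 "scoring" convolution plays the role of a fully connected classifier,
# applied at every spatial position of the feature map.
fc_as_conv = nn.Conv2d(512, 21, kernel_size=1)

feat = torch.randn(1, 512, 10, 15)   # e.g. VGG16 features for a 320x480 input
scores = fc_as_conv(feat)
print(scores.shape)                  # torch.Size([1, 21, 10, 15]), one score map per class
```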

Below is a detailed walkthrough of implementing FCN in PyTorch.

This post draws on https://zhuanlan.zhihu.com/p/32506912, but parts of the code have been modified, many new comments added, and everything updated to PyTorch 1.x.

First, reading the images:

```python
import os, random, datetime
import numpy as np
from PIL import Image
import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms as tfs

# Root directory of the VOC dataset
voc_root = '/media/cyq/CU/Ubuntu system files/VOCdevkit/VOC2012'

# Read the file paths of the images and their labels
def read_images(root=voc_root, train=True):
    txt_fname = root + '/ImageSets/Segmentation/' + ('train.txt' if train else 'val.txt')
    with open(txt_fname, 'r') as f:
        images = f.read().split()
    data = [os.path.join(root, 'JPEGImages', i + '.jpg') for i in images]
    label = [os.path.join(root, 'SegmentationClass', i + '.png') for i in images]
    return data, label
```
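
A quick sanity check, assuming the VOC2012 directory layout above, confirms that the two lists line up (this snippet is just for illustration):

```python
# Print the size of the training split and the first image/label pair
train_imgs, train_labels = read_images(train=True)
print(len(train_imgs), 'training images')  # the VOC2012 train split lists 1464 images
print(train_imgs[0])
print(train_labels[0])
```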

Next, the input images are randomly cropped to a common size, which makes batch training straightforward:

```python
# Crop the image and its label to the same randomly chosen region
def rand_crop(data, label, height, width):
    '''
    data is a PIL.Image object
    label is a PIL.Image object
    '''
    x = int(random.uniform(0, data.size[0] - width))
    y = int(random.uniform(0, data.size[1] - height))
    box = (x, y, x + width, y + height)
    data = data.crop(box)
    label = label.crop(box)
    return data, label
```
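
For example, cropping one image/label pair (this usage snippet assumes the image is at least as large as the crop; the dataset class further down filters out images that are too small):

```python
# Illustrative usage: crop an image and its label to the same 224x320 window
img = Image.open(train_imgs[0])
lbl = Image.open(train_labels[0]).convert('RGB')
img_c, lbl_c = rand_crop(img, lbl, 224, 320)
print(img_c.size, lbl_c.size)  # both (320, 224); PIL reports (width, height)
```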

Mapping label colors to class indices:

```python
# The 21 classes
classes = ['background', 'aeroplane', 'bicycle', 'bird', 'boat',
           'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable',
           'dog', 'horse', 'motorbike', 'person', 'potted plant',
           'sheep', 'sofa', 'train', 'tv/monitor']

# RGB value for each class
colormap = [[0, 0, 0], [128, 0, 0], [0, 128, 0], [128, 128, 0], [0, 0, 128],
            [128, 0, 128], [0, 128, 128], [128, 128, 128], [64, 0, 0], [192, 0, 0],
            [64, 128, 0], [192, 128, 0], [64, 0, 128], [192, 0, 128],
            [64, 128, 128], [192, 128, 128], [0, 64, 0], [128, 64, 0],
            [0, 192, 0], [128, 192, 0], [0, 64, 128]]

# Map each color in the label image to a number in 0-20
cm2lbl = np.zeros(256 ** 3)  # each pixel has an RGB value with 0-255 per channel
for i, cm in enumerate(colormap):
    cm2lbl[(cm[0] * 256 + cm[1]) * 256 + cm[2]] = i  # build the index

def image2label(im):
    data = np.array(im, dtype='int32')
    idx = (data[:, :, 0] * 256 + data[:, :, 1]) * 256 + data[:, :, 2]
    return np.array(cm2lbl[idx], dtype='int64')  # look up the label matrix by index
```
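
The inverse mapping, from a predicted index map back to an RGB image, will be useful for visualizing results at the end. A small helper sketch (not part of the original code):

```python
# Inverse of image2label: map an (h, w) array of class indices back to RGB colors
colormap_arr = np.array(colormap, dtype='uint8')

def label2image(pred):
    return colormap_arr[pred]  # (h, w) indices -> (h, w, 3) uint8 color image
```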

Building the dataset:

```python
def img_transforms(im, label, crop_size):
    im, label = rand_crop(im, label, *crop_size)
    im_tfs = tfs.Compose([
        tfs.ToTensor(),
        tfs.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    im = im_tfs(im)
    label = image2label(label)
    label = torch.from_numpy(label)
    return im, label

class VOCSegDataset(Dataset):
    '''
    VOC segmentation dataset
    '''
    def __init__(self, train, crop_size, transforms):
        self.crop_size = crop_size
        self.transforms = transforms
        data_list, label_list = read_images(train=train)
        self.data_list = self._filter(data_list)
        self.label_list = self._filter(label_list)
        print('Read ' + str(len(self.data_list)) + ' images')

    def _filter(self, images):  # drop images smaller than the crop size
        return [im for im in images if (Image.open(im).size[1] >= self.crop_size[0] and
                                        Image.open(im).size[0] >= self.crop_size[1])]

    def __getitem__(self, idx):
        img = self.data_list[idx]
        label = self.label_list[idx]
        img = Image.open(img)
        label = Image.open(label).convert('RGB')
        img, label = self.transforms(img, label, self.crop_size)
        return img, label

    def __len__(self):
        return len(self.data_list)

# Instantiate the datasets
input_shape = (320, 480)
voc_train = VOCSegDataset(True, input_shape, img_transforms)
voc_test = VOCSegDataset(False, input_shape, img_transforms)
train_data = DataLoader(voc_train, 4, shuffle=True, num_workers=4)
valid_data = DataLoader(voc_test, 4, num_workers=4)
```
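
Pulling one batch from the loader is a handy shape check before training (illustrative only):

```python
# One batch: images are (batch, 3, h, w) float tensors, labels are (batch, h, w) indices
im, label = next(iter(train_data))
print(im.shape)     # torch.Size([4, 3, 320, 480])
print(label.shape)  # torch.Size([4, 320, 480])
print(label.max())  # at most 20, since there are 21 classes
```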

Model definition (FCN-32s is used here):

```python
# Use a pretrained VGG16
pretrained_net = models.vgg16(pretrained=True)
num_classes = len(classes)

class fcn(nn.Module):
    def __init__(self, num_classes):
        super(fcn, self).__init__()
        # reuse VGG16's convolutional layers
        self.features = pretrained_net.features
        # replace the fully connected layers with convolutions
        self.conv1 = nn.Conv2d(512, 4096, 1)
        self.conv2 = nn.Conv2d(4096, num_classes, 1)
        self.relu = nn.ReLU(inplace=True)
        # upsampling; only the 32x one is used for FCN-32s
        self.upsample2x = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False)
        self.upsample8x = nn.Upsample(scale_factor=8, mode='bilinear', align_corners=False)
        self.upsample32x = nn.Upsample(scale_factor=32, mode='bilinear', align_corners=False)

    def forward(self, x):
        s = self.features(x)
        s = self.conv1(s)
        s = self.relu(s)
        s = self.conv2(s)
        s = self.relu(s)
        s = self.upsample32x(s)
        return s

# create the model
net = fcn(num_classes)
net.cuda()
```
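
Note that nn.Upsample is fixed bilinear interpolation. The FCN paper instead uses a learnable deconvolution (transposed convolution) whose weights are initialized to bilinear upsampling, so the upsampling itself can be fine-tuned. A possible drop-in replacement for upsample32x, sketched under that assumption:

```python
def bilinear_kernel(in_channels, out_channels, kernel_size):
    # Build (in_channels, out_channels, k, k) weights that perform bilinear upsampling
    factor = (kernel_size + 1) // 2
    if kernel_size % 2 == 1:
        center = factor - 1
    else:
        center = factor - 0.5
    og = np.ogrid[:kernel_size, :kernel_size]
    filt = (1 - abs(og[0] - center) / factor) * (1 - abs(og[1] - center) / factor)
    weight = np.zeros((in_channels, out_channels, kernel_size, kernel_size), dtype='float32')
    weight[range(in_channels), range(out_channels), :, :] = filt
    return torch.from_numpy(weight)

# Learnable 32x upsampling, initialized to plain bilinear interpolation:
# output size = (input - 1) * 32 - 2 * 16 + 64 = 32 * input
deconv32x = nn.ConvTranspose2d(num_classes, num_classes, 64, stride=32, padding=16, bias=False)
deconv32x.weight.data = bilinear_kernel(num_classes, num_classes, 64)
```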

Loss and optimizer setup; there is still plenty of room for tuning here:

```python
# The training loop below applies log_softmax to the network output, so NLLLoss
# is the right criterion (log_softmax + NLLLoss == CrossEntropyLoss; feeding
# log-softmaxed outputs into CrossEntropyLoss would apply softmax twice)
criterion = nn.NLLLoss()
optimizer = torch.optim.SGD(net.parameters(), lr=1e-2, weight_decay=1e-4)
```
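
The paper trains with SGD plus momentum; since the backbone is pretrained, adding momentum and lowering the learning rate is a natural first adjustment (values here are illustrative, not tuned):

```python
# A possible alternative: add momentum and lower the learning rate
optimizer = torch.optim.SGD(net.parameters(), lr=1e-3, momentum=0.9, weight_decay=1e-4)
```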

Accuracy computation, using simple per-pixel accuracy:

```python
def acc_simu(label_true, label_pred):
    # fraction of pixels in this batch whose predicted class matches the label
    return 100. * (label_true == label_pred).mean()
```
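
Per-pixel accuracy is dominated by the large background regions, so it overstates segmentation quality; the paper's headline numbers include mean IoU. For comparison, a simple per-batch mean-IoU sketch (a hypothetical helper, taking the same array inputs as acc_simu):

```python
def miou_simu(label_true, label_pred, n_classes=21):
    # mean intersection-over-union across the classes present in this batch
    ious = []
    for c in range(n_classes):
        t = label_true == c
        p = label_pred == c
        union = (t | p).sum()
        if union == 0:
            continue  # class absent from both prediction and ground truth
        ious.append((t & p).sum() / union)
    return 100. * np.mean(ious)
```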

Model training:

```python
for e in range(80):
    train_loss = 0
    train_acc = 0
    # record the elapsed time
    prev_time = datetime.datetime.now()
    net = net.train()
    for im, label in train_data:
        im = im.cuda()
        label = label.cuda()
        # forward
        out = net(im)
        out = F.log_softmax(out, dim=1)  # (b, n, h, w)
        loss = criterion(out, label)
        # backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        label_pred = out.max(dim=1)[1].data.cpu().numpy()
        label_true = label.data.cpu().numpy()
        acc = acc_simu(label_true, label_pred)
        train_acc += acc
        print('{:.2f}%'.format(acc))
    net = net.eval()
    eval_loss = 0
    eval_acc = 0
    for im, label in valid_data:
        im = im.cuda()
        label = label.cuda()
        # forward only; no gradients needed during evaluation
        with torch.no_grad():
            out = net(im)
            out = F.log_softmax(out, dim=1)
            loss = criterion(out, label)
        eval_loss += loss.item()
        label_pred = out.max(dim=1)[1].data.cpu().numpy()
        label_true = label.data.cpu().numpy()
        acc = acc_simu(label_true, label_pred)
        eval_acc += acc
    cur_time = datetime.datetime.now()
    h, remainder = divmod((cur_time - prev_time).seconds, 3600)
    m, s = divmod(remainder, 60)
    epoch_str = ('Epoch: {}, Train Loss: {:.5f}, Train Acc: {:.5f}, '
                 'Valid Loss: {:.5f}, Valid Acc: {:.5f} '.format(
                     e, train_loss / len(train_data), train_acc / len(train_data),
                     eval_loss / len(valid_data), eval_acc / len(valid_data)))
    time_str = 'Time: {:.0f}:{:.0f}:{:.0f}'.format(h, m, s)
    print(epoch_str + time_str)
    torch.save(net, 'model.pkl')
```

Results

Thanks to the pretrained VGG16 weights, training converged much more easily: after 10 epochs, training-set pixel accuracy reached 90.38% and the test set reached 81.77%.

I then adjusted the learning rate and trained for another 5 epochs; training accuracy reached 93.29%, yet the test set still only reached 81.94%, which leaves some gap to the 89.1% reported in the paper.
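
To reproduce figures like the ones below, one can load the saved model, predict on a validation sample, and colorize the prediction with the label2image helper from earlier (a minimal inference sketch; 'model.pkl' is the file written by the training loop):

```python
# Predict on one validation image and save the colorized result
net = torch.load('model.pkl')
net = net.eval()

im, label = voc_test[0]                 # one preprocessed (image, label) sample
with torch.no_grad():
    out = net(im.unsqueeze(0).cuda())   # add a batch dimension
pred = out.max(dim=1)[1].squeeze(0).cpu().numpy()

Image.fromarray(label2image(pred)).save('pred_0.png')
```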

Here are a few example segmentation results from this model:

[Six example segmentation result images]

As the examples show, the objects are broadly separated from the background, but the error rate along object contours is still high, and for images with more complex content the gap between prediction and ground truth can be considerable.

**The code in this post can be copied into an IDE in order and run as-is.**

