Data Transforms and DataLoader in PyTorch
Author: Jeremy_lf
DataLoader
DataLoader is a fairly important class. The arguments you will use most often are listed below, followed by a short usage sketch:
batch_size (the number of samples in each batch)
shuffle (whether to reshuffle the data every epoch)
num_workers (how many subprocesses to use for data loading)
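A minimal sketch of how those arguments fit together; the TensorDataset here is a stand-in filled with random data, purely for illustration:

```python
import torch
from torch.utils.data import TensorDataset, DataLoader

# A placeholder dataset: 100 fake 1x28x28 "images" with integer labels
images = torch.randn(100, 1, 28, 28)
labels = torch.randint(0, 10, (100,))
dataset = TensorDataset(images, labels)

loader = DataLoader(dataset,
                    batch_size=32,   # samples per batch
                    shuffle=True,    # reshuffle the data every epoch
                    num_workers=2)   # subprocesses used for data loading

for batch_images, batch_labels in loader:
    print(batch_images.shape, batch_labels.shape)  # torch.Size([32, 1, 28, 28]) torch.Size([32])
    break
```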
```python
import torch
import torch.nn as nn
import torch.nn.functional as F

'''
Initialize the network
Initialize the loss function & optimizer
For each step:
    zero the gradients
    forward pass
    compute the loss
    backward pass
    update the parameters
'''


class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return x


if __name__ == "__main__":
    net = LeNet()

    # ######### Train the network #########
    from torch import optim
    import torchvision
    from torchvision import transforms
    from torch.utils.data import DataLoader

    # Initialize the loss function & optimizer
    loss_fn = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

    DOWNLOAD = False
    BATCH_SIZE = 32

    transform = transforms.Compose([
        transforms.ToTensor()
        # transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # normalization (MNIST images have a single channel)
    ])

    train_dataset = torchvision.datasets.MNIST(root='./', train=True,
                                               transform=transform, download=DOWNLOAD)
    test_dataset = torchvision.datasets.MNIST(root='./data/mnist', train=False,
                                              transform=torchvision.transforms.ToTensor(),
                                              download=True)
    train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE)

    for epoch in range(200):
        running_loss = 0.0
        for step, data in enumerate(train_loader):
            inputs, labels = data

            # zero the gradients
            optimizer.zero_grad()
            # forward
            outputs = net(inputs)
            # backward
            loss = loss_fn(outputs, labels)
            loss.backward()
            # update
            optimizer.step()

            running_loss += loss.item()
            if step % 10 == 9:
                print("[{0:d}, {1:5d}] loss: {2:.3f}".format(epoch + 1, step + 1, running_loss / 10))
                running_loss = 0.

    print("Finished Training")

    # save the trained net
    torch.save(net, 'net.pkl')
    # load the trained net
    net1 = torch.load('net.pkl')

    # test the trained net
    net1.eval()  # disable dropout during evaluation
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            preds = net1(images)
            predicted = torch.argmax(preds, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = correct / total
    print('accuracy of test data:{:.1%}'.format(accuracy))
```
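The script above pickles the entire module object with torch.save(net, 'net.pkl'). A commonly recommended alternative is to save only the state_dict, sketched below; the file name net_params.pkl is just an illustrative placeholder, and net / LeNet refer to the script above:

```python
import torch

# Assumes the trained `net` and the LeNet class from the script above.
# Saving only the parameters gives a smaller file that is less brittle to code changes.
torch.save(net.state_dict(), 'net_params.pkl')

# To restore, rebuild the architecture first, then load the weights.
net2 = LeNet()
net2.load_state_dict(torch.load('net_params.pkl'))
net2.eval()
```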
Data Transforms (Transform)
When instantiating a dataset, an optional transform argument lets you convert the data on the fly. Most neural networks expect inputs of a fixed size, so the original images need to be rescaled or cropped, and the returned data then has to be converted to a Tensor.
The data transform (Transform) is applied inside the dataset's __getitem__ method.
```python
import torch
import numpy as np
from skimage import transform  # scikit-image, used here for resizing


class Rescale(object):
    """Rescale the image in a sample to a given size.

    Args:
        output_size (tuple or int): Desired output size. If tuple, output is
            matched to output_size. If int, smaller of image edges is matched
            to output_size keeping aspect ratio the same.
    """

    def __init__(self, output_size):
        assert isinstance(output_size, (int, tuple))
        self.output_size = output_size

    def __call__(self, sample):
        image, landmarks = sample['image'], sample['landmarks']

        h, w = image.shape[:2]
        if isinstance(self.output_size, int):
            if h > w:
                new_h, new_w = self.output_size * h / w, self.output_size
            else:
                new_h, new_w = self.output_size, self.output_size * w / h
        else:
            new_h, new_w = self.output_size

        new_h, new_w = int(new_h), int(new_w)

        img = transform.resize(image, (new_h, new_w))

        # h and w are swapped for landmarks because for images,
        # x and y axes are axis 1 and 0 respectively
        landmarks = landmarks * [new_w / w, new_h / h]

        return {'image': img, 'landmarks': landmarks}


class RandomCrop(object):
    """Crop randomly the image in a sample.

    Args:
        output_size (tuple or int): Desired output size. If int, square crop is made.
    """

    def __init__(self, output_size):
        assert isinstance(output_size, (int, tuple))
        if isinstance(output_size, int):
            self.output_size = (output_size, output_size)
        else:
            assert len(output_size) == 2
            self.output_size = output_size

    def __call__(self, sample):
        image, landmarks = sample['image'], sample['landmarks']

        h, w = image.shape[:2]
        new_h, new_w = self.output_size

        top = np.random.randint(0, h - new_h)
        left = np.random.randint(0, w - new_w)

        image = image[top: top + new_h, left: left + new_w]
        landmarks = landmarks - [left, top]

        return {'image': image, 'landmarks': landmarks}


class ToTensor(object):
    """Convert ndarrays in sample to Tensors."""

    def __call__(self, sample):
        image, landmarks = sample['image'], sample['landmarks']

        # swap color axis because
        # numpy image: H x W x C
        # torch image: C x H x W
        image = image.transpose((2, 0, 1))
        return {'image': torch.from_numpy(image),
                'landmarks': torch.from_numpy(landmarks)}
```
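As a sketch of where these callables plug in, the hypothetical dataset below applies a composed transform inside __getitem__. The sample format ({'image': ..., 'landmarks': ...}) matches the classes above; the LandmarksDataset name and the in-memory samples list are illustrative assumptions, not a real API:

```python
from torch.utils.data import Dataset
from torchvision import transforms


class LandmarksDataset(Dataset):
    """Hypothetical dataset returning {'image', 'landmarks'} samples."""

    def __init__(self, samples, transform=None):
        # `samples` is assumed to be a list of dicts already in memory:
        # [{'image': HxWxC ndarray, 'landmarks': Nx2 ndarray}, ...]
        self.samples = samples
        self.transform = transform

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        sample = self.samples[idx]
        # This is where the transform runs: every time a sample is indexed,
        # it is rescaled, cropped and converted on the fly.
        if self.transform:
            sample = self.transform(sample)
        return sample


# Chain the custom callables from above; Compose simply calls them in order.
composed = transforms.Compose([Rescale(256),
                               RandomCrop(224),
                               ToTensor()])
# dataset = LandmarksDataset(samples, transform=composed)
```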
Introduction to the torchvision package
torchvision is the PyTorch library dedicated to image processing. It contains four main submodules:
torchvision.datasets, torchvision.models, torchvision.transforms, torchvision.utils
torchvision.datasets
torchvision.datasets handles data loading; the PyTorch team has already packaged many common image datasets in this module, including:
MNIST, COCO, Captions, Detection, LSUN, ImageFolder, Imagenet-12, CIFAR, STL10, SVHN, PhotoTour
```python
import torchvision
from torchvision import transforms
from torch.utils.data import DataLoader

DOWNLOAD = False
BATCH_SIZE = 32

transform = transforms.Compose([
    transforms.ToTensor()
    # transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # normalization
])

train_dataset = torchvision.datasets.MNIST(root='./', train=True,
                                           transform=transform, download=DOWNLOAD)
train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
```
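Iterating over the resulting loader yields batches of tensors. Continuing from the snippet above, a quick shape check looks like this:

```python
images, labels = next(iter(train_loader))
print(images.shape)   # torch.Size([32, 1, 28, 28]) -- batch x channels x height x width
print(labels.shape)   # torch.Size([32])
```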
torchvision.models
torchvision.models provides models with pretrained weights that can be loaded and used directly. It includes the following architectures:
AlexNet, VGG, ResNet, SqueezeNet, DenseNet, MobileNet
```python
import torchvision.models as models

resnet18 = models.resnet18(pretrained=True)
alexnet = models.alexnet(pretrained=True)
```
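A minimal inference sketch with one of these pretrained models; test.jpg is a placeholder path, and the Normalize values are the standard ImageNet statistics these weights were trained with:

```python
import torch
from PIL import Image
import torchvision.models as models
from torchvision import transforms

preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],   # ImageNet statistics
                         std=[0.229, 0.224, 0.225]),
])

model = models.resnet18(pretrained=True)
model.eval()  # disable dropout / use running BatchNorm statistics

img = Image.open('test.jpg').convert('RGB')   # placeholder image path
batch = preprocess(img).unsqueeze(0)          # add a batch dimension: [1, 3, 224, 224]

with torch.no_grad():
    logits = model(batch)                     # shape [1, 1000]
print(logits.argmax(dim=1))                   # predicted ImageNet class index
```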
torchvision.transforms
transforms provides the common transformation classes for images, for example:
```python
# Image preprocessing pipeline
from torchvision import transforms

transform = transforms.Compose([
    transforms.Resize(96),  # resize the smaller edge to 96, keeping the aspect ratio
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # normalize to roughly [-1, 1]
])
```
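A rough usage sketch of this pipeline on a local image; k.jpg is just a placeholder path (the same one used in the ToTensor example further down):

```python
from PIL import Image
from torchvision import transforms

transform = transforms.Compose([
    transforms.Resize(96),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

img = Image.open('k.jpg').convert('RGB')   # placeholder image path
out = transform(img)
print(out.shape)             # torch.Size([3, H, W]) with the smaller edge resized to 96
print(out.min(), out.max())  # roughly within [-1.0, 1.0] after Normalize
```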
Transformations supported by transforms
```python
__all__ = ["Compose", "ToTensor", "PILToTensor", "ConvertImageDtype", "ToPILImage",
           "Normalize", "Resize", "Scale", "CenterCrop", "Pad", "Lambda", "RandomApply",
           "RandomChoice", "RandomOrder", "RandomCrop", "RandomHorizontalFlip",
           "RandomVerticalFlip", "RandomResizedCrop", "RandomSizedCrop", "FiveCrop",
           "TenCrop", "LinearTransformation", "ColorJitter", "RandomRotation",
           "RandomAffine", "Grayscale", "RandomGrayscale", "RandomPerspective",
           "RandomErasing", "GaussianBlur", "InterpolationMode", "RandomInvert",
           "RandomPosterize", "RandomSolarize", "RandomAdjustSharpness",
           "RandomAutocontrast", "RandomEqualize"]
```
```python
from PIL import Image
# from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms
from torchvision.transforms import functional as F

# The Tensor data type: two ways of converting a PIL image with transforms.ToTensor
img_path = "./k.jpg"
img = Image.open(img_path)

# writer = SummaryWriter("logs")
tensor_trans = transforms.ToTensor()
tensor_img = tensor_trans(img)      # class-style API
tensor_img1 = F.to_tensor(img)      # functional API
print(tensor_img.type(), tensor_img1.type())
print(tensor_img.shape)

'''
transforms.Normalize normalizes each channel with:
    channel = (channel - mean) / std
Since transforms.ToTensor() has already scaled the data to [0, 1],
(x - 0.5) / 0.5 maps it to [-1.0, 1.0].
'''

# writer.add_image("Tensor_img", tensor_img)
# writer.close()
```
Resizes the input PIL.Image to the given size, where size is the length of the smaller edge.
For example, if the original image has height > width, the resized image has size (size * height / width, size).
class torchvision.transforms.Scale(size, interpolation=2)

```python
from torchvision import transforms
from PIL import Image

scale = transforms.Scale(12)
img = Image.open('test.jpg')
print(type(img))
print(img.size)
scaled_img = scale(img)
print(type(scaled_img))
print(scaled_img.size)
```
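In more recent torchvision releases, Scale is deprecated in favor of Resize, which behaves the same way for an integer size (the smaller edge is matched). A drop-in sketch, again with a placeholder image path:

```python
from torchvision import transforms
from PIL import Image

resize = transforms.Resize(12)   # equivalent to Scale(12): match the smaller edge to 12
img = Image.open('test.jpg')     # placeholder image path
resized_img = resize(img)
print(resized_img.size)          # aspect ratio preserved, smaller edge == 12
```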
Transformations on PIL.Image
class torchvision.transforms.Compose(transforms)
Chains several transforms together and applies them in order.
class torchvision.transforms.Normalize(mean, std)
Given a per-channel mean (R, G, B) and standard deviation (R, G, B), normalizes the Tensor channel by channel: normalized_image = (image - mean) / std.
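A quick numeric check of that formula, using a one-pixel-per-channel tensor assumed to be already scaled to [0, 1] as ToTensor would produce:

```python
import torch
from torchvision import transforms

normalize = transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))

x = torch.tensor([[[0.0]], [[0.5]], [[1.0]]])  # shape [3, 1, 1]: one pixel per channel
y = normalize(x)
print(y.flatten())  # tensor([-1., 0., 1.]) -- (x - 0.5) / 0.5 maps [0, 1] to [-1, 1]
```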
class torchvision.transforms.RandomSizedCrop(size, interpolation=2)
First randomly crops the given PIL.Image, then resizes the crop to the given size.
class torchvision.transforms.RandomCrop(size, padding=0)
The location of the crop is chosen at random. size can be a tuple or an integer.
class torchvision.transforms.CenterCrop(size)
Crops the given PIL.Image at the center to the given size. size can be a tuple (target_height, target_width), or an integer, in which case the crop is a square.
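A small sketch comparing the two crop transforms on a PIL image; test.jpg is a placeholder path and is assumed to be larger than the crop sizes:

```python
from PIL import Image
from torchvision import transforms

img = Image.open('test.jpg')                    # placeholder image path

center_crop = transforms.CenterCrop(64)         # 64x64 square crop from the center
random_crop = transforms.RandomCrop((48, 96))   # (height, width) crop at a random location

print(center_crop(img).size)  # (64, 64) -- PIL reports (width, height)
print(random_crop(img).size)  # (96, 48)
```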
Summary
The above is based on my personal experience. I hope it can serve as a useful reference, and I hope you will continue to support 脚本之家.