首先载入一张图片
from PIL import Image
from torchvision import transforms
def loadImage(path="test_ph.jpg", show=True):
    """Load an image file and return it as an RGB PIL image.

    Args:
        path: Location of the image file. Defaults to "test_ph.jpg", the
            file name this function previously hard-coded.
        show: When true (the default), display the image with
            ``Image.show`` before returning it.

    Returns:
        The image converted to "RGB" mode.
    """
    # The context manager closes the underlying file. convert() returns a
    # separate image with its pixel data loaded, so it stays usable after
    # the file has been closed.
    with Image.open(path) as source:
        im = source.convert("RGB")
    if show:
        im.show()
    return im
# Load the sample image once; the transform examples below all operate on `im`.
im = loadImage()
原始图片:
裁剪:
- 中心裁剪:`transforms.CenterCrop`
# An int size crops a square: a 300x300 patch taken from the image center.
transforms.CenterCrop(300)(im)
# A tuple size is (height, width): a center patch 300 pixels high and 400 pixels wide.
transforms.CenterCrop((300,400))(im)
- 随机裁剪
# Crop a 300x300 patch at a random location.
transforms.RandomCrop(300)(im)
# Crop a patch 300 pixels high and 400 pixels wide (size is (height, width)) at a random location.
transforms.RandomCrop((300,400))(im)
# FiveCrop returns a tuple of five 300x300 crops: the four corners
# (top-left, top-right, bottom-left, bottom-right) followed by the center.
pics = transforms.FiveCrop(300)(im)
# Index 4 is the last of the five crops, i.e. the center crop.
pics[4]
旋转/翻转
- 依概率随机水平翻转
# p is the flip probability (default 0.5); p=1.0 makes the horizontal flip certain.
transforms.RandomHorizontalFlip(p=1.0)(im)
- 依概率随机垂直翻转
# p=1 makes the vertical flip certain.
transforms.RandomVerticalFlip(p=1)(im)
- 随机旋转某一角度
# A single number d means the angle is drawn from the range (-d, +d): here -30 to 30 degrees.
transforms.RandomRotation(30)(im)
# A (min, max) pair gives the range directly: here 60 to 90 degrees.
transforms.RandomRotation((60,90))(im)
调整大小:`transforms.Resize`
# A tuple size is (height, width): resize the image to 400 pixels high and 500 pixels wide.
transforms.Resize((400,500))(im)
图像转换成tensor
# Convert the PIL image to a float tensor, scaling pixel values from [0, 255] to [0, 1]
# (i.e. dividing by 255).
data = transforms.ToTensor()(im)
# The shape is (channels, height, width): 3 channels, 805 pixels high, 1440 pixels wide
# for this sample image.
data.size()
torch.Size([3, 805, 1440])
数据标准化
# Per-channel standardization: norm_data = (data - mean) / std.
# mean and std each hold three values, one per channel.
norm_data=transforms.Normalize(mean=(0.2,0.2,0.2),std=(0.9,0.9,0.9))(data)
# Display the tensor before normalization, for comparison with norm_data.
data
tensor([[[0.9412, 0.9451, 0.9451, ..., 0.9137, 0.9216, 0.9333],
[0.4863, 0.4902, 0.4980, ..., 0.4824, 0.4863, 0.4902],
[0.4078, 0.4157, 0.4275, ..., 0.3804, 0.3922, 0.3922],
...,
[0.3176, 0.3059, 0.2980, ..., 0.3176, 0.3333, 0.3412],
[0.3333, 0.3176, 0.3059, ..., 0.3176, 0.3412, 0.3529],
[0.3373, 0.3216, 0.3098, ..., 0.3216, 0.3490, 0.3686]],
[[0.9961, 1.0000, 1.0000, ..., 0.9843, 0.9922, 1.0000],
[0.5529, 0.5569, 0.5647, ..., 0.5529, 0.5569, 0.5608],
[0.4784, 0.4863, 0.4980, ..., 0.4588, 0.4706, 0.4706],
...,
[0.3961, 0.3922, 0.3804, ..., 0.4118, 0.4235, 0.4314],
[0.4118, 0.4039, 0.3882, ..., 0.4118, 0.4314, 0.4431],
[0.4157, 0.4078, 0.3922, ..., 0.4157, 0.4392, 0.4588]],
[[1.0000, 1.0000, 1.0000, ..., 1.0000, 1.0000, 1.0000],
[0.6314, 0.6353, 0.6431, ..., 0.6078, 0.6039, 0.6078],
[0.6275, 0.6353, 0.6471, ..., 0.5647, 0.5686, 0.5686],
...,
[0.4941, 0.4863, 0.4863, ..., 0.5451, 0.5569, 0.5647],
[0.5098, 0.4980, 0.4941, ..., 0.5451, 0.5647, 0.5765],
[0.5137, 0.5020, 0.4980, ..., 0.5490, 0.5725, 0.5922]]])
# Display the normalized tensor.
norm_data
tensor([[[0.8235, 0.8279, 0.8279, ..., 0.7930, 0.8017, 0.8148],
[0.3181, 0.3224, 0.3312, ..., 0.3137, 0.3181, 0.3224],
[0.2309, 0.2397, 0.2527, ..., 0.2004, 0.2135, 0.2135],
...,
[0.1307, 0.1176, 0.1089, ..., 0.1307, 0.1481, 0.1569],
[0.1481, 0.1307, 0.1176, ..., 0.1307, 0.1569, 0.1699],
[0.1525, 0.1351, 0.1220, ..., 0.1351, 0.1656, 0.1874]],
[[0.8845, 0.8889, 0.8889, ..., 0.8715, 0.8802, 0.8889],
[0.3922, 0.3965, 0.4052, ..., 0.3922, 0.3965, 0.4009],
[0.3094, 0.3181, 0.3312, ..., 0.2876, 0.3007, 0.3007],
...,
[0.2179, 0.2135, 0.2004, ..., 0.2353, 0.2484, 0.2571],
[0.2353, 0.2266, 0.2092, ..., 0.2353, 0.2571, 0.2702],
[0.2397, 0.2309, 0.2135, ..., 0.2397, 0.2658, 0.2876]],
[[0.8889, 0.8889, 0.8889, ..., 0.8889, 0.8889, 0.8889],
[0.4793, 0.4837, 0.4924, ..., 0.4532, 0.4488, 0.4532],
[0.4749, 0.4837, 0.4967, ..., 0.4052, 0.4096, 0.4096],
...,
[0.3268, 0.3181, 0.3181, ..., 0.3834, 0.3965, 0.4052],
[0.3442, 0.3312, 0.3268, ..., 0.3834, 0.4052, 0.4183],
[0.3486, 0.3355, 0.3312, ..., 0.3878, 0.4139, 0.4357]]])
数据转化成图像
# Convert the (normalized) tensor back into an RGB PIL image.
# NOTE(review): normalization can push values outside [0, 1] (pixels below the
# mean become negative); ToPILImage does not appear to clip them, so the
# resulting colors may be distorted -- confirm against the torchvision docs.
transforms.ToPILImage(mode="RGB")(norm_data)
连续操作: 使用transforms.Compose
# Steps of the pipeline, applied in order: center crop, random rotation,
# conversion to tensor, per-channel normalization, conversion back to a PIL image.
pipeline_steps = (
    transforms.CenterCrop(300),
    transforms.RandomRotation(30),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.1, 0.2, 0.3), std=(1, 1, 1)),
    transforms.ToPILImage(mode="RGB"),
)
# Compose chains the steps into a single callable.
trans = transforms.Compose(pipeline_steps)
# Run the whole pipeline on the sample image.
trans(im)