For a while, I read image data with the following code:
# Read the picture with OpenCV, then wrap the resulting array as a PIL image.
# NOTE(review): cv2.imread yields BGR channel order and no channel conversion
# is applied before Image.fromarray — confirm the intended colour order.
img = cv2.imread(os.path.join(path, file, pic))
img = Image.fromarray(img.astype(np.uint8))
Then one day it suddenly occurred to me: why go to so much trouble? Just use this directly:
# Open the picture directly as a PIL image (no intermediate OpenCV array).
img = Image.open(os.path.join(path, file, pic))
That is all it takes.
The reason is that the transform operations in PyTorch work on data read with Image.open (PIL images), which is why the code above needed the Image.fromarray conversion.
Some additional background:
- Image data read with scipy.misc.imread is in RGB format;
- Image data read with cv2.imread is in BGR format;
- Image data read with PIL.Image.open is in RGB format.
So when I applied a wavelet transform, a problem showed up:
# coding: utf-8
import torch
import torch.nn as nn


def dwt_init(x):
    """Apply one level of a 2-D Haar discrete wavelet transform.

    Args:
        x: 4-D tensor indexed as (batch, channel, height, width). Height and
            width must be even; otherwise the even/odd slices below differ in
            size and the additions fail.

    Returns:
        A tensor with four times as many channels and half the height and
        width: the LL, HL, LH and HH sub-bands concatenated along dimension 1.
    """
    x01 = x[:, :, 0::2, :] / 2  # even rows, pre-scaled by 1/2
    x02 = x[:, :, 1::2, :] / 2  # odd rows, pre-scaled by 1/2
    x1 = x01[:, :, :, 0::2]  # even row, even column
    x2 = x02[:, :, :, 0::2]  # odd row, even column
    x3 = x01[:, :, :, 1::2]  # even row, odd column
    x4 = x02[:, :, :, 1::2]  # odd row, odd column
    x_LL = x1 + x2 + x3 + x4
    x_HL = -x1 - x2 + x3 + x4
    x_LH = -x1 + x2 - x3 + x4
    x_HH = x1 - x2 - x3 + x4
    return torch.cat((x_LL, x_HL, x_LH, x_HH), 1)


def iwt_init(x):
    """Invert ``dwt_init``: one level of a 2-D inverse Haar wavelet transform.

    Args:
        x: 4-D tensor indexed as (batch, channel, height, width) whose channel
            dimension holds the LL, HL, LH and HH sub-bands in that order, so
            the channel count is expected to be a multiple of 4.

    Returns:
        A tensor with a quarter of the channels and twice the height and
        width, allocated on the same device as ``x``.
    """
    r = 2
    in_batch, in_channel, in_height, in_width = x.size()
    out_batch = in_batch
    out_channel = in_channel // (r ** 2)
    out_height = r * in_height
    out_width = r * in_width

    x1 = x[:, 0:out_channel, :, :] / 2
    x2 = x[:, out_channel:out_channel * 2, :, :] / 2
    x3 = x[:, out_channel * 2:out_channel * 3, :, :] / 2
    x4 = x[:, out_channel * 3:out_channel * 4, :, :] / 2

    # Allocate the output next to the input so the function works on CPU and
    # GPU tensors alike, instead of hard-coding .cuda() or the CPU.
    h = torch.zeros(
        [out_batch, out_channel, out_height, out_width],
        dtype=x1.dtype,
        device=x.device,
    )
    # Scatter the four reconstructed pixels of every 2x2 cell.
    h[:, :, 0::2, 0::2] = x1 - x2 - x3 + x4
    h[:, :, 1::2, 0::2] = x1 - x2 + x3 - x4
    h[:, :, 0::2, 1::2] = x1 + x2 - x3 - x4
    h[:, :, 1::2, 1::2] = x1 + x2 + x3 + x4
    return h


class DWT(nn.Module):
    """Module wrapper around ``dwt_init`` (2-D discrete wavelet transform)."""

    def __init__(self):
        super(DWT, self).__init__()
        # Pure signal processing, not a convolution: the module has no
        # parameters, so there is nothing to compute gradients for.
        self.requires_grad = False

    def forward(self, x):
        return dwt_init(x)


class IWT(nn.Module):
    """Module wrapper around ``iwt_init`` (2-D inverse wavelet transform)."""

    def __init__(self):
        super(IWT, self).__init__()
        self.requires_grad = False

    def forward(self, x):
        return iwt_init(x)


if __name__ == '__main__':
    import os

    import cv2
    import numpy as np
    import torchvision
    from PIL import Image
    from torchvision import transforms as trans

    # Alternative loader kept for comparison: it goes through OpenCV and
    # wraps the array as a PIL image without any channel conversion.
    # img = cv2.imread('./1.jpg')
    # print(img.shape)
    # img = Image.fromarray(img.astype(np.uint8))
    img = Image.open('./1.jpg')
    transform = trans.Compose([
        trans.ToTensor()
    ])
    img = transform(img).unsqueeze(0)
    dwt = DWT()
    change_img_tensor = dwt(img)
    print(change_img_tensor.shape)
    # Save each group of three channels (one sub-band) as its own image.
    for i in range(change_img_tensor.size(1) // 3):
        print(i * 3, i * 3 + 3)
        torchvision.utils.save_image(
            change_img_tensor[:, i * 3:i * 3 + 3, :, :],
            os.path.join('./', 'change_{}.jpg'.format(i)),
        )
The result obtained with the first method is:
The result obtained with the second method is:
This is because the first method reads the image in BGR order, while the code that follows treats it as RGB, which is the order images are normally stored in.
Why do deep learning frameworks often describe images in BGR channel order rather than RGB?
Because Caffe, one of the earliest and most popular libraries, uses OpenCV, and OpenCV's default channel order is BGR. This is one of the pitfalls for OpenCV beginners; BGR is a historical legacy, kept for compatibility with some early hardware. In fact, you can train your own models with RGB; newer libraries have largely moved past the RGB-versus-BGR issue, since it only amounts to swapping the channel order. But if you use a well-trained older model, you have to stay compatible with the BGR order that model expects, which may require an external conversion step.
For example, here is some data-processing code I worked on earlier:
import argparse
from pathlib import Path

import cv2
import mxnet as mx
import numpy as np
from PIL import Image
from tqdm import tqdm

from config import get_config


def load_mx_rec(rec_path):
    """Extract every image of an MXNet indexed record file into JPEG files.

    Reads ``train.idx`` / ``train.rec`` under ``rec_path`` and writes each
    image to ``rec_path/imgs/<label>/<idx>.jpg``.

    Args:
        rec_path: ``pathlib.Path`` of the directory holding the record files.
    """
    save_path = rec_path / 'imgs'
    save_path.mkdir(parents=True, exist_ok=True)
    imgrec = mx.recordio.MXIndexedRecordIO(
        str(rec_path / 'train.idx'), str(rec_path / 'train.rec'), 'r')
    # Record 0 is a header record; its first label value bounds the indices.
    img_info = imgrec.read_idx(0)
    header, _ = mx.recordio.unpack(img_info)
    max_idx = int(header.label[0])
    for idx in tqdm(range(1, max_idx)):
        img_info = imgrec.read_idx(idx)
        header, img = mx.recordio.unpack_img(img_info)
        label = int(header.label)
        # The decoded array is in OpenCV's BGR order while PIL interprets
        # arrays as RGB; without this swap the saved pictures have odd
        # colours. (COLOR_RGB2BGR performs the same channel permutation.)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = Image.fromarray(img)
        label_path = save_path / str(label)
        label_path.mkdir(exist_ok=True)
        img.save(label_path / '{}.jpg'.format(idx), quality=95)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='for face verification')
    parser.add_argument("-r", "--rec_path", help="mxnet record file path", default='faces_emore', type=str)
    args = parser.parse_args()
    conf = get_config()
    rec_path = conf.data_path / args.rec_path
    load_mx_rec(rec_path)
    # The training data lives in train.idx and train.rec.
If I do not add this line here:
# Reorder the colour channels of img (the R and B channels are swapped).
img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
then the colors of the resulting pictures look very strange:
If I add this line, the image looks normal:
Earlier I had mistakenly written:
# Same channel swap, spelled with the BGR-to-RGB conversion code.
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
But I found that the effect is the same, since both conversions simply swap the first and third channels.