An image-processing pitfall in PyTorch (BGR vs. RGB)

 

For a while I read image data with the following commands:

# cv2.imread returns a NumPy array in BGR channel order (see the notes below);
# Image.fromarray then wraps that array as a PIL image.
img = cv2.imread(os.path.join(path, file, pic))
img = Image.fromarray(img.astype(np.uint8))

Then one day it suddenly occurred to me: why go to so much trouble? I could simply use:

# Image.open returns a PIL image directly; its data is in RGB order (see the notes below).
img = Image.open(os.path.join(path, file, pic))

and that is all that is needed.

The transform operations used with PyTorch expect data in the form that Image.open returns (a PIL image), which is why the first approach above still had to convert the array with Image.fromarray.

 

Some background knowledge:

  • scipy.misc.imread reads image data in RGB format;
  • cv2.imread reads image data in BGR format;
  • PIL.Image.open reads image data in RGB format.

 

Because of this, a problem showed up when I applied a wavelet transform:

#coding:utf-8
import torch.nn as nn
import torch

def dwt_init(x):
    """Apply a single-level 2D Haar discrete wavelet transform.

    The input is split into its four polyphase components (even/odd rows
    crossed with even/odd columns), each scaled by 1/2, and combined into the
    four Haar sub-bands.

    Args:
        x: 4-D tensor indexed as (batch, channel, height, width). Height and
            width are expected to be even so that the four sub-sampled grids
            have the same size.

    Returns:
        Tensor with four times as many channels and half the height and width
        of ``x``. The sub-bands are concatenated along dimension 1 in the
        order LL, HL, LH, HH.
    """
    # Even and odd rows, pre-scaled by 1/2.
    x01 = x[:, :, 0::2, :] / 2
    x02 = x[:, :, 1::2, :] / 2
    # Split each row set into even and odd columns.
    x1 = x01[:, :, :, 0::2]  # even row, even column
    x2 = x02[:, :, :, 0::2]  # odd row, even column
    x3 = x01[:, :, :, 1::2]  # even row, odd column
    x4 = x02[:, :, :, 1::2]  # odd row, odd column

    x_LL = x1 + x2 + x3 + x4
    x_HL = -x1 - x2 + x3 + x4
    x_LH = -x1 + x2 - x3 + x4
    x_HH = x1 - x2 - x3 + x4

    return torch.cat((x_LL, x_HL, x_LH, x_HH), 1)


# Two-dimensional inverse discrete wavelet transform implemented with the Haar wavelet
def iwt_init(x):
    """Apply a single-level 2D inverse Haar discrete wavelet transform.

    The channel dimension of ``x`` is read as four equally sized groups in the
    order LL, HL, LH, HH (the layout that ``dwt_init`` concatenates), and the
    groups are recombined into a tensor with twice the height and width.

    Args:
        x: 4-D tensor indexed as (batch, channel, height, width) whose channel
            count is a multiple of 4.

    Returns:
        A float32 tensor of shape (batch, channel / 4, 2 * height, 2 * width),
        allocated on the same device as ``x``.

    Raises:
        ValueError: If the channel count of ``x`` is not a multiple of 4.
    """
    r = 2  # up-sampling factor applied to each spatial dimension
    in_batch, in_channel, in_height, in_width = x.size()
    if in_channel % (r ** 2) != 0:
        raise ValueError(
            "iwt_init expects a channel count divisible by {}, got {}".format(
                r ** 2, in_channel))
    out_batch = in_batch
    out_channel = in_channel // (r ** 2)
    out_height, out_width = r * in_height, r * in_width

    # The four sub-bands, each scaled by 1/2.
    x1 = x[:, 0:out_channel, :, :] / 2
    x2 = x[:, out_channel:out_channel * 2, :, :] / 2
    x3 = x[:, out_channel * 2:out_channel * 3, :, :] / 2
    x4 = x[:, out_channel * 3:out_channel * 4, :, :] / 2

    # Allocate the output next to the input so GPU inputs stay on the GPU.
    h = torch.zeros([out_batch, out_channel, out_height, out_width],
                    dtype=torch.float32, device=x.device)

    # Scatter the recombined values back onto the even/odd row and column grid.
    h[:, :, 0::2, 0::2] = x1 - x2 - x3 + x4
    h[:, :, 1::2, 0::2] = x1 - x2 + x3 - x4
    h[:, :, 0::2, 1::2] = x1 + x2 - x3 - x4
    h[:, :, 1::2, 1::2] = x1 + x2 + x3 + x4

    return h


# 2D discrete wavelet transform
class DWT(nn.Module):
    """Module wrapper that applies ``dwt_init`` to its input in ``forward``."""

    def __init__(self):
        super().__init__()
        # Signal-processing step rather than a convolution, so no gradient is
        # needed. NOTE(review): this only sets a plain attribute on the module.
        self.requires_grad = False

    def forward(self, x):
        """Return ``dwt_init(x)``."""
        return dwt_init(x)


# 2D inverse discrete wavelet transform
class IWT(nn.Module):
    """Module wrapper that applies ``iwt_init`` to its input in ``forward``."""

    def __init__(self):
        super().__init__()
        # NOTE(review): this only sets a plain attribute on the module; the
        # module defines no parameters of its own.
        self.requires_grad = False

    def forward(self, x):
        """Return ``iwt_init(x)``."""
        return iwt_init(x)

if __name__ == '__main__':
    # Demo: run the forward wavelet transform on one image and save each
    # sub-band as its own picture.
    import os
    import cv2
    import torchvision
    from PIL import Image
    import numpy as np
    from torchvision import transforms as trans
    # Alternative way of reading the image through OpenCV (yields BGR data):
    # img = cv2.imread('./1.jpg')
    # print(img.shape)
    # img = Image.fromarray(img.astype(np.uint8))
    img = Image.open('./1.jpg')
    transform = trans.Compose([
        trans.ToTensor()
    ])
    # Add a leading batch dimension so the tensor is 4-D, as dwt_init indexes it.
    img = transform(img).unsqueeze(0)
    dwt = DWT()
    change_img_tensor = dwt(img)
    print(change_img_tensor.shape)

    # Save the output three channels at a time; this assumes a 3-channel
    # input image, so that each group of three channels is one sub-band.
    for i in range(change_img_tensor.size(1) // 3):
        print(i*3,i*3+3)
        torchvision.utils.save_image(
            change_img_tensor[:, i * 3:i * 3 + 3, :, :],
            os.path.join('./', 'change_{}.jpg'.format(i)))

The result obtained with the first method (cv2.imread followed by Image.fromarray) is:

 

 

The result obtained with the second method (Image.open) is:

 This is because the first method reads the image in BGR format, whereas the steps that follow actually expect RGB format; images are normally stored in RGB format.

 

 

Why does deep learning often describe images in BGR channel order rather than RGB?

 Because Caffe, one of the earliest and most popular deep-learning libraries, uses OpenCV, and OpenCV's default channel order is BGR. This is one of the beginner pitfalls of OpenCV: BGR is a historical legacy, kept for compatibility with some early hardware. In fact, you can train your own models with RGB; newer libraries have largely left the RGB-or-BGR problem behind, since it is only a matter of swapping the channel order. But if you use older pretrained models, you have to stay compatible with their BGR input. The conversion can be done by an external converter.



For example, here is some data I processed previously:

from pathlib import Path
from config import get_config
import argparse

from PIL import Image
from tqdm import tqdm
import mxnet as mx
import cv2
import numpy as np

def load_mx_rec(rec_path):
    """Extract the images of an MXNet indexed record file into per-label folders.

    Reads ``train.idx`` / ``train.rec`` found under ``rec_path`` and writes
    every image as ``imgs/<label>/<idx>.jpg`` (JPEG quality 95) below the same
    directory.

    Args:
        rec_path: ``pathlib.Path``-like directory that contains ``train.idx``
            and ``train.rec``.
    """
    save_path = rec_path/'imgs'
    save_path.mkdir(exist_ok=True)
    imgrec = mx.recordio.MXIndexedRecordIO(str(rec_path/'train.idx'), str(rec_path/'train.rec'), 'r')
    try:
        # Record 0 carries only a header; its first label value is used as the
        # exclusive upper bound of the image indices.
        header, _ = mx.recordio.unpack(imgrec.read_idx(0))
        max_idx = int(header.label[0])
        for idx in tqdm(range(1, max_idx)):
            header, img = mx.recordio.unpack_img(imgrec.read_idx(idx))
            label = int(header.label)
            # The decoded array is in OpenCV's BGR channel order, while
            # Image.fromarray interprets a 3-channel array as RGB; without this
            # swap the saved pictures have red and blue exchanged.
            if img.ndim == 3 and img.shape[2] == 3:
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = Image.fromarray(img)
            label_path = save_path/str(label)
            label_path.mkdir(exist_ok=True)
            img.save(label_path/'{}.jpg'.format(idx), quality=95)
    finally:
        # Release the underlying record and index file handles.
        imgrec.close()

if __name__ == '__main__':
    # Command-line entry point: choose the record directory, resolve it
    # against the configured data path, and extract its images.
    arg_parser = argparse.ArgumentParser(description='for face verification')
    arg_parser.add_argument(
        "-r", "--rec_path",
        help="mxnet record file path",
        default='faces_emore',
        type=str,
    )
    cli_args = arg_parser.parse_args()
    data_root = get_config().data_path
    # The training data is stored in train.idx and train.rec.
    load_mx_rec(data_root/cli_args.rec_path)

If I do not add this line here:

img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

then the colors of the resulting pictures look very strange:

 

If I add this line, the images look normal:

 

Earlier I had mistakenly written:

img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

But I found that the effect is the same, because both conversions simply swap the first and third channels.

 

Guess you like

Origin www.cnblogs.com/wanghui-garcia/p/12523250.html