[Processing GF6-WFV data after preprocessing - Python batch processing tool - source code included]

GF6-WFV data preprocessing

1. GF6-WFV data - the first cropping (sub)

This step builds on the previous preprocessing step and crops each scene to its water body (sub). For each scene image, a water body boundary (shp) is drawn by visual interpretation. Store the preprocessed data and its water boundary in the same folder, then use the program to perform the batch cropping (sub).

Data_Rad_Fla_Rpc_bm-XX-JG-sub.dat——3 ratio index calculation results

insert image description here

2. Data_Rad_Fla_Rpc_bm-XX-JG-sub.dat — results of the 3 ratio index calculations. On this basis, use the histogram to find the upper limit of the bulk of the data and remove the highest 0.1%–1% of values; that is, the tail marked by the red box in the figure is the part that gets removed.

insert image description here

Data_Rad_Fla_Rpc_bm-XX-JG-sub-q1.dat——the result of removing (0.1%——1%) from the calculation results of the three ratio indices

insert image description here

Data_Rad_Fla_Rpc_bm-XX-JG-sub-q1.dat——the result of removing (0.1%—1%) from the calculation results of the 3 ratio indices—the background value appears

insert image description here

3. GF6-WFV data - the second clipping (clip)

This step clips the output of the previous step to the water body boundary once more (clip). It is needed because, after the previous step, the data contains a black background; the water boundary (shp) that was visually interpreted for the same scene image is reused for this second clip.
insert image description here

Data_Rad_Fla_Rpc_bm-XX-JG-sub-q1.dat — the result of removing (0.1%–1%) from the calculation results of the three ratio indices, in which the background value appears. The second clipping solves this problem, so that the background value does not have to be ignored manually for each file one by one (ignore background data).

insert image description here

4. GF6-WFV data is stored in separate models after the second clipping

insert image description here

1 of the 3 ratio index calculation results of the data

insert image description here

The final result looks like this:

insert image description here

The above content is implemented in Python - the specific code is as follows:

insert image description here

import os
import numpy as np
from osgeo import gdal
import sys
import cv2
from tqdm import tqdm

# import ipdb

def cv_to_gdal(filename, img, datatype):
    """Write an image array to a GeoTIFF file without any georeferencing.

    Args:
        filename: Path of the GeoTIFF to create.
        img: Array indexed as (row, column) or (row, column, channel).
        datatype: GDAL data type constant handed to the driver's ``Create``.

    A three-channel image is written with its channel order reversed
    (channel 0 goes to band 3, channel 2 to band 1); any other channel
    count is written in order.
    """
    # Give a single-channel image an explicit trailing channel axis.
    if img.ndim < 3:
        img = img[:, :, None]

    n_rows, n_cols, n_channels = img.shape

    # 1-based GDAL band numbers, flipped only for the three-channel case.
    target_bands = list(range(1, n_channels + 1))
    if n_channels == 3:
        target_bands.reverse()

    out_ds = gdal.GetDriverByName('GTiff').Create(
        filename, n_cols, n_rows, n_channels, datatype)
    for channel, band_no in enumerate(target_bands):
        out_ds.GetRasterBand(band_no).WriteArray(img[:, :, channel])
    # Dropping the only reference lets GDAL finish writing the file.
    del out_ds


def write_gdal(filename, img, datatype, img_geotrans, img_proj, big_tiff=False):
    """Write an image array to an LZW-compressed, georeferenced GeoTIFF.

    Args:
        filename: Path of the GeoTIFF to create.
        img: Array indexed as (row, column) or (row, column, channel).
        datatype: GDAL data type constant handed to the driver's ``Create``.
        img_geotrans: Geotransform stored on the output dataset.
        img_proj: Projection stored on the output dataset.
        big_tiff: When equal to ``True`` the file is created with
            ``BigTIFF=YES``, otherwise with ``BigTIFF=NO``.

    A three-channel image is written with its channel order reversed
    (channel 0 goes to band 3, channel 2 to band 1); any other channel
    count is written in order. Overviews at factors 2 through 128 are built
    with 'Nearest' resampling before the dataset is released.
    """
    # Give a single-channel image an explicit trailing channel axis.
    if img.ndim < 3:
        img = img[:, :, None]
    n_rows, n_cols, n_channels = img.shape

    # 1-based GDAL band numbers, flipped only for the three-channel case.
    target_bands = list(range(1, n_channels + 1))
    if n_channels == 3:
        target_bands.reverse()

    # Equality rather than truthiness is kept on purpose so that exactly the
    # same argument values enable BigTIFF as before.
    if big_tiff == True:
        bigtiff_flag = "YES"
    else:
        bigtiff_flag = "NO"
    creation_options = ['BigTIFF={}'.format(bigtiff_flag), 'COMPRESS=LZW']

    out_ds = gdal.GetDriverByName('GTiff').Create(
        filename, n_cols, n_rows, n_channels, datatype,
        options=creation_options)
    for channel, band_no in enumerate(target_bands):
        out_ds.GetRasterBand(band_no).WriteArray(img[..., channel])

    out_ds.SetGeoTransform(img_geotrans)
    out_ds.SetProjection(img_proj)
    out_ds.BuildOverviews('Nearest', [2, 4, 8, 16, 32, 64, 128])

    # Dropping the only reference lets GDAL finish writing the file.
    del out_ds


def read_img(filepath):
    """Read every band of a raster file into one (height, width, bands) array.

    Args:
        filepath: Path of a raster that GDAL can open.

    Returns:
        A tuple ``(img_array, data_type, img_geotrans, img_proj)``:
        ``img_array`` holds the bands stacked along the last axis,
        ``data_type`` is the GDAL data type of the last band that was read,
        and ``img_geotrans`` / ``img_proj`` are the dataset's geotransform
        and projection.

    Raises:
        SystemExit: If GDAL cannot open ``filepath`` (a message is printed
            first).
        ValueError: If the dataset contains no raster bands.
        RuntimeError: If GDAL fails to read a band.

    A three-band raster is returned with its band order reversed (band 3
    first, band 1 last); ``write_gdal`` and ``cv_to_gdal`` apply the same
    reversal when writing. Any other band count is returned in file order.
    """
    dataset = gdal.Open(filepath)
    if dataset is None:
        print('FATAL: GDAL open file failed. [%s]' % filepath)
        sys.exit(1)
    img_width = dataset.RasterXSize
    img_height = dataset.RasterYSize
    img_nbands = dataset.RasterCount
    if img_nbands < 1:
        raise ValueError('Raster has no bands. [%s]' % filepath)

    img_geotrans = dataset.GetGeoTransform()
    img_proj = dataset.GetProjection()

    # 1-based GDAL band numbers, flipped only for the three-band case.
    band_list = list(range(1, img_nbands + 1))
    if img_nbands == 3:
        band_list.reverse()

    # NumPy dtype each supported GDAL band type is converted to.
    known_dtypes = {
        gdal.GDT_Byte: np.uint8,
        gdal.GDT_UInt16: np.uint16,
        gdal.GDT_Int16: np.int16,
        gdal.GDT_UInt32: np.uint32,
        gdal.GDT_Int32: np.int32,
        gdal.GDT_Float32: np.float32,
        gdal.GDT_Float64: np.float64,
    }

    data_type = gdal.GDT_Byte
    planes = []
    for band_no in band_list:
        band = dataset.GetRasterBand(band_no)
        data_type = band.DataType
        img_arr = band.ReadAsArray(0, 0, img_width, img_height)
        if img_arr is None:
            raise RuntimeError(
                'GDAL failed to read band %d. [%s]' % (band_no, filepath))

        np_dtype = known_dtypes.get(data_type)
        if np_dtype is None:
            # Previously this case left the band array unset (crash on the
            # first band, silent reuse of the previous band afterwards).
            # Keep the array exactly as GDAL returned it instead.
            print('ERROR: GDAL unknown data type. [%s]'
                  % gdal.GetDataTypeName(data_type))
        else:
            img_arr = img_arr.astype(np_dtype, copy=False)
        planes.append(img_arr)

    # Stack once at the end rather than re-copying the cube for every band.
    img_array = np.dstack(planes)

    return img_array, data_type, img_geotrans, img_proj
# Batch driver: first clip -> outlier removal -> second clip, per scene folder.


def _find_cutline(scene_dir):
    """Return the path of the first ``.shp`` file (by name) in *scene_dir*, or None."""
    shp_names = sorted(
        name for name in os.listdir(scene_dir) if name.endswith(".shp"))
    if not shp_names:
        return None
    return os.path.join(scene_dir, shp_names[0])


def _clip_to_cutline(dst_path, src_path, shp_path):
    """Clip *src_path* to the *shp_path* cutline and write a GeoTIFF to *dst_path*."""
    ds = gdal.Warp(dst_path, src_path, format='GTiff', cutlineDSName=shp_path,
                   cropToCutline=True, dstNodata=0)
    if ds is None:
        raise RuntimeError(
            'gdal.Warp failed: [%s] -> [%s]' % (src_path, dst_path))
    # Release the dataset right away so the output is fully written to disk
    # before anything re-opens the file.
    ds = None


if __name__ == '__main__':
    img_path = r"E:\4研究区——20230317备份\test\新建文件夹"
    out_path_med = r"E:\4研究区——20230317备份\test\1-一次裁剪"
    out_path_med_med = r"E:\4研究区——20230317备份\test\2-去除99.9以外150"
    out_path_last = r"E:\4研究区——20230317备份\test\3-二次裁剪"
    # Percentile above which pixel values are treated as outliers.
    number_th = 99.5

    for out_dir in (out_path_med, out_path_med_med, out_path_last):
        os.makedirs(out_dir, exist_ok=True)

    # Each sub-folder of img_path holds one scene: its .dat raster(s) plus
    # the water boundary shapefile, e.g. 20190403_1119863972.shp
    for scene_name in tqdm(os.listdir(img_path)):
        scene_dir = os.path.join(img_path, scene_name)
        if not os.path.isdir(scene_dir):
            continue

        # Resolve the cutline before touching any raster: directory listing
        # order is not guaranteed, so the .shp may be listed after the .dat.
        shp_path = _find_cutline(scene_dir)
        if shp_path is None:
            print('WARNING: no .shp cutline found, folder skipped. [%s]'
                  % scene_dir)
            continue

        for dat_name in sorted(os.listdir(scene_dir)):
            if not dat_name.endswith(".dat"):
                continue
            stem = dat_name[:-len(".dat")]
            dat_path = os.path.join(scene_dir, dat_name)

            # First clip.
            sub_path = os.path.join(out_path_med, stem + "-sub.tif")
            _clip_to_cutline(sub_path, dat_path, shp_path)

            # Filter outliers.
            img_array, data_type, img_geotrans, img_proj = read_img(sub_path)

            # Percentile-based removal. For a fixed cut-off instead, use:
            #     high = number_th
            # NOTE(review): the percentile is taken over every pixel,
            # including the 0-valued nodata background left by the first
            # clip - confirm that this is intended.
            high = np.percentile(img_array, number_th)
            img_array[img_array > high] = 0

            tc_path = os.path.join(out_path_med_med, stem + "-sub-tc.tif")
            write_gdal(tc_path, img_array, data_type, img_geotrans, img_proj)

            # Second clip.
            clip_path = os.path.join(out_path_last, stem + "-sub-tc-clip.tif")
            _clip_to_cutline(clip_path, tc_path, shp_path)


Python is really easy to use. The code as a whole was written with the help of classmates, and it has shortened the processing time a lot. Python — YYDS ("the greatest of all time") — Python is awesome!

Guess you like

Origin blog.csdn.net/qq_36253366/article/details/129960695
Recommended