GF6-WFV data preprocessing
1. GF6-WFV data - the first cropping (sub)
This step crops each scene image to its water body (sub). For every scene image, a water-body boundary is delineated by visual interpretation and saved as a shapefile (shp). Store the preprocessed data and the water boundary in the same folder, and use the program to perform the cropping (sub) in batch.
Data_Rad_Fla_Rpc_bm-XX-JG-sub.dat——3 ratio index calculation results
2. Data_Rad_Fla_Rpc_bm-XX-JG-sub.dat (the three ratio-index calculation results) — on this basis, use the histogram to find the upper limit of the bulk of the data, and remove the values above it (the top 0.1%–1%). The part marked by the red box is the portion that gets removed.
Data_Rad_Fla_Rpc_bm-XX-JG-sub-q1.dat——the result of removing (0.1%——1%) from the calculation results of the three ratio indices
Data_Rad_Fla_Rpc_bm-XX-JG-sub-q1.dat — the three ratio-index calculation results after removing the top 0.1%–1% of values; note that a background value now appears in the data.
3. GF6-WFV data - the second clipping (clip)
This step clips the output of the previous step to the water-body boundary once more (clip). It is needed because, after the previous step, the data contains a black background. The same visually interpreted water boundary (shp) of the scene image is reused for this second clipping.
Data_Rad_Fla_Rpc_bm-XX-JG-sub-q1.dat — the three ratio-index calculation results after removing the top 0.1%–1% of values, in which the background value appears. The second clipping solves this problem, so that the background value does not have to be ignored by hand for each file one by one (ignore background data).
4. GF6-WFV data is stored in separate models after the second clipping
One of the three ratio-index calculation results for the data
The final result looks like this
The above content is implemented in Python - the specific code is as follows:
import os
import numpy as np
from osgeo import gdal
import sys
import cv2
from tqdm import tqdm
# import ipdb
def cv_to_gdal(filename, img, datatype):
    """Save an image array as a GeoTIFF file (no georeferencing is written).

    Args:
        filename: Path of the GeoTIFF file to create.
        img: Array indexed (row, column) or (row, column, channel). A 2-D
            array is treated as a single-channel image.
        datatype: GDAL data type constant handed to the driver's ``Create``.

    When the image has exactly three channels they are written in reversed
    order: channel 0 goes to band 3, channel 1 to band 2, channel 2 to band 1.
    NOTE(review): presumably this converts OpenCV's BGR layout to RGB band
    order - confirm against the callers.
    """
    # Promote a 2-D image to a (rows, cols, 1) cube so both cases share a path.
    if img.ndim < 3:
        img = img.reshape(img.shape[0], img.shape[1], 1)

    # Channel-first view: one 2-D plane per output band.
    planes = img.transpose(2, 0, 1)
    n_bands, n_rows, n_cols = planes.shape

    # 1-based GDAL band number that receives each plane.
    if n_bands == 3:
        target_bands = [3, 2, 1]
    else:
        target_bands = list(range(1, n_bands + 1))

    out_ds = gdal.GetDriverByName('GTiff').Create(
        filename, n_cols, n_rows, n_bands, datatype)
    for band_number, plane in zip(target_bands, planes):
        out_ds.GetRasterBand(band_number).WriteArray(plane)

    # Dropping the last reference closes the dataset.
    del out_ds
def write_gdal(filename, img, datatype, img_geotrans, img_proj, big_tiff=False):
    """Write an image array to an LZW-compressed, georeferenced GeoTIFF.

    Args:
        filename: Path of the GeoTIFF file to create.
        img: Array indexed (row, column) or (row, column, band). A 2-D array
            is written as a single band.
        datatype: GDAL data type constant handed to the driver's ``Create``.
        img_geotrans: Geotransform passed to ``SetGeoTransform``.
        img_proj: Projection passed to ``SetProjection``.
        big_tiff: When equal to ``True`` the file is created with
            ``BigTIFF=YES``; any other value gives ``BigTIFF=NO``.

    When the image has exactly three bands they are written in reversed
    order (array band 0 -> file band 3, ..., array band 2 -> file band 1).
    Overviews at levels 2 through 128 are built with nearest-neighbour
    resampling before the file is closed.
    """
    if img.ndim < 3:
        n_rows, n_cols = img.shape
        n_bands = 1
        # Give the 2-D image a trailing band axis so indexing below is uniform.
        img = img.reshape(img.shape[0], img.shape[1], 1)
    else:
        n_rows, n_cols, n_bands = img.shape

    # 1-based GDAL band number that receives each array band.
    if n_bands == 3:
        target_bands = [3, 2, 1]
    else:
        target_bands = list(range(1, n_bands + 1))

    # The explicit comparison with True is kept on purpose: truthy non-bool
    # values such as "yes" must still produce BigTIFF=NO.
    bigtiff_flag = "NO"
    if big_tiff == True:  # noqa: E712
        bigtiff_flag = "YES"
    creation_options = ['BigTIFF={}'.format(bigtiff_flag), 'COMPRESS=LZW']

    out_ds = gdal.GetDriverByName('GTiff').Create(
        filename, n_cols, n_rows, n_bands, datatype, options=creation_options)
    for array_band, band_number in enumerate(target_bands):
        out_ds.GetRasterBand(band_number).WriteArray(img[..., array_band])

    out_ds.SetGeoTransform(img_geotrans)
    out_ds.SetProjection(img_proj)
    out_ds.BuildOverviews('Nearest', [2, 4, 8, 16, 32, 64, 128])

    # Dropping the last reference closes the dataset.
    del out_ds
def read_img(filepath):
    """Read every band of a raster file into one (height, width, bands) array.

    Args:
        filepath: Path of a raster file that GDAL can open.

    Returns:
        A tuple ``(img_array, data_type, img_geotrans, img_proj)`` where
        ``img_array`` is a NumPy array indexed (row, column, band),
        ``data_type`` is the GDAL data type code of the last band read, and
        ``img_geotrans`` / ``img_proj`` are the dataset's geotransform and
        projection as returned by GDAL.

    Raises:
        SystemExit: If GDAL cannot open ``filepath`` (exit status 1, after
            printing a message).
        ValueError: If a band has a GDAL data type that is not handled here,
            or (raised by ``np.stack``) if the dataset has no bands.

    When the file has exactly three bands they are read in the order 3, 2, 1,
    so array band 0 holds file band 3.
    """
    dataset = gdal.Open(filepath)
    if dataset is None:
        print('FATAL: GDAL open file failed. [%s]' % filepath)
        sys.exit(1)

    img_width = dataset.RasterXSize
    img_height = dataset.RasterYSize
    img_nbands = dataset.RasterCount
    img_geotrans = dataset.GetGeoTransform()
    img_proj = dataset.GetProjection()

    # GDAL data type -> NumPy dtype each band is converted to.
    numpy_dtypes = {
        gdal.GDT_Byte: np.uint8,
        gdal.GDT_UInt16: np.uint16,
        gdal.GDT_Int16: np.int16,
        gdal.GDT_UInt32: np.uint32,
        gdal.GDT_Int32: np.int32,
        gdal.GDT_Float32: np.float32,
        gdal.GDT_Float64: np.float64,
    }

    # 1-based GDAL band numbers in the order they are placed in the array.
    if img_nbands == 3:
        band_list = [3, 2, 1]
    else:
        band_list = list(range(1, img_nbands + 1))

    data_type = gdal.GDT_Byte
    planes = []
    for band_number in band_list:
        band = dataset.GetRasterBand(band_number)
        data_type = band.DataType
        if data_type not in numpy_dtypes:
            # Previously this case only printed a message and then either
            # crashed on an unbound variable or silently reused the previous
            # band's pixels; fail loudly and name the offending type instead.
            raise ValueError(
                'ERROR: GDAL unknown data type. [%s]'
                % gdal.GetDataTypeName(data_type))
        plane = band.ReadAsArray(0, 0, img_width, img_height)
        planes.append(plane.astype(numpy_dtypes[data_type]))

    # Stack once at the end rather than re-copying the growing cube per band.
    img_array = np.stack(planes, axis=2)
    return img_array, data_type, img_geotrans, img_proj
if __name__ == '__main__':
    # Batch pipeline, run once per .dat raster found in each sub-folder of
    # img_path:
    #   1. clip the raster to the folder's shapefile   -> out_path_med
    #   2. zero every value above a percentile cut-off -> out_path_med_med
    #   3. clip the result to the same shapefile again -> out_path_last
    img_path = r"E:\4研究区——20230317备份\test\新建文件夹"
    out_path_med = r"E:\4研究区——20230317备份\test\1-一次裁剪"
    out_path_med_med = r"E:\4研究区——20230317备份\test\2-去除99.9以外150"
    out_path_last = r"E:\4研究区——20230317备份\test\3-二次裁剪"
    # Percentile (0-100) above which values are set to 0.
    number_th = 99.5

    # gdal.Warp and the GeoTIFF writer cannot create missing folders.
    for out_dir in (out_path_med, out_path_med_med, out_path_last):
        os.makedirs(out_dir, exist_ok=True)

    # Example cutline file name: 20190403_1119863972.shp
    for file in tqdm(os.listdir(img_path)):
        file_name = os.path.join(img_path, file)
        if not os.path.isdir(file_name):
            # Stray files next to the scene folders would crash os.listdir.
            continue

        entries = sorted(os.listdir(file_name))

        # Resolve the cutline before touching any raster: os.listdir order is
        # arbitrary, so a .dat could otherwise be processed before this
        # folder's .shp was seen (using an undefined or stale cutline).
        shp_names = [entry for entry in entries if entry.endswith(".shp")]
        if not shp_names:
            print('WARNING: no .shp cutline found, folder skipped. [%s]' % file_name)
            continue
        # If a folder holds several shapefiles, the first in sorted order is used.
        shp_path = os.path.join(file_name, shp_names[0])

        for file_name_name in entries:
            if not file_name_name.endswith(".dat"):
                continue
            file_name_name_img = os.path.join(file_name, file_name_name)

            # Derive all three output names from the bare stem. The original
            # code chained .replace() calls on the ".dat" name, so the
            # "-sub-tc" and "-sub-tc-clip" outputs kept the ".dat" name
            # although they are GeoTIFF files.
            stem = file_name_name[:-len(".dat")]
            img_path_out_med = os.path.join(out_path_med, stem + "-sub.tif")
            out_path_med_med_abs = os.path.join(out_path_med_med, stem + "-sub-tc.tif")
            out_path_last_abs = os.path.join(out_path_last, stem + "-sub-tc-clip.tif")

            # First clipping.
            ds = gdal.Warp(img_path_out_med, file_name_name_img, format='GTiff',
                           cutlineDSName=shp_path, cropToCutline=True, dstNodata=0)
            # Close the warped dataset so it is fully written to disk before
            # the file is re-opened below.
            ds = None

            # Filter outliers.
            img_array, data_type, img_geotrans, img_proj = read_img(img_path_out_med)
            # Percentile-based removal.
            # NOTE(review): the percentile is taken over every pixel of every
            # band, including the 0-valued nodata background produced by the
            # clipping - confirm this is the intended cut-off.
            high = np.percentile(img_array, number_th)
            # Alternative, fixed-value removal: high = number_th
            img_array[img_array > high] = 0
            write_gdal(out_path_med_med_abs, img_array, data_type, img_geotrans, img_proj)

            # Second clipping.
            ds = gdal.Warp(out_path_last_abs, out_path_med_med_abs, format='GTiff',
                           cutlineDSName=shp_path, cropToCutline=True, dstNodata=0)
            ds = None