1. WHU-CD は、公開されている建物の変化検出 (CD: Change Detection) データセットです。これには、サイズ 32507 × 15354 の高解像度 (0.075 m) 航空写真のペアが 1 組含まれています。画像を重なりのない 256 × 256 の小さなパッチに切り出し、ランダムに 3 つの部分 (トレーニング/検証/テスト用にそれぞれ 6096/762/762 枚) に分割します。
import os
from PIL import Image
def crop_and_save_images(source_folder, target_folder, patch_size=256):
    """Cut every ``.tif`` image in a folder into square patches saved as PNG.

    Each image is walked row by row (top to bottom, left to right within a
    row) in steps of ``patch_size``. Patches are written to ``target_folder``
    as ``<image stem>_<index>.png``; the index restarts at 0 for every image
    and increases in visiting order.

    Args:
        source_folder: Directory scanned (non-recursively) for files whose
            name ends with ``.tif``; every other entry is ignored.
        target_folder: Directory receiving the patches. It is created,
            including missing parents, when it does not exist yet.
        patch_size: Edge length of one patch in pixels. Defaults to 256.

    Raises:
        ValueError: If ``patch_size`` is not positive.
    """
    if patch_size <= 0:
        raise ValueError(
            "patch_size must be positive, got {!r}".format(patch_size)
        )

    # Saving into a missing directory fails, so make sure it exists up front.
    os.makedirs(target_folder, exist_ok=True)

    for filename in os.listdir(source_folder):
        if not filename.endswith('.tif'):
            continue

        stem = os.path.splitext(filename)[0]
        # NOTE(review): Pillow rejects images above its decompression-bomb
        # pixel limit; very large rasters may need Image.MAX_IMAGE_PIXELS
        # raised by the caller before this runs -- confirm for your data.
        with Image.open(os.path.join(source_folder, filename)) as img:
            width, height = img.size
            patch_num = 0
            for y in range(0, height, patch_size):
                for x in range(0, width, patch_size):
                    # The box may reach past the right/bottom border when the
                    # image size is not a multiple of patch_size; Pillow is
                    # expected to pad that area so the patch keeps full size
                    # (TODO confirm the fill value suits the label images).
                    patch = img.crop((x, y, x + patch_size, y + patch_size))
                    patch_name = "{}_{}.png".format(stem, patch_num)
                    patch.save(os.path.join(target_folder, patch_name))
                    patch_num += 1
# Each source folder (scanned for .tif images) is paired with the folder that
# receives its patches.
source_folder_T1, target_folder_A = "T1", "A"
source_folder_T2, target_folder_B = "T2", "B"
source_folder_label1, target_folder_label = "label1", "label"

# Tile the folders one pair at a time: T1 -> A, T2 -> B, label1 -> label.
for _src_dir, _dst_dir in (
    (source_folder_T1, target_folder_A),
    (source_folder_T2, target_folder_B),
    (source_folder_label1, target_folder_label),
):
    crop_and_save_images(_src_dir, _dst_dir)
3. データセットをトレーニング セット、テスト セット、検証セットに 8:1:1 の比率で分割します。
import os
import random
# Folder whose entries get split, followed by the three list files that the
# split is written to.
source_folder = "images"
# NOTE(review): label_txt receives the entries left over after the train and
# test portions (the else branch of split_images_to_txt); per the 8:1:1 note
# this is presumably the validation list -- confirm the intended file name.
train_txt, test_txt, label_txt = "train.txt", "test.txt", "label.txt"
def split_images_to_txt(source_folder, train_txt, test_txt, label_txt, seed=None):
    """Randomly split the entries of a folder into three list files (8:1:1).

    The directory listing is shuffled; the first 80% of the names go to
    ``train_txt``, the next 10% to ``test_txt`` and everything left over to
    ``label_txt``. Both percentages are truncated to whole counts, so any
    rounding remainder ends up in the last file. Each file holds one entry
    name (with its extension) per line and is overwritten if it exists.

    Args:
        source_folder: Directory whose entries (as returned by
            ``os.listdir``) are split. Entries are not filtered by type.
        train_txt: Path of the list file for the first 80% of the entries.
        test_txt: Path of the list file for the following 10%.
        label_txt: Path of the list file for the remaining entries.
        seed: Optional seed for a reproducible split. When ``None`` (the
            default) the module-level ``random`` state is used.
    """
    # os.listdir order is arbitrary; sorting first makes a seeded shuffle
    # give the same split on every machine.
    image_files = sorted(os.listdir(source_folder))
    if seed is None:
        random.shuffle(image_files)
    else:
        # A private generator keeps the global random state untouched.
        random.Random(seed).shuffle(image_files)

    total_num = len(image_files)
    train_num = int(total_num * 0.8)
    test_num = int(total_num * 0.1)
    test_end = train_num + test_num

    with open(train_txt, 'w') as f_train, \
            open(test_txt, 'w') as f_test, \
            open(label_txt, 'w') as f_label:
        f_train.writelines(name + '\n' for name in image_files[:train_num])
        f_test.writelines(name + '\n' for name in image_files[train_num:test_end])
        f_label.writelines(name + '\n' for name in image_files[test_end:])
# Run the split with the folder and list-file names configured above.
split_images_to_txt(
    source_folder=source_folder,
    train_txt=train_txt,
    test_txt=test_txt,
    label_txt=label_txt,
)