変化検出 WHU-CD データセットの処理

1. WHU-CD は、公開されている建物変化検出 (CD) データセットです。サイズ 32507 × 15354 の高解像度 (0.075 m) 航空写真のペアが含まれています。画像を重なりのない 256×256 の小さなパッチに切り出し、ランダムに 3 つのサブセット (トレーニング/検証/テスト用にそれぞれ 6096/762/762 枚) に分割します。

import os
from PIL import Image

def crop_and_save_images(source_folder, target_folder, patch_size=256):
    """Cut every ``.tif`` image in *source_folder* into square PNG patches.

    The image is scanned row by row (top to bottom, left to right) in steps
    of *patch_size* pixels, so patches do not overlap.  Each patch is saved
    to *target_folder* as ``<original stem>_<index>.png`` where the index
    starts at 0 for every source image.

    Args:
        source_folder: Directory that is listed for ``.tif`` files.
        target_folder: Directory the patches are written to; created if it
            does not exist yet.
        patch_size: Edge length of a patch in pixels (default 256).

    Raises:
        ValueError: If *patch_size* is not a positive number.
    """
    if patch_size <= 0:
        raise ValueError("patch_size must be positive, got {}".format(patch_size))

    # Saving a patch does not create missing directories, so make sure the
    # destination exists before the first patch is written.
    os.makedirs(target_folder, exist_ok=True)

    # os.listdir returns entries in arbitrary order; sorting makes the
    # processing order reproducible between runs and machines.
    for file in sorted(os.listdir(source_folder)):
        if not file.endswith('.tif'):
            continue

        stem = os.path.splitext(file)[0]
        image_path = os.path.join(source_folder, file)

        # NOTE(review): Pillow's decompression-bomb guard
        # (Image.MAX_IMAGE_PIXELS) may reject very large aerial images --
        # confirm and raise the limit in the calling script if needed.
        # The context manager closes the underlying file handle once all
        # patches of this image have been written.
        with Image.open(image_path) as img:
            width, height = img.size

            patch_num = 0
            for y in range(0, height, patch_size):
                for x in range(0, width, patch_size):
                    # NOTE(review): when width/height are not multiples of
                    # patch_size, the last column/row box extends past the
                    # image; Pillow presumably pads that area -- confirm
                    # that padded edge patches are wanted.
                    box = (x, y, x + patch_size, y + patch_size)
                    cropped_img = img.crop(box)
                    cropped_img_name = stem + "_{}.png".format(patch_num)
                    target_path = os.path.join(target_folder, cropped_img_name)
                    cropped_img.save(target_path)
                    patch_num += 1

# Folders that are scanned for the source .tif images.
source_folder_T1 = "T1"
source_folder_T2 = "T2"
source_folder_label1 = "label1"

# Folders that receive the cropped PNG patches.
target_folder_A = "A"
target_folder_B = "B"
target_folder_label = "label"

# Crop each source folder into its matching target folder, in the order
# T1 -> A, T2 -> B, label1 -> label.
for _src_dir, _dst_dir in (
    (source_folder_T1, target_folder_A),
    (source_folder_T2, target_folder_B),
    (source_folder_label1, target_folder_label),
):
    crop_and_save_images(_src_dir, _dst_dir)

3. データをトレーニングセット、テストセット、検証セットに 8:1:1 の比率で分割します。

import os
import random

# Folder whose entries are listed and split into the three lists below.
source_folder = "images"
# Output list files; each one receives one file name per line.
train_txt = "train.txt"
test_txt = "test.txt"
# Receives the entries left over after the train and test shares.
# NOTE(review): the surrounding text describes a train/test/validation
# split, so this is presumably the validation list despite the "label"
# name -- confirm before renaming.
label_txt = "label.txt"

def split_images_to_txt(source_folder, train_txt, test_txt, label_txt, seed=None):
    """Randomly split the entries of *source_folder* into three list files.

    The directory listing is shuffled and then cut into three consecutive
    slices: the first ``int(total * 0.8)`` names go to *train_txt*, the next
    ``int(total * 0.1)`` names go to *test_txt*, and every remaining name
    goes to *label_txt*.  Each file name (including its extension) is
    written on its own line.  All three output files are created, even if a
    slice is empty.

    Args:
        source_folder: Directory whose entries are split.
        train_txt: Path of the file receiving the first (80 %) slice.
        test_txt: Path of the file receiving the second (10 %) slice.
        label_txt: Path of the file receiving the remaining entries.
        seed: Optional seed for a private random generator.  With the
            default ``None`` the module-level ``random`` state is used, as
            before; passing a value makes the split reproducible.
    """
    # os.listdir order is arbitrary, so sort first: together with a seed
    # this makes the resulting split fully reproducible.
    image_files = sorted(os.listdir(source_folder))
    rng = random if seed is None else random.Random(seed)
    rng.shuffle(image_files)

    total_num = len(image_files)
    train_num = int(total_num * 0.8)
    test_num = int(total_num * 0.1)
    test_end = train_num + test_num

    with open(train_txt, 'w') as f_train, open(test_txt, 'w') as f_test, open(label_txt, 'w') as f_label:
        f_train.writelines(name + '\n' for name in image_files[:train_num])
        f_test.writelines(name + '\n' for name in image_files[train_num:test_end])
        # Whatever is left after the train and test slices (including any
        # rounding remainder) ends up here.
        f_label.writelines(name + '\n' for name in image_files[test_end:])
        

# Run the split using the module-level folder and list-file paths.
split_images_to_txt(source_folder, train_txt, test_txt, label_txt)

変更検出 WHU-CD データセット処理

おすすめ