gdal笔记--chapter12

遥感图像分类(监督分类、非监督分类)

1.K均值聚类分析(非监督分类)

#非监督分类
#堆叠栅格波段的函数
from osgeo import gdal
import numpy as np

def stack_bands(filenames):
    bands = []
    for fn in filenames:
        ds = gdal.Open(fn)
        for i in range(1, ds.RasterCount+1):
            bands.append(ds.GetRasterBand(i).ReadAsArray())
    return np.dstack(bands)

#Spectral的K均值聚类分析
import os
import numpy as np
import spectral
from osgeo import gdal
import ospybook as pb

folder = r'E:\桌面文件保存路径\gdal\osgeopy-data\osgeopy-data\Landsat\Utah'
raster_fn = ['LE70380322000181EDC02_60m.tif', 'LE70380322000181EDC02_TIR_60m.tif']
out_fn = 'kmeans_prediction_60m2.tif'

os.chdir(folder)

data = pb.stack_bands(raster_fn) #数据堆叠
classes, centers = spectral.kmeans(data) #运行模型
#默认10个集群,20次迭代
ds = gdal.Open(raster_fn[0]) #用于下面获取投影和仿射变换
out_ds = pb.make_raster(ds, out_fn, classes, gdal.GDT_Byte)
levels = pb.compute_overview_levels(out_ds.GetRasterBand(1))
out_ds.BuildOverviews('NEAREST', levels)
out_ds.FlushCache()
out_ds.GetRasterBand(1).ComputeStatistics(False)

del out_ds, ds

2.Sklearn进行CART决策树分类(监督分类)

#使用CART地图分类(监督分类)
import csv
import os
import numpy as np
from sklearn import tree
from osgeo import gdal
import ospybook as pb

folder = r'E:\桌面文件保存路径\gdal\osgeopy-data\osgeopy-data\Landsat\Utah'
raster_fn = ['LE70380322000181EDC02_60m.tif', 'LE70380322000181EDC02_TIR_60m.tif']  #遥感影像波段存储
out_fn = r'tree_prediction60.tif'
train_fn = r'E:\桌面文件保存路径\gdal\osgeopy-data\osgeopy-data\Utah\training_data.csv'  #csv文件,坐标和数据
gap_fn = r'E:\桌面文件保存路径\gdal\osgeopy-data\osgeopy-data\Utah\landcover60.tif' #颜色表数据

os.chdir(folder)

xys = []
classes = []
with open(train_fn) as fp:
    reader = csv.reader(fp)  #返回reader对象,遍历行
    next(reader) #读取首行
    for row in reader: #遍历csv各行
        xys.append([float(n) for n in row[:2]]) #前两列是坐标
        classes.append(int(row[2])) #第三列是类别

ds = gdal.Open(raster_fn[0])
pixel_trans = gdal.Transformer(ds, None, [])  #设置None,将投影坐标转换成图像坐标(即像素偏移)
offset, ok = pixel_trans.TransformPoints(True, xys) #计算像素偏移
cols, rows, z = zip(*offset) #分别计算出行列号和z,用于读取波段值
cols = [int(col) for col in cols]  #行列号整数
rows = [int(row) for row in rows]

data = pb.stack_bands(raster_fn)  #堆叠两个波段的数组

sample = data[rows, cols, :] #采样读取样本点处的像元值(波段全选,按列表读取对应行列的值)
print(np.shape(sample)) #cols*rows行,列为波段数(578,4)

clf = tree.DecisionTreeClassifier(max_depth=5) #建立模型,树深度5
clf = clf.fit(sample, classes) #拟合模型clf

#内存不够大时,逐行读取
# prediction = np.empty(data.shape[0:2]) #行*列 全0数组
# print(np.shape(prediction)) #(3631, 3996)原数据行列数
# for i in range(data.shape[0]):
#     prediction[i, :] = clf.predict(data[i, :, :]) #逐行读取原数据,拟合模型

rows, cols, bands = data.shape
data2d = np.reshape(data, (rows*cols, bands))  #reshape波段数组,按波段数展开。列数是波段数,行数是点数,每行是对应点的像元值
prediction = clf.predict(data2d)
prediction = np.reshape(prediction,(rows, cols)) #reshape回原样

prediction[np.sum(data, 2)==0] = 0 #求和,axis=2,如果原数据所有波段这个点和为0 ,那应该是nodata
predict_ds = pb.make_raster(ds, out_fn, prediction, gdal.GDT_Byte, 0)
predict_ds.FlushCache() #刷新缓存
levels = pb.compute_overview_levels(predict_ds.GetRasterBand(1)) #输出合适的levels(如2\4\8\16),创建概览图
print(levels)
predict_ds.BuildOverviews('NEAREST', levels)

gap_ds = gdal.Open(gap_fn)
colors = gap_ds.GetRasterBand(1).GetRasterColorTable()  #读取样本图颜色表
predict_ds.GetRasterBand(1).SetRasterColorTable(colors) #设置颜色表

del ds

3. Kappa系数、混淆矩阵计算

#混淆矩阵和kappa系数统计
#提取预测图上的样本点值,和实际样本值放在一起计算kappa系数、混淆矩阵
import csv
import os
import numpy as np

from sklearn import metrics #计算混淆矩阵
import skll  #计算kappa系数,kappa系数越小,分类越差
from osgeo import gdal

folder = ''
accuracy_fn = r''  #csv文件,样本点分类
matrix_fn = r''    #存储混淆矩阵
prediction_fn = r'' #预测的结果

os.chdir('')

xys = []  #存储点的投影坐标
classes = []
with open(accuracy_fn) as fp:
    reader = csv.reader(fp)
    next(reader)
    for row in reader:
        xys.append([float(n) for n in row[:2]])
        classes.append(int(row[2]))

ds = gdal.Open(prediction_fn)  #打开预测图
pixel_trans = gdal.Transformer(ds, None, [])  #定义投影坐标到图像坐标的转换器,提取图像坐标
offsets, ok = pixel_trans.TransformPoints(True, xys)
cols, rows, z = zip(*offsets)

data = ds.GetRasterBand(1).ReadAsArray()
sample = data[rows, cols] #读取样本点在预测图上的分类结果
del ds

print('Kappa: ', skll.kappa(classes, sample))  #classes、sample(样本值、预测值)用来计算kappa系数

labels = np.unique(np.concatenate((classes, sample)))  #classes\samples添加到一行,unique筛选所有出现的类别,作为行列
matrix = metrics.confusion_matrix(classes, sample, labels) #计算混淆矩阵

matrix = np.insert(matrix, 0, labels, 0)  #labels插入第一行,作为行坐标
matrix = np.insert(matrix, 0, np.insert(labels, 0, 0), 1) #先插入0到起点,再把labels插入第一列,作为列坐标
np.savetxt(matrix_fn, matrix, fmt='1.0f%', delimeter=',')

猜你喜欢

转载自www.cnblogs.com/ljwgis/p/12668830.html