GDAL笔记--chapter11

本章主要介绍了利用numpy和scipy库做地图运算，并讲述了局部分析、焦点分析、区域分析和全局分析几种不同的方法。

1.保存为新栅格

import gdal
import numpy as np
#保存为新栅格的函数
def make_raster(in_ds, fn, data, data_type, nodata=None):
    """Write ``data`` to a new single-band GeoTIFF georeferenced like ``in_ds``.

    Args:
        in_ds: Open GDAL dataset supplying the raster size, projection and
            geotransform copied to the output.
        fn: Path of the GeoTIFF to create.
        data: Array written to band 1 with ``WriteArray``.
        data_type: GDAL pixel type constant passed to ``Create``.
        nodata: Optional NoData value; nothing is set when it is ``None``.

    Returns:
        The newly created GDAL dataset (statistics already computed).
    """
    # Import from the ``osgeo`` package here: the bare top-level
    # ``import gdal`` is deprecated in GDAL's Python bindings.
    from osgeo import gdal

    driver = gdal.GetDriverByName('GTiff')
    out_ds = driver.Create(
        fn, in_ds.RasterXSize, in_ds.RasterYSize, 1, data_type)
    out_ds.SetProjection(in_ds.GetProjection())
    out_ds.SetGeoTransform(in_ds.GetGeoTransform())
    out_band = out_ds.GetRasterBand(1)
    if nodata is not None:
        out_band.SetNoDataValue(nodata)
    out_band.WriteArray(data)
    out_band.FlushCache()
    out_band.ComputeStatistics(False)
    return out_ds

2.局部分析（多栅格运算）

# Replace NaN and +/-inf results of the division with the -99 sentinel
ndvi = (nir - red) / (nir + red)
invalid = np.isnan(ndvi) | np.isinf(ndvi)
ndvi = np.where(invalid, -99, ndvi)
out_band.WriteArray(ndvi)
out_band.SetNoDataValue(-99)  # declare -99 as the band's NoData value

# Alternative: keep the ratio only where the denominator is positive
band_sum = nir + red
ndvi = np.where(band_sum > 0, (nir - red) / band_sum, -99)

#为NAIP图像计算NDVI值
import os
import numpy as np
from osgeo import gdal
import ospybook as pb

os.chdir('')  # placeholder: set to the folder holding the NAIP image
in_fn = ''    # placeholder: name of the input image
out_fn = 'ndvi.tif'

# Open the file named by in_fn (the original passed an empty literal here).
ds = gdal.Open(in_fn)
# np.float was removed in NumPy 1.24; np.float64 is the type it aliased.
red = ds.GetRasterBand(1).ReadAsArray().astype(np.float64)
nir = ds.GetRasterBand(4).ReadAsArray()
# Mask red wherever nir + red == 0 so the division skips those cells.
red = np.ma.masked_where(nir + red == 0, red)
ndvi = (nir - red) / (nir + red)
ndvi = ndvi.filled(-99)  # masked (uncomputed) cells receive the NoData value

out_ds = pb.make_raster(ds, out_fn, ndvi, gdal.GDT_Float32, -99)
overviews = pb.compute_overview_levels(out_ds.GetRasterBand(1))
# The dataset method is spelled BuildOverviews (lower-case "v").
out_ds.BuildOverviews('average', overviews)
del ds, out_ds

# Mask, variant 2: build one standalone mask that can be applied to several arrays
mask = np.ma.equal(nir + red, 0)
# Hide the red band wherever the mask is set
red = np.ma.masked_array(red, mask=mask)

3.从催化剂数组得到任意大小切片数组（这里数组读取是连续的，没有步长；和有步长的切片方法不一样）

#从催化剂数组中得到任意大小切片的函数
def make_slices(data, win_size):
    """Return every offset view of ``data`` for a moving window.

    Args:
        data: Two-dimensional array to slice.
        win_size: ``(rows, columns)`` of the moving window.

    Returns:
        A list with ``win_size[0] * win_size[1]`` slices in row-major offset
        order, each ``data.shape - win_size + 1`` in size.
    """
    win_rows, win_cols = win_size[0], win_size[1]
    out_rows = data.shape[0] - win_rows + 1
    out_cols = data.shape[1] - win_cols + 1
    return [
        data[dy:out_rows + dy, dx:out_cols + dx]
        for dy in range(win_rows)
        for dx in range(win_cols)
    ]

# Stack the slices along a third axis so they can be averaged together
stacked = np.dstack(slices)
outdata = np.zeros(indata.shape, dtype=np.float32)
# Average along the depth axis; the slices are smaller than the input,
# so one row/column is left untouched on each side of the output
outdata[1:-1, 1:-1] = stacked.mean(axis=2)

4.焦点分析（使用围绕的像素计算，先计算切片，再运算）

4.1 平滑高程数据集

#平滑一个高程数据集
import os
import numpy as np
from osgeo import gdal
import ospybook as pb

in_fn = ''
out_fn = ''
in_ds = gdal.Open(in_fn)
in_band = in_ds.GetRasterBand(1)
in_data = in_band.ReadAsArray()

# Nine offset views of the input, one per cell of the 3x3 window
slices = pb.make_slices(in_data, (3, 3))
stacked_data = np.ma.dstack(slices)

rows, cols = in_band.YSize, in_band.XSize
# Start from an all-NoData grid so the untouched border stays -99
out_data = np.full((rows, cols), -99, np.int32)
# Mean across the stacked slices fills everything except the border
out_data[1:-1, 1:-1] = stacked_data.mean(axis=2)

pb.make_raster(in_ds, out_fn, out_data, gdal.GDT_Int32, -99)
del in_ds

4.2 坡度计算

#从DEM计算坡度，这里不必把切片堆叠到三维数组(dstack),因为需要在坡度方程单独引用切片
import os
import numpy as np
from osgeo import gdal
import ospybook as pb

in_fn = ''   # placeholder: path of the input DEM
out_fn = ''  # placeholder: path of the output slope raster

in_ds = gdal.Open(in_fn)
cell_width = in_ds.GetGeoTransform()[1]   # pixel size in x
cell_height = in_ds.GetGeoTransform()[5]  # pixel size in y
band = in_ds.GetRasterBand(1)
# np.float was removed in NumPy 1.24; np.float64 is the type it aliased.
in_data = band.ReadAsArray().astype(np.float64)
out_data = np.ones((band.YSize, band.XSize)) * -99  # start as all NoData

# The slices are used individually in the slope equation, so they are
# not stacked into a 3-D array here.
# Assumes pb.make_slices returns the 3x3 offsets in row-major order
# (0-2 top row, 3-5 middle row, 6-8 bottom row) -- TODO confirm.
slices = pb.make_slices(in_data, (3, 3))
# Partial derivatives of the surface (Horn's 3x3 method):
# bottom row minus top row, and right column minus left column.
rise = ((slices[6] + 2 * slices[7] + slices[8]) -
        (slices[0] + 2 * slices[1] + slices[2])) / (8 * cell_height)
run = ((slices[2] + 2 * slices[5] + slices[8]) -
       (slices[0] + 2 * slices[3] + slices[6])) / (8 * cell_width)
dist = np.sqrt(np.square(rise) + np.square(run))
out_data[1:-1, 1:-1] = np.arctan(dist) * 180 / np.pi  # radians -> degrees

pb.make_raster(in_ds, out_fn, out_data, gdal.GDT_Float32, -99)
del in_ds

5.scipy具有傅里叶变换、插值、图像处理等功能，可用于焦点分析

#使用scipy平滑滤波器
import os
import scipy.ndimage
from osgeo import gdal
import ospybook as pb

in_fn = ''   # placeholder: path of the input raster
out_fn = ''  # placeholder: path of the smoothed output raster

in_ds = gdal.Open(in_fn)
in_data = in_ds.GetRasterBand(1).ReadAsArray()

# Call uniform_filter from scipy.ndimage directly; the
# scipy.ndimage.filters namespace is deprecated.
# size=3 is the moving-window size; mode='nearest' pads the edges with
# the nearest pixel value.
out_data = scipy.ndimage.uniform_filter(
    in_data, size=3, mode='nearest')
pb.make_raster(in_ds, out_fn, out_data, gdal.GDT_Int32)
del in_ds

#利用scipy计算斜率
import os
import numpy as np
import scipy.ndimage
from osgeo import gdal
import ospybook as pb

in_fn=''   # placeholder: input raster opened below with gdal.Open(in_fn)
out_fn=''  # placeholder: output path handed to pb.make_raster below

#定义坡度计算函数
def slope(data, cell_width, cell_height):
    """Return the slope in degrees for one flattened 3x3 window.

    Args:
        data: The nine window values in row-major order (indices 0-2 top
            row, 3-5 middle row, 6-8 bottom row).
        cell_width: Pixel size in the x direction.
        cell_height: Pixel size in the y direction (its sign does not
            affect the result because the derivative is squared).

    Returns:
        Slope angle in degrees.
    """
    # Bottom row minus top row; the subtraction must cover all three
    # top-row terms, hence the parentheses.
    rise = ((data[6] + 2 * data[7] + data[8]) -
            (data[0] + 2 * data[1] + data[2])) / (8 * cell_height)
    # Right column minus left column.
    run = ((data[2] + 2 * data[5] + data[8]) -
           (data[0] + 2 * data[3] + data[6])) / (8 * cell_width)
    dist = np.sqrt(np.square(rise) + np.square(run))
    return np.arctan(dist) * 180 / np.pi

in_ds = gdal.Open(in_fn)
in_band = in_ds.GetRasterBand(1)
in_data = in_band.ReadAsArray().astype(np.float32)  # work in floating point

cell_width = in_ds.GetGeoTransform()[1]
cell_height = in_ds.GetGeoTransform()[5]
# Custom SciPy filter: slope() is called once per 3x3 window.
# generic_filter is taken from scipy.ndimage directly because the
# scipy.ndimage.filters namespace is deprecated.
out_data = scipy.ndimage.generic_filter(
    in_data, slope, size=3, mode='nearest',
    extra_arguments=(cell_width, cell_height)
)
# The GDAL constant is GDT_Float32 (with an underscore).
pb.make_raster(in_ds, out_fn, out_data, gdal.GDT_Float32)
del in_ds

6.分块焦点分析（内存不足时将图像分块处理）

#打破焦点分析
# (没有足够的内存存放图像的情况下，把图像分成重叠的块)

#分块的焦点分析
import os
import numpy as np
from osgeo import gdal
import ospybook as pb

in_fn = ''   # placeholder: path of the input raster
out_fn = ''  # placeholder: path of the smoothed output raster

in_ds = gdal.Open(in_fn)
in_band = in_ds.GetRasterBand(1)
xsize = in_band.XSize
ysize = in_band.YSize

driver = gdal.GetDriverByName('GTiff')
out_ds = driver.Create(out_fn, xsize, ysize, 1, gdal.GDT_Int32)
out_ds.SetProjection(in_ds.GetProjection())
out_ds.SetGeoTransform(in_ds.GetGeoTransform())
out_band = out_ds.GetRasterBand(1)
out_band.SetNoDataValue(-99)

n = 100  # rows of output produced per chunk
for i in range(0, ysize, n):
    # Start one row early (except at the top) so the 3x3 window has a
    # neighbour above the first row of this chunk.
    yoff = max(0, i - 1)
    # Read n rows plus one overlap row on each side, but never past the
    # bottom of the raster. Measuring from yoff keeps the final row in the
    # last chunk.
    rows = min(n + 2, ysize - yoff)
    in_data = in_band.ReadAsArray(0, yoff, xsize, rows)
    slices = pb.make_slices(in_data, (3, 3))
    # dstack puts the slices on the third axis, which is the axis averaged
    stacked_data = np.ma.dstack(slices)
    out_data = np.ones(in_data.shape, np.int32) * -99
    out_data[1:-1, 1:-1] = np.mean(stacked_data, 2)

    if yoff == 0:
        # First chunk: write everything from the top of the raster
        out_band.WriteArray(out_data)
    else:
        # Later chunks: skip the first (overlap) row so the last valid
        # row written by the previous chunk is not overwritten
        out_band.WriteArray(out_data[1:], 0, yoff + 1)
out_band.FlushCache()
out_band.ComputeStatistics(False)
del out_ds, in_ds

7. 区域分析(histogram2d)

#区域分析
import numpy as np
import scipy.stats
from osgeo import gdal

# Land-cover raster; opened further down with gdal.Open(landcover_fn)
landcover_fn = r'E:\桌面文件保存路径\gdal\osgeopy-data\osgeopy-data\Utah\landcover60.tif'
# Ecoregion raster path -- presumably the "zones" input; verify before use
ecoregion_fn = r'E:\桌面文件保存路径\gdal\osgeopy-data\osgeopy-data\Utah\utah_ecoIII60.tif'
# Name for a CSV output file
out_fn ='histogram.csv'

def get_bins(data):
    """Return bin edges that give each unique value in ``data`` its own bin.

    Args:
        data: Array-like of values to bin.

    Returns:
        The sorted unique values followed by ``largest + 1`` as the closing
        right edge.
    """
    bins = np.unique(data)  # sorted unique values
    # Convert the maximum to a Python scalar before adding 1: with small
    # integer dtypes (e.g. uint8 value 255) NumPy 2 keeps the array dtype
    # and the sum would wrap around instead of giving 256.
    upper_edge = max(bins).item() + 1
    return np.append(bins, upper_edge)

# Histogram rows (zones) follow the first array passed in and histogram
# columns (land cover) follow the second.
# NOTE(review): `zones` and `landcover` are not defined in this snippet;
# they are presumably 2-D arrays read from the two rasters -- confirm.
hist, zone_bins, landcover_bins = np.histogram2d(
    zones.flatten(), landcover.flatten(),
    [get_bins(zones), get_bins(landcover)]
)

lc_ds = gdal.Open(landcover_fn)
lc_band = lc_ds.GetRasterBand(1)
lc_data = lc_band.ReadAsArray().flatten()
bins = np.unique(lc_data)
print(bins)
# .item() turns the maximum into a Python scalar so that adding 1 cannot
# wrap around for small integer dtypes such as uint8.
print(np.append(bins[~np.isnan(bins)], max(bins).item() + 1))

8.利用scipy做区域分析(scipy.stats.binned_statistic_2d)

# #利用scipy做区域分析
import numpy as np
import scipy.stats
from osgeo import gdal

def get_bins(data):
    """Return bin edges that give each unique value in ``data`` its own bin.

    Args:
        data: Array-like of values to bin.

    Returns:
        The sorted unique values followed by ``largest + 1`` as the closing
        right edge.
    """
    bins = np.unique(data)  # sorted unique values
    # Convert the maximum to a Python scalar before adding 1: with small
    # integer dtypes (e.g. uint8 value 255) NumPy 2 keeps the array dtype
    # and the sum would wrap around instead of giving 256.
    upper_edge = max(bins).item() + 1
    return np.append(bins, upper_edge)

# Input rasters and the CSV file written at the end
landcover_fn = r'E:\桌面文件保存路径\gdal\osgeopy-data\osgeopy-data\Utah\landcover60.tif'
ecoregion_fn = r'E:\桌面文件保存路径\gdal\osgeopy-data\osgeopy-data\Utah\utah_ecoIII60.tif'
out_fn = 'histogram.csv'

# Ecoregion raster, flattened to 1-D, and its bin edges
eco_ds = gdal.Open(ecoregion_fn)
eco_band = eco_ds.GetRasterBand(1)
eco_data = eco_band.ReadAsArray().flatten()
eco_bins = get_bins(eco_data)

# Land-cover raster, flattened to 1-D, and its bin edges
lc_ds = gdal.Open(landcover_fn)
lc_band = lc_ds.GetRasterBand(1)
lc_data = lc_band.ReadAsArray().flatten()
lc_bins = get_bins(lc_data)

# Arguments: the two datasets to bin by, a third array to compute the
# statistic on, the statistic ('count') and the bin edges. The results are
# the histogram, both sets of edges, and the bin number of every sample.
# Rows follow the first array and columns the second.
# E.g. passing elevation data and 'mean' as the third and fourth arguments
# would give the mean elevation per ecoregion / land-cover combination.
hist, eco_bins2, lc_bins2, bn = scipy.stats.binned_statistic_2d(
    eco_data, lc_data, lc_data,
    statistic='count',
    bins=[eco_bins, lc_bins],
)
# print(hist)
print(eco_bins2)  # row edges
print(lc_bins2)   # column edges
print(max(bn))    # highest bin number assigned
# Prepend the land-cover values as the first row of the table
hist = np.insert(hist, 0, lc_bins[:-1], axis=0)
# Ecoregion values with a leading 0 for the corner cell
row_labels = np.insert(eco_bins[:-1], 0, 0)
# Prepend the ecoregion values as the first column of the table
hist = np.insert(hist, 0, row_labels, axis=1)
np.savetxt(out_fn, hist, fmt='%1.0f', delimiter=',')
# '%1.0f': at least one character wide, no digits after the decimal point,
# formatted as a float

#如果想知道每个生态区最常见的土地覆盖类型，不需要知道数量
def my_mode(data):
    """Return the most frequent value in ``data`` as a scalar.

    Args:
        data: Array-like of values.

    Returns:
        The modal value, or NaN when no mode is available (empty input).
    """
    most_common = scipy.stats.mode(data)[0]
    # Depending on the SciPy version the mode comes back as a scalar or as
    # a length-1 array; flatten it so callers always receive a scalar.
    flat = np.ravel(most_common)
    return flat[0] if flat.size else np.nan

# Bin the samples by ecoregion only and apply my_mode to the land-cover
# values that fall in each bin
mode, bins, bn = scipy.stats.binned_statistic(
    eco_data, lc_data, statistic=my_mode, bins=eco_bins)
print(mode)  # statistic computed for each ecoregion bin
print(bins)  # ecoregion bin edges
print(bn)    # bin number each sample fell into

9. 全局分析（用到了gdal.RasterizeLayer和gdal.ComputeProximity）

#全局分析
#邻近分析
import os
import sys
from osgeo import gdal, ogr

folder = ''  # folder holding the shapefiles
road_ln = ''  # roads layer name
wilderness_ln = ''  # wilderness layer name
road_raster_fn = ''  # output raster of burned-in roads
proximity_fn = 'proximity.tif'  # output proximity raster
cell_size = 10

shp_ds = ogr.Open(folder)
wild_lyr = shp_ds.GetLayerByName(wilderness_ln)
# Attribute query. Double quotes outside so the SQL string literal can use
# single quotes.
# NOTE(review): confirm the exact WILD_NM value (spacing around the dash)
# against the layer's attribute table.
wild_lyr.SetAttributeFilter("WILD_NM ='Frank Church -RONR'")
# One envelope (bounding rectangle) per selected feature
envelopes = [row.geometry().GetEnvelope() for row in wild_lyr]
# Transpose so each entry of coords holds one envelope component across
# all features, then take the overall extent of the wilderness area
coords = list(zip(*envelopes))
minx, maxx = min(coords[0]), max(coords[1])
miny, maxy = min(coords[2]), max(coords[3])

road_lyr = shp_ds.GetLayerByName(road_ln)
# Keep only the roads inside the wilderness extent
road_lyr.SetSpatialFilterRect(minx, miny, maxx, maxy)

os.chdir(folder)
tif_driver = gdal.GetDriverByName('GTiff')
# Number of columns and rows needed to cover the extent
cols = int((maxx - minx) / cell_size)
rows = int((maxy - miny) / cell_size)

road_ds = tif_driver.Create(road_raster_fn, cols, rows)
# The layer returns a spatial reference object; the dataset wants WKT
road_ds.SetProjection(road_lyr.GetSpatialRef().ExportToWkt())
# SetGeoTransform takes a single six-element sequence
road_ds.SetGeoTransform((minx, cell_size, 0, maxy, 0, -cell_size))

# Rasterize the roads: the first [1] is the list of band indices, and
# burn_values=[1] is the value written wherever a feature is present
gdal.RasterizeLayer(
    road_ds, [1], road_lyr, burn_values=[1], callback=gdal.TermProgress)

prox_ds = tif_driver.Create(proximity_fn, cols, rows, 1, gdal.GDT_Int32)
prox_ds.SetProjection(road_ds.GetProjection())
prox_ds.SetGeoTransform(road_ds.GetGeoTransform())
# Compute proximity to the road pixels into the proximity band.
# DISTUNITS=GEO asks for distances in georeferenced units instead of pixels
gdal.ComputeProximity(
    road_ds.GetRasterBand(1), prox_ds.GetRasterBand(1),
    ['DISTUNITS=GEO'], gdal.TermProgress)

# Only statistics inside the wilderness area are needed, so the
# wilderness raster is kept in memory with the MEM driver
wild_ds = gdal.GetDriverByName('MEM').Create('tmp', cols, rows)
wild_ds.SetProjection(prox_ds.GetProjection())
wild_ds.SetGeoTransform(prox_ds.GetGeoTransform())
gdal.RasterizeLayer(
    wild_ds, [1], wild_lyr, burn_values=[1], callback=gdal.TermProgress)

wild_data = wild_ds.ReadAsArray()
prox_data = prox_ds.ReadAsArray()
prox_data[wild_data == 0] = -99  # cells outside the wilderness area
prox_ds.GetRasterBand(1).WriteArray(prox_data)
prox_ds.GetRasterBand(1).SetNoDataValue(-99)  # exclude them as NoData
prox_ds.FlushCache()

# False requests exact (not approximate) statistics
stats = prox_ds.GetRasterBand(1).ComputeStatistics(False, gdal.TermProgress)
print('Mean distance from roads is', stats[2])

del prox_ds, road_ds, shp_ds

10. 重采样制作分步切片以及获取新像素偏移的坐标

#新的重采样方法
data = np.arange(24).reshape(4, 6)
data[::2, ::2]  # strided resampling: keeps every other cell (larger pixels)
np.repeat(data, 2, axis=1)  # grow the array (smaller pixels): repeat each column twice
np.repeat(np.repeat(data, 2, axis=0), 2, axis=1)  # repeat rows twice, then columns twice

#如果要对原始大小四倍像素重采样，取四个像素平均值
#与移动窗口切片不同，这些切片比原始数据小很多，且他们大小与输出数组相同

#制作分步切片。前面为连续数据切片，这里设置步长，是分步切片
def make_resample_slices(data, win_size):
    """Return strided slices of ``data``, one per cell of the window.

    Args:
        data: Two-dimensional array to slice.
        win_size: ``(rows, columns)`` of the resampling window; also used
            as the step of each slice.

    Returns:
        A list of ``win_size[0] * win_size[1]`` equally sized slices in
        row-major offset order.
    """
    step_y, step_x = win_size[0], win_size[1]
    # Trim to the largest extent evenly divisible by the window size
    stop_y = (data.shape[0] // step_y) * step_y
    stop_x = (data.shape[1] // step_x) * step_x
    return [
        data[off_y:stop_y:step_y, off_x:stop_x:step_x]
        for off_y in range(step_y)
        for off_x in range(step_x)
    ]

#当新像素大小是原始像素小数倍时，这种技术会导致像素中心偏移。
#根据旧像素获取新像素偏移，得到的是偏移后的行列号坐标，即图像坐标
def get_indices(source_ds, target_width, target_height):
    """Return source pixel offsets for the centres of resampled pixels.

    Args:
        source_ds: Dataset providing ``GetGeoTransform()``, ``RasterXSize``
            and ``RasterYSize``.
        target_width: Pixel width of the resampled raster.
        target_height: Pixel height of the resampled raster.

    Returns:
        The ``np.meshgrid`` of fractional column (x) and row (y) offsets
        into the source raster, in image (row/column) coordinates.
    """
    source_geotransform = source_ds.GetGeoTransform()
    source_width = source_geotransform[1]   # source pixel width
    source_height = source_geotransform[5]  # source pixel height
    # Scale factors between target and source pixels. abs() keeps the step
    # positive when the two heights differ in sign (e.g. a positive target
    # height with a negative source height), which would otherwise give an
    # empty result.
    dx = abs(target_width / source_width)
    dy = abs(target_height / source_height)
    # Start half a target pixel in so the offsets address pixel centres
    target_x = np.arange(dx / 2, source_ds.RasterXSize, dx)
    target_y = np.arange(dy / 2, source_ds.RasterYSize, dy)
    return np.meshgrid(target_x, target_y)

ds = gdal.Open(fn)
data = ds.ReadAsArray()
x, y = get_indices(ds, 25, -25)
# Truncating the fractional offsets to integers gives nearest-neighbour sampling
row_idx = y.astype(int)
col_idx = x.astype(int)
new_data = data[row_idx, col_idx]

11. 双线性插值以及重采样

#还有双线性插值、三次卷积插值方法
#双线性插值，在找到新坐标后，找到最邻近的四个原始像素，与距离加权得到新的值
def bilinear(in_data, x, y):
    """Bilinearly interpolate ``in_data`` at fractional pixel offsets.

    Args:
        in_data: Two-dimensional array of source values.
        x: Array of fractional column offsets.
        y: Array of fractional row offsets, same shape as ``x``.

    Returns:
        Array of interpolated values with the shape of ``x``. Positions
        within half a pixel of the border reuse the nearest edge value.
    """
    # Shift to pixel centres on new arrays so the caller's x and y are
    # not modified in place.
    x = np.asarray(x, dtype=float) - 0.5
    y = np.asarray(y, dtype=float) - 0.5
    x0 = np.floor(x).astype(int)
    x1 = x0 + 1  # neighbouring column; with y0/y1 gives the four pixels
    y0 = np.floor(y).astype(int)
    y1 = y0 + 1

    # Clamp only the lookup indices: without this, an index of -1 wraps to
    # the far side of the array and an index past the end raises IndexError.
    last_row = in_data.shape[0] - 1
    last_col = in_data.shape[1] - 1
    col0 = np.clip(x0, 0, last_col)
    col1 = np.clip(x1, 0, last_col)
    row0 = np.clip(y0, 0, last_row)
    row1 = np.clip(y1, 0, last_row)

    # Weight each pixel by the target's distance to the opposite pixel in
    # both directions.
    ul = in_data[row0, col0] * (y1 - y) * (x1 - x)
    ur = in_data[row0, col1] * (y1 - y) * (x - x0)
    ll = in_data[row1, col0] * (y - y0) * (x1 - x)
    lr = in_data[row1, col1] * (y - y0) * (x - x0)

    return ul + ur + ll + lr  # the weighted sum is the pixel value

#双线性插值重采样
in_fn = ''   # placeholder: path of the input raster
out_fn = ''  # placeholder: path of the resampled output raster
cell_size = (0.02, -0.02)  # new pixel size (width, height)
in_ds = gdal.Open(in_fn)
x, y = get_indices(in_ds, *cell_size)  # source offsets of the new pixels
outdata = bilinear(in_ds.ReadAsArray(), x, y)  # resample

driver = gdal.GetDriverByName('GTiff')
rows, cols = outdata.shape  # size of the resampled raster
# NOTE(review): the interpolated values are floats but the band is Int32;
# confirm that dropping the fractional part is intended.
out_ds = driver.Create(out_fn, cols, rows, 1, gdal.GDT_Int32)
out_ds.SetProjection(in_ds.GetProjection())

gt = list(in_ds.GetGeoTransform())  # tuple -> list so it can be edited
gt[1] = cell_size[0]  # new pixel width
gt[5] = cell_size[1]  # new pixel height
out_ds.SetGeoTransform(gt)

out_band = out_ds.GetRasterBand(1)
out_band.WriteArray(outdata)
out_band.FlushCache()
out_band.ComputeStatistics(False)
# Release the datasets so GDAL closes them and finishes writing the file,
# as the other scripts in these notes do.
del out_band, out_ds, in_ds

#此外scipy.ndimage还有其他插值方法可供使用

12. GDAL warp及python调用

#GDAL命令重采样
gdalwarp -tr 0.02 0.02 -r bilinear first.tif final.tif
# Run the same gdalwarp command from Python
import subprocess
# The command is passed as a list of arguments (no shell involved);
# result holds the exit code of gdalwarp.
result = subprocess.call(
    ['gdalwarp', '-tr', '0.02', '0.02', '-r', 'bilinear',
     'first.tif', 'final.tif'])

总结：

 
局部分析： 计算像素到像素的基础工作，NDVI 
 
焦点分析： 使用环绕像素计算输出值的移动窗口，如斜率 
 
区域分析： 处理同一区域的像素 
 
全局分析： 邻近度分析等，涉及整个数据集

猜你喜欢