基于shapefile的离群点提取

在这里插入图片描述

前言

如上图所示,在影像散落着很多目标点,但我们可能对离群点更感兴趣,本篇博文将介绍如何提取离群点。

方法概述

  1. 对每个点新建一定长度的缓冲区;
  2. 获得上述缓冲区后,获取缓冲区面积最大值;
  3. 合并缓冲区相交的feature;
  4. 设置面积阈值,在我的实验中,阈值为max_area * 1.2;
  5. 移除超出阈值的缓冲区;
  6. 获取剩余缓冲区内的点,即为离群的点。

代码

extract.py

from pathlib import Path
from osgeo import ogr, osr, gdal
import os
import shutil


def buffer(inShp, fname, bdistance=0.02):
    """
    :param inShp: 输入的矢量路径
    :param fname: 输出的矢量路径
    :param bdistance: 缓冲区距离
    :return:
    """
    ogr.UseExceptions()
    in_ds = ogr.Open(inShp)
    f = open(inShp.replace('.shp','.prj'))
    f_ = f.readline()
    in_lyr = in_ds.GetLayer()

    # 创建输出Buffer文件
    driver = ogr.GetDriverByName('ESRI Shapefile')

    if Path(fname).exists():
        driver.DeleteDataSource(fname)
    # 新建DataSource,Layer
    out_ds = driver.CreateDataSource(fname)
    out_lyr = out_ds.CreateLayer(fname, in_lyr.GetSpatialRef(), ogr.wkbPolygon)
    def_feature = out_lyr.GetLayerDefn()


    # # 新建area字段
    # new_field = ogr.FieldDefn("Area", ogr.OFTReal)
    # new_field.SetWidth(40)
    # new_field.SetPrecision(20)  # 设置面积精度,小数点后16位
    # out_lyr.CreateField(new_field)

    # 遍历原始的Shapefile文件给每个Geometry做Buffer操作
    for feature in in_lyr:
        geometry = feature.GetGeometryRef()
        buffer = geometry.Buffer(bdistance)
        out_feature = ogr.Feature(def_feature)
        out_feature.SetGeometry(buffer)
        out_lyr.CreateFeature(out_feature)
        out_feature = None
    out_ds.FlushCache()
    del in_ds, out_ds


def area(shpPath):
    '''计算面积'''
    max_area = 0
    driver = ogr.GetDriverByName("ESRI Shapefile")
    dataSource = driver.Open(shpPath, 1)
    layer = dataSource.GetLayer()
    new_field = ogr.FieldDefn("Area", ogr.OFTReal)
    new_field.SetWidth(32)
    new_field.SetPrecision(16)  # 设置面积精度,小数点后16位
    layer.CreateField(new_field)
    for feature in layer:
        geom = feature.GetGeometryRef()
        area = geom.GetArea()  # 计算面积
        if area > max_area:
            max_area = area
        feature.SetField("Area", area)  # 将面积添加到属性表中
        layer.SetFeature(feature)
    dataSource = None
    return max_area


def multipoly2poly(inputshp, outputshp):
    """
    :param inputshp: 输入的矢量路径
    :param outputshp: 输出的矢量路径
    :return:
    """
    gdal.UseExceptions()
    driver = ogr.GetDriverByName('ESRI Shapefile')
    in_ds = driver.Open(inputshp, 0)
    in_lyr = in_ds.GetLayer()
    if os.path.exists(outputshp):
        driver.DeleteDataSource(outputshp)
    out_ds = driver.CreateDataSource(outputshp)
    out_lyr = out_ds.CreateLayer('poly', geom_type=ogr.wkbPolygon)
    for in_feat in in_lyr:
        geom = in_feat.GetGeometryRef()
        if geom.GetGeometryName() == 'MULTIPOLYGON':
            for geom_part in geom:
                addPolygon(geom_part.ExportToWkb(), out_lyr)
        else:
            addPolygon(geom.ExportToWkb(), out_lyr)


def addPolygon(simplePolygon, out_lyr):
    featureDefn = out_lyr.GetLayerDefn()
    polygon = ogr.CreateGeometryFromWkb(simplePolygon)
    out_feat = ogr.Feature(featureDefn)
    out_feat.SetGeometry(polygon)
    out_lyr.CreateFeature(out_feat)
    # print('Polygon added.')


def uni(shpPath, fname):
    """
    :param shpPath: 输入的矢量路径
    :param fname: 输出的矢量路径
    :return:
    """
    driver = ogr.GetDriverByName("ESRI Shapefile")
    dataSource = driver.Open(shpPath, 1)
    layer = dataSource.GetLayer()

    # 新建DataSource,Layer
    out_ds = driver.CreateDataSource(fname)
    out_lyr = out_ds.CreateLayer(fname, layer.GetSpatialRef(), ogr.wkbPolygon)
    def_feature = out_lyr.GetLayerDefn()
    # 遍历原始的Shapefile文件给每个Geometry做Buffer操作
    # current_union = layer[0].Clone()
    print('the length of layer:', len(layer))
    if len(layer) == 0:
        return

    for i, feature in enumerate(layer):
        geometry = feature.GetGeometryRef()
        if i == 0:
            current_union = geometry.Clone()
        current_union = current_union.Union(geometry).Clone()

        if i == len(layer) - 1:
            out_feature = ogr.Feature(def_feature)
            out_feature.SetGeometry(current_union)
            out_lyr.ResetReading()
            out_lyr.CreateFeature(out_feature)


def remove_big_buffer(shpPath, outputShp, area_thresold):
    '''计算面积'''
    driver = ogr.GetDriverByName("ESRI Shapefile")
    dataSource = driver.Open(shpPath, 1)
    layer = dataSource.GetLayer()
    new_field = ogr.FieldDefn("Area", ogr.OFTReal)
    new_field.SetWidth(32)
    new_field.SetPrecision(16)  # 设置面积精度,小数点后16位
    layer.CreateField(new_field)

    # 新建DataSource,Layer
    out_ds = driver.CreateDataSource(outputShp)
    out_lyr = out_ds.CreateLayer(outputShp, layer.GetSpatialRef(), ogr.wkbPolygon)
    def_feature = out_lyr.GetLayerDefn()

    for feature in layer:
        geom = feature.GetGeometryRef()
        area = geom.GetArea()  # 计算面积
        if area > area_thresold * 1.2:
            continue
        feature.SetField("Area", area)  # 将面积添加到属性表中
        layer.SetFeature(feature)
        out_feature = ogr.Feature(def_feature)
        out_feature.SetGeometry(geom)
        out_lyr.CreateFeature(out_feature)
        out_feature = None
    out_ds.FlushCache()
    del dataSource, out_ds


def intersection(shp1, shp2, outshp):
    """
    :param shp1:   缓冲区矢量路径
    :param shp2:   目标矢量路径
    :param outshp:  输出矢量路径
    :return:
    """
    driver = ogr.GetDriverByName("ESRI Shapefile")
    dataSource1 = driver.Open(shp1, 1)
    layer1 = dataSource1.GetLayer()

    dataSource2 = driver.Open(shp2, 1)
    layer2 = dataSource2.GetLayer()
    spatialref = layer2.GetSpatialRef()

    # 新建DataSource,Layer
    out_ds = driver.CreateDataSource(outshp)
    out_lyr = out_ds.CreateLayer(outshp, spatialref, ogr.wkbPolygon)
    def_feature = out_lyr.GetLayerDefn()

    for feature in layer1:
        geom = feature.GetGeometryRef()
        for feature_ in layer2:
            geom_ = feature_.GetGeometryRef()
            if geom.Intersect(geom_) == 1:
                out_feature = ogr.Feature(def_feature)
                out_feature.SetGeometry(geom_)
                out_lyr.CreateFeature(out_feature)

    out_ds.FlushCache()
    del dataSource1, dataSource2


def mkdir(path):
    if not os.path.exists(path):
        os.mkdir(path)


def mainfunc(inShp, outshp, bdistance):
    temproot = './temp'
    mkdir(temproot)
    fname = f'{temproot}/buffer.shp'
    fname2 = f'{temproot}/buffer2.shp'
    buffer(inShp, fname, bdistance=bdistance)
    max_area = area(fname)
    uni(fname, fname2)
    multipoly2poly(fname2, fname)
    remove_big_buffer(fname, fname2, max_area)
    uni(fname2, fname)
    intersection(fname, inShp, outshp)

    # remove temporary directory
    if os.path.exists(temproot):
        shutil.rmtree(temproot)


if __name__ == '__main__':
    f = open('config_order.txt')
    data = f.readlines()
    inShp = data[0].replace('\n', '')
    outshp = data[1].replace('\n', '')
    bdistance = float(data[2].replace('\n', ''))

    print(inShp, outshp)
    mainfunc(inShp, outshp, bdistance)

config_order.txt

./data/buildings.shp
isolated_buildings.shp
0.008

猜你喜欢

转载自blog.csdn.net/weixin_42990464/article/details/115293259
今日推荐