Outlier extraction based on shapefile

[Figure: target points scattered across the image (the original picture is not included here)]

Preface

As shown in the figure above, there are many target points scattered in the image, but we may be more interested in outliers. This blog post will introduce how to extract outliers.

Method overview

  1. Create a buffer of a fixed distance around each point;
  2. Compute the area of every buffer and record the largest one (max_area);
  3. Merge (union) the buffers that intersect one another;
  4. Set the area threshold. In my experiment, the threshold is max_area * 1.2;
  5. Remove every merged buffer whose area exceeds the threshold;
  6. Select the points that fall inside the remaining buffers; these are the outliers.

Code

extract.py

from pathlib import Path
from osgeo import ogr, osr, gdal
import os
import shutil


def buffer(inShp, fname, bdistance=0.02):
    """Write a buffered copy of every geometry of a shapefile.

    Each feature geometry read from ``inShp`` is expanded with
    ``Geometry.Buffer(bdistance)`` and stored as a geometry-only feature in a
    new polygon shapefile at ``fname``. The output layer reuses the input
    layer's spatial reference; attributes are not copied. An existing data
    source at ``fname`` is deleted before the new one is created.

    :param inShp: path of the input vector file
    :param fname: path of the output vector file
    :param bdistance: distance passed to ``Geometry.Buffer``
    :return: None
    """
    ogr.UseExceptions()
    in_ds = ogr.Open(inShp)
    in_lyr = in_ds.GetLayer()

    # Create the output data source, replacing any previous result.
    driver = ogr.GetDriverByName('ESRI Shapefile')
    if Path(fname).exists():
        driver.DeleteDataSource(fname)
    out_ds = driver.CreateDataSource(fname)
    out_lyr = out_ds.CreateLayer(fname, in_lyr.GetSpatialRef(), ogr.wkbPolygon)
    def_feature = out_lyr.GetLayerDefn()

    # Buffer every input geometry and store it as a new feature.
    for feature in in_lyr:
        geometry = feature.GetGeometryRef()
        if geometry is None:
            # A feature without geometry has nothing to buffer.
            continue
        buffered = geometry.Buffer(bdistance)
        out_feature = ogr.Feature(def_feature)
        out_feature.SetGeometry(buffered)
        out_lyr.CreateFeature(out_feature)
        out_feature = None

    out_ds.FlushCache()
    # Drop the layer handles before the data sources that own them.
    out_lyr = None
    in_lyr = None
    del in_ds, out_ds


def area(shpPath):
    """Record every feature's area in an "Area" field and return the maximum.

    The shapefile is opened in update mode. A real-valued "Area" field
    (width 32, precision 16) is added when the layer does not have one yet,
    and each feature's ``GetArea()`` value is written into it.

    :param shpPath: path of the shapefile to update in place
    :return: the largest feature area found, or 0 when the layer has no
        feature with a geometry
    :raises RuntimeError: if the shapefile cannot be opened for update
    """
    driver = ogr.GetDriverByName("ESRI Shapefile")
    dataSource = driver.Open(shpPath, 1)
    if dataSource is None:
        raise RuntimeError(f"cannot open {shpPath!r} for update")
    layer = dataSource.GetLayer()

    # Only add the field once, so the function can be re-run on the same file.
    if layer.GetLayerDefn().GetFieldIndex("Area") < 0:
        new_field = ogr.FieldDefn("Area", ogr.OFTReal)
        new_field.SetWidth(32)
        new_field.SetPrecision(16)  # area precision: 16 decimal places
        layer.CreateField(new_field)

    max_area = 0
    for feature in layer:
        geom = feature.GetGeometryRef()
        if geom is None:
            continue
        feature_area = geom.GetArea()
        if feature_area > max_area:
            max_area = feature_area
        feature.SetField("Area", feature_area)  # store the area in the attribute table
        layer.SetFeature(feature)

    # Release the layer before the data source so the edits are written out.
    layer = None
    dataSource = None
    return max_area


def multipoly2poly(inputshp, outputshp):
    """Split multipolygon features into one single-polygon feature per part.

    Every feature of ``inputshp`` is read; a ``MULTIPOLYGON`` geometry is
    written to ``outputshp`` as one feature per member polygon, any other
    geometry is written unchanged. The output layer reuses the input layer's
    spatial reference. An existing ``outputshp`` is deleted first.

    :param inputshp: path of the input vector file
    :param outputshp: path of the output vector file
    :return: None
    :raises RuntimeError: if the input shapefile cannot be opened
    """
    gdal.UseExceptions()
    driver = ogr.GetDriverByName('ESRI Shapefile')
    in_ds = driver.Open(inputshp, 0)
    if in_ds is None:
        raise RuntimeError(f"cannot open {inputshp!r}")
    in_lyr = in_ds.GetLayer()

    if os.path.exists(outputshp):
        driver.DeleteDataSource(outputshp)
    out_ds = driver.CreateDataSource(outputshp)
    # Carry the spatial reference over so later steps do not lose it.
    out_lyr = out_ds.CreateLayer('poly', in_lyr.GetSpatialRef(), ogr.wkbPolygon)

    for in_feat in in_lyr:
        geom = in_feat.GetGeometryRef()
        if geom is None:
            continue
        if geom.GetGeometryName() == 'MULTIPOLYGON':
            for geom_part in geom:
                addPolygon(geom_part.ExportToWkb(), out_lyr)
        else:
            addPolygon(geom.ExportToWkb(), out_lyr)

    out_ds.FlushCache()
    # Drop the layer handles before the data sources that own them.
    out_lyr = None
    in_lyr = None
    del in_ds, out_ds


def addPolygon(simplePolygon, out_lyr):
    """Append one geometry, supplied as WKB, to a layer as a new feature.

    :param simplePolygon: WKB bytes of the geometry to add
    :param out_lyr: layer that receives the new feature
    :return: None
    """
    new_feature = ogr.Feature(out_lyr.GetLayerDefn())
    new_feature.SetGeometry(ogr.CreateGeometryFromWkb(simplePolygon))
    out_lyr.CreateFeature(new_feature)


def uni(shpPath, fname):
    """Union all geometries of a shapefile into a single output feature.

    The geometries of ``shpPath`` are merged one after another with
    ``Geometry.Union`` and the result is written as the only feature of a new
    polygon shapefile at ``fname``. When the input layer has no geometry, the
    output layer is created but left empty. An existing ``fname`` is deleted
    first.

    :param shpPath: path of the input vector file
    :param fname: path of the output vector file
    :return: None
    :raises RuntimeError: if the input cannot be opened or a union fails
    """
    driver = ogr.GetDriverByName("ESRI Shapefile")
    dataSource = driver.Open(shpPath, 0)  # the input is only read
    if dataSource is None:
        raise RuntimeError(f"cannot open {shpPath!r}")
    layer = dataSource.GetLayer()

    # Remove a stale output first so the shapefile can be re-created.
    if os.path.exists(fname):
        driver.DeleteDataSource(fname)
    out_ds = driver.CreateDataSource(fname)
    out_lyr = out_ds.CreateLayer(fname, layer.GetSpatialRef(), ogr.wkbPolygon)

    print('the length of layer:', len(layer))

    # Fold every geometry into one running union.
    current_union = None
    for feature in layer:
        geometry = feature.GetGeometryRef()
        if geometry is None:
            continue
        if current_union is None:
            current_union = geometry.Clone()
            continue
        merged = current_union.Union(geometry)
        if merged is None:
            raise RuntimeError(f"union failed while merging {shpPath!r}")
        current_union = merged

    if current_union is not None:
        out_feature = ogr.Feature(out_lyr.GetLayerDefn())
        out_feature.SetGeometry(current_union)
        out_lyr.CreateFeature(out_feature)
        out_feature = None

    out_ds.FlushCache()
    # Drop the layer handles before the data sources that own them.
    out_lyr = None
    layer = None
    del dataSource, out_ds


def remove_big_buffer(shpPath, outputShp, area_thresold, factor=1.2):
    """Copy the features whose area does not exceed the threshold.

    A feature of ``shpPath`` is kept when its area is at most
    ``area_thresold * factor``; kept geometries are written to a new polygon
    shapefile at ``outputShp`` (geometry only). As a side effect the input is
    opened in update mode and the area of every kept feature is stored in its
    "Area" field, which is created when missing. An existing ``outputShp`` is
    deleted first.

    :param shpPath: path of the input vector file (updated in place)
    :param outputShp: path of the output vector file
    :param area_thresold: base area, e.g. the largest single-buffer area
    :param factor: multiplier applied to ``area_thresold`` to get the cut-off
    :return: None
    :raises RuntimeError: if the input shapefile cannot be opened for update
    """
    driver = ogr.GetDriverByName("ESRI Shapefile")
    dataSource = driver.Open(shpPath, 1)
    if dataSource is None:
        raise RuntimeError(f"cannot open {shpPath!r} for update")
    layer = dataSource.GetLayer()

    # Only add the field once, so the function can be re-run on the same file.
    if layer.GetLayerDefn().GetFieldIndex("Area") < 0:
        new_field = ogr.FieldDefn("Area", ogr.OFTReal)
        new_field.SetWidth(32)
        new_field.SetPrecision(16)  # area precision: 16 decimal places
        layer.CreateField(new_field)

    # Remove a stale output first so the shapefile can be re-created.
    if os.path.exists(outputShp):
        driver.DeleteDataSource(outputShp)
    out_ds = driver.CreateDataSource(outputShp)
    out_lyr = out_ds.CreateLayer(outputShp, layer.GetSpatialRef(), ogr.wkbPolygon)
    def_feature = out_lyr.GetLayerDefn()

    limit = area_thresold * factor
    for feature in layer:
        geom = feature.GetGeometryRef()
        if geom is None:
            continue
        feature_area = geom.GetArea()
        if feature_area > limit:
            # Larger than the cut-off: a merged cluster, not an isolated buffer.
            continue
        feature.SetField("Area", feature_area)  # store the area in the attribute table
        layer.SetFeature(feature)
        out_feature = ogr.Feature(def_feature)
        out_feature.SetGeometry(geom)
        out_lyr.CreateFeature(out_feature)
        out_feature = None

    out_ds.FlushCache()
    # Drop the layer handles before the data sources that own them.
    out_lyr = None
    layer = None
    del dataSource, out_ds


def intersection(shp1, shp2, outshp):
    """Write the features of ``shp2`` that intersect any feature of ``shp1``.

    The geometries of ``shp1`` are loaded once; each feature of ``shp2`` is
    then tested against them and, on the first hit, its geometry is written to
    ``outshp``. A target feature is therefore written at most once, even when
    it touches several ``shp1`` features. The output layer takes its spatial
    reference and geometry type from ``shp2``; attributes are not copied. An
    existing ``outshp`` is deleted first.

    :param shp1: path of the buffer vector file
    :param shp2: path of the target vector file
    :param outshp: path of the output vector file
    :return: None
    :raises RuntimeError: if either input cannot be opened
    """
    driver = ogr.GetDriverByName("ESRI Shapefile")
    # Both inputs are only read, so open them read-only.
    dataSource1 = driver.Open(shp1, 0)
    if dataSource1 is None:
        raise RuntimeError(f"cannot open {shp1!r}")
    dataSource2 = driver.Open(shp2, 0)
    if dataSource2 is None:
        raise RuntimeError(f"cannot open {shp2!r}")
    layer1 = dataSource1.GetLayer()
    layer2 = dataSource2.GetLayer()

    # Remove a stale output first so the shapefile can be re-created.
    if os.path.exists(outshp):
        driver.DeleteDataSource(outshp)
    out_ds = driver.CreateDataSource(outshp)
    # The output stores shp2 geometries, so it must use shp2's geometry type.
    out_lyr = out_ds.CreateLayer(outshp, layer2.GetSpatialRef(), layer2.GetGeomType())
    def_feature = out_lyr.GetLayerDefn()

    # Clone the buffer geometries: GetGeometryRef() is only valid while its
    # feature is alive, and the features are released as the loop advances.
    zones = []
    for feature in layer1:
        geom = feature.GetGeometryRef()
        if geom is not None:
            zones.append(geom.Clone())

    for feature_ in layer2:
        geom_ = feature_.GetGeometryRef()
        if geom_ is None:
            continue
        if any(zone.Intersect(geom_) for zone in zones):
            out_feature = ogr.Feature(def_feature)
            out_feature.SetGeometry(geom_)
            out_lyr.CreateFeature(out_feature)
            out_feature = None

    out_ds.FlushCache()
    # Drop the layer handles before the data sources that own them.
    out_lyr = None
    layer1 = None
    layer2 = None
    del dataSource1, dataSource2, out_ds


def mkdir(path):
    """Create directory ``path``, including missing parents, if it is absent.

    Calling it for a directory that already exists is a no-op.

    :param path: directory to create
    :return: None
    :raises FileExistsError: if ``path`` exists but is not a directory
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(path, exist_ok=True)


def mainfunc(inShp, outshp, bdistance):
    """Extract the outlier features of ``inShp`` into ``outshp``.

    Pipeline: buffer every feature, record the largest single-buffer area,
    union the buffers, split the union into separate polygons, drop the
    polygons larger than the area cut-off, union what is left and finally
    select the input features that intersect it.

    Intermediate shapefiles live in a fresh temporary directory that is
    removed afterwards, whether or not the pipeline succeeds.

    :param inShp: path of the input vector file
    :param outshp: path of the output vector file
    :param bdistance: buffer distance applied to every input feature
    :return: None
    """
    import tempfile

    # A private temp directory: nothing that already exists is touched or deleted.
    temproot = tempfile.mkdtemp(prefix='outlier_')
    try:
        # Every stage writes to its own file, so no stage has to overwrite a
        # shapefile produced by an earlier one.
        buffered = os.path.join(temproot, 'buffer.shp')
        merged = os.path.join(temproot, 'merged.shp')
        parts = os.path.join(temproot, 'parts.shp')
        kept = os.path.join(temproot, 'kept.shp')
        kept_merged = os.path.join(temproot, 'kept_merged.shp')

        buffer(inShp, buffered, bdistance=bdistance)
        max_area = area(buffered)
        uni(buffered, merged)
        multipoly2poly(merged, parts)
        remove_big_buffer(parts, kept, max_area)
        uni(kept, kept_merged)
        intersection(kept_merged, inShp, outshp)
    finally:
        # Remove the temporary directory.
        shutil.rmtree(temproot, ignore_errors=True)


if __name__ == '__main__':
    # config_order.txt holds three lines: input path, output path, buffer distance.
    with open('config_order.txt') as f:
        # strip() also drops a trailing '\r' left by Windows line endings.
        data = [line.strip() for line in f]
    if len(data) < 3:
        raise SystemExit(
            'config_order.txt must contain three lines: '
            'input path, output path, buffer distance'
        )
    inShp = data[0]
    outshp = data[1]
    bdistance = float(data[2])

    print(inShp, outshp)
    mainfunc(inShp, outshp, bdistance)

config_order.txt

./data/buildings.shp
isolated_buildings.shp
0.008

Guess you like

Origin blog.csdn.net/weixin_42990464/article/details/115293259