Preface
As shown in the figure above, there are many target points scattered in the image, but we may be more interested in outliers. This blog post will introduce how to extract outliers.
Method overview
- Create a buffer of a certain length for each point;
- After obtaining the above buffer, obtain the maximum value of the buffer area;
- Merge the intersecting features of the buffer zone;
- Set the area threshold. In my experiment, the threshold is max_area * 1.2;
- Remove the buffer that exceeds the threshold;
- Get the points in the remaining buffer, which are outliers.
Code
extract.py
from pathlib import Path
from osgeo import ogr, osr, gdal
import os
import shutil
def buffer(inShp, fname, bdistance=0.02):
"""
:param inShp: 输入的矢量路径
:param fname: 输出的矢量路径
:param bdistance: 缓冲区距离
:return:
"""
ogr.UseExceptions()
in_ds = ogr.Open(inShp)
f = open(inShp.replace('.shp','.prj'))
f_ = f.readline()
in_lyr = in_ds.GetLayer()
# 创建输出Buffer文件
driver = ogr.GetDriverByName('ESRI Shapefile')
if Path(fname).exists():
driver.DeleteDataSource(fname)
# 新建DataSource,Layer
out_ds = driver.CreateDataSource(fname)
out_lyr = out_ds.CreateLayer(fname, in_lyr.GetSpatialRef(), ogr.wkbPolygon)
def_feature = out_lyr.GetLayerDefn()
# # 新建area字段
# new_field = ogr.FieldDefn("Area", ogr.OFTReal)
# new_field.SetWidth(40)
# new_field.SetPrecision(20) # 设置面积精度,小数点后16位
# out_lyr.CreateField(new_field)
# 遍历原始的Shapefile文件给每个Geometry做Buffer操作
for feature in in_lyr:
geometry = feature.GetGeometryRef()
buffer = geometry.Buffer(bdistance)
out_feature = ogr.Feature(def_feature)
out_feature.SetGeometry(buffer)
out_lyr.CreateFeature(out_feature)
out_feature = None
out_ds.FlushCache()
del in_ds, out_ds
def area(shpPath):
'''计算面积'''
max_area = 0
driver = ogr.GetDriverByName("ESRI Shapefile")
dataSource = driver.Open(shpPath, 1)
layer = dataSource.GetLayer()
new_field = ogr.FieldDefn("Area", ogr.OFTReal)
new_field.SetWidth(32)
new_field.SetPrecision(16) # 设置面积精度,小数点后16位
layer.CreateField(new_field)
for feature in layer:
geom = feature.GetGeometryRef()
area = geom.GetArea() # 计算面积
if area > max_area:
max_area = area
feature.SetField("Area", area) # 将面积添加到属性表中
layer.SetFeature(feature)
dataSource = None
return max_area
def multipoly2poly(inputshp, outputshp):
"""
:param inputshp: 输入的矢量路径
:param outputshp: 输出的矢量路径
:return:
"""
gdal.UseExceptions()
driver = ogr.GetDriverByName('ESRI Shapefile')
in_ds = driver.Open(inputshp, 0)
in_lyr = in_ds.GetLayer()
if os.path.exists(outputshp):
driver.DeleteDataSource(outputshp)
out_ds = driver.CreateDataSource(outputshp)
out_lyr = out_ds.CreateLayer('poly', geom_type=ogr.wkbPolygon)
for in_feat in in_lyr:
geom = in_feat.GetGeometryRef()
if geom.GetGeometryName() == 'MULTIPOLYGON':
for geom_part in geom:
addPolygon(geom_part.ExportToWkb(), out_lyr)
else:
addPolygon(geom.ExportToWkb(), out_lyr)
def addPolygon(simplePolygon, out_lyr):
featureDefn = out_lyr.GetLayerDefn()
polygon = ogr.CreateGeometryFromWkb(simplePolygon)
out_feat = ogr.Feature(featureDefn)
out_feat.SetGeometry(polygon)
out_lyr.CreateFeature(out_feat)
# print('Polygon added.')
def uni(shpPath, fname):
"""
:param shpPath: 输入的矢量路径
:param fname: 输出的矢量路径
:return:
"""
driver = ogr.GetDriverByName("ESRI Shapefile")
dataSource = driver.Open(shpPath, 1)
layer = dataSource.GetLayer()
# 新建DataSource,Layer
out_ds = driver.CreateDataSource(fname)
out_lyr = out_ds.CreateLayer(fname, layer.GetSpatialRef(), ogr.wkbPolygon)
def_feature = out_lyr.GetLayerDefn()
# 遍历原始的Shapefile文件给每个Geometry做Buffer操作
# current_union = layer[0].Clone()
print('the length of layer:', len(layer))
if len(layer) == 0:
return
for i, feature in enumerate(layer):
geometry = feature.GetGeometryRef()
if i == 0:
current_union = geometry.Clone()
current_union = current_union.Union(geometry).Clone()
if i == len(layer) - 1:
out_feature = ogr.Feature(def_feature)
out_feature.SetGeometry(current_union)
out_lyr.ResetReading()
out_lyr.CreateFeature(out_feature)
def remove_big_buffer(shpPath, outputShp, area_thresold):
'''计算面积'''
driver = ogr.GetDriverByName("ESRI Shapefile")
dataSource = driver.Open(shpPath, 1)
layer = dataSource.GetLayer()
new_field = ogr.FieldDefn("Area", ogr.OFTReal)
new_field.SetWidth(32)
new_field.SetPrecision(16) # 设置面积精度,小数点后16位
layer.CreateField(new_field)
# 新建DataSource,Layer
out_ds = driver.CreateDataSource(outputShp)
out_lyr = out_ds.CreateLayer(outputShp, layer.GetSpatialRef(), ogr.wkbPolygon)
def_feature = out_lyr.GetLayerDefn()
for feature in layer:
geom = feature.GetGeometryRef()
area = geom.GetArea() # 计算面积
if area > area_thresold * 1.2:
continue
feature.SetField("Area", area) # 将面积添加到属性表中
layer.SetFeature(feature)
out_feature = ogr.Feature(def_feature)
out_feature.SetGeometry(geom)
out_lyr.CreateFeature(out_feature)
out_feature = None
out_ds.FlushCache()
del dataSource, out_ds
def intersection(shp1, shp2, outshp):
"""
:param shp1: 缓冲区矢量路径
:param shp2: 目标矢量路径
:param outshp: 输出矢量路径
:return:
"""
driver = ogr.GetDriverByName("ESRI Shapefile")
dataSource1 = driver.Open(shp1, 1)
layer1 = dataSource1.GetLayer()
dataSource2 = driver.Open(shp2, 1)
layer2 = dataSource2.GetLayer()
spatialref = layer2.GetSpatialRef()
# 新建DataSource,Layer
out_ds = driver.CreateDataSource(outshp)
out_lyr = out_ds.CreateLayer(outshp, spatialref, ogr.wkbPolygon)
def_feature = out_lyr.GetLayerDefn()
for feature in layer1:
geom = feature.GetGeometryRef()
for feature_ in layer2:
geom_ = feature_.GetGeometryRef()
if geom.Intersect(geom_) == 1:
out_feature = ogr.Feature(def_feature)
out_feature.SetGeometry(geom_)
out_lyr.CreateFeature(out_feature)
out_ds.FlushCache()
del dataSource1, dataSource2
def mkdir(path):
if not os.path.exists(path):
os.mkdir(path)
def mainfunc(inShp, outshp, bdistance):
temproot = './temp'
mkdir(temproot)
fname = f'{temproot}/buffer.shp'
fname2 = f'{temproot}/buffer2.shp'
buffer(inShp, fname, bdistance=bdistance)
max_area = area(fname)
uni(fname, fname2)
multipoly2poly(fname2, fname)
remove_big_buffer(fname, fname2, max_area)
uni(fname2, fname)
intersection(fname, inShp, outshp)
# remove temporary directory
if os.path.exists(temproot):
shutil.rmtree(temproot)
if __name__ == '__main__':
f = open('config_order.txt')
data = f.readlines()
inShp = data[0].replace('\n', '')
outshp = data[1].replace('\n', '')
bdistance = float(data[2].replace('\n', ''))
print(inShp, outshp)
mainfunc(inShp, outshp, bdistance)
config_order.txt
./data/buildings.shp
isolated_buildings.shp
0.008