python利用geometry函数和shp文件进行数据的精确筛选

python利用geometry函数和shp文件进行数据的精确筛选

本帖主要介绍的是利用shp文件和geometry函数实现数据的精确筛选,意思就是假如作者只需要一个省的数据,手上却是全国或者全球的数据,怎么去清洗数据,只得到符合省界经纬度圈内的数据。
先上结果图(以雷电数据为例)
在这里插入图片描述
图中是全国及周边的FY4闪电数据,但是作者只需要湖南省的,就需要数据清洗
在这里插入图片描述
在这里插入图片描述
说明:
1.这里只介绍重点部分,对于画图将不再介绍,源代码里面可以看懂
2.geometry其实是shapely库中的一个模块(shapely.geometry),本文用到的是其中的Point、shape等对象
3.该方法经作者实测应该不是最优解,因为作者i7十一代的CPU依然运行了很久,如果进行逐个点的判断,会比较慢,需要先进行处理,减少数据量。
4.需要进行初步的数据清洗,利用经纬度先做大致的裁剪,然后再进行精细的判断。

具体实现:

# Key step: use the shp file to decide whether each point lies inside the boundary.
sf = shapefile.Reader('Hunan_Province')  # open the shapefile
# Shorthand for: call shapeRecords(), take record 0, then take its shape.
sf = sf.shapeRecords()[0].shape
# Build the shapely geometry once. It does not depend on the point being
# tested, so rebuilding it inside the loop only repeats identical work.
boundary = geometry.shape(sf)
flat_points = np.column_stack((lon, lat))  # pair the 1-D lon/lat values into (lon, lat) rows
# Keep only the points that fall within the boundary geometry.
in_shape_points = [pt for pt in flat_points
                   if geometry.Point(pt).within(boundary)]
selected_lon = [elem[0] for elem in in_shape_points]  # back to a 1-D list of longitudes
selected_lat = [elem[1] for elem in in_shape_points]  # back to a 1-D list of latitudes

代码解释:
1.lon, lat经纬度已经进行了初步的筛选,得到了一小区域的所有点
2.sf代表shp文件的shape属性,代码中进行了缩写,可以分开一步一步写
3.代码中用的shp文件是单省的,有的人可能是全国的文件,需要做循环进行判断
例如:
for city_rcd in sz_shp.shapeRecords():  # iterate over every shapeRecord
    # record holds the region attributes (strings); field 6 is compared to the name here.
    if city_rcd.record[6] == 'Shenzhen':
        # shape holds the geometry (made of points).
        # NOTE: rebinding sz_shp does not disturb this loop, because
        # sz_shp.shapeRecords() was already evaluated when the loop started.
        sz_shp = city_rcd.shape

4.geometry.Point(pt).within(geometry.shape(sf))就是精细的判断数据是否在shp这个圈里
5.将得到的数据再次展开成一维的,用于画散点图

具体代码里面有详细的解释,但是因为版权的关系,没有附上测试数据,望见谅

完整代码

import shapefile
import shapely.geometry as geometry
from copy import copy
import numpy as np
import os
from netCDF4 import Dataset
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy.feature as cfeature
from cartopy.mpl.gridliner import LATITUDE_FORMATTER, LONGITUDE_FORMATTER

#-------------------------------画地图部分(开始)--------------------------------#
#---------------------------功能函数------------------------------#
# Helper for tick drawing: picks out one edge of the map frame.
def find_side(ls, side):
    """
    Return one side of the bounding rectangle of ``ls`` as a LineString.

    ``ls`` is a shapely geometry that is assumed to be rectangular; only its
    ``bounds`` are used.  ``side`` must be 'left', 'right', 'bottom' or
    'top'; any other value raises ``KeyError``.
    """
    west, south, east, north = ls.bounds
    edges = dict(
        left=[(west, south), (west, north)],
        right=[(east, south), (east, north)],
        bottom=[(west, south), (east, south)],
        top=[(west, north), (east, north)],
    )
    endpoints = edges[side]
    return geometry.LineString(endpoints)
# Computes tick positions (used for both longitude and latitude) along one
# side of the map frame.
def _lambert_ticks(ax, ticks, tick_location, line_constructor, tick_extractor):
    """Return tick positions and labels for one side of a Lambert Conformal axes.

    Parameters
    ----------
    ax
        The cartopy GeoAxes whose outline, extent and projection are used.
    ticks
        Sequence of candidate tick values (longitudes or latitudes).
    tick_location
        Which side of the frame to intersect: 'left', 'right', 'bottom'
        or 'top' (passed straight to ``find_side``).
    line_constructor
        Callable ``(tick, n_steps, extent)`` returning an ``(n_steps, 2)``
        array of lon/lat points describing the grid line for ``tick``.
    tick_extractor
        Callable receiving ``locs.xy`` of the intersection and returning a
        sequence whose first element is the tick position.

    Returns
    -------
    tuple of two lists
        Tick positions in projection coordinates and the matching tick
        values.  Ticks whose grid line does not cross the requested side
        are dropped from both lists.
    """
    # ``outline_patch`` is deprecated in cartopy in favour of
    # ``spines['geo']`` and is missing on newer releases, so only use it
    # when it is still there.
    outline = getattr(ax, 'outline_patch', None)
    if outline is None:
        outline = ax.spines['geo']
    frame = geometry.LineString(outline.get_path().vertices.tolist())
    axis = find_side(frame, tick_location)
    n_steps = 30
    extent = ax.get_extent(ccrs.PlateCarree())

    positions = []
    for t in ticks:
        xy = line_constructor(t, n_steps, extent)
        proj_xyz = ax.projection.transform_points(ccrs.Geodetic(), xy[:, 0], xy[:, 1])
        # Drop the z column; only the projected x/y are needed.
        ls = geometry.LineString(proj_xyz[..., :2].tolist())
        locs = axis.intersection(ls)
        if locs.is_empty:
            # The grid line never reaches this side: mark the tick as not visible.
            positions.append(None)
        else:
            positions.append(tick_extractor(locs.xy)[0])

    # Remove ticks that aren't visible, keeping positions and labels aligned.
    _ticks = [pos for pos in positions if pos is not None]
    ticklabels = [label for pos, label in zip(positions, ticks) if pos is not None]
    return _ticks, ticklabels
# Set the x-axis tick labels (longitude ticks along the bottom edge).
def lambert_xticks(ax, ticks):
    """Place ticks and labels on the bottom x-axis of a Lambert Conformal axes.

    Every value in ``ticks`` is turned into a line of constant x (longitude)
    spanning the y-range of the current extent; a tick is placed wherever
    that line crosses the bottom side of the frame.  Labels are produced by
    the x-axis major formatter.
    """
    def first_coord(xy):
        return xy[0]

    def meridian(value, n, bounds):
        fixed = np.zeros(n) + value
        varying = np.linspace(bounds[2], bounds[3], n)
        return np.vstack((fixed, varying)).T

    positions, labels = _lambert_ticks(ax, ticks, 'bottom', meridian, first_coord)
    ax.xaxis.tick_bottom()
    ax.set_xticks(positions)
    formatted = []
    for value in labels:
        formatted.append(ax.xaxis.get_major_formatter()(value))
    ax.set_xticklabels(formatted)
# Set the y-axis tick labels (latitude ticks along the left edge).
def lambert_yticks(ax, ticks):
    """Place ticks and labels on the left y-axis of a Lambert Conformal axes.

    Every value in ``ticks`` is turned into a line of constant y (latitude)
    spanning the x-range of the current extent; a tick is placed wherever
    that line crosses the left side of the frame.  Labels are produced by
    the y-axis major formatter.
    """
    def second_coord(xy):
        return xy[1]

    def parallel(value, n, bounds):
        varying = np.linspace(bounds[0], bounds[1], n)
        fixed = np.zeros(n) + value
        return np.vstack((varying, fixed)).T

    positions, labels = _lambert_ticks(ax, ticks, 'left', parallel, second_coord)
    ax.yaxis.tick_left()
    ax.set_yticks(positions)
    formatted = []
    for value in labels:
        formatted.append(ax.yaxis.get_major_formatter()(value))
    ax.set_yticklabels(formatted)
#---------------------------功能函数------------------------------#

#-----------------cartopy画地图------------------#
# Create the figure that holds the map.
fig = plt.figure(figsize=[10, 8],frameon=True)
# Add the plotting axes with a Lambert Conformal projection.
ax = fig.add_axes([0.08, 0.05, 0.8, 0.94], projection=ccrs.LambertConformal(central_latitude=90, central_longitude=105))

# Background layers at 50m resolution: ocean, land, rivers and lakes.
ax.add_feature(cfeature.OCEAN.with_scale('50m'))
ax.add_feature(cfeature.LAND.with_scale('50m'))
ax.add_feature(cfeature.RIVERS.with_scale('50m'))
ax.add_feature(cfeature.LAKES.with_scale('50m'))

# Limit the view to lon 80-130, lat 15-55 (given in PlateCarree coordinates).
ax.set_extent([80, 130, 15, 55],crs=ccrs.PlateCarree())
# Render once before the tick helpers run.
# NOTE(review): presumably needed so the axes outline is final when the
# tick helpers read it - confirm before removing.
fig.canvas.draw()

# Tick values; also used as the locations of the lon/lat grid lines.
xticks = [55, 65, 75, 85, 95, 105, 115, 125, 135, 145, 155, 165]
yticks = [0 , 5 , 10, 15, 20, 25 , 30 , 35 , 40 , 45 , 50 , 55 , 60 , 65]
ax.gridlines(xlocs=xticks, ylocs=yticks,linestyle='--',lw=1,color='dimgrey')

# Install the lon/lat formatters. The tick helpers below read them through
# get_major_formatter(); per the original author, ticks no longer display
# correctly if these two lines are commented out.
ax.xaxis.set_major_formatter(LONGITUDE_FORMATTER)
ax.yaxis.set_major_formatter(LATITUDE_FORMATTER)
# Draw the longitude and latitude ticks.
lambert_xticks(ax, xticks)
lambert_yticks(ax, yticks)


# Read the border file: chunks are separated by '>' and each chunk is a
# whitespace-separated run of numbers.
with open('CN-border-La.dat') as border_file:
    raw_text = border_file.read()
    chunks = []
    for piece in raw_text.split('>'):
        if len(piece) > 0:  # skip the empty strings produced by the split
            chunks.append(piece)
    borders = [np.fromstring(chunk, dtype=float, sep=' ') for chunk in chunks]

# Plot every border segment: even positions are used as x, odd positions as
# y, interpreted as geodetic coordinates.
for coords in borders:
    xs = coords[0::2]
    ys = coords[1::2]
    ax.plot(xs, ys, '-', lw=1.5, color='k', transform=ccrs.Geodetic())
#-----------------------------------------------画地图部分(结束)------------------------------------#




#--------------------------数据筛选部分(开始)-------------------------------#
#---------------------------功能函数------------------------------#
# Collect the path of every entry inside a directory.
def get_files_path_list(path):
    """Return ``path`` joined with each entry name from ``os.listdir(path)``.

    The result is a list in the order ``os.listdir`` yields the entries.
    The paths are absolute only if ``path`` itself is absolute.
    """
    return [os.path.join(path, entry) for entry in os.listdir(path)]
# Read the lightning positions and apply a coarse bounding-box filter.
def get_lon_lat(files_path, lon_range=(108, 114.5), lat_range=(24, 31)):
    """Collect the lon/lat values of all files that fall inside a bounding box.

    This is the cheap pre-filter that runs before the much more expensive
    point-in-polygon test.

    Parameters
    ----------
    files_path
        Iterable of netCDF file paths.  Each file must provide the
        variables 'LON' and 'LAT'.
    lon_range, lat_range
        ``(min, max)`` pairs; a point is kept only when it lies strictly
        between both bounds on both axes.  The defaults are the box that was
        previously hard-coded.

    Returns
    -------
    tuple of two lists
        Longitudes and latitudes of the retained points, in file order.
    """
    lon_min, lon_max = lon_range
    lat_min, lat_max = lat_range
    Lon = []  # accumulated longitudes
    Lat = []  # accumulated latitudes
    for file_path in files_path:
        # The context manager closes the dataset even when reading fails;
        # previously the file handle was never closed.
        with Dataset(file_path, 'r') as nc_file:
            lon = nc_file.variables['LON'][:]
            lat = nc_file.variables['LAT'][:]
        # Evaluate the box test for the whole file at once.
        inside = (lon > lon_min) & (lon < lon_max) & (lat > lat_min) & (lat < lat_max)
        # Masked (missing) values never count as inside the box.
        inside = np.ma.filled(inside, False)
        Lon.extend(np.ma.getdata(lon)[inside])
        Lat.extend(np.ma.getdata(lat)[inside])
    return Lon, Lat
#---------------------------功能函数------------------------------#




# Directory that contains the input data files.
path = r'D:\project\study_way\cartopy\A2021011103078357900001\A2021011103078357900001'
# Build the list of file paths inside that directory.
files_path = get_files_path_list(path)  # helper: list every entry in the directory
lon, lat = get_lon_lat(files_path)  # coarsely filtered longitudes and latitudes

# Key step: use the shp file to decide whether each point lies inside the boundary.
sf = shapefile.Reader('Hunan_Province')  # open the shapefile
# Shorthand for: call shapeRecords(), take record 0, then take its shape.
sf = sf.shapeRecords()[0].shape
# Build the shapely geometry once. It does not depend on the point being
# tested, so rebuilding it inside the loop only repeats identical work.
boundary = geometry.shape(sf)
flat_points = np.column_stack((lon, lat))  # pair the 1-D lon/lat values into (lon, lat) rows
# Keep only the points that fall within the boundary geometry.
in_shape_points = [pt for pt in flat_points
                   if geometry.Point(pt).within(boundary)]
selected_lon = [elem[0] for elem in in_shape_points]  # back to a 1-D list of longitudes
selected_lat = [elem[1] for elem in in_shape_points]  # back to a 1-D list of latitudes
#--------------------------数据筛选部分(结束)-------------------------------#

# Scatter the selected points; coordinates are given as PlateCarree lon/lat.
ax.scatter(selected_lon, selected_lat, s=5, alpha=.6,transform=ccrs.PlateCarree())
# Display the figure.
plt.show()

Guess you like

Origin blog.csdn.net/weixin_42372313/article/details/115393004