房源匹配最近地铁站

最近有一个新任务:我们作为基础数据组,需要收集很多基本信息。其中很重要的字段是房源的最近地铁站,以及房源到该地铁站的距离。对于租户来说,这是影响是否租房以及愿意支付多少租金的重要因素。

class MatchSubway:
    def __init__(self,data1,data2):
        import pandas as pd
        assert 'name' in data1.columns and '小区经度' in data1.columns and '小区纬度' in data1.columns and \
        'block' in data2.columns and 'lng' in data2.columns and 'lat' in data2.columns,\
        'The information is not enough'
        self._df1 = data1
        self._df2 = data2
        self._df1.insert(self._df1.shape[1],'匹配地铁站',None)
        self._df2.insert(self._df1.shape[1],'近铁距离',None)
    def _getdisfromXtoY(self,lng_a,lat_a,lng_b,lat_b):
        import numpy as np
        pk = 180/3.14169
        a1 = lat_a/pk
        a2 = lng_a / pk
        b1 = lat_b / pk
        b2 = lng_b / pk
        t1 = np.cos(a1)* np.cos(a2) * np.cos(b1) *np.cos(b2)
        t2 = np.cos(a1)*np.sin(a2) *np.cos(b1) *np.sin(b2)
        t3 = np.sin(a1)*np.sin(b1)
        tt = np.arccos(t1 + t2 + t3)
        return (6378000*tt)
    def matchsubway(self):
        for i in self._df1.index:
            result = self._getdisfromXtoY(self._df1.loc[i,'小区经度'],self._df1.loc[i,'小区纬度'],self._df2['lng'],self._df2['lat'])
            pos1 = result.sort_values().index[0]
            subway = self._df2.loc[pos1,'block']
            distance = np.min(result)
            self._df1.loc[i,'匹配地铁站'] = subway
            self._df1.loc[i,'近铁距离'] = distance
        return self._df1


猜你喜欢

转载自blog.csdn.net/weixin_41968760/article/details/80756154