# 百度地图爬取经纬度(注意:百度地图返回的经纬度是经过偏移加密的)

#!/usr/bin/env python
# coding: utf-8
#数据爬取
import requests
from fake_useragent import UserAgent
import pandas as pd
#import xlrd
import numpy as np
from urllib.parse import quote
import re
from time import sleep
from random import randint
import random


# In[2]:


# Load the spreadsheet of places to geocode. The classes below read this
# module-level table and use column index 1 as the place name.
place_name = pd.read_excel('企业信息获取.xlsx')
# hangshu = number of rows, leishu = number of columns of the loaded table.
hangshu, leishu = place_name.shape
# Bare expression kept as-is; presumably a leftover notebook cell display.
place_name


# In[ ]:



class Url_Mnger:
    """Builds one geocoder request URL per row of the ``place_name`` table."""

    def Url_join(self, hangshu):
        """Return the request URLs for the first ``hangshu`` rows.

        For each row the value in column index 1 of the module-level
        ``place_name`` table is percent-encoded and substituted into the
        ``address`` query parameter. Every name/URL pair is printed as it
        is built.

        Args:
            hangshu: Number of rows (from the top of the table) to process.

        Returns:
            A list of URL strings, in row order.
        """
        url_template = 'http://api.map.baidu.com/geocoder?address={}'
        collected = []
        for row in range(hangshu):
            name = place_name.iloc[row, 1]
            request_url = url_template.format(quote(name))
            print(name, request_url)
            collected.append(request_url)
        return collected
# Request-sending class
class Response_Cast(object):
    """Fetches the raw body of a geocoder response over HTTP."""

    def Get_response(self, url, timeout=10):
        """Send a GET request to ``url`` and return the response body.

        Args:
            url: Fully built request URL.
            timeout: Seconds to wait for the server before giving up.
                Without a timeout ``requests`` waits indefinitely, so a
                single stalled connection would hang the whole run.

        Returns:
            The response body as text (``response.text``).

        Raises:
            requests.exceptions.RequestException: If the connection fails
                or the timeout expires.
        """
        # Send a Chrome User-Agent string supplied by fake_useragent.
        headers = {'User-Agent': UserAgent().chrome}
        response = requests.get(url=url, headers=headers, timeout=timeout)
        return response.text
# Data-management class
class Info_Manger:
    """Parses geocoder responses and stores the results as a CSV table."""

    def Parse_html(self, info_text):
        """Extract the first latitude/longitude pair from a response body.

        Args:
            info_text: Response text expected to contain ``<lat>...</lat>``
                and ``<lng>...</lng>`` elements.

        Returns:
            ``(latitude, longitude)`` as the strings found in the text, or
            ``(None, None)`` when either element is missing (for example
            when the service could not resolve the address). Returning a
            placeholder keeps the caller's result lists aligned with the
            input rows instead of failing with an ``IndexError``.
        """
        # Non-greedy groups so that several elements on one line cannot be
        # swallowed into a single match.
        lat_match = re.search(r'<lat>(.+?)</lat>', info_text)
        lng_match = re.search(r'<lng>(.+?)</lng>', info_text)
        if lat_match is None or lng_match is None:
            latitude = longitude = None
        else:
            latitude = lat_match.group(1)
            longitude = lng_match.group(1)
        print(latitude, longitude)
        return latitude, longitude

    def Make_dataform(self, lat, longi):
        """Combine the place names with their coordinates in a DataFrame.

        Args:
            lat: Latitude values, one per row of ``place_name``.
            longi: Longitude values, one per row of ``place_name``.

        Returns:
            A DataFrame with columns ``a_point`` (column index 1 of the
            module-level ``place_name`` table), ``a_lat`` and ``a_longi``.
        """
        return pd.DataFrame({
            'a_point': place_name.iloc[:, 1],
            'a_lat': lat,
            'a_longi': longi,
        })

    # Data saving
    def Savedata(self, df):
        """Write ``df`` to ``geo_data_gaode_quchong.csv`` encoded as GBK."""
        df.to_csv('geo_data_gaode_quchong.csv', encoding='GBK')

class Run_Scrapy:
    """Runs the whole pipeline: build URLs, fetch, parse, save."""

    def __init__(self):
        """Geocode every row of the place table and save the results.

        Instantiating the class performs the entire run: it builds the
        request URLs, fetches and parses each response with a random pause
        in between, and finally writes all coordinates to the CSV file.
        A request or parse failure for one URL is reported and recorded as
        ``None`` so that the remaining rows are still processed and the
        result lists stay aligned with the input rows.
        """
        url_list = Url_Mnger().Url_join(hangshu)
        url_list_length = len(url_list)
        response_cast = Response_Cast()
        info_manger = Info_Manger()
        lat = []
        longi = []
        for j, url in enumerate(url_list):
            print(j, '/', url_list_length)
            # Random 1-1.5 s pause before each request (presumably to
            # throttle the request rate).
            sleep(random.uniform(1, 1.5))
            try:
                response_info = response_cast.Get_response(url)
                info_latitude, info_longitude = info_manger.Parse_html(response_info)
            except (requests.RequestException, IndexError) as err:
                # Results are only saved after the loop, so one unreachable
                # or unparseable address must not discard everything
                # collected so far.
                print('failed:', url, err)
                info_latitude = info_longitude = None
            lat.append(info_latitude)
            longi.append(info_longitude)
        make_dataform = info_manger.Make_dataform(lat, longi)
        info_manger.Savedata(make_dataform)

if __name__ == '__main__':
    # Instantiate the runner: the whole scrape happens in its __init__.
    # Naming the class without calling it would do nothing.
    Run_Scrapy()





# 猜你喜欢
#
# 转载自:blog.csdn.net/qq_42830971/article/details/126454509