百度POI爬取:MongoDB和txt各存一份(Python 3.6)

直接上代码:

# -*- coding: utf-8 -*-
# Python 3
# 提取城市的POI点信息并将其保存至MongoDB数据库
import urllib.request
import json
from pymongo import MongoClient
from urllib.parse import quote
import string

# Lower-left corner of the search area as [longitude, latitude] (Baidu coordinate system).
left_bottom = [114.3441373010, 38.0491078329]
# Upper-right corner of the search area as [longitude, latitude] (Baidu coordinate system).
right_top = [114.4280503408, 38.2695714013]
part_n = 2  # the area is split into a part_n x part_n grid (2*2)

# Credentials are passed to MongoClient directly: Database.authenticate() was
# deprecated in PyMongo 3.5 and removed in PyMongo 4.0. authSource='admin'
# keeps authenticating against the admin database, as before.
# NOTE: the client connects lazily, so a bad login now surfaces on the first
# database operation instead of at import time.
client = MongoClient('localhost', 27017,
                     username='root', password='root', authSource='admin')
db = client.admin  # handle to the admin database (kept for compatibility)
my_db = client.mydb  # database that receives the crawled POIs
col = my_db.luquan2  # target collection

url0 = 'http://api.map.baidu.com/place/v2/search?'
# Width (longitude) and height (latitude) of a single grid tile.
x_item = (right_top[0]-left_bottom[0])/part_n
y_item = (right_top[1]-left_bottom[1])/part_n
query = '住宅区'  # search keyword
ak = '************'  # Baidu Maps API key (ak)

def get_url_data_to_mongo():
    """Crawl the configured area tile by tile and store every POI found.

    The rectangle between the module-level ``left_bottom`` and ``right_top``
    is cut into ``part_n * part_n`` tiles. Each tile is queried page by page
    (at most 20 pages of 20 results). Every result is appended to
    ``luquan.txt`` as a ``name,lng,lat,address,area`` line via ``text_save``
    and inserted into the MongoDB collection ``col``.

    Returns:
        None. Progress is printed once per finished tile.
    """
    n = 0  # number of tiles processed so far
    for i in range(part_n):
        for j in range(part_n):
            # Lower-left corner of this tile.
            left_bottom_part = [left_bottom[0] + i * x_item, left_bottom[1] + j * y_item]
            # Upper-right corner of this tile: exactly one tile step beyond its
            # lower-left corner. (The original offset the whole region's
            # right_top instead, so tiles were region-sized, overlapped each
            # other and reached outside the area, producing duplicate POIs.)
            right_top_part = [left_bottom[0] + (i + 1) * x_item,
                              left_bottom[1] + (j + 1) * y_item]
            # The bounds parameter is built as lat,lng,lat,lng (same order as before).
            bounds = ','.join(str(v) for v in (left_bottom_part[1], left_bottom_part[0],
                                               right_top_part[1], right_top_part[0]))
            for k in range(20):
                url = (url0 + 'query=' + query + '&page_size=20&page_num=' + str(k)
                       + '&scope=1&bounds=' + bounds + '&output=json&ak=' + ak)
                # Percent-encode the non-ASCII keyword; otherwise urlopen fails
                # with "'ascii' codec can't encode".
                url = quote(url, safe=string.printable)
                # Close the HTTP response deterministically.
                with urllib.request.urlopen(url) as response:
                    hjson = json.loads(response.read())
                if hjson.get('message') != 'ok':
                    continue  # skip a failed page, as the original did
                results = hjson.get('results') or []
                if not results:
                    break  # an empty page means later pages are empty too
                for poi in results:
                    location = poi['location']
                    # address/area are read with .get(): a missing key used to
                    # raise KeyError and abort the whole crawl.
                    line = ','.join([poi['name'], str(location['lng']), str(location['lat']),
                                     poi.get('address', ''), poi.get('area', '')])
                    text_save([line], 'luquan.txt')
                    col.insert_one(poi)  # store the raw result document
            n += 1
            print('第', str(n), '个切片入库成功')

# Save the collected lines to a text file
def text_save(content, filename, mode='a'):
    """Write every item of ``content`` to ``filename``, one item per line.

    Args:
        content: iterable of strings; a newline is appended to each item.
        filename: path of the text file to write.
        mode: text mode passed to ``open`` (default ``'a'`` appends).

    The file is opened as UTF-8 so Chinese text is written the same way on
    every platform, and the ``with`` block closes the handle even if a write
    raises.
    """
    with open(filename, mode, encoding='utf-8') as file:
        file.writelines(line + '\n' for line in content)

# Start the crawl only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    get_url_data_to_mongo()

数据截图:

猜你喜欢

转载自blog.csdn.net/sunwukong_hadoop/article/details/81126861