百度地图API爬取不同类型POI的详细数据

一、相关概念
查询某个范围内的所有POI

参数介绍:

page_size:单次查询返回的POI的数量,最大值为20
page_num:分页页码,从0开始;查找的POI数量超过20时,会分页显示;比如60个POI就会分3页;此时,page_num=0/1/2会依次返回全部的数据;当page_num=3(即第4页)时,结果集大小为0;
scope:1为默认值;2会显示详细数据
region:检索的行政区域
URL链接:

http://api.map.baidu.com/place/v2/search/?query=查询关键字&page_size=20&page_num=0&output=json&bounds=40.817,111.697,40.821,111.709&scope=2&ak=你的ak

查询结果示例:

{
    "status":0,
    "message":"ok",
    "total":2,
    "result_type":"poi_type",
    "results":[
    {
        "name":"红螺寺",
        "location":{
            "lat":40.390454,
            "lng":116.632411
        },
        "address":"北京市怀柔区红螺东路2号",
        "province":"北京市",
        "city":"北京市",
        "area":"怀柔区",
        "street_id":"",
        "telephone":"(010)60681175,(010)60681639",
        "detail":1,
        "uid":"605884e7c61e3573871541a3",
        "detail_info":{
            "tag":"旅游景点;文物古迹",
            "navi_location":{
                "lng":116.63176774842,
                "lat":40.37846005246
            },
            "type":"scope",
            "detail_url":"http://api.map.baidu.com/place/detail?uid=605884e7c61e3573871541a3&output=html&source=placeapi_v2",
            "overall_rating":"4.3",
            "comment_num":"200",
            "children":[

            ]
        }
    },
    {
        "name":"卧佛寺",
        "location":{
            "lat":40.013776,
            "lng":116.213915
        },
        "address":"北京市海淀区卧佛寺路北京植物园内",
        "province":"北京市",
        "city":"北京市",
        "area":"海淀区",
        "street_id":"934b3dbf0a8d977b8b2fb5c0",
        "detail":1,
        "uid":"934b3dbf0a8d977b8b2fb5c0",
        "detail_info":{
            "tag":"旅游景点;文物古迹",
            "navi_location":{
                "lng":116.21389548337,
                "lat":40.011540367963
            },
            "type":"scope",
            "detail_url":"http://api.map.baidu.com/place/detail?uid=934b3dbf0a8d977b8b2fb5c0&output=html&source=placeapi_v2",
            "overall_rating":"4.7",
            "image_num":"38",
            "comment_num":"74",
            "children":[

            ]
        }
    }
    ]
}

查询某个POI的详细数据

参数介绍:

uid:某个POI对应的唯一的标识(通过范围查询获取到的)
URL链接:

http://api.map.baidu.com/place/v2/detail?uid=fabbfbf31f9a6964ad31e55f&output=json&scope=2&ak=你的ak

查询结果示例:

{
    "status":0,
    "message":"ok",
    "result":{
        "uid":"605884e7c61e3573871541a3",
        "street_id":"",
        "name":"红螺寺",
        "location":{
            "lng":116.63241097199,
            "lat":40.390454021402
        },
        "address":"北京市怀柔区红螺东路2号",
        "province":"北京市",
        "city":"北京市",
        "area":"怀柔区",
        "telephone":"(010)60681175,(010)60681639",
        "detail_info":{
            "tag":"旅游景点;文物古迹",
            "navi_location":{
                "lng":116.63176778525,
                "lat":40.378460018453
            },
            "detail_url":"http://api.map.baidu.com/place/detail?uid=605884e7c61e3573871541a3&output=html&source=placeapi_v2",
            "type":"scope",
            "price":"¥54元",
            "overall_rating":"4.3",
            "image_num":"133",
            "comment_num":"200",
            "scope_type":"古迹",
            "scope_grade":"AAAA",
            "content_tag":"适合亲子;登山;礼佛祈福;赏红叶;适合拍照;日出;适合跑步;银杏;情侣约会;香火旺;免费项目;收费合理;空气清新;绿植繁茂;位置优越;景色优美;人气旺;景区大;气势宏大;环境不错;玩的开心;休闲好去处;值得游玩;建筑风格独特;景点多;保存完整;停车方便;交通便利;设施新全;服务热情;收获颇丰;卫生干净"
        },
        "detail":1
    }
}
二、相关链接
百度地图API的POI分类

http://lbsyun.baidu.com/index.php?title=lbscloud/poitags

申请ak

http://lbsyun.baidu.com/apiconsole/key#/home

POI检索相关介绍

http://lbsyun.baidu.com/index.php?title=webapi/guide/webservice-placeapi

三、功能模块
范围查询获取POI数据

# detail_info fields of a range-search hit that are copied into the
# same-named poidatas columns when the API returns them.
_SEARCH_DETAIL_COLUMNS = ('overall_rating', 'distance', 'comment_num', 'price')


def _storePOI(cursor, tag, item):
    """Insert one POI dict into poidatas; return True when a row was added.

    A POI whose uid is already stored is reported on stdout and skipped.
    """
    name = item['name']
    uid = item['uid']
    if isExist(cursor, uid):
        print(f'{name}已经存在')
        return False
    # Placeholders let the driver escape the values, so a quote inside a
    # name or address can no longer break (or inject into) the statement.
    insert = (
        "insert into poidatas(tag,uid,lat,lng,name,address,province,city,area) "
        "values (%s,%s,%s,%s,%s,%s,%s,%s,%s)"
    )
    location = item['location']
    inserted = bool(cursor.execute(insert, (
        tag, uid, str(location['lat']), str(location['lng']), name,
        item.get('address', ''), item.get('province', ''),
        item.get('city', ''), item.get('area', ''),
    )))
    # detail_info and every field inside it are optional in the response.
    detail_info = item.get('detail_info') or {}
    for column in _SEARCH_DETAIL_COLUMNS:
        if column in detail_info:
            # `column` comes from the fixed tuple above, never from the
            # response, so interpolating it into the statement is safe.
            cursor.execute(
                f"update poidatas set {column} = %s where uid = %s",
                (detail_info[column], uid),
            )
    return inserted


def insertPOIData(name_list, ak, cursor):
    """Run a range search for every category keyword and store the hits.

    Args:
        name_list: POI category keywords. Processing stops at the first
            empty string (blank spreadsheet cell).
        ak: Baidu Map API key.
        cursor: DB-API cursor on the database that holds the poidatas table.
            All statements go through this cursor; the caller owns the
            connection and is responsible for committing.

    Returns:
        None. The number of POIs reported by the API and the number of rows
        inserted are printed to stdout.
    """
    # Number of POIs the API reported across all keywords.
    total = 0
    # Number of rows actually written (duplicates are skipped).
    inserttotal = 0
    for i in name_list:
        # A blank spreadsheet cell ends the keyword list.
        if i == '':
            break
        counted = False
        # Page numbers start at 0; at most 10 pages are requested per keyword.
        for j in range(0, 10):
            time.sleep(3)  # pause between requests (presumably API rate limiting)
            url = getUrlByName(i, ak, j)
            print(url)
            data = requests.get(url).json()
            print(data)
            # status 0 means the request succeeded; otherwise try the next page.
            if data['status'] != 0:
                continue
            hits = data.get('total')
            # total == 0 means nothing (more) matches this keyword.
            if hits == 0:
                break
            # `total` is the overall hit count of the query, not the page
            # size, so it is added once per keyword instead of once per page.
            if hits and not counted:
                total += hits
                counted = True
            if 'results' in data:
                # An empty page means there is nothing further to fetch.
                if not data['results']:
                    break
                for item in data['results']:  # up to 20 entries per page
                    if _storePOI(cursor, i, item):
                        inserttotal += 1
            # A response may carry a single 'result' object instead of a list.
            if 'result' in data:
                if _storePOI(cursor, i, data['result']):
                    inserttotal += 1
    print('总共查找到的POI数量为 : ')
    print(total)
    print('插入数据库的POI数量为 : ')
    print(inserttotal)

根据uid查询POI详细数据

# detail_info fields of a detail response that are copied into the
# same-named poidatas columns when the API returns them.
_DETAIL_COLUMNS = (
    'shop_hours',
    'detail_url',
    'image_num',
    'service_rating',
    'environment_rating',
)


def updateDetailInfo(ak, cursor):
    """Fetch the detail record of every stored POI and save the extra fields.

    Args:
        ak: Baidu Map API key.
        cursor: DB-API cursor on the database that holds the poidatas table;
            it is used both to read the uid list and to run the updates.

    Returns:
        None. Each request URL, response and update statement is printed.
    """
    selectsql = 'SELECT uid FROM poidatas'
    cursor.execute(selectsql)
    result = cursor.fetchall()
    for row in result:
        uid = row[0]
        url2 = 'http://api.map.baidu.com/place/v2/detail?uid=%s&output=json&scope=2&ak=%s' % (uid, ak)
        print(url2)
        time.sleep(3)  # pause between requests (presumably API rate limiting)
        data = requests.get(url2).json()
        print(data)
        # status 0 means the request succeeded.
        if data['status'] != 0 or 'result' not in data:
            continue
        # 'result' is a single object here, not a list of hits.
        item = data['result']
        if not isinstance(item, dict):
            continue
        # detail_info and every field inside it are optional in the response.
        detail_info = item.get('detail_info') or {}
        for column in _DETAIL_COLUMNS:
            if column not in detail_info:
                continue
            # `column` comes from the fixed tuple above; the values are
            # passed as parameters so the driver escapes them.
            update = f"update poidatas set {column} = %s where uid = %s"
            params = (detail_info[column], item.get('uid', uid))
            print(update, params)
            cursor.execute(update, params)
判断POI是否已经存入数据库

def isExist(cursor, uid):
    """Return True when a poidatas row with this uid is already stored.

    Args:
        cursor: DB-API cursor whose ``execute`` returns the number of rows
            the statement matched (pymysql behaves this way).
        uid: POI identifier to look up.

    Returns:
        True if at least one matching row exists, otherwise False.
    """
    # The uid is passed as a parameter so the driver escapes it; only
    # existence matters, so a single constant column is selected.
    sql = "select 1 from poidatas where uid = %s limit 1"
    return bool(cursor.execute(sql, (uid,)))
从excel表中读取POI类别

def readExcel(path):
    """Collect the first-column value of every row of every sheet.

    Args:
        path: Location of the workbook, passed to ``xlrd.open_workbook``.

    Returns:
        A list with the column-0 cell values, in sheet order and then row
        order. Blank cells are included as returned by xlrd.
    """
    workbook = xlrd.open_workbook(path)
    data_list = []
    for table in workbook.sheets():
        data_list.extend(table.cell(row, 0).value for row in range(table.nrows))
    return data_list
拼接访问URL

def getUrlByName(name, ak, j, bounds='40.817,111.697,40.821,111.709'):
    """Build the place-search URL for one keyword and result page.

    Args:
        name: Search keyword (POI category); it is percent-encoded so that
            characters such as ``&``, ``#`` or spaces cannot corrupt the
            query string.
        ak: Baidu Map API key.
        j: Page number, starting at 0.
        bounds: Search rectangle as the four comma-separated coordinates
            expected by the ``bounds`` parameter. Defaults to the rectangle
            the crawler was written for.

    Returns:
        The request URL as a string.
    """
    from urllib.parse import quote

    # Rectangle search: returns comparatively few POIs. An administrative
    # region search (region=<city name> instead of bounds=...) returns more.
    return (
        'http://api.map.baidu.com/place/v2/search/'
        '?query=%s&page_size=20&page_num=%s&output=json&bounds=%s&scope=2&ak=%s'
        % (quote(str(name), safe=''), j, bounds, ak)
    )
Main函数

def Main():
    """Crawl POIs for every category in the spreadsheet and store them.

    Reads the category keywords from the Excel file, runs the range search
    for each of them, then enriches every stored POI with its detail record.
    """
    ak = "~~~~~"  # placeholder: put your own Baidu Map API key here
    name_list = readExcel(r'D:\poi类别.xls')
    db = pymysql.connect(host="localhost", user="root", password="root", database="poi")
    try:
        cursor = db.cursor()
        try:
            insertPOIData(name_list, ak, cursor)
            # Commit after the first phase so a failure while fetching
            # details does not discard the rows already collected.
            db.commit()
            updateDetailInfo(ak, cursor)
            db.commit()
        finally:
            cursor.close()
    finally:
        # Always release the connection, even when a request or query fails.
        db.close()

猜你喜欢

转载自blog.csdn.net/weixin_43214644/article/details/126488190
今日推荐