import requests
from bs4 import BeautifulSoup
# Fetch the raw HTML of a web page.
def getHtmlText(url, timeout=10):
    """Download *url* and return the response body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get`` so a stalled connection cannot
            hang the script forever.

    Returns:
        The decoded page text, or the string ``'false'`` when the request
        fails (connection problem, timeout, invalid URL, or an HTTP error
        status reported by ``raise_for_status``).
    """
    try:
        r = requests.get(url, timeout=timeout)
        r.raise_for_status()
        # Decode with the encoding guessed from the body instead of the
        # one declared (or defaulted) from the HTTP headers.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Keep the historical failure sentinel so existing callers that
        # feed the result to the parser keep working.
        return 'false'
# Parse the page and collect the product information.
def parserHtml(goods_data, html):
    """Append ``[title, price]`` pairs found in *html* to *goods_data*.

    Every ``<li class="gl-item">`` element is treated as one product. The
    title is read from the ``title`` attribute of the item's first ``<a>``
    tag and the price from the ``<i>`` tag inside its
    ``<div class="p-price">``.

    Args:
        goods_data: List that is extended in place with one
            ``[title, price]`` entry per successfully parsed product.
        html: Page markup to parse.

    Returns:
        None. Results are delivered through *goods_data*. The number of
        matched items is printed, and a blank line is printed for each
        item that lacks the expected tags.
    """
    soup = BeautifulSoup(html, 'html.parser')
    lis = soup.find_all('li', class_='gl-item')
    print(len(lis))
    for item in lis:
        try:
            title = item.a['title']
            price = item.find('div', class_='p-price').i.string
        except (AttributeError, KeyError, TypeError):
            # A missing <a>, title attribute, price <div> or <i> tag means
            # this entry is not a regular product; skip it.
            print('')
        else:
            goods_data.append([title, price])
# Print the collected data.
def printHtmlGoods(goods_data):
    """Print the collected products as a two-column table.

    Args:
        goods_data: Sequence of rows whose first element is the product
            title and whose second element is the price.

    Returns:
        None. A header line followed by one line per product is written
        to standard output; the title is centered in 100 columns and the
        price in 8 columns.
    """
    std = r'{0:^100}{1:^8}'
    print(std.format('商品名称', '价格'))
    for row in goods_data:
        title, price = row[0], row[1]
        # None does not accept a centered format spec, so show a missing
        # price as an empty cell instead of raising TypeError.
        print(std.format(title, '' if price is None else price))
def main():
    """Scrape several JD search-result pages for a keyword and print them.

    For each of ``pages`` iterations the search URL is built, printed,
    downloaded with ``getHtmlText`` and parsed with ``parserHtml``; the
    accumulated products are printed once at the end with
    ``printHtmlGoods``.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import list.
    from urllib.parse import urlencode

    url_basic = 'https://search.jd.com/Search?'
    pages = 3
    keyword = '电脑'
    goods_data = []
    for i in range(pages):
        # Requests pages 1, 3, 5, ...
        # NOTE(review): presumably the site numbers successive result
        # pages with odd values - confirm against the live site.
        page = 1 + i * 2
        # urlencode percent-encodes the keyword and emits a well-formed
        # "wq=<keyword>" pair (the "=" was missing in the hand-built URL).
        query = urlencode([
            ('keyword', keyword),
            ('enc', 'utf-8'),
            ('wq', keyword),
            ('page', page),
        ])
        url = url_basic + query
        print(url)
        html = getHtmlText(url)
        parserHtml(goods_data, html)
    printHtmlGoods(goods_data)
# Run the scraper only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()
python爬虫--11 京东商城
猜你喜欢
转载自blog.csdn.net/qq_25672165/article/details/104914201
今日推荐
周排行