Python 爬虫（11）：京东商城

import requests
from bs4 import BeautifulSoup
## 获取网页信息
def getHtmlText(url):
    """Fetch ``url`` with an HTTP GET and return the response body as text.

    Args:
        url: The address to request.

    Returns:
        The decoded response text, or the literal string ``'false'`` when the
        request fails (connection error, timeout, invalid URL, or an HTTP
        error status reported by ``raise_for_status``).
    """
    try:
        # A timeout keeps a stalled connection from hanging the script forever.
        r = requests.get(url, timeout=10)
        r.raise_for_status()
        # Decode using the encoding detected from the body rather than the
        # one declared in the response headers.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Callers expect the 'false' sentinel instead of an exception.
        return 'false'

## 解析网页参数,获取商品信息
def parserHtml(goods_data, html):
    """Extract product titles and prices from a search-result page.

    Every ``<li class="gl-item">`` element is inspected. The title is read
    from the ``title`` attribute of the item's first ``<a>`` tag, and the
    price from the ``<i>`` tag inside ``<div class="p-price">``. Items that
    lack any of these pieces are skipped.

    Args:
        goods_data: List that is extended in place with ``[title, price]``
            pairs.
        html: HTML source of the page to parse.

    Returns:
        None. Results are appended to ``goods_data``.
    """
    soup = BeautifulSoup(html, 'html.parser')
    items = soup.find_all('li', class_='gl-item')
    # Report how many candidate product entries the page contained.
    print(len(items))
    for item in items:
        try:
            title = item.a['title']
            price = item.find('div', class_='p-price').i.string
        except (AttributeError, KeyError, TypeError):
            # A missing tag yields None (AttributeError/TypeError on access)
            # and a missing attribute raises KeyError; skip such entries.
            continue
        goods_data.append([title, price])
## 输出数据
def printHtmlGoods(goods_data):
    """Print the collected goods as a two-column table (name, price).

    Args:
        goods_data: Sequence of rows where index 0 is the product title and
            index 1 is its price.

    Returns:
        None. A header line and one line per row are written to stdout.
    """
    std = r'{0:^100}{1:^8}'
    print(std.format('商品名称', '价格'))
    for row in goods_data:
        price = row[1]
        # None does not accept the '^8' format spec, so show an empty cell.
        print(std.format(row[0], '' if price is None else price))

def main():
    """Scrape several JD search-result pages for a keyword and print them.

    Builds one search URL per page, downloads it with ``getHtmlText``,
    collects ``[title, price]`` pairs with ``parserHtml`` and finally prints
    everything with ``printHtmlGoods``.
    """
    url_basic = 'https://search.jd.com/Search?keyword='
    pages = 3
    keyword = '电脑'

    goods_data = []
    for i in range(pages):
        # Requests page numbers 1, 3, 5, ...
        # NOTE(review): presumably the site numbers visible result pages
        # with odd values -- confirm against the live site.
        page = 1 + i * 2
        # 'wq' needs its own '=' so the keyword is sent as that parameter's
        # value instead of being glued onto the parameter name.
        url = (url_basic + keyword + '&enc=utf-8&wq=' + keyword
               + '&page=' + str(page))
        print(url)
        html = getHtmlText(url)
        parserHtml(goods_data, html)
    printHtmlGoods(goods_data)
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

发布了108 篇原创文章 · 获赞 7 · 访问量 5185

猜你喜欢

转载自blog.csdn.net/qq_25672165/article/details/104914201