使用python对图片进行爬取

对京东上的手机图片进行爬取:按 F12 找到相应的页面代码后进行模式匹配,并过滤掉无用信息,实现代码如下:

import re
import urllib.request as request
import urllib

def craw(url, page):
    r"""Download the product thumbnails found on one search-result page.

    Fetches ``url``, isolates the markup between the ``J_goodsList``
    container and the pagination bar, and saves every lazy-loaded 220x220
    product image it finds to ``F:\C\mobilepic\<page>-<n>.jpg``, where
    ``n`` is the 1-based position of the image on the page.

    Args:
        url: Address of the search-result page to scan.
        page: Page label used as the prefix of the saved file names.

    Returns:
        None. Progress and failures are reported with ``print``.

    Raises:
        urllib.error.URLError: If the result page itself cannot be fetched
            (only the per-image downloads are guarded).
    """
    with request.urlopen(url) as response:
        # Decode the payload; str(bytes) would produce the "b'...'" repr
        # with escaped newlines and escaped non-ASCII bytes.
        html = response.read().decode("utf-8", errors="replace")

    # re.S lets "." cross line breaks, which the decoded text now contains.
    goods_pat = re.compile(
        r'<div id="J_goodsList".+?<div class="page clearfix">', re.S)
    goods = goods_pat.search(html)
    if goods is None:
        # Nothing to crawl (layout changed, blocked request, empty result).
        print("No product list found on page", page)
        return

    # The capture group extracts the image address directly instead of
    # slicing the matched tag at hard-coded offsets.
    image_pat = re.compile(
        r'<img width="220" height="220" class="err-product" data-img="1" '
        r'source-data-lazy-img="(.+?\.jpg)" />', re.S)

    # enumerate() ties each file name to the image's position, so a failed
    # download leaves a gap but can never make later files overwrite
    # earlier ones.
    for index, src in enumerate(image_pat.findall(goods.group(0)), start=1):
        imagename = "F:\\C\\mobilepic\\" + str(page) + '-' + str(index) + '.jpg'
        if src.startswith("//"):
            # Protocol-relative address, e.g. //img10.example.com/x.jpg
            imageurl = "http:" + src
        elif src.startswith(("http://", "https://")):
            imageurl = src
        else:
            imageurl = "http://" + src
        try:
            request.urlretrieve(imageurl, filename=imagename)
            print(imagename)
        except urllib.error.URLError as e:
            # Best effort: report the failure and move on to the next image.
            print("Failed to download", imageurl, "-", e)


# Driver: crawl two result pages of the mobile-phone search and report each
# one as it completes.
# NOTE(review): the page label is spliced in directly before a literal "3",
# so the requests carry page=13 and page=23 -- confirm this is intended.
SEARCH_URL_TEMPLATE = (
    "https://search.jd.com/Search?keyword=%E6%89%8B%E6%9C%BA&enc=utf-8"
    "&qrst=1&rt=1&stop=1&vt=2&cid2=653&cid3=655&page={}3&s=58&click=0"
)

for page_no in (1, 2):
    url = SEARCH_URL_TEMPLATE.format(page_no)
    craw(url, page_no)
    print("Finish:", page_no)

结果:

下载图片
标题

猜你喜欢

转载自blog.csdn.net/qq_41359265/article/details/84672870