Python 爬虫下载图片——艺术网站

import sys
reload(sys)
sys.setdefaultencoding( "utf-8" )
import MySQLdb
import MySQLdb.cursors
import urllib2
import urllib
import json
import re
import time
import os
import random
from bs4 import BeautifulSoup
# Scrape an artist's "works" page on artron.net and download every image
# listed in the JSON array embedded in that page into a per-artist directory.

# NOTE(review): this connection and cursor are not used anywhere in the
# visible script; they are kept in case code outside this view relies on
# them. The credentials are hard-coded -- consider moving them to config.
db = MySQLdb.connect(host='localhost', user='root', passwd='root', db='python', port=3306, charset='utf8', unix_socket='/tmp/mysql.sock',cursorclass = MySQLdb.cursors.DictCursor)
db.autocommit(True)
cursor = db.cursor()


def _safe_filename(name):
    """Return ``name`` as text usable as a single path component.

    Path separators and NUL bytes are replaced with ``_`` so that a scraped
    title cannot create nested directories or escape the target directory;
    surrounding whitespace is trimmed.
    """
    # '%s' % (name,) coerces non-string JSON values (e.g. numbers) to text
    # the same way the original '%s.jpg' formatting did.
    return re.sub(r'[\\/\x00]', '_', '%s' % (name,)).strip()


headers = {}
headers["User-Agent"] = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36"
url = "http://zengfanzhi.artron.net/works"
request = urllib2.Request(url=url, headers=headers)
response = urllib2.urlopen(request)
try:
    html = response.read()
finally:
    # Release the HTTP connection even if reading the body fails.
    response.close()
html = BeautifulSoup(html, "html5lib", from_encoding='utf-8')
strj = str(html)

# The page embeds its image list as a JSON array of objects, for example:
#   [{"imageSrc":"http://img4.artron.net/artist/A0000077/brt000007700049.jpg"}];
# Take the first "[{...}]" span in the serialized document.
match = re.search(r"\[\{(.+?)\}\]", strj)
if match is None:
    raise ValueError('no embedded image JSON array found in %s' % url)
strjson = match.group(0)
imgjson = json.loads(strjson)

# The page <title> names the download directory.
title_tag = html.find('title')
author = _safe_filename(title_tag.get_text()) if title_tag is not None else ''
if not author:
    author = 'unknown'
path = os.path.join('/mnt/study/bailian', author)
if not os.path.exists(path):
    os.makedirs(path)

for index, item in enumerate(imgjson):
    imgsrc = item.get('imageSrc')
    if not imgsrc:
        # Nothing to download for this entry.
        continue
    # Entries may lack a title (see the example payload above); fall back to
    # the image's own file name, then to its position in the list.
    title = _safe_filename(item.get('title') or '')
    if not title:
        title = _safe_filename(os.path.splitext(os.path.basename(imgsrc))[0])
    if not title:
        title = str(index)
    try:
        urllib.urlretrieve(imgsrc, '%s/%s.jpg' % (path, title))
    except IOError as err:
        # One broken image should not abort the remaining downloads.
        sys.stderr.write('failed to download %s: %s\n' % (imgsrc, err))
        continue
    print(imgsrc)
发布了165 篇原创文章 · 获赞 34 · 访问量 35万+

猜你喜欢

转载自blog.csdn.net/liuhongwei_study/article/details/87928009
今日推荐