# Download all the pictures on the page.
# coding: utf-8
import urllib
import re
import sys
import os
# Address of the page that is fetched and scanned for images when the script runs.
url = 'http://tieba.baidu.com/p/1753935195'
# with open('F:\hack\python\python_project\html\htmlpage.html', 'w') as f:
# f.write(htmlcode)
def getHtml(url):
    """Fetch ``url`` and return the raw response body.

    Args:
        url: address of the page to download.

    Returns:
        Whatever the response's ``read()`` yields: the undecoded body
        (``bytes`` on Python 3, ``str`` on Python 2).
    """
    # ``urlopen`` lives in ``urllib.request`` on Python 3 and directly in
    # ``urllib`` on Python 2; resolve it locally so either interpreter works.
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen

    page = urlopen(url)
    try:
        return page.read()
    finally:
        # Always release the connection, even if read() raises.
        page.close()
def getPicList(htmlcode):
    """Return the ``.jpg`` image URLs found in ``htmlcode``.

    A URL is collected when it appears as ``src="<url>.jpg"`` immediately
    followed by a space and the word ``width``.

    Args:
        htmlcode: page markup as text. Raw bytes are decoded as UTF-8
            (undecodable bytes replaced) before matching.

    Returns:
        List of matched URL strings in document order; empty when nothing
        matches.
    """
    # A str pattern cannot search bytes on Python 3. On Python 2 ``bytes`` is
    # ``str``, so this branch is skipped and the input is used unchanged.
    if isinstance(htmlcode, bytes) and not isinstance(htmlcode, str):
        htmlcode = htmlcode.decode('utf-8', 'replace')

    # ``\.`` matches the literal dot before "jpg" (the previous ``\j`` is a bad
    # escape that Python 3.7+ refuses to compile). ``[^"]`` keeps each match
    # inside one attribute value so it cannot run across several tags.
    reg_pic = re.compile(r'src="([^"]+\.jpg)" width')
    return reg_pic.findall(htmlcode)
def downPic(piclist):
    """Download every image URL in ``piclist`` into an ``img_dir`` folder.

    The folder is located inside ``sys.path[0]`` and is created when it does
    not exist yet. Each image is saved under the last ``/``-separated
    component of its URL. When a file with that name is already present the
    download is skipped and a "<path> existed" message is printed.

    Args:
        piclist: iterable of image URL strings.
    """
    # ``urlretrieve`` lives in ``urllib.request`` on Python 3 and directly in
    # ``urllib`` on Python 2; resolve it locally so either interpreter works.
    try:
        from urllib.request import urlretrieve
    except ImportError:
        from urllib import urlretrieve

    # Build paths with os.path.join rather than hard-coded backslashes so the
    # target directory is also valid on non-Windows systems.
    img_dir = os.path.join(sys.path[0], 'img_dir')
    if not os.path.exists(img_dir):
        os.makedirs(img_dir)

    for pic in piclist:
        filename = pic.rsplit('/', 1)[-1]
        target = os.path.join(img_dir, filename)
        if os.path.exists(target):
            print(target + " existed")
        else:
            urlretrieve(pic, target)
# Run the whole pipeline: fetch the page, extract its image URLs, save them.
downPic(getPicList(getHtml(url)))