爬虫下载文档的3种方法

import urllib2,cookielib

url = 'http://www.baidu.com'

print "1"
response = urllib2.urlopen(url)
print response.getcode()
print len(response.read())

print "2"
request = urllib2.Request(url)
request.add_header("user-Agent","Mozilla/5.0")
response2 = urllib2.urlopen(request)
print response2.getcode()
print len(response2.read())

print "3"
cj = cookielib.CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
urllib2.install_opener(opener)
response3 = urllib2.urlopen(url)
print response3.getcode()
print cj
print response3.read()

猜你喜欢

转载自blog.51cto.com/12607410/2427643
今日推荐