#coding=utf-8
'''
Created on 2012-5-17

@author: chenhuiting
'''
# Python 2 script: download one web page, transparently un-gzip the body
# when the server compressed it, decode it to unicode and print it.
import sys
import urllib2
import gzip
import StringIO

# URL of the page to fetch
url = "http://www.915.com/news/201005/25-052512922010.html"
# Character encoding of the page (e.g. gbk or utf8)
page_encode = "utf8"

request = urllib2.Request(url)
# Advertise gzip support so the server may send a compressed body.
request.add_header("Accept-encoding", "gzip")

usock = urllib2.urlopen(request)
try:
    page = usock.read()
    # Read the header while the response is still open.
    content_encoding = usock.headers.get('content-encoding', None)
finally:
    # Always release the connection, even if reading the body fails.
    usock.close()

# Decompress the body if the server gzipped it
if content_encoding == 'gzip':
    page = gzip.GzipFile(fileobj=StringIO.StringIO(page)).read()

# Convert the raw bytes to unicode using the configured page encoding
if not isinstance(page, unicode):
    page = unicode(page, page_encode)

print(page)
python抓取一个页面
猜你喜欢
转载自have-life.iteye.com/blog/1533521
今日推荐
周排行