Python 爬取网页

1. 使用requests库

import requests 
# Fetch a single page and print its decoded HTML.
url = "http://www.starbaby.cn/zhinan/609987"
# requests has no default timeout; without one an unresponsive server
# would block this script indefinitely.
req = requests.get(url, timeout=10)
# Raise on 4xx/5xx so an error page is not printed as if it were the content.
req.raise_for_status()
# Explicitly specify the page encoding; in most cases this can be omitted.
req.encoding = 'utf-8'
print(req.text)

2. 使用BeautifulSoup

from bs4 import BeautifulSoup
from bs4 import UnicodeDammit
import requests

def run():
    """Fetch the listing page and print the link text of each table cell.

    Downloads ``http://zy.upln.cn/gongshi2014/index.html``, parses it with
    BeautifulSoup's ``html.parser`` and, for every ``<td>`` of every ``<tr>``
    inside the first ``<tbody>``, prints the text of the cell's first ``<a>``.
    Cells that contain no link are skipped, and nothing is printed when the
    page has no ``<tbody>`` element.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server responds with an HTTP error status.
    """
    # requests has no default timeout; set one so a dead server cannot hang us.
    response = requests.get('http://zy.upln.cn/gongshi2014/index.html', timeout=10)
    response.raise_for_status()

    # Pass the raw bytes so BeautifulSoup detects the charset itself (GBK and
    # UTF-8 pages both work). This replaces text.encode(encoding), which
    # rebuilt the same bytes but failed when the encoding was None.
    soup = BeautifulSoup(response.content, 'html.parser')

    table_body = soup.find('tbody')
    if table_body is None:
        # find() returns None when the page has no <tbody>; nothing to print.
        return

    for row in table_body.find_all('tr'):
        for cell in row.find_all('td'):
            link = cell.a
            if link is None:
                # Cell without an <a> tag: there is no link text to report.
                continue
            print(link.text)


# Run the scraper only when this file is executed directly, not when imported.
if __name__ == "__main__":
    run()


猜你喜欢

转载自blog.csdn.net/sunfoot001/article/details/75805348