Python 3 爬虫:爬取金庸小说所有角色

# -*- coding:utf-8 -*-
import requests
from bs4 import BeautifulSoup

# Address of the page that is downloaded and scanned for character names.
url = 'http://www.jinyongwang.com/data/renwu/'

def getHTML(url, timeout=10):
    """Download ``url`` and return the raw response body as bytes.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The undecoded response body (``bytes``).

    Raises:
        requests.exceptions.RequestException: If the connection fails, the
            request times out, or the server answers with a 4xx/5xx status.
    """
    # requests has no default timeout; without one a stalled server would
    # block this call indefinitely.
    r = requests.get(url, timeout=timeout)
    # Fail loudly on an HTTP error instead of handing an error page to the
    # parser.
    r.raise_for_status()
    return r.content

def parseHTML(html):
    """Print the text of every link in the page's ``booklist`` section.

    Locates the first ``div.main`` inside ``<body>``, the first
    ``div.booklist`` inside that, and then prints the text of each ``<a>``
    found in every ``div.datapice`` below it, with ASCII spaces removed.

    If any of the enclosing containers is missing, nothing is printed.

    Args:
        html: Page markup (``str`` or ``bytes``) to parse.

    Returns:
        None. The link texts are written to standard output.
    """
    soup = BeautifulSoup(html, 'html.parser')

    # soup.body and find() yield None when the element is absent; check at
    # each step so a missing container does not raise AttributeError.
    container = soup.body
    for css_class in ('main', 'booklist'):
        if container is None:
            return
        container = container.find('div', attrs={'class': css_class})
    if container is None:
        return

    for datapice in container.find_all('div', attrs={'class': 'datapice'}):
        for a in datapice.find_all('a'):
            print(a.get_text().replace(' ', ''))
# Fetch the page and print every name found on it.  parseHTML prints its
# results itself and returns None, so its return value is not printed
# (doing so would emit a stray "None" line after the names).
html = getHTML(url)
parseHTML(html)

猜你喜欢

转载自blog.csdn.net/qq_36625806/article/details/81268686