Python3小工具之小说爬虫

使用Python3编写小说爬虫爬取笔趣阁网站的小说。

使用方法:将源代码保存为book.py文件,自行前往笔趣阁网站查询书号,在拥有python3环境的命令行或者IDE里执行以下命令

# 例如下载笔趣阁书号为 1 的小说龙符
# 运行
python3 book.py 1
# 或者(先 chmod +x book.py 赋予执行权限后)
./book.py 1

Python3源代码如下

#!/usr/bin/python3
# -*- coding: utf-8 -*-
import sys
import requests
from bs4 import BeautifulSoup


def crawl_book_use_bs4(page, timeout=30):
    """Fetch a biqiuge.com page and return it parsed as BeautifulSoup.

    page: path appended to 'http://www.biqiuge.com/book/' — either a
          book id (index page) or '<book_id>/<chapter_file>' (chapter page).
    timeout: seconds to wait for the HTTP response (new optional
          parameter; default keeps callers working unchanged).

    Returns a BeautifulSoup over the decoded HTML.
    Raises requests.RequestException on network failure or HTTP error.
    """
    url = 'http://www.biqiuge.com/book/' + page
    # timeout prevents the scraper from hanging forever on a stalled
    # connection; the original had no timeout at all.
    res = requests.get(url, timeout=timeout)
    # Fail loudly on HTTP errors instead of silently parsing an error page.
    res.raise_for_status()
    # The site serves GBK-encoded Chinese text; requests' charset guess
    # is unreliable here, so force the decoding.
    res.encoding = 'gbk'
    return BeautifulSoup(res.text, 'html.parser')


def crawl_book_info(page):
    """Scrape a book's index page: fill the module-level chapter lists
    and return the book's title and author.

    Side effects: appends each chapter title to ``list_chapters`` and
    each chapter href to ``list_url`` (module-level lists).

    Returns (name, author).
    Raises ValueError when the page has no recognizable book-info block
    (e.g. an invalid book id) — the original silently returned None here,
    which made the caller die later with an unrelated TypeError.
    """
    soup = crawl_book_use_bs4(page)
    # ids are unique per document, so find() is equivalent to the
    # original find_all() loop over at most one element.
    chapter_div = soup.find('div', id='list')
    if chapter_div is not None:
        for link in chapter_div.find_all('a'):
            list_chapters.append(link.get_text())
            list_url.append(link['href'])
    info_div = soup.find('div', id='info')
    if info_div is None:
        raise ValueError('no book info found for page ' + repr(page))
    return info_div.find('h1').get_text(), info_div.find('a').get_text()


def crawl_book_text(page):
    """Return the body text of one chapter page.

    page: '<book_id>/<chapter_file>' path fragment for the chapter.
    Returns the text of the first div#content, or None when the page
    has no such div (matches the original's implicit-None fallthrough).
    """
    soup = crawl_book_use_bs4(page)
    content = soup.find('div', id='content')
    if content is None:
        return None
    return content.get_text()


# Chapter hrefs and titles, filled as a side effect of crawl_book_info().
list_url = []
list_chapters = []


def main():
    """Download every chapter of the book given by argv[1] into
    '<title>.txt' in the current directory."""
    if len(sys.argv) < 2:
        sys.exit('usage: python3 book.py <book_id>')
    book_id = str(sys.argv[1])
    info = crawl_book_info(book_id)
    # 'a' (append) mode matches the original: re-running the script
    # appends rather than overwrites. The with-statement guarantees the
    # file is closed even if a download fails mid-run (the original
    # leaked the handle on any exception).
    with open(str(info[0]) + '.txt', 'a', encoding='utf-8') as book:
        book.write(str(info[0]) + '\n' + info[1] + '\n\n')
        for chapter, href in zip(list_chapters, list_url):
            book.write('\n' + str(chapter) + '\n')
            # NOTE(review): this joins '<book_id>/<href>'; if the site's
            # hrefs are absolute paths ('/book/...') this produces a
            # wrong URL — confirm against the live site's markup.
            text = crawl_book_text(book_id + '/' + str(href))
            # Skip chapters whose page had no content div; the original
            # crashed here with write(None).
            if text is not None:
                book.write(text)


if __name__ == '__main__':
    main()

猜你喜欢

转载自blog.csdn.net/chickenstar/article/details/80270117