帮同学爬取某官方公众号的众多企业信息

import requests
import xlwt
from bs4 import BeautifulSoup
from threading import Thread
# 详情页 id 的范围：725 ~ 1160（含两端）
def main(start_id=725, end_id=1160, output_path="data.xls", timeout=10):
    """Scrape detail pages by id and save their fields to an .xls workbook.

    For every id from ``start_id`` to ``end_id`` (inclusive) the detail page
    is fetched and the stripped text of each element matching the CSS
    selector ``.answerDesc`` is written to consecutive columns of one row.
    Rows start at index 1 (row 0 is left empty) and one row is reserved per
    id, so a page that fails to load or has no matching elements leaves a
    blank row instead of shifting the following rows.

    Args:
        start_id: First page id to fetch.
        end_id: Last page id to fetch (inclusive).
        output_path: Path of the .xls file to write.
        timeout: Per-request timeout in seconds, so a stalled connection
            cannot hang the whole crawl.
    """
    workbook = xlwt.Workbook()
    sheet = workbook.add_sheet('test', cell_overwrite_ok=True)
    url_template = "http://jingxinwei.wxshidai.com/jxj/style/detail?id=%d&redirect=1"

    try:
        for row, page_id in enumerate(range(start_id, end_id + 1), start=1):
            print(page_id)  # progress indicator
            try:
                response = requests.get(url_template % page_id, timeout=timeout)
            except requests.RequestException as exc:
                # A single bad page must not abort the crawl; its row stays blank.
                print("skipping id %d: %s" % (page_id, exc))
                continue

            soup = BeautifulSoup(response.text, "html.parser")
            for col, tag in enumerate(soup.select(".answerDesc")):
                sheet.write(row, col, tag.get_text().strip())
    finally:
        # Save even when interrupted so rows collected so far are not lost.
        workbook.save(output_path)

# Run the scraper on its own thread; it starts as soon as this module executes.
t = Thread(target=main)
t.start()

猜你喜欢

转载自www.cnblogs.com/cjj-zyj/p/10097735.html