python-爬取东方财富网期货市场大商所数据

python-爬取东方财富网期货市场大商所数据
注意:因为是通过谷歌浏览器(Chrome)爬取,所以要先下载好与浏览器版本匹配的chromedriver。
用到的模块有标准库的os、time,以及第三方库openpyxl和selenium(其中的webdriver)。
这里是将爬取下来的数据先存为txt,然后再转化为excel文件。

def dashangsuo():
    """Scrape the Eastmoney futures table (sheet "期货市场大商所") and export it to Excel.

    Steps:
      1. Open the quote page in Chrome and wait for it to render.
      2. For each of the table's pages, append the visible table text to a
         dated ``YYYY-MM-DD.txt`` file, then click the "next page" link.
      3. Re-read the txt file, split every line on single spaces and write
         one worksheet row per line into a dated ``YYYY-MM-DD.xlsx`` file.

    The txt file is opened in append mode, so running the function more than
    once on the same day accumulates the data of every run.

    Requires Chrome plus a chromedriver reachable by Selenium, and the
    third-party packages ``selenium`` and ``openpyxl``.
    """
    import os
    import time

    import openpyxl
    from selenium import webdriver
    from selenium.webdriver.common.by import By

    base_url = "http://quote.eastmoney.com/center/gridlist2.html#futures_114"
    # Raw strings: the backslashes are path separators, not escape sequences.
    txt_dir = os.path.abspath(r"D:\python代码\eastmoneytry\eco_dashangsuo\output")
    xlsx_dir = os.path.abspath(r"D:\python代码\eastmoneytry\eco_dashangsuo\output_xlsx")
    # The table is hard-coded to 13 pages, as in the original loop.
    page_count = 13

    # Make sure both output folders exist before any file is written.
    os.makedirs(txt_dir, exist_ok=True)
    os.makedirs(xlsx_dir, exist_ok=True)

    current_time = time.strftime('%Y-%m-%d', time.localtime())
    txt_path = os.path.join(txt_dir, current_time + ".txt")
    xlsx_path = os.path.join(xlsx_dir, current_time + ".xlsx")

    # Open Chrome; the finally clause guarantees the browser is closed even
    # when an element lookup or click raises.
    browser = webdriver.Chrome()
    try:
        browser.get(base_url)
        # The page may respond slowly, so give it 5 seconds to load.
        time.sleep(5)

        for page_index in range(page_count):
            table = browser.find_element(By.ID, "table_wrapper-table")
            # Persist the page before navigating so a failing click cannot
            # lose data that has already been read; one trailing newline
            # separates consecutive pages.
            with open(txt_path, "a", encoding="utf-8") as fp:
                fp.write(table.text + "\n")

            # There is nothing to navigate to after the final page.
            if page_index < page_count - 1:
                browser.find_element(
                    By.XPATH, "//*[@id='main-table_paginate']/a[2]"
                ).click()
                # Let the next page finish loading before reading it.
                time.sleep(2)
    finally:
        browser.quit()

    # Convert the collected text into a workbook: one row per text line.
    wb = openpyxl.Workbook()
    sheet = wb.active
    sheet.title = "期货市场大商所"
    with open(txt_path, 'r', encoding="utf-8") as txt_file:
        for line in txt_file:
            # Drop the line terminator so it does not end up in the last cell.
            sheet.append(line.rstrip("\n").split(" "))

    wb.save(xlsx_path)

# Script entry point: run the scraper immediately when this file is executed.
dashangsuo()

猜你喜欢

转载自blog.csdn.net/m0_50481455/article/details/108724684