Python爬取新浪财经商品期权

用 Python 爬取新浪财经商品期权数据并生成 Excel 表格,再连接 Navicat,将数据存入数据库。

为什么在做了东方财富网的大商所爬取代码后还要搞一个这个?
因为新浪财经的看涨合约和看跌合约是在一块的(相当于一次要爬取两个模块还要进行排版)
这里先给大家捋一下逻辑
1.我是先将爬取下来的数据存为txt文件(因为不知道怎么直接存为excel文件)
2.然后将txt文件转化为excel文件
3.将excel文件导入数据库
在做的时候,要注意变量的命名,不要重复,否则会一直报错(爆红)、反复改错(都是泪啊)。
附上代码如下

import os
import time

import openpyxl
import pymysql
import xlrd
from selenium import webdriver

# One Chrome session is shared by every scraping helper in this script.
response = webdriver.Chrome()

# Page to scrape (Sina Finance option quotes; the path ends in m_o/dce).
url = "https://stock.finance.sina.com.cn/futures/view/optionsDP.php/m_o/dce"

response.get(url)

# Contract codes to process; used for file names, sheet titles and the
# `name` column written to the database.
# NOTE(review): presumably listed in the same order as the page's dropdown,
# because the main loop selects entries by position -- confirm.
a = ['m2101','m2105','m2011','m2012','m2109','m2103']
# 1-based position of the dropdown entry the main loop will select next.
s = 1

# Connect to the database.
# NOTE(review): host, user and password are hard-coded in source; move them
# to environment variables or a config file that is not committed.
try:
    # `database=` is the documented keyword; `db=` is a deprecated alias.
    db = pymysql.connect(host='rm-wz95oz7m93cejtuko4o.mysql.rds.aliyuncs.com', port=3306, user='super', password='super888', database='financial')
except pymysql.MySQLError:
    # Fail fast: without `db` every later database call would raise an
    # unrelated NameError. Release the browser before propagating the cause.
    print("could not connect to mysql server")
    response.quit()
    raise

# Scrape the two quote tables and append them, merged row by row, to a txt file.
def crash(driver=None, out_path=None):
    """Append the page's two quote tables to a text file, one merged row per line.

    The text of the ``table_up fl`` element and of the ``table_down fr``
    element is split into lines and paired by position:

    * both present  -> ``"<up> <down>\\n"``
    * only up       -> ``"<up>\\n"``
    * only down     -> eight ``"- "`` placeholders followed by ``"<down>\\n"``
      (presumably one placeholder per column of the up table -- confirm)

    Args:
        driver: Selenium WebDriver to read from. Defaults to the module-level
            ``response``.
        out_path: File to append to (UTF-8). Defaults to the module-level
            ``file_path``.

    Compared with the previous version, the file is opened once instead of
    once per row, and the one-second sleep per row is gone: the table text is
    already captured before any row is written.
    """
    from itertools import zip_longest

    # Fall back to the script's globals only when no explicit value is given.
    driver = response if driver is None else driver
    out_path = file_path if out_path is None else out_path

    # find_element("xpath", ...) works on Selenium 3 and 4; the
    # find_element_by_xpath shortcut was removed in Selenium 4.3.
    up_table = driver.find_element("xpath", "//*[@class='table_up fl']")
    down_table = driver.find_element("xpath", "//*[@class='table_down fr']")

    up_rows = up_table.text.split("\n")
    down_rows = down_table.text.split("\n")

    merged = []
    # The two tables may have different row counts; zip_longest pads the
    # shorter one with None so every row of both tables is emitted.
    for up_row, down_row in zip_longest(up_rows, down_rows):
        if down_row is None:
            merged.append(up_row + "\n")
        elif up_row is None:
            merged.append('- ' * 8 + down_row + "\n")
        else:
            merged.append(up_row + ' ' + down_row + "\n")

    with open(out_path, "a", encoding="utf-8") as fp:
        fp.writelines(merged)

# Click the contract selector.
def pin():
    """Click the ``<span>`` under the element with id ``option_suffix``.

    Presumably this opens the dropdown whose ``ul/li`` entries the main loop
    clicks afterwards -- confirm against the page.

    Uses ``find_element("xpath", ...)``, which works on Selenium 3 and 4;
    the ``find_element_by_xpath`` shortcut was removed in Selenium 4.3.
    """
    response.find_element("xpath", "//*[@id='option_suffix']/span").click()

# `a` holds the contract codes listed under the soybean-meal option.
for i in range(len(a)):
    def open_excel(self):
        """Open today's workbook for contract ``a[self]`` and return its sheet.

        Args:
            self: Index into the module-level list ``a`` (not an instance;
                the name is kept for backward compatibility).

        Returns:
            The xlrd sheet named ``a[self]``, or ``None`` when the workbook
            cannot be opened or the sheet cannot be found. The reason is
            printed in either case.

        NOTE(review): xlrd 2.0+ reads only ``.xls`` files; opening this
        ``.xlsx`` needs xlrd < 2.0 -- confirm the pinned version.
        """
        try:
            current_time = time.strftime('%Y-%m-%d', time.localtime())
            file_name = current_time + "_" + a[self] + ".xlsx"
            # Raw string: same value as before, without the invalid "\p" escape.
            workbook_path = r"D:\python代码\first\file_name2" + "\\" + file_name
            book = xlrd.open_workbook(workbook_path)
        except Exception as exc:
            # Stop here: falling through used to hit an unbound `book` and
            # report the failure a second time as a worksheet problem.
            print("open excel file failed!", exc)
            return None
        try:
            # The sheet inside the workbook is named after the contract code.
            return book.sheet_by_name(a[self])
        except Exception as exc:
            print("locate worksheet in excel failed!", exc)
            return None
    # Insert the worksheet rows into the database table.
    def insert_deta():
        """Insert every row of the current contract's worksheet into ``douboqiquan``.

        Reads the sheet via ``open_excel(i)`` (``i`` is the module-level loop
        index) and inserts the first 15 cells of each row. All rows are
        inserted, including row 0; the header row is deleted afterwards by a
        separate step.

        The whole sheet is committed in one transaction and rolled back on
        error, so a failure no longer leaves a half-imported sheet behind.
        If no worksheet is available the function prints a message and
        returns without touching the database.
        """
        sheet = open_excel(i)
        print(sheet)
        if sheet is None:
            print("no worksheet available, nothing inserted")
            return

        sql = "INSERT INTO douboqiquan(buy,buy_price,last_price,sell_price,sell_count,hava,ud,xqj,buy1,buy_price1,last_price1,sell_price1,sell_count1,hava1,ud1)VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s )"
        cursor = db.cursor()
        try:
            for row_index in range(sheet.nrows):
                row_data = sheet.row_values(row_index)
                # Exactly the 15 columns named in the INSERT statement; a
                # shorter row still raises IndexError as before.
                value = tuple(row_data[col] for col in range(15))
                cursor.execute(sql, value)
            db.commit()
        except Exception:
            db.rollback()
            raise
        finally:
            # Always release the cursor, even when an insert fails.
            cursor.close()

    # Fill in the date column.
    def update(self):
        """Set ``date`` to the database's current date on every row where it is NULL.

        Args:
            self: Unused; callers pass the loop index.
        """
        stamp_sql = "update douboqiquan set date=CURDATE() where date is null"
        cur = db.cursor()
        cur.execute(stamp_sql)
        db.commit()
        cur.close()

    # Fill in the name column with the contract code, e.g. m2011.
    def update1(self):
        """Set ``name`` to ``a[self]`` on every row where it is NULL.

        Args:
            self: Index into the module-level list ``a`` (not an instance).

        The value is passed as a query parameter instead of being
        interpolated into the SQL text, so the driver handles the quoting.
        """
        contract = a[self]
        # The cursor is closed on exit from the block, even on error.
        with db.cursor() as cursor:
            cursor.execute(
                "update douboqiquan set name=%s where name is null",
                (contract,),
            )
        db.commit()

    # Drop the rows that hold column labels (买量, 买价, ...) rather than data.
    def remove():
        """Delete every ``douboqiquan`` row whose ``buy`` column equals '买量'."""
        purge_sql = "DELETE  from douboqiquan WHERE buy='买量'"
        cur = db.cursor()
        cur.execute(purge_sql)
        db.commit()
        cur.close()


    # Directory that stores the intermediate txt files.
    # Raw string: same value as before, without the invalid "\p" escape.
    path = os.path.abspath(r"D:\python代码\first\file_name1")
    folder_name = path
    os.makedirs(folder_name, exist_ok=True)

    current_time = time.strftime('%Y-%m-%d', time.localtime())
    file_name = current_time + "_" + a[i] + ".txt"
    # Path of today's txt file for this contract; crash() reads this global.
    file_path = folder_name + "/" + file_name

    # Scrape the currently displayed contract into the txt file.
    # NOTE(review): crash() opens the file in append mode, so running the
    # script twice on the same day duplicates the rows.
    crash()

    # Read the scraped rows back; the handle is closed as soon as we are done.
    with open(file_path, 'r', encoding="utf-8") as fopen:
        lines = fopen.readlines()

    # Build a workbook with a single sheet named after the contract code.
    wb = openpyxl.Workbook()
    sheet = wb.active
    sheet.title = a[i]
    for line in lines:
        # Strip the line terminator first, otherwise the last cell of every
        # row carries a trailing "\n" into Excel and then into the database.
        sheet.append(line.rstrip("\n").split(" "))

    # Directory that stores the generated Excel files.
    path1 = os.path.abspath(r"D:\python代码\first\file_name2")
    folder_name_1 = path1
    os.makedirs(folder_name_1, exist_ok=True)

    file_name_1 = current_time + "_" + a[i] + ".xlsx"
    file_path_1 = folder_name_1 + "\\" + file_name_1

    wb.save(file_path_1)

    pin()
    time.sleep(1)

    # Select the next contract unless this was the last one. `s` is the
    # 1-based position of the entry to click; the bound follows len(a)
    # instead of a hard-coded 6.
    if s < len(a):
        s = i + 2
        ipp = "//*[@id='option_suffix']/ul/li[%d]" % s
        # find_element("xpath", ...) works on Selenium 3 and 4; the
        # find_element_by_xpath shortcut was removed in Selenium 4.3.
        response.find_element("xpath", ipp).click()
        time.sleep(0.8)

    # Load the saved workbook into the database (insert_deta opens it itself).
    insert_deta()
    # Fill in the date and contract-name columns of the new rows.
    update(i)
    update1(i)
    # Remove the header rows that were inserted along with the data.
    remove()





然后这个是我的数据库字段
在这里插入图片描述

猜你喜欢

转载自blog.csdn.net/m0_50481455/article/details/108830383