Python crawls Sina Finance commodity options

Use Python to crawl Sina Finance commodity options data, generate an Excel workbook from it, and then store the data in a MySQL database (browsed with Navicat).

Why write this after already writing the crawler for the Dalian Commodity Exchange (大商所) data on Eastmoney (东方财富网)?
Because Sina Finance shows the call (bullish) contracts and the put (bearish) contracts side by side in one view, so this is equivalent to crawling two modules at once and laying them out together.
Here is the logic:
1. I will save the crawled data as a txt file. (Because I don't know how to directly save it as an excel file)
2. Then convert the txt file into an excel file
3. Import the excel file into the database
While writing it, take care to give every variable a distinct name so that none of them clash. After that it was a cycle of running the script and fixing the mistakes (all tears).
The code is attached below.

import os
import time

import openpyxl
import pymysql
import xlrd
from selenium import webdriver

# Shared Chrome WebDriver session used by the scraping helpers below.
response = webdriver.Chrome()

# Address of the page to scrape (Sina Finance option quotes, "m_o/dce").
url = "https://stock.finance.sina.com.cn/futures/view/optionsDP.php/m_o/dce"

response.get(url)

# Contract codes to scrape.  The main loop picks the next contract by its
# position in the page's dropdown, so this order presumably mirrors the
# dropdown order -- TODO confirm against the live page.
a = ['m2101','m2105','m2011','m2012','m2109','m2103']
# Dropdown position counter used by the main loop.
s = 1

# Connect to the database.
# NOTE(review): host and credentials are hard-coded in source; consider
# loading them from the environment or a config file kept out of version
# control.
try:
    db = pymysql.connect(host='rm-wz95oz7m93cejtuko4o.mysql.rds.aliyuncs.com', port=3306, user='super', password='super888', db='financial')
except pymysql.MySQLError:
    # Without a connection `db` stays undefined and the script would only
    # fail later with a confusing NameError, so report and stop here.
    print("could not connect to mysql server")
    raise

# Scraping routine: dump the two quote tables into the txt file.
def crash():
    """Append the currently displayed quote tables to the txt file.

    Reads the text of the ``table_up fl`` and ``table_down fr`` elements
    from the module-level ``response`` WebDriver, splits each into lines,
    and appends one merged line per row to the module-level ``file_path``:

    * both tables have the row: ``"<up row> <down row>"``
    * only the first table has it: ``"<up row>"``
    * only the second table has it: eight ``"- "`` placeholders followed by
      ``"<down row>"``

    The two tables can differ in length (presumably calls vs. puts), which
    is why the shorter side is handled explicitly.
    """
    # Local import keeps this block self-contained.
    from itertools import zip_longest

    # "xpath" is the locator strategy string (the value of By.XPATH).
    # find_element(by, value) exists in Selenium 3 and 4, whereas the
    # find_element_by_xpath shortcut was removed in Selenium 4.3.
    up_table = response.find_element("xpath", "//*[@class='table_up fl']")
    down_table = response.find_element("xpath", "//*[@class='table_down fr']")

    up_rows = up_table.text.split("\n")
    down_rows = down_table.text.split("\n")

    # Open the output once instead of once per line; the text has already
    # been captured above, so no pauses are needed while writing.
    with open(file_path, "a", encoding="utf-8") as fp:
        for up_row, down_row in zip_longest(up_rows, down_rows):
            if down_row is None:
                merged = up_row
            elif up_row is None:
                merged = '- ' * 8 + down_row
            else:
                merged = up_row + ' ' + down_row
            fp.write(merged + "\n")

# Click the contract selector.
def pin():
    """Click the ``<span>`` under the ``option_suffix`` element.

    Uses the module-level ``response`` WebDriver.  The main loop calls this
    right before clicking an ``<li>`` under the same element, so it
    presumably opens the contract dropdown.
    """
    # find_element("xpath", ...) works on Selenium 3 and 4; the
    # find_element_by_xpath shortcut was removed in Selenium 4.3.
    response.find_element("xpath", "//*[@id='option_suffix']/span").click()

# `a` holds the contract codes listed under the soybean-meal option page.

# Folders for the intermediate txt dumps and the generated workbooks.
# "\\p" is written out because "\p" is an invalid escape sequence; the
# runtime value of both strings is unchanged.
_TXT_DIR = os.path.abspath("D:\\python代码\\first\\file_name1")
_XLSX_DIR = os.path.abspath("D:\\python代码\\first\\file_name2")

# Date stamp used in every file name; computed once so that the name used
# when saving a workbook always matches the name used when re-opening it.
_RUN_DATE = time.strftime('%Y-%m-%d', time.localtime())

# Number of columns written per row by the INSERT statement below.
_COLUMN_COUNT = 15
_INSERT_SQL = "INSERT INTO douboqiquan(buy,buy_price,last_price,sell_price,sell_count,hava,ud,xqj,buy1,buy_price1,last_price1,sell_price1,sell_count1,hava1,ud1)VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s )"


def _xlsx_path(contract):
    """Return the workbook path used for *contract* in this run."""
    return os.path.join(_XLSX_DIR, _RUN_DATE + "_" + contract + ".xlsx")


def _txt_to_xlsx(txt_path, xlsx_path, title):
    """Copy the space-separated lines of *txt_path* into a new workbook.

    The workbook has a single sheet named *title*; each text line becomes
    one row, split on single spaces.
    """
    wb = openpyxl.Workbook()
    sheet = wb.active
    sheet.title = title
    with open(txt_path, 'r', encoding="utf-8") as fp:
        for line in fp:
            # Drop the line terminator so it does not end up inside the
            # last cell (and later in the database).
            sheet.append(line.rstrip("\n").split(" "))
    wb.save(xlsx_path)


def open_excel(self):
    """Open the workbook saved for contract ``a[self]`` and return its sheet.

    Args:
        self: Index into the module-level contract list ``a`` (not an
            instance; the parameter name is kept for compatibility).

    Returns:
        The xlrd sheet whose name is ``a[self]``.

    Raises:
        OSError: The workbook file cannot be opened.
        xlrd.XLRDError: The file cannot be parsed or the sheet is missing.
    """
    # NOTE(review): xlrd 2.0+ reads only .xls files; opening this .xlsx
    # needs xlrd < 2.0 -- confirm the installed version.
    try:
        book = xlrd.open_workbook(_xlsx_path(a[self]))
    except (OSError, xlrd.XLRDError):
        print("open excel file failed!")
        raise
    try:
        # The sheet is named after the contract code.
        return book.sheet_by_name(a[self])
    except xlrd.XLRDError:
        print("locate worksheet in excel failed!")
        raise


# Insert the worksheet rows into the database.
def insert_deta(index=None):
    """Insert every row of a contract's worksheet into ``douboqiquan``.

    Args:
        index: Index into ``a`` of the contract to load.  When omitted, the
            module-level loop variable ``i`` is used, as before.

    All rows, including the header row, are inserted; ``remove()`` deletes
    the header rows afterwards.  The transaction is committed once after
    the last row.
    """
    if index is None:
        index = i
    sheet = open_excel(index)
    print(sheet)
    with db.cursor() as cursor:
        for row_idx in range(sheet.nrows):
            row_data = sheet.row_values(row_idx)
            cursor.execute(_INSERT_SQL, tuple(row_data[:_COLUMN_COUNT]))
    db.commit()


# Fill in the date column.
def update(self):
    """Set ``date`` to the current date on every row where it is NULL.

    Args:
        self: Unused; kept so existing ``update(i)`` calls still work.
    """
    with db.cursor() as cursor:
        cursor.execute("update douboqiquan set date=CURDATE() where date is null")
    db.commit()


# Fill in the name column with the contract code, e.g. m2011.
def update1(self):
    """Set ``name`` to ``a[self]`` on every row where it is NULL.

    Args:
        self: Index into the module-level contract list ``a``.
    """
    with db.cursor() as cursor:
        # Bound parameter instead of string interpolation.
        cursor.execute("update douboqiquan set name=%s where name is null", (a[self],))
    db.commit()


# Remove the header rows ("买量", "买价", ...) that were inserted as data.
def remove():
    """Delete every row whose ``buy`` column holds the header text '买量'."""
    with db.cursor() as cursor:
        cursor.execute("DELETE  from douboqiquan WHERE buy='买量'")
    db.commit()


for i, contract in enumerate(a):
    # Make sure the output folders exist.
    os.makedirs(_TXT_DIR, exist_ok=True)
    os.makedirs(_XLSX_DIR, exist_ok=True)

    # crash() appends to the module-level `file_path`, so set it first.
    # NOTE: the file is opened in append mode there, so running the script
    # twice on the same day adds to the existing dump.
    file_path = os.path.join(_TXT_DIR, _RUN_DATE + "_" + contract + ".txt")

    crash()

    # Convert the txt dump into an Excel workbook.
    _txt_to_xlsx(file_path, _xlsx_path(contract), contract)

    # Switch the page to the next contract while the database work runs.
    # Entry i + 2 of the list under `option_suffix` is clicked for the
    # contract after a[i]; nothing is clicked after the last contract.
    if i + 1 < len(a):
        pin()
        time.sleep(1)
        s = i + 2
        ipp = "//*[@id='option_suffix']/ul/li[%d]" % s
        response.find_element("xpath", ipp).click()
        time.sleep(0.8)

    # Load the workbook into the database.
    insert_deta(i)
    # Stamp the new rows with date and contract name.
    update(i)
    update1(i)
    # Drop the header rows.
    remove()





Finally, these are the fields of my database table:
Insert picture description here

Guess you like

Origin blog.csdn.net/m0_50481455/article/details/108830383