Finding value stocks with Python based on financial indicators

Identify stock pools

from concurrent.futures import ThreadPoolExecutor
import urllib
import os
from time import sleep
import pandas as pd
# Shanghai Stock Exchange codes: "600000" through "605999".
shanghaicode = [str(code) for code in range(600000, 606000)]

# Shenzhen Stock Exchange codes: "000000" through "004999", zero-padded to
# six digits.
shenzhencode = [f"{code:06d}" for code in range(5000)]

Crawl the data

def get_data(num, max_retries=5):
    """Download the income-statement CSV for one stock code.

    The file is fetched from the NetEase quotes service and stored as
    ``利润表_multi/<num>.csv``. A code whose file is already on disk is
    skipped without touching the network.

    Args:
        num: Stock code; it is converted with ``str()`` to build the URL
            and the output file name.
        max_retries: Maximum number of download attempts before giving up
            on this code. Failed attempts other than HTTP 404 are retried.

    Returns:
        None. Progress and errors are reported with ``print``.
    """
    # Imported here because the file only does ``import urllib``, which does
    # not guarantee that the ``request`` / ``error`` submodules are loaded.
    import urllib.error
    import urllib.request

    out_dir = '利润表_multi/'
    path = out_dir + str(num) + '.csv'

    # Check the local cache first so an existing file costs no request.
    if os.path.exists(path):
        print(path + " already existed!!!")
        return

    url = 'http://quotes.money.163.com/service/lrb_' + str(num) + '.html'
    for _attempt in range(max_retries):
        try:
            with urllib.request.urlopen(url, timeout=2) as response:
                content = response.read()
        except urllib.error.HTTPError as e:
            if e.code == 404:
                # The service has no statement for this code; retrying
                # cannot help.
                print(f"{num} : {e}")
                return
            print(e)
        except Exception as e:
            # Timeouts, connection resets, truncated reads: report and retry.
            print(e)
        else:
            try:
                os.makedirs(out_dir, exist_ok=True)
                with open(path, 'wb') as f:
                    f.write(content)
            except OSError as e:
                # A local write failure is not transient, so do not retry.
                print(e)
                return
            print(num)
            # Pause after a successful download to limit the request rate.
            sleep(1)
            return
        # Short pause before the next attempt.
        sleep(1)

Multithreaded operation

# Download the Shenzhen list first, then the Shanghai list. Each batch runs
# in its own pool of 10 worker threads; leaving a ``with`` block shuts the
# pool down and waits for its queued downloads before the next batch starts.
with ThreadPoolExecutor(max_workers=10) as executor:
    executor.map(get_data, shenzhencode)

with ThreadPoolExecutor(max_workers=10) as executor:
    executor.map(get_data, shanghaicode)

Read the local data

def generatefile(path):
    """Return the names of the files located directly inside *path*.

    Args:
        path: Directory to list.

    Returns:
        A list of bare file names (no directory part). The list is empty
        when *path* does not exist or contains no files.
    """
    # os.walk is top-down by default, so its first item describes *path*
    # itself. Returning there keeps file names from nested subdirectories
    # from replacing the top-level listing.
    for _dirpath, _dirnames, filenames in os.walk(path):
        return filenames
    return []

def _meets_growth(current, previous, ratio):
    """Return True when *current* grew by at least *ratio* over *previous*."""
    if previous == 0:
        # The rate is undefined for a zero base. Only a rise to a positive
        # value counts as growth, which matches what dividing by zero
        # yielded before (+inf), without the runtime warning.
        return current > 0
    # Divide by the magnitude of the base: with a signed base a deepening
    # loss (e.g. -100 -> -200) would be scored as +100% growth.
    return (current - previous) / abs(previous) >= ratio


def _is_steady_grower(path, ratio=0.3, periods=4):
    """Return True when net profit grew by >= *ratio* in each of the latest
    *periods* consecutive comparisons found in the statement at *path*.

    The CSV is read as GBK text with one row per line item, so it is
    transposed to get one row per report. Raises whatever pandas raises for
    an unreadable file, a missing column, or a non-integer profit value.

    NOTE(review): values are grouped by the raw '报告日期' string. If the
    file holds quarterly report dates, the comparisons are between
    consecutive reports, not calendar years -- confirm whether the dates
    should be reduced to years first.
    """
    table = pd.read_csv(path, encoding='gbk', header=None).T
    # After transposing, the first row carries the line-item names.
    table.columns = table.loc[0]
    # Drop that header row and the last row.
    # NOTE(review): the last row is presumably an empty trailing column in
    # the source file -- confirm.
    table = table[1:-1]

    profit = table['净利润(万元)'].astype(int)
    # groupby sorts its keys, so the result runs from oldest to newest.
    per_period = profit.groupby(table['报告日期']).sum()

    # Drop the newest group (2021 according to the original comment,
    # presumably an incomplete year), then order newest first.
    newest_first = per_period.values[:-1][::-1]
    if len(newest_first) < periods + 1:
        return False

    pairs = zip(newest_first[:periods], newest_first[1:periods + 1])
    return all(_meets_growth(cur, prev, ratio) for cur, prev in pairs)


datapath = '利润表_multi/'
datalist = generatefile(datapath)
invest = []
for data in datalist:
    try:
        if _is_steady_grower(datapath + data):
            invest.append(data)
    except Exception as e:
        # Best effort: report a file that cannot be parsed and keep going.
        print(data + f':{e}')
invest = pd.DataFrame(invest)
# NOTE(review): recent pandas releases may no longer provide a writer for
# the legacy .xls format -- confirm against the installed version.
invest.to_excel('连续4年增长30%.xls')

Guess you like

Origin http://43.154.161.224:23101/article/api/json?id=324323091&siteId=291194637