# -*- coding: utf-8 -*-
# Source (scraped blog post): https://www.cnblogs.com/wzj998/p/7278630.html
"""Stock-market data collection script.

Pulls security / company / concept / quote data from the tushare pro API,
scrapes two Sina Finance pages, and stores rows into a MySQL database
through pymysql.

NOTE(review): the original paste was heavily garbled (``cursorsor`` for
``cursor``, ``delect`` for ``delete``, ``execute(sql, %(...))`` syntax
errors, a smart quote in a format string, loops iterating the wrong
DataFrame, and INSERT statements copy-pasted with the wrong columns).
The reconstruction below fixes the mechanical defects; the exact target
table/column names marked with TODO should be confirmed against the real
MySQL schema, which is not visible here.
"""

import datetime
import time

import requests
from bs4 import BeautifulSoup
import lxml  # noqa: F401 -- guarantees the 'lxml' parser used below is installed
import pandas as pd  # noqa: F401 -- kept from the original import list
import tushare as ts
import pymysql

pymysql.install_as_MySQLdb()

from sqlalchemy import create_engine  # noqa: F401 -- kept for the optional engine path in __main__

# ``pro`` (tushare pro API handle) and ``conn`` (pymysql connection) are
# created in the __main__ block and used as module-level globals by every
# function below, matching the original design.

_SINA_HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3314.0 Safari/537.36 SE 2.X MetaSr 1.0"
}


def _today():
    """Return today's date as a 'YYYYMMDD' string (tushare's date format)."""
    return time.strftime("%Y%m%d", time.localtime())


# --------------------------------------------------------------------------
# 股票基本信息获取模块 (security basic-info module)
# --------------------------------------------------------------------------

def get_security_info():
    """Fetch basic info for all listed stocks and fully rebuild stock_list.

    Columns: ts_code, symbol, name, industry, market, exchange, list_date.
    """
    stock_list = pro.stock_basic(
        list_status='L',
        fields='ts_code,symbol,name,industry,market,exchange,list_date')
    cursor = conn.cursor()
    # Full refresh: clear the table first, then re-insert every row.
    cursor.execute('delete from stock_list')
    conn.commit()
    for _, row in stock_list.iterrows():
        # tushare returns list_date as a 'YYYYMMDD' string; convert for MySQL.
        list_date = datetime.datetime.strptime(row['list_date'], '%Y%m%d')
        cursor.execute(
            'insert into stock.stock_list'
            '(ts_code,symbol,name,industry,market,exchange,list_date) '
            'values (%s, %s, %s, %s, %s, %s, %s)',
            (row['ts_code'], row['symbol'], row['name'], row['industry'],
             row['market'], row['exchange'], list_date))
    conn.commit()


def get_company_info():
    """Fetch company profiles from both exchanges and rebuild company_list.

    Fields: ts_code, province, city, employees, main_business.
    SSE = Shanghai exchange, SZSE = Shenzhen exchange.
    """
    fields = 'ts_code,province,city,employees,main_business'
    cursor = conn.cursor()
    # NOTE(review): the original deleted from stock_list here -- almost
    # certainly a copy-paste bug, since every insert targets company_list.
    cursor.execute('delete from company_list')
    conn.commit()
    for exchange in ('SSE', 'SZSE'):
        company_list = pro.stock_company(exchange=exchange, fields=fields)
        for _, row in company_list.iterrows():
            cursor.execute(
                'insert into stock.company_list'
                '(ts_code,province,city,employees,main_business) '
                'values (%s, %s, %s, %s, %s)',
                (row['ts_code'], row['province'], row['city'],
                 row['employees'], row['main_business']))
        conn.commit()


def get_concept_info():
    """Fetch concept-membership rows (ts_code, concept_name) for every stock.

    NOTE(review): the original deleted from and inserted into stock_list,
    whose columns (see get_security_info) do not include concept_name --
    the dedicated table name below is a guess; TODO confirm against schema.
    """
    stk = pro.stock_basic(list_status='L', fields='ts_code')
    cursor = conn.cursor()
    cursor.execute('delete from concept_list')  # TODO confirm table name
    conn.commit()
    for _, row in stk.iterrows():
        # Original indexed the whole DataFrame (stk['ts_code']) by mistake.
        concept_list = pro.concept_detail(ts_code=row['ts_code'],
                                          fields='ts_code,concept_name')
        for _, c in concept_list.iterrows():
            cursor.execute(
                'insert into stock.concept_list(ts_code,concept_name) '
                'values (%s, %s)',
                (c['ts_code'], c['concept_name']))
        conn.commit()


def get_stk_holder():
    """Fetch shareholder counts per stock (published irregularly).

    API limits (from the original comment): max 3000 rows per call, 100
    calls per minute -- hence the sleep between stocks.
    """
    stk = pro.stock_basic(list_status='L', fields='ts_code')
    cursor = conn.cursor()
    for _, row in stk.iterrows():
        stk_holder = pro.stk_holdernumber(
            ts_code=row['ts_code'], fields='ts_code,ann_date,holder_num')
        # Original inserted undefined names into stock_list; iterate the
        # fetched rows instead.  TODO confirm target table name.
        for _, h in stk_holder.iterrows():
            cursor.execute(
                'insert into stock.stk_holder(ts_code,ann_date,holder_num) '
                'values (%s, %s, %s)',
                (h['ts_code'], h['ann_date'], h['holder_num']))
        conn.commit()
        time.sleep(2)  # stay under the per-minute rate limit


# --------------------------------------------------------------------------
# 股票每日行情数据获取模块 (daily-quote module)
# --------------------------------------------------------------------------

def get_daily_info():
    """Fetch daily quotes for every listed stock and store them.

    API limits (original comment): 200 calls/minute, 4000 rows per call.
    """
    stk = pro.stock_basic(list_status='L', fields='ts_code')
    cursor = conn.cursor()
    fields = ('trade_date,ts_code,open,high,low,close,'
              'pre_close,pct_chg,amount')
    for _, row in stk.iterrows():
        daily_info = pro.daily(ts_code=row['ts_code'], fields=fields)
        # Original re-iterated ``stk`` here and ran a concept-table INSERT --
        # both copy-paste bugs.  TODO confirm target table name.
        for _, d in daily_info.iterrows():
            cursor.execute(
                'insert into stock.daily_info'
                '(trade_date,ts_code,open,high,low,close,pre_close,'
                'pct_chg,amount) '
                'values (%s, %s, %s, %s, %s, %s, %s, %s, %s)',
                (d['trade_date'], d['ts_code'], d['open'], d['high'],
                 d['low'], d['close'], d['pre_close'], d['pct_chg'],
                 d['amount']))
        conn.commit()


def sina_stock_radar_Spider():
    """Scrape Sina Finance 'stock radar' unusual-movement pages 1..15.

    Each table row (past the header) holds: change_time, symbol, name,
    change_value.  The scrape date is recorded as trade_date.
    """
    cursor = conn.cursor()
    for page_num in range(1, 16):
        url = ('http://finance.sina.com.cn/stockradar/stockradar'
               + str(page_num) + '.html')
        html = requests.get(url=url, headers=_SINA_HEADERS)
        html.encoding = html.apparent_encoding
        soup = BeautifulSoup(html.text, 'lxml')
        tr_list = soup.find_all('tr')
        # Original enumerated undefined ``tr_list1`` -- fixed to tr_list.
        for index, tr in enumerate(tr_list):
            if index == 0:
                continue  # skip the header row
            th_list = tr.find_all('th')
            change_time = th_list[0].string
            symbol = th_list[1].string
            name = th_list[2].string
            change_value = th_list[3].string
            trade_date = _today()
            # TODO confirm target table name (original said stock_list,
            # which does not have these columns).
            cursor.execute(
                'insert into stock.stock_radar'
                '(symbol,name,trade_date,change_time,change_value) '
                'values (%s, %s, %s, %s, %s)',
                (symbol, name, trade_date, change_time, change_value))
            conn.commit()


def get_lhb_info(trade_date='20180928'):
    """Fetch the dragon-tiger (top trading desk) list for one day.

    The date was hard-coded in the original; it is now a parameter with the
    same default, so existing callers are unaffected.  Single call returns
    at most 10000 rows (original comment).
    """
    lhb_list = pro.top_inst(
        trade_date=trade_date,
        fields='trade_date,ts_code,exalter,buy,buy_rate,'
               'sell,sell_rate,net_buy')
    cursor = conn.cursor()
    # Original iterated undefined ``stk`` and ran a concept-table INSERT.
    for _, row in lhb_list.iterrows():
        cursor.execute(
            'insert into stock.lhb_list'
            '(trade_date,ts_code,exalter,buy,buy_rate,sell,sell_rate,'
            'net_buy) '
            'values (%s, %s, %s, %s, %s, %s, %s, %s)',
            (row['trade_date'], row['ts_code'], row['exalter'], row['buy'],
             row['buy_rate'], row['sell'], row['sell_rate'],
             row['net_buy']))
    conn.commit()


def get_zdt_info(trade_date=None):
    """Fetch limit-up/limit-down stocks for a day (default: today).

    The original had ``trade_date=`` with no value (syntax error); it is
    reconstructed as an optional parameter defaulting to today's date.
    """
    if trade_date is None:
        trade_date = _today()
    zdt_info = pro.limit_list(
        trade_date=trade_date,
        fields='trade_date,ts_code,name,close,fd_amount,fc_ratio,'
               'first_time,last_time,open_times,strth,limit')
    cursor = conn.cursor()
    for _, row in zdt_info.iterrows():
        # ``limit`` is a MySQL reserved word, hence the backticks.
        cursor.execute(
            'insert into stock.zdt_info'
            '(trade_date,ts_code,name,close,fd_amount,fc_ratio,'
            'first_time,last_time,open_times,strth,`limit`) '
            'values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)',
            (row['trade_date'], row['ts_code'], row['name'],
             row['close'],  # original read row[''] -- clearly meant close
             row['fd_amount'], row['fc_ratio'], row['first_time'],
             row['last_time'], row['open_times'], row['strth'],
             row['limit']))
    conn.commit()


def moneyflow(trade_date=None):
    """Fetch per-stock money-flow data for a day (default: today).

    Single call returns at most 4000 rows (original comment).
    """
    if trade_date is None:
        trade_date = _today()
    stk = pro.stock_basic(list_status='L', fields='ts_code')
    cursor = conn.cursor()
    fields = ('trade_date,ts_code,buy_lg_amount,sell_lg_amount,'
              'buy_elg_amount,sell_elg_amount,net_mf_amount')
    for _, row in stk.iterrows():
        mf = pro.moneyflow(ts_code=row['ts_code'], trade_date=trade_date,
                           fields=fields)
        for _, m in mf.iterrows():
            cursor.execute(
                'insert into stock.moneyflow'
                '(trade_date,ts_code,buy_lg_amount,sell_lg_amount,'
                'buy_elg_amount,sell_elg_amount,net_mf_amount) '
                'values (%s, %s, %s, %s, %s, %s, %s)',
                (m['trade_date'], m['ts_code'], m['buy_lg_amount'],
                 m['sell_lg_amount'], m['buy_elg_amount'],
                 m['sell_elg_amount'], m['net_mf_amount']))
        conn.commit()


# --------------------------------------------------------------------------
# 大盘风控数据获取模块 (market risk-control module)
# --------------------------------------------------------------------------

def spider():
    """Scrape Sina Finance real-time large-order pages and store rows.

    NOTE(review): the original body was truncated/garbled mid-expression
    (``cjj = th_list[3] cjl = change_value = ...``); the field extraction
    below is a best-effort reconstruction from the surviving indices --
    verify the column layout against the live page.
    """
    cursor = conn.cursor()
    for page in range(0, 3000):
        url = ('http://vip.stock.finance.sina.com.cn/quotes_service/view/'
               'cn_bill_all.php?num=100&sort=ticktime&asc=0&volume=10000'
               '&type=0&' + str(page))
        html = requests.get(url=url, headers=_SINA_HEADERS)
        html.encoding = html.apparent_encoding
        soup = BeautifulSoup(html.text, 'lxml')
        tr_list = soup.find_all('tr')
        for index, tr in enumerate(tr_list):
            if index == 0:
                continue  # skip the header row
            th_list = tr.find_all('th')
            name = th_list[0].string
            symbol = th_list[1].string
            change_time = th_list[2].string
            change_value = th_list[3].string  # TODO confirm column
            trade_date = _today()
            # TODO confirm target table name (original said stock_list).
            cursor.execute(
                'insert into stock.big_orders'
                '(symbol,name,trade_date,change_time,change_value) '
                'values (%s, %s, %s, %s, %s)',
                (symbol, name, trade_date, change_time, change_value))
            conn.commit()


# 市场资讯信息获取模块 (market-news module) -- not implemented in original
# 个股风控数据获取模块 (per-stock risk module) -- not implemented in original

# NOTE(review): the scrape had appended a second, debug-print copy of
# spider() with its own __main__ guard; it shadowed the definition above
# and has been removed as duplication.

if __name__ == '__main__':
    # TODO(security): move the API token out of source control.
    ts.set_token('ac16b470869c5d82db5033ae9288f77b282d2b5519507d6d2c72fdd7')
    pro = ts.pro_api()
    # engine = create_engine('mysql://root:[email protected]/stock?charset=utf8')
    # Original bugs fixed: pymysql's port must be an int (was "3306") and
    # the charset name is 'utf8' (was "utf-8", which pymysql rejects).
    conn = pymysql.connect(host='127.0.0.1', port=3306, user='root',
                           password='123456', database='stock_data',
                           charset='utf8')
    # Fetch basic security info
    get_security_info()
    # Fetch company profiles
    get_company_info()
    # Scrape the Sina stock-radar pages
    sina_stock_radar_Spider()
    # Close the database connection
    conn.close()
Latest posts
You may also like
Reposted from www.cnblogs.com/Iceredtea/p/11735976.html
Today's picks
Weekly ranking