Source URL (网址): https://www.banban.cn/gupiao/list_cyb.html
Code (代码):
from bs4 import BeautifulSoup
import requests
from requests import RequestException
import pymssql
import json
# Character classifier used when splitting stock entries into name/code parts.
def is_chinese_or_english(ch):
    """Return True if *ch* is a CJK ideograph or an ASCII letter."""
    in_cjk_range = '\u4e00' <= ch <= '\u9fff'
    in_ascii_alpha = ('a' <= ch <= 'z') or ('A' <= ch <= 'Z')
    return in_cjk_range or in_ascii_alpha
# Fetch a web page (获取网页).
def get_page(url, timeout=10):
    """Fetch *url* and return the response body as text.

    Args:
        url: Page URL to fetch.
        timeout: Seconds to wait for the server (the original call had no
            timeout, so a stalled server would hang the script forever).

    Returns:
        The response text on HTTP 200; otherwise None (also on any
        requests-level error).
    """
    try:
        response = requests.get(url, timeout=timeout)
    except RequestException:
        return None
    if response.status_code == 200:
        return response.text
    return None
# Scrape the raw stock code/name entries (获取股票代码及名称的词条数据).
def get_content():
    """Scrape the listing page and return the raw stock entries.

    Returns:
        list: the text of each <a> tag inside the stock-list <div>;
        an empty list when the page cannot be fetched or the expected
        markup is missing (the original crashed with AttributeError in
        both cases).
    """
    page_text = get_page("https://www.banban.cn/gupiao/list_cyb.html")
    if page_text is None:  # network failure or non-200 response
        return []
    # Round-trip the bytes to recover UTF-8 text — presumably the server
    # mislabels its charset so requests decodes it as ISO-8859-1 (verify
    # against the live response headers).
    page_text = page_text.encode('ISO-8859-1').decode('utf-8')
    soup = BeautifulSoup(page_text, 'lxml')
    container = soup.find(name='div', attrs={'class': 'u-postcontent cz'})
    if container is None:  # page layout changed
        return []
    return [a.string for a in container.find_all('a')]
# Split each entry into code + name, one dict per stock (分割词条数据).
def cut_content():
    """Split each raw entry into its stock code and name.

    Each scraped entry mixes a numeric code with a Chinese/English name:
    digits are collected into "code", CJK/ASCII letters into "name".

    Returns:
        list[dict]: dicts with "code" and "name" keys, sorted by code.
    """
    result = []
    for item in get_content():
        # BUG FIX: the original called the undefined name `is_chinese`,
        # which raised NameError — the helper is `is_chinese_or_english`.
        name = ''.join(ch for ch in item if is_chinese_or_english(ch))
        code = ''.join(ch for ch in item if ch.isdigit())
        result.append({"code": code, "name": name})
    result.sort(key=lambda entry: entry["code"])
    print(result)
    return result
# Upload the scraped stocks into the database table (上传至数据库表中).
def upload_database():
    """Insert the scraped stock list into the Stocks table.

    Uses a parameterized INSERT — the original concatenated values into
    the SQL string, which is vulnerable to SQL injection and breaks on
    any name containing a quote. The connection is now always closed.

    NOTE(review): credentials are hard-coded here — move them to
    configuration / environment variables.
    """
    data = cut_content()
    conn = pymssql.connect("localhost:1433", "sa", "123456", "AIStock")
    if not conn:
        return
    print("连接成功!")
    try:
        cursor = conn.cursor()
        sql_string = ("insert into Stocks "
                      "(StockCode, StockName, StockExchangeName, StockType) "
                      "values (%s, %s, %s, %s)")
        # pymssql uses the %s paramstyle; executemany batches all rows.
        rows = [(str(item["code"]), str(item["name"]), '0', '0') for item in data]
        cursor.executemany(sql_string, rows)
        conn.commit()
    finally:
        conn.close()
# Script entry point (主函数).
def main():
    """Scrape the stock list and upload it to the database."""
    upload_database()


if __name__ == '__main__':
    main()