Code:
from bs4 import BeautifulSoup
import requests
from requests import RequestException
import pymssql
import json
def get_page(url, headers, timeout=10):
    """Fetch *url* with an HTTP GET and return the response body as text.

    Args:
        url: Address to request.
        headers: Mapping of HTTP request headers to send with the request.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot block the caller indefinitely.

    Returns:
        The response text when the server answers with status 200;
        ``None`` for any other status code or when the request raises a
        ``RequestException`` (connection error, timeout, ...).
    """
    try:
        # ``headers`` has to be passed by keyword: the second positional
        # parameter of ``requests.get`` is ``params`` (the query string),
        # so a positional dict would never be sent as HTTP headers.
        response = requests.get(url, headers=headers, timeout=timeout)
    except RequestException:
        return None
    if response.status_code == 200:
        return response.text
    return None
def get_content():
    """Scrape the link texts from the eastmoney ``HStock_list`` page.

    Downloads the page with ``get_page``, locates the ``<div>`` whose class
    is ``hklists``, and collects the ``.string`` of every ``<a>`` inside its
    first ``<ul>``. The resulting list is printed and returned.

    Returns:
        A list with one entry per link. An entry is ``None`` when the
        anchor has no single string child. The list is empty when the page
        could not be downloaded or the expected ``div``/``ul`` is missing.
    """
    headers = {
        'Accept': '*/*',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Connection': 'keep-alive',
        'Host': 'quote.eastmoney.com',
        'Referer': 'http://quote.eastmoney.com/hk/HStock_list.html',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3741.400 QQBrowser/10.5.3863.400',
    }
    names = []

    raw_page = get_page("http://quote.eastmoney.com/hk/HStock_list.html", headers)
    # get_page returns None on any failed request; guard before decoding
    # instead of crashing with AttributeError on ``None.encode``.
    if raw_page is not None:
        # NOTE(review): this round-trip assumes the text was decoded as
        # ISO-8859-1 while the page bytes are really GBK -- confirm against
        # the server's Content-Type header.
        page_content = raw_page.encode('ISO-8859-1').decode('gbk')
        soup = BeautifulSoup(page_content, 'lxml')

        # ``find`` returns None when nothing matches, so check each step
        # before descending further into the tree.
        container = soup.find(name='div', attrs={'class': 'hklists'})
        listing = container.find('ul') if container is not None else None
        if listing is not None:
            names = [anchor.string for anchor in listing.find_all('a')]

    print(names)
    return names
# Script entry point: scrape and print the list only when this file is
# executed directly, not when it is imported as a module.
if __name__ == '__main__':
    get_content()