使用 Python 爬取东方财富网(quote.eastmoney.com)上所有港股的代码和名称

代码:

from bs4 import BeautifulSoup
import requests
from requests import RequestException
import pymssql
import json


def get_page(url, headers, timeout=10):
    """Download ``url`` and return the response body as text.

    Args:
        url: Address of the page to fetch.
        headers: Mapping of HTTP request headers to send with the request.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot block the caller forever.

    Returns:
        The response text when the server answers with HTTP 200; ``None``
        for any other status code or when ``requests`` raises a
        ``RequestException`` (connection error, timeout, ...).
    """
    try:
        # ``headers`` has to be passed by keyword: the second positional
        # parameter of ``requests.get`` is ``params`` (the query string),
        # so a positional dict would never be sent as HTTP headers.
        response = requests.get(url, headers=headers, timeout=timeout)
    except RequestException:
        return None
    if response.status_code == 200:
        return response.text
    return None


def get_content():
    """Scrape the Hong Kong stock list page on quote.eastmoney.com.

    Fetches the list page through ``get_page``, parses it with
    BeautifulSoup and collects the ``.string`` of every ``<a>`` found in
    the first ``<ul>`` inside the ``<div class="hklists">`` container.
    The collected list is printed before it is returned.

    Returns:
        A list with one entry per link (presumably the stock code and
        name -- verify against the live page). An entry is ``None`` when
        the link has no single text child. The list is empty when the page
        could not be downloaded or the expected container is missing.
    """
    list_url = 'http://quote.eastmoney.com/hk/HStock_list.html'
    headers = {'Accept': '*/*',
               'Accept-Encoding': 'gzip, deflate',
               'Accept-Language': 'zh-CN,zh;q=0.9',
               'Connection': 'keep-alive',
               'Host': 'quote.eastmoney.com',
               'Referer': list_url,
               'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3741.400 QQBrowser/10.5.3863.400'
               }
    names = []
    raw_page = get_page(list_url, headers)
    # get_page returns None on any failure; skip parsing instead of
    # crashing on ``None.encode``.
    if raw_page is not None:
        try:
            # Undo a wrong ISO-8859-1 decode and re-read the bytes as GBK.
            page_content = raw_page.encode('ISO-8859-1').decode('gbk')
        except UnicodeEncodeError:
            # The text holds characters outside Latin-1, so it was not
            # decoded as ISO-8859-1 in the first place; use it unchanged.
            page_content = raw_page
        soup = BeautifulSoup(page_content, 'lxml')
        container = soup.find(name='div', attrs={'class': 'hklists'})
        listing = container.find('ul') if container is not None else None
        # A changed page layout yields an empty result, not AttributeError.
        if listing is not None:
            names = [link.string for link in listing.find_all('a')]
    print(names)
    return names


# Script entry point: run the scraper only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    get_content()
发布了105 篇原创文章 · 获赞 17 · 访问量 11万+

猜你喜欢

转载自blog.csdn.net/qq_38890412/article/details/104264532