import requests
import time
import re
time.sleep(4)
from urllib import parse
# Scraper for the Eastmoney Guba board of stock 002506: download the board's
# listing page, follow every post link found in it, and print what each post
# page yields (the <font> texts, the posting dates, the title and the body).

url = "http://guba.eastmoney.com/list,002506.html"

# Site root against which the relative post links are resolved.
list_url = "http://guba.eastmoney.com"

headers = {
    'User-Agent':
        # The trailing space matters: the two literals are concatenated.
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:80.0) '
        'Gecko/20100101 Firefox/80.0',
}

# Seconds to wait for the server before a request is abandoned.
REQUEST_TIMEOUT = 10

# Only the part of a page title before the last occurrence of this marker is
# reported.  NOTE(review): presumably this is where the board name appended to
# every post title begins -- confirm against a live page.
TITLE_SUFFIX_MARKER = "_协"


def fetch_html(page_url: str) -> str:
    """Download *page_url* with the script's headers and return the body text.

    Raises:
        requests.RequestException: on connection problems, a timeout, or an
            HTTP error status.
    """
    response = requests.get(page_url, headers=headers, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response.text


def extract_post_links(html: str) -> list:
    """Return the relative post links (``/news,002506,...html``) in *html*."""
    return re.findall(r'/news,002506,\S+html', html)


def extract_names(html: str) -> list:
    """Return the text of every ``<font>`` element in *html*."""
    return re.findall(r'<font>(.*?)</font>', html)


def extract_dates(html: str) -> list:
    """Return every ``YYYY-MM-DD`` date inside the ``zwfbtime`` divs of *html*."""
    dates = []
    for stamp in re.findall(r'<div class="zwfbtime">(.*?)</div>', html):
        # Search each captured string directly rather than the repr of the
        # whole list, so list punctuation never takes part in the match.
        dates.extend(re.findall(r'\d\d\d\d-\d\d-\d\d', stamp))
    return dates


def extract_titles(html: str) -> list:
    """Return each ``<title>`` text cut at the last ``TITLE_SUFFIX_MARKER``.

    Titles that do not contain the marker are skipped.
    """
    titles = []
    for raw_title in re.findall(r'<title>(.*?)</title>', html):
        head, marker, _ = raw_title.rpartition(TITLE_SUFFIX_MARKER)
        if marker:
            titles.append(head)
    return titles


def extract_contents(html: str) -> list:
    """Return the inner HTML of every post-body div in *html*."""
    # re.DOTALL: a post body usually spans several lines.
    return re.findall(
        '<div class="stockcodec .xeditor">(.*?)</div>', html, re.DOTALL)


def main():
    """Scrape the board listing and print the details of every linked post."""
    import sys

    post_links = extract_post_links(fetch_html(url))
    print(post_links)

    for link in post_links:
        post_url = parse.urljoin(list_url, link)
        print(post_url)
        try:
            post_html = fetch_html(post_url)
        except requests.RequestException as exc:
            # One unreachable post should not abort the whole run.
            print("skipping", post_url, "-", exc, file=sys.stderr)
            continue

        # Same output order as before: names, dates, title, body.
        for extract in (extract_names, extract_dates,
                        extract_titles, extract_contents):
            for item in extract(post_html):
                print(item)


if __name__ == "__main__":
    main()
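# Scrape post comments from the Eastmoney stock forum (Guba).
# You may also like
# Reposted from blog.csdn.net/liaoqingjian/article/details/108418026
# Today's recommendations
# Weekly ranking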