# 获取东方财富网股吧里的帖子评论 — fetch post comments from the Eastmoney stock forum (guba.eastmoney.com)

import requests
import time
import re
from urllib import parse

# Seconds to pause before the first request. The original script slept for
# four seconds at start-up; the pause is kept, but only when run as a script.
STARTUP_DELAY = 4
# Abort a request that stalls instead of blocking forever.
REQUEST_TIMEOUT = 10
# Stock code whose forum board is scraped.
STOCK_CODE = "002506"
# Text separating the post title from the site-generated suffix in <title>.
# NOTE(review): presumably the beginning of the stock's name -- confirm
# before pointing the script at a different board.
TITLE_MARKER = "_协"

url = "http://guba.eastmoney.com/list,{}.html".format(STOCK_CODE)
headers = {
    # The two fragments need the separating space to form a valid UA string.
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:80.0) '
        'Gecko/20100101 Firefox/80.0'
    ),
}


def extract_post_paths(list_html, stock_code=STOCK_CODE):
    """Return the distinct post paths linked from a board listing page.

    Args:
        list_html: HTML text of the listing page.
        stock_code: Stock code that must appear in the ``/news,<code>,`` path.

    Returns:
        Paths such as ``/news,002506,123.html`` in first-seen order, with
        duplicates removed so each post is fetched only once.
    """
    # Non-greedy and quote/bracket-free so two links on one unbroken run of
    # characters are not merged into a single bogus path.
    pattern = r'/news,{},[^\s"\'<>]+?\.html'.format(re.escape(stock_code))
    return list(dict.fromkeys(re.findall(pattern, list_html)))


def extract_authors(page_html):
    """Return the text of every ``<font>...</font>`` element on the page."""
    return re.findall(r'<font>(.*?)</font>', page_html)


def extract_dates(page_html):
    """Return every ``YYYY-MM-DD`` date found inside ``zwfbtime`` divs."""
    dates = []
    for block in re.findall(r'<div class="zwfbtime">(.*?)</div>', page_html):
        dates.extend(re.findall(r'\d{4}-\d{2}-\d{2}', block))
    return dates


def extract_titles(page_html, marker=TITLE_MARKER):
    """Return each ``<title>`` text with the trailing site suffix removed.

    Everything from the last occurrence of ``marker`` onwards is dropped.
    Titles that do not contain ``marker`` are skipped.  The title string is
    processed directly, so apostrophes inside it no longer truncate the result.
    """
    titles = []
    for raw_title in re.findall(r'<title>(.*?)</title>', page_html):
        head, found, _suffix = raw_title.rpartition(marker)
        if found:
            titles.append(head)
    return titles


def extract_contents(page_html):
    """Return the inner HTML of every post-body div (may span lines)."""
    return re.findall(
        r'<div class="stockcodec .xeditor">(.*?)</div>', page_html, re.DOTALL
    )


def main():
    """Fetch the board listing, then print details of every linked post.

    For each post the URL, author names, dates, title and body are printed,
    one item per line, in that order.
    """
    time.sleep(STARTUP_DELAY)
    list_page = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    post_paths = extract_post_paths(list_page.text)
    print(post_paths)

    extractors = (
        extract_authors,
        extract_dates,
        extract_titles,
        extract_contents,
    )
    for path in post_paths:
        post_url = parse.urljoin(url, path)
        print(post_url)
        post_page = requests.get(
            post_url, headers=headers, timeout=REQUEST_TIMEOUT
        )
        page_html = post_page.text
        for extract in extractors:
            for item in extract(page_html):
                print(item)


if __name__ == "__main__":
    main()

# 网页时间贴
#
# 猜你喜欢
#
# 转载自 https://blog.csdn.net/liaoqingjian/article/details/108418026