"""Fetch the Maoyan board page and print the movie entries found in it."""

import re
import sys

import requests
from requests.exceptions import RequestException

# Compiled once at import time. One match per <dd>...</dd> entry, capturing
# seven groups in order: the text following the "board-index", the value of
# the "data-src" attribute, the anchor text following "name", and the text
# following the "star", "releasetime", "integer" and "fraction" markers.
# re.S lets ".*?" span the newlines between the tags of one entry.
_MOVIE_PATTERN = re.compile(
    r'<dd>.*?board-index.*?>(.*?)</i>'
    r'.*?data-src="(.*?)"'
    r'.*?name.*?a.*?>(.*?)</a>'
    r'.*?star.*?>(.*?)</p>'
    r'.*?releasetime.*?>(.*?)</p>'
    r'.*?integer.*?>(.*?)</i>'
    r'.*?fraction.*?>(.*?)</i>'
    r'.*?</dd>',
    re.S,
)


def get_one_page(url, timeout=10):
    """Download *url* and return the response body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the script forever.

    Returns:
        The response text when the server answers with HTTP 200, otherwise
        ``None`` (non-200 status, or any ``RequestException`` such as a
        connection error or timeout).
    """
    try:
        response = requests.get(url, timeout=timeout)
    except RequestException:
        return None
    if response.status_code == 200:
        return response.text
    return None


def parse_one_page(html):
    """Extract the movie entries from a board page, print and return them.

    Args:
        html: Page source as a string. ``None`` or an empty string (e.g. the
            result of a failed download) is treated as a page with no
            entries instead of raising ``TypeError`` inside ``re``.

    Returns:
        A list of 7-tuples of strings, one per matched ``<dd>`` entry, in
        the capture order described on ``_MOVIE_PATTERN``. The same list is
        printed to stdout.
    """
    items = _MOVIE_PATTERN.findall(html) if html else []
    print(items)
    return items


def main():
    """Fetch the board page and print the parsed entries."""
    url = 'https://maoyan.com/board/4'
    html = get_one_page(url)
    if html is None:
        # get_one_page signals every failure with None; report it rather
        # than handing None to the parser.
        print('Failed to download ' + url, file=sys.stderr)
        return
    parse_one_page(html)


if __name__ == '__main__':
    main()
爬虫,爬取猫眼电影
猜你喜欢
转载自blog.csdn.net/ganghaodream/article/details/88426163
今日推荐
周排行