Python -- a small web crawler example

import re
from urllib.request import urlopen

def getPage(url):
    """Fetch *url* and return the response body decoded as UTF-8.

    Args:
        url: Address to open; handed unchanged to ``urlopen``.

    Returns:
        The complete response body as a ``str``.

    Raises:
        urllib.error.URLError: If the resource cannot be opened.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    # The context manager closes the response even if read() or decode()
    # raises, so the underlying connection is never leaked.
    with urlopen(url) as response:
        return response.read().decode('utf-8')

# Compiled once at import time. Raw strings keep ``\d`` a regex escape
# instead of an (invalid) string-literal escape. re.S lets ``.`` span
# newlines, so one movie entry may stretch over several lines of HTML.
_ITEM_PATTERN = re.compile(
    r'<div class="item">.*?<div class="pic">.*?<em .*?>(?P<id>\d+).*?<span class="title">(?P<title>.*?)</span>'
    r'.*?<span class="rating_num" .*?>(?P<rating_num>.*?)</span>.*?<span>(?P<comment_num>.*?)评价</span>',
    re.S,
)


def parsePage(s):
    """Extract every movie entry found in the page source *s*.

    Args:
        s: HTML text to search.

    Returns:
        A list with one ``(id, title, rating_num, comment_num)`` tuple of
        strings per matched entry, in document order; empty when nothing
        matches.
    """
    return _ITEM_PATTERN.findall(s)

def main(num):
    """Download one page of the Douban Top 250 list and print its entries.

    Args:
        num: Value substituted into the ``start`` query parameter of the
            list URL.

    The page is fetched with ``getPage``, parsed with ``parsePage``, and the
    resulting list of match tuples is printed to standard output.
    """
    url = 'https://movie.douban.com/top250?start=%s&filter=' % num
    response_html = getPage(url)
    ret = parsePage(response_html)
    print(ret)

# Walk the list in ten requests, advancing the start offset by 25 each time
# (0, 25, ..., 225).
for page_start in range(0, 250, 25):
    main(page_start)
# getPage downloads the page source for the given URL.
# The downloaded bytes are decoded as UTF-8; the resulting page text is the string to be matched.
# ret = re.findall(pattern, string_to_match)  # ret is a list of every match found

 

Guess you like

Origin www.cnblogs.com/jsit-dj-it/p/11456005.html