Crawling Douban movie scores

import csv
import json
import re
import time

import requests
from bs4 import BeautifulSoup

# Crawl the Douban "high score" movie listing and append one CSV row per
# movie: title, rating, director, starring actors and runtime.

# Detail-page URLs of every movie visited, in crawl order.
urls = []
# CSV header row.
tc = ['name', 'score', 'director', 'actor', 'long']
csv_path = 'C:\\Users\\lenovo\\Desktop\\go1.csv'

# Write the header row at the start of each run. Mode 'a+' appends, so rows
# from earlier runs are kept (and the header is repeated once per run).
with open(csv_path, 'a+', newline='', encoding='utf-8') as f:
    csv.writer(f).writerow(tc)

# Browser-like request headers sent with every request.
header = {
    'Host': 'movie.douban.com',
    'Referer': 'https://movie.douban.com/explore',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
}

# JSON endpoint behind the listing page (found via the browser's developer
# tools, because the visible page URL does not change while paging).
# The tag parameter is the URL-encoded form of "豆瓣高分"; '{}' receives the
# page_start offset.
search_url = (
    'https://movie.douban.com/j/search_subjects?type=movie'
    '&tag=%E8%B1%86%E7%93%A3%E9%AB%98%E5%88%86&sort=recommend'
    '&page_limit=20&page_start={}'
)


def _text_of(tag):
    """Return ``tag.text``, or an empty string when ``find`` returned None."""
    return tag.text if tag is not None else ''


for page in range(25):
    # Each request returns up to 20 entries, so the offset advances by 20.
    response = requests.get(search_url.format(page * 20), headers=header)
    # The response body is JSON text; json.loads parses it into a dict.
    ds = json.loads(response.text)

    # Iterate over what the page actually returned instead of assuming
    # exactly 20 entries, so a short final page cannot raise IndexError.
    for subject in ds['subjects']:
        d = subject['url']
        urls.append(d)

        response = requests.get(d, headers=header)
        soup = BeautifulSoup(response.text, 'html.parser')
        # Pause between detail pages to avoid hammering the site.
        time.sleep(10)

        name = soup.find('span', {'property': 'v:itemreviewed'})
        score = soup.find('strong', {'property': 'v:average'})
        # First 'attrs' span on the page; presumably the director line
        # (it feeds the 'director' column) -- confirm against the page markup.
        daoyan = soup.find('span', {'class': 'attrs'})
        # Collect every starring-actor link found inside any 'attrs' span.
        listw = [
            link.text
            for span in soup.find_all('span', {'class': 'attrs'})
            for link in span.find_all('a', {'rel': 'v:starring'})
        ]
        shijian = soup.find('span', {'property': 'v:runtime'})

        # Append immediately so already-scraped rows survive a later failure.
        # listw is written as the string form of the list (csv.writer
        # stringifies non-string cells).
        with open(csv_path, 'a+', newline='', encoding='utf-8') as f:
            csv.writer(f).writerow([
                _text_of(name),
                _text_of(score),
                _text_of(daoyan),
                listw,
                _text_of(shijian),
            ])

 

Guess you like

Origin www.cnblogs.com/persistence-ok/p/10949339.html