Python web scraping - python取得NIPS oral paper列表

 1 from lxml import html
 2 import requests
 3 
 4 # using xpath
 5 
 6 # page = requests.get('http://econpy.pythonanywhere.com/ex/001.html')
 7 page = requests.get('https://nips.cc/Conferences/2019/Schedule')
 8 tree = html.fromstring(page.content)
 9 
10 #This will create a list of buyers:
11 # buyers = tree.xpath('//div[@title="buyer-name"]/text()')
12 # test = tree.xpath('//*[@id="maincard_15788"]/div[3]')
13 # print(test)
14 
15 
16 
17 doc = tree
18 # btags = doc.xpath("//*[@class[starts-with(., 'maincard narrower Oral') and string-length() > 3]]")
19 btags = doc.xpath("//*[@class[starts-with(., 'maincard narrower Spotlight') and string-length() > 3]]")
20 idx = 1
21 with open('nips_paperlist_spotlight.txt', 'w') as f:
22     for b in btags:
23         type = b.xpath("div[1]")[0].text
24         title = b.xpath("div[3]")[0].text
25         author = b.xpath("div[5]")[0].text
26         out_str = "%d, %s, %s, %s\n"%(idx, type,  title, author)
27         print(out_str)
28         f.writelines(out_str)
29         # print(idx)
30         # print(type)
31         # print(title)
32         # print(author)
33         idx += 1

使用XPath

lxml, requests

https://docs.python-guide.org/scenarios/scrape/

https://stackoverflow.com/questions/12393858/xpath-using-contains-with-a-wildcard

猜你喜欢

转载自www.cnblogs.com/imoon22/p/12034855.html
今日推荐