Using pandas `read_html` to extract a table from a web page

# Use pandas.read_html to extract a table from a web page, save it to a
# text file in CSV format, then read the file back and print its lines.
from io import StringIO

import pandas as pd
import requests

# Request headers: a browser-like User-Agent, and ask the server to close
# the connection after the response instead of keeping it alive.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36"
    ),
    "Connection": "close",
}

# URL of the page to crawl.
url = "http://guj.315i.com/assess/detial?classId=001&industry=001002"

# A timeout keeps the script from hanging forever on an unresponsive server;
# raise_for_status() fails early on HTTP errors instead of parsing an error page.
response = requests.get(url=url, headers=headers, timeout=10)
response.raise_for_status()
page = response.text

# read_html returns a list with one DataFrame per <table> found in the HTML;
# [0] takes the first table. The HTML text is wrapped in StringIO because
# passing a literal HTML string directly is deprecated in recent pandas.
tb = pd.read_html(StringIO(page))[0]

# Save as CSV-formatted text (a .csv extension would work equally well),
# without the header row and without the index column.
tb.to_csv("aaa.txt", mode="w", encoding="utf-8", header=False, index=False)

# Read the saved file back; readlines() returns a list with one string per line.
with open("aaa.txt", "r", encoding="utf-8") as f:
    lis_new = f.readlines()
print(lis_new)

Guess you like

Origin www.cnblogs.com/xdlzs/p/11237441.html