Basics of data parsing with bs4 (BeautifulSoup)



import requests
from bs4 import BeautifulSoup

if __name__ == "__main__":
    # Demo script: load a local HTML file into a BeautifulSoup object and
    # try out the basic tag-lookup APIs (uncomment any example below).

    # Optional User-Agent header (UA disguise), only needed when the page is
    # fetched over HTTP with requests instead of being read from disk:
    # headers = {
    #     'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
    # }

    # The context manager guarantees the file handle is closed; the soup is
    # built inside the block, while the file is still open.
    with open('./text.html', 'r', encoding='utf-8') as fp:
        # Load the data of the local HTML document into the soup object.
        soup = BeautifulSoup(fp, 'lxml')

    # print(soup)
    # soup.tagName returns the first occurrence of that tag in the document.
    # print(soup.input['href'])
    # print(soup.find('div'))  # equivalent to soup.div
    # print(soup.find('div', class_='hzbtabs'))  # attribute positioning
    # print(soup.find_all("div"))
    # print(soup.select(".hzbtabs"))
    # print(soup.select('.hzbbannertxt > a')[0]['href'])

You may also like

Origin www.cnblogs.com/huahuawang/p/12692333.html