Python_爬虫_request+urllib+xpath

典型示例(先用 urllib + lxml 的 XPath 提取,再用 urllib + BeautifulSoup 解析):

from urllib import request
from lxml import etree
from bs4 import BeautifulSoup

# Fetch a job-detail page and extract two text fragments with XPath.
url = "https://www.zhipin.com/job_detail/1418671405.html?ka=search_list_1"

# Use the response as a context manager so the underlying HTTP connection
# is closed even if reading or decoding raises.
with request.urlopen(url) as resp:
    req = resp.read().decode("utf8")

sel = etree.HTML(req)  # parse the HTML text into an lxml element tree

# Each XPath query returns a list of matching text nodes; only the first
# match is kept. NOTE: `[0]` raises IndexError if the page layout does not
# match these absolute paths (they are tied to the current page structure).
job_desc = sel.xpath(r'//*[@id="main"]/div[3]/div/div[2]/div[3]/div[1]/div/text()')[0]
company_desc = sel.xpath(r'//*[@id="main"]/div[3]/div/div[2]/div[3]/div[2]/div/text()')[0]

print(job_desc)
print(company_desc)

# ---------------------------------------------------------------------

from urllib import request
from bs4 import BeautifulSoup

# Fetch the same job-detail page and parse it with BeautifulSoup.
url = "https://www.zhipin.com/job_detail/1418671405.html?ka=search_list_1"

# Use the response as a context manager so the underlying HTTP connection
# is closed even if reading or decoding raises.
with request.urlopen(url) as resp:
    req = resp.read().decode("utf8")

soup = BeautifulSoup(req, 'html.parser')

# find_all() is called without any filter, so it is not narrowed to a
# particular element; pass a tag name / attributes to select specific nodes.
job_desc = soup.find_all()
print(job_desc)

猜你喜欢

转载自www.cnblogs.com/hellangels333/p/8991293.html
今日推荐