Python crawler examples: urllib + lxml XPath, and BeautifulSoup

A typical example:

from urllib import request
from lxml import etree
from bs4 import BeautifulSoup  # not used in this snippet; kept for the example below

# Fetch the job-posting page and decode the response body as UTF-8 text.
url = "https://www.zhipin.com/job_detail/1418671405.html?ka=search_list_1"
req = request.urlopen(url).read().decode("utf8")

# Parse the HTML with lxml, then extract the job and company description
# text nodes via absolute XPath expressions (positions are site-specific).
sel = etree.HTML(req)  # instantiate the lxml HTML document tree
job_desc = sel.xpath(r'//*[@id="main"]/div[3]/div/div[2]/div[3]/div[1]/div/text()')[0]
company_desc = sel.xpath(r'//*[@id="main"]/div[3]/div/div[2]/div[3]/div[2]/div/text()')[0]
print(job_desc)
print(company_desc)

# ---------------------------------------------------------------------

from urllib import request
from bs4 import BeautifulSoup

# Fetch the same job-posting page and parse it with BeautifulSoup
# instead of lxml/XPath.
page_url = "https://www.zhipin.com/job_detail/1418671405.html?ka=search_list_1"
html_text = request.urlopen(page_url).read().decode("utf8")
soup = BeautifulSoup(html_text, 'html.parser')
# find_all() with no arguments returns every tag in the document.
job_desc = soup.find_all()
print(job_desc)

 

You may also like

Original source: http://43.154.161.224:23101/article/api/json?id=325272578&siteId=291194637