Python 爬虫：登录后爬取网页

import requests
import re
from lxml import html
from bs4 import BeautifulSoup

# Form fields submitted to the login endpoint. The credential values below
# are placeholders: substitute real ones before running, and keep real
# secrets out of version control.
payload = {
    "os_username": "xxxxx",
    "os_password": "xxxxxxx",
    "login": "Log in",
    "os_destination": "",
}

# Log in through a session object so that any cookies set by the login
# response are sent automatically with later requests on the same session.
session_requests = requests.session()

login_url = "https://wiki.*********"
result = session_requests.post(
    login_url,
    data=payload,
    headers={"referer": login_url},
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    timeout=30,
)
# Stop here on an HTTP-level failure (4xx/5xx) instead of continuing
# unauthenticated.
# NOTE(review): some sites answer a rejected login with 200 and re-render the
# login form; this check does not detect that case -- confirm against the
# actual server behaviour.
result.raise_for_status()

# Fetch the target page using the session created during login.
url = 'https://wiki*********/pages/viewinfo.action?pageId=35343810'
# Bound the wait so an unresponsive server cannot hang the script.
result = session_requests.get(url, timeout=30)
# Fail loudly on an HTTP error rather than parsing an error page below.
result.raise_for_status()

soup = BeautifulSoup(result.text, 'lxml')
# Print every <a> tag whose string matches the pattern (contains a hyphen).
# `string=` is the current name of the filter formerly spelled `text=`,
# which Beautiful Soup has deprecated.
for title in soup.find_all("a", string=re.compile("-")):
    print(title)

猜你喜欢

转载自blog.csdn.net/jonwu0102/article/details/81239415