BeautifulSoup 的应用

# coding=utf-8
import re
import requests
import bs4

# Fetch the page source.
# requests applies no timeout by default, so set one explicitly to keep the
# script from hanging forever on an unresponsive server.
response = requests.get("http://www.qq.com", timeout=10)
# Fail loudly on 4xx/5xx instead of parsing an error page.
response.raise_for_status()
# When the server does not declare a charset, requests falls back to
# ISO-8859-1 for text responses, which garbles non-Latin pages; use the
# encoding detected from the body instead.
if "charset" not in response.headers.get("content-type", "").lower():
    response.encoding = response.apparent_encoding
content = response.text
# print(content)

# Parse the markup into a navigable tree using the lxml parser.
soup = bs4.BeautifulSoup(content, 'lxml')
# print(soup.prettify())

# Inspect the first <title>, <link> and <p> tags: tag object, name, content.
# Attribute-style access (soup.title, soup.link, ...) returns None when the
# tag is absent, so each one is checked before it is dereferenced.
title = soup.title
print(title)
if title is not None:
    print(title.name)
    print(title.string)

head = soup.head
head_title = head.title if head is not None else None
if head_title is not None:
    print(head_title.string)
# print(soup.head)

link = soup.link
print(link)
if link is not None:
    # Tag.get() returns None for a missing attribute instead of raising
    # KeyError the way link['href'] would.
    print(link.get('href'))
    print(link.get('rel'))

paragraph = soup.p
print(paragraph)
if paragraph is not None:
    print(paragraph.string)
    # Direct children of the first <p>.
    print(paragraph.contents)
    # Parent node of the first <p>.
    print(paragraph.parent)

# Search the whole document by tag name (attributes and text content can be
# used as filters as well).
print(soup.find_all("ul"))

猜你喜欢

转载自www.cnblogs.com/samtang/p/12639523.html