代码小测试

from  bs4  import BeautifulSoup
from lxml import html,etree
# Read the saved page and pull the text of the first <div class="p">.
file = 'hm.html'
# The context manager closes the file handle as soon as the content is read;
# the original left it open for the lifetime of the process.
with open(file, 'r', encoding='utf-8') as htmlfile:
    htmlhandle = htmlfile.read()
soup = BeautifulSoup(htmlhandle, features='lxml')
# Raises IndexError when the page has no <div class="p"> element.
a = soup.find_all(name='div', attrs={"class": "p"})[0].text

#print(a)#以上为内容爬取


#网页的url进行爬取
from  bs4  import BeautifulSoup
from lxml import html,etree
# Read the saved page and print the link target of one anchor element.
file = 'hm.html'
# Close the file handle deterministically instead of leaking it.
with open(file, 'r', encoding='utf-8') as htmlfile:
    htmlhandle = htmlfile.read()
soup = BeautifulSoup(htmlhandle, features='lxml')
# Index 32 selects the 33rd <a> tag in document order.
# NOTE(review): the index is hard-coded, presumably for this specific page;
# an IndexError is raised if the page has fewer anchors.
print(soup.select('a')[32]['href'])

#获取标题

from  bs4  import BeautifulSoup
from lxml import html,etree
# Read the saved page and print the text of the first
# <div class="chapter_update_time"> element.
file = 'hm.html'
# Close the file handle deterministically instead of leaking it.
with open(file, 'r', encoding='utf-8') as htmlfile:
    htmlhandle = htmlfile.read()
soup = BeautifulSoup(htmlhandle, features='lxml')
# Raises IndexError when no matching <div> exists.
a = soup.find_all(name='div', attrs={"class": "chapter_update_time"})[0].text
print(a)

#全局变量的使用
def ja(a, b):
    """Add ``a`` and ``b``, store the sum in the module-level ``c``, return it.

    The ``global`` statement is the point of this demo: after the call the
    result is readable by any other function in the module as ``c``.
    """
    global c
    total = a + b
    c = total
    return total

def main():
    """Call ``ja(1, 2)`` and print the module-level ``c`` that it sets."""
    # The return value is not needed; the sum is read back through the global.
    ja(1, 2)
    print(c)

# Run the demo only when the file is executed as a script, not on import.
# The stray ''' that followed main() opened an unterminated string literal
# and made the file fail to parse; it has been removed.
if __name__ == '__main__':
    main()

#bs4清洗数据的小练习
from  bs4  import BeautifulSoup
from lxml import html,etree
# bs4 data-cleaning exercise: print the text of the first
# <div class="chapter_update_time"> element in the saved page.
file = 'hm.html'
# Close the file handle deterministically instead of leaking it.
with open(file, 'r', encoding='utf-8') as htmlfile:
    htmlhandle = htmlfile.read()
soup = BeautifulSoup(htmlhandle, features='lxml')
# Raises IndexError when no matching <div> exists.
a = soup.find_all(name='div', attrs={"class": "chapter_update_time"})[0].text
print(a)

from bs4 import BeautifulSoup
# Compact variant of the same exercise: read the page, parse it, and print
# the text of the first <div class="chapter_update_time"> element.
file = 'hm.html'
# open(...).read() on one line left a handle that could never be closed
# explicitly; the context manager releases it right after reading.
with open(file, 'r', encoding='utf-8') as fh:
    htmlfile = fh.read()  # note: holds the page content (str), not a handle
soup = BeautifulSoup(htmlfile, 'lxml')
# Raises IndexError when no matching <div> exists.
a = soup.find_all(name='div', attrs={"class": "chapter_update_time"})[0].text
print(a)

发布了 23 篇原创文章 · 获赞 0 · 访问量 517

猜你喜欢

转载自blog.csdn.net/weixin_46244909/article/details/104214512