# 1. Load the page into a BeautifulSoup object
# Parse the saved HTML file with the lxml parser; the context manager
# closes the file handle once parsing is done.
with open('D:/xxxxx/the_blah.html', 'r') as web_data:
    soup = BeautifulSoup(web_data, 'lxml')
# 2. Select the page elements
# CSS selectors for the <img>, title link (<h3> > <a>) and <p> found under
# each list item of the main-content block. Each call returns a list of tags.
images = soup.select('body > div.main-content > ul > li > img')
titles = soup.select('body > div.main-content > ul > li > h3 > a')
# Bound to `infos` (plural): that is the name the extraction loop reads.
infos = soup.select('body > div.main-content > ul > li > p')
# 3. Extract the specific information from each selected element
# Walk the three selections in lockstep and build one record per list item.
# zip() stops at the shortest input, so unmatched trailing tags are dropped.
for image, title, info in zip(images, titles, infos):
    data = {
        'title': title.get_text(),  # text content of the tag
        'image': image.get('src'),  # value of the tag's src attribute
        'info': info.get_text(),
    }