Python 抓取猫眼电影评分

目标网页
在这里插入图片描述
完整代码:

from selenium import webdriver
from PIL import Image
from io import BytesIO
import time
import pytesseract

# Scrape the rating shown on a Maoyan film page: the page is screenshotted,
# the rating element is cropped out of the screenshot, and the crop is passed
# to Tesseract OCR instead of reading the element's text from the DOM.

# Factor applied to the element's reported position/size before cropping the
# screenshot. NOTE(review): presumably this needs to be raised when the
# screenshot is rendered at a different pixel density than the coordinates
# Selenium reports (e.g. HiDPI displays) -- confirm on the target machine.
scale = 1.0


driver = webdriver.Chrome("./chromedriver")
try:
    driver.implicitly_wait(10)
    driver.get("https://maoyan.com/films/410629")
    # Fixed pause after navigation, before the page is restyled and captured.
    time.sleep(2)

    # Make the banner background white and the rating text black so the
    # glyphs are easier for OCR to recognise.
    driver.execute_script('document.querySelector(".banner").style.background = "white"')
    driver.execute_script('document.querySelector(".stonefont").style.color = "black"')

    # Capture the browser screenshot as an in-memory PNG.
    png_img = driver.get_screenshot_as_png()
    img = Image.open(BytesIO(png_img))

    # Locate the element that renders the rating digits.
    span_el = driver.find_element_by_xpath('//span[@class="index-left info-num "]/span[@class="stonefont"]')
    print(span_el)

    # Convert the element's position and size into a crop box.
    location = span_el.location
    left = int(location["x"]) * scale
    top = int(location["y"]) * scale
    print(location)
    size = span_el.size
    right = left + int(size["width"]) * scale
    bottom = top + int(size["height"]) * scale

    # (left, upper, right, lower) box in the order Image.crop expects.
    cut_info = (left, top, right, bottom)
    print(cut_info)
    # Keep the uncropped screenshot on disk for debugging.
    img.save("test1.png")

    # Crop the screenshot down to the rating element only.
    img = img.crop(cut_info)

    # Page segmentation mode 7 treats the image as a single text line.
    # The double-dash form is required: Tesseract 4+ rejects the legacy
    # single-dash "-psm" spelling.
    print("结果:", pytesseract.image_to_string(img, config="--psm 7"))
    img.save('test2.png')
finally:
    # Always shut the browser down, even if a step above raised; otherwise
    # the Chrome and chromedriver processes would be left running.
    driver.quit()

猜你喜欢

转载自blog.csdn.net/weixin_43751840/article/details/88430378