任务四

用 Python 爬取近 30 天的百度指数
代码均为转载,如下:
# NOTE: this snippet expects the following names to already be in scope:
#   modules/classes: webdriver, WebDriverWait, EC, By, ActionChains, Image,
#                    pytesseract, re, time
#   values:          keyword (text typed into the search box), x / y (pointer
#                    offset passed to move_to_element_with_offset), index
#                    (used to build the screenshot file names)

# 1. Log in
url = 'http://index.baidu.com/'
# NOTE(review): `executable_path` is the older Selenium keyword; newer Selenium
# releases may require a Service object instead -- confirm against the
# installed version.
driver = webdriver.Chrome(
    executable_path='C:/Program Files (x86)/Google/Chrome/Application/chromedriver.exe')
driver.get(url)
# Left empty in this snippet, so the loop below adds nothing. Presumably it is
# meant to be filled with the cookie dicts of a logged-in session.
cookieList = []
for cookie in cookieList:
    driver.add_cookie(cookie)
# Reload the page after the cookies were added, then give it time to settle.
driver.get(url)
time.sleep(3)
driver.refresh()

# 2. Type the keyword and maximise the window
# until() returns the element produced by the expected condition, so the
# clickable element can be used directly instead of being looked up again.
search_box = WebDriverWait(driver, 10, 0.5).until(
    EC.element_to_be_clickable((By.XPATH, "//input[@class='search-input']")))
search_box.send_keys(keyword)
# 'search-input-cancle' (sic) is kept exactly as written in the original
# selector.
search_button = WebDriverWait(driver, 10, 0.5).until(
    EC.element_to_be_clickable((By.XPATH, "//span[@class='search-input-cancle']")))
search_button.click()
driver.maximize_window()

# 3. Move the pointer into the rectangle holding the index chart so that the
#    'viewbox' element appears
time.sleep(2)
WebDriverWait(driver, 10, 0.5).until(
    EC.element_to_be_clickable((By.CSS_SELECTOR, '#trend > svg > rect')))
# The second matching <rect> is the one used as the hover target.
element = driver.find_elements(By.CSS_SELECTOR, '#trend > svg > rect')[1]
time.sleep(2)
ActionChains(driver).move_to_element_with_offset(element, x, y).perform()
time.sleep(3)
screenshot_path = str(index) + '.png'
cropped_path = 'day' + str(index) + '.png'
driver.get_screenshot_as_file(screenshot_path)
element = WebDriverWait(driver, 10, 0.5).until(
    EC.element_to_be_clickable((By.XPATH, "//div[@id='viewbox']")))

# 4. Crop the viewbox area out of the screenshot and run OCR on it
getElementImage(driver, element, screenshot_path, cropped_path, keyword)
time.sleep(2)
# Open the cropped image in a context manager so the file handle is released.
# 'fontyp' is the tesseract language name used by the original code
# (presumably a custom trained data set -- confirm it is installed).
with Image.open(cropped_path) as cropped_image:
    number = pytesseract.image_to_string(cropped_image, lang='fontyp')
# Strip thousands separators, periods and whitespace from the OCR text. The
# pasted pattern had an unescaped '.', which matches every character and
# would have erased the whole result.
number = re.sub(r'[,.\s]', '', number)
# Map letters to the digits the original code substitutes for them.
number = number.translate(str.maketrans('zie', '279'))
print(number)

def getElementImage(driver, element, fromPath, toPath, keyword):
    """Crop the part of a screenshot that corresponds to ``element``.

    The crop box is derived from the element's location and size: it spans
    the lower half of the element vertically and, horizontally, runs from
    roughly a quarter of the width (shifted right for longer keywords) to
    two thirds of the width, with a 2-pixel margin on each side.

    :param driver: WebDriver used to read the current vertical scroll offset.
    :param element: element whose ``location`` and ``size`` define the crop.
    :param fromPath: path of the source screenshot image.
    :param toPath: path the cropped image is saved to.
    :param keyword: search keyword; its length shifts the left crop edge.
    :return: None. The cropped image is written to ``toPath``.
    """
    # Coordinates of the element.
    locations = element.location
    # Subtract the vertical scroll offset from the element's y coordinate
    # (presumably because the screenshot only covers the visible viewport --
    # confirm).
    scroll = driver.execute_script("return window.scrollY;")
    top = locations['y'] - scroll
    # Width and height of the element.
    sizes = element.size
    # Extra left offset for keywords longer than two characters: one
    # fifteenth of the element width per additional character.
    add_length = (len(keyword) - 2) * sizes['width'] / 15
    # Crop box as (left, upper, right, lower) in screenshot pixels.
    rangle = (
        int(locations['x'] + sizes['width'] / 4 + add_length) - 2,
        int(top + sizes['height'] / 2),
        int(locations['x'] + sizes['width'] * 2 / 3) + 2,
        int(top + sizes['height']),
    )
    # Fixed pause kept from the original (presumably to let the screenshot
    # file finish being written -- confirm).
    time.sleep(2)
    # Save inside the `with` block so the source image is still open while
    # the crop is written, and its file handle is closed afterwards.
    with Image.open(fromPath) as image:
        cropImg = image.crop(rangle)
        cropImg.save(toPath)

本人能力有限,代码均转载
原文链接:https://blog.csdn.net/qq_37913997/article/details/81480143

发布了6 篇原创文章 · 获赞 7 · 访问量 302

猜你喜欢

转载自blog.csdn.net/weixin_44222568/article/details/87557320