Python selenium PIL 全网页滚动截屏 && headless全网页截屏

  1. 思路

    先截取当前屏幕的截图,以其高度作为基准高度 h;再获取整个网页 body 的总高度 H;然后按 h 逐屏滚动并循环截图,最后通过 PIL 将各屏截图纵向拼接成一张完整的长图。

  2. 代码

    # -*- coding:utf-8 -*-
    # author: [email protected]
    # software: PyCharm
    import os
    
    from PIL import Image
    from time import sleep
    
    
    class ScreenShot:
        """Full-page screenshot helper that scrolls a page and stitches the shots.

        The page is captured one viewport at a time with ``driver.save_screenshot``
        and the pieces are pasted underneath each other with Pillow. All state is
        kept in class attributes, so the class is used through its classmethods.
        """

        # JavaScript snippets executed in the browser, looked up by key.
        __JS__ = {
            'scroll_to_bottom': "window.scroll({top:document.body.clientHeight,left:0,behavior:'auto'});",
            'scroll_to_y': "window.scroll({top:%d,left:0,behavior:'auto'});",
            'pixel_ratio': "return window.devicePixelRatio;",
        }
        __base_end__ = 'tmp_end.png'
        __scroll_bottom__ = 'scroll_to_bottom'
        __scroll_y__ = 'scroll_to_y'
        __pixel_ratio_key__ = 'pixel_ratio'
        __body__ = '//body'
        __height__ = 'height'
        # Kept only so existing references keep resolving; it is no longer
        # executed because it wiped every PNG in the working directory.
        __clear_shell__ = 'rm -rf *.png'
        __RGB__ = 'RGB'
        # Screenshot pixels per CSS pixel assumed when the browser does not
        # report one (2 matches the factor this class used to hard-code).
        __default_ratio__ = 2

        @classmethod
        def screen_shot(cls, driver, title, uploader_url='', delete=False):
            """
            Capture the whole page by scrolling and stitching viewport screenshots.

            :param driver: webdriver instance with the target page already loaded
            :param title: title, used as the file name of the final image
                (``<title>.png`` in the current working directory)
            :param uploader_url: upload url (currently unused, upload is a TODO)
            :param delete: when true, remove the image files written by this call
                (the stitched result and the temporary pieces)
            :return: url of the uploaded image; always ``''`` until upload exists
            """
            base_image = '{}.png'.format(title)
            created = [base_image]
            driver.save_screenshot(base_image)
            # find_element('xpath', ...) works on Selenium 3 and 4, whereas
            # find_element_by_xpath was removed in Selenium 4.3.
            body_h = int(driver.find_element('xpath', cls.__body__).size.get(cls.__height__))
            ratio = cls.__pixel_ratio__(driver)
            with Image.open(base_image) as first_shot:
                # Viewport height in CSS pixels: the unit window.scroll expects.
                current_h = first_shot.size[1] / ratio
            for i in range(1, int(body_h / current_h)):
                tmp_image = f'tmp_{i}.png'
                driver.execute_script(cls.__JS__[cls.__scroll_y__] % (current_h * i))
                # Give the page a moment to settle after scrolling.
                sleep(.5)
                driver.save_screenshot(tmp_image)
                created.append(tmp_image)
                cls.__join_images__(base_image, tmp_image, 0, base_image, ratio)
            driver.execute_script(cls.__JS__[cls.__scroll_bottom__])
            driver.save_screenshot(cls.__base_end__)
            created.append(cls.__base_end__)
            # The last viewport overlaps what is already stitched by everything
            # except the remainder of the page height.
            overlap = int(current_h - int(body_h % current_h))
            cls.__join_images__(base_image, cls.__base_end__, overlap, base_image, ratio)
            # TODO upload the image
            url = ''
            # Remove only the files this call wrote, never unrelated PNGs.
            if delete:
                for path in created:
                    if os.path.exists(path):
                        os.remove(path)
            return url

        @classmethod
        def __pixel_ratio__(cls, driver):
            """
            Return the number of screenshot pixels per CSS pixel.

            :param driver: webdriver instance used to query ``devicePixelRatio``
            :return: the positive ratio reported by the browser, or
                ``__default_ratio__`` when no usable number is reported
            """
            try:
                ratio = float(driver.execute_script(cls.__JS__[cls.__pixel_ratio_key__]))
            except (TypeError, ValueError):
                ratio = 0
            return ratio if ratio > 0 else cls.__default_ratio__

        @classmethod
        def __join_images__(cls, png1, png2, size=0, output='result.png', ratio=2):
            """
            Stitch two images vertically, with png2 placed below png1.

            :param png1: path of the upper image
            :param png2: path of the lower image
            :param size: overlap between the two images, in CSS pixels
            :param output: path of the output image (may be the same as png1)
            :param ratio: screenshot pixels per CSS pixel, used to scale ``size``
            :return: None
            """
            overlap = int(round(size * ratio))
            # Close both source files before saving, since output may be png1.
            with Image.open(png1) as img1, Image.open(png2) as img2:
                size1, size2 = img1.size, img2.size
                joint = Image.new(cls.__RGB__, (size1[0], size1[1] + size2[1] - overlap))
                joint.paste(img1, (0, 0))
                joint.paste(img2, (0, size1[1] - overlap))
            joint.save(output)
    
    if __name__ == '__main__':
        from selenium import webdriver

        # Demo: open a page in Chrome and save it as a stitched full-page
        # screenshot named worldline.png.
        driver = webdriver.Chrome()
        try:
            driver.get("https://www.cnblogs.com/worldline/")
            ScreenShot.screen_shot(driver, 'worldline')
        finally:
            # Always shut the browser down, even if loading or capturing fails.
            driver.quit()
    
    
  3. 其他

    如果是在 headless(无头)模式下,可以把浏览器窗口直接调整为整个页面的宽高,然后一次性截取全网页:

    
    def get_image(url, pic_name):
        """
        Save a full-page screenshot of a URL using headless Chrome.

        The window is resized to the document's scroll width and height so
        that one screenshot covers the whole page.

        :param url: address of the page to capture
        :param pic_name: path of the image file to write
        :return: None
        """
        # Imported locally so the function works on its own: the surrounding
        # file only imports ``sleep`` and loads selenium in its demo block.
        import time

        from selenium import webdriver
        from selenium.webdriver.chrome.options import Options

        chrome_options = Options()
        chrome_options.add_argument('headless')
        driver = webdriver.Chrome(options=chrome_options)
        try:
            driver.get(url)
            # Give the page a moment to render before measuring it.
            time.sleep(.5)
            width = driver.execute_script("return document.documentElement.scrollWidth")
            height = driver.execute_script("return document.documentElement.scrollHeight")
            print(width, height)
            driver.set_window_size(width, height)
            # Let the layout settle after the resize.
            time.sleep(.5)
            driver.save_screenshot(pic_name)
        finally:
            # quit() rather than close(): close() only closes the window and
            # can leave the chromedriver process running.
            driver.quit()
    

猜你喜欢

转载自blog.csdn.net/SpringBoots/article/details/120872393