使用Phantomjs进行截图

        做了第一个自己的微服务,其中参考了很多人的文章,于是也想记录下来。一是方便自己查看,二也是为了帮助有需要的人。这里使用了phantomjs,这是一个无头浏览器,可以用它进行网址截图。


这是一个截图的样例。大家可以先去官网下载 phantomjs:Windows 系统下载 phantomjs.exe,Linux 系统下载对应的 phantomjs 可执行文件(下面的代码里两种路径都有,按需切换注释即可)。我用的是 Spring Boot 框架,配置很简单。

直接附上调用的主程序:

/**
 * Launches the PhantomJS executable to render a web page into an image file.
 *
 * The executable is looked up at {@code <user.dir>/phantomjs}; the output path is
 * {@code ScreenConstant.picBasePath} followed by the caller-supplied relative path.
 */
public class Screenshot {
	// Location of the PhantomJS executable.
	//private static final String cmdPath = System.getProperty("user.dir") + "\\target\\phantomjs.exe";   //windows
	private static final String cmdPath =	System.getProperty("user.dir") + "/phantomjs";    //linux

	/**
	 * Takes a screenshot without highlighting any search result.
	 *
	 * @param url     page to capture
	 * @param imgpath output path relative to {@code ScreenConstant.picBasePath}, e.g. /img/2222.png
	 * @param jsPath  path of the PhantomJS script to run
	 */
	public static void screenshot(String url, String imgpath, String jsPath) {
		screenshot(url, imgpath, jsPath, "");
	}

	/**
	 * Runs {@code phantomjs <jsPath> <url> <output> <title>} and blocks until it exits.
	 *
	 * I/O failures are printed and swallowed, as before; the method never throws them.
	 *
	 * @param url     page to capture
	 * @param imgpath output path relative to {@code ScreenConstant.picBasePath}
	 * @param jsPath  path of the PhantomJS script to run
	 * @param title   extra argument handed to the script; {@code null} is treated as ""
	 */
	public static void screenshot(String url, String imgpath, String jsPath, String title) {
		System.out.println("start......" + url);
		// A null element in the command array makes process creation throw NullPointerException.
		String safeTitle = title == null ? "" : title;
		String[] cmd = new String[] { cmdPath, jsPath, url, getPicSavePath(imgpath), safeTitle };
		for (String arg : cmd) {
			System.out.println(arg);
		}

		Process process = null;
		BufferedReader reader = null;
		try {
			ProcessBuilder builder = new ProcessBuilder(cmd);
			// Merge stderr into stdout: an undrained stderr pipe can fill up and stall the child.
			builder.redirectErrorStream(true);
			process = builder.start();

			reader = new BufferedReader(new InputStreamReader(process.getInputStream()));
			StringBuilder output = new StringBuilder();
			String line;
			while ((line = reader.readLine()) != null) {
				output.append(line).append('\n');
			}

			// Output is at EOF here, so this only reaps the child and fetches its exit code.
			int exitCode = process.waitFor();
			if (exitCode == 0) {
				System.out.println("图片存放" + ScreenConstant.picBasePath + "目录");
			} else {
				System.err.println("phantomjs exited with code " + exitCode + ": " + output);
			}
		} catch (IOException e) {
			e.printStackTrace();
		} catch (InterruptedException e) {
			// Preserve the interrupt for the caller's executor; the child is destroyed below.
			Thread.currentThread().interrupt();
		} finally {
			if (reader != null) {
				try {
					reader.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
			if (process != null) {
				// Harmless after a normal exit; kills the child if we bailed out early.
				process.destroy();
			}
		}
	}

	// Joins the configured base directory with the caller-supplied relative path.
	private static String getPicSavePath(String imgpath) {
		return ScreenConstant.picBasePath + imgpath;
	}

}

主要是使用phantomjs,然后调用写好的js,附上一个baidu的js

"use strict";
/*
 * PhantomJS script: opens a URL, optionally outlines the Baidu search result whose
 * title equals a given string, then renders the page to a file.
 *
 * Arguments (system.args[0] is this script's own path):
 *   [1] URL to capture
 *   [2] output file path
 *   [3] optional: result title to outline in red. The same argument is interpreted as a
 *       paper size when the output ends in ".pdf", or as a viewport size when it ends in "px".
 *   [4] optional: zoom factor
 *
 * Written in ES5 on purpose: this runs inside PhantomJS, not Node.
 */
var page = require('webpage').create(),
    system = require('system'),
    address, output, title, size, pageWidth, pageHeight;

page.settings.userAgent = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36';
page.settings.resourceTimeout = 15000; // 15 seconds
if (system.args.length < 3 || system.args.length > 5) {
    console.log('Usage: rasterize.js URL filename [paperwidth*paperheight|paperformat] [zoom]');
    console.log('  paper (pdf output) examples: "5in*7.5in", "10cm*20cm", "A4", "Letter"');
    console.log('  image (png/jpg output) examples: "1920px" entire page, window width 1920px');
    console.log('                                   "800px*600px" window, clipped to 800x600');
    phantom.exit(1);
} else {
    // The Java caller invokes: phantomjs <this script> <url> <output path> <title>
    output = system.args[2];
    title = system.args[3];
    // Encode first: unencoded URLs were observed not to match the intended page.
    address = encodeURI(system.args[1]);
    // Default viewport; large enough for typical pages.
    page.viewportSize = { width: 1920, height: 1080 };
    if (system.args.length > 3 && system.args[2].substr(-4) === ".pdf") {
        size = system.args[3].split('*');
        page.paperSize = size.length === 2 ? { width: size[0], height: size[1], margin: '0px' }
                                           : { format: system.args[3], orientation: 'portrait', margin: '1cm' };
    } else if (system.args.length > 3 && system.args[3].substr(-2) === "px") {
        size = system.args[3].split('*');
        if (size.length === 2) {
            pageWidth = parseInt(size[0], 10);
            pageHeight = parseInt(size[1], 10);
            // args[3] doubles as the title, so a title that merely ends in "px" lands here;
            // only override the default viewport when both numbers actually parsed.
            if (!isNaN(pageWidth) && !isNaN(pageHeight)) {
                page.viewportSize = { width: pageWidth, height: pageHeight };
                page.clipRect = { top: 0, left: 0, width: pageWidth, height: pageHeight };
            }
        } else {
            console.log("size:", system.args[3]);
            pageWidth = parseInt(system.args[3], 10);
            pageHeight = parseInt(pageWidth * 3/4, 10); // it's as good an assumption as any
            console.log ("pageHeight:",pageHeight);
            if (!isNaN(pageWidth)) {
                page.viewportSize = { width: pageWidth, height: pageHeight };
            }
        }
    }
    if (system.args.length > 4) {
        page.zoomFactor = system.args[4];
    }

    page.open(address, function (status) {
        if (status !== 'success') {
            console.log('Unable to load the address!');
            phantom.exit(1);
        } else {
            // This callback runs inside the page, so it cannot see outer variables;
            // the title is passed in explicitly as `tt`.
            page.evaluate(function (tt) {
                // Force a white background, otherwise transparent areas show up in the image.
                document.body.bgColor = 'white';
                window.scrollTo(0, 10000); // scroll far down the page

                var container = document.getElementById('content_left');
                if (!container) {
                    // Not a Baidu result page (or the layout changed): nothing to outline.
                    return;
                }
                var results = container.getElementsByClassName('result c-container');
                for (var i = 0, n = results.length; i < n; i++) {
                    var result = results[i];
                    var headings = result.getElementsByTagName('h3');
                    if (headings.length === 0) {
                        continue; // skip this entry instead of aborting the whole loop
                    }
                    var links = headings[0].getElementsByTagName('a');
                    if (links.length === 0) {
                        continue;
                    }
                    if (links[0].innerText === tt) {
                        result.style.border = "2px solid red";
                    }
                }
            }, title);

            // Give the page a second to settle before rendering to the output path.
            window.setTimeout(function () {
                page.render(output);
                phantom.exit();
            }, 1000);
        }
    });
}
这是主要的js文件,之后调用传入即可,非常的简单。
/**
 * Screenshot service for Baidu pages: runs the shared PhantomJS launcher with the
 * Baidu-specific script located at {@code <user.dir>/js/baidu.js}.
 */
@Service
public class BaiduService {
	/**
	 * Restores spaces and double quotes that arrive as %20 / %22 in the URL, then hands
	 * the request to {@code Screenshot.screenshot}. Annotated {@code @Async}.
	 *
	 * @param url     page to capture
	 * @param imgpath output image path passed through to the launcher
	 * @param title   title passed through to the script
	 */
	@Async
	public void screenshot(String url,String imgpath,String title) {
		// Literal replacement of every occurrence; no regex is needed for these tokens.
		String decodedUrl = url.replace("%20", " ").replace("%22", "\"");
		String baiduScript = System.getProperty("user.dir") + "/js/baidu.js";   //linux
		//String baiduScript = System.getProperty("user.dir") + "\\target\\js\\baidu.js"; //windows
		Screenshot.screenshot(decodedUrl, imgpath, baiduScript, title);
	}
}

因为调用phantomjs很耗费内存,我使用redis缓存需要截图的url,然后定时从redis取出url,进行截图

package com.zy.screenshot.service;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.redis.core.ListOperations;
import org.springframework.data.redis.core.RedisTemplate;
import org.springframework.data.redis.core.SetOperations;
import org.springframework.scheduling.annotation.Async;
import org.springframework.stereotype.Component;

import com.alibaba.fastjson.JSONObject;
import com.zy.screenshot.model.Screen;

/**
 * Pulls queued screenshot requests (JSON strings) from a Redis list, skips the ones
 * already recorded in a Redis set, and dispatches the rest to the matching
 * search-engine screenshot service.
 */
@Component
public class ControlTimeService {
	/** Maximum number of queue entries consumed per invocation. */
	private static final int MAX_ITEMS_PER_RUN = 100;
	/** Redis list holding pending requests. */
	private static final String QUEUE_KEY = "phantomjsList";
	/** Redis set of requests already handled. The spelling is part of the stored key; do not "fix" it. */
	private static final String HISTORY_KEY = "phantomjsSetHistroy";

	@Autowired
	RedisTemplate redisTemplate;
	
	@Autowired
	private BaiduService baiduService;
	
	@Autowired
	private SoService soService;
	
	@Autowired
	private SogouService sogouService;
	
	protected static Logger logger = LoggerFactory.getLogger(ControlTimeService.class);
	
	/**
	 * Processes up to {@link #MAX_ITEMS_PER_RUN} queued requests. Stops early once the
	 * queue is empty. A failure on one entry is logged and does not abort the batch.
	 */
	@Async("phantomjsAsync")
	public void getDateFromRedis() {
		logger.info("............................................................................................");
		logger.info(".......................................【开始截图】............................................");
		ListOperations<String, Object> list = redisTemplate.opsForList();
		SetOperations<String, Object> set = redisTemplate.opsForSet();
		for (int i = 0; i < MAX_ITEMS_PER_RUN; i++) {
			String json = (String) list.leftPop(QUEUE_KEY);
			if (json == null) {
				// Queue drained: further pops would only be wasted round trips.
				break;
			}
			if ("".equals(json)) {
				continue;
			}
			// add() reports how many members were newly inserted, so one call both checks
			// and records the entry; no window between a separate isMember and add.
			// A null count (unknown) is treated as "new" so the request is not lost.
			Long added = set.add(HISTORY_KEY, json);
			if (added != null && added.longValue() == 0L) {
				continue;
			}
			try {
				screen(json);
			} catch (RuntimeException e) {
				// e.g. malformed JSON; keep going with the remaining entries.
				logger.error("failed to process screenshot request: {}", json, e);
			}
		}
		logger.info("............................................................................................");
		logger.info(".......................................【完成截图】............................................");
	}
	
	/**
	 * Parses one request and routes it by URL substring. Requests without a URL or an
	 * image path are ignored. URLs matching none of the known engines use the Baidu service.
	 */
	private void screen(String json) {
		Screen screen = JSONObject.parseObject(json, Screen.class);
		if (screen == null || isEmpty(screen.getUrl()) || isEmpty(screen.getImgPath())) {
			return;
		}
		String url = screen.getUrl();
		String imgPath = screen.getImgPath();
		String title = isEmpty(screen.getTitle()) ? "" : screen.getTitle();

		// Order matters: the first matching substring wins.
		if (url.contains("baidu.com")) {
			baiduService.screenshot(url, imgPath, title);
		} else if (url.contains("so.com")) {
			soService.screenshot(url, imgPath, title);
		} else if (url.contains("sogou.com")) {
			sogouService.screenshot(url, imgPath, title);
		} else {
			baiduService.screenshot(url, imgPath, title);
		}
	}

	// True for null or zero-length strings.
	private static boolean isEmpty(String value) {
		return value == null || "".equals(value);
	}
}

因为我用的是虚拟机,配置不是很高,所以把速度控制在大约每分钟 100 条,这样可以保证内存不会爆掉。

谢谢!有疑问可以加微信:18932009560


猜你喜欢

转载自blog.csdn.net/weixin_40397083/article/details/80496220