PHP 使用 curl_multi 并发抓取网页功能实现

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/u014236259/article/details/77171233

cURL 的 curl_multi 系列函数支持并发抓取多个网页(在单线程内同时处理多个请求,并非真正的多线程),示例代码如下:

/**
 * Fetches several URLs concurrently using the curl_multi API.
 *
 * All requests share the option set in self::$options and are issued as GET
 * requests. The transfers run concurrently inside a single thread; no real
 * threads are created.
 */
class HttpMulti {

    /**
     * cURL options applied to every easy handle.
     */
    private static $options = array(
        // Disables TLS peer-certificate verification (it does NOT disable HTTPS).
        // NOTE(review): this is insecure against man-in-the-middle attacks;
        // kept as-is to preserve existing behavior — consider enabling.
        CURLOPT_SSL_VERIFYPEER => 0,
        // Return the response body as a string instead of printing it.
        CURLOPT_RETURNTRANSFER => 1,
        // Maximum number of seconds to wait while connecting.
        CURLOPT_CONNECTTIMEOUT => 10,
        // Maximum number of seconds a single transfer may take.
        CURLOPT_TIMEOUT => 20,
        // Value of the "Accept-Encoding" request header; an empty string
        // would advertise every encoding cURL supports.
        CURLOPT_ENCODING => "gzip",
        // 0 = do not include the response headers in the returned body.
        CURLOPT_HEADER => 0,
        CURLOPT_USERAGENT => 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
        // Send requests with GET rather than POST.
        CURLOPT_POST => false,
    );

    /**
     * Fetch every URL in $urlData concurrently.
     *
     * @param array $urlData Map of key => URL. Keys are preserved in the result.
     * @return array|null Map of key => response body as reported by
     *                    curl_multi_getcontent(), or null for a URL whose
     *                    handle could not be created. Returns null (no value)
     *                    when $urlData is empty.
     */
    public static function multiRun($urlData = array())
    {
        if (empty($urlData)) {
            return;
        }

        $data = $curls = array();
        $mh = curl_multi_init();

        foreach ($urlData as $k => $val) {
            // Reserve the slot first so the result keeps the input key order.
            $data[$k] = null;

            $ch = curl_init($val);
            if ($ch === false) {
                // Handle could not be created; leave this entry as null.
                continue;
            }

            curl_setopt_array($ch, self::$options);
            curl_multi_add_handle($mh, $ch);
            $curls[$k] = $ch;
        }

        // Drive all transfers to completion.
        self::execMultiHandle($mh);

        foreach ($curls as $k => $ch) {
            // Collect the response body.
            $data[$k] = curl_multi_getcontent($ch);
            // Detach the easy handle from the multi handle BEFORE closing it.
            curl_multi_remove_handle($mh, $ch);
            curl_close($ch);
        }

        // Close the multi handle exactly once, after every easy handle has
        // been detached (previously this ran inside the loop).
        curl_multi_close($mh);

        return $data;
    }

    /**
     * Run the transfers attached to a multi handle until none are active.
     *
     * @param mixed $mh Multi handle returned by curl_multi_init().
     * @return bool False when $mh is empty or cURL reported a multi-level
     *              error; true when the loop finished with CURLM_OK.
     */
    private static function execMultiHandle($mh)
    {
        if (empty($mh)) {
            return false;
        }

        $active = null;

        // Kick off the transfers.
        do {
            $mrc = curl_multi_exec($mh, $active);
        } while ($mrc == CURLM_CALL_MULTI_PERFORM);

        while ($active && $mrc == CURLM_OK) {
            // Wait for activity on any connection. When select reports -1,
            // pause briefly instead of spinning, then still call
            // curl_multi_exec so the loop always makes progress.
            if (curl_multi_select($mh) == -1) {
                usleep(1000);
            }

            do {
                $mrc = curl_multi_exec($mh, $active);
            } while ($mrc == CURLM_CALL_MULTI_PERFORM);
        }

        return $mrc == CURLM_OK;
    }

}

// Demo: the pages to fetch concurrently.
$urlData = [
    'https://www.baidu.com/',
    'https://www.taobao.com/',
    'http://weibo.com/',
    'http://www.qq.com/',
];
// $res maps each index of $urlData to the fetched response body.
$res = HttpMulti::multiRun(
    $urlData
);


猜你喜欢

转载自blog.csdn.net/u014236259/article/details/77171233