<?php
/*
* Curl高效率采集类
* Author: 浩瀚星空
* Mail: [email protected]
* Date: 2018-1-7
*/
class Curl
{
    // Usage:
    //   $curl = new Curl();
    //   $curl->setopt([CURLOPT_USERAGENT => 'Curl']);
    //   echo $curl->get('http://3ghh.cn', [CURLOPT_CONNECTTIMEOUT => 1]);

    /**
     * The cURL handle shared by every request made through this object.
     * @var resource|object|false
     */
    public $ch;

    /**
     * Global options, applied to the handle at the start of every request.
     * @var array
     */
    public $opt = array(
        CURLOPT_RETURNTRANSFER => true, // return the response instead of printing it
        CURLINFO_HEADER_OUT => true, // expose the sent request headers through curl_getinfo()
        CURLOPT_IPRESOLVE => CURL_IPRESOLVE_V4, // resolve host names to IPv4 addresses only
        CURLOPT_ACCEPT_ENCODING => '', // advertise every content encoding cURL supports
        CURLOPT_FOLLOWLOCATION => true, // follow redirects
        CURLOPT_CONNECTTIMEOUT => 3, // connection timeout in seconds
        CURLOPT_TIMEOUT => 30, // total transfer timeout in seconds
        CURLOPT_SSL_VERIFYPEER => false, // NOTE(review): peer certificate verification is switched off
        CURLOPT_AUTOREFERER => true // set the Referer header automatically when redirected
    );

    /**
     * Result of the most recent curl_exec() call (false when the transfer failed).
     * @var string|bool|null
     */
    public $response;

    /**
     * Transfer information captured right after the most recent request.
     * @var array|null
     */
    public $info;

    /**
     * Per-request options queued by get()/post()/ftpUpload() and consumed by the next exec().
     * @var array
     */
    protected $pending = array();

    /**
     * Creates the cURL handle and optionally overrides some global options.
     *
     * @param array|null $opt global options to merge over the defaults
     */
    public function __construct(array $opt = null)
    {
        $this->ch = curl_init();
        if ($opt) {
            $this->setopt($opt);
        }
    }

    /**
     * Releases the cURL handle.
     */
    public function __destruct()
    {
        // Before PHP 8.0 the handle is a resource and has to be closed explicitly.
        // From PHP 8.0 on it is an object and curl_close() has no effect, so
        // dropping the reference is all that is needed.
        if (is_resource($this->ch)) {
            curl_close($this->ch);
        }
        $this->ch = null;
    }

    /**
     * Merges options into the global option set; the given values win over existing ones.
     *
     * @param array $opt map of CURLOPT_* constant => value
     */
    public function setopt(array $opt)
    {
        // The + operator keeps the left operand's value on duplicate keys and,
        // unlike array_merge(), does not renumber the integer option keys.
        $this->opt = $opt + $this->opt;
    }

    /**
     * Sets the Cookie header sent with every request.
     *
     * @param string $cookie
     */
    public function setCookie($cookie)
    {
        $this->opt[CURLOPT_COOKIE] = $cookie;
    }

    /**
     * Sets the User-Agent string sent with every request.
     *
     * @param string $userAgent
     */
    public function setUserAgent($userAgent)
    {
        $this->opt[CURLOPT_USERAGENT] = $userAgent;
    }

    /**
     * Sets the Referer header sent with every request.
     *
     * @param string $referer
     */
    public function setReferer($referer)
    {
        $this->opt[CURLOPT_REFERER] = $referer;
    }

    /**
     * Sets additional request headers sent with every request.
     *
     * @param array $header list of raw header lines, e.g. array('X-Token: abc')
     */
    public function setHeader(array $header)
    {
        // CURLOPT_HTTPHEADER carries the request header list. CURLOPT_HEADER is
        // only a boolean switch that puts the *response* headers into the output.
        $this->opt[CURLOPT_HTTPHEADER] = $header;
    }

    /**
     * Performs a request against $url.
     *
     * Options are applied in this order, later ones overriding earlier ones:
     * global options, options queued by get()/post()/ftpUpload(), then the URL.
     *
     * @param string $url
     * @return mixed the curl_exec() result
     */
    public function exec($url)
    {
        curl_setopt_array($this->ch, $this->opt);
        if ($this->pending) {
            curl_setopt_array($this->ch, $this->pending);
        }
        // Queued options belong to exactly one request.
        $this->pending = array();
        curl_setopt($this->ch, CURLOPT_URL, $url);

        $this->response = curl_exec($this->ch);
        // Snapshot the transfer info now: the reset below wipes it from the handle.
        $this->info = curl_getinfo($this->ch);
        // Clear every option so the next request starts from the global set only.
        curl_reset($this->ch);

        return $this->response;
    }

    /**
     * Performs a GET request.
     *
     * @param string $url
     * @param array|null $opt options for this request only; they override the global options
     * @return mixed the curl_exec() result
     */
    public function get($url, $opt = null)
    {
        $this->pending = $opt ? $opt : array();
        return $this->exec($url);
    }

    /**
     * Performs a POST request.
     *
     * @param string $url
     * @param string|array $data POST body, passed to CURLOPT_POSTFIELDS
     * @param array|null $opt options for this request only; they override the global options
     * @return mixed the curl_exec() result
     */
    public function post($url, $data, $opt = null)
    {
        $request = array(CURLOPT_POST => true, CURLOPT_POSTFIELDS => $data);
        if ($opt) {
            // Assign key by key so the integer option keys are preserved.
            foreach ($opt as $option => $value) {
                $request[$option] = $value;
            }
        }
        $this->pending = $request;
        return $this->exec($url);
    }

    /**
     * Uploads a file over FTP.
     *
     * The credentials, host and path are concatenated into the URL verbatim, so
     * the caller must URL-encode any reserved characters they contain.
     *
     * @param string $user user name
     * @param string $pass password
     * @param string $host host name
     * @param string $path remote path to store the file under
     * @param resource $file open file handle to upload
     * @return mixed the curl_exec() result
     */
    public function ftpUpload($user, $pass, $host, $path, $file)
    {
        $this->pending = array(CURLOPT_UPLOAD => true, CURLOPT_INFILE => $file);
        return $this->exec('ftp://' . $user . ':' . $pass . '@' . $host . '/' . $path);
    }

    /**
     * Returns the transfer information of the most recent request.
     *
     * The data is the snapshot taken by exec(); when no snapshot exists yet the
     * handle is queried directly.
     *
     * @return array
     */
    public function getinfo()
    {
        if ($this->info === null) {
            $this->info = curl_getinfo($this->ch);
        }
        return $this->info;
    }

    /**
     * Returns the error number reported by the handle.
     *
     * @return int 0 when there was no error
     */
    public function errno()
    {
        return curl_errno($this->ch);
    }

    /**
     * Returns the error message reported by the handle.
     *
     * @return string
     */
    public function error()
    {
        return curl_error($this->ch);
    }

    /**
     * Percent-encodes every byte above 127 in a URL and leaves all other bytes untouched.
     *
     * @param string $url
     * @return string
     */
    public function linkEncode($url)
    {
        $url = (string) $url;
        $encoded = '';
        $length = strlen($url);
        for ($i = 0; $i < $length; $i++) {
            $byte = ord($url[$i]);
            // Bytes above 127 always produce two hex digits, so no padding is needed.
            $encoded .= $byte > 127 ? '%' . strtoupper(dechex($byte)) : $url[$i];
        }
        return $encoded;
    }

    /**
     * Splits the last response into header and body using the reported header size.
     *
     * Only meaningful when the request was made with CURLOPT_HEADER enabled.
     *
     * @return array with the keys 'response_header' and 'response_body'
     */
    public function segmentResponse()
    {
        $info = $this->getinfo();
        $headerSize = isset($info['header_size']) ? (int) $info['header_size'] : 0;
        // A failed transfer leaves false in $response; treat it as an empty string.
        $raw = (string) $this->response;

        return array(
            'response_header' => (string) substr($raw, 0, $headerSize),
            'response_body' => (string) substr($raw, $headerSize),
        );
    }

    /**
     * Converts a raw response header string into one associative array per header block.
     *
     * Blocks are separated by a blank line (one block per response in a redirect
     * chain). A line without ": " - such as the status line - becomes a key with
     * a null value. Set-Cookie values are collected into a list.
     *
     * @param string $response_header raw response header text
     * @return array list of header maps, in the order the blocks appear
     */
    public function transformHeader($response_header)
    {
        $blocks = array();
        foreach (explode("\r\n\r\n", (string) $response_header) as $block) {
            if ($block !== '') {
                $blocks[] = $block;
            }
        }

        // Read at most redirect_count + 1 blocks when that figure is known,
        // and never more blocks than the text actually contains.
        $limit = count($blocks);
        if (is_array($this->info) && isset($this->info['redirect_count'])) {
            $limit = min($limit, $this->info['redirect_count'] + 1);
        }

        $parsed = array();
        for ($i = 0; $i < $limit; $i++) {
            $fields = array();
            foreach (explode("\r\n", $blocks[$i]) as $line) {
                if ($line === '') {
                    continue;
                }
                // Limit of 2 keeps values that themselves contain ": " intact.
                $pair = explode(': ', $line, 2);
                $value = isset($pair[1]) ? $pair[1] : null;
                if ($pair[0] === 'Set-Cookie') {
                    $fields['Set-Cookie'][] = $value;
                } else {
                    $fields[$pair[0]] = $value;
                }
            }
            $parsed[$i] = $fields;
        }
        return $parsed;
    }

    /**
     * Extracts HTML elements, or one of their attributes, from the last response.
     *
     * Supported values for $label:
     *  - 'a':    attribute value (href by default) and inner content of every link
     *  - 'text': the response with script elements and all tags stripped
     *  - 'form': every form element, or the values of $attr inside the first form
     *  - 'img':  attribute value (src by default) of every image
     *  - other:  treated as a tag name; inner content, preceded by the value of $attr when given
     *
     * @param string $label one of the keywords above or a literal tag name
     * @param string|null $attr literal attribute name
     * @return mixed a string for 'text', otherwise the preg_match_all() match array
     */
    public function extract($label, $attr = null)
    {
        $html = (string) $this->response;
        // Names are matched literally; quoting also keeps a '/' from ending the pattern.
        $attrName = empty($attr) ? null : preg_quote($attr, '/');

        switch ($label) {
            case 'a':
                $name = $attrName === null ? 'href' : $attrName;
                $regexp = '/<a .*?' . $name . '=[\'"](.*?)[\'"].*?>([\s\S]*?)<\/a>/i';
                preg_match_all($regexp, $html, $content);
                break;
            case 'text':
                $content = preg_replace('/<script[\s\S]*?<\/script>/i', '', $html);
                $content = preg_replace('/<.*?>/', '', $content);
                break;
            case 'form':
                preg_match_all('/<form[\s\S]*?<\/form>/i', $html, $content);
                if ($attrName !== null) {
                    // Search the first form only; fall back to '' when none matched.
                    $firstForm = isset($content[0][0]) ? $content[0][0] : '';
                    preg_match_all('/' . $attrName . '=[\'"](.*?)[\'"]/i', $firstForm, $content);
                }
                break;
            case 'img':
                $name = $attrName === null ? 'src' : $attrName;
                $regexp = '/<img .*?' . $name . '=[\'"](.*?)[\'"].*?\/*>/i';
                preg_match_all($regexp, $html, $content);
                break;
            default:
                $tag = preg_quote($label, '/');
                // \b stops "p" from matching "<pre>"; the explicit closing tag keeps
                // nested markup inside the captured content.
                $open = '<' . $tag . '\b.*?';
                $close = '>([\s\S]*?)<\/' . $tag . '\s*>/i';
                $regexp = $attrName === null
                    ? '/' . $open . $close
                    : '/' . $open . $attrName . '=[\'"](.*?)[\'"].*?' . $close;
                preg_match_all($regexp, $html, $content);
                break;
        }
        return $content;
    }
}
// PHP Curl high-performance scraping class.
// Origin: www.cnblogs.com/liujiuzhou/p/11547118.html