PHP Curl High Performance Gathering

<?php

/*
 * High-efficiency scraping class built on cURL
 * Author: 浩瀚星空
 * Mail: [email protected]
 * Date: 2018-1-7
 */

/**
 * Thin wrapper around a single, reusable cURL handle.
 *
 * Global options are kept in $opt and applied to every request; per-request
 * ("local") options are applied after them, so a local option wins whenever
 * both define the same key.
 *
 * Usage:
 *   $curl = new Curl();
 *   $curl->setopt([CURLOPT_USERAGENT => 'Curl']);
 *   echo $curl->get('http://3ghh.cn', [CURLOPT_CONNECTTIMEOUT => 1]);
 */
class Curl
{
    /**
     * cURL handle shared by every request made through this object.
     * @var resource|object|false
     */
    public $ch;

    /**
     * Global options, applied to every request.
     * @var array
     */
    public $opt = array(
        CURLOPT_RETURNTRANSFER => true, // return the response instead of printing it
        CURLINFO_HEADER_OUT => true, // make the outgoing request headers available in the info array
        CURLOPT_IPRESOLVE => CURL_IPRESOLVE_V4, // resolve host names to IPv4 addresses only
        CURLOPT_ACCEPT_ENCODING => '', // empty string: accept every encoding cURL supports
        CURLOPT_FOLLOWLOCATION => true, // follow redirects
        CURLOPT_CONNECTTIMEOUT => 3, // connect timeout in seconds
        CURLOPT_TIMEOUT => 30, // total execution timeout in seconds
        // NOTE(review): peer certificate verification is switched OFF by default.
        // Kept for backward compatibility; override with setopt() when talking
        // to hosts whose identity matters.
        CURLOPT_SSL_VERIFYPEER => false,
        CURLOPT_AUTOREFERER => true // set the Referer automatically when following redirects
    );

    /**
     * Result of the most recent curl_exec() call (false on failure).
     * @var string|bool|null
     */
    public $response;

    /**
     * Transfer information captured for the most recent request.
     * @var array|null
     */
    public $info;

    /**
     * Creates the cURL handle and optionally overrides global options.
     *
     * @param array|null $opt Global options to merge over the defaults
     */
    public function __construct(array $opt = null)
    {
        $this->ch = curl_init();
        if ($opt) {
            $this->setopt($opt);
        }
    }

    /**
     * Releases the cURL handle.
     */
    public function __destruct()
    {
        // Skip when curl_init() failed, and on PHP 8+ where curl_close() no
        // longer has any effect (the handle is freed with the object).
        if ($this->ch && PHP_VERSION_ID < 80000) {
            curl_close($this->ch);
        }
    }

    /**
     * Sets global options; keys given here override existing ones.
     *
     * @param array $opt Map of CURLOPT_* constant => value
     */
    public function setopt(array $opt)
    {
        // Array union keeps the left-hand value on key collisions.
        $this->opt = $opt + $this->opt;
    }

    /**
     * Sets the Cookie request header for all subsequent requests.
     *
     * @param string $cookie Cookie string
     */
    public function setCookie($cookie)
    {
        $this->opt[CURLOPT_COOKIE] = $cookie;
    }

    /**
     * Sets the User-Agent string for all subsequent requests.
     *
     * @param string $userAgent
     */
    public function setUserAgent($userAgent)
    {
        $this->opt[CURLOPT_USERAGENT] = $userAgent;
    }

    /**
     * Sets the Referer for all subsequent requests.
     *
     * @param string $referer
     */
    public function setReferer($referer)
    {
        $this->opt[CURLOPT_REFERER] = $referer;
    }

    /**
     * Sets custom request headers for all subsequent requests.
     *
     * @param array $header Indexed array of "Name: value" strings
     */
    public function setHeader(array $header)
    {
        // CURLOPT_HTTPHEADER carries request headers; CURLOPT_HEADER is only
        // the boolean switch that includes response headers in the output.
        $this->opt[CURLOPT_HTTPHEADER] = $header;
    }

    /**
     * Performs a request with the global options plus optional local ones.
     *
     * @param string $url URL to request
     * @param array|null $opt Per-request options; they override global options
     * @return mixed Result of curl_exec()
     */
    public function exec($url, $opt = null)
    {
        // Global options first, local options second, so that the local
        // values win when both define the same key.
        curl_setopt_array($this->ch, $this->opt);
        if ($opt) {
            curl_setopt_array($this->ch, $opt);
        }
        curl_setopt($this->ch, CURLOPT_URL, $url);

        $this->response = curl_exec($this->ch);

        // Snapshot the transfer info now: the handle is reset below, after
        // which it no longer describes this transfer.
        $this->info = curl_getinfo($this->ch);

        // Drop every option so the next request starts from a clean handle.
        curl_reset($this->ch);

        return $this->response;
    }

    /**
     * Performs a GET request.
     *
     * @param string $url URL to request
     * @param array|null $opt Per-request options
     * @return mixed Result of curl_exec()
     */
    public function get($url, $opt = null)
    {
        return $this->exec($url, $opt);
    }

    /**
     * Performs a POST request.
     *
     * @param string $url URL to request
     * @param string|array $data POST payload
     * @param array|null $opt Per-request options
     * @return mixed Result of curl_exec()
     */
    public function post($url, $data, $opt = null)
    {
        $local = array(CURLOPT_POST => true, CURLOPT_POSTFIELDS => $data);
        if ($opt) {
            // array_replace() keeps the integer option keys intact and lets
            // the caller's values override the POST defaults.
            $local = array_replace($local, $opt);
        }

        return $this->exec($url, $local);
    }

    /**
     * Uploads a file over FTP.
     *
     * NOTE(review): $user and $pass are placed into the URL verbatim; values
     * containing characters such as "@" or ":" presumably need to be
     * URL-encoded by the caller.
     *
     * @param string $user User name
     * @param string $pass Password
     * @param string $host Host
     * @param string $path Remote path to store the file under
     * @param resource $file Open file handle to upload
     * @return mixed Result of curl_exec()
     */
    public function ftpUpload($user, $pass, $host, $path, $file)
    {
        $target = 'ftp://' . $user . ':' . $pass . '@' . $host . '/' . $path;

        return $this->exec($target, array(CURLOPT_UPLOAD => true, CURLOPT_INFILE => $file));
    }

    /**
     * Returns the transfer information of the most recent request.
     *
     * @return array
     */
    public function getinfo()
    {
        // exec() stores the info before resetting the handle; only query the
        // handle directly when no request has been recorded yet.
        if (!is_array($this->info)) {
            $this->info = curl_getinfo($this->ch);
        }

        return $this->info;
    }

    /**
     * Returns the error number reported by the handle.
     *
     * @return int 0 when there is no error
     */
    public function errno()
    {
        return curl_errno($this->ch);
    }

    /**
     * Returns the error message reported by the handle.
     *
     * @return string
     */
    public function error()
    {
        return curl_error($this->ch);
    }

    /**
     * Percent-encodes every byte above 127 in a URL (e.g. Chinese characters)
     * and leaves all other bytes untouched.
     *
     * @param string $url
     * @return string
     */
    public function linkEncode($url)
    {
        $url = (string) $url;
        $encoded = '';
        $length = strlen($url);
        for ($i = 0; $i < $length; $i++) {
            $byte = ord($url[$i]);
            // Bytes above 127 are always two hex digits, so no padding is needed.
            $encoded .= $byte > 127 ? '%' . strtoupper(dechex($byte)) : $url[$i];
        }

        return $encoded;
    }

    /**
     * Splits the last response into header and body parts. Only meaningful
     * when the request was made with CURLOPT_HEADER enabled.
     *
     * @return array Keys 'response_header' and 'response_body'
     */
    public function segmentResponse()
    {
        $info = $this->getinfo();
        $headerSize = isset($info['header_size']) ? (int) $info['header_size'] : 0;
        // A failed request leaves false in $response; treat it as empty text.
        $raw = (string) $this->response;

        return array(
            'response_header' => (string) substr($raw, 0, $headerSize),
            'response_body' => (string) substr($raw, $headerSize)
        );
    }

    /**
     * Converts a raw response header string into associative arrays, one per
     * header block (each followed redirect contributes its own block).
     *
     * The status line ends up as a key with a null value. Set-Cookie headers
     * are collected into a list under the 'Set-Cookie' key.
     *
     * @param string $response_header Raw response header string
     * @return array List of associative header arrays
     */
    public function transformHeader($response_header)
    {
        $headers = array();
        foreach (explode("\r\n\r\n", (string) $response_header) as $block) {
            if (trim($block) === '') {
                // The header string ends with a blank line, which yields an
                // empty trailing piece.
                continue;
            }

            $fields = array();
            foreach (explode("\r\n", $block) as $line) {
                if ($line === '') {
                    continue;
                }
                // Limit of 2 keeps values that themselves contain ": " intact.
                $pair = explode(': ', $line, 2);
                $value = isset($pair[1]) ? $pair[1] : null;
                // Header names are case-insensitive (HTTP/2 sends them lowercased).
                if (strcasecmp($pair[0], 'Set-Cookie') === 0) {
                    $fields['Set-Cookie'][] = $value;
                } else {
                    $fields[$pair[0]] = $value;
                }
            }
            $headers[] = $fields;
        }

        return $headers;
    }

    /**
     * Extracts HTML tags, or an attribute of those tags, from the last response.
     *
     * $label and $attr are inserted into the regular expression as-is.
     *
     * @param string $label 'a', 'text', 'form', 'img', or any other tag name
     * @param string|null $attr Attribute to capture (defaults: href for 'a', src for 'img')
     * @return mixed preg_match_all() matches, or the stripped text for 'text'
     */
    public function extract($label, $attr = null)
    {
        $html = (string) $this->response;

        switch ($label) {
            case 'a':
                $name = empty($attr) ? 'href' : $attr;
                // [\s\S] lets the link text span several lines in both variants.
                $regexp = '/<a .*?' . $name . '=[\'"](.*?)[\'"].*?>([\s\S]*?)<\/a>/i';
                preg_match_all($regexp, $html, $content);
                break;
            case 'text':
                // Remove scripts first, then every remaining tag.
                $content = preg_replace('/<script[\s\S]*?<\/script>/i', '', $html);
                $content = preg_replace('/<.*?>/', '', $content);
                break;
            case 'form':
                preg_match_all('/<form[\s\S]*?<\/form>/i', $html, $content);
                if (!empty($attr)) {
                    // Only the first form is searched; fall back to an empty
                    // string when the response contains no form at all.
                    $firstForm = isset($content[0][0]) ? $content[0][0] : '';
                    preg_match_all('/' . $attr . '=[\'"](.*?)[\'"]/i', $firstForm, $content);
                }
                break;
            case 'img':
                $name = empty($attr) ? 'src' : $attr;
                $regexp = '/<img .*?' . $name . '=[\'"](.*?)[\'"].*?\/*>/i';
                preg_match_all($regexp, $html, $content);
                break;
            default:
                if (empty($attr)) {
                    $regexp = '/<' . $label . '.*?>([\s\S]*?)<*\/*[(' . $label . ')]*>/i';
                } else {
                    $regexp = '/<' . $label . '.*?' . $attr . '=[\'"](.*?)[\'"].*?>([\s\S]*?)<\/*[(' . $label . ')]*>/i';
                }
                preg_match_all($regexp, $html, $content);
                break;
        }

        return $content;
    }
}

// Guess you like (related-posts heading left over from the source web page)
//
// Origin: www.cnblogs.com/liujiuzhou/p/11547118.html