QueryList异步抓取网页数据

环境要求:

PHP 7 及以上版本

PhantomJS 引擎(下载地址:https://phantomjs.org/download.html)

QueryList 4.0

核心思想:

根据目标网页的 DOM 结构,编写相应的采集规则;规则中的选择器语法与 jQuery 选择器一致,参考 jQuery 即可。

抓取示例:

京东商城产品列表


抓取规则:

// JD.com (Jingdong Mall) scraping rule configuration.
// Each entry carries the page URL, a `range` selector, a set of field rules
// written as [selector, what-to-extract] pairs, and a human-readable description.
$rules = [
    'product_list' => [
        'url'   => 'https://list.jd.com/list.html?cat=670,671,672',
        'range' => '.gl-item .j-sku-item',
        'rules' => [
            'link'      => ['.p-img a', 'href'],
            'image'     => ['.p-img a img', 'src'],
            // NOTE(review): presumably the image URL for lazily loaded items — confirm.
            'lazyImage' => ['.p-img a img', 'data-lazy-img'],
            'name'      => ['.p-name a em', 'text'],
            'price'     => ['.p-price .J_price:eq(0) i', 'text'],
        ],
        'desc'  => '产品列表',
    ],
];

return $rules;

抓取结果:

一点资讯


抓取规则:

// Yidian Zixun (yidianzixun.com) scraping rule configuration.
// Two channels are configured; both use the same range selector and field rules
// and differ only in URL and description.
$rules = [
    // Video channel.
    'video' => [
        'url'   => 'http://www.yidianzixun.com/channel/u13746',
        'range' => '.style-content-middle',
        'rules' => [
            // Empty selector: presumably reads `href` from the range element itself — verify.
            'link'     => ['', 'href'],
            'image'    => ['.doc-image-small-wrapper .doc-image-box img', 'src'],
            'duration' => ['.doc-image-small-wrapper .doc-image-box .video-time', 'text'],
            'title'    => ['.doc-content .doc-content-inline .doc-title', 'text'],
        ],
        'desc'  => '视频列表',
    ],
    // Humor channel.
    'amuse' => [
        'url'   => 'http://www.yidianzixun.com/channel/s10671',
        'range' => '.style-content-middle',
        'rules' => [
            // Empty selector: presumably reads `href` from the range element itself — verify.
            'link'     => ['', 'href'],
            'image'    => ['.doc-image-small-wrapper .doc-image-box img', 'src'],
            'duration' => ['.doc-image-small-wrapper .doc-image-box .video-time', 'text'],
            'title'    => ['.doc-content .doc-content-inline .doc-title', 'text'],
        ],
        'desc'  => '搞笑列表',
    ],
];

return $rules;

抓取结果:

图片抓取


抓取规则:

// Image-site scraping rule configuration.
// Top-level keys are site hosts; each host maps a page name to a list of rule sets.
$rules = [
    // Nipic (nipic.com)
    'www.nipic.com' => [
        'index' => [
            // Home page: hot topics.
            [
                'url'   => 'http://www.nipic.com/',
                // Empty range: presumably the selectors below apply to the whole page — confirm.
                'range' => '',
                'rules' => [
                    'link'  => ['.newIndex-hotpic', 'href'],
                    'image' => ['.newIndex-hotpic img', 'src'],
                    'title' => ['.newIndex-hotpic .newIndex-textItem', 'text'],
                ],
                'desc'  => '首页热门专题',
            ],
            // Home page: featured picks.
            [
                'url'   => 'http://www.nipic.com/',
                // Empty range: presumably the selectors below apply to the whole page — confirm.
                'range' => '',
                'rules' => [
                    'link'  => ['.right-choicePic', 'href'],
                    'image' => ['.right-choicePic img', 'src'],
                    'title' => ['.right-choicePic .newIndex-textItem', 'text'],
                ],
                'desc'  => '首页精选推荐',
            ],
        ],
    ],
];

return $rules;

抓取结果:

猜你喜欢

转载自blog.csdn.net/tdcqfyl/article/details/81454786