<?php
namespace Home\Controller;

use Think\Controller;

/**
 * Scrapes the article list from a remote page and stores every article
 * that is not yet present in the `collection` table.
 */
class CollectionController extends Controller
{
    /**
     * Fetch the remote listing page, extract article links/titles/authors
     * and insert the ones whose URL is not stored yet.
     *
     * Prints '添加成功' followed by a dump of the add() result for each
     * inserted row, and '添加失败' for each row that was skipped (already
     * stored) or could not be inserted, or when the page cannot be fetched.
     *
     * @return void
     */
    public function Collection()
    {
        header("Content-Type: text/html;charset=utf-8");

        $baseUrl = 'http://fenxiang.banguanshui.com/';

        $html = $this->fetchPage($baseUrl);
        if ($html === false) {
            // Nothing to parse: the request failed or returned an empty body.
            echo '添加失败';
            return;
        }

        // Group 2 holds the author name shown inside the "username" span.
        preg_match_all(
            '/<span class="username" (.*)>(.*)<\/span>/isU',
            $html,
            $authors,
            PREG_SET_ORDER
        );

        // Group 1 = href, group 2 = remaining attributes, group 3 = title.
        // Negated classes / lazy quantifier keep each match inside a single
        // anchor even if it has several attributes or shares a line with
        // other anchors.
        $found = preg_match_all(
            '/<h3><a href="([^"]*)" ([^>]*)>(.*?)<\/a>/',
            $html,
            $links,
            PREG_SET_ORDER
        );
        if (!$found) {
            // 0 matches or a PCRE error: nothing to store.
            return;
        }

        // Resolve the model once instead of on every iteration.
        $collection = M('collection');

        foreach ($links as $index => $link) {
            // The href is appended to the base URL exactly as captured.
            // NOTE(review): an absolute or root-relative href would produce
            // a malformed URL here — confirm the remote markup only emits
            // relative paths before changing this, since stored URLs are
            // used for the duplicate check.
            $url = $baseUrl . $link[1];

            $existing = $collection->where(array('url' => $url))->find();
            if (!empty($existing)) {
                // A row with this URL is already stored.
                echo '添加失败';
                continue;
            }

            $record = array(
                'url'    => $url,
                'title'  => $link[3],
                // Authors are paired with links purely by position
                // (assumes one username span per article, in page order —
                // TODO confirm); fall back to an empty author when the
                // page has fewer spans than links.
                'author' => isset($authors[$index][2]) ? $authors[$index][2] : '',
            );

            $insertId = $collection->add($record);
            if ($insertId === false) {
                // The insert itself failed; do not report success.
                echo '添加失败';
                continue;
            }

            echo '添加成功';
            dump($insertId);
        }
    }

    /**
     * Download a page over HTTP with cURL.
     *
     * @param string $url Address of the page to fetch.
     *
     * @return string|false The response body, or false when the request
     *                      failed or the body is empty.
     */
    private function fetchPage($url)
    {
        $curl = curl_init();
        if ($curl === false) {
            return false;
        }

        curl_setopt($curl, CURLOPT_URL, $url);
        // Return the body as a string instead of printing it.
        curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
        // Bound the request so a stalled remote host cannot hang the action.
        curl_setopt($curl, CURLOPT_CONNECTTIMEOUT, 10);
        curl_setopt($curl, CURLOPT_TIMEOUT, 30);

        $body = curl_exec($curl);
        curl_close($curl);

        if (!is_string($body) || $body === '') {
            return false;
        }

        return $body;
    }
}
php采集网站
猜你喜欢
转载自blog.csdn.net/hyy1206317124/article/details/80216451
今日推荐
周排行