PHP 爬虫：抓取爱奇艺的视频信息与内容

<?php
/**
 * Return one capture group from the first match of a pattern.
 *
 * @param string $pattern PCRE pattern, including delimiters and modifiers.
 * @param string $subject Text to search.
 * @param int    $group   Index of the capture group to return.
 * @return string|null The captured text, or null when the pattern does not match.
 */
function _getdata_capture($pattern, $subject, $group = 1)
{
    if (preg_match($pattern, $subject, $m) === 1 && isset($m[$group])) {
        return $m[$group];
    }

    return null;
}

/**
 * Fetch an album page and extract its metadata; also store the cover image
 * as /data/<$i>.jpg when that file does not exist yet.
 *
 * Only the part of the page between the 'album-head-info clearfix' marker and
 * the 'class="album-auto"' marker is searched. A marker that is absent simply
 * leaves that side of the page untrimmed.
 *
 * @param int|string $i   Identifier used to name the saved cover image.
 * @param string     $url Location of the page (anything file_get_contents accepts).
 * @return array Keys: 'title', 'other_title', 'time', 'daoyan' (string, or null
 *               when not found); 'area', 'lang', 'type' (string, '' when not
 *               found); 'summary_all' (present only when a non-empty summary
 *               was found).
 */
function getdata($i, $url)
{
    $data = array();

    // Read the whole page into a string; a failed fetch is treated as an
    // empty page so the caller still receives the usual set of keys.
    $html = file_get_contents($url);
    if ($html === false) {
        $html = '';
    }

    // Narrow the page down to the album header section.
    $start = strpos($html, 'album-head-info clearfix');
    if ($start !== false) {
        $html = substr($html, $start);
    }
    // Guarded: an unfound end marker must not be used as a length of 0,
    // which would throw the whole page away.
    $end = strpos($html, 'class="album-auto"');
    if ($end !== false) {
        $html = substr($html, 0, $end);
    }

    $data['title'] = _getdata_capture('/<a.*?id="j-album-title".*?>(.*?)<\/a>/is', $html);

    $data['other_title'] = _getdata_capture('/<span.*?class="info-intro-title-s".*?>(.*?)<\/span>/is', $html);

    // Region
    $area = _getdata_capture(
        '/<p.*?class="episodeIntro-area".*?>.*?<em>(.*?)<\/em>.*?<a.*?>(.*?)<\/a>.*?<\/p>/is',
        $html,
        2
    );
    $data['area'] = trim((string) $area);

    // Language
    $lang = _getdata_capture(
        '/<p.*?class="episodeIntro-lang".*?>.*?<em>(.*?)<\/em>.*?<span.*?>(.*?)<\/span>.*?<\/p>/is',
        $html,
        2
    );
    $data['lang'] = trim((string) $lang);

    // Genres, joined with '/' (e.g. suspense/history/drama)
    $genres = array();
    if (preg_match_all('/<a.*?qwys_leixing.*?>(.*?)<\/a>/is', $html, $match)) {
        $genres = $match[1];
    }
    $data['type'] = implode('/', $genres);

    // Release time
    $data['time'] = _getdata_capture(
        '/<p.*?class="episodeIntro-time".*?>.*?<em>(.*?)<\/em>.*?<span.*?>(.*?)<\/span>.*?<\/p>/is',
        $html,
        2
    );

    // Director
    $data['daoyan'] = _getdata_capture(
        '/<p.*?class="episodeIntro-director".*?>.*?<em>(.*?)<\/em>.*?<a.*?>(.*?)<\/a>.*?<\/p>/is',
        $html,
        2
    );

    // Summary: when the page carries two summary spans the second one is
    // preferred, otherwise the first one is used.
    preg_match_all('/<span.*?class="briefIntroTxt".*?>(.*?)<\/span>/is', $html, $match);
    if (!empty($match[1][1])) {
        $data['summary_all'] = $match[1][1];
    } elseif (!empty($match[1][0])) {
        $data['summary_all'] = $match[1][0];
    }

    // Cover image: download only when a URL was found and no copy exists yet.
    $img = _getdata_capture('/<img.*?src="(.*?)".*?id="j-album-img".*?>/is', $html);
    $file = '/data/' . $i . '.jpg';
    if ($img !== null && $img !== '' && !file_exists($file)) {
        $bytes = file_get_contents($img);
        if ($bytes !== false && $bytes !== '') {
            file_put_contents($file, $bytes);
        }
    }

    return $data;
}
// explode 将字符串打散
// Driver: read the list of series, scrape each page that has a URL, and
// print the collected records as a PHP array literal.
//
// Each input line holds tab-separated fields: episode count, file name,
// page URL. Newlines are stripped here so they never end up in the URL.
$data = file('dianshiju02.txt', FILE_IGNORE_NEW_LINES);
if ($data === false) {
    // file() has already raised a warning; carry on with no records.
    $data = array();
}

$ret = array();

// The counter advances for every input line, including skipped ones, so an
// ID always corresponds to the line's position in the file (first ID: 5001).
$i = 5000;
foreach ($data as $v) {
    $i++;

    // Split on tabs and trim every field, which also removes stray spaces
    // around the delimiter and a trailing "\r" from CRLF files.
    $tmp = array_map('trim', explode("\t", $v));

    // Lines without a page URL carry nothing to scrape.
    if (empty($tmp[2])) {
        continue;
    }

    $num = (int) $tmp[0];
    $mp4 = $i . '/01.mp4';

    // Progress trace: ID followed by the URL being fetched.
    print_r($i);
    print_r($tmp[2] . "\n");

    $infos = getdata($i, $tmp[2]);

    $ret[$i] = array(
        'title' => isset($infos['title']) ? $infos['title'] : null,
        'num' => $num,
        'img' => '//static0.qianqian.com/movies/' . $i . '.jpg',
        'mp4' => 'http://qukufile2.qianqian.com/data2/film_tv/tv/' . $mp4,
        'id' => $i,
        'infos' => $infos,
    );
}

// var_export() prints directly when its second argument is omitted.
var_export($ret);

dianshiju02.txt 示例数据（每行一条记录，依次为：集数、文件名、爱奇艺页面地址；脚本按制表符拆分字段）：

40 雪山飞狐-01.mpg http://www.iqiyi.com/lib/m_204754714.html?src=search
33 大捕房-01.mp4 http://www.iqiyi.com/lib/m_202787314.html?src=search
40 嫁入豪门=01.mp4 http://www.iqiyi.com/lib/m_200881014.html?src=search
32 劝和小姐-01.mp4 http://www.iqiyi.com/lib/m_200840914.html?src=search
30 血色恋情-01.mp4 http://www.iqiyi.com/lib/m_202498214.html?src=search
32 锁定美军特使-01.mp4 http://www.iqiyi.com/lib/m_218730014.html?src=search
32 红狐-01.mp4 http://www.iqiyi.com/lib/m_202587214.html?src=search
34 大浴堂-01.mp4 http://www.iqiyi.com/lib/m_200880514.html?src=search
30 女婿难当-01.mp4 http://www.iqiyi.com/lib/m_206378814.html?src=search
27 风云1911-01.mp4 http://www.iqiyi.com/lib/m_202904114.html?src=search
28 醉红尘-01.mp4 http://www.iqiyi.com/lib/m_202964014.html?src=search
23 栗裕大将-01.mp4 http://www.iqiyi.com/lib/m_215180614.html?src=search
24 将军日记-01.mp4 http://www.iqiyi.com/lib/m_202547514.html?src=search

猜你喜欢

转载自www.cnblogs.com/yayaxuping/p/11200146.html