40 雪山飞狐-01.mpg http://www.iqiyi.com/lib/m_204754714.html?src=search 33 大捕房-01.mp4 http://www.iqiyi.com/lib/m_202787314.html?src=search 40 嫁入豪门=01.mp4 http://www.iqiyi.com/lib/m_200881014.html?src=search 32 劝和小姐-01.mp4 http://www.iqiyi.com/lib/m_200840914.html?src=search 30 血色恋情-01.mp4 http://www.iqiyi.com/lib/m_202498214.html?src=search 32 锁定美军特使-01.mp4 http://www.iqiyi.com/lib/m_218730014.html?src=search 32 红狐-01.mp4 http://www.iqiyi.com/lib/m_202587214.html?src=search 34 大浴堂-01.mp4 http://www.iqiyi.com/lib/m_200880514.html?src=search 30 女婿难当-01.mp4 http://www.iqiyi.com/lib/m_206378814.html?src=search 27 风云1911-01.mp4 http://www.iqiyi.com/lib/m_202904114.html?src=search 28 醉红尘-01.mp4 http://www.iqiyi.com/lib/m_202964014.html?src=search 23 栗裕大将-01.mp4 http://www.iqiyi.com/lib/m_215180614.html?src=search 24 将军日记-01.mp4 http://www.iqiyi.com/lib/m_202547514.html?src=search
php 爬虫爱奇艺 视频、内容
<?php
/**
 * Returns one capture group of the first match of a pattern.
 *
 * @param string $pattern PCRE pattern, including delimiters and flags.
 * @param string $subject Text to search.
 * @param int    $group   Index of the capture group to return.
 * @return string|null The captured text, or null when the pattern does not match.
 */
function getdata_first_match($pattern, $subject, $group = 1) {
    if (preg_match($pattern, $subject, $m) === 1 && isset($m[$group])) {
        return $m[$group];
    }
    return null;
}

/**
 * Narrows a page to the part between the 'album-head-info clearfix' marker
 * and the 'class="album-auto"' marker.
 *
 * A marker that is not present is ignored, so a missing end marker no longer
 * truncates the page to an empty string.
 *
 * @param string $html Full page source.
 * @return string The narrowed section (or the input when no marker is found).
 */
function getdata_album_section($html) {
    $start = strpos($html, 'album-head-info clearfix');
    if ($start !== false) {
        $html = substr($html, $start);
    }
    $end = strpos($html, 'class="album-auto"');
    if ($end !== false) {
        $html = substr($html, 0, $end);
    }
    return $html;
}

/**
 * Scrapes the metadata of one album page and stores its cover image.
 *
 * The page at $url is read, narrowed to the album header section and matched
 * against a set of regular expressions. If a cover image URL is found and
 * '/data/<i>.jpg' does not exist yet, the image is downloaded to that path.
 *
 * @param int|string $i   Identifier used to name the cover image file.
 * @param string     $url Location of the album page (anything file_get_contents accepts).
 * @return array Keys: 'title', 'other_title', 'time', 'daoyan' (string or null when
 *               not found); 'area', 'lang', 'type' (string, '' when not found);
 *               'summary_all' (string, only present when a summary was found).
 */
function getdata($i, $url) {
    $data = array(
        'title'       => null,
        'other_title' => null,
        'area'        => '',
        'lang'        => '',
        'type'        => '',
        'time'        => null,
        'daoyan'      => null,
    );

    // Read the whole page into a string; nothing to parse if that fails.
    $str = file_get_contents($url);
    if ($str === false || $str === '') {
        return $data;
    }
    $str = getdata_album_section($str);

    $data['title'] = getdata_first_match(
        '/<a .*?id="j-album-title".*?>(.*?)<\/a>/is',
        $str,
        1
    );
    $data['other_title'] = getdata_first_match(
        '/<span .*?class="info-intro-title-s".*?>(.*?)<\/span>/is',
        $str,
        1
    );

    // Region: group 1 is the <em> label, group 2 the value.
    $data['area'] = trim((string) getdata_first_match(
        '/<p .*?class="episodeIntro-area".*?>.*?<em>(.*?)<\/em>.*?<a.*?>(.*?)<\/a>.*?<\/p>/is',
        $str,
        2
    ));

    // Language.
    $data['lang'] = trim((string) getdata_first_match(
        '/<p .*?class="episodeIntro-lang".*?>.*?<em>(.*?)<\/em>.*?<span.*?>(.*?)<\/span>.*?<\/p>/is',
        $str,
        2
    ));

    // Genres: every matching link, joined with '/'.
    if (preg_match_all('/<a .*?qwys_leixing.*?>(.*?)<\/a>/is', $str, $match)) {
        $data['type'] = implode('/', $match[1]);
    }

    // Time.
    $data['time'] = getdata_first_match(
        '/<p .*?class="episodeIntro-time".*?>.*?<em>(.*?)<\/em>.*?<span.*?>(.*?)<\/span>.*?<\/p>/is',
        $str,
        2
    );

    // Director.
    $data['daoyan'] = getdata_first_match(
        '/<p .*?class="episodeIntro-director".*?>.*?<em>(.*?)<\/em>.*?<a.*?>(.*?)<\/a>.*?<\/p>/is',
        $str,
        2
    );

    // Summary: prefer the second matching span, fall back to the first.
    if (preg_match_all('/<span .*?class="briefIntroTxt".*?>(.*?)<\/span>/is', $str, $match)) {
        if (!empty($match[1][1])) {
            $data['summary_all'] = $match[1][1];
        } elseif (!empty($match[1][0])) {
            $data['summary_all'] = $match[1][0];
        }
    }

    // Cover image: download once, and only when a source URL was actually found.
    $img = getdata_first_match(
        '/<img .*?src="(.*?)".*?id="j-album-img".*?>/is',
        $str,
        1
    );
    $file = '/data/' . $i . '.jpg';
    if ($img !== null && $img !== '' && !file_exists($file)) {
        $f = file_get_contents($img);
        if ($f !== false && $f !== '') {
            file_put_contents($file, $f);
        }
    }

    return $data;
}
// Driver: read the list file, scrape each listed page with getdata() and
// print the collected records as a PHP array literal.
// Each line is split on tabs below: field 0 is cast to int as 'num' and
// field 2 is the page URL handed to getdata().
$data = file('dianshiju02.txt');
if ($data === false) {
    // Unreadable list file: continue with no entries so the output is an empty array.
    $data = array();
}

$ret = array();
$i = 5000;
foreach ($data as $v) {
    // Ids are line-based, so the counter advances even for lines that are skipped.
    $i++;

    // file() keeps the line terminator; strip it so it does not end up in the URL.
    $tmp = explode("\t", rtrim($v, "\r\n"));
    $url = isset($tmp[2]) ? trim($tmp[2]) : '';
    if ($url === '') {
        continue;
    }

    $num = (int) $tmp[0];
    $mp4 = $i . "/01.mp4";

    // Progress output: id immediately followed by the URL, one entry per line.
    print_r($i);
    print_r($url);
    echo "\n";

    $infos = getdata($i, $url);

    $ret[$i] = array(
        'title' => $infos['title'],
        'num'   => $num,
        'img'   => '//static0.qianqian.com/movies/' . $i . '.jpg',
        'mp4'   => 'http://qukufile2.qianqian.com/data2/film_tv/tv/' . $mp4,
        'id'    => $i,
        'infos' => $infos
    );
}

echo var_export($ret, true);
猜你喜欢
转载自www.cnblogs.com/yayaxuping/p/11200146.html
今日推荐
周排行