<?php

declare(strict_types=1);

/**
 * Article collector for a single listing page.
 *
 * Flow:
 *   1. Download the listing page.
 *   2. Cut out the <ul> that holds the article links.
 *   3. Extract every article href from that area.
 *   4. Download each article, pull out its <title> and body, and store the
 *      result as ./collect/<n>.html (n = 1-based position of the link).
 *
 * NOTE(review): the attribute values used in the patterns below (class
 * "list2 clearfix", class "ad", id "support(s)", class "praise") were
 * reconstructed from a garbled copy of this script. The patterns are matched
 * case-insensitively to tolerate that, but they should be confirmed against
 * the live markup of the target site.
 */

/**
 * Turn an href found on the listing page into an absolute URL.
 *
 * @param string $href    Raw href attribute value.
 * @param string $baseUrl Absolute URL of the page the href was found on.
 *
 * @return string Absolute URL that can be handed to file_get_contents().
 */
function resolveArticleUrl(string $href, string $baseUrl): string
{
    // Already absolute: use as-is.
    if (preg_match('#^https?://#i', $href) === 1) {
        return $href;
    }

    $parts = parse_url($baseUrl);
    $scheme = $parts['scheme'] ?? 'http';
    $origin = $scheme . '://' . ($parts['host'] ?? '')
        . (isset($parts['port']) ? ':' . $parts['port'] : '');

    // Protocol-relative ("//host/path").
    if (strncmp($href, '//', 2) === 0) {
        return $scheme . ':' . $href;
    }

    // Root-relative ("/path").
    if (strncmp($href, '/', 1) === 0) {
        return $origin . $href;
    }

    // Document-relative: resolve against the directory of the base path.
    $path = $parts['path'] ?? '/';
    $directory = substr($path, 0, (int) strrpos($path, '/') + 1);

    return $origin . $directory . $href;
}

// 1. Address of the listing page to collect from.
$url = 'http://www.zgjiemeng.com/dongwu/';

// Directory the collected articles are written to.
$outputDir = './collect/';

// 2. Read the listing page.
$str = file_get_contents($url);
if ($str === false) {
    throw new RuntimeException("Unable to download listing page: {$url}");
}

// Emitted so that any output of this script is rendered as UTF-8 when it is
// run through a browser.
echo '<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />';

// 3. Pattern for the area of the page that contains the article links.
$patternArea = '/<ul class="list2 clearfix">([\s\S]*?)<\/ul>/i';

// 4. Cut the link area out of the listing page.
if (preg_match($patternArea, $str, $matchArea) !== 1) {
    throw new RuntimeException('Article link area not found on the listing page.');
}

// 5. Pattern for a single article link. The href may appear anywhere inside
//    the <a ...> tag, so the attribute order used by the site does not matter.
$patternUrl = '/<a\s[^>]*?href="([^"]*)"/i';

// 6. Collect every article address inside the link area.
if (!preg_match_all($patternUrl, $matchArea[1], $match)) {
    throw new RuntimeException('No article links found in the link area.');
}

// Make sure the target directory exists before writing into it.
if (!is_dir($outputDir) && !mkdir($outputDir, 0777, true) && !is_dir($outputDir)) {
    throw new RuntimeException("Unable to create output directory: {$outputDir}");
}

// 7.2 Pattern for the article body: everything between the (empty) ad <div>
//     and the "praise" <span> that follows the text.
$conPattern = '/<div\s+class="ad"><\/div>(.*?)<span\s+id="supports?"\s+class="praise"/is';

// 7.3 Pattern for the article title (the whole <title> element is kept).
$titlePattern = '/<title>(.*?)<\/title>/is';

// 7. Walk over all matched article addresses.
foreach ($match[1] as $index => $href) {
    // Files are numbered by link position, starting at 1, so a skipped
    // article leaves a gap instead of shifting the following file names.
    $num = $index + 1;
    $articleUrl = resolveArticleUrl($href, $url);

    // 7.1 Download the article page.
    $content = file_get_contents($articleUrl);
    if ($content === false) {
        trigger_error("Skipping article, download failed: {$articleUrl}", E_USER_WARNING);
        continue;
    }

    // 7.4 Match the article body.
    if (preg_match($conPattern, $content, $newCon) !== 1) {
        trigger_error("Skipping article, body not found: {$articleUrl}", E_USER_WARNING);
        continue;
    }

    // 7.5 Match the article title; an article without a title is still saved.
    $titleTag = preg_match($titlePattern, $content, $newTitle) === 1 ? $newTitle[0] : '';

    // 7.6 Compose the stored document: title tag, charset declaration, body.
    $newStr = $titleTag . '<meta charset="utf-8" />' . $newCon[1];

    // 7.7 Write the document to ./collect/<n>.html.
    $target = $outputDir . $num . '.html';
    if (file_put_contents($target, $newStr) === false) {
        trigger_error("Unable to write {$target}", E_USER_WARNING);
    }
}