Using cURL and DOMDocument with XPath filtering to build a simple PHP web crawler
<?php
// Simple crawler: fetch a page with cURL, parse it with DOMDocument, print the
// text of every link selected by an XPath query, then dump the raw HTML.

// header() only works before any output has been sent; guard the call so that
// earlier output does not trigger a "headers already sent" warning.
if (!headers_sent()) {
    header('Content-type: text/plain; charset=utf-8');
}

$target_url = "http://www.baidu.com";

$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $target_url);
curl_setopt($ch, CURLOPT_FAILONERROR, true);     // treat HTTP status >= 400 as a failure
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);  // follow redirects
curl_setopt($ch, CURLOPT_AUTOREFERER, true);     // set Referer automatically on redirects
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);  // return the body instead of printing it
curl_setopt($ch, CURLOPT_TIMEOUT, 10);           // give up after 10 seconds
$html = curl_exec($ch);

// curl_exec() returns false on failure. An empty body is reported the same way
// because DOMDocument::loadHTML() cannot parse an empty string.
if ($html === false || $html === '') {
    echo "<br />cURL error number:" . curl_errno($ch);
    echo "<br />cURL error:" . curl_error($ch);
    exit;
}

// Create a DOMDocument object for processing the HTML.
$dom = new DOMDocument();

// Load the HTML from the string. Real-world markup is rarely valid, so collect
// libxml parse warnings internally instead of hiding them with the @ operator,
// then discard them and restore the previous error mode.
$previousErrorMode = libxml_use_internal_errors(true);
$dom->loadHTML($html);
libxml_clear_errors();
libxml_use_internal_errors($previousErrorMode);

// Normalize the document.
$dom->normalize();

// Wrap the DOM in a DOMXPath object for querying.
$xpath = new DOMXPath($dom);

// Select every <a> element that is a direct child of the element with id "u1".
$hrefs = $xpath->evaluate('//*[@id="u1"]/a');

// evaluate() yields a DOMNodeList for node-set expressions; skip the loop for
// any other result (e.g. false for an invalid expression).
if ($hrefs instanceof DOMNodeList) {
    foreach ($hrefs as $href) {
        // Print the link text, one link per line.
        echo $href->nodeValue . PHP_EOL;
    }
}
?>
<hr>
<pre>
<?= $html ?>
</pre>