A Super Simple PHP Web Crawler

Fetch a page with cURL, parse it with DOMDocument, and filter the data with an XPath query to build a simple PHP web crawler.

<?php
/**
 * Minimal crawler script.
 *
 * Downloads $target_url with cURL, parses the response with DOMDocument,
 * prints the text of every <a> directly under the element with id "u1"
 * (one per line), then dumps the raw response below the list.
 *
 * On a cURL failure the error number and message are printed and the
 * script exits without rendering the dump.
 */
header('Content-type: text/plain; charset=utf-8');

$target_url = "http://www.baidu.com";
$ch = curl_init();

curl_setopt_array($ch, [
    CURLOPT_URL            => $target_url,
    CURLOPT_FAILONERROR    => true,   // treat HTTP status >= 400 as a failure
    CURLOPT_FOLLOWLOCATION => true,   // follow redirects
    CURLOPT_AUTOREFERER    => true,   // set Referer automatically when redirected
    CURLOPT_RETURNTRANSFER => true,   // return the body instead of printing it
    CURLOPT_TIMEOUT        => 10,     // seconds
]);

$html = curl_exec($ch);

// curl_exec() returns false on failure. Compare strictly so that a body
// which merely evaluates as falsy (e.g. "0") is not reported as an error.
if ($html === false) {
    echo "<br />cURL error number:" .curl_errno($ch);
    echo "<br />cURL error:" . curl_error($ch);
    exit;
}

// DOMDocument::loadHTML() rejects an empty string, so stop early.
if ($html === '') {
    echo "Empty response body from " . $target_url;
    exit;
}

// Create a DOMDocument object for processing the HTML
$dom = new DOMDocument();

// Load the HTML from the string. Real-world markup is rarely valid, so
// collect libxml parse warnings internally instead of hiding them with @,
// then restore the previous libxml error-handling setting.
$previousLibxmlSetting = libxml_use_internal_errors(true);
$dom->loadHTML($html);
libxml_clear_errors();
libxml_use_internal_errors($previousLibxmlSetting);

// Normalize the document
$dom->normalize();

// Wrap the DOM in a DOMXPath object for querying
$xpath = new DOMXPath($dom);

// Select every <a> that is a direct child of the element whose id is "u1"
$hrefs = $xpath->evaluate('//*[@id="u1"]/a');

// evaluate() returns false for a malformed expression; only iterate a node list
if ($hrefs instanceof DOMNodeList) {
    foreach ($hrefs as $href) {
        echo $href->nodeValue . PHP_EOL;
    }
}

?>

<hr>
<pre>
<?= $html ?>
</pre>

 

Guess you like

Origin www.cnblogs.com/zjfree/p/11763156.html