抓取全国行政区划(PHP)代码

项目需要，简单写了一段 PHP 代码，用于从国家统计局网站抓取 2018 年全国行政区划代码。

/**
 * Scrapes the administrative-division code tables published under
 * http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2018/ and stores every
 * entry it finds as an Area record.
 *
 * Controller, Area, Log and http_read() are defined outside this file.
 */
class AreaCodeCtrl extends Controller
{
    /** Root of the 2018 division-code pages; "<province code>.html" is appended to it. */
    const BASE_URL = "http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2018/";

    /**
     * Return the text between the first occurrence of $leftStr and the next
     * occurrence of $rightStr that follows it.
     *
     * @param string $str      Text to search.
     * @param string $leftStr  Delimiter that precedes the wanted text.
     * @param string $rightStr Delimiter that follows the wanted text.
     * @return string The enclosed text, or "" when either delimiter is missing.
     */
    public static function getSubstr($str, $leftStr, $rightStr)
    {
        $left = strpos($str, $leftStr);
        // strpos() signals "not found" with false, never with a negative
        // number, so the check has to be a strict comparison against false.
        if ($left === false) {
            return "";
        }

        $start = $left + strlen($leftStr);
        $right = strpos($str, $rightStr, $start);
        if ($right === false) {
            return "";
        }

        return substr($str, $start, $right - $start);
    }

    /**
     * Store each configured province as an Area record, then crawl that
     * province's page (and, recursively, every page it links to).
     *
     * @return void
     */
    public function area()
    {
        $prv = [
            ['name'=>'湖北省','code'=>'42','cat'=>'000'], // has a character-set (encoding) problem.
        ];

        foreach ($prv as $item) {
            // Province codes are stored padded with ten zeros.
            $data = new Area([
                'code'=>$item['code'].'0000000000',
                'name'=>$item['name'],
            ]);
            $data->save();
            self::GetCode(self::BASE_URL . "{$item['code']}.html");
        }
    }

    /**
     * Download one listing page, save every row of its table as an Area
     * record and recurse into the sub-page of each row that carries a link.
     *
     * Rows whose save() raises a PDOException are logged and skipped so that
     * one bad row does not abort the crawl.
     *
     * @param string $url Absolute URL of the listing page.
     * @return void
     */
    private static function GetCode($url)
    {
        $raw = http_read($url);
        if (!is_string($raw) || $raw === '') {
            // Nothing to parse; report it instead of failing further down.
            Log::info("ERROR: empty response from {$url}");
            return;
        }

        // The page is converted from GBK to UTF-8 before matching. On failure
        // parsing continues best-effort with the unconverted bytes.
        try {
            $converted = iconv('gbk', 'utf-8//IGNORE', $raw);
            if ($converted !== false) {
                $raw = $converted;
            } else {
                echo "iconv error \n";
            }
        } catch (ErrorException $e) {
            echo "iconv error \n";
        }

        $html = self::getSubstr($raw, '名称</td>', '</TD>');
        // The class attribute of the first data row identifies all data rows.
        $dec = self::getSubstr($html, "<tr class='", "'>");
        $rowOpen = "<tr class='{$dec}'><td>";

        $body = self::getSubstr($html, $rowOpen, "</table>");
        // Drop the trailing 12 bytes (presumably the last row's closing tags
        // plus a line break -- confirm against the live markup).
        $body = (string) substr($body, 0, -12);
        $rows = explode("</td></tr>" . $rowOpen, $body);

        // Relative links in the rows are resolved against the page's directory.
        $base = substr($url, 0, strrpos($url, "/") + 1);

        foreach ($rows as $row) {
            if ($row === '') {
                // Happens when the page did not contain the expected table.
                continue;
            }

            $cols = explode('</td><td>', $row);
            if (count($cols) < 2) {
                Log::info("ERROR: unexpected row format: {$row}");
                continue;
            }

            if ($row[0] == '<') {
                // The row starts with markup: its cells are wrapped in links
                // that point at a sub-page to crawl.
                $href = self::getSubstr($row, "<a href='", "'>");
                try {
                    $data = new Area([
                        'code'=>self::getSubstr($cols[0], '>', '<'),
                        'name'=>self::getSubstr($cols[1], '>', '<'),
                    ]);
                    $data->save();
                    echo("{$data->code}, {$data->name}\n");
                } catch (PDOException $e) {
                    Log::info("ERROR: {$cols[0]}, {$cols[1]}");
                }

                // Without a link target the URL would collapse to the bare
                // directory, so only recurse when a target was extracted.
                if ($href !== '') {
                    self::GetCode($base . $href);
                }
            } else {
                // Plain-text row. With three cells the middle one is stored
                // as 'tcode' and the last one is the name.
                if (count($cols) > 2) {
                    $attrs = [
                        'code'=>$cols[0],
                        'name'=>$cols[2],
                        'tcode'=>$cols[1],
                    ];
                } else {
                    $attrs = [
                        'code'=>$cols[0],
                        'name'=>$cols[1],
                    ];
                }

                try {
                    $data = new Area($attrs);
                    echo("{$data->code}, {$data->name}\n");
                    $data->save();
                } catch (PDOException $e) {
                    // Log from $attrs: $data may be unset or belong to a
                    // previous row if the failure happened in the constructor.
                    Log::info("ERROR: {$attrs['code']}, {$attrs['name']}\n");
                }
            }
        }
    }
}

猜你喜欢

转载自blog.csdn.net/bywayboy/article/details/88342761