PHP 过滤生僻字和特殊字符

<?php
// Declare the response as UTF-8 HTML so the browser decodes the output bytes as UTF-8.
header('Content-Type:text/html;charset=utf-8');
// Sample input: ASCII, common CJK text, HTML-special characters and a bracketed
// literal placeholder standing in for a rare character.
$content = "abc阅谁问君诵,水落清香浮。下面是生僻字和特殊字符:<>'\"'[生僻字iteye无法识别]";
// Filter first, then HTML-escape. The charset is passed explicitly so the escaping
// matches the UTF-8 header above instead of depending on the default_charset ini
// setting (or the ISO-8859-1 default used before PHP 5.4).
echo htmlspecialchars(filterUtf8($content), ENT_QUOTES, 'UTF-8');
/**
 * Strip rare characters and invalid bytes from a UTF-8 string.
 *
 * A "rare character" here means any character encoded with more than three
 * bytes (i.e. outside the Basic Multilingual Plane). Bytes that do not form
 * valid UTF-8 are removed as well.
 *
 * UTF-8 encoding table for the sequences that are kept:
 *   Unicode range      | UTF-8 byte pattern
 *   U+0000  - U+007F   | 0xxxxxxx
 *   U+0080  - U+07FF   | 110xxxxx 10xxxxxx
 *   U+0800  - U+FFFF   | 1110xxxx 10xxxxxx 10xxxxxx
 *
 * Dropped:
 *   - sequences of four or more bytes (lead byte 0xF0 and above),
 *   - stray continuation bytes (10xxxxxx without a lead byte),
 *   - truncated sequences (only the lead byte and the continuation bytes that
 *     actually follow it are removed; the next ASCII/lead byte is kept),
 *   - overlong encodings and UTF-16 surrogates (U+D800 - U+DFFF).
 *
 * @param string $str Raw input, expected to be (mostly) UTF-8.
 * @return string The input with only valid 1- to 3-byte UTF-8 sequences left.
 */
function filterUtf8($str) {
	$str = (string) $str;
	$length = strlen($str);
	$result = '';
	$i = 0;

	while ($i < $length) {
		$lead = ord($str[$i]);

		// Single-byte (ASCII) character: always kept.
		if ($lead < 0x80) {
			$result .= $str[$i];
			$i++;
			continue;
		}

		// 10xxxxxx without a preceding lead byte: stray continuation, drop it.
		if ($lead < 0xC0) {
			$i++;
			continue;
		}

		// The number of leading 1 bits gives the declared sequence length.
		if ($lead < 0xE0) {
			$size = 2;
		} elseif ($lead < 0xF0) {
			$size = 3;
		} elseif ($lead < 0xF8) {
			$size = 4;
		} elseif ($lead < 0xFC) {
			$size = 5;
		} else {
			$size = 6;
		}

		// Walk over the continuation bytes that really follow the lead byte,
		// never reading past the end of the string.
		$end = $i + 1;
		$limit = min($length, $i + $size);
		while ($end < $limit && (ord($str[$end]) & 0xC0) === 0x80) {
			$end++;
		}

		// Keep only complete sequences of at most three bytes.
		if ($size <= 3 && $end - $i === $size) {
			$second = ord($str[$i + 1]);
			// Overlong forms encode a code point in more bytes than necessary.
			$overlong = ($size === 2 && $lead < 0xC2)
				|| ($size === 3 && $lead === 0xE0 && $second < 0xA0);
			// ED A0..BF xx would encode a UTF-16 surrogate, which is not valid UTF-8.
			$surrogate = ($size === 3 && $lead === 0xED && $second >= 0xA0);
			if (!$overlong && !$surrogate) {
				$result .= substr($str, $i, $size);
			}
		}

		// Resume right after the bytes consumed by this (kept or dropped) sequence.
		$i = $end;
	}

	return $result;
}
?>

效果图:

 

猜你喜欢

转载自onestopweb.iteye.com/blog/2367682