<?php

if (!function_exists('html_fragment_to_text')) {
    /**
     * Reduce an HTML fragment to whitespace-normalised plain text.
     *
     * Steps, in order:
     *   1. remove <script> and <style> blocks, HTML comments and all remaining tags;
     *   2. collapse "line break followed by whitespace" down to the line break;
     *   3. decode a small fixed set of named entities plus decimal numeric
     *      entities, emitting UTF-8 bytes;
     *   4. remove every U+0020 space, then collapse remaining whitespace runs
     *      to a single space and trim.
     *
     * Entities are decoded in ONE pass so that "&amp;lt;" becomes the literal
     * text "&lt;" rather than being decoded twice into "<".
     *
     * If a PCRE call fails (preg_* returns null, e.g. backtrack limit hit),
     * that step is skipped and the text from the previous step is kept.
     *
     * @param string $html Raw HTML fragment.
     *
     * @return string Plain text.
     */
    function html_fragment_to_text($html)
    {
        $text = (string) $html;

        // Pattern => replacement, applied sequentially. Comments are removed
        // before generic tags so that markup nested inside a comment goes
        // away together with the comment.
        $stripRules = array(
            '~<script[^>]*?>.*?</script>~si' => '',   // strip JavaScript
            '~<style[^>]*?>.*?</style>~si'   => '',   // strip CSS
            '~<!--.*?-->~s'                  => '',   // strip comments
            '~<[/!]*?[^<>]*?>~si'            => '',   // strip HTML tags
            '~([\r\n])[\s]+~'                => '$1', // strip whitespace after a line break
        );
        foreach ($stripRules as $pattern => $replacement) {
            $result = preg_replace($pattern, $replacement, $text);
            if ($result !== null) {
                $text = $result;
            }
        }

        // Named entities handled by this script. Characters above 0x7F are
        // written as UTF-8 byte sequences; a bare chr(161) would be a lone
        // Latin-1 byte, which is not valid UTF-8.
        $namedEntities = array(
            'quot'  => '"',
            'amp'   => '&',
            'lt'    => '<',
            'gt'    => '>',
            'nbsp'  => ' ',
            'iexcl' => "\xC2\xA1",
            'cent'  => "\xC2\xA2",
            'pound' => "\xC2\xA3",
            'copy'  => "\xC2\xA9",
        );

        // preg_replace_callback replaces the "e" (eval) modifier, which was
        // removed in PHP 7.
        $decoded = preg_replace_callback(
            '~&(#\d+|[a-z]+);~i',
            function ($m) use ($namedEntities) {
                $name = strtolower($m[1]);
                if (isset($namedEntities[$name])) {
                    return $namedEntities[$name];
                }
                if ($name[0] !== '#') {
                    // Unknown named entity: leave untouched.
                    return $m[0];
                }
                if ((int) substr($name, 1) === 160) {
                    // Numeric form of &nbsp; maps to a plain space, like the named form.
                    return ' ';
                }
                // Decimal numeric entity -> UTF-8. References that
                // html_entity_decode() does not accept are returned unchanged.
                return html_entity_decode($m[0], ENT_QUOTES, 'UTF-8');
            },
            $text
        );
        if ($decoded !== null) {
            $text = $decoded;
        }

        // NOTE(review): this removes every plain space from the text. The
        // literal may originally have been a different character (for example
        // a non-breaking or full-width space) that was normalised when the
        // file was copied; confirm against the intended behaviour.
        $text = str_replace(' ', '', $text);

        $trimmed = trim($text);
        $collapsed = preg_replace('#\s+#', ' ', $trimmed);

        return $collapsed === null ? $trimmed : $collapsed;
    }
}

// Input HTML; this script starts from an empty string.
$content = '';
$content = html_fragment_to_text($content);

// Keep at most the first 300 BYTES. The cast covers PHP versions where
// substr() returns false for an empty input.
$content = (string) substr($content, 0, 300);

// Split the byte-truncated text into complete UTF-8 characters. A multi-byte
// character cut in half by substr() does not match and is therefore absent
// from $match[0]; $content itself is left as truncated.
$re['utf-8'] = "/[\x01-\x7f]|[\xc2-\xdf][\x80-\xbf]|[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xff][\x80-\xbf]{3}/";
preg_match_all($re['utf-8'], $content, $match);