文本过滤

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/grq15203514615/article/details/81905278
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Regex-based text filters that strip script/markup/SQL-looking fragments from user input.
 *
 * <p>Two entry points are provided: {@link #delHTMLTag(String)} for plain input boxes (all
 * tags are removed) and {@link #delHTMLTag2(String)} for rich-text input (ordinary tags are
 * kept, only selected elements and keywords are removed).
 *
 * <p>Every pattern is matched case-insensitively and every match is replaced by the empty
 * string. The filters are applied strictly in the order listed in the two filter arrays
 * below; the order matters because removing one match can create a new match for a later
 * pattern.
 */
public class DelHtmlTagUtils {

    // All patterns are compiled once at class-load time instead of on every call.

    /** A literal {@code <script ...>...</script>} element, body included. */
    private static final Pattern SCRIPT_ELEMENT = ci("<script[^>]*?>[\\s\\S]*?<\\/script>");

    /** A literal {@code <style ...>...</style>} element, body included. */
    private static final Pattern STYLE_ELEMENT = ci("<style[^>]*?>[\\s\\S]*?<\\/style>");

    /** An entity-escaped script element ({@code &lt;script...&gt;...&lt;/script&gt;}). */
    private static final Pattern ESCAPED_SCRIPT_ELEMENT = ci("&lt;script.*?&gt;.*?&lt;/script&gt;");

    /** An entity-escaped opening script tag. */
    private static final Pattern ESCAPED_SCRIPT_OPEN = ci("&lt;script.*?&gt;");

    /** An entity-escaped closing script tag. */
    private static final Pattern ESCAPED_SCRIPT_CLOSE = ci("&lt;/script&gt;");

    /** The bare word "script" anywhere in the text. */
    private static final Pattern SCRIPT_WORD = ci("script");

    /** The bare word "javascript" anywhere in the text. */
    private static final Pattern JAVASCRIPT_WORD = ci("javascript");

    /** {@code eval(...)}; the greedy group runs to the last ")" on the same line. */
    private static final Pattern EVAL_CALL = ci("eval\\((.*)\\)");

    /** Any {@code <...>} tag with at least one character between the brackets. */
    private static final Pattern ANY_TAG = ci("<[^>]+>");

    /** The empty tag {@code <>}, which {@link #ANY_TAG} does not match. */
    private static final Pattern EMPTY_TAG = ci("<>");

    /** A single-quoted {@code src='...'} attribute, allowing CR/LF around the "=". */
    private static final Pattern SINGLE_QUOTED_SRC = ci("src[\r\n]*=[\r\n]*\\\'(.*?)\\\'");

    /** An entity-escaped tag whose text contains "on". */
    private static final Pattern ESCAPED_ON_TAG = ci("&lt;[^>]*on.*?&gt;");

    /** An entity-escaped closing tag whose text contains "on" (greedy up to the last "&gt;"). */
    private static final Pattern ESCAPED_ON_CLOSE_TAG = ci("&lt;/[^>]*on.*&gt;");

    /** A literal element whose tag name starts with "a", body included. */
    private static final Pattern ANCHOR_ELEMENT = ci("<a[^>]*?>[\\s\\S]*?<\\/a>");

    /** A literal {@code <iframe ...>...</iframe>} element, body included. */
    private static final Pattern IFRAME_ELEMENT = ci("<iframe[^>]*?>[\\s\\S]*?<\\/iframe>");

    /** An entity-escaped {@code &lt;a...&gt;...&lt;/a&gt;} element. */
    private static final Pattern ESCAPED_ANCHOR_ELEMENT = ci("&lt;a.*?&gt;.*?&lt;/a&gt;");

    /** An entity-escaped iframe element. */
    private static final Pattern ESCAPED_IFRAME_ELEMENT = ci("&lt;iframe.*?&gt;.*?&lt;/iframe&gt;");

    /** An entity-escaped tag whose text contains "java" (rich-text filter only). */
    private static final Pattern ESCAPED_JAVA_TAG = ci("&lt;[^>]*java.*?&gt;");

    /** An entity-escaped closing tag whose text contains "java" (rich-text filter only). */
    private static final Pattern ESCAPED_JAVA_CLOSE_TAG = ci("&lt;/[^>]*java.*?&gt;");

    /** A literal {@code <on...>...</on>} element (rich-text filter only). */
    private static final Pattern ON_ELEMENT = ci("<on[^>]*?>[\\s\\S]*?<\\/on>");

    /** A literal {@code <java...>...</java>} element (rich-text filter only). */
    private static final Pattern JAVA_ELEMENT = ci("<java[^>]*?>[\\s\\S]*?<\\/java>");

    /** An entity-escaped {@code &lt;java...&gt;...&lt;/java&gt;} element (rich-text filter only). */
    private static final Pattern ESCAPED_JAVA_ELEMENT = ci("&lt;java.*?&gt;.*?&lt;/java&gt;");

    /**
     * SQL-looking fragments: space-quote, "--", block comments and a list of whole-word keywords.
     *
     * <p>The block-comment body is written as {@code [\s\S]*?}. The earlier
     * {@code (?:.|[\n\r])*?} form is an alternation under a quantifier, which
     * java.util.regex matches recursively and which can therefore overflow the stack on a
     * long comment in untrusted input.
     *
     * <p>NOTE(review): the first alternative starts with a space, so only a quote that is
     * preceded by a space is removed. This may be a typo for a bare quote; it is kept as-is
     * because removing every apostrophe would alter ordinary text. Confirm the intent.
     */
    private static final Pattern SQL_FRAGMENT = ci(" (?:')|(?:--)|(/\\*[\\s\\S]*?\\*/)|"
            + "(\\b(select|update|sleep|delete|insert|truncate|substr"
            + "|ascii|declare|exec|drop|execute"
            + "|ELT|concat|procedure|analyse|EXP|XML|MAKE_SET)\\b)");

    /** Filters for plain input boxes, in application order. */
    private static final Pattern[] PLAIN_TEXT_FILTERS = {
        SCRIPT_ELEMENT,
        STYLE_ELEMENT,
        ESCAPED_SCRIPT_ELEMENT,
        ESCAPED_SCRIPT_OPEN,
        ESCAPED_SCRIPT_CLOSE,
        SCRIPT_WORD,
        JAVASCRIPT_WORD,
        EVAL_CALL,
        ANY_TAG,
        EMPTY_TAG,
        SINGLE_QUOTED_SRC,
        ESCAPED_ON_TAG,
        ESCAPED_ON_CLOSE_TAG,
        ANCHOR_ELEMENT,
        IFRAME_ELEMENT,
        ESCAPED_ANCHOR_ELEMENT,
        ESCAPED_IFRAME_ELEMENT,
        SQL_FRAGMENT,
    };

    /**
     * Filters for rich-text input, in application order.
     *
     * <p>{@link #SCRIPT_ELEMENT} appears twice on purpose. The first pass removes script
     * elements together with their bodies before {@link #SCRIPT_WORD} can strip the tag
     * name and leave the body behind as text. The second pass catches a script element
     * that only comes into existence after the "script"/"javascript" word filters have
     * removed text from inside a longer string.
     */
    private static final Pattern[] RICH_TEXT_FILTERS = {
        SCRIPT_ELEMENT,
        JAVA_ELEMENT,
        STYLE_ELEMENT,
        ESCAPED_SCRIPT_ELEMENT,
        ESCAPED_SCRIPT_OPEN,
        ESCAPED_SCRIPT_CLOSE,
        SCRIPT_WORD,
        JAVASCRIPT_WORD,
        EVAL_CALL,
        ESCAPED_JAVA_TAG,
        ESCAPED_JAVA_CLOSE_TAG,
        ON_ELEMENT,
        SCRIPT_ELEMENT,
        ESCAPED_ON_TAG,
        ESCAPED_ON_CLOSE_TAG,
        ESCAPED_JAVA_ELEMENT,
        ANCHOR_ELEMENT,
        IFRAME_ELEMENT,
        ESCAPED_ANCHOR_ELEMENT,
        ESCAPED_IFRAME_ELEMENT,
    };

    /**
     * Filter for ordinary (plain-text) input boxes.
     *
     * <p>Removes script and style elements, every remaining tag, entity-escaped script,
     * anchor and iframe markup, the words "script" and "javascript", {@code eval(...)}
     * calls, single-quoted {@code src} attributes and SQL-looking fragments, then trims
     * the result.
     *
     * @param htmlStr the text to filter; may be {@code null}
     * @return {@code htmlStr} itself when it is {@code null} or empty, otherwise the
     *         filtered and trimmed text
     */
    public static String delHTMLTag(String htmlStr) {
        return applyFilters(htmlStr, PLAIN_TEXT_FILTERS);
    }

    /**
     * Filter for rich-text input.
     *
     * <p>Unlike {@link #delHTMLTag(String)}, ordinary tags are kept and no SQL filtering is
     * done. Script, style, anchor, iframe, {@code <java>} and {@code <on>} elements, their
     * entity-escaped forms, the words "script" and "javascript" and {@code eval(...)} calls
     * are removed, then the result is trimmed.
     *
     * @param htmlStr the text to filter; may be {@code null}
     * @return {@code htmlStr} itself when it is {@code null} or empty, otherwise the
     *         filtered and trimmed text
     */
    public static String delHTMLTag2(String htmlStr) {
        return applyFilters(htmlStr, RICH_TEXT_FILTERS);
    }

    /** Deletes every match of each filter from {@code text}, in array order, then trims. */
    private static String applyFilters(String text, Pattern[] filters) {
        if (text == null || text.length() == 0) {
            return text;
        }
        String filtered = text;
        for (Pattern filter : filters) {
            filtered = filter.matcher(filtered).replaceAll("");
        }
        return filtered.trim();
    }

    /** Compiles {@code regex} with the case-insensitive flag used by every filter. */
    private static Pattern ci(String regex) {
        return Pattern.compile(regex, Pattern.CASE_INSENSITIVE);
    }
}

    /**
     * Escapes HTML-significant characters in a parameter to guard against XSS.
     *
     * <p>The characters {@code &}, {@code <}, {@code >}, {@code "} and {@code '} are
     * replaced by their entity forms. The ampersand is escaped first so that the
     * ampersands introduced by the other replacements are not escaped a second time.
     *
     * @param param the raw parameter value; may be {@code null}
     * @return the escaped value, or {@code null} when {@code param} is {@code null}
     */
    public String dealParamsForXss(String param) {
        if (param == null) {
            return null;
        }
        // NOTE(review): the "<" and ">" replacements carry leading spaces (one and four).
        // They are preserved from the previous implementation; confirm whether the padding
        // is intentional.
        return param.replace("&", "&amp;")
                .replace("<", " &lt;")
                .replace(">", "    &gt;")
                .replace("\"", "&quot;")
                .replace("'", "&apos;");
    }

猜你喜欢

转载自blog.csdn.net/grq15203514615/article/details/81905278
今日推荐