过滤文本编辑器中的特殊字符 工具类

过滤文本编辑器中的特殊字符（去除 HTML 标签，得到纯文本）
/**
 * Utility that strips markup from rich-text-editor HTML and returns the remaining text.
 *
 * <p>All patterns are compiled once and shared; {@link Pattern} instances are immutable,
 * so the static methods can be called from any thread.
 */
public class HtmlToTex {

    /** Matches a complete {@code <script>} element, including its body (case-insensitive). */
    private static final Pattern SCRIPT_PATTERN =
            Pattern.compile("<script[^>]*?>[\\s\\S]*?<\\/script>", Pattern.CASE_INSENSITIVE);

    /** Matches a complete {@code <style>} element, including its body (case-insensitive). */
    private static final Pattern STYLE_PATTERN =
            Pattern.compile("<style[^>]*?>[\\s\\S]*?<\\/style>", Pattern.CASE_INSENSITIVE);

    /** Matches any remaining tag: {@code <} followed by one or more non-{@code >} characters and {@code >}. */
    private static final Pattern TAG_PATTERN = Pattern.compile("<[^>]+>");

    /*
     * Whitespace clean-up pattern, kept exactly as it was so existing output does not change.
     * NOTE(review): the alternation binds as  <a>\s*  |  \t  |  \r  |  \n</a>, and tags have
     * already been stripped when it runs, so in practice it removes only tabs and carriage
     * returns; spaces and line feeds survive. It looks like the intended expression was
     * "\\s*|\t|\r|\n" (remove all whitespace) - confirm with the callers before changing it.
     */
    private static final Pattern WHITESPACE_PATTERN =
            Pattern.compile("<a>\\s*|\t|\r|\n</a>", Pattern.CASE_INSENSITIVE);

    /**
     * Removes script blocks, style blocks and every other HTML tag from the given string,
     * applies the whitespace clean-up pattern and trims the result.
     *
     * @param htmlStr HTML fragment to clean; may be {@code null}
     * @return the text left after stripping, or an empty string when {@code htmlStr} is {@code null}
     */
    public static String delHTMLTag(String htmlStr) {
        if (htmlStr == null) {
            // Previously this threw NullPointerException from Pattern.matcher().
            return "";
        }

        // Order matters: script/style bodies must go before the generic tag pass,
        // otherwise their contents would be left behind as text.
        String text = SCRIPT_PATTERN.matcher(htmlStr).replaceAll("");
        text = STYLE_PATTERN.matcher(text).replaceAll("");
        text = TAG_PATTERN.matcher(text).replaceAll("");
        text = WHITESPACE_PATTERN.matcher(text).replaceAll("");

        // The former trailing <p>/<br> replaceAll calls discarded their results (String is
        // immutable) and targeted tags that TAG_PATTERN has already removed, so they were
        // no-ops and have been dropped without affecting the output.
        return text.trim();
    }

    /**
     * Returns the plain text of an HTML fragment by delegating to {@link #delHTMLTag(String)}.
     *
     * @param htmlStr HTML fragment to convert; may be {@code null}
     * @return the stripped text, or an empty string when {@code htmlStr} is {@code null}
     */
    public static String getTextFromHtml(String htmlStr) {
        return delHTMLTag(htmlStr);
    }
}

调用示例：

```java
HtmlToTex.getTextFromHtml(account.getProjectContent())
```

猜你喜欢

转载自blog.csdn.net/qq_30443907/article/details/82592152