Removing HTML elements from web page content

Java: remove HTML elements from a string

/** Matches a whole {@code <script>...</script>} element including its body (whitespace inside the tag delimiters is tolerated). */
private static final java.util.regex.Pattern SCRIPT_ELEMENT = java.util.regex.Pattern.compile(
        "<\\s*script[^>]*>[\\s\\S]*?<\\s*/\\s*script\\s*>",
        java.util.regex.Pattern.CASE_INSENSITIVE);

/** Matches a whole {@code <style>...</style>} element including its body (whitespace inside the tag delimiters is tolerated). */
private static final java.util.regex.Pattern STYLE_ELEMENT = java.util.regex.Pattern.compile(
        "<\\s*style[^>]*>[\\s\\S]*?<\\s*/\\s*style\\s*>",
        java.util.regex.Pattern.CASE_INSENSITIVE);

/** Matches any remaining markup tag: {@code <} followed by one or more non-{@code >} characters and {@code >}. */
private static final java.util.regex.Pattern ANY_TAG = java.util.regex.Pattern.compile("<[^>]+>");

/** Matches a run of one or more whitespace characters. */
private static final java.util.regex.Pattern WHITESPACE_RUN = java.util.regex.Pattern.compile("\\s+");

/**
 * Strips HTML markup from {@code input}, shortens the remaining text to at most
 * {@code length} characters, and doubles every single quote in the result.
 *
 * <p>Processing order:
 * <ol>
 *   <li>{@code <script>} and {@code <style>} elements are removed together with their bodies;</li>
 *   <li>every remaining tag is removed, keeping the text between tags;</li>
 *   <li>each run of whitespace is collapsed to a single space (the result is not trimmed);</li>
 *   <li>if the text is longer than {@code length}, it is cut to {@code length} characters and
 *       {@code "..."} is appended;</li>
 *   <li>each {@code '} is replaced by {@code ''}.</li>
 * </ol>
 *
 * <p>Because the ellipsis and the quote doubling are applied after the cut, the returned
 * string can be longer than {@code length}. The quote doubling looks like escaping for
 * embedding in an SQL string literal -- NOTE(review): confirm against callers; it is not a
 * substitute for parameterized queries.
 *
 * @param input  text that may contain HTML markup; may be {@code null}
 * @param length maximum number of characters of stripped text to keep before the ellipsis
 * @return the stripped, shortened and quote-doubled text, or {@code ""} when {@code input}
 *         is {@code null} or contains only whitespace
 * @throws StringIndexOutOfBoundsException if {@code length} is negative and the stripped
 *         text is non-empty
 */
public static String splitAndFilterString(String input, int length) {
    if (input == null || input.trim().isEmpty()) {
        return "";
    }

    // Script/style elements must go first, while their tags still delimit the body;
    // otherwise the generic tag pass would leave the JavaScript/CSS source behind as text.
    String text = SCRIPT_ELEMENT.matcher(input).replaceAll("");
    text = STYLE_ELEMENT.matcher(text).replaceAll("");
    text = ANY_TAG.matcher(text).replaceAll("");
    text = WHITESPACE_RUN.matcher(text).replaceAll(" ");

    if (text.length() > length) {
        int cut = length;
        // Do not cut between the two halves of a surrogate pair: that would leave a lone
        // high surrogate, which is not a valid character on its own.
        if (cut > 0
                && Character.isHighSurrogate(text.charAt(cut - 1))
                && Character.isLowSurrogate(text.charAt(cut))) {
            cut--;
        }
        text = text.substring(0, cut) + "...";
    }

    return text.replace("'", "''");
}

    input: the string that contains HTML elements

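 length: the maximum number of characters of stripped text to keep; longer text is cut to this length and "..." is appended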

Guess you like

Origin http://43.154.161.224:23101/article/api/json?id=326535353&siteId=291194637