public static void main(String[] args) { String str = readFile(new File("D:\\workspace\\izbra_front\\WebRoot\\aa\\aa.java")); // 先过滤 script 标签 String reg_tag = "<[\\s]*?#t#[^>]*?>[\\s\\S]*?<[\\s]*?\\/[\\s]*?#t#[\\s]*?>".replace("#t#", "script"); str = Pattern.compile(reg_tag,Pattern.CASE_INSENSITIVE).matcher(str).replaceAll(""); // 再 过滤 hmtl 标签 reg_tag = "<[^>]+>"; reg_tag = "<[\\s\\S]*?>"; str = Pattern.compile(reg_tag,Pattern.CASE_INSENSITIVE).matcher(str).replaceAll(""); str = str.replaceAll(" ", ""); str = str.replaceAll("\n{1,}", "#"); str = str.startsWith("#") ? str.substring(1) : str; str = str.endsWith("#") ? str.substring(0, str.length() - 1) : str; System.out.println("数量=="+str.split("#").length); str = str.replaceAll("#", "\n"); System.out.println(str); } public static String readFile(File file){ StringBuilder sb = new StringBuilder(); try { BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8")); String line = null; while((line = br.readLine()) != null){ if("添加".equals(line)) continue; sb.append(line+"\n"); } br.close(); } catch (Exception e) { e.printStackTrace(); } return sb.toString(); }
使用正则表达式去除html中的标签元素以及js和css脚本和样式
猜你喜欢
转载自276833190.iteye.com/blog/2255648
今日推荐
周排行