/**
 * Extracts the {@code src} attribute value of each {@code <img>} tag in an HTML string.
 *
 * <p>Tag and attribute names are matched case-insensitively. The value must be
 * wrapped in single or double quotes, and the closing quote must be the same
 * character as the opening one. A tag without such an attribute contributes
 * nothing to the result. Because {@code .} does not match line terminators here,
 * a tag whose text spans several lines is not recognised.
 *
 * @param content HTML text to scan; {@code null} is treated as containing no tags
 * @return the {@code src} values in the order the tags appear; never {@code null}
 */
private List<String> getImgsrcs(String content) {
    List<String> srcList = new ArrayList<>();
    if (content == null) {
        return srcList;
    }
    // Group 1 is the attribute text between "<img" and the end of the opening tag.
    Pattern imgTag = Pattern.compile("<img(.*?)(>|></img>|/>)", Pattern.CASE_INSENSITIVE);
    // \1 forces the closing quote to equal the opening one, so an apostrophe
    // inside a double-quoted value does not cut the value short.
    Pattern srcAttr = Pattern.compile("src=([\"'])(.*?)\\1", Pattern.CASE_INSENSITIVE);

    Matcher tagMatcher = imgTag.matcher(content);
    while (tagMatcher.find()) {
        Matcher srcMatcher = srcAttr.matcher(tagMatcher.group(1));
        // Only the first src attribute inside a tag is reported.
        if (srcMatcher.find()) {
            srcList.add(srcMatcher.group(2));
        }
    }
    return srcList;
}
/**
 * Extracts the {@code href} attribute value of each anchor element in an HTML string.
 *
 * <p>An anchor is any text matched by {@code <a.*?/a>}, case-insensitively. Because
 * {@code .} does not match line terminators here, the opening and closing tags
 * must be on the same line. The value must be wrapped in single or double quotes,
 * with the closing quote equal to the opening one. An anchor without such an
 * attribute contributes nothing to the result.
 *
 * @param content HTML text to scan; {@code null} is treated as containing no anchors
 * @return the {@code href} values in the order the anchors appear; never {@code null}
 */
private List<String> getAhrefs(String content) {
    List<String> hrefList = new ArrayList<>();
    if (content == null) {
        return hrefList;
    }
    Pattern anchorTag = Pattern.compile("<a.*?/a>", Pattern.CASE_INSENSITIVE);
    // \1 forces the closing quote to equal the opening one.
    Pattern hrefAttr = Pattern.compile("href=([\"'])(.*?)\\1", Pattern.CASE_INSENSITIVE);

    Matcher anchorMatcher = anchorTag.matcher(content);
    while (anchorMatcher.find()) {
        Matcher hrefMatcher = hrefAttr.matcher(anchorMatcher.group());
        // Only the first href found inside the matched anchor text is reported.
        if (hrefMatcher.find()) {
            hrefList.add(hrefMatcher.group(2));
        }
    }
    return hrefList;
}
La captura de pantalla de verificación es la siguiente: