从html文本中获取img标签的src、a标签的href内容

/**
 * Extracts the {@code src} attribute value of every {@code <img>} tag in an HTML fragment.
 *
 * <p>Tag and attribute names are matched case-insensitively and the value may be wrapped in
 * single or double quotes. At most one value (the first {@code src}) is taken per tag; tags
 * without a quoted {@code src} attribute are skipped.
 *
 * @param content HTML text to scan; {@code null} is treated as empty
 * @return the {@code src} values in document order; empty when none are found, never {@code null}
 */
private List<String> getImgsrcs(String content){
    List<String> srcList = new ArrayList<>();
    if (content == null || content.isEmpty()) {
        return srcList;
    }
    // Both patterns are compiled once per call instead of once per matched tag.
    // [^>]* (unlike '.') also spans line breaks inside a tag; \b rejects names that merely start with "img".
    Pattern imgTag = Pattern.compile("<img\\b([^>]*)>", Pattern.CASE_INSENSITIVE);
    // (?:^|\s) keeps attributes such as data-src from being mistaken for src;
    // the \1 back-reference forces the closing quote to be the same character as the opening one.
    Pattern srcAttr = Pattern.compile("(?:^|\\s)src\\s*=\\s*([\"'])(.*?)\\1", Pattern.CASE_INSENSITIVE);

    Matcher tagMatcher = imgTag.matcher(content);
    while (tagMatcher.find()) {
        // Group 1 is everything between the tag name and the closing '>'.
        Matcher attrMatcher = srcAttr.matcher(tagMatcher.group(1));
        if (attrMatcher.find()) {
            srcList.add(attrMatcher.group(2));
        }
    }
    return srcList;
}
/**
 * Extracts the {@code href} attribute value of every {@code <a>} tag in an HTML fragment.
 *
 * <p>Tag and attribute names are matched case-insensitively and the value may be wrapped in
 * single or double quotes. Only the opening {@code <a ...>} tag is inspected, so other tags
 * whose names start with "a" (for example {@code <area>} or {@code <abbr>}) are ignored.
 * Anchors without a quoted {@code href} attribute are skipped.
 *
 * @param content HTML text to scan; {@code null} is treated as empty
 * @return the {@code href} values in document order; empty when none are found, never {@code null}
 */
private List<String> getAhrefs(String content){
    List<String> hrefList = new ArrayList<>();
    if (content == null || content.isEmpty()) {
        return hrefList;
    }
    // Both patterns are compiled once per call instead of once per matched tag.
    // \b stops the tag name at "a"; [^>]* (unlike '.') also spans line breaks inside a tag.
    Pattern anchorTag = Pattern.compile("<a\\b([^>]*)>", Pattern.CASE_INSENSITIVE);
    // (?:^|\s) keeps attributes such as data-href from being mistaken for href;
    // the \1 back-reference forces the closing quote to be the same character as the opening one.
    Pattern hrefAttr = Pattern.compile("(?:^|\\s)href\\s*=\\s*([\"'])(.*?)\\1", Pattern.CASE_INSENSITIVE);

    Matcher tagMatcher = anchorTag.matcher(content);
    while (tagMatcher.find()) {
        // Group 1 is everything between the tag name and the closing '>'.
        Matcher attrMatcher = hrefAttr.matcher(tagMatcher.group(1));
        if (attrMatcher.find()) {
            hrefList.add(attrMatcher.group(2));
        }
    }
    return hrefList;
}

验证截图如下:

猜你喜欢

转载自blog.csdn.net/dhklsl/article/details/115477936