Extracting the src values of img tags and the href values of a tags from HTML text

 

 

/**
 * Extracts the {@code src} attribute value of every {@code <img>} tag in an HTML fragment.
 *
 * <p>Tag and attribute names are matched case-insensitively, and the value may be wrapped in
 * either single or double quotes (the closing quote must be the same character as the opening
 * one). Tags that have no quoted {@code src} attribute are skipped. This is a regex-based
 * scan, not an HTML parser: a literal {@code >} inside an attribute value ends the tag early.
 *
 * @param content HTML text to scan; may be {@code null} or empty
 * @return the {@code src} values in document order (duplicates kept); never {@code null}
 */
private List<String> getImgsrcs(String content){
    List<String> srcList = new ArrayList<>();
    if (content == null || content.isEmpty()) {
        return srcList;
    }

    // [^>]* (rather than .*?) lets the attribute list span several lines;
    // \b keeps the pattern from matching other tags whose name merely starts with "img".
    Pattern imgTag = Pattern.compile("<img\\b([^>]*)>", Pattern.CASE_INSENSITIVE);
    // The lookbehind rejects attributes such as data-src; the \1 back-reference forces the
    // closing quote to match the opening one so values like "it's.png" are not truncated.
    Pattern srcAttr = Pattern.compile(
            "(?<![\\w-])src\\s*=\\s*([\"'])(.*?)\\1", Pattern.CASE_INSENSITIVE);

    Matcher tagMatcher = imgTag.matcher(content);
    while (tagMatcher.find()) {
        Matcher srcMatcher = srcAttr.matcher(tagMatcher.group(1));
        if (srcMatcher.find()) {
            srcList.add(srcMatcher.group(2));
        }
    }
    return srcList;
}

 

/**
 * Extracts the {@code href} attribute value of every {@code <a>} tag in an HTML fragment.
 *
 * <p>Only the opening tag is inspected, so an anchor does not need a closing {@code </a>} to be
 * picked up. Tag and attribute names are matched case-insensitively, and the value may be
 * wrapped in either single or double quotes (the closing quote must be the same character as
 * the opening one). Anchors without a quoted {@code href} are skipped. This is a regex-based
 * scan, not an HTML parser: a literal {@code >} inside an attribute value ends the tag early.
 *
 * @param content HTML text to scan; may be {@code null} or empty
 * @return the {@code href} values in document order (duplicates kept); never {@code null}
 */
private List<String> getAhrefs(String content){
    List<String> hrefList = new ArrayList<>();
    if (content == null || content.isEmpty()) {
        return hrefList;
    }

    // \b stops the pattern from matching <abbr>, <area>, <article>, ... whose names merely
    // start with "a"; [^>]* lets the attribute list span several lines.
    Pattern anchorTag = Pattern.compile("<a\\b([^>]*)>", Pattern.CASE_INSENSITIVE);
    // The lookbehind rejects attributes such as data-href; the \1 back-reference forces the
    // closing quote to match the opening one.
    Pattern hrefAttr = Pattern.compile(
            "(?<![\\w-])href\\s*=\\s*([\"'])(.*?)\\1", Pattern.CASE_INSENSITIVE);

    Matcher tagMatcher = anchorTag.matcher(content);
    while (tagMatcher.find()) {
        Matcher hrefMatcher = hrefAttr.matcher(tagMatcher.group(1));
        if (hrefMatcher.find()) {
            hrefList.add(hrefMatcher.group(2));
        }
    }
    return hrefList;
}

 

The verification screenshot is as follows:

 

 

 

Guess you like

Origin blog.csdn.net/dhklsl/article/details/115477936