Obtener el contenido de los atributos src y href de las etiquetas img y a en un texto HTML

 

 

/**
 * Extracts the {@code src} attribute value of every {@code <img>} tag found in an HTML string.
 *
 * <p>Tag and attribute names are matched case-insensitively, and the value may be wrapped in
 * either double or single quotes (the closing quote must be the same character as the opening
 * one). Tags that carry no quoted {@code src} attribute are skipped. Attributes whose name
 * merely ends in "src" (for example {@code data-src}) are not treated as {@code src}.
 *
 * <p>This is a lightweight regex scan, not an HTML parser: a {@code >} character inside an
 * attribute value ends the tag early, and unquoted attribute values are not recognised.
 *
 * @param content HTML text to scan; {@code null} or empty yields an empty list
 * @return the {@code src} values in the order the tags appear; never {@code null}
 */
private List<String> getImgsrcs(String content){
    List<String> srcList = new ArrayList<>();
    if (content == null || content.isEmpty()) {
        return srcList;
    }

    // Group 1 is the attribute section of the tag. [^>]* (unlike '.') also crosses line
    // breaks, so tags whose attributes are spread over several lines are found too.
    Pattern imgTag = Pattern.compile("<img\\b([^>]*)>", Pattern.CASE_INSENSITIVE);
    // The look-behind rejects "data-src", "xlink:src" and similar; \1 forces the closing
    // quote to equal the opening one so a value like "it's.png" is not cut short.
    // Both patterns are compiled once, outside the loop.
    Pattern srcAttr = Pattern.compile(
            "(?<![\\w:-])src\\s*=\\s*([\"'])(.*?)\\1", Pattern.CASE_INSENSITIVE);

    Matcher tagMatcher = imgTag.matcher(content);
    while (tagMatcher.find()) {
        Matcher srcMatcher = srcAttr.matcher(tagMatcher.group(1));
        if (srcMatcher.find()) {
            srcList.add(srcMatcher.group(2));
        }
    }
    return srcList;
}

 

/**
 * Extracts the {@code href} attribute value of every {@code <a>} tag found in an HTML string.
 *
 * <p>Only the opening {@code <a ...>} tag is inspected, so other elements whose name starts
 * with "a" ({@code <area>}, {@code <abbr>}, {@code <article>}, ...) are ignored and an anchor
 * does not need its closing tag on the same line. Tag and attribute names are matched
 * case-insensitively, and the value may be wrapped in either double or single quotes (the
 * closing quote must be the same character as the opening one). Anchors without a quoted
 * {@code href} are skipped, and attributes such as {@code data-href} are not treated as
 * {@code href}.
 *
 * <p>This is a lightweight regex scan, not an HTML parser: a {@code >} character inside an
 * attribute value ends the tag early, and unquoted attribute values are not recognised.
 *
 * @param content HTML text to scan; {@code null} or empty yields an empty list
 * @return the {@code href} values in the order the anchors appear; never {@code null}
 */
private List<String> getAhrefs(String content){
    List<String> hrefList = new ArrayList<>();
    if (content == null || content.isEmpty()) {
        return hrefList;
    }

    // \b after "a" keeps <area>, <abbr>, ... from being mistaken for anchors; group 1 is the
    // attribute section, and [^>]* lets it span line breaks.
    Pattern anchorTag = Pattern.compile("<a\\b([^>]*)>", Pattern.CASE_INSENSITIVE);
    // The look-behind rejects "data-href", "xlink:href" and similar; \1 forces the closing
    // quote to equal the opening one. Both patterns are compiled once, outside the loop.
    Pattern hrefAttr = Pattern.compile(
            "(?<![\\w:-])href\\s*=\\s*([\"'])(.*?)\\1", Pattern.CASE_INSENSITIVE);

    Matcher tagMatcher = anchorTag.matcher(content);
    while (tagMatcher.find()) {
        Matcher hrefMatcher = hrefAttr.matcher(tagMatcher.group(1));
        if (hrefMatcher.find()) {
            hrefList.add(hrefMatcher.group(2));
        }
    }
    return hrefList;
}

 

La captura de pantalla de verificación es la siguiente:

 

 

 

Supongo que te gusta

Origin blog.csdn.net/dhklsl/article/details/115477936
Recomendado
Clasificación