Using Jsoup to scrape CSDN blog statistics

Let's go straight to the code.
BlogInfoUtil class

/**
 * csdn数据爬取工具类
 */
@Component
@Slf4j
public class BlogInfoUtil {
    
    

    private static final Map<String, String> headers;

    private static final String csdnUrl = "https://blog.csdn.net/weixin_42825651";

    static {
    
    
        headers = new HashMap<>();
        headers.put("referer", "https://www.google.com/");
        headers.put("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0" +
                ".4183.83 Safari/537.36");
    }

    public static BlogInfoVo getBlogInfo() {
    
    
        try {
    
    
            BlogInfoVo blogInfoVo = new BlogInfoVo();
            blogInfoVo.setDate(new Date());
            Document doc = Jsoup.connect(csdnUrl).headers(headers).get();
            Element blogElement = doc.getElementsByClass("data-info d-flex item-tiling").get(0);
            // 文章数量
            int articleCnt = Integer.parseInt(blogElement.getElementsByTag("dl").get(0).attr("title"));
            blogInfoVo.setArticleCnt(articleCnt);
            // 周排名
            String wranking = blogElement.getElementsByTag("dl").get(1).attr("title");
            blogInfoVo.setWranking(wranking);
            // 总排名
            String ranking = blogElement.getElementsByTag("dl").get(2).attr("title");
            blogInfoVo.setRanking(ranking);
            // 总阅读量
            int viewCnt = Integer.parseInt(blogElement.getElementsByTag("dl").get(3).attr("title"));
            blogInfoVo.setViewCnt(viewCnt);

            blogElement = doc.getElementsByClass("data-info d-flex item-tiling").get(1);
            // 总积分
            int scoreCnt = Integer.parseInt(blogElement.getElementsByTag("dl").get(0).attr("title"));
            blogInfoVo.setScore(scoreCnt);
            // 粉丝数量
            int fansCnt = Integer.parseInt(blogElement.getElementsByTag("dl").get(1).attr("title"));
            blogInfoVo.setFansCnt(fansCnt);
            // 点赞量
            int likeCnt = Integer.parseInt(blogElement.getElementsByTag("dl").get(2).attr("title"));
            blogInfoVo.setLikeCnt(likeCnt);
            // 评论量
            int commentCnt = Integer.parseInt(blogElement.getElementsByTag("dl").get(3).attr("title"));
            blogInfoVo.setCommentCnt(commentCnt);
            // 收藏量
            int collectCnt = Integer.parseInt(blogElement.getElementsByTag("dl").get(4).attr("title"));
            blogInfoVo.setCollectCnt(collectCnt);
            return blogInfoVo;
        } catch (Exception e) {
    
    
            log.error("get bloginfo error, {}", e);
        }
        return null;
    }

    public static void main(String[] args) {
    
    
        BlogInfoVo blogInfo = getBlogInfo();
        System.out.println(blogInfo);
    }
}

BlogInfoVo class

/**
 * Value object holding one snapshot of the statistics for a CSDN blog.
 *
 * <p>Accessors and {@code toString} are generated by Lombok.
 */
@Data
@ToString
public class BlogInfoVo {

    /** Date attached to this snapshot. */
    private Date date;

    /** Number of articles. */
    private Integer articleCnt;

    /** Weekly ranking, kept as text. */
    private String wranking;

    /** Overall ranking, kept as text. */
    private String ranking;

    /** Total view count. */
    private Integer viewCnt;

    /** Total score (points). */
    private Integer score;

    /** Number of followers. */
    private Integer fansCnt;

    /** Number of likes. */
    private Integer likeCnt;

    /** Number of comments. */
    private Integer commentCnt;

    /** Number of favorites (collections). */
    private Integer collectCnt;
}

Output
Insert picture description here
Insert picture description here
As you can see, all of the statistics were scraped successfully. You can turn this into a scheduled task that runs once a day and stores each result in a database. If you are interested, you can then chart the stored data to see at a glance how the numbers change from day to day.

Guess you like

Origin blog.csdn.net/weixin_42825651/article/details/108618057