直接上代码
BlogInfoUtil class
/**
* csdn数据爬取工具类
*/
@Component
@Slf4j
public class BlogInfoUtil {
private static final Map<String, String> headers;
private static final String csdnUrl = "https://blog.csdn.net/weixin_42825651";
static {
headers = new HashMap<>();
headers.put("referer", "https://www.google.com/");
headers.put("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0" +
".4183.83 Safari/537.36");
}
public static BlogInfoVo getBlogInfo() {
try {
BlogInfoVo blogInfoVo = new BlogInfoVo();
blogInfoVo.setDate(new Date());
Document doc = Jsoup.connect(csdnUrl).headers(headers).get();
Element blogElement = doc.getElementsByClass("data-info d-flex item-tiling").get(0);
// 文章数量
int articleCnt = Integer.parseInt(blogElement.getElementsByTag("dl").get(0).attr("title"));
blogInfoVo.setArticleCnt(articleCnt);
// 周排名
String wranking = blogElement.getElementsByTag("dl").get(1).attr("title");
blogInfoVo.setWranking(wranking);
// 总排名
String ranking = blogElement.getElementsByTag("dl").get(2).attr("title");
blogInfoVo.setRanking(ranking);
// 总阅读量
int viewCnt = Integer.parseInt(blogElement.getElementsByTag("dl").get(3).attr("title"));
blogInfoVo.setViewCnt(viewCnt);
blogElement = doc.getElementsByClass("data-info d-flex item-tiling").get(1);
// 总积分
int scoreCnt = Integer.parseInt(blogElement.getElementsByTag("dl").get(0).attr("title"));
blogInfoVo.setScore(scoreCnt);
// 粉丝数量
int fansCnt = Integer.parseInt(blogElement.getElementsByTag("dl").get(1).attr("title"));
blogInfoVo.setFansCnt(fansCnt);
// 点赞量
int likeCnt = Integer.parseInt(blogElement.getElementsByTag("dl").get(2).attr("title"));
blogInfoVo.setLikeCnt(likeCnt);
// 评论量
int commentCnt = Integer.parseInt(blogElement.getElementsByTag("dl").get(3).attr("title"));
blogInfoVo.setCommentCnt(commentCnt);
// 收藏量
int collectCnt = Integer.parseInt(blogElement.getElementsByTag("dl").get(4).attr("title"));
blogInfoVo.setCollectCnt(collectCnt);
return blogInfoVo;
} catch (Exception e) {
log.error("get bloginfo error, {}", e);
}
return null;
}
public static void main(String[] args) {
BlogInfoVo blogInfo = getBlogInfo();
System.out.println(blogInfo);
}
}
BlogInfoVo class
@Data
@ToString
public class BlogInfoVo {
private Date date;
private Integer articleCnt;
private String wranking;
private String ranking;
private Integer viewCnt;
private Integer score;
private Integer fansCnt;
private Integer likeCnt;
private Integer commentCnt;
private Integer collectCnt;
}
输出结果
As you can see, all of the statistics were scraped successfully. You can wrap this in a scheduled task that runs once a day and stores each result in a database. If you are interested, you can then chart the stored data to see at a glance how the numbers change from day to day.