package cn.xxx.magic.test;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.processor.PageProcessor;
/**
 * WebMagic {@link PageProcessor} that collects the table rows found under the
 * {@code div#pl_top_realtimehot} element of the Weibo "top summary" page.
 *
 * <p>The class name is left unchanged so that existing references keep working.
 */
public class hotProcessor implements PageProcessor {

    /**
     * Seed URL handed to the spider. The {@code cate} value is {@code realtimehot},
     * matching the {@code pl_top_realtimehot} element that {@link #ROW_SELECTOR} targets
     * (the previous value was missing its final {@code t}).
     */
    private static final String START_URL = "https://s.weibo.com/top/summary?cate=realtimehot";

    /** CSS selector for every {@code <tr>} of the table inside {@code div#pl_top_realtimehot}. */
    private static final String ROW_SELECTOR = "div#pl_top_realtimehot > table > tbody > tr";

    /** Key under which the matched rows are stored in the page's result fields. */
    private static final String RESULT_FIELD = "div";

    /** Site configuration returned by {@link #getSite()}; created once with {@code Site.me()}. */
    private final Site site = Site.me();

    /**
     * Parses a downloaded page: selects all elements matching {@link #ROW_SELECTOR}
     * in the page HTML and stores them under the {@code "div"} result field.
     *
     * @param page the page being processed
     */
    public void process(Page page) {
        page.putField(RESULT_FIELD, page.getHtml().css(ROW_SELECTOR).all());
    }

    /**
     * Returns the site configuration used by this processor.
     *
     * @return the {@link Site} instance held by this processor
     */
    public Site getSite() {
        return site;
    }

    /**
     * Program entry point: creates a spider around a new {@code hotProcessor},
     * registers {@link #START_URL} as the URL to crawl, and runs it.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        Spider.create(new hotProcessor())
                .addUrl(START_URL)
                .run();
    }
}
运行结果: