Get the Douban Top 250 book list (only the title and author information are retrieved).
The format is as follows:
public class HttpDemo {
public static void main(String[] args) throws Exception {
Integer cot=1;
File file = new File("d://豆瓣T250书单.txt");
if(file.exists()==false) file.createNewFile();
BufferedOutputStream bufferedOutputStream = new BufferedOutputStream(new FileOutputStream(file,true));
for (int i = 0; i <=225; i+=25) {
Document document = Jsoup.parse(new URL("https://book.douban.com/top250?start=" + i), 10000);
//获取书名
List<String> bNameList = document.select("div#content .item .pl2 [title]").eachText();
//获取简介
List<String> bConList = document.select("div#content .item p.pl").eachText();
for (int j = 0; j < bNameList.size(); j++) {
bufferedOutputStream.write(String.valueOf(cot).getBytes());
bufferedOutputStream.write("\r\n".getBytes());
bufferedOutputStream.write(bNameList.get(j).getBytes());
bufferedOutputStream.write("\r\n".getBytes());
bufferedOutputStream.write(bConList.get(j).getBytes());
bufferedOutputStream.write("\r\n".getBytes());
cot++;
}
}
bufferedOutputStream.close();
System.out.println("completed");
The final result is as follows: