假期十

热词爬取

package word;

import java.io.IOException;

import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import dao.Dao;

/**
 * Small jsoup-based crawler: starting from one page, it follows the links found
 * inside elements with CSS class {@code para} and prints the title and summary
 * text of every linked page.
 */
public class Baidu1 {

    /**
     * Entry point: crawls the links found on a fixed Baidu Baike entry page.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        lianjie("https://baike.baidu.com/item/%E6%89%8B%E6%9C%BA/6342");
    }

    /**
     * Fetches {@code address}, collects every {@code a[href]} inside elements
     * with class {@code para}, and for each link prints the linked page's title
     * followed by its summary text.
     *
     * <p>Each linked page is downloaded once and both fields are extracted from
     * the same document. A page that cannot be fetched is reported on stderr and
     * skipped; the crawl continues with the next link.
     *
     * @param address URL of the page whose links should be followed
     */
    public static void lianjie(String address) {
        final Document startPage;
        try {
            startPage = Jsoup.connect(address).get();
        } catch (IOException e) {
            System.err.println("Failed to fetch start page " + address);
            e.printStackTrace();
            return;
        }

        Elements anchors = startPage.getElementsByClass("para").select("a[href]");
        for (Element anchor : anchors) {
            // Fragment-only links point back into the page we are already on.
            if (anchor.attr("href").startsWith("#")) {
                continue;
            }
            // Resolve against the page's base URI instead of string-prefixing the
            // host, so absolute and relative hrefs both produce a valid URL.
            String url = anchor.absUrl("href");
            // Skip unresolvable hrefs and non-HTTP schemes (e.g. javascript:),
            // which Jsoup.connect cannot fetch.
            if (!url.startsWith("http://") && !url.startsWith("https://")) {
                continue;
            }

            final Document linkedPage;
            try {
                linkedPage = Jsoup.connect(url).get();
            } catch (IOException e) {
                System.err.println("Failed to fetch " + url);
                e.printStackTrace();
                continue;
            }

            String title = extractTitle(linkedPage);
            String explanation = extractSummary(linkedPage);
            System.out.println(title);
            System.out.println(explanation);
            // Persistence is currently disabled; this is where
            // (title, explanation, url) would be handed to the DAO layer.
        }
    }

    /**
     * Fetches {@code address} and returns its entry title.
     *
     * @param address URL of the page to read
     * @return the concatenated {@code h1} and {@code h2} text found inside
     *         elements with class {@code lemmaWgt-lemmaTitle-title}; an empty
     *         string if the page cannot be fetched or has no such elements
     */
    public static String timu(String address) {
        try {
            return extractTitle(Jsoup.connect(address).get());
        } catch (IOException e) {
            e.printStackTrace();
            return "";
        }
    }

    /**
     * Fetches {@code address} and returns its summary text.
     *
     * @param address URL of the page to read
     * @return the text of all elements with class {@code lemma-summary}; an
     *         empty string if the page cannot be fetched or has no such elements
     */
    public static String ex(String address) {
        try {
            return extractSummary(Jsoup.connect(address).get());
        } catch (IOException e) {
            e.printStackTrace();
            return "";
        }
    }

    /** Returns the h1 + h2 text inside the title container of an already-fetched page. */
    private static String extractTitle(Document page) {
        Elements titleBox = page.getElementsByClass("lemmaWgt-lemmaTitle-title");
        return titleBox.select("h1").text() + titleBox.select("h2").text();
    }

    /** Returns the text of the summary container of an already-fetched page. */
    private static String extractSummary(Document page) {
        return page.getElementsByClass("lemma-summary").text();
    }

}

猜你喜欢

转载自www.cnblogs.com/jbwen/p/12293366.html