First, import the dependencies
<!-- Java crawler: jsoup, used by the demo class to parse the fetched HTML --> <dependency> <groupId>org.jsoup</groupId> <artifactId>jsoup</artifactId> <version>1.10.3</version> </dependency> <!-- Apache HttpClient dependency, used to perform the HTTP GET request. NOTE(review): no <version> is declared here; presumably it is managed by a parent POM or dependencyManagement section - confirm. --> <dependency> <groupId>org.apache.httpcomponents</groupId> <artifactId>httpclient</artifactId> </dependency>
Second, write the demo class
Be careful not to import the wrong package: the Document and Element classes used below come from org.jsoup.nodes.
package com.taotao.entity;

import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.IOException;

/**
 * Minimal crawler demo: downloads a web page with Apache HttpClient, parses it
 * with jsoup, and prints the text of the page title and of the element whose
 * id is {@code site_nav_top}.
 *
 * Author: TaoTao 2019/9/26
 */
public class intefaceTest {

    /**
     * Fetches {@code http://www.cnblogs.com/}, parses the returned HTML and
     * prints two pieces of text extracted from it to standard output.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if the HTTP request fails, the response body cannot
     *                     be read, or the response carries no entity
     */
    public static void main(String[] args) throws IOException {
        String content;

        // try-with-resources closes the response and the client even when the
        // request or the body read throws, releasing the underlying connection.
        try (CloseableHttpClient httpClient = HttpClients.createDefault()) {
            HttpGet httpGet = new HttpGet("http://www.cnblogs.com/");
            try (CloseableHttpResponse response = httpClient.execute(httpGet)) {
                HttpEntity entity = response.getEntity();
                if (entity == null) {
                    // A response may legitimately have no body; fail with a clear message.
                    throw new IOException("Response contained no entity: " + response.getStatusLine());
                }
                // The charset argument is only the fallback used when the
                // response does not declare a charset of its own.
                content = EntityUtils.toString(entity, "utf-8");
            }
        }

        // Parse the page source into a jsoup document.
        Document doc = Jsoup.parse(content);

        // Collect every <title> element and use the first one, if any.
        Elements elements = doc.getElementsByTag("title");
        // text() returns the plain text; html() would return the inner HTML instead.
        String title = elements.isEmpty() ? "" : elements.get(0).text();
        System.out.println(" page title: " + title);

        // getElementById returns null when no element carries the id.
        Element element1 = doc.getElementById("site_nav_top");
        String str = (element1 == null) ? "" : element1.text();
        System.out.println("str:" + str);
    }
}