Crawl data
Fetch the page returned by the request and extract the data we want from it.
jsoup
Package: Parse the web page
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.10.2</version>
</dependency>
tika
Package: Crawl movies
Code
package com.qun.util;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import java.net.URL;
/**
 * Small demo that downloads a JD search-results page with jsoup and prints
 * the {@code <li>} entries found inside the element whose id is
 * {@code J_goodsList}.
 */
public class HtmlParseUtil {
    /**
     * Fetches the search page, locates the {@code J_goodsList} container and
     * prints every {@code <li>} element contained in it, one per line.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the URL is malformed or the page cannot be fetched
     *                   or parsed
     */
    public static void main(String[] args) throws Exception {
        String url = "https://search.jd.com/Search?keyword=java";

        // Parse the web page; the result is a Document comparable to the
        // browser's DOM document object. The second argument is the fetch timeout.
        Document document = Jsoup.parse(new URL(url), 3000);

        Element goodsList = document.getElementById("J_goodsList");
        if (goodsList == null) {
            // getElementById returns null when no element has that id, e.g. when
            // the site serves a different page than expected. Report it instead
            // of failing with a NullPointerException.
            System.err.println("Element with id \"J_goodsList\" not found in " + url);
            return;
        }

        // "li" is a tag name, not an id, so the items must be looked up by tag;
        // an id lookup for "li" would find nothing.
        for (Element item : goodsList.getElementsByTag("li")) {
            System.out.println(item);
        }
    }
}