/**
 * Disables HTTPS verification for {@link HttpsURLConnection}: every server
 * certificate is trusted and every host name is accepted.
 *
 * <p><strong>Security warning:</strong> after {@link #doFiltra()} has been called,
 * connections made through the JVM-wide {@code HttpsURLConnection} defaults are
 * open to man-in-the-middle attacks. Use this only for testing or for scraping
 * hosts whose certificate chain cannot be validated.
 *
 * @author edwin
 */
public class FiltratHttpsUtils {

    /** Host name verifier that logs the mismatch and accepts every host. */
    static HostnameVerifier hv = new HostnameVerifier() {
        @Override
        public boolean verify(String urlHostName, SSLSession session) {
            System.out.println("Warning: URL Host: " + urlHostName + " vs. " + session.getPeerHost());
            return true;
        }
    };

    /**
     * Installs a socket factory backed by {@link miTM} as the default for
     * {@code HttpsURLConnection}, so that no certificate chain is ever rejected.
     *
     * @throws Exception if the SSL context cannot be created or initialised
     */
    private static void trustAllHttpsCertificates() throws Exception {
        javax.net.ssl.TrustManager[] trustAllCerts = {new miTM()};
        // "TLS" is the standard context name; the legacy "SSL" name is kept only
        // for backward compatibility by JDK providers.
        javax.net.ssl.SSLContext sc = javax.net.ssl.SSLContext.getInstance("TLS");
        sc.init(null, trustAllCerts, null);
        javax.net.ssl.HttpsURLConnection.setDefaultSSLSocketFactory(sc.getSocketFactory());
    }

    /** Trust manager that accepts every client and server certificate chain. */
    static class miTM implements javax.net.ssl.TrustManager, javax.net.ssl.X509TrustManager {

        /**
         * Returns an empty array: the X509TrustManager contract requires a
         * non-null result, and returning {@code null} can cause a
         * {@code NullPointerException} in callers that iterate over it.
         */
        @Override
        public java.security.cert.X509Certificate[] getAcceptedIssuers() {
            return new java.security.cert.X509Certificate[0];
        }

        /** Always reports the server chain as trusted. */
        public boolean isServerTrusted(java.security.cert.X509Certificate[] certs) {
            return true;
        }

        /** Always reports the client chain as trusted. */
        public boolean isClientTrusted(java.security.cert.X509Certificate[] certs) {
            return true;
        }

        /** Accepts any server certificate chain; never throws. */
        @Override
        public void checkServerTrusted(java.security.cert.X509Certificate[] certs, String authType)
                throws java.security.cert.CertificateException {
            // Intentionally empty: all server certificates are accepted.
        }

        /** Accepts any client certificate chain; never throws. */
        @Override
        public void checkClientTrusted(java.security.cert.X509Certificate[] certs, String authType)
                throws java.security.cert.CertificateException {
            // Intentionally empty: all client certificates are accepted.
        }
    }

    /**
     * Switches off certificate and host name checking for all subsequent
     * {@code HttpsURLConnection} requests. Call once before issuing requests.
     *
     * @throws Exception if the trust-all SSL context cannot be installed
     */
    public static void doFiltra() throws Exception {
        trustAllHttpsCertificates();
        HttpsURLConnection.setDefaultHostnameVerifier(hv);
    }
}
You only need to call FiltratHttpsUtils.doFiltra() once before sending the request.
package com.dao;

import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.Connection.Method;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import com.dao.HttpsUrlValidator.FiltratHttpsUtils;

/**
 * Fetches the invoice-verification index page over HTTPS with jsoup and prints
 * two of its elements, looked up by id, to standard output.
 */
public class PhotoVerification {

    /**
     * Disables HTTPS verification, downloads the page and prints the
     * {@code yzminfo} and {@code imgarea} elements.
     *
     * @param args unused
     * @throws Exception if the trust-all HTTPS setup in
     *     {@code FiltratHttpsUtils.doFiltra()} fails
     */
    public static void main(String[] args) throws Exception {
        try {
            // Must run before the request so the connection is not rejected
            // during certificate or host name verification.
            FiltratHttpsUtils.doFiltra();

            String url = "https://inv-veri.chinatax.gov.cn/index.html";

            // Obtain the parsed page through jsoup's connect method.
            // (Earlier experiments in the original post, left commented out, set a
            // User-Agent header and a 10000 ms timeout on the connection, wrote the
            // results to D:\title.txt and D:\content.txt, and followed news links
            // on http://www.haie.edu.cn/.)
            Document document = Jsoup.connect(url).get();

            // Per the original comments: "yzminfo" holds the text and "imgarea"
            // holds the image as base64 (an earlier attempt used "yzm_img").
            Element timeElement = document.getElementById("yzminfo");
            Element titleElement = document.getElementById("imgarea");

            // getElementById returns null when the id is absent; println then
            // prints "null".
            System.out.println(timeElement);
            System.out.println("----------------------");
            System.out.println(titleElement);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}