package org.ycl.commons.text; import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.Closeable; import java.io.File; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.OutputStreamWriter; import java.io.Reader; import java.net.HttpURLConnection; import java.net.InetSocketAddress; import java.net.MalformedURLException; import java.net.Proxy; import java.net.URL; import java.security.cert.X509Certificate; import java.util.ArrayList; import java.util.Date; import java.util.List; import java.util.Map; import javax.net.ssl.HostnameVerifier; import javax.net.ssl.HttpsURLConnection; import javax.net.ssl.SSLContext; import javax.net.ssl.SSLSession; import javax.net.ssl.SSLSocketFactory; import javax.net.ssl.TrustManager; import javax.net.ssl.X509TrustManager; import org.apache.commons.io.LineIterator; /** * Functions: * * 1. getInputStream(String url)/getInputStream(String url, Proxy proxy) * <li>- get InputStream from url with proxy(or not)</li> * 2. getString(String url) * <li>- get String from url with one line</li> * <li>- this is simple get html content, {@link HttpClient}</li> * 3. getStringList(String url) * <li>- get List<String> from url with any lines</li> * 4. getStringToday(String urlstring) * <li>- get String from url and save copy in file.</li> * 5. writeFileToday(String urlstring) * <li>- wirte urlstring content to file</li> * 6. needWriteFileToday(String urlstring) * <li>- check this file is generator today, or will be re-write file</li> * 7. getURLFile(String urlstring) * <li>- via urlstring to generator file</li> * 8. writeFile(String url, File file) * <li>- write url content to file</li> * 9. 
htmlEscape(String input)/htmlUnescape(String input) * <li>- turn Html language to transferred meaning, or reverse.</li> * * * NOTE:this is from my tool box * * {@link org.springframework.web.util.HtmlUtils} * @author e557400 * */ public class HtmlUtils { public static String DEFAULT_CONNECTION_POST = "POST";// request in "POST" method public static String DEFAULT_CONNECTION_GET = "GET";// request in "POST" method public static boolean DEFAULT_CONNECTION_DOOUTPUT = false;// if you intend to use the URL connection for output public static boolean DEFAULT_CONNECTION_ALLOWUSERINTERACTION = false;// Don't need to interaction with user, exp:Applet public static boolean DEFAULT_CONNECTION_DOINPUT = true;// if you intend to use the URL connection for input public static boolean DEFAULT_CONNECTION_FOLLOWREDIRECTS = true;//default is true public static boolean DEFAULT_DEBUG = true;// if DEBUG is true, will be print error message public static boolean DEFAULT_SKIP_SSL = false;// if we vistor https, should be skip ssl validate? public static String DEFAULT_ENCODE = "UTF-8";// we read html use this encode. public static String DEFAULT_HTML_FOLDER = "/usr";// we read html use this encode. public static int DEFAULT_CONNECTION_CONN_TIMEOUT = 3;// timeout in minutes public static int DEFAULT_CONNECTION_READ_TIMEOUT = 3;// timeout in minutes public static Proxy DEFAULT_PROXY = new Proxy(Proxy.Type.HTTP, new InetSocketAddress("proxy.statestr.com", 80)); public static boolean DEFAULT_PROXY_FLAG = false; /** * The number of second is 1000 milliseconds. */ public static final int ONE_SEC = 1000; /** * The number of minute is 60 second */ public static final int ONE_MIN = ONE_SEC * 60; // remove in product env. static { DEFAULT_PROXY_FLAG = true; } /** * override default proxy * * @param proxy */ public static void setDefaultProxy(Proxy proxy) { DEFAULT_PROXY = proxy; } /** * main set Connection attribute of * requestMethod,ConnectTimeout,ReadTimeout. 
* * @param urlstring * @return * @throws IOException */ private static HttpURLConnection initConnection(String urlstring) throws IOException { return initConnection(urlstring, null); } /** * main set Connection attribute of * requestMethod,ConnectTimeout,ReadTimeout. we can give Proxy, or use * default Proxy, or no Proxy. * * @param urlstring * @param proxy * @return * @throws IOException */ private static HttpURLConnection initConnection(String urlstring, Proxy proxy) throws IOException { URL url = new URL(urlstring); HttpURLConnection conn = null; if (proxy != null) { conn = (HttpURLConnection) url.openConnection(proxy); } else { if (DEFAULT_PROXY_FLAG) { conn = (HttpURLConnection) url.openConnection(DEFAULT_PROXY); } else { conn = (HttpURLConnection) url.openConnection(); } } //NOTE: SSL valid must be set first, or will be unusable. if(DEFAULT_SKIP_SSL){ try{ // Create a trust manager that does not validate certificate chains final TrustManager[] trustAllCerts = new TrustManager[] { new X509TrustManager() { @Override public void checkClientTrusted( final X509Certificate[] chain, final String authType ) { } @Override public void checkServerTrusted( final X509Certificate[] chain, final String authType ) { } @Override public X509Certificate[] getAcceptedIssuers() { return null; } } }; // Install the all-trusting trust manager final SSLContext sslContext = SSLContext.getInstance( "SSL" ); sslContext.init( null, trustAllCerts, new java.security.SecureRandom() ); // Create an ssl socket factory with our all-trusting manager final SSLSocketFactory sslSocketFactory = sslContext.getSocketFactory(); ( (HttpsURLConnection) conn ).setSSLSocketFactory(sslSocketFactory); ( (HttpsURLConnection) conn ).setHostnameVerifier(new HostnameVerifier(){ @Override public boolean verify(String arg0, SSLSession arg1) { // TODO Auto-generated method stub return true; } }); }catch(Exception e){ if (DEFAULT_DEBUG) { e.printStackTrace(); } } } conn.setRequestMethod(DEFAULT_CONNECTION_GET); 
conn.setConnectTimeout(DEFAULT_CONNECTION_CONN_TIMEOUT * ONE_MIN); conn.setReadTimeout(DEFAULT_CONNECTION_READ_TIMEOUT * ONE_MIN); // set request property // conn.setRequestProperty("Content-Type", // "application/x-www-form-urlencoded"); // conn.setRequestProperty("Content-Type", "text/html; charset=utf-8"); // conn.setRequestProperty("Accept-Language", "en-US"); // conn.setRequestProperty("Accept", // "text/html, application/xhtml+xml, */*"); // conn.setRequestProperty("Accept-Encoding", "gzip, deflate"); // conn.setRequestProperty("User-Agent", // "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)"); // conn.setRequestProperty("Content-Length","10"); conn.setAllowUserInteraction(DEFAULT_CONNECTION_ALLOWUSERINTERACTION); conn.setDoOutput(DEFAULT_CONNECTION_DOOUTPUT); conn.setDoInput(DEFAULT_CONNECTION_DOINPUT); if (DEFAULT_DEBUG) { Map<String, List<String>> headers = conn.getHeaderFields(); if (headers != null) { System.out.println("begin header"); for (Map.Entry<String, List<String>> header : headers .entrySet()) { System.out.println("key:" + header.getKey()); System.out.println("value:" + header.getValue()); } System.out.println("end header"); } } return conn; } /** * Unconditionally close a <code>Closeable</code>. * <p> * Equivalent to {@link Closeable#close()}, except any exceptions will be * ignored. This is typically used in finally blocks. * <p> * Example code: * * <pre> * Closeable closeable = null; * try { * closeable = new FileReader("foo.txt"); * // process closeable * closeable.close(); * } catch (Exception e) { * // error handling * } finally { * IOUtils.closeQuietly(closeable); * } * </pre> * * @param closeable * the object to close, may be null or already closed * @since 2.0 */ private static void closeQuietly(Closeable closeable) { try { if (closeable != null) { closeable.close(); } } catch (IOException ioe) { // ignore } } /** * @{link org.apache.commons.io.IOUtils} Return an Iterator for the lines in * a <code>Reader</code>. 
* <p> * <code>LineIterator</code> holds a reference to the open * <code>Reader</code> specified here. When you have finished with * the iterator you should close the reader to free internal * resources. This can be done by closing the reader directly, or by * calling {@link LineIterator#close()} or * {@link LineIterator#closeQuietly(LineIterator)}. * <p> * The recommended usage pattern is: * * <pre> * try { * LineIterator it = IOUtils.lineIterator(reader); * while (it.hasNext()) { * String line = it.nextLine(); * // / do something with line * } * } finally { * IOUtils.closeQuietly(reader); * } * </pre> * * @param reader * the <code>Reader</code> to read from, not null * @return an Iterator of the lines in the reader, never null * @throws IllegalArgumentException * if the reader is null * @since 1.2 */ private static void closeQuietly(Reader input) { closeQuietly((Closeable) input); } /** * get URL content with InputStream * * @param url * @return * @throws IOException */ public static InputStream getInputStream(String url) throws IOException { HttpURLConnection conn = initConnection(url); InputStream is = null; if (conn.getResponseCode() >= 400) { is = conn.getErrorStream(); } else { is = conn.getInputStream(); } return is; } /** * get URL content with InputStream with Proxy * * @param url * @param proxy * @return * @throws IOException */ public static InputStream getInputStream(String url, Proxy proxy) throws IOException { HttpURLConnection conn = initConnection(url, proxy); InputStream is = conn.getInputStream(); return is; } /** * get URL content with String. 1. success return content. 2. 
fail null * * @param url * @return */ public static String getString(String url) { BufferedReader in = null; StringBuffer sb = new StringBuffer(); try { in = new BufferedReader(new InputStreamReader(getInputStream(url), DEFAULT_ENCODE)); String inputLine; while ((inputLine = in.readLine()) != null) { sb.append(inputLine); } } catch (IOException e) { if (DEFAULT_DEBUG) { e.printStackTrace(); } return null; } finally { closeQuietly(in); } return sb.toString(); } public static List<String> getStringList(String url) { BufferedReader in = null; List<String> contents = new ArrayList<String>(); try { in = new BufferedReader(new InputStreamReader(getInputStream(url), DEFAULT_ENCODE)); String inputLine; while ((inputLine = in.readLine()) != null) { contents.add(inputLine); } } catch (IOException e) { if (DEFAULT_DEBUG) { e.printStackTrace(); } return null; } finally { closeQuietly(in); } return contents; } /** * get url to string, if this file is exist, then read it, or read from URL. * * @param urlstring * @return */ public static String getStringToday(String urlstring) { BufferedReader in = null; try { writeFileToday(urlstring); StringBuffer sb = new StringBuffer(); File file = getURLFile(urlstring); in = new BufferedReader(new InputStreamReader( FileUtils.openInputStream(file), DEFAULT_ENCODE)); String inputLine; while ((inputLine = in.readLine()) != null) { sb.append(inputLine); sb.append(System.getProperty("line.separator")); } // Scanner scanner = new Scanner(new FileInputStream(file), // DEFAULT_ENCODE); // while (scanner.hasNextLine()){ // sb.append(scanner.nextLine()); // } // scanner.close(); return sb.toString(); } catch (IOException e) { if (DEFAULT_DEBUG) { e.printStackTrace(); } return null; } finally { closeQuietly(in); } } /** * we may be read URL content to file, if we have read, so next test we just * get from file. not EveryTime from URL, it can save so may times. 
* * @param url * @param fileName */ public static void writeFile(String url, File file) { BufferedReader in = null; BufferedWriter fw = null; try { in = new BufferedReader(new InputStreamReader(getInputStream(url), DEFAULT_ENCODE)); fw = new BufferedWriter(new OutputStreamWriter( FileUtils.openOutputStream(file), DEFAULT_ENCODE)); String inputLine; while ((inputLine = in.readLine()) != null) { fw.write(inputLine); fw.write(System.getProperty("line.separator")); } } catch (IOException e) { if (DEFAULT_DEBUG) { e.printStackTrace(); } } finally { closeQuietly(in); closeQuietly(fw); } } /** * add file in weather folder * * how to judge we have download today, every day file is difference * * @param url */ public static void writeFileToday(String urlstring) { if (needWriteFileToday(urlstring)) { writeFile(urlstring, getURLFile(urlstring)); } } /** * Path /weather/101210101.shtml so we will be generator file in this * derectory. * * @param urlstring */ public static boolean needWriteFileToday(String urlstring) { File file = getURLFile(urlstring); Long lastmodify = file.lastModified(); Long todaymodify = new Date().getTime(); if (todaymodify - lastmodify < DateUtils.MILLIS_PER_DAY) { return false; } return true; } /** * via url generator file * * @param urlstring * @return */ public static File getURLFile(String urlstring) { URL url = null; File file = null; try { url = new URL(urlstring); String path = url.getPath(); file = new File(DEFAULT_HTML_FOLDER + path+ DateUtils.getDateAsString(new Date(), "MM-dd-yyyy") .concat(".html")); } catch (MalformedURLException e) { if (DEFAULT_DEBUG) { e.printStackTrace(); } } return file; } /** * Turn special characters into HTML character references. * Handles complete character set defined in HTML 4.01 recommendation. * <p>Escapes all special characters to their corresponding * entity reference (e.g. {@code <}). 
* <p>Reference: * <a href="http://www.w3.org/TR/html4/sgml/entities.html"> * http://www.w3.org/TR/html4/sgml/entities.html * </a> * @param input the (unescaped) input string * @return the escaped string */ public static String htmlEscape(String input) { return org.springframework.web.util.HtmlUtils.htmlEscape(input); } /** * Turn HTML character references into their plain text UNICODE equivalent. * <p>Handles complete character set defined in HTML 4.01 recommendation * and all reference types (decimal, hex, and entity). * <p>Correctly converts the following formats: * <blockquote> * &#<i>Entity</i>; - <i>(Example: &amp;) case sensitive</i> * &#<i>Decimal</i>; - <i>(Example: &#68;)</i><br> * &#x<i>Hex</i>; - <i>(Example: &#xE5;) case insensitive</i><br> * </blockquote> * Gracefully handles malformed character references by copying original * characters as is when encountered.<p> * <p>Reference: * <a href="http://www.w3.org/TR/html4/sgml/entities.html"> * http://www.w3.org/TR/html4/sgml/entities.html * </a> * @param input the (escaped) input string * @return the unescaped string */ public static String htmlUnescape(String input) { return org.springframework.web.util.HtmlUtils.htmlUnescape(input); } public static void main(String args[]) throws Exception { //List<String>contexts=getStringList("http://www.weather.com.cn/weather/101210101.shtml");//101210101 //String context =getStringToday("http://weather.yahooapis.com/forecastrss?w=1940345"); //String context = getStringToday("http://weather.yahooapis.com/forecastrss?w=2502265"); String context = getStringToday("http://weather.yahooapis.com/forecastrss?p=CHXX0044&u=c"); //String context = getStringToday("http://m.weather.com.cn/data/101270803.html"); //String context = getStringToday("http://m.weather.com.cn/data/101210101.html"); //String context = getStringToday("http://www.google.com"); //String context = getStringToday("http://www.baidu.com"); //String context = 
getStringToday("http://www.weather.com.cn/weather/101210101.shtml"); //String context = getStringToday("https://aplmd5.it.statestr.com:9445/PALMSServiceWEB/cacheReset"); //String context = getStringToday("http://aplmd5.it.statestr.com:9080/PLM/login.do"); //String context = getStringToday("http://aplmd5.it.statestr.com:9080/PLM/unittest/testproperties.jsp"); // for(String context:contexts) String escape = htmlEscape(context); System.out.println(escape); System.out.println(htmlUnescape(escape)); } }
commons-httpClient Helper
使用HttpClient来发送请求获取数据最为经典;下面我们改用JDK自带的HttpURLConnection来操作,很简单:发个请求取出数据,还可以跳过https的验证。
猜你喜欢
转载自a123159521.iteye.com/blog/2201831
今日推荐
周排行