commons-httpClient Helper

使用HttpClient来发送请求获取数据最经典,以下我们使用JDK自带的HttpURLConnection来操作,很简单:发个请求取出数据,还可以跳过https的验证。

package org.ycl.commons.text;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.net.HttpURLConnection;
import java.net.InetSocketAddress;
import java.net.MalformedURLException;
import java.net.Proxy;
import java.net.URL;
import java.security.cert.X509Certificate;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Map;

import javax.net.ssl.HostnameVerifier;
import javax.net.ssl.HttpsURLConnection;
import javax.net.ssl.SSLContext;
import javax.net.ssl.SSLSession;
import javax.net.ssl.SSLSocketFactory;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;

import org.apache.commons.io.LineIterator;

/**
 * Small HTTP/HTML toolbox built on the JDK's {@link HttpURLConnection}.
 *
 * <ol>
 * <li>{@link #getInputStream(String)} / {@link #getInputStream(String, Proxy)}
 * - open the response body of a URL, optionally through a given proxy.</li>
 * <li>{@link #getString(String)} - read the body into a single String (line
 * breaks are dropped).</li>
 * <li>{@link #getStringList(String)} - read the body as a list of lines.</li>
 * <li>{@link #getStringToday(String)} - read the body through a per-day file
 * copy.</li>
 * <li>{@link #writeFileToday(String)} - write the URL content to its per-day
 * file when {@link #needWriteFileToday(String)} says so.</li>
 * <li>{@link #needWriteFileToday(String)} - check whether the per-day file is
 * missing or older than one day.</li>
 * <li>{@link #getURLFile(String)} - map a URL to its per-day file.</li>
 * <li>{@link #writeFile(String, File)} - write the URL content to a file.</li>
 * <li>{@link #htmlEscape(String)} / {@link #htmlUnescape(String)} - HTML entity
 * conversion, delegated to {@code org.springframework.web.util.HtmlUtils}.</li>
 * </ol>
 *
 * <p>
 * Configuration lives in the mutable {@code DEFAULT_*} static fields below;
 * they are read each time a connection is opened.
 *
 * NOTE: this is from my tool box
 *
 * @author e557400
 */
public class HtmlUtils {
	public static String DEFAULT_CONNECTION_POST = "POST";// HTTP "POST" method name (not referenced inside this class)
	public static String DEFAULT_CONNECTION_GET = "GET";// HTTP "GET" method name, used for every request
	public static boolean DEFAULT_CONNECTION_DOOUTPUT = false;// if you intend to use the URL connection for output
	public static boolean DEFAULT_CONNECTION_ALLOWUSERINTERACTION = false;// no interaction with the user needed, e.g. Applet
	public static boolean DEFAULT_CONNECTION_DOINPUT = true;// if you intend to use the URL connection for input
	public static boolean DEFAULT_CONNECTION_FOLLOWREDIRECTS = true;// follow HTTP redirects, default is true
	public static boolean DEFAULT_DEBUG = true;// if true, stack traces and response headers are printed
	public static boolean DEFAULT_SKIP_SSL = false;// if true, https certificate and host name checks are skipped
	public static String DEFAULT_ENCODE = "UTF-8";// charset used to read responses and to read/write files
	public static String DEFAULT_HTML_FOLDER = "/usr";// root folder of the per-day files, see getURLFile
	public static int DEFAULT_CONNECTION_CONN_TIMEOUT = 3;// connect timeout in minutes
	public static int DEFAULT_CONNECTION_READ_TIMEOUT = 3;// read timeout in minutes

	public static Proxy DEFAULT_PROXY = new Proxy(Proxy.Type.HTTP,
			new InetSocketAddress("proxy.statestr.com", 80));
	public static boolean DEFAULT_PROXY_FLAG = false;

	/**
	 * The number of milliseconds in one second.
	 */
	public static final int ONE_SEC = 1000;

	/**
	 * The number of milliseconds in one minute.
	 */
	public static final int ONE_MIN = ONE_SEC * 60;

	// remove in product env.
	static {
		DEFAULT_PROXY_FLAG = true;
	}

	/**
	 * Override the default proxy, which is used when
	 * {@link #DEFAULT_PROXY_FLAG} is true and no explicit proxy is given.
	 * 
	 * @param proxy
	 *            the new default proxy
	 */
	public static void setDefaultProxy(Proxy proxy) {
		DEFAULT_PROXY = proxy;
	}

	/**
	 * Print the stack trace of a handled exception when
	 * {@link #DEFAULT_DEBUG} is enabled.
	 * 
	 * @param e
	 *            the exception that was handled
	 */
	private static void debug(Exception e) {
		if (DEFAULT_DEBUG) {
			e.printStackTrace();
		}
	}

	/**
	 * Create a configured connection without an explicit proxy.
	 * 
	 * @param urlstring
	 *            the URL to connect to
	 * @return the configured connection
	 * @throws IOException
	 *             if the URL is malformed, is not HTTP(S) or cannot be opened
	 */
	private static HttpURLConnection initConnection(String urlstring)
			throws IOException {
		return initConnection(urlstring, null);
	}

	/**
	 * Create a connection and set request method, connect/read timeouts,
	 * redirect handling and the input/output flags from the
	 * {@code DEFAULT_*} fields. The given proxy is used when it is not null;
	 * otherwise the default proxy is used when {@link #DEFAULT_PROXY_FLAG} is
	 * true and a default proxy is set; otherwise no proxy is used.
	 * 
	 * @param urlstring
	 *            the URL to connect to
	 * @param proxy
	 *            the proxy to use, or null
	 * @return the configured connection
	 * @throws IOException
	 *             if the URL is malformed, is not HTTP(S) or cannot be opened
	 */
	private static HttpURLConnection initConnection(String urlstring,
			Proxy proxy) throws IOException {
		URL url = new URL(urlstring);

		Proxy effectiveProxy = proxy;
		if (effectiveProxy == null && DEFAULT_PROXY_FLAG) {
			// May still be null if setDefaultProxy(null) was called; in that
			// case fall back to a direct connection instead of failing.
			effectiveProxy = DEFAULT_PROXY;
		}
		java.net.URLConnection raw = (effectiveProxy != null)
				? url.openConnection(effectiveProxy)
				: url.openConnection();
		if (!(raw instanceof HttpURLConnection)) {
			throw new IOException("Not an HTTP(S) URL: " + urlstring);
		}
		HttpURLConnection conn = (HttpURLConnection) raw;

		// NOTE: SSL settings must be applied before the connection is used.
		// Only https connections can take them; plain http is left alone.
		if (DEFAULT_SKIP_SSL && conn instanceof HttpsURLConnection) {
			disableSslValidation((HttpsURLConnection) conn);
		}

		conn.setRequestMethod(DEFAULT_CONNECTION_GET);
		conn.setConnectTimeout(DEFAULT_CONNECTION_CONN_TIMEOUT * ONE_MIN);
		conn.setReadTimeout(DEFAULT_CONNECTION_READ_TIMEOUT * ONE_MIN);
		conn.setInstanceFollowRedirects(DEFAULT_CONNECTION_FOLLOWREDIRECTS);

		conn.setAllowUserInteraction(DEFAULT_CONNECTION_ALLOWUSERINTERACTION);
		conn.setDoOutput(DEFAULT_CONNECTION_DOOUTPUT);
		conn.setDoInput(DEFAULT_CONNECTION_DOINPUT);

		if (DEFAULT_DEBUG) {
			dumpResponseHeaders(conn);
		}
		return conn;
	}

	/**
	 * Install an all-trusting trust manager and host name verifier on one
	 * https connection. If the SSL context cannot be created the failure is
	 * reported through {@link #debug(Exception)} and the connection keeps the
	 * JDK's default validation.
	 * 
	 * @param conn
	 *            the https connection to relax
	 */
	private static void disableSslValidation(HttpsURLConnection conn) {
		// Trust manager that does not validate certificate chains.
		TrustManager[] trustAllCerts = new TrustManager[] {
				new X509TrustManager() {
					@Override
					public void checkClientTrusted(X509Certificate[] chain,
							String authType) {
						// accept everything
					}

					@Override
					public void checkServerTrusted(X509Certificate[] chain,
							String authType) {
						// accept everything
					}

					@Override
					public X509Certificate[] getAcceptedIssuers() {
						// the interface contract asks for a non-null array
						return new X509Certificate[0];
					}
				} };
		try {
			SSLContext sslContext = SSLContext.getInstance("TLS");
			sslContext.init(null, trustAllCerts,
					new java.security.SecureRandom());
			SSLSocketFactory sslSocketFactory = sslContext.getSocketFactory();
			conn.setSSLSocketFactory(sslSocketFactory);
			conn.setHostnameVerifier(new HostnameVerifier() {
				@Override
				public boolean verify(String hostname, SSLSession session) {
					return true;
				}
			});
		} catch (java.security.GeneralSecurityException e) {
			debug(e);
		}
	}

	/**
	 * Print the response headers of a connection to standard output.
	 * <p>
	 * NOTE: asking for the header fields makes the JDK send the request, so
	 * in debug mode the request is already issued while the connection is
	 * being initialised.
	 * 
	 * @param conn
	 *            the connection whose headers are printed
	 */
	private static void dumpResponseHeaders(HttpURLConnection conn) {
		Map<String, List<String>> headers = conn.getHeaderFields();
		if (headers == null) {
			return;
		}
		System.out.println("begin header");
		for (Map.Entry<String, List<String>> header : headers.entrySet()) {
			System.out.println("key:" + header.getKey());
			System.out.println("value:" + header.getValue());
		}
		System.out.println("end header");
	}

	/**
	 * Close a <code>Closeable</code>, ignoring any exception. Intended for
	 * finally blocks.
	 * 
	 * @param closeable
	 *            the object to close, may be null or already closed
	 */
	private static void closeQuietly(Closeable closeable) {
		try {
			if (closeable != null) {
				closeable.close();
			}
		} catch (IOException ioe) {
			// ignore
		}
	}

	/**
	 * Wrap a stream in a buffered reader decoding with
	 * {@link #DEFAULT_ENCODE}. The stream is closed if the reader cannot be
	 * created, so it does not leak.
	 * 
	 * @param in
	 *            the stream to read from
	 * @return a buffered reader on the stream
	 * @throws IOException
	 *             if {@link #DEFAULT_ENCODE} is not a supported charset
	 */
	private static BufferedReader openReader(InputStream in)
			throws IOException {
		try {
			return new BufferedReader(new InputStreamReader(in,
					DEFAULT_ENCODE));
		} catch (IOException e) {
			closeQuietly(in);
			throw e;
		}
	}

	/**
	 * Get the content of a URL as an InputStream, using the default proxy
	 * settings. Behaves like {@link #getInputStream(String, Proxy)} with a
	 * null proxy.
	 * 
	 * @param url
	 *            the URL to read
	 * @return the response body; the caller must close it
	 * @throws IOException
	 *             if the request fails or an error response has no body
	 */
	public static InputStream getInputStream(String url) throws IOException {
		return getInputStream(url, null);
	}

	/**
	 * Get the content of a URL as an InputStream, optionally through a
	 * proxy. For a response code of 400 or above the error body is returned
	 * instead of the normal body.
	 * 
	 * @param url
	 *            the URL to read
	 * @param proxy
	 *            the proxy to use, or null for the default proxy settings
	 * @return the response body, never null; the caller must close it
	 * @throws IOException
	 *             if the request fails or an error response has no body
	 */
	public static InputStream getInputStream(String url, Proxy proxy)
			throws IOException {
		HttpURLConnection conn = initConnection(url, proxy);
		int status = conn.getResponseCode();
		if (status < 400) {
			return conn.getInputStream();
		}
		InputStream errorBody = conn.getErrorStream();
		if (errorBody == null) {
			// getErrorStream() is null when the server sent no error body;
			// report that as an IOException instead of handing out null.
			throw new IOException("HTTP " + status
					+ " without response body: " + url);
		}
		return errorBody;
	}

	/**
	 * Get the content of a URL as one String. Lines are concatenated without
	 * line separators.
	 * 
	 * @param url
	 *            the URL to read
	 * @return the content, or null if reading failed
	 */
	public static String getString(String url) {
		BufferedReader in = null;
		try {
			in = openReader(getInputStream(url));
			StringBuilder sb = new StringBuilder();
			String inputLine;
			while ((inputLine = in.readLine()) != null) {
				sb.append(inputLine);
			}
			return sb.toString();
		} catch (IOException e) {
			debug(e);
			return null;
		} finally {
			closeQuietly(in);
		}
	}

	/**
	 * Get the content of a URL as a list of lines.
	 * 
	 * @param url
	 *            the URL to read
	 * @return the lines in order, or null if reading failed
	 */
	public static List<String> getStringList(String url) {
		BufferedReader in = null;
		try {
			in = openReader(getInputStream(url));
			List<String> contents = new ArrayList<String>();
			String inputLine;
			while ((inputLine = in.readLine()) != null) {
				contents.add(inputLine);
			}
			return contents;
		} catch (IOException e) {
			debug(e);
			return null;
		} finally {
			closeQuietly(in);
		}
	}

	/**
	 * Get the content of a URL through its per-day file: the file is
	 * (re)written from the URL when {@link #needWriteFileToday(String)} says
	 * so, then the content is read back from the file. Every line is followed
	 * by the platform line separator.
	 * 
	 * @param urlstring
	 *            the URL to read
	 * @return the content, or null if the URL is malformed or reading failed
	 */
	public static String getStringToday(String urlstring) {
		BufferedReader in = null;
		try {
			writeFileToday(urlstring);
			File file = getURLFile(urlstring);
			if (file == null) {
				// malformed URL, already reported by getURLFile
				return null;
			}
			in = openReader(FileUtils.openInputStream(file));
			String lineSeparator = System.getProperty("line.separator");
			StringBuilder sb = new StringBuilder();
			String inputLine;
			while ((inputLine = in.readLine()) != null) {
				sb.append(inputLine);
				sb.append(lineSeparator);
			}
			return sb.toString();
		} catch (IOException e) {
			debug(e);
			return null;
		} finally {
			closeQuietly(in);
		}
	}

	/**
	 * Write the content of a URL to a file, line by line, each line followed
	 * by the platform line separator. Failures are reported through the debug
	 * output only.
	 * 
	 * @param url
	 *            the URL to read
	 * @param file
	 *            the file to write
	 */
	public static void writeFile(String url, File file) {
		BufferedReader in = null;
		BufferedWriter fw = null;
		try {
			// Open the URL first so a failed request does not touch the file.
			in = openReader(getInputStream(url));
			fw = new BufferedWriter(new OutputStreamWriter(
					FileUtils.openOutputStream(file), DEFAULT_ENCODE));
			String lineSeparator = System.getProperty("line.separator");
			String inputLine;
			while ((inputLine = in.readLine()) != null) {
				fw.write(inputLine);
				fw.write(lineSeparator);
			}
			// Flush here so a write error is reported; closeQuietly below
			// would silently swallow it and leave a truncated file.
			fw.flush();
		} catch (IOException e) {
			debug(e);
		} finally {
			closeQuietly(in);
			closeQuietly(fw);
		}
	}

	/**
	 * Write the content of a URL to its per-day file (see
	 * {@link #getURLFile(String)}) unless
	 * {@link #needWriteFileToday(String)} reports that this is not needed.
	 * 
	 * @param urlstring
	 *            the URL to read
	 */
	public static void writeFileToday(String urlstring) {
		if (needWriteFileToday(urlstring)) {
			writeFile(urlstring, getURLFile(urlstring));
		}
	}

	/**
	 * Check whether the per-day file of a URL has to be written: true when
	 * the file does not exist or was last modified one day or more ago.
	 * 
	 * @param urlstring
	 *            the URL whose file is checked
	 * @return true if the file should be (re)written; false if it is recent
	 *         enough or the URL is malformed (there is no file to write)
	 */
	public static boolean needWriteFileToday(String urlstring) {
		File file = getURLFile(urlstring);
		if (file == null) {
			return false;
		}
		// lastModified() is 0 for a missing file, which makes the age huge.
		long ageMillis = System.currentTimeMillis() - file.lastModified();
		return ageMillis >= DateUtils.MILLIS_PER_DAY;
	}

	/**
	 * Map a URL to its per-day file:
	 * {@link #DEFAULT_HTML_FOLDER} + URL path + today's date formatted as
	 * "MM-dd-yyyy" + ".html".
	 * <p>
	 * NOTE: only the path of the URL is used; URLs that differ only in host
	 * or query string map to the same file.
	 * 
	 * @param urlstring
	 *            the URL to map
	 * @return the file, or null if the URL is malformed
	 */
	public static File getURLFile(String urlstring) {
		try {
			String path = new URL(urlstring).getPath();
			String today = DateUtils.getDateAsString(new Date(), "MM-dd-yyyy");
			return new File(DEFAULT_HTML_FOLDER + path + today + ".html");
		} catch (MalformedURLException e) {
			debug(e);
			return null;
		}
	}

	/**
	 * Turn special characters into HTML character references (e.g.
	 * {@code &lt;}). Delegates to
	 * {@code org.springframework.web.util.HtmlUtils.htmlEscape}.
	 * <p>Reference:
	 * <a href="http://www.w3.org/TR/html4/sgml/entities.html">
	 * http://www.w3.org/TR/html4/sgml/entities.html
	 * </a>
	 * @param input the (unescaped) input string
	 * @return the escaped string
	 */
	public static String htmlEscape(String input) {
		return org.springframework.web.util.HtmlUtils.htmlEscape(input);
	}

	/**
	 * Turn HTML character references into their plain text UNICODE
	 * equivalent. Delegates to
	 * {@code org.springframework.web.util.HtmlUtils.htmlUnescape}.
	 * <p>Reference forms:
	 * <blockquote>
	 * &amp;<i>Entity</i>; - <i>(Example: &amp;amp;)</i><br>
	 * &amp;#<i>Decimal</i>; - <i>(Example: &amp;#68;)</i><br>
	 * &amp;#x<i>Hex</i>; - <i>(Example: &amp;#xE5;)</i><br>
	 * </blockquote>
	 * <p>Reference:
	 * <a href="http://www.w3.org/TR/html4/sgml/entities.html">
	 * http://www.w3.org/TR/html4/sgml/entities.html
	 * </a>
	 * @param input the (escaped) input string
	 * @return the unescaped string
	 */
	public static String htmlUnescape(String input) {
		return org.springframework.web.util.HtmlUtils.htmlUnescape(input);
	}

	/**
	 * Demo: fetch one URL through the per-day file, then print its escaped
	 * form and the round-tripped unescaped form.
	 * 
	 * @param args
	 *            not used
	 * @throws Exception
	 *             if escaping fails
	 */
	public static void main(String args[]) throws Exception {
		String context = getStringToday("http://weather.yahooapis.com/forecastrss?p=CHXX0044&u=c");
		if (context == null) {
			// getStringToday returns null on failure; nothing to escape
			System.err.println("no content");
			return;
		}
		String escape = htmlEscape(context);
		System.out.println(escape);
		System.out.println(htmlUnescape(escape));
	}

}

猜你喜欢

转载自a123159521.iteye.com/blog/2201831