Java抓取URL指定的HTML内容

通过Java获取到URL指定的内容,如果有目标数据可以再写代码解析获取到的内容
package com.boonya;
import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Scanner;

public class WebCapturer {
	
	/**
	 * 根据用户获取的网络地址获取该网页的内容
	 * @param myURL  地址
	 *    如:"http://www.google.com.hk/search?q=biao"
	 * @return
	 */
	@SuppressWarnings("resource")
	public static String getHtmlByURL(String myURL){
		URL url=null;
		String htmlContext="";
		try {
			url = new URL(myURL);
			HttpURLConnection conn;
			try {
				conn = (HttpURLConnection) url.openConnection();
				conn.setRequestProperty("User-Agent", // 很重要
						"Mozilla/5.0 (X11; U; Linux i686; zh-CN; rv:1.9.1.2) "
								+ " Gecko/20090803 Fedora/3.5.2-2.fc11 Firefox/3.5.2");
				conn.setRequestMethod("GET");
				Scanner scanner = new Scanner(conn.getInputStream());
				while (scanner.hasNextLine()) {
					htmlContext+=scanner.nextLine();
				}
			} catch (IOException e) {
				e.printStackTrace();
			}
		} catch (MalformedURLException e) {
			e.printStackTrace();
		}
		return htmlContext;
		
	}

	public static void main(String[] args) throws Exception {
		System.out.println(WebCapturer.getHtmlByURL("http://www.google.com.hk/search?q=biao"));
	}
}

猜你喜欢

转载自boonya.iteye.com/blog/1870520