httpclient获取百度真实url(java)

百度搜索结果页中显示的链接是临时的跳转地址(形如 www.baidu.com/link?url=...),而我们想要的是点击该链接后浏览器地址栏中最终显示的真实网址。
如:
https://www.baidu.com/link?url=a2VZ4Xp9ukhitVl8mvA9gjMEQXz02cI51SVwYmtbFaLv61OTHeSzmrk0CkX-UN6tqAvrvpGTppbygbyuRyXf0Y__Jy404M0S4-aJ1b_DSDS&wd=&eqid=8761145d00014ab10000000359549e56
转换为
http://muzhi.baidu.com/question/1366450327137412899.html


需要的jar包:
commons-codec-1.6.jar
commons-httpclient-3.1.jar
commons-logging.jar

--------------------------------------代码
package httpclient;

import java.io.IOException;

import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.methods.GetMethod;

public class HttpClient_Get_Url {

    /** Marker that immediately precedes the target address in the fetched response body. */
    private static final String URL_MARKER = "URL='";

    /**
     * Resolves a Baidu search-result link to the address it points to.
     *
     * <p>Issues a GET request for {@code baidu_url}. When the response status is
     * 200, the text between the first {@code URL='} marker and the following
     * single quote in the response body is returned.
     *
     * @param baidu_url the Baidu link URL to resolve
     * @return the extracted target URL, or {@code null} when the status is not
     *         200, the body is unavailable, or the body does not contain the marker
     * @throws HttpException if executing the request fails at the protocol level
     * @throws IOException if an I/O error occurs while executing the request or
     *         reading the response body
     */
    public static String GetTrueUrlByBaiduUrl(String baidu_url) throws HttpException, IOException {
        HttpClient client = new HttpClient();
        // To send the request through a proxy, uncomment and adjust:
        // client.getHostConfiguration().setProxy("172.22.40.20", 8080);
        GetMethod getMethod = new GetMethod(baidu_url);
        try {
            int stateCode = client.executeMethod(getMethod);
            if (stateCode != HttpStatus.SC_OK) {
                return null;
            }
            return extractTargetUrl(getMethod.getResponseBodyAsString());
        } finally {
            // Always hand the connection back, even when the request or the
            // body read throws.
            getMethod.releaseConnection();
        }
    }

    /**
     * Returns the text between the first {@code URL='} marker and the next single
     * quote in {@code body} (or the rest of the body when no closing quote
     * follows), or {@code null} when {@code body} is null or has no marker.
     */
    private static String extractTargetUrl(String body) {
        if (body == null) {
            return null;
        }
        int markerAt = body.indexOf(URL_MARKER);
        if (markerAt < 0) {
            return null;
        }
        int start = markerAt + URL_MARKER.length();
        int end = body.indexOf('\'', start);
        if (end < 0) {
            return body.substring(start);
        }
        return body.substring(start, end);
    }

    /** Demo entry point: resolves a sample Baidu link and prints the result. */
    public static void main(String[] args) throws HttpException, IOException {
        String url = "https://www.baidu.com/link?url=a2VZ4Xp9ukhitVl8mvA9gjMEQXz02cI51SVwYmtbFaLv61OTHeSzmrk0CkX-UN6tqAvrvpGTppbygbyuRyXf0Y__Jy404M0S4-aJ1b_DSDS&wd=&eqid=8761145d00014ab10000000359549e56";
        System.out.println(GetTrueUrlByBaiduUrl(url));
    }
}

猜你喜欢

转载自st4024589553.iteye.com/blog/2382086