Java 实现通过 POST 方式提交 JSON 参数

由于所爬取的网站需要验证码，我通过浏览器的开发者工具（F12）以及在线 HTTP POST/GET 接口测试工具发现：请求时只要加上相应的请求头（header）信息，就可以跳过验证码校验。

而且该网站只接受 POST 请求，提交的参数也必须是 JSON 格式，否则请求会失败。

现将通过 post 方式提交json参数的方法记录如下:

import java.io.UnsupportedEncodingException;
import java.net.URI;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.List;

import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.methods.HttpRequestBase;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;

import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;

/**
 * Demo crawler that submits a JSON body with an HTTP POST request and prints
 * the {@code orgList} array found in the response.
 *
 * <p>The request carries a fixed set of headers; according to the author these
 * headers are what lets the request pass the site's captcha check.</p>
 *
 * @version 1.0
 * @author zxk
 * @Date 2018-3-3
 */
public class PostJsonParamsTest {

    /** Timeout (milliseconds) used for socket, connect and connection-request. */
    private static final int RUN_TIME = 10000;

    /** Maximum number of consecutive failed attempts before the crawl gives up. */
    private static final int MAX_RETRIES = 3;

    /** The crawl stops as soon as the current page reaches this value (demo limit). */
    private static final String STOP_PAGE = "2";

    /** Status: stop crawling. */
    private static final String STATUS_STOP = "停止";

    /** Status: the last attempt failed or returned nothing; retry the same page. */
    private static final String STATUS_RETRY = "重跑";

    /** Status: the page was fetched successfully; move on to the next page. */
    private static final String STATUS_CRAWLED = "爬取";

    /** Page number to request next, kept as a decimal string. */
    private String page;

    /**
     * Entry point: configures the POST request once, then fetches pages one by
     * one, rebuilding the JSON body for every request so that the page number
     * actually sent always matches the current page.
     *
     * @param args unused
     * @throws Exception if the request cannot be set up; a
     *         {@link NumberFormatException} or {@link UnsupportedEncodingException}
     *         is rethrown with a descriptive message and the original cause attached
     */
    public static void main(String[] args) throws Exception {
        PostJsonParamsTest crawl = new PostJsonParamsTest();

        // Target URL of the search service.
        String url = "http://www.gzcredit.gov.cn/Service/CreditService.asmx/searchOrgWithPage";
        // First page to request.
        crawl.setPage("1");

        HttpPost request = new HttpPost(url);

        try {
            RequestConfig requestConfig = RequestConfig.custom()
                    .setSocketTimeout(RUN_TIME)
                    .setConnectTimeout(RUN_TIME)
                    .setConnectionRequestTimeout(RUN_TIME)
                    .build();
            request.setConfig(requestConfig);

            // Request headers; per the author these let the request skip the captcha check.
            request.addHeader("Accept", "application/json, text/javascript, */*");
            request.addHeader("Accept-Encoding", "gzip, deflate");
            request.addHeader("Accept-Language", "zh-CN,zh;q=0.8");
            request.addHeader("Connection", "keep-alive");
            request.addHeader("Host", "www.gzcredit.gov.cn");
            request.addHeader("Content-Type", "application/json; charset=UTF-8");

            // Kept from the original: the URI is rebuilt from its URL-decoded form.
            URIBuilder builder = new URIBuilder(url);
            URI uri = builder.build();
            uri = new URI(URLDecoder.decode(uri.toString(), "UTF-8"));
            request.setURI(uri);

            String status = "";
            int consecutiveFailures = 0;
            while (!STATUS_STOP.equals(status)) {
                // Rebuild the body on every iteration; otherwise every request
                // would keep asking for the very first page.
                String postParams = buildPostParams(crawl.getPage());
                System.out.println(postParams);
                // Encode the body as UTF-8 so it matches the declared Content-Type.
                request.setEntity(new StringEntity(postParams, "UTF-8"));

                status = crawl.crawlList(request);
                if (STATUS_CRAWLED.equals(status)) {
                    consecutiveFailures = 0;
                    crawl.setPage(String.valueOf(Integer.parseInt(crawl.getPage()) + 1));
                } else {
                    consecutiveFailures++;
                    if (consecutiveFailures >= MAX_RETRIES) {
                        // Bound the retries instead of hammering the server forever.
                        System.err.println("Giving up on page " + crawl.getPage()
                                + " after " + MAX_RETRIES + " failed attempts");
                        break;
                    }
                }

                // if("2713".equals(crawl.getPage())) break;
                if (STOP_PAGE.equals(crawl.getPage())) {
                    break;
                }
            }
        } catch (NumberFormatException e) {
            NumberFormatException wrapped = new NumberFormatException("数字格式错误");
            wrapped.initCause(e);
            throw wrapped;
        } catch (UnsupportedEncodingException e) {
            UnsupportedEncodingException wrapped = new UnsupportedEncodingException("不支持的编码集");
            wrapped.initCause(e);
            throw wrapped;
        }
    }

    /**
     * Builds the JSON request body for the given page.
     *
     * <p>NOTE(review): the {@code count} key is unquoted, which is not strict
     * JSON. The text is kept exactly as in the original request, which the
     * author reports as working against this endpoint — confirm before
     * changing it.</p>
     *
     * @param page page number to request, as a decimal string
     * @return the request body
     */
    private static String buildPostParams(String page) {
        return "{\"condition\":{\"qymc\":\"%%%%\",\"cydw\":\"\"},\"pageNo\":" + page
                + ",\"pageSize\":100,count:2709846}";
    }

    /**
     * Executes the request once and prints the returned organisation list.
     *
     * <p>A fresh HTTP client is created for the call and is always closed
     * afterwards; the response entity is always consumed.</p>
     *
     * @param request fully configured request to execute
     * @return {@code "爬取"} when a non-empty {@code orgList} was received and
     *         printed; {@code "重跑"} on any failure, non-200 status or empty result
     */
    private String crawlList(HttpRequestBase request) {
        // Creating a client is comparable to opening a browser; closing it
        // releases its connections.
        try (CloseableHttpClient httpClient = HttpClients.createDefault()) {
            HttpResponse response = httpClient.execute(request);
            // Grab the entity immediately so it is drained on every exit path,
            // including the non-200 one.
            HttpEntity httpEntity = response.getEntity();
            try {
                int statusCode = response.getStatusLine().getStatusCode();
                if (statusCode != 200) {
                    return STATUS_RETRY;
                }

                // GBK is only the fallback charset used when the response declares none.
                String searchListStr = EntityUtils.toString(httpEntity, "GBK")
                        .replaceAll("\\\\米", "米");
                JSONObject envelope = JSONObject.parseObject(searchListStr);
                String allData = envelope == null ? null : envelope.getString("d");
                if (allData == null) {
                    return STATUS_RETRY;
                }

                JSONObject jsonData = JSONObject.parseObject(normalizeQuotes(allData));
                JSONArray jsonContent = jsonData == null ? null : jsonData.getJSONArray("orgList");
                if (jsonContent == null || jsonContent.size() < 1) {
                    return STATUS_RETRY;
                }
                System.out.println(jsonContent.toJSONString());
                return STATUS_CRAWLED;
            } finally {
                EntityUtils.consumeQuietly(httpEntity);
            }
        } catch (Exception e) {
            e.printStackTrace();
            return STATUS_RETRY;
        }
    }

    /**
     * Repairs payloads whose string values contain stray double quotes.
     *
     * <p>Structural quotes (those next to braces, colons and commas) are first
     * turned into single quotes, every remaining double quote is dropped, and
     * the single quotes are then turned back into double quotes. Line-break
     * tags and tabs are removed and backslashes are replaced by {@code ?}.
     * The order of the replacements matters.</p>
     *
     * @param raw payload text taken from the {@code d} field of the response
     * @return text prepared for JSON parsing
     */
    private static String normalizeQuotes(String raw) {
        return raw.replaceAll("\\{\"", "{'")
                .replaceAll("\":\"", "':'")
                .replaceAll("\",\"", "','")
                .replaceAll("\":", "':")
                .replaceAll(",\"", ",'")
                .replaceAll("\"\\}", "'}")
                .replaceAll("\"", "")
                .replaceAll("'", "\"")
                .replaceAll("<br />", "")
                .replaceAll("\t", "")
                .replaceAll("\\\\", "?");
    }

    /** @return the page number to request next, as a decimal string */
    private String getPage() {
        return page;
    }

    /** @param page the page number to request next, as a decimal string */
    private void setPage(String page) {
        this.page = page;
    }

}

最后，感谢同事在我调试程序陷入死胡同时给予的帮助！



猜你喜欢

转载自blog.csdn.net/zhouxukun123/article/details/79441031