Java 解析http返回的xml数据

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/meteorsshower2013/article/details/80896106

Java 解析http返回的xml数据,写成txt文件

需求:

每小时抓取一次给定api接口返回的xml数据,把原始数据保存为xml文件;再把xml数据转换为txt格式,保存为txt文件。
文件名以yyyyMMddHH0000.txt和yyyyMMddHH0000.xml方式命名,如20180703090000.xml,表示2018年7月3日上午9时下载的数据

api说明:

GetLastHoursData 获取任意小时的小时数据,请求案例:
http://59.172.208.250:8001/AppServer/PublishData.asmx/GetLastHoursData?UsrName={账号}&passWord={密码}&date=2017-11-13%2012:00:00

返回数据格式:

<ArrayOfLt_HourAqiModel xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://tempuri.org/">
  <Lt_HourAqiModel>
    <StationName>刘家沟</StationName>
    <UniqueCode>420300052</UniqueCode>
    <QueryTime>2018-07-03 09:00:00</QueryTime>
    <PM25OneHour>NA</PM25OneHour>
    <PM10OneHour>NA</PM10OneHour>
    <SO2OneHour>NA</SO2OneHour>
    <NO2OneHour>NA</NO2OneHour>
    <COOneHour>NA</COOneHour>
    <O3OneHour>NA</O3OneHour>
    <AQI>NA</AQI>
    <PrimaryEP />
    <AQDegree />
    <AQType />
  </Lt_HourAqiModel>
  <Lt_HourAqiModel>
    <StationName>大冶市</StationName>
    <UniqueCode>420200402</UniqueCode>
    <QueryTime>2018-07-03 09:00:00</QueryTime>
    <PM25OneHour>NA</PM25OneHour>
    <PM10OneHour>NA</PM10OneHour>
    <SO2OneHour>NA</SO2OneHour>
    <NO2OneHour>NA</NO2OneHour>
    <COOneHour>NA</COOneHour>
    <O3OneHour>NA</O3OneHour>
    <AQI>NA</AQI>
    <PrimaryEP />
    <AQDegree />
    <AQType />
  </Lt_HourAqiModel>
</ArrayOfLt_HourAqiModel>

第一步

编写配置文件url2.xml(文件名需与后面代码中读取的"url2.xml"保持一致)

<?xml version="1.0" encoding="UTF-8"?>
<!-- http://59.172.208.250:8001/AppServer/PublishData.asmx/ GetLastHoursData? 
    UsrName={账号}&passWord={密码}&date=2017-11-13%2012:00:00 -->
<pm25>
    <baseUrl>http://59.172.208.250:8001/AppServer/PublishData.asmx/
    </baseUrl>
    <method>GetLastHoursData
    </method>
    <UsrName>a******n
    </UsrName>
    <passWord>I*****XWO
    </passWord>
    <xmlSavePath>K:\baidudownload\</xmlSavePath>
    <txtSavePath>K:\baidudownload\</txtSavePath>
</pm25>

导入日志记录jar包,记录每小时的流程

1.log4j-api-2.3.jar
2.log4j-core-2.3.jar

第二步:解析下载所需要的配置xml文件

1.获取url链接
2.获取xml文件的存放路径
3.获取txt文件的存放路径

package cn.whu.edu.sendimage.pm25.spider;

import java.io.IOException;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.w3c.dom.Document;
import org.xml.sax.SAXException;

/**
 * Reads the spider's XML configuration file and exposes the request URL prefix
 * and the two output locations declared in it.
 *
 * <p>The elements looked up are {@code baseUrl}, {@code method}, {@code UsrName},
 * {@code passWord}, {@code xmlSavePath} and {@code txtSavePath}. For each, the first
 * occurrence in the document is used and surrounding whitespace (including line
 * breaks inside the element) is trimmed.
 */
public class XMLReaderUtil {
    private static final Logger logger = LogManager.getLogger("XMLReaderUtil.class");

    // Held per instance so that two readers built from different files never
    // overwrite each other's parsed document.
    private final Document document;

    /**
     * Parses the configuration file.
     *
     * @param path location of the configuration file, handed to
     *             {@link DocumentBuilder#parse(String)} as a URI
     * @throws SAXException                 if the file is not well-formed XML
     * @throws IOException                  if the file cannot be read
     * @throws ParserConfigurationException if no XML parser can be created
     */
    public XMLReaderUtil(String path) throws SAXException, IOException, ParserConfigurationException {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        DocumentBuilder db = factory.newDocumentBuilder();
        this.document = db.parse(path);
    }

    /**
     * Builds the request URL up to and including the credentials:
     * {@code <baseUrl><method>?UsrName=<UsrName>&passWord=<passWord>}.
     * The {@code date} parameter is appended by the caller.
     *
     * @return the URL prefix
     * @throws IllegalStateException if one of the required elements is missing or empty
     */
    public String getUrl() throws Exception {
        StringBuilder sb = new StringBuilder();
        sb.append(requiredText("baseUrl"));
        sb.append(requiredText("method"));
        sb.append("?");
        sb.append("UsrName=").append(requiredText("UsrName"));
        sb.append("&");
        sb.append("passWord=");
        // Log the URL with the password masked so credentials never reach the log file.
        String maskedUrl = sb + "******";
        sb.append(requiredText("passWord"));
        logger.info(maskedUrl);
        return sb.toString();
    }

    /**
     * Returns the configured {@code xmlSavePath} value.
     *
     * @throws IllegalStateException if the element is missing or empty
     */
    public String getXmlSavePath() throws Exception {
        String xmlSavePath = requiredText("xmlSavePath");
        logger.info(xmlSavePath);
        return xmlSavePath;
    }

    /**
     * Returns the configured {@code txtSavePath} value.
     *
     * @throws IllegalStateException if the element is missing or empty
     */
    public String getTxtSavePath() {
        String txtSavePath = requiredText("txtSavePath");
        logger.info(txtSavePath);
        return txtSavePath;
    }

    /**
     * Returns the trimmed text of the first element with the given tag name.
     * Fails with a message naming the element instead of a bare NullPointerException.
     */
    private String requiredText(String tagName) {
        org.w3c.dom.Node node = document.getElementsByTagName(tagName).item(0);
        if (node == null) {
            throw new IllegalStateException("Missing <" + tagName + "> element in configuration file");
        }
        String text = node.getTextContent();
        if (text == null || text.trim().isEmpty()) {
            throw new IllegalStateException("Empty <" + tagName + "> element in configuration file");
        }
        return text.trim();
    }

    /** Manual smoke test: prints the three configured values read from {@code url2.xml}. */
    public static void main(String[] args) throws Exception {
        XMLReaderUtil reader = new XMLReaderUtil("url2.xml");
        String url = reader.getUrl();
        String xmlpath = reader.getXmlSavePath();
        String txtpath = reader.getTxtSavePath();
        System.out.println(url);
        System.out.println(xmlpath);
        System.out.println(txtpath);
    }
}


第三步:爬取一个url的数据

导入httpClient包

httpClient下载链接:http://hc.apache.org/downloads.cgi
这里写图片描述

获取http数据写入xml文件

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.text.SimpleDateFormat;
import java.util.Calendar;

import org.apache.http.HttpEntity;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.HttpClients;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

public class Spider {

    private static final Logger logger = LogManager.getLogger("Spider.class");

    public static void start() throws Exception {

        // 读取xml,
        XMLReaderUtil util = null;
        try {
            util = new XMLReaderUtil("url2.xml");
        } catch (Exception e) {
            e.printStackTrace();
        }
        String url = util.getUrl();
        String xmlpath = util.getXmlSavePath();
        String txtpath = util.getTxtSavePath();
        String time = getBeijingTimeStr();

        //写入xml文件
        getHttpJson(url, getUrlDate(), xmlpath);

        // xml变为txt
        Xml2Txt xml2txt = new Xml2Txt(xmlpath + time + ".xml", txtpath + time + ".txt");
        xml2txt.start();
    }

    //url中需要的时间格式
    private static String getUrlDate() {
        Calendar cal = Calendar.getInstance();
        // date=2017-11-13%2012:00:00
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd%20HH:00:00");
        String dateString = sdf.format(cal.getTime());
        return dateString;
    }

    //拼接url需要的时间。下载数据放入其中xml文件中
    private static void getHttpJson(String url, String time, String path) throws InterruptedException {

        logger.info("getHttpJson========================================");

        String urlTail = "&date=" + time;
        // 建立一个新的请求客户端
        HttpClient httpClient = HttpClients.createDefault();
        HttpGet httpGet = null;
        // 获取网址的返回结果
        CloseableHttpResponse response = null;
        System.out.println(url + urlTail);
        httpGet = new HttpGet(url + urlTail);
        try {
            response = (CloseableHttpResponse) httpClient.execute(httpGet);
            // System.out.println(response);
        } catch (IOException e) {
            e.printStackTrace();
        }

        // 获取返回结果中的实体
        HttpEntity entity = response.getEntity();

        try {
            // EntityUtils.toString(entity);
            InputStream in = entity.getContent();
            //写入xml文件
            File file = new File(path + getBeijingTimeStr() + ".xml");
            if (!file.exists()) {
                file.createNewFile();
            }

            try {
                FileOutputStream fout = new FileOutputStream(file);
                int l = -1;
                byte[] tmp = new byte[1024];
                while ((l = in.read(tmp)) != -1) {
                    fout.write(tmp, 0, l);
                    // 注意这里如果用OutputStream.write(buff)的话,图片会失真,大家可以试试
                }
                fout.flush();
                fout.close();
            } finally {
                // 关闭低层流。
                in.close();
            }
        } catch (Exception e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
    }

    //文件名的前缀
    private static String getBeijingTimeStr() {
        StringBuffer BeijingTimeBuffer = new StringBuffer();
        // 1、取得本地时间:
        Calendar c1 = Calendar.getInstance();
        // 获得年份
        int year = c1.get(Calendar.YEAR);
        // 获得月份
        int month = c1.get(Calendar.MONTH) + 1;
        // 获得日期
        int day = c1.get(Calendar.DATE);
        // 获得小时
        int hour = c1.get(Calendar.HOUR_OF_DAY);

        BeijingTimeBuffer.append(year).append(String.format("%02d", month)).append(String.format("%02d", day));
        BeijingTimeBuffer.append(String.format("%02d", hour)).append("00").append("00");
        return BeijingTimeBuffer.toString();
    }

    public static void main(String[] args) throws Exception {
        start();
    }
}

第四步:解析下载的xml文件,写成txt

使用xstream包
下载xstream包链接:http://x-stream.github.io/
导入以下三个包:

1.xstream-1.4.10.jar
2.xpp3_min-1.1.4c.jar
3.xmlpull-1.1.3.1.jar

根据xml标签创建类:ArrayOfLt_HourAqiModel;Lt_HourAqiModel
1.ArrayOfLt_HourAqiModel类:

import java.util.List;

/**
 * Container for the {@code Lt_HourAqiModel} entries of one downloaded document;
 * corresponds to the {@code ArrayOfLt_HourAqiModel} root element of the XML.
 */
public class ArrayOfLt_HourAqiModel {
    // Xml2Txt registers this field by its name ("Lt_HourAqiModel") as an implicit
    // collection, so the field must not be renamed.
    private List<Lt_HourAqiModel> Lt_HourAqiModel;

    /**
     * Appends one entry.
     *
     * @param lt the entry to add
     */
    public void add(Lt_HourAqiModel lt) {
        getContent().add(lt);
    }

    /**
     * Returns the live list of entries, never {@code null}.
     * The list is created on first access because nothing else in this class initialises it.
     */
    public List<Lt_HourAqiModel> getContent() {
        if (Lt_HourAqiModel == null) {
            Lt_HourAqiModel = new java.util.ArrayList<Lt_HourAqiModel>();
        }
        return Lt_HourAqiModel;
    }
}

2.Lt_HourAqiModel类


/**
 * One {@code <Lt_HourAqiModel>} record of the downloaded XML: a single station's
 * readings for one query time. Every value is kept as the raw string from the XML
 * (the sample data contains {@code NA} and empty elements).
 *
 * <p>The field names deliberately start with an upper-case letter: they are spelled
 * exactly like the child elements of {@code <Lt_HourAqiModel>} in the XML.
 */
public class Lt_HourAqiModel {

    // Station identification and timestamp.
    private String StationName;
    private String UniqueCode;
    private String QueryTime;

    // One-hour pollutant readings.
    private String PM25OneHour;
    private String PM10OneHour;
    private String SO2OneHour;
    private String NO2OneHour;
    private String COOneHour;
    private String O3OneHour;

    // Air-quality index and the remaining descriptive elements.
    private String AQI;
    private String PrimaryEP;
    private String AQDegree;
    private String AQType;

    /** Lists all thirteen fields as {@code name=value} pairs, in declaration order. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("Lt_HourAqiModel [");
        text.append("StationName=").append(this.StationName);
        text.append(", UniqueCode=").append(this.UniqueCode);
        text.append(", QueryTime=").append(this.QueryTime);
        text.append(", PM25OneHour=").append(this.PM25OneHour);
        text.append(", PM10OneHour=").append(this.PM10OneHour);
        text.append(", SO2OneHour=").append(this.SO2OneHour);
        text.append(", NO2OneHour=").append(this.NO2OneHour);
        text.append(", COOneHour=").append(this.COOneHour);
        text.append(", O3OneHour=").append(this.O3OneHour);
        text.append(", AQI=").append(this.AQI);
        text.append(", PrimaryEP=").append(this.PrimaryEP);
        text.append(", AQDegree=").append(this.AQDegree);
        text.append(", AQType=").append(this.AQType);
        return text.append("]").toString();
    }

    public String getStationName() {
        return this.StationName;
    }

    public void setStationName(String stationName) {
        this.StationName = stationName;
    }

    public String getUniqueCode() {
        return this.UniqueCode;
    }

    public void setUniqueCode(String uniqueCode) {
        this.UniqueCode = uniqueCode;
    }

    public String getQueryTime() {
        return this.QueryTime;
    }

    public void setQueryTime(String queryTime) {
        this.QueryTime = queryTime;
    }

    public String getPM25OneHour() {
        return this.PM25OneHour;
    }

    public void setPM25OneHour(String pM25OneHour) {
        this.PM25OneHour = pM25OneHour;
    }

    public String getPM10OneHour() {
        return this.PM10OneHour;
    }

    public void setPM10OneHour(String pM10OneHour) {
        this.PM10OneHour = pM10OneHour;
    }

    public String getSO2OneHour() {
        return this.SO2OneHour;
    }

    public void setSO2OneHour(String sO2OneHour) {
        this.SO2OneHour = sO2OneHour;
    }

    public String getNO2OneHour() {
        return this.NO2OneHour;
    }

    public void setNO2OneHour(String nO2OneHour) {
        this.NO2OneHour = nO2OneHour;
    }

    public String getCOOneHour() {
        return this.COOneHour;
    }

    public void setCOOneHour(String cOOneHour) {
        this.COOneHour = cOOneHour;
    }

    public String getO3OneHour() {
        return this.O3OneHour;
    }

    public void setO3OneHour(String o3OneHour) {
        this.O3OneHour = o3OneHour;
    }

    public String getAQI() {
        return this.AQI;
    }

    public void setAQI(String aQI) {
        this.AQI = aQI;
    }

    public String getPrimaryEP() {
        return this.PrimaryEP;
    }

    public void setPrimaryEP(String primaryEP) {
        this.PrimaryEP = primaryEP;
    }

    public String getAQDegree() {
        return this.AQDegree;
    }

    public void setAQDegree(String aQDegree) {
        this.AQDegree = aQDegree;
    }

    public String getAQType() {
        return this.AQType;
    }

    public void setAQType(String aQType) {
        this.AQType = aQType;
    }

}

从xml文件中转换为对象,并写入txt

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.List;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import com.thoughtworks.xstream.XStream;

/**
 * Converts one downloaded XML file into a tab-separated txt file:
 * one header line followed by one line per {@code Lt_HourAqiModel} record.
 */
public class Xml2Txt {
    private static final Logger logger = LogManager.getLogger("Xml2Txt.class");

    /** Column titles, in exactly the order in which the row values are written. */
    private static final String[] COLUMNS = { "StationName", "UniqueCode", "QueryTime", "PM25OneHour",
            "PM10OneHour", "SO2OneHour", "NO2OneHour", "COOneHour", "O3OneHour", "AQI", "PrimaryEP",
            "AQDegree", "AQType" };

    private String xmlPath;
    private String txtPath;

    /**
     * @param xmlPath path of the XML file to read
     * @param txtPath path of the txt file to write (appended to if it already exists)
     */
    public Xml2Txt(String xmlPath, String txtPath) {
        super();
        this.xmlPath = xmlPath;
        this.txtPath = txtPath;
    }

    public String getXmlPath() {
        return xmlPath;
    }

    public void setXmlPath(String xmlPath) {
        this.xmlPath = xmlPath;
    }

    public String getTxtPath() {
        return txtPath;
    }

    public void setTxtPath(String txtPath) {
        this.txtPath = txtPath;
    }

    /**
     * Reads the XML file and returns its records.
     *
     * @param xmlPath path of the XML file to read
     * @return the records held by the parsed root object, as returned by its {@code getContent()}
     */
    public static List<Lt_HourAqiModel> getXmlDataList(String xmlPath) {
        logger.info("======getXmlDataList========");
        logger.info("xmlPath:"+xmlPath);
        XStream xstream = new XStream();
        // The file content comes from a remote server: restrict deserialisation to the
        // two model classes instead of letting XStream instantiate arbitrary types.
        XStream.setupDefaultSecurity(xstream);
        xstream.allowTypes(new Class[] { ArrayOfLt_HourAqiModel.class, Lt_HourAqiModel.class });

        xstream.alias("ArrayOfLt_HourAqiModel", ArrayOfLt_HourAqiModel.class);
        xstream.alias("Lt_HourAqiModel", Lt_HourAqiModel.class);
        // The records are direct children of the root, without a wrapping list element.
        xstream.addImplicitCollection(ArrayOfLt_HourAqiModel.class, "Lt_HourAqiModel");

        ArrayOfLt_HourAqiModel ArrayOfLt = (ArrayOfLt_HourAqiModel) xstream.fromXML(new File(xmlPath));
        logger.info(ArrayOfLt.getContent());
        return (ArrayOfLt.getContent());
    }

    /**
     * Appends the records to the txt file, one tab-separated line per record.
     * The header line is written first when the file is empty; when the file already
     * has content only the rows are appended. Every line ends with a tab followed by CRLF.
     *
     * <p>The file is created even when {@code list} is {@code null} or empty; in that
     * case nothing is written to it.
     *
     * <p>NOTE(review): {@code FileWriter} encodes with the platform default charset —
     * confirm that is what the consumers of the txt file expect.
     *
     * @param list    records to write; may be {@code null} or empty
     * @param txtPath path of the txt file
     * @throws IOException if the file cannot be created or written
     */
    public static void WriteStringToTxtFile(List<Lt_HourAqiModel> list, String txtPath) throws IOException {
        logger.info("================WriteListToTxtFile========================");
        logger.info("写入地址:" + txtPath);
        File file = new File(txtPath);
        if (!file.exists()) {
            file.createNewFile();
        }
        if (list == null || list.isEmpty()) {
            logger.info("list size is null or size is 0");
            return;
        }

        // An empty file has no header yet.
        boolean needsHeader = file.length() == 0;
        // try-with-resources closes the writer even when a write fails.
        try (FileWriter out = new FileWriter(file, true)) {
            if (needsHeader) {
                out.write(toLine(COLUMNS));
            }
            for (Lt_HourAqiModel lt : list) {
                out.write(toLine(new String[] { lt.getStationName(), lt.getUniqueCode(), lt.getQueryTime(),
                        lt.getPM25OneHour(), lt.getPM10OneHour(), lt.getSO2OneHour(), lt.getNO2OneHour(),
                        lt.getCOOneHour(), lt.getO3OneHour(), lt.getAQI(), lt.getPrimaryEP(),
                        lt.getAQDegree(), lt.getAQType() }));
            }
        }
    }

    /** Joins the cells into one line: each cell followed by a tab, then CRLF. */
    private static String toLine(String[] cells) {
        StringBuilder line = new StringBuilder();
        for (String cell : cells) {
            line.append(cell).append("\t");
        }
        return line.append("\r\n").toString();
    }

    /**
     * Reads the configured XML file and writes its records to the configured txt file.
     *
     * @throws IOException if the txt file cannot be written
     */
    public void start() throws IOException {
        List<Lt_HourAqiModel> list = getXmlDataList(this.xmlPath);
        logger.info(list);
        WriteStringToTxtFile(list, this.txtPath);
    }

}

第五步:定时任务调度

每小时下载一次
创建任务

import java.util.Date;
import java.util.TimerTask;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;



/**
 * Timer task that runs one {@code Spider} download-and-convert cycle per execution.
 */
public class Tasker extends TimerTask{

    private static final Logger logger = LogManager.getLogger("TimerTask.class");

    /**
     * Runs one cycle. A failure is written to the log together with its stack trace
     * and is not rethrown: an exception escaping from a {@link TimerTask} would stop
     * the timer thread and with it all later runs.
     */
    @Override
    public void run() {
        logger.info("============开始执行"+new Date()+"===================");

        try {
            Spider.start();
        } catch (Exception e) {
            logger.error("Spider run failed", e);
        }

    }

}

调度程序

import java.util.Timer;

public class PMTimer {

    public static void main(String[] args) {
        Timer timer = new Timer();
        Tasker tasker = new Tasker();

        timer.schedule(tasker, 0L, 60*60*1000L);


    }

}

猜你喜欢

转载自blog.csdn.net/meteorsshower2013/article/details/80896106
今日推荐