利用 JDK 自带的 Document + XPath 解析 XML

利用 JDK 自带的 Document + XPath 解析 XML，记录一下

准备工作

inventory.dtd 和 inventory.xml 文件
dtd 中的 PCDATA 的意思是被解析的字符数据（parsed character data）。可把字符数据想象为 XML 元素的开始标签与结束标签之间的文本。PCDATA 是会被解析器解析的文本。
CDATA 的意思是字符数据（character data）。CDATA 是不会被解析器解析的文本。

inventory.dtd

<?xml version="1.0" encoding="UTF-8"?>
<!ELEMENT inventory (book+)>
<!ELEMENT book (title,author,dynasty,price)>
<!ATTLIST book year CDATA #REQUIRED>
<!ELEMENT title (#PCDATA)>
<!ELEMENT author (#PCDATA)>
<!ELEMENT dynasty (#PCDATA)>
<!ELEMENT price (#PCDATA)>

inventory.xml

<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE inventory SYSTEM "inventory.dtd">
<inventory>
    <book year="2012">
        <title>菜根谭</title>
        <author>洪应明</author>
        <dynasty>明朝</dynasty>
        <price>38</price>
    </book>
    <book year="2013">
        <title>曾国藩家书</title>
        <author>曾国藩</author>
        <dynasty>清朝</dynasty>
        <price>70</price>
    </book>
    <book year="2014">
        <title>高等代数</title>
        <author>丘维声</author>
        <dynasty>中华人民共和国</dynasty>
        <price>86</price>
    </book>
</inventory>

工具类

借鉴了博客【用DOM解析XML ，用xpath快速查询XML节点】的内容

package com.me.util;

import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.ErrorHandler;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathFactory;
import java.io.File;

/**
 * Small demo that loads an XML file into a DOM {@link Document} with DTD
 * validation enabled and prints the nodes selected by an XPath expression.
 *
 * <p>Usage: {@code java ParseXmlUtil [path-to-xml]}. When no path is given,
 * the built-in default location is used.
 *
 * <p>Created 2020/6/10 16:37.
 *
 * @author yanyg
 */
public class ParseXmlUtil {

    /** Selects the title text of every book whose price is greater than 80. */
    private static final String XPATH_EXPRESSION = "//book[price>80]/title/text()";

    /** XML file that is parsed when no path is supplied on the command line. */
    private static final String DEFAULT_FILE_PATH =
            "D:\\work-space\\springboot\\src\\main\\resources\\inventory.xml";

    /**
     * Parses the XML file and prints every node matched by {@link #XPATH_EXPRESSION}
     * as {@code nodeName=textContent}, one per line, on standard output.
     *
     * @param args optional; {@code args[0]} is the path of the XML file to parse.
     *             Falls back to {@link #DEFAULT_FILE_PATH} when absent.
     */
    public static void main(String[] args) {
        String filePath = (args != null && args.length > 0) ? args[0] : DEFAULT_FILE_PATH;
        try {
            // Load the XML file into a Document object.
            Document document = newDocumentBuilder().parse(new File(filePath));
            // Query the document with the configured expression.
            processParseXmlWithXpath(document, XPATH_EXPRESSION);
        } catch (Exception e) {
            // Top-level boundary of a command-line demo: report and exit normally.
            e.printStackTrace();
        }
    }

    /**
     * Builds a validating, non-namespace-aware DOM parser that drops comments and
     * ignorable whitespace and reports problems through {@link ReportingErrorHandler}.
     *
     * @return a configured {@link DocumentBuilder}
     * @throws javax.xml.parsers.ParserConfigurationException if the requested
     *         configuration cannot be satisfied by the JAXP implementation
     */
    private static DocumentBuilder newDocumentBuilder()
            throws javax.xml.parsers.ParserConfigurationException {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        // Enable DTD validation.
        factory.setValidating(true);
        factory.setNamespaceAware(false);
        factory.setIgnoringComments(true);
        factory.setIgnoringElementContentWhitespace(true);
        factory.setCoalescing(false);
        factory.setExpandEntityReferences(true);

        DocumentBuilder builder = factory.newDocumentBuilder();
        builder.setErrorHandler(new ReportingErrorHandler());
        return builder;
    }

    /**
     * Evaluates {@code xPathExpression} directly against {@code document} and prints
     * each selected node.
     *
     * @param document        the parsed XML document to query
     * @param xPathExpression the XPath expression to evaluate; it must select a node-set
     * @throws Exception if the expression is invalid or cannot be evaluated
     */
    private static void processParseXmlWithXpath(Document document, String xPathExpression) throws Exception {
        // Expression syntax reference: https://blog.csdn.net/zlj_blog/article/details/54092534
        XPath xPath = XPathFactory.newInstance().newXPath();
        NodeList nodeList = (NodeList) xPath.evaluate(xPathExpression, document, XPathConstants.NODESET);
        printNodes(nodeList);
    }

    /**
     * Same as {@link #processParseXmlWithXpath(Document, String)} but compiles the
     * expression into an {@link XPathExpression} before evaluating it.
     *
     * @param document        the parsed XML document to query
     * @param xPathExpression the XPath expression to compile; it must select a node-set
     * @throws Exception if the expression is invalid or cannot be evaluated
     */
    private static void processParseXmlWithXpathExpress(Document document, String xPathExpression) throws Exception {
        XPath xPath = XPathFactory.newInstance().newXPath();
        XPathExpression expression = xPath.compile(xPathExpression);
        NodeList nodeList = (NodeList) expression.evaluate(document, XPathConstants.NODESET);
        printNodes(nodeList);
    }

    /**
     * Prints every node of {@code nodeList} as {@code nodeName=textContent}.
     *
     * @param nodeList the nodes to print
     */
    private static void printNodes(NodeList nodeList) {
        for (int i = 0; i < nodeList.getLength(); i++) {
            Node item = nodeList.item(i);
            // getTextContent() is used rather than getNodeValue() because the latter
            // is null for element nodes.
            System.out.println(item.getNodeName() + "=" + item.getTextContent());
        }
    }

    /**
     * Error handler that prints warnings and recoverable (e.g. validation) errors
     * and lets parsing continue, but aborts on fatal errors.
     */
    private static final class ReportingErrorHandler implements ErrorHandler {

        @Override
        public void warning(SAXParseException exception) throws SAXException {
            System.out.println("warn:" + exception.getMessage());
        }

        @Override
        public void error(SAXParseException exception) throws SAXException {
            System.out.println("error:" + exception.getMessage());
        }

        @Override
        public void fatalError(SAXParseException exception) throws SAXException {
            System.out.println("fatalError:" + exception.getMessage());
            // The document is not well-formed; stop instead of querying a broken tree.
            throw exception;
        }
    }
}

利用 JDK 自带的 Document + XPath 解析 XML

准备工作

猜你喜欢