使用 DOM 方式解析并修改 Word(XML 格式)内容
@Override public String exportPath(ZjclSj sj) { String filepath = "/temp/" + sj.getSjmc() + ".xml"; //调用 DocumentBuilderFactory.newInstance() 方法得到创建 DOM 解析器的工厂 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); try { //调用工厂对象的 newDocumentBuilder方法得到 DOM 解析器对象 DocumentBuilder builder = factory.newDocumentBuilder(); //1、通过文件的方式获取Document对象 String path = ResourceManager.getRealPath() + "/temp/sjmb.xml"; Document document = builder.parse(new File(path)); //根据标签名称获取该名称的所有节点对象 // NodeList nodelist = document.getElementsByTagName("wx:sect"); // //遍历 // for (int i = 0; i < nodelist.getLength(); i++) { // //得到具体的某个节点对象 // Node node = nodelist.item(i); // System.out.println(node.getNodeName()); // listNodes(node); // } //Node sectNode = document.getElementById("sectId"); Node sectNode = document.getElementsByTagName("wx:sect").item(0); System.out.println("----------"+sectNode.getNodeName()); // <w:p> // <w:r> // <w:t>Hello, World.</w:t> // </w:r> // </w:p> //创建元素节点 Element wp = document.createElement("w:p"); Node wr = document.createElement("w:r"); Node wt = document.createElement("w:t"); wt.appendChild(document.createTextNode("Hello, World.")); wr.appendChild(wt); wp.appendChild(wr); sectNode.appendChild(wp); String path2 = ResourceManager.getRealPath() + "/temp/" + sj.getSjmc() + ".doc";; System.out.println("path2:"+path2); saveXml(path2, document); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } return filepath; } public static void saveXml(String fileName, Document doc) {// 将Document输出到文件 TransformerFactory transFactory = TransformerFactory.newInstance(); try { Transformer transformer = transFactory.newTransformer(); transformer.setOutputProperty("indent", "yes"); DOMSource source = new DOMSource(); source.setNode(doc); StreamResult result = new StreamResult(); result.setOutputStream(new FileOutputStream(fileName)); transformer.transform(source, result); } catch (TransformerConfigurationException e) { 
e.printStackTrace(); } catch (TransformerException e) { e.printStackTrace(); } catch (FileNotFoundException e) { e.printStackTrace(); } } /** * 遍历根据节点对象下面的所有的节点对象 * @param node */ public void listNodes(Node node) { // 节点是什么类型的节点 if (node.getNodeType() == Node.ELEMENT_NODE) {// 判断是否是元素节点 Element element = (Element) node; //判断此元素节点是否有属性 if(element.hasAttributes()){ //获取属性节点的集合 NamedNodeMap namenm = element.getAttributes();//Node //遍历属性节点的集合 for(int k=0;k<namenm.getLength();k++){ //获取具体的某个属性节点 Attr attr = (Attr) namenm.item(k); System.out.println("attr:"+attr.getNodeName()+" value:" +attr.getNodeValue()+" type:"+attr.getNodeType()); } } //获取元素节点的所有孩子节点 NodeList listnode = element.getChildNodes(); //遍历 for (int j = 0; j < listnode.getLength(); j++) { //得到某个具体的节点对象 Node nd = listnode.item(j); System.out.println("nd:" + nd.getNodeName() + " value:" + nd.getNodeValue() + " type:" + nd.getNodeType()); //重新调用遍历节点的操作的方法 listNodes(nd); } } }
--摘自 试卷考试系统、手动组卷导出