Using Java NIO and a buffered writer to write XPath query results to a file

// Purpose: read an HTML fragment from text.txt, select every <tr> under the
// <tbody> root with XPath, and write each row to out.txt as one line whose
// <td> text contents are separated by tab characters.

// Decode the input with the platform default charset, the same charset that
// is used for the output file below.
byte[] bytes = Files.readAllBytes(Paths.get("E:\\pdf\\aaa\\html\\text.txt").normalize());
String text = new String(bytes, Charset.defaultCharset());

// Isolate the <tbody>...</tbody> element so the XML parser sees exactly one
// root element. Fail with a descriptive message when the opening tag is
// missing instead of letting substring(-1) throw.
int tbodyStart = text.indexOf("<tbody>");
if (tbodyStart < 0) {
    throw new IllegalStateException("No <tbody> element found in E:\\pdf\\aaa\\html\\text.txt");
}
// Use the LAST closing tag so nested tables inside cells stay intact; any
// markup after it (for example </table>) would otherwise break XML parsing.
int tbodyEnd = text.lastIndexOf("</tbody>");
String xml = tbodyEnd > tbodyStart
        ? text.substring(tbodyStart, tbodyEnd + "</tbody>".length())
        : text.substring(tbodyStart);
InputSource inputXML = new InputSource(new StringReader(xml));

XPath xPath = XPathFactory.newInstance().newXPath();
NodeList nodes = (NodeList) xPath.evaluate("/tbody/tr", inputXML, XPathConstants.NODESET);
int length = nodes.getLength();
Path file = Paths.get("E:\\pdf\\aaa\\html\\out.txt");
// TRUNCATE_EXISTING is required: with CREATE alone an existing file is
// overwritten from offset 0 but keeps its old length, leaving stale data
// after the newly written rows.
try (BufferedWriter writer = Files.newBufferedWriter(file, Charset.defaultCharset(),
        StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.WRITE)) {
    for (int i = 0; i < length; i++) {
        Node node = nodes.item(i);

        // Relative XPath: only the <td> children of the current row.
        NodeList childList = (NodeList) xPath.evaluate("td", node, XPathConstants.NODESET);
        int cellCount = childList.getLength();
        for (int j = 0; j < cellCount; j++) {
            Node child = childList.item(j);
            writer.write(child.getTextContent());
            // Tab between cells, but not after the last one.
            if (j < cellCount - 1) {
                writer.write("\t");
            }
        }
        writer.newLine();
    }
}

text.txt content

 

 Output content:

 

Guess you like

Origin www.cnblogs.com/passedbylove/p/11462562.html