解析 Word 文档中的文字和图片

先将 Word 文档转换为 XML,再根据 XML 的标签解析内容,从而提取出 Word 文档里面的文字和图片。

/**
 * Extracts an embedded picture from a Word document that has been saved as a
 * single XML file.
 *
 * <p>The lookup performed by {@link #readTable(String, String)} is:
 * <ol>
 *   <li>find the first {@code w:tc} element whose string value contains the
 *       placeholder {@code #networkTopo#};</li>
 *   <li>inside it, find the first {@code v:imagedata} element and read its
 *       {@code r:id} attribute;</li>
 *   <li>find a {@code Relationship} element carrying that id as an attribute
 *       value and read its {@code Target} attribute;</li>
 *   <li>find the {@code pkg:part} element carrying
 *       {@code /word/media/<file name>} as an attribute value and read the text
 *       of its {@code binaryData} child;</li>
 *   <li>Base64-decode that text and write it out as an image file.</li>
 * </ol>
 *
 * <p>The search helpers communicate through the mutable fields
 * {@link #resultNode} and {@link #isFound}, so an instance must not be shared
 * between threads.
 */
public class ReadWord2
{
    /** Prefix prepended to the picture file name to form the {@code pkg:part} name. */
    private static final String MEDIA_PART_PREFIX = "/word/media/";

    /** Input XML used by the no-argument {@link #readTable()}. */
    private static final String DEFAULT_XML_PATH = "D:\\temp\\target1.xml";

    /** Output image used by the no-argument {@link #readTable()}. */
    private static final String DEFAULT_IMAGE_PATH = "d:\\temp\\pp.png";

    /** Image format used when the output file name has no extension. */
    private static final String FALLBACK_IMAGE_FORMAT = "png";

    /** Element located by the most recent successful search. */
    Element resultNode = null;

    /** Set to {@code true} by the search helpers once a match has been stored in {@link #resultNode}. */
    boolean isFound = false;

    /** Relationship id read from the {@code r:id} attribute (for example {@code rId8}). */
    String topoPicId = "";

    /** File name of the picture, taken from the relationship's {@code Target}. */
    String topoPicName = "";

    /** Full {@code pkg:part} name of the picture; recomputed on every run. */
    String imgPicName = MEDIA_PART_PREFIX;

    /** Base64 text of the picture as read from {@code binaryData}. */
    String topoPicStr = "";

    /**
     * Command-line entry point.
     *
     * @param args optional: {@code args[0]} is the input XML path and
     *             {@code args[1]} the output image path; with fewer than two
     *             arguments the built-in default paths are used
     */
    public static void main(String[] args)
    {
        ReadWord2 rw = new ReadWord2();
        if (args != null && args.length >= 2)
        {
            rw.readTable(args[0], args[1]);
        }
        else
        {
            rw.readTable();
        }
    }

    /**
     * Runs the extraction with the default input and output paths.
     */
    void readTable()
    {
        readTable(DEFAULT_XML_PATH, DEFAULT_IMAGE_PATH);
    }

    /**
     * Parses {@code xmlPath}, locates the picture referenced from the table cell
     * containing {@code #networkTopo#} and writes it to {@code imagePath}.
     *
     * <p>Parsing and image errors are printed to standard error; when any
     * element of the lookup chain is simply absent the method returns without
     * writing anything.
     *
     * @param xmlPath   path of the XML file to read
     * @param imagePath path of the image file to write; its extension selects
     *                  the output format
     */
    void readTable(String xmlPath, String imagePath)
    {
        try
        {
            SAXReader sax = new SAXReader();
            Document document = sax.read(new File(xmlPath));
            Element root = document.getRootElement();

            // Start from a clean state so a previous run cannot leak a stale match.
            resultNode = null;
            isFound = false;
            getNodesByNameAndText(root, "w:tc", "#networkTopo#");
            if (!isFound || resultNode == null)
            {
                return;
            }

            // An empty search text matches any v:imagedata element inside the cell.
            isFound = false;
            getNodesByNameAndText(resultNode, "v:imagedata", "");
            if (!isFound)
            {
                // No picture in the cell; resultNode still points at the cell itself.
                return;
            }

            Element shapeNode = resultNode;
            List<Attribute> listAttr = shapeNode.attributes();
            for (Attribute attr : listAttr)
            {
                String name = attr.getQualifiedName();
                if (!"r:id".equals(name))
                {
                    continue;
                }
                String value = attr.getValue();
                topoPicId = value;
                try
                {
                    exportRelatedImage(root, topoPicId, imagePath);
                }
                catch (IOException e)
                {
                    e.printStackTrace();
                }
                System.out.println("属性名称:" + name + "属性值:" + value);
                break;
            }
        }
        catch (DocumentException e)
        {
            e.printStackTrace();
        }
    }

    /**
     * Resolves a relationship id to its picture part and writes the picture.
     *
     * @param root           root element of the parsed document
     * @param relationshipId value of the {@code r:id} attribute
     * @param imagePath      path of the image file to write
     * @throws IOException if the relationship has no {@code Target}, the part has
     *                     no {@code binaryData}, or the picture cannot be decoded
     *                     or written
     */
    private void exportRelatedImage(Element root, String relationshipId, String imagePath)
        throws IOException
    {
        isFound = false;
        getNodesByNameAndAttr(root, "Relationship", relationshipId);
        if (!isFound)
        {
            return;
        }

        Attribute targetAttr = resultNode.attribute("Target");
        if (targetAttr == null)
        {
            throw new IOException("Relationship " + relationshipId + " has no Target attribute");
        }
        String target = targetAttr.getValue();
        // Keep only the file name: works for "media/x.png", "/word/media/x.png" and "x.png".
        topoPicName = target.substring(target.lastIndexOf('/') + 1);
        // Recompute instead of appending so repeated runs do not accumulate names.
        imgPicName = MEDIA_PART_PREFIX + topoPicName;

        isFound = false;
        getNodesByNameAndAttr(root, "pkg:part", imgPicName);
        if (!isFound)
        {
            return;
        }

        Element binaryData = resultNode.element("binaryData");
        if (binaryData == null)
        {
            throw new IOException("Part " + imgPicName + " has no binaryData child");
        }
        topoPicStr = binaryData.getText();
        base64toImg(topoPicStr, imagePath);
    }

    /**
     * Depth-first search for the first element named {@code nodeName} whose
     * string value contains {@code nodeText}.
     *
     * <p>On a match the element is stored in {@link #resultNode} and
     * {@link #isFound} is set to {@code true}. The method never resets
     * {@link #isFound}; callers must clear it before starting a new search.
     * An element whose name matches but whose text does not is not searched
     * any deeper.
     *
     * @param node     element to start from; {@code null} is ignored
     * @param nodeName qualified element name to look for, e.g. {@code w:tc}
     * @param nodeText text the element's string value must contain; an empty
     *                 string matches every element with the given name
     * @author chenleixing
     */
    public void getNodesByNameAndText(Element node, String nodeName, String nodeText)
    {
        if (node == null)
        {
            return;
        }

        if (nodeName.equals(node.getQualifiedName()))
        {
            if (node.getStringValue().contains(nodeText))
            {
                resultNode = node;
                isFound = true;
            }
            return;
        }

        // Name did not match: descend into the direct children until one search succeeds.
        List<Element> listElement = node.elements();
        for (Element e : listElement)
        {
            if (isFound)
            {
                break;
            }
            getNodesByNameAndText(e, nodeName, nodeText);
        }
    }

    /**
     * Depth-first search for the first element named {@code nodeName} that has
     * any attribute whose value equals {@code attributeValue}.
     *
     * <p>On a match the element is stored in {@link #resultNode} and
     * {@link #isFound} is set to {@code true}. The method never resets
     * {@link #isFound}; callers must clear it before starting a new search.
     * An element whose name matches but whose attributes do not is not searched
     * any deeper.
     *
     * @param node           element to start from; {@code null} is ignored
     * @param nodeName       qualified element name to look for
     * @param attributeValue attribute value to match (the attribute name is not checked)
     */
    public void getNodesByNameAndAttr(Element node, String nodeName, String attributeValue)
    {
        if (node == null)
        {
            return;
        }

        if (nodeName.equals(node.getQualifiedName()))
        {
            List<Attribute> listAttr = node.attributes();
            for (Attribute attr : listAttr)
            {
                if (attr.getValue().equals(attributeValue))
                {
                    resultNode = node;
                    isFound = true;
                    return;
                }
            }
            return;
        }

        // Name did not match: descend into the direct children until one search succeeds.
        List<Element> listElement = node.elements();
        for (Element e : listElement)
        {
            if (isFound)
            {
                break;
            }
            getNodesByNameAndAttr(e, nodeName, attributeValue);
        }
    }

    /**
     * Treats the bytes of {@code imgStr} as encoded image data and writes the
     * image to the default output path.
     *
     * <p>NOTE(review): the string is converted with the platform default
     * charset, exactly as before; confirm what encoding callers expect.
     *
     * @param imgStr string whose bytes are the encoded image
     * @throws IOException if the data is not a readable image or cannot be written
     */
    private void convertStrToImg(String imgStr)
        throws IOException
    {
        writeImage(imgStr.getBytes(), DEFAULT_IMAGE_PATH);
    }

    /**
     * Decodes Base64 text and writes the resulting image to {@code imagePath}.
     *
     * @param imgStr    Base64 text; line breaks inside the text are accepted
     * @param imagePath path of the image file to write
     * @throws IOException if the text is not valid Base64, the data is not a
     *                     readable image, or the file cannot be written
     */
    private void base64toImg(String imgStr, String imagePath)
        throws IOException
    {
        byte[] imgByte;
        try
        {
            // The MIME decoder skips the line separators found in wrapped Base64 text.
            imgByte = java.util.Base64.getMimeDecoder().decode(imgStr);
        }
        catch (IllegalArgumentException e)
        {
            throw new IOException("Picture data is not valid Base64", e);
        }
        writeImage(imgByte, imagePath);
    }

    /**
     * Decodes {@code imgByte} with ImageIO and re-encodes it to {@code imagePath}
     * in the format named by the file extension.
     *
     * @param imgByte   encoded image bytes
     * @param imagePath path of the image file to write
     * @throws IOException if no ImageIO reader understands the data, no writer
     *                     exists for the target format, or writing fails
     */
    private void writeImage(byte[] imgByte, String imagePath)
        throws IOException
    {
        BufferedImage image;
        try (ByteArrayInputStream bais = new ByteArrayInputStream(imgByte))
        {
            image = ImageIO.read(bais);
        }
        if (image == null)
        {
            // ImageIO.read signals "no suitable reader" with null rather than an exception.
            throw new IOException("Unsupported or corrupt image data; nothing written to " + imagePath);
        }

        String format = imageFormatOf(imagePath);
        if (!ImageIO.write(image, format, new File(imagePath)))
        {
            // ImageIO.write signals "no suitable writer" with false rather than an exception.
            throw new IOException("No image writer for format '" + format + "' (" + imagePath + ")");
        }
    }

    /**
     * Returns the lower-cased extension of the file name in {@code imagePath},
     * or {@code png} when the file name has none.
     */
    private static String imageFormatOf(String imagePath)
    {
        String fileName = new File(imagePath).getName();
        int dot = fileName.lastIndexOf('.');
        if (dot < 0 || dot == fileName.length() - 1)
        {
            return FALLBACK_IMAGE_FORMAT;
        }
        return fileName.substring(dot + 1).toLowerCase(java.util.Locale.ROOT);
    }
}

猜你喜欢

转载自blog.csdn.net/os2046/article/details/80858367