Java对XML文件的解析(存在特殊字符和父子节点同名的情况)

1. XML中存在特殊字符时的处理方法请参考:http://blog.csdn.net/zhutulang/article/details/37736407

2.父子节点同名可以通过count计数的方式避免,具体代码如下:

// Sample entry point (excerpt): opens farfetch.xml under tmpFileStr and asks _readXml
// to collect the "product" nodes into a list of maps, then prints a start banner.
// NOTE(review): the article cuts this method off here -- the rest of the try block, its
// catch clauses and the closing braces are not shown, and tmpFileStr is declared
// outside this excerpt.
public  static void main(String [] args){
	     
		 
	     try {  
	            // Stream over the feed file; handed to _readXml below.
	            FileInputStream input = new FileInputStream(tmpFileStr+"/"+"farfetch.xml");  
	            // "product" is the element name passed as nodeName to _readXml.
	            List<HashMap<String, String>> list = _readXml(input, "product");
	            
        	    // Timestamp used only for the console banner below.
        	    String starttime = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date());
	    		System.out.println("---"+starttime+"--------Farfetch开始更新-----------");
 /**
  * Parses an XML stream with SAX and returns what {@link SaxHandler} collected for
  * {@code nodeName}.
  *
  * <p>The stream is always closed before this method returns, whether parsing
  * succeeded or failed.
  *
  * @param input    XML document to read; closed by this method
  * @param nodeName element name handed to {@link SaxHandler}
  * @return the handler's list, or {@code null} when the parser could not be created
  *         or the document could not be read/parsed (the error is printed to stderr)
  */
 public static List<HashMap<String, String>> _readXml(InputStream input, String nodeName){
	        try {
	            // NOTE(review): the factory is used with its default settings. If the feed
	            // comes from an untrusted source, consider disabling DOCTYPE/external
	            // entities on the factory before creating the parser.
	            SAXParserFactory spf = SAXParserFactory.newInstance();
	            SAXParser parser = spf.newSAXParser();
	            SaxHandler handler = new SaxHandler(nodeName);
	            parser.parse(input, handler);
	            return handler.getList();
	        } catch (ParserConfigurationException e) {
	            e.printStackTrace();
	        } catch (SAXException e) {
	            e.printStackTrace();
	        } catch (IOException e) {
	            e.printStackTrace();
	        } finally {
	            // Close on every path: previously the stream leaked whenever
	            // newSAXParser() or parse() threw before reaching close().
	            if (input != null) {
	                try {
	                    input.close();
	                } catch (IOException e) {
	                    // A failed close must not discard an already parsed result.
	                    e.printStackTrace();
	                }
	            }
	        }
	        return null;
	    }

 核心代码:

package tools;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;

import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;




/**
 * SAX handler that flattens every {@code nodeName} element of a document into one
 * {@code HashMap<String, String>}.
 *
 * <p>For each record element the map receives:
 * <ul>
 *   <li>the attributes of the record element and of its descendants, keyed by
 *       attribute qualified name;</li>
 *   <li>the non-blank text of each descendant element, keyed by element name;</li>
 *   <li>for {@code <discount>} and {@code <price>} only their {@code currency}
 *       attribute, stored as {@code discount_currency} / {@code price_currency} so the
 *       two identically named attributes do not overwrite each other.</li>
 * </ul>
 *
 * <p>A descendant may carry the same name as the record element (for example
 * {@code <product>} inside {@code <URL>} inside {@code <product>}). Nesting depth is
 * tracked so that only the end tag of the outermost element closes the record.
 *
 * <p>Instances keep parsing state in fields and are meant to be driven by a single
 * parser at a time.
 */
public class SaxHandler extends DefaultHandler {

    /** Record currently being filled; {@code null} while outside any record element. */
    private HashMap<String, String> map = null;

    /** Completed records in document order. */
    private List<HashMap<String, String>> list = new ArrayList<HashMap<String, String>>();

    /** Name of the element whose text is currently being read; {@code null} after its end tag. */
    private String currentTag = null;

    /** Element name that delimits one record. */
    private final String nodeName;

    /** How many {@code nodeName} elements are currently open (0 = outside a record). */
    private int depth = 0;

    /**
     * Accumulates the text of the current element. The parser may deliver one text node
     * in several {@code characters()} calls (for instance around {@code &amp;} in a URL),
     * so the pieces are concatenated here.
     */
    private final StringBuilder text = new StringBuilder();

    /**
     * @param nodeName name of the element that represents one record
     */
    public SaxHandler(String nodeName) {
        this.nodeName = nodeName;
    }

    /**
     * @return the records collected so far; empty (never {@code null}) before parsing
     */
    public List<HashMap<String, String>> getList() {
        return list;
    }

    /** Called once at the start of the document: resets all parsing state. */
    @Override
    public void startDocument() throws SAXException {
        list = new ArrayList<HashMap<String, String>>();
        map = null;
        depth = 0;
        currentTag = null;
        text.setLength(0);
    }

    /** Called for every start tag: opens a record if needed and copies attributes. */
    @Override
    public void startElement(String uri, String localName, String name,
            Attributes attributes) throws SAXException {
        // Every element starts with an empty text buffer.
        text.setLength(0);

        if (name.equals(nodeName)) {
            // Only the outermost nodeName element starts a new record; a nested
            // element with the same name just deepens the nesting.
            if (depth == 0) {
                map = new HashMap<String, String>();
            }
            depth++;
        }

        // Attributes are only recorded while inside a record. Without this guard a
        // <discount>/<price> outside a record dereferenced a null map.
        if (map != null && attributes != null) {
            if ("discount".equals(name) || "price".equals(name)) {
                // Both elements use an attribute called "currency"; store it under an
                // element-specific key and skip the generic copy below.
                map.put(name + "_currency", attributes.getValue("currency"));
            } else {
                for (int i = 0; i < attributes.getLength(); i++) {
                    map.put(attributes.getQName(i), attributes.getValue(i));
                }
            }
        }
        currentTag = name;
    }

    /** Called for element text: appends non-blank chunks to the current element's value. */
    @Override
    public void characters(char[] ch, int start, int length)
            throws SAXException {
        if (currentTag == null || map == null) {
            return;
        }
        String chunk = new String(ch, start, length);
        // Whitespace-only chunks (indentation between tags) are ignored.
        if (chunk.trim().length() == 0) {
            return;
        }
        text.append(chunk);
        map.put(currentTag, text.toString());
    }

    /** Called for every end tag: closes the record when its outermost element ends. */
    @Override
    public void endElement(String uri, String localName, String name)
            throws SAXException {
        if (name.equals(nodeName) && depth > 0) {
            depth--;
            if (depth == 0) {
                list.add(map);
                map = null;
            }
        }
        // Text that follows an end tag belongs to no element we track.
        currentTag = null;
        super.endElement(uri, localName, name);
    }
}

 xml文件具体内容如下:

<?xml version="1.0" encoding="utf-8"?>

<merchandiser xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="merchandiser.xsd">
  <header>
    <merchantId>37938</merchantId>
    <merchantName>FARFETCH.COM Australia</merchantName>
    <createdOn>12/05/2015 07:22:29</createdOn>
  </header>  
  <product product_id="10100630" name="WERKSTATT:MÜNCHEN leather belt" sku_number="10100630" manufacturer_name="WERKSTATT:MÜNCHEN">
    <category>
      <primary>Accessories</primary>
      <secondary>Belts</secondary>
    </category>
    <URL>
      <product>http://click.linksynergy.com/link?id=DGme2yum/2Y&amp;offerid=389625.10100630&amp;type=15&amp;murl=http%3A%2F%2Fwww.farfetch.com%2Fshopping%2Fmen%2Fitem10100630.aspx</product>
      <productImage>http://cdn-images.farfetch.com/10/10/06/30/10100630_579509_800.jpg</productImage>
    </URL>
    <description>
      <short>Black leather belt from Werkstatt: Munchen featuring a silver buckle and silver loop.</short>
    </description>
    <discount currency="AUD">
      <type>amount</type>
    </discount>
    <price currency="AUD">
      <retail>1265.42</retail>
    </price>
    <brand>WERKSTATT:MÜNCHEN</brand>
    <shipping>
      <availability>in-stock</availability>
    </shipping>
    <pixel>http://ad.linksynergy.com/fs-bin/show?id=DGme2yum/2Y&amp;bids=389625.10100630&amp;type=15&amp;subid=0</pixel>
    <attributeClass class_id="60">
      <Product_Type>Accessories</Product_Type>
      <Size>S</Size>
      <Color>Black</Color>
      <Gender>Male</Gender>
    </attributeClass>
  </product>  
  <product product_id="10100676" name="WERKSTATT:MÜNCHEN leather belt" sku_number="10100676" manufacturer_name="WERKSTATT:MÜNCHEN">
    <category>
      <primary>Accessories</primary>
      <secondary>Belts</secondary>
    </category>
    <URL>
      <product>http://click.linksynergy.com/link?id=DGme2yum/2Y&amp;offerid=389625.10100676&amp;type=15&amp;murl=http%3A%2F%2Fwww.farfetch.com%2Fshopping%2Fmen%2Fitem10100676.aspx</product>
      <productImage>http://cdn-images.farfetch.com/10/10/06/76/10100676_579578_800.jpg</productImage>
    </URL>
    <description>
      <short>Black leather belt from Werkstatt: Munchen featuring a silver buckle and leather loop.</short>
    </description>
    <discount currency="AUD">
      <type>amount</type>
    </discount>
    <price currency="AUD">
      <retail>1539.03</retail>
    </price>
    <brand>WERKSTATT:MÜNCHEN</brand>
    <shipping>
      <availability>in-stock</availability>
    </shipping>
    <pixel>http://ad.linksynergy.com/fs-bin/show?id=DGme2yum/2Y&amp;bids=389625.10100676&amp;type=15&amp;subid=0</pixel>
    <attributeClass class_id="60">
      <Product_Type>Accessories</Product_Type>
      <Size>S-M-L-XL</Size>
      <Color>Black</Color>
      <Gender>Male</Gender>
    </attributeClass>
  </product>  
  <product product_id="10212594" name="SCUNZANI IVO toad skin belt" sku_number="10212594" manufacturer_name="SCUNZANI IVO">
    <category>
      <primary>Accessories</primary>
      <secondary>Belts</secondary>
    </category>
    <URL>
      <product>http://click.linksynergy.com/link?id=DGme2yum/2Y&amp;offerid=389625.10212594&amp;type=15&amp;murl=http%3A%2F%2Fwww.farfetch.com%2Fshopping%2Fmen%2Fitem10212594.aspx</product>
      <productImage>http://cdn-images.farfetch.com/10/21/25/94/10212594_1130649_800.jpg</productImage>
    </URL>
    <description>
      <short>Green and black toad skin belt from Scunzani Ivo featuring a silver-tone buckle. Please note that this item cannot be shipped outside the E.U.</short>
    </description>
    <discount currency="AUD">
      <type>amount</type>
    </discount>
    <price currency="AUD">
      <retail>461.25</retail>
    </price>
    <brand>SCUNZANI IVO</brand>
    <shipping>
      <availability>in-stock</availability>
    </shipping>
    <pixel>http://ad.linksynergy.com/fs-bin/show?id=DGme2yum/2Y&amp;bids=389625.10212594&amp;type=15&amp;subid=0</pixel>
    <attributeClass class_id="60">
      <Product_Type>Accessories</Product_Type>
      <Size>M-L</Size>
      <Color>Green</Color>
      <Gender>Male</Gender>
    </attributeClass>
  </product>  
  <product product_id="11249583" name="JIL SANDER VINTAGE straight leg trousers" sku_number="11249583" manufacturer_name="JIL SANDER VINTAGE">
    <category>
      <primary>Vintage &amp; Archive</primary>
      <secondary>Trousers</secondary>
    </category>
    <URL>
      <product>http://click.linksynergy.com/link?id=DGme2yum/2Y&amp;offerid=389625.11249583&amp;type=15&amp;murl=http%3A%2F%2Fwww.farfetch.com%2Fshopping%2Fwomen%2Fitem11249583.aspx</product>
      <productImage>http://cdn-images.farfetch.com/11/24/95/83/11249583_6003611_800.jpg</productImage>
    </URL>
    <description>
      <short>Sky grey linen-wool blend straight leg trousers from Jil Sander Vintage featuring a button and zip fly, side pockets and a back pocket. Circa 1990. Please note that vintage items are not new and therefore might have minor imperfections.</short>
    </description>
    <discount currency="AUD">
      <type>amount</type>
    </discount>
    <price currency="AUD">
      <retail>189.77</retail>
    </price>
    <brand>JIL SANDER VINTAGE</brand>
    <shipping>
      <availability>in-stock</availability>
    </shipping>
    <pixel>http://ad.linksynergy.com/fs-bin/show?id=DGme2yum/2Y&amp;bids=389625.11249583&amp;type=15&amp;subid=0</pixel>
    <attributeClass class_id="60">
      <Product_Type>Vintage &amp; Archive</Product_Type>
      <Size>36</Size>
      <Color>Grey</Color>
      <Gender>Female</Gender>
    </attributeClass>
  </product>  
  <trailer>
    <numberOfProducts>118204</numberOfProducts>
  </trailer>
</merchandiser>

猜你喜欢

转载自sky-xin.iteye.com/blog/2262981
今日推荐