Python解析xml写入xlsx文件

版权声明:仅限学习使用 https://blog.csdn.net/u014590889/article/details/83351273

        初次接触Python时实现的一个小功能:用SAX解析ccm.xml中的圈复杂度统计结果,并把汇总数据写入xlsx文件。最初的版本全局变量满篇飞,简直不忍直视;此处代码是后来重构过的,感觉还有好多可以优化的地方,以后有空再搞。下面先给出输入文件ccm.xml的片段,随后是脚本代码。

  <metric>
    <complexity>61</complexity>
    <unit>TypeData</unit>
    <classification>untestable, very high risk</classification>
    <file>source\misc.c</file>
    <startLineNumber>139</startLineNumber>
    <endLineNumber>426</endLineNumber>
    <SLOC>287</SLOC>
  </metric>
  <metric>
    <complexity>49</complexity>
    <unit>urMinStat</unit>
    <classification>complex, high risk</classification>
    <file>mp\source\formance.c</file>
    <startLineNumber>1514</startLineNumber>
    <endLineNumber>1665</endLineNumber>
    <SLOC>151</SLOC>
  </metric>
#!/usr/bin/python
# -*- coding: UTF-8 -*-

import os
import sys
import xml.sax
import xlsxwriter
from collections import OrderedDict

# Script setup: the single command-line argument is the directory to work in.
# Fail with a usage message instead of a bare IndexError when it is missing.
if len(sys.argv) < 2:
    sys.exit('Usage: %s <directory>' % sys.argv[0])

# Single-argument print(...) produces the same output on Python 2 and 3.
print('Run: %s' % sys.argv[0])
filePath = sys.argv[1]
# Relative file names used later in the script resolve against this directory.
os.chdir(filePath)
pwd = os.getcwd()
print(pwd)

# Running totals shared by the whole script; the SAX handler updates the
# counters and the xlsx writer reads them.
dictOutPut = {
    'Team': ' ',
    'SumCcmValue': 0.0,
    'averageCcmValue': 0.0,
    'veryHighRiskNum': 0,
    'highRiskNum': 0,
    'moderateRiskNum': 0,
    'muchRiskNum': 0,
    'ccmNum': 0,
    'funcNum': 0,
    'fileNum': 0,
    'codeNum': 0,
    'codeNumOver200': 0,
    'codeNumOver100': 0,
    'codeNumLess20': 0,
    'complexityOver10': 0,
}
# Keys of dictOutPut that are exported as spreadsheet columns, in column order.
xlsxTitle = ['Team', 'SumCcmValue', 'veryHighRiskNum', 'highRiskNum',
             'moderateRiskNum', 'muchRiskNum', 'funcNum', 'codeNum']

# Team name is the last component of the directory argument. Accept both
# separator styles and ignore a trailing separator, which previously produced
# an empty name (or the whole path when forward slashes were used).
dictOutPut['Team'] = filePath.replace('/', '\\').rstrip('\\').split('\\')[-1]
ccmXlsxFileName = 'ccm_info.xlsx'
# Ordered key -> value mapping of the columns actually written to the sheet.
monitorItems = OrderedDict()

class CcmHandler(xml.sax.ContentHandler):
    """SAX content handler that folds metric elements into ``dictOutPut``.

    For every ``complexity``, ``classification``, ``unit``, ``file`` and
    ``SLOC`` element that closes, the matching counters in the module-level
    ``dictOutPut`` dictionary are updated. ``averageCcmValue`` is refreshed
    after each closing tag once at least one ``unit`` has been counted.
    """

    # Elements whose text is captured into the attribute of the same name.
    _TEXT_ELEMENTS = ("complexity", "classification", "file", "SLOC")

    # Classification text -> name of the dictOutPut counter it increments.
    _RISK_COUNTERS = {
        "untestable, very high risk": 'veryHighRiskNum',
        "complex, high risk": 'highRiskNum',
        "more complex, moderate risk": 'moderateRiskNum',
        "simple, without much risk": 'muchRiskNum',
    }

    def __init__(self):
        # Explicit base call (not super()) so it also works where the base
        # is an old-style class.
        xml.sax.ContentHandler.__init__(self)
        self.CurrentData = ""
        self.complexity = ""
        self.unit = ""
        self.classification = ""
        self.file = ""
        self.SLOC = ""

    def startElement(self, tag, attributes):
        """Remember the element being read and clear its text buffer."""
        self.CurrentData = tag
        if tag in self._TEXT_ELEMENTS:
            # Start from an empty buffer so an empty element cannot reuse the
            # text captured for the previous metric.
            setattr(self, tag, "")

    def endElement(self, tag):
        """Update the totals for the element that has just been closed.

        Elements with blank text are skipped. Non-numeric text in
        ``complexity`` or ``SLOC`` raises ``ValueError`` from ``int()``.
        """
        current = self.CurrentData
        if current == "complexity":
            text = self.complexity.strip()
            if text:
                value = int(text)
                dictOutPut['SumCcmValue'] += value
                if value >= 10:
                    dictOutPut['complexityOver10'] += 1
        elif current == "classification":
            counter = self._RISK_COUNTERS.get(self.classification.strip())
            if counter is not None:
                dictOutPut[counter] += 1
        elif current == "unit":
            dictOutPut['funcNum'] += 1
        elif current == "file":
            dictOutPut['fileNum'] += 1
        elif current == "SLOC":
            text = self.SLOC.strip()
            if text:
                sloc = int(text)
                dictOutPut['codeNum'] += sloc
                if sloc > 200:
                    dictOutPut['codeNumOver200'] += 1
                if sloc > 100:
                    dictOutPut['codeNumOver100'] += 1
                if sloc < 20:
                    dictOutPut['codeNumLess20'] += 1
        # Text that follows the closing tag belongs to no tracked element.
        self.CurrentData = ""
        if dictOutPut['funcNum'] != 0:
            # float() keeps this a true division even if the sum is an int.
            dictOutPut['averageCcmValue'] = (
                float(dictOutPut['SumCcmValue']) / dictOutPut['funcNum'])

    def characters(self, content):
        """Append a chunk of text to the buffer of the current element.

        A SAX parser may report one element's text in several chunks, so the
        chunks are concatenated rather than overwriting each other.
        """
        current = self.CurrentData
        if current in self._TEXT_ELEMENTS:
            setattr(self, current, getattr(self, current) + content)

    def writeXls(self):
        """Delete any existing workbook, then write the collected totals."""
        del_ccm_info_xlsx()
        add_ccm_info_xlsx()

def add_ccm_info_xlsx():
    """Write the selected totals to a 'ccm' worksheet in ``ccmXlsxFileName``.

    Each key listed in ``xlsxTitle`` that exists in ``dictOutPut`` is copied
    into the global ``monitorItems`` mapping. Every entry of that mapping then
    becomes one column: the key in row 0 and its value in row 1. Each
    key/value pair is also printed.
    """
    for key in xlsxTitle:
        # ``in`` works on Python 2 and 3. The former
        # ``has_key(key) is not None`` test was always true, because has_key
        # returns a bool.
        if key in dictOutPut:
            monitorItems[key] = dictOutPut[key]

    workbook = xlsxwriter.Workbook(ccmXlsxFileName)
    try:
        worksheet = workbook.add_worksheet('ccm')
        for col, (key, value) in enumerate(monitorItems.items()):
            # Single-argument print(...) gives the same output on Python 2 and 3.
            print('%s %s' % (key, value))
            worksheet.write(0, col, key)
            worksheet.write(1, col, value)
    finally:
        # Always close the workbook, even if a write above raised.
        workbook.close()

def del_ccm_info_xlsx():
    """Remove the file named by ``ccmXlsxFileName`` if it currently exists."""
    stale_report = ccmXlsxFileName
    if not os.path.exists(stale_report):
        # Nothing to clean up.
        return
    os.remove(stale_report)

if __name__ == "__main__":
    # Script entry point: stream "ccm.xml" through a CcmHandler, then let the
    # handler write the spreadsheet.
    ccm_handler = CcmHandler()
    sax_parser = xml.sax.make_parser()
    # State 0 switches the namespace feature off for this parser.
    sax_parser.setFeature(xml.sax.handler.feature_namespaces, 0)
    sax_parser.setContentHandler(ccm_handler)
    sax_parser.parse("ccm.xml")
    ccm_handler.writeXls()

猜你喜欢

转载自blog.csdn.net/u014590889/article/details/83351273