While working on a project, I needed to measure the number of effective lines of code committed to git, that is: added lines minus blank lines minus comment lines. I could not find analysis code online that supports multiple languages, so I wrote my own and am sharing it here.
The function takes two parameters: `filepath`, the name of the changed file, and `diff`, the changed content of that file (which can be viewed with `git diff {commit_id}`).
Features of this code:
- Determines the language of the file from the extension of `filepath`
- Counts the comment lines and blank lines that were added and deleted in the committed code
- Supported file types: py, sh, java, rb, vue, html, js, xml, sql, css, etc.
- To support more languages, add an entry keyed by the file extension to the language dictionary in the code: `single` is the marker that starts a single-line comment, and `multi_start` / `multi_end` are the markers that open and close a multi-line comment.
# Comment syntax keyed by lower-case file extension.
#   single      - prefix that starts a single-line comment
#   multi_start - markers that open a multi-line comment
#   multi_end   - markers that close one; the i-th entry closes the block
#                 opened by the i-th entry of multi_start
_COMMENT_SYNTAX = {
    'py': {
        'single': '#',
        'multi_start': ["'''", '"""'],
        'multi_end': ["'''", '"""'],
    },
    'java': {
        'single': '//',
        'multi_start': ["/*"],
        'multi_end': ["*/"],
    },
    'js': {
        'single': '//',
        'multi_start': ["/*"],
        'multi_end': ["*/"],
    },
    'vue': {
        'single': '//',
        'multi_start': ["<!--", '/*'],
        'multi_end': ["-->", "*/"],
    },
    'html': {
        'single': '//',
        'multi_start': ["<!--", '/*'],
        'multi_end': ["-->", "*/"],
    },
    'jsx': {
        'multi_start': ["/*", "{/*"],
        'multi_end': ["*/", "*/}"],
    },
    'less': {
        'single': '//',
        'multi_start': ["/*"],
        'multi_end': ["*/"],
    },
    'rb': {
        'single': '#',
        'multi_start': ["=begin"],
        'multi_end': ["=end"],
    },
    'yml': {
        'single': '#',
    },
    'xml': {
        'multi_start': ["<!--"],
        'multi_end': ["-->"],
    },
    'sql': {
        'single': '--',
        'multi_start': ["/*"],
        'multi_end': ["*/"],
    },
    'sh': {
        'single': '#',
    },
    'css': {
        'multi_start': ["/*"],
        'multi_end': ["*/"],
    },
}


def _scan_comment_line(text, open_end, single, pairs):
    """Classify one stripped, non-blank source line.

    Args:
        text: the line content without the diff marker, already stripped.
        open_end: terminator of the block comment currently open, or None.
        single: single-line comment prefix, or None.
        pairs: list of (opener, terminator) tuples for block comments.

    Returns:
        (is_comment, open_end) where open_end is the terminator still being
        waited for after this line, or None when no block comment is open.
    """
    if open_end is not None:
        # Inside a block comment every line is a comment; the block ends
        # when a line ends with the terminator matching its opener.
        return True, (None if text.endswith(open_end) else open_end)

    for opener, closer in pairs:
        if text.startswith(opener):
            # Search for the terminator only AFTER the opener.  Otherwise a
            # lone '"""' (opener == terminator) would open and close the
            # block on the same line and the comment body would be missed.
            remainder = text[len(opener):]
            return True, (None if remainder.endswith(closer) else closer)

    # A line that ends with a terminator while no block is known to be open
    # is treated as the tail of a comment that started outside the diff.
    for _, closer in pairs:
        if text.endswith(closer):
            return True, None

    if single and text.startswith(single):
        return True, None
    return False, None


def get_commit_diff_comment_rows(filepath, diff):
    """Count comment lines and blank lines added/deleted in a file's diff.

    The language is chosen from the extension of ``filepath`` (compared
    case-insensitively).  Comment detection is a line-based heuristic: a
    line is a comment when it starts with the single-line prefix, starts
    with a block opener, ends with a block terminator, or lies inside an
    open block comment.  Blank lines are always counted as blank, even
    inside a block comment.

    Args:
        filepath: path of the changed file; only its extension is used.
        diff: unified diff text for that file (e.g. ``git diff`` output).

    Returns:
        ``{}`` when the diff has fewer than three lines or the extension is
        not supported; otherwise a dict with the integer keys
        ``comment_add``, ``comment_del``, ``empty_add`` and ``empty_del``.
    """
    endfix = filepath.split("/")[-1].split('.')[-1].lower()
    diff_rows = diff.split("\n")
    if len(diff_rows) < 3:
        return {}
    syntax = _COMMENT_SYNTAX.get(endfix)
    if not syntax:
        return {}
    single = syntax.get("single")
    pairs = list(zip(syntax.get("multi_start", ()),
                     syntax.get("multi_end", ())))

    comment_add = 0
    comment_del = 0
    empty_add = 0
    empty_del = 0
    # Block-comment state is kept separately for the new ('+') and old ('-')
    # file, so a comment opened on one side never leaks into the other.
    # Each value is the terminator being waited for; None means no block
    # comment is open.
    open_end = {'+': None, '-': None}
    in_hunk = False

    for row in diff_rows:
        if row.startswith("@@"):
            # Hunks are separated by unseen lines, so comment state cannot
            # be carried reliably from one hunk to the next.
            in_hunk = True
            open_end = {'+': None, '-': None}
            continue
        if row.startswith("diff "):
            # Header of the next file; hunk content always starts with
            # ' ', '+', '-' or '\'.
            in_hunk = False
            continue
        # '---' / '+++' are file headers only outside a hunk.  Inside a hunk
        # they are real content, e.g. a deleted SQL comment '-- note' shows
        # up as '--- note'.
        if not in_hunk and (row.startswith("---") or row.startswith("+++")):
            continue

        marker = row[:1]
        if marker == '+' or marker == '-':
            sides = (marker,)
        elif marker == ' ' and in_hunk:
            # Context lines exist in both old and new file: they are never
            # counted, but they can open or close a block comment.
            sides = ('+', '-')
        else:
            continue

        text = row[1:].strip()
        if not text:
            if marker == '+':
                empty_add += 1
            elif marker == '-':
                empty_del += 1
            continue

        is_comment = False
        for side in sides:
            is_comment, open_end[side] = _scan_comment_line(
                text, open_end[side], single, pairs)

        if is_comment:
            if marker == '+':
                comment_add += 1
            elif marker == '-':
                comment_del += 1

    return {"comment_add": comment_add,
            "comment_del": comment_del,
            "empty_add": empty_add,
            "empty_del": empty_del
            }