用正则表达式去除中文标点符号,并提取各关键词后面的数字

#-*-coding:utf8-*-
import re
# Input file: one record per line, e.g.
# "全水22.21,分析水8.06,灰分8.87,挥发33.44,固定碳53.12,焦渣特征2,硫0.82,低位热量5053。"
DATA_PATH = "D:/资料/山西/data_no_null.txt"

# Keyword groups to look for; each inner list holds the spellings of one field.
all_word = [["全水"], ["分析水"], ["灰分"], ["挥发"], ["固定碳"], ["焦渣特征"], ["硫"], ["低位热量"]]

# Whitespace plus ASCII / Chinese punctuation stripped from the text that
# follows a keyword. Raw string: "\s", "\!" and "\/" are regex escapes, not
# valid Python string escapes. The ASCII dot is deliberately NOT in this set,
# so decimal points survive.
PUNCTUATION_RE = re.compile(r"[\s+\!\/_,$%^*(+\"\')]+|[::+——()?【】“”!,。?、~@#¥%……&*()]+")

# Integer or decimal number. A dot is consumed only when digits follow it, so
# a sentence-ending "." is not glued onto the value ("0.82." -> "0.82").
NUMBER_RE = re.compile(r"\d+(?:\.\d+)?")


def find_numbers(word, string):
    """Return the number that follows each occurrence of *word* in *string*.

    For every match of *word* (interpreted as a regular expression, as the
    keywords always were), all whitespace and punctuation is removed from the
    remainder of the line and a number is matched at the very start of what
    is left.

    Args:
        word: keyword pattern to search for.
        string: one stripped input line.

    Returns:
        A list with one ``(start, number)`` tuple per occurrence, where
        ``start`` is the offset of the keyword in *string* and ``number`` is
        the matched numeric text, or ``None`` when the keyword is not
        directly followed by a number.
    """
    found = []
    for result in re.finditer(word, string):
        # Punctuation is removed from the whole tail, not only its head, so
        # separators between the keyword and its value never block the match.
        son_string = PUNCTUATION_RE.sub("", string[result.end():])
        number = NUMBER_RE.match(son_string)
        found.append((result.start(), number.group() if number is not None else None))
    return found


def main():
    """Read DATA_PATH line by line and print every keyword/number pair found."""
    # The context manager guarantees the file is closed, even on error.
    with open(DATA_PATH, "r", encoding="utf8") as file:
        for line in file:
            string = line.strip()
            for word_one in all_word:
                for word in word_one:
                    for _start, number in find_numbers(word, string):
                        if number is not None:
                            print(string)
                            print(word + ":" + number)
                        else:
                            print("没有结果")


if __name__ == "__main__":
    main()




猜你喜欢

转载自blog.csdn.net/baidu_15113429/article/details/80913006
今日推荐