Python 导入文件夹中所有excel并按列名读取插入数据库

#-*- coding: utf-8 -*-
# python 3.5.0
__author__ = 'HZC'
 
import os
import xlrd
import pymssql
import datetime
 
class BUSINESS:
	def __init__(self):
		self.host = "HZC"
		self.user = "kk"
		self.pwd = "kk"
		self.db = "business"
		self._conn = self.GetConnect()
		if(self._conn):
			self._cur = self._conn.cursor()
	
	#连接数据库
	def GetConnect(self):
		conn = False
		try:
			conn = pymssql.connect(
				host=self.host,
				user=self.user,
				password=self.pwd,
				database =self.db
			)
		except Exception as err:
			print("连接数据库失败, %s" % err)
		else:
			return conn
	
	#执行非查询类语句
	def ExecNonQuery(self, sql):
		flag = False
		try:
			self._cur.execute(sql)
			self._conn.commit()
			flag = True
		except Exception as err:
			flag = False
			self._conn.rollback()
			print("执行失败, %s" % err)
		else:
			return flag
	
	#excel 中的日期格式化(否则为浮点数值)
	def FormatDatetimeValue(self,book, sheet,ind_row,ind_col):
		cell = sheet.cell(ind_row, ind_col)
		if cell.ctype == 3: # 3 means 'xldate' , 1 means 'text
			year, month, day, hour, minute, second = xlrd.xldate_as_tuple(cell.value, book.datemode)
			py_date = datetime.datetime(year, month, day, hour, minute, second)
		return py_date.strftime('%Y-%m-%d %H:%M:%S')
	
	#读取表格第一行作为列名。避免重复读取,名称和位置存储为字典(如:{"name":0, "mobile":1})
	def GetColumnTitle(self, sheet):
		col_dict = {}  
		for i in range(sheet.ncols):
			col_dict[sheet.cell_value(0, i).strip()] = i
		return col_dict
	
	#遍历文件夹,读取所有【xls】文件名称
	def GetFolderFile(self,dir):
		for directory, subdirectories, files in os.walk(dir):
			#files = [s for s in files if ".xls" in s]
			for file in files:
				if os.path.splitext(file)[1][1:] != "xls":
					continue
				filepath = os.path.join(directory, file)
				self.SaveOneFile(filepath)
		
	#读取一个 xls 文件到数据库(第一个 sheet$)
	def SaveOneFile(self,filepath):
		print(filepath)
		workbook = xlrd.open_workbook(filepath)
		sheet = workbook.sheet_by_index(0) #只读第一个$sheet,没遍历所有!
		#如第一行有列名,则按列名取数
		col_dict = self.GetColumnTitle(sheet)
		if bool(col_dict):
			number_of_rows = sheet.nrows
			for row in range(1, number_of_rows):
				sql = """INSERT INTO tab(col1,col2,col3,col4,col5,col6,col7,col8,col9,col10,col11,col2)
				VALUES ('%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', %s, '%s', '%s')""" % \
				(
					self.FormatDatetimeValue(workbook,sheet,row,col_dict["col1"]),
					self.FormatDatetimeValue(workbook,sheet,row,col_dict["col2"]),
					self.FormatDatetimeValue(workbook,sheet,row,col_dict["col3"]),
					sheet.cell(row,col_dict["col4"]).value,
					sheet.cell(row,col_dict["col5"]).value,
					sheet.cell(row,col_dict["col6"]).value,
					sheet.cell(row,col_dict["col7"]).value,
					sheet.cell(row,col_dict["col8"]).value,
					sheet.cell(row,col_dict["col9"]).value,
					sheet.cell(row,col_dict["col10"]).value,
					sheet.cell(row,col_dict["col11"]).value,
					sheet.cell(row,col_dict["col12"]).value
					#datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
				)
				self.ExecNonQuery(sql)
		else:
			pass
	
	
if __name__ == "__main__":
	bs = BUSINESS()
	#整个文件夹
	bs.GetFolderFile(r'C:/Users/huangzecheng/Desktop/11月/')
	#一个文件
	#bs.SaveOneFile(r'C:/Users/huangzecheng/Desktop/11月/py20171101.xls')
 

Guess you like

Origin blog.csdn.net/amberom/article/details/119701239