数据读取、新建sheet写入数据

版权声明:不要转载复制当原创就好了,指明下参考地址或者书目,大家一起学习进步。 https://blog.csdn.net/Monk_donot_know/article/details/86088247

笔记说明:超链接加不进来!!!一加进来就卡死,真是崩溃!openpyxl这块不是我写的,是从一个人的博客上摘下来的。
就是下面这个。
https://blog.csdn.net/weixin_43094965/article/details/82226263
好神奇,上述这个链接居然是手打出来的,居然可以显示,复制粘贴过来就会卡死!
后面的案例部分是我工作写的,反正你们没数据,我就大胆摆出来了,欢迎指正。

dataframe

# Example only: `data` is a placeholder and is not defined in this snippet.
import pandas as pd
a = pd.DataFrame(data)
a.head(6)                              # first six rows
a.describe()                           # summary statistics
a.T                                    # transpose
a.sort_index(axis=1, ascending=True)   # sort the columns by label
a['x']                                 # select a column by label
a[0:3]                                 # slice rows
a.loc[:, ['a', 'b']]                   # index by label
a.iloc[1:3, 1:6]                       # index by position
a.iloc[1:2]
a.iloc[[0, 2, 3], [1, 2, 7]]           # pick arbitrary rows and columns
# Assign to a slice in place. `.loc` slices by label, so this form assumes
# integer column labels; use `.iloc` when positions are meant.
a.loc[:, 1:3] = 7

read_excel()

读取数据之后,原始数据的第一行会被用作列标签(表头),原始数据的第二行则成为数据的第一行。

pd.read_excel(io, sheet_name=0, header=0, names=None, index_col=None, 
              usecols=None, squeeze=False,dtype=None, engine=None, 
              converters=None, true_values=None, false_values=None, 
              skiprows=None, nrows=None, na_values=None,
              parse_dates=False, 
              date_parser=None, thousands=None, comment=None, skipfooter=0, 
              convert_float=True, **kwds)

"""
io:excel的存储路径
sheet_name:要读取的工作表名称
header:用哪一行作列名
names:自定义最终的列名
index_col:用作索引的列
usecols:需要读取哪些列
converters:强行规定列数据类型
skiprows:跳过特定行
nrows:需要读取的行数
skipfooter:跳过末尾n行
"""

# Example values for the read_excel parameters described above.
io=r'C:\Users\Administrator\Documents\WeChat Files\data'
sheet_name='中英文名都可以或者Sheet1' # sheet names may be Chinese or English; capital "S" in "Sheet1"; default is 0 (the first sheet)
names=['a','b','c','d'] ## custom column names
usecols=[0,1,2]
usecols='A:C,E' ## alternative Excel-letter form; the range includes column C
converters= {'排名':str,'场次':int}  ## force per-column types. NOTE(review): the original note said every column defaults to integer; pandas normally infers dtypes per column - confirm

openpyxl

## Create an xlsx workbook
from openpyxl import Workbook
wb = Workbook() ## instantiate; operations on the Excel workbook go through wb
ws = wb.active ## the workbook's active worksheet

## Read an existing xlsx
## Signature (recent openpyxl releases):
##   load_workbook(filename, read_only=False, keep_vba=False,
##                 data_only=False, keep_links=True)
## The `use_iterators` and `guess_types` keywords found in older tutorials
## have been removed from openpyxl.
from openpyxl import load_workbook
wb = load_workbook('文件名.xlsx') ## loads the whole workbook; pick a sheet with wb[name] or wb.active

## Example: print every cell of one worksheet
def read_excel_xlsx(path, sheet_name):
    """Print the contents of one worksheet, one spreadsheet row per line.

    Args:
        path: Path of the .xlsx file to open.
        sheet_name: Title of the worksheet to print.

    Raises:
        KeyError: If the workbook has no worksheet called ``sheet_name``.
    """
    # Imported locally: the surrounding file only imports names *from*
    # openpyxl, so the bare module name ``openpyxl`` is not defined.
    from openpyxl import load_workbook

    workbook = load_workbook(path)
    # workbook.get_sheet_by_name(sheet_name) is deprecated; index by title.
    sheet = workbook[sheet_name]
    for row in sheet.rows:
        for cell in row:
            # Each value is followed by a space and a tab, without a newline.
            print(cell.value, "\t", end="")
        print()

## Create a new sheet
## Signature: Workbook.create_sheet(self, title=None, index=None)
import openpyxl
help(openpyxl.workbook.Workbook.create_sheet)

## Example: dump a sequence into column A of a new sheet
def save(data1,K,path):
    """Write ``data1`` as floats into column A of a new first sheet and save.

    The workbook at ``path`` is opened (it must already exist), a sheet
    titled with the module-level ``para_phase`` is inserted at index 0,
    and the workbook is written back to the same path.

    Args:
        data1: Indexable sequence of values convertible to ``float``.
        K: Unused by this function.
        path: Path of the existing .xlsx file to update.
    """
    book = load_workbook(path)
    target = book.create_sheet(title=para_phase, index=0)
    for offset in range(len(data1)):
        # Worksheet rows are 1-based.
        target.cell(row=offset + 1, column=1, value=float(data1[offset]))
    # Nothing is persisted until the workbook is saved.
    book.save(path)

一个案例:做随机森林,并将一系列指标计算出来,储存在excel中。

import pandas as pd
import numpy as np
import pickle
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import KFold
from sklearn.metrics import roc_auc_score
import xlwt
import sys
import datetime
from openpyxl import load_workbook
from imblearn.over_sampling import SMOTE


# Data-set tag: phase1, phase2, phase3, ...; phase1 is the version with no
# features removed. (A `global` statement at module level has no effect, so
# the names are simply assigned.)
para_phase = "phase11"

# Oversampling switch. Kept as the string "TRUE"/"FALSE" because the script
# compares it as a string and embeds it in the output file name.
balance = "TRUE"


def save(data1,data2,data3,data4,data5,data6,data7,data8,path):
    """Write per-fold metrics into a new first sheet of an existing workbook.

    The workbook at ``path`` is opened (it must already exist), a sheet
    titled ``'model_<para_phase>'`` is inserted at index 0, and the
    workbook is saved back to the same path. Column A holds the metric
    labels; each following column holds one fold.

    Args:
        data1: TN count per fold (stored as int); its length sets the
            number of folds written.
        data2: FP count per fold (stored as int).
        data3: FN count per fold (stored as int).
        data4: TP count per fold (stored as int).
        data5: Accuracy per fold (stored as float).
        data6: Out-of-bag score per fold (stored as float).
        data7: KS value per fold (stored as float).
        data8: AUC per fold (stored as float).
        path: Path of the existing .xlsx file to update.

    Raises:
        IndexError: If any of ``data2``..``data8`` is shorter than ``data1``.
    """
    f = load_workbook(path)
    sheet1 = f.create_sheet(title='model_%s'%(para_phase), index=0)

    # (label, converter, values) for worksheet rows 1..8. The label text is
    # kept exactly as before (including the 'accuary' spelling) so existing
    # readers of the sheet keep working.
    metric_rows = (
        ('TN', int, data1),
        ('FP', int, data2),
        ('FN', int, data3),
        ('TP', int, data4),
        ('accuary rate', float, data5),
        ('OOB Score', float, data6),
        ('KS', float, data7),
        ('AUC', float, data8),
    )
    fold_count = len(data1)
    for row_idx, (label, convert, values) in enumerate(metric_rows, start=1):
        # Labels are written once per row, and also when there are no folds.
        sheet1.cell(row_idx, 1).value = label
        for j in range(fold_count):
            # Fold j goes into column j + 2 (column 1 is the label).
            sheet1.cell(row_idx, j + 2).value = convert(values[j])

    f.save(path)



if __name__ == '__main__':
    # 10-fold cross-validation of a random-forest regressor used as a
    # scorer: each fold's predictions are thresholded into 0/1 labels, the
    # confusion-matrix based metrics are collected and finally written to
    # Excel through save().
    DECISION_THRESHOLD = 0.3

    all_data = pd.read_excel('TRAIN_all_%s.xlsx'%(para_phase), sheet_name='Sheet1')
    train_array = all_data.values

    # Column 2 is the target; columns 3 onward are the features.
    train_data = train_array[:, 3:]
    train_re = train_array[:, 2]

    kf = KFold(n_splits=10,shuffle=True)

    predict_score = []
    oob_score = []
    KS = []
    AUC_A = []
    TN = []
    FP = []
    FN = []
    TP = []

    for train_index,test_index in kf.split(train_re):
        X_train,X_test = train_data[train_index],train_data[test_index]
        Y_train,Y_test = train_re[train_index],train_re[test_index]

        if balance == 'TRUE':
            # Oversample the training fold only; the test fold stays as is.
            over_sample = SMOTE(random_state = 255)
            # fit_resample replaces fit_sample, which imbalanced-learn removed.
            over_sample_X,oversample_Y = over_sample.fit_resample(X_train,Y_train)
        else:
            over_sample_X = X_train
            oversample_Y = Y_train

        model = RandomForestRegressor(n_estimators=500,oob_score=True)
        model.fit(over_sample_X,oversample_Y)

        # Keep the continuous scores for the AUC and derive the hard labels
        # in a separate array. Thresholding in place would also overwrite
        # the scores, making the AUC that of the 0/1 labels.
        re_pred_A = model.predict(X_test)
        re_pred = (re_pred_A >= DECISION_THRESHOLD).astype(float).tolist()
        Y_test = Y_test.tolist()

        tn1, fp1, fn1, tp1 = confusion_matrix(Y_test, re_pred).ravel()

        accuracy = (float(tn1)+float(tp1))/(float(tn1)+float(fp1)+float(fn1)+float(tp1))
        oobscore = model.oob_score_
        # True-positive rate minus false-positive rate at the threshold.
        K_S = float(tp1)/(float(tp1)+float(fn1))-float(fp1)/(float(fp1)+float(tn1))
        AUC = roc_auc_score(Y_test, re_pred_A)

        print(accuracy)
        print(oobscore)
        print(K_S)
        print(AUC)

        predict_score.append(accuracy)
        oob_score.append(oobscore)
        KS.append(K_S)
        AUC_A.append(AUC)
        TN.append(tn1)
        FP.append(fp1)
        FN.append(fn1)
        TP.append(tp1)

    save(TN,FP,FN,TP,predict_score,oob_score,KS,AUC_A,'para_phase_%s.xlsx'%(balance))
    print ("success!")

猜你喜欢

转载自blog.csdn.net/Monk_donot_know/article/details/86088247