版权声明:转载请联系博主。 https://blog.csdn.net/sunyaowu315/article/details/82462364
基于python的p2p 贷后指标全自动日报制作
- author : sunyaowu
- begin_time : 2018年8月27日
- end_time : 2018年9月7日
- 说明 :贷后指标,包括贷放、逾期、清理等内容。
一 项目规划
- 项目整体框架:
- ①数据获取
- ②数据处理及分析
- ③生成结构化汇总数据,并制作excel单页可视化报告
- ④定时发送邮件,并通过微信搜索向定向群体发送报告
- 周期规划:一周左右
- 预期结果:日报可以上线工作
二 项目代码
- 代码模块化
- 函数式编程思想
☆ 导入python包
"""
Created on Mon Aug 27 11:44:35 2018
@贷后全自动日报设计
@begin_time:2018-08-27
@end_time:2018-09-07
@author: SunYaowu
"""
import pandas as pd #结构化数据处理
import numpy as np #矩阵数据处理
import pymysql as pms #sql模块
import time
import datetime
from openpyxl import load_workbook #excel文件读写模块
from email.mime.text import MIMEText #邮件模块
from email.mime.multipart import MIMEMultipart
from email.header import Header
import smtplib #传输协议模块
from wxpy import * #微信
import itchat #微信
☆ 定义类
#################################☆.主类#################################
class AfterLoanData(): #贷后日报class
def __init__(self):
    # The constructor is a no-op: no instance attributes are set here.
    pass
二 数据获取
- python 模块:pymysql,从 mysql 数据库提取数据
☆ 数据库query数据模块
#################################☆.功能模块#################################
'''数据库取数模块'''
# 一个传入sql导出数据的函数
def sql_query(self, sql):
    """Run ``sql`` against the MySQL database and return the rows as a DataFrame.

    Args:
        sql: SQL text handed unchanged to ``cursor.execute``.

    Returns:
        pandas.DataFrame built from ``cursor.fetchall()``. The connection is
        opened with ``DictCursor``, so every row is a dict keyed by column name.

    Raises:
        Any error raised while connecting or executing. The transaction is
        rolled back and the connection closed before the error propagates.
    """
    # NOTE(review): the connection settings (password included) are hard-coded
    # placeholders; consider loading them from configuration instead.
    config = {
        'host':'xxxxxxxx',
        'port':xxxx,
        'db':'pxxxxxn',
        'user':'cxxxxx_xxxxxxx_r',
        'password':'Ixxxxxxxexxxxx',
        'charset':'utxxxxx4',
        'cursorclass':pms.cursors.DictCursor,
    }
    # Connect outside the try block: if this fails there is nothing to roll
    # back or close.
    conn = pms.connect(**config)
    try:
        # The ``with`` block closes the cursor, so no explicit close call
        # (previously the misspelled ``cur.colse()``) is needed.
        with conn.cursor() as cur:
            cur.execute(sql)
            rows = cur.fetchall()
        conn.commit()
        return pd.DataFrame(rows)
    except Exception:
        conn.rollback()
        raise
    finally:
        conn.close()
三 数据处理
- python 模块:numpy,pandas,数据规整:筛选、填充、合并
- 进件情况:data_jinjian,data_jinjian_time,data_jinjian_city,data_jinjian_age,data_jinjian_gender
- 回款情况:data_huikuan_time,data_huikuan_city,data_huikuan_age,data_huikuan_gender
- 逾期情况:yuqi_liuru,data_yuqi_time
☆ 其他一些需要多次调用的功能模块
'''获取用户地址模块'''
def user_addr(self):
    """Load the lookup table that maps a code to a city name.

    Reads ``code.pkl`` from the directory named by the module-level ``_path``,
    keeps the rows whose ``coordinate`` column equals ``'success'`` and
    returns ``{str(num): city_final}``.

    Returns:
        dict mapping each ``num`` value (as a string) to its ``city_final``.
    """
    # '\\code.pkl' is the same path text as the former '\code.pkl', written
    # without relying on an invalid escape sequence.
    address_ds = pd.read_pickle(_path + '\\code.pkl')
    # .copy() so the cast below writes into an independent frame rather than
    # a slice of the loaded one.
    address_ds = address_ds.loc[address_ds['coordinate'] == 'success'].copy()
    address_ds['num'] = address_ds['num'].astype(str)
    return dict(zip(address_ds['num'], address_ds['city_final']))
'''合并字段模块'''
def merge_ds(self, ds_list, on_var, how):
    """Merge a sequence of DataFrames left to right on the same key.

    Args:
        ds_list: non-empty sequence of DataFrames.
        on_var: column name (or list of names) passed as ``on`` to ``pd.merge``.
        how: join type passed as ``how`` to ``pd.merge``.

    Returns:
        The merged DataFrame. A single-element ``ds_list`` returns that
        element unchanged.

    Raises:
        ValueError: if ``ds_list`` is empty.
    """
    if len(ds_list) == 0:
        raise ValueError('ds_list must contain at least one DataFrame')
    merged = ds_list[0]
    for frame in ds_list[1:]:
        merged = pd.merge(merged, frame, on=on_var, how=how)
    return merged
'''excel文件读写模块'''
def get_excel(self, indata, sheetname, row_add, col_add):
    """Write the values of ``indata`` into a sheet of the shared workbook.

    The value at row ``i`` / column ``j`` of ``indata`` goes into cell
    ``(row_add + i, col_add + j)``. Only values are written; column headers
    are not.

    Relies on a module-level ``workbook`` object. The ``load_workbook`` and
    ``save`` calls were commented out in this method, so loading and saving
    are expected to happen elsewhere.

    Args:
        indata: DataFrame whose values are written.
        sheetname: name of the worksheet to write into.
        row_add: worksheet row that receives the first data row.
        col_add: worksheet column that receives the first data column.
    """
    # Indexing the workbook by name replaces the deprecated
    # ``get_sheet_by_name``.
    worksheet = workbook[sheetname]
    # Materialise the value matrix once instead of once per cell.
    values = indata.values
    for row_offset, row_values in enumerate(values):
        for col_offset, value in enumerate(row_values):
            worksheet.cell(row=row_offset + row_add, column=col_offset + col_add).value = value
'''字符类型转换模块'''
def char2num(self, indata, varlist):
    """Return a copy of ``indata`` with the listed columns converted to numbers.

    Values that cannot be parsed become NaN (``errors='coerce'``). The input
    frame itself is left untouched.
    """
    result = indata.copy()
    converted = {name: pd.to_numeric(result[name], errors='coerce') for name in varlist}
    for name, series in converted.items():
        result[name] = series
    return result
'''获取时间模块'''
def get_time(self, now=None):
    """Return the reference dates used throughout the report.

    Args:
        now: optional ``datetime.datetime`` to treat as "now"; defaults to
            ``datetime.datetime.now()``.

    Returns:
        A 10-tuple ``(nowtime, nowtime_str, yestime, yestime_str, firstday,
        firstday_str, thismonth_str, lastmonthday, lastmonthday_str,
        lastmonth_str)``. The ``*_str`` day values use ``%Y-%m-%d`` and the
        month values use ``%Y-%m``. ``lastmonthday`` is midnight on the same
        day-of-month in the previous month, clamped to that month's last day.
    """
    import calendar  # local import: only needed for the month-length lookup

    # Today
    nowtime = datetime.datetime.now() if now is None else now
    nowtime_str = nowtime.strftime("%Y-%m-%d")
    # Yesterday
    yestime = nowtime - datetime.timedelta(days=1)
    yestime_str = yestime.strftime("%Y-%m-%d")
    # First day of the current month
    firstday = datetime.datetime(nowtime.year, nowtime.month, 1, 0, 0, 0)
    firstday_str = firstday.strftime("%Y-%m-%d")
    # Current month
    thismonth_str = nowtime.strftime("%Y-%m")
    # Same day in the previous month
    if nowtime.month == 1:
        last_year, last_month = nowtime.year - 1, 12
    else:
        last_year, last_month = nowtime.year, nowtime.month - 1
    # Clamp the day so that e.g. 31 March maps to 28/29 February instead of
    # raising ValueError.
    last_day = min(nowtime.day, calendar.monthrange(last_year, last_month)[1])
    lastmonthday = datetime.datetime(year=last_year, month=last_month, day=last_day)
    lastmonthday_str = lastmonthday.strftime("%Y-%m-%d")
    # Previous month
    lastmonth_str = lastmonthday.strftime("%Y-%m")
    return (nowtime, nowtime_str, yestime, yestime_str, firstday, firstday_str,
            thismonth_str, lastmonthday, lastmonthday_str, lastmonth_str)
四 数据分析
- python 模块:pandas,scipy,分组聚合运算,描述性统计分析。
☆ 进件情况统计模块
#################################1.进件信息#################################
'''获取进件数据'''
def data_jinjian(self):
    """Fetch the loan applications from ``cl_borrow`` and flag the approved ones.

    Returns:
        DataFrame with the selected ``cl_borrow`` columns in a fixed order plus
        a ``pass`` column that is 1 where ``loan_time`` is not null and 0
        otherwise.

    Raises:
        Any error raised by the query or the column selection; a notice is
        printed first. Previously the error was swallowed and the method then
        failed with ``UnboundLocalError`` on return.
    """
    sql = 'select user_id,amount,real_amount,service_fee,interest,time_limit,state,address,coordinate,again,create_time,loan_time from cl_borrow' # loan application data
    columns = ['user_id','amount','real_amount','service_fee','interest','time_limit','again','state','address','coordinate','create_time','loan_time']
    try:
        data_apply = self.sql_query(sql = sql)
        # .copy() so the new column is added to an independent frame.
        data_apply = data_apply[columns].copy()
        data_apply['pass'] = np.where(data_apply['loan_time'].notnull(),1,0)
    except Exception:
        print('Oh No,something error!')
        raise
    return data_apply
'''按进件时间统计1'''
def _data_jinjian_time(self,indata):
    # Aggregate application and approval figures per ``apply_time`` value.
    # ``indata`` must carry the columns apply_time, amount, user_id, pass and
    # create_time. Returns one row per apply_time with overall figures and
    # the same figures restricted to repeat loans (the "复贷-" columns).
    _tmp = indata.copy()
    # Overall figures: amount and count for all rows, then for rows with pass == 1.
    tmp_grp1 = _tmp.groupby(['apply_time'])['amount'].sum().reset_index().rename(columns = {'amount':'申请金额'})
    tmp_grp2 = _tmp.groupby(['apply_time'])['user_id'].agg(pd.Series.count).reset_index().rename(columns = {'user_id':'申请笔数'})
    tmp_grp3 = _tmp.loc[_tmp['pass'] == 1,].groupby(['apply_time'])['amount'].sum().reset_index().rename(columns = {'amount':'放款金额'})
    tmp_grp4 = _tmp.loc[_tmp['pass'] == 1,].groupby(['apply_time'])['user_id'].agg(pd.Series.count).reset_index().rename(columns = {'user_id':'放款笔数'})
    tmp = self.merge_ds([tmp_grp1,tmp_grp2,tmp_grp3,tmp_grp4],on_var = 'apply_time',how = 'outer')
    # The pass rate is formatted as a 2-decimal string here and converted back
    # to a number by char2num on the next line.
    tmp['通过率%'] = tmp[['放款笔数','申请笔数']].apply(lambda x: '{x:.2f}'.format(x = x['放款笔数']/x['申请笔数']) if x['申请笔数'] >0 else '0.00',axis=1 )
    tmp =self.char2num( indata = tmp , varlist = ['通过率%'] )
    # Repeat loans: order each user's rows by create_time and keep every row
    # after that user's first one.
    _tmp2 = indata.sort_values(by = ['user_id','create_time'] )
    _tmp2['贷款次数'] = _tmp2.groupby(['user_id']).cumcount()+1
    _tmp2 = _tmp2.loc[_tmp2['贷款次数']>1,]
    tmp2_grp1 = _tmp2.groupby(['apply_time'])['amount'].sum().reset_index().rename(columns = {'amount':'复贷-申请金额'})
    tmp2_grp2 = _tmp2.groupby(['apply_time'])['user_id'].agg(pd.Series.count).reset_index().rename(columns = {'user_id':'复贷-申请笔数'})
    tmp2_grp3 = _tmp2.loc[_tmp2['pass']==1,].groupby(['apply_time'])['amount'].sum().reset_index().rename(columns = {'amount':'复贷-放款金额'})
    tmp2_grp4 = _tmp2.loc[_tmp2['pass']==1,].groupby(['apply_time'])['user_id'].agg(pd.Series.count).reset_index().rename(columns = {'user_id':'复贷-放款笔数'})
    tmp2 = self.merge_ds([tmp2_grp1,tmp2_grp2,tmp2_grp3,tmp2_grp4 ],on_var = 'apply_time',how = 'outer')
    tmp=pd.merge(tmp,tmp2,on = 'apply_time',how = 'outer')
    # These two ratios stay numeric; unlike '通过率%' they are not rounded to
    # two decimals.
    tmp['复贷-通过率%'] = tmp[['复贷-放款笔数','复贷-申请笔数']].apply(lambda x: x['复贷-放款笔数'] / x['复贷-申请笔数'] if x['复贷-申请笔数'] > 0 else 0,axis = 1 )
    tmp['复贷通过占比%'] = tmp[['复贷-放款笔数','放款笔数']].apply(lambda x: x['复贷-放款笔数'] / x['放款笔数'] if x['放款笔数'] > 0 else 0,axis = 1 )
    # Fix the output column order.
    tmp = tmp[['apply_time','申请金额','申请笔数','放款金额','放款笔数','通过率%','复贷-申请金额','复贷-申请笔数','复贷-放款金额','复贷-放款笔数','复贷通过占比%','复贷-通过率%']]
    return tmp
'''按进件时间统计2'''
def data_jinjian_time(self, indata):
    """Summarise applications per day, per day of the current week, and per month.

    Every period that lies between the first application and now gets a row,
    zero-filled when it has no data. The weekly and monthly tables are also
    written to sheet ``jietiao_apply_day`` through ``get_excel``.

    Relies on the module-level ``nowtime``. The calendar range is derived from
    ``indata`` itself; it used to read a global ``data_apply`` instead of the
    argument.

    Args:
        indata: application frame with a datetime ``create_time`` column plus
            the columns ``_data_jinjian_time`` needs.

    Returns:
        Tuple ``(daily, weekly, monthly)`` of DataFrames keyed by ``申请时间``.
    """
    day_fmt = "%Y-%m-%d"
    first_apply = indata['create_time'].min()
    # Whole days elapsed between the first application and now.
    span_days = int((nowtime - first_apply) / np.timedelta64(1, 'D'))
    elapsed_days = [first_apply + datetime.timedelta(days=diff) for diff in range(span_days)]

    def calendar_frame(labels):
        # One-column frame of period labels; object dtype keeps the merge key
        # compatible even when the list is empty.
        return pd.DataFrame({'apply_time': pd.Series(sorted(set(labels)), dtype=object)})

    def fill_calendar(stats, calendar):
        # Add the periods without data, zero-fill them and sort by period.
        filled = pd.merge(stats, calendar, on='apply_time', how='outer').rename(columns={'apply_time': '申请时间'})
        return filled.fillna(0).sort_values(['申请时间'])

    # Daily
    ds1 = indata.copy()
    ds1['apply_time'] = ds1['create_time'].apply(lambda x: x.strftime(day_fmt))
    tmp_1 = fill_calendar(self._data_jinjian_time(indata=ds1),
                          calendar_frame([day.strftime(day_fmt) for day in elapsed_days]))

    # Weekly: the Monday found among the previous seven days starts the week;
    # rows before it get NaN and drop out of the grouping.
    week_cutday = [(nowtime - datetime.timedelta(days=diff)).strftime(day_fmt)
                   for diff in range(1, 8)
                   if (nowtime - datetime.timedelta(days=diff)).weekday() == 0]
    week_start = pd.to_datetime(week_cutday[0])
    ds2 = indata.copy()
    ds2['apply_time'] = ds2['create_time'].apply(lambda x: x.strftime(day_fmt) if x >= week_start else np.nan)
    week_days = [(week_start + datetime.timedelta(days=diff)).strftime(day_fmt)
                 for diff in range(0, 7)
                 if week_start + datetime.timedelta(days=diff) <= nowtime]
    tmp_2 = fill_calendar(self._data_jinjian_time(indata=ds2), calendar_frame(week_days))
    # Written to Excel for the report.
    self.get_excel(indata=tmp_2, sheetname='jietiao_apply_day', row_add=9, col_add=1)

    # Monthly
    ds3 = indata.copy()
    ds3['apply_time'] = ds3['create_time'].apply(lambda x: x.strftime("%Y-%m"))
    tmp_3 = fill_calendar(self._data_jinjian_time(indata=ds3),
                          calendar_frame([day.strftime("%Y-%m") for day in elapsed_days]))
    # Written to Excel for the report.
    self.get_excel(indata=tmp_3, sheetname='jietiao_apply_day', row_add=24, col_add=2)
    return tmp_1, tmp_2, tmp_3
def jinjian_dayReport(self,indata):
    # Build the application section of the daily report and write it to sheet
    # 'jietiao_apply_day' starting at row 3, column 1.
    # Uses names that are not defined in this method and are expected at
    # module level: nowtime, nowtime_str, yestime_str, thismonth_str,
    # lastmonthday, lastmonthday_str, lastmonth_str.
    # Returns the report frame (tmp_day).
    # Daily data
    ds1 = indata.copy()
    # Keep the day label only for today, yesterday and the same day last
    # month; all other rows get NaN and drop out of the groupby below.
    ds1['apply_time'] = ds1['create_time'].apply(lambda x: x.strftime("%Y-%m-%d" ) if (x.strftime("%Y-%m-%d" ) == nowtime_str) or (x.strftime("%Y-%m-%d" ) == yestime_str) or (x.strftime("%Y-%m-%d" ) == lastmonthday_str) else np.nan )
    ds1_grp1 = ds1.groupby(['apply_time'])['user_id'].agg(pd.Series.count).reset_index().rename(columns = {'user_id':'申请笔数'})
    ds1_grp2 = ds1.loc[ds1['pass'] == 1,].groupby(['apply_time'])['user_id'].agg(pd.Series.count).reset_index().rename(columns = {'user_id':'通过笔数'})
    # Month-to-date and all-time totals as of now, attached to today's row.
    ds1_tmp = pd.DataFrame({'apply_time':[nowtime_str],
        '本月申请笔数':[ds1.loc[(ds1['create_time'].apply(lambda x: x.strftime("%Y-%m")) == thismonth_str) & (ds1['create_time'] <= nowtime),'user_id'].shape[0]],
        '本月通过笔数':[ds1.loc[(ds1['create_time'].apply(lambda x: x.strftime("%Y-%m")) == thismonth_str) & (ds1['create_time'] <= nowtime) & (ds1['pass'] == 1),'user_id'].shape[0]],
        '历史申请笔数':[ds1.loc[ds1['create_time'] <= nowtime,'user_id'].shape[0]],
        '历史通过笔数':[ds1.loc[(ds1['create_time'] <= nowtime) & (ds1['pass'] == 1),'user_id'].shape[0]],
        })
    #ds1_tmp = self.merge_ds([ds1_tmp,ds1_grp1,ds1_grp2],on_var = 'apply_time',how = 'outer')
    # The same totals as of the same day last month. The '本月…' column names
    # are reused so the row stacks under today's row in the concat below.
    _ds1_tmp = pd.DataFrame({'apply_time':[lastmonthday_str],
        '本月申请笔数':[ds1.loc[(ds1['create_time'].apply(lambda x: x.strftime("%Y-%m")) == lastmonth_str) & (ds1['create_time'] <= lastmonthday),'user_id'].shape[0]],
        '本月通过笔数':[ds1.loc[(ds1['create_time'].apply(lambda x: x.strftime("%Y-%m")) == lastmonth_str) & (ds1['create_time'] <= lastmonthday) & (ds1['pass'] == 1),'user_id'].shape[0]],
        '历史申请笔数':[ds1.loc[ds1['create_time'] <= lastmonthday,'user_id'].shape[0]],
        '历史通过笔数':[ds1.loc[(ds1['create_time'] <= lastmonthday) & (ds1['pass'] == 1),'user_id'].shape[0]],
        })
    ds1_tmp = pd.concat([ds1_tmp,_ds1_tmp])
    # Repeat loans: every row after a user's first one, ordered by create_time.
    _ds1 = ds1.copy().sort_values(by = ['user_id','create_time'] )
    _ds1['贷款次数'] = _ds1.groupby(['user_id']).cumcount()+1
    _ds1 = _ds1.loc[_ds1['贷款次数']>1,]
    _ds1_grp1 = _ds1.groupby(['apply_time'])['user_id'].agg(pd.Series.count).reset_index().rename(columns = {'user_id':'复贷-申请笔数'})
    _ds1_grp2 = _ds1.loc[_ds1['pass']==1,].groupby(['apply_time'])['user_id'].agg(pd.Series.count).reset_index().rename(columns = {'user_id':'复贷-通过笔数'})
    # All-time repeat-loan totals; only a row for today is built here.
    ds2_tmp = pd.DataFrame({'apply_time':[nowtime_str],
        '复贷-历史申请笔数':[_ds1.loc[_ds1['create_time'] <= nowtime,'user_id'].shape[0]],
        '复贷-历史通过笔数':[_ds1.loc[(_ds1['create_time'] <= nowtime) & (_ds1['pass'] == 1),'user_id'].shape[0]]
        })
    tmp_day = self.merge_ds([ds1_grp1,ds1_grp2,ds1_tmp,_ds1_grp1,_ds1_grp2,ds2_tmp],on_var = 'apply_time',how = 'outer').rename(columns = {'apply_time':'申请时间'})
    # Pass rates are stored as strings: 4 decimals when the denominator is
    # positive, otherwise the literal '0.00'.
    tmp_day['通过率'] = tmp_day[['申请笔数','通过笔数']].apply(lambda x: '{x:.4f}'.format(x = x['通过笔数'] / x['申请笔数']) if x['申请笔数'] > 0 else '0.00',axis = 1 )
    tmp_day['本月通过率'] = tmp_day[['本月申请笔数','本月通过笔数']].apply(lambda x: '{x:.4f}'.format(x = x['本月通过笔数'] / x['本月申请笔数']) if x['本月申请笔数'] > 0 else '0.00',axis = 1 )
    tmp_day['历史通过率'] = tmp_day[['历史申请笔数','历史通过笔数']].apply(lambda x: '{x:.4f}'.format(x = x['历史通过笔数'] / x['历史申请笔数']) if x['历史申请笔数'] > 0 else '0.00',axis = 1 )
    tmp_day['复贷-通过率'] = tmp_day[['复贷-申请笔数','复贷-通过笔数']].apply(lambda x: '{x:.4f}'.format(x = x['复贷-通过笔数'] / x['复贷-申请笔数']) if x['复贷-申请笔数'] > 0 else '0.00',axis = 1 )
    tmp_day['复贷-历史通过率'] = tmp_day[['复贷-历史申请笔数','复贷-历史通过笔数']].apply(lambda x: '{x:.4f}'.format(x = x['复贷-历史通过笔数'] / x['复贷-历史申请笔数']) if x['复贷-历史申请笔数'] > 0 else '0.00',axis = 1 )
    # Fix the column order; get_excel writes cells by position.
    tmp_day = tmp_day[['申请时间','申请笔数','通过笔数','通过率','本月申请笔数','本月通过笔数','本月通过率','历史申请笔数','历史通过笔数','历史通过率','复贷-申请笔数','复贷-通过笔数','复贷-通过率','复贷-历史申请笔数','复贷-历史通过笔数','复贷-历史通过率']]
    # Write the application figures to Excel for the report.
    self.get_excel(indata = tmp_day,sheetname = 'jietiao_apply_day',row_add = 3,col_add = 1)
    return tmp_day
'''按进件城市统计'''
def data_jinjian_city(self, indata, cutoff):
    """Application and approval statistics per city for the last ``cutoff`` days.

    The city is looked up from the first six characters of the user's
    ``id_no`` through ``user_addr``; rows with no matching city are dropped.
    Relies on the module-level ``nowtime_str``.

    Args:
        indata: application frame with ``user_id``, ``loan_time`` and a
            datetime ``create_time``.
        cutoff: maximum age of an application in days, counted from
            ``nowtime_str``.

    Returns:
        Tuple ``(all_cities, top)``. ``top`` is the ten cities with the most
        applications plus an aggregated ``'其他'`` row. With ten or fewer
        cities both elements are the full table.
    """
    columns = ['city','pass_number','apply_number','apply_ratio%','pass_ratio%','pass_rate%']

    def pct(numerator, denominator):
        # Two-decimal percentage string; '0.00' when the denominator is not positive.
        return '{x:.2f}'.format(x = 100 * numerator / denominator) if denominator > 0 else '0.00'

    ds1 = indata.copy()
    ds1['pass'] = np.where(ds1['loan_time'].notnull(), 1, 0)
    info_query = self.sql_query('select user_id,id_no from cl_user_base_info')
    ds1 = pd.merge(ds1, info_query, on='user_id', how='left')
    # Build the recency mask from the merged frame itself so that its index
    # always lines up with the rows being filtered.
    days_ago = (pd.to_datetime(nowtime_str) - ds1['create_time']) / np.timedelta64(1, 'D')
    ds1_tmp = ds1.loc[days_ago <= cutoff].reset_index(drop=True)
    # Map the first six characters of the ID number to a city. Users without
    # an ID (no match in the left join) get no city and are dropped.
    addr = self.user_addr()
    ds1_tmp['city'] = ds1_tmp['id_no'].apply(lambda x: addr.get(str(x)[:6], None) if pd.notnull(x) else None)
    ds1_tmp = ds1_tmp.loc[ds1_tmp['city'].notnull()].reset_index(drop=True)
    # apply_number = applications, pass_number = approved applications
    tmp = ds1_tmp.groupby(['city'])['pass'].agg(['sum', 'count']).reset_index()
    tmp = tmp.rename(columns={'sum': 'pass_number', 'count': 'apply_number'})
    # Totals are computed once instead of once per row.
    total_apply = tmp['apply_number'].sum()
    total_pass = tmp['pass_number'].sum()
    tmp['apply_ratio%'] = [pct(n, total_apply) for n in tmp['apply_number']]
    tmp['pass_ratio%'] = [pct(n, total_pass) for n in tmp['pass_number']]
    tmp['pass_rate%'] = [pct(p, a) for p, a in zip(tmp['pass_number'], tmp['apply_number'])]
    # Rank cities by number of applications.
    tmp = tmp.sort_values(by=['apply_number'], ascending=False).reset_index(drop=True)
    if tmp.shape[0] <= 10:
        # NOTE(review): in this branch the ratio columns stay formatted
        # strings, while the branch below converts them to numbers. Kept as is.
        return tmp[columns], tmp[columns]
    top = tmp.loc[:9]
    rest = tmp.loc[10:]
    rest_apply = rest['apply_number'].sum()
    rest_pass = rest['pass_number'].sum()
    last_row = pd.DataFrame({
        'city': ['其他'],
        'pass_number': [rest_pass],
        'apply_number': [rest_apply],
        'apply_ratio%': pd.to_numeric([pct(rest_apply, total_apply)]),
        'pass_ratio%': pd.to_numeric([pct(rest_pass, total_pass)]),
        'pass_rate%': pd.to_numeric([pct(rest_pass, rest_apply)]),
    })
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    tmp1 = pd.concat([top, last_row], ignore_index=True)
    tmp1 = self.char2num(indata=tmp1, varlist=['apply_ratio%','pass_ratio%','pass_rate%'])
    tmp = self.char2num(indata=tmp, varlist=['apply_ratio%','pass_ratio%','pass_rate%'])
    return tmp[columns], tmp1[columns]
'''用户年龄统计'''
def data_jinjian_age(self, indata, cutoff):
    """Application and approval statistics per user age for the last ``cutoff`` days.

    Relies on the module-level ``nowtime_str``. Rows without an ``age`` value
    are dropped.

    Args:
        indata: application frame with ``user_id``, ``loan_time`` and a
            datetime ``create_time``.
        cutoff: maximum age of an application in days, counted from
            ``nowtime_str``.

    Returns:
        DataFrame with one row per age and the columns ``pass_number``,
        ``apply_number`` and three percentage columns formatted as
        two-decimal strings.
    """
    def pct(numerator, denominator):
        # Two-decimal percentage string; '0.00' when the denominator is not positive.
        return '{x:.2f}'.format(x = 100 * numerator / denominator) if denominator > 0 else '0.00'

    ds1 = indata.copy()
    ds1['pass'] = np.where(ds1['loan_time'].notnull(), 1, 0)
    info_query = self.sql_query('select user_id,age from cl_user_base_info')
    ds1 = pd.merge(ds1, info_query, on='user_id', how='left')
    # Build the recency mask from the merged frame itself so that its index
    # always lines up with the rows being filtered.
    days_ago = (pd.to_datetime(nowtime_str) - ds1['create_time']) / np.timedelta64(1, 'D')
    ds1_tmp = ds1.loc[(days_ago <= cutoff) & ds1['age'].notnull()].reset_index(drop=True)
    # apply_number = applications, pass_number = approved applications
    tmp = ds1_tmp.groupby(['age'])['pass'].agg(['sum', 'count']).reset_index()
    tmp = tmp.rename(columns={'sum': 'pass_number', 'count': 'apply_number'})
    # Totals are computed once instead of once per row.
    total_apply = tmp['apply_number'].sum()
    total_pass = tmp['pass_number'].sum()
    tmp['apply_ratio%'] = [pct(n, total_apply) for n in tmp['apply_number']]
    tmp['pass_ratio%'] = [pct(n, total_pass) for n in tmp['pass_number']]
    tmp['pass_rate%'] = [pct(p, a) for p, a in zip(tmp['pass_number'], tmp['apply_number'])]
    return tmp[['age','pass_number','apply_number','apply_ratio%','pass_ratio%','pass_rate%']]
'''用户性别统计'''
def data_jinjian_gender(self, indata, cutoff):
    """Application and approval statistics per user sex for the last ``cutoff`` days.

    Relies on the module-level ``nowtime_str``. Rows without a ``sex`` value
    are dropped.

    Args:
        indata: application frame with ``user_id``, ``loan_time`` and a
            datetime ``create_time``.
        cutoff: maximum age of an application in days, counted from
            ``nowtime_str``.

    Returns:
        DataFrame with one row per sex and the columns ``pass_number``,
        ``apply_number`` and three percentage columns formatted as
        two-decimal strings.
    """
    def pct(numerator, denominator):
        # Two-decimal percentage string; '0.00' when the denominator is not positive.
        return '{x:.2f}'.format(x = 100 * numerator / denominator) if denominator > 0 else '0.00'

    ds1 = indata.copy()
    ds1['pass'] = np.where(ds1['loan_time'].notnull(), 1, 0)
    info_query = self.sql_query('select user_id,sex from cl_user_base_info')
    ds1 = pd.merge(ds1, info_query, on='user_id', how='left')
    # Build the recency mask from the merged frame itself so that its index
    # always lines up with the rows being filtered.
    days_ago = (pd.to_datetime(nowtime_str) - ds1['create_time']) / np.timedelta64(1, 'D')
    ds1_tmp = ds1.loc[(days_ago <= cutoff) & ds1['sex'].notnull()].reset_index(drop=True)
    # apply_number = applications, pass_number = approved applications
    tmp = ds1_tmp.groupby(['sex'])['pass'].agg(['sum', 'count']).reset_index()
    tmp = tmp.rename(columns={'sum': 'pass_number', 'count': 'apply_number'})
    # Totals are computed once instead of once per row.
    total_apply = tmp['apply_number'].sum()
    total_pass = tmp['pass_number'].sum()
    tmp['apply_ratio%'] = [pct(n, total_apply) for n in tmp['apply_number']]
    tmp['pass_ratio%'] = [pct(n, total_pass) for n in tmp['pass_number']]
    tmp['pass_rate%'] = [pct(p, a) for p, a in zip(tmp['pass_number'], tmp['apply_number'])]
    return tmp[['sex','pass_number','apply_number','apply_ratio%','pass_ratio%','pass_rate%']]
#################################2.放款信息#################################
def data_fangkuan(self):
    """Fetch the repayment records from ``cl_borrow_repay``.

    Returns:
        DataFrame with the selected columns in a fixed order.

    Raises:
        Any error raised by the query or the column selection; a notice is
        printed first. Previously the error was swallowed and the method then
        failed with ``UnboundLocalError`` on return.
    """
    sql = 'select user_id,amount,repay_time,penalty_amout,penalty_day,create_time,principle,interest,already_paid,finish_pay_time from cl_borrow_repay' # repayment data
    columns = ['user_id','principle','amount','interest','already_paid','penalty_amout',
               'create_time','repay_time','finish_pay_time','penalty_day']
    try:
        data_borrow = self.sql_query(sql)
        data_borrow = data_borrow[columns]
    except Exception:
        print('Oh No,something error!')
        raise
    return data_borrow
☆ 回款数据统计模块
#################################3.回款信息#################################
'''逾期用户进件时间统计'''
def data_huikuan_time(self, indata):
    """Derive repayment and overdue flags for every repayment record.

    Args:
        indata: repayment frame with ``user_id``, ``penalty_day`` and the
            datetime columns ``create_time``, ``repay_time`` and
            ``finish_pay_time``.

    Returns:
        DataFrame with the columns user_id, 放款日期, repay_time,
        finish_pay_time, 当前是否还款, 当前是否逾期, 当前逾期天数, 流入, 流入催回
        and 催回用时. 催回用时 holds the days between ``repay_time`` and
        ``finish_pay_time`` for recovered rows, 0 for rows that never became
        overdue, and the string '未催回' for overdue rows not yet recovered.
    """
    ds = indata.copy()
    ds['放款日期'] = ds['create_time'].apply(lambda x: x.strftime("%Y-%m-%d"))
    repaid = ds['finish_pay_time'].notnull()
    ds['当前是否还款'] = np.where(repaid, 1, 0)
    ds['当前是否逾期'] = np.where(~repaid & (ds['penalty_day'] > 0), 1, 0)
    ds['当前逾期天数'] = np.where(repaid, 0, ds['penalty_day'])
    ds['流入'] = np.where(ds['penalty_day'] > 0, 1, 0)
    recovered = (ds['流入'] == 1) & (ds['当前逾期天数'] == 0)
    ds['流入催回'] = np.where(recovered, 1, 0)
    days_to_recover = (ds['finish_pay_time'] - ds['repay_time']) / np.timedelta64(1, 'D')
    # The column mixes numbers with the '未催回' marker, so it is created with
    # object dtype up front. Writing a string into a float column is a
    # deprecated incompatible-dtype assignment in recent pandas.
    ds['催回用时'] = np.where(recovered, days_to_recover, 0).astype(object)
    ds.loc[(ds['流入'] == 1) & ~recovered, '催回用时'] = '未催回'
    return ds[['user_id','放款日期','repay_time','finish_pay_time','当前是否还款','当前是否逾期','当前逾期天数','流入','流入催回','催回用时']]
def huikuan_dayReport(self, indata):
    """Build the repayment section of the daily report and write it to Excel.

    Four tables go to sheet ``jietiao_borrow_day`` through ``get_excel``:
    the day summary (row 3), the overdue distribution (row 9), the last
    30 days of repayments (row 14) and the day-by-day overdue/inflow series
    for this month and last month (column 9, rows 14 and 47).

    Relies on the module-level ``nowtime``, ``nowtime_str``, ``yestime_str``,
    ``thismonth_str``, ``lastmonthday`` and ``lastmonthday_str``.

    Fixes over the previous version: an overdue age of exactly 30 days is now
    counted in the ``D11~D30#`` bucket (it used to fall between the ``< 30``
    and ``> 30`` bounds), and days on which nothing was repaid report their
    full due count as unpaid instead of NaN.

    Args:
        indata: repayment frame with ``user_id``, datetime ``repay_time`` and
            ``finish_pay_time``, and the derived columns ``流入``, ``流入催回``
            and ``当前逾期天数``.

    Returns:
        Tuple ``(day_summary, overdue_distribution, last_30_days)``.
    """
    day_fmt = "%Y-%m-%d"
    report_days = (nowtime_str, yestime_str, lastmonthday_str)

    def report_day(ts):
        # Day string when ts falls on one of the three report days, else NaN.
        day = ts.strftime(day_fmt)
        return day if day in report_days else np.nan

    def count(mask):
        # Number of rows selected by a boolean mask.
        return int(mask.sum())

    def summarise(frame, prefix):
        # Per-day due/repaid counts plus month-to-date and all-time totals.
        # ``prefix`` is prepended to every metric column name.
        frame = frame.copy()
        # object dtype keeps the merge key compatible when no row matches.
        frame['还款时间'] = frame['repay_time'].apply(report_day).astype(object)
        paid = frame['finish_pay_time'].notnull()
        due = frame['repay_time'] <= nowtime
        this_month = frame['repay_time'].apply(lambda x: x.strftime("%Y-%m")) == thismonth_str
        due_by_day = frame.groupby(['还款时间'])['user_id'].count().reset_index().rename(columns={'user_id': prefix + '应还笔数'})
        paid_by_day = frame.loc[paid].groupby(['还款时间'])['user_id'].count().reset_index().rename(columns={'user_id': prefix + '已还笔数'})
        totals = pd.DataFrame({
            '还款时间': [nowtime_str],
            prefix + '本月累计应还': [count(this_month & due)],
            prefix + '本月累计流入': [count(this_month & due & (frame['流入'] == 1))],
            prefix + '历史到期应还': [count(due)],
            prefix + '历史到期已还': [count(due & paid)],
        })
        return [due_by_day, paid_by_day, totals]

    # Repeat loans: every repayment after a user's first one, by repay_time.
    repeat = indata.copy().sort_values(by=['user_id', 'repay_time'])
    repeat['贷款次数'] = repeat.groupby(['user_id']).cumcount() + 1
    repeat = repeat.loc[repeat['贷款次数'] > 1].copy()

    # 1. Day summary (today, yesterday, same day last month)
    ds1_tmp = self.merge_ds(summarise(indata, '') + summarise(repeat, '复贷-'), on_var='还款时间', how='outer')
    # Make sure the same-day-last-month row exists even without data.
    format_month = pd.DataFrame({'还款时间': [lastmonthday_str]})
    ds1_tmp = pd.merge(ds1_tmp, format_month, on='还款时间', how='outer')
    ds1_tmp = ds1_tmp.fillna(0).sort_values(['还款时间'])
    ds1_tmp = ds1_tmp[['还款时间','应还笔数','已还笔数','本月累计应还','本月累计流入','历史到期应还','历史到期已还','复贷-应还笔数','复贷-已还笔数','复贷-本月累计应还','复贷-本月累计流入','复贷-历史到期应还','复贷-历史到期已还']]
    self.get_excel(indata=ds1_tmp, sheetname='jietiao_borrow_day', row_add=3, col_add=1)

    # 2. Overdue distribution as of now
    due_all = indata['repay_time'] <= nowtime
    paid_all = indata['finish_pay_time'].notnull()
    overdue_days = indata['当前逾期天数']
    due_repeat = repeat['repay_time'] <= nowtime
    # NOTE(review): 流入数 / 催回数 are counted on repeat loans only, as in the
    # original code. Confirm that this is intended.
    ds2_tmp = pd.DataFrame({
        '还款时间': [nowtime_str],
        '历史到期应还': [count(due_all)],
        '历史到期已还': [count(due_all & paid_all)],
        'C': [count(due_all & (overdue_days == 0))],
        'D1~D3#': [count(due_all & (overdue_days > 0) & (overdue_days < 4))],
        'D4~D10#': [count(due_all & (overdue_days >= 4) & (overdue_days < 11))],
        # Upper bound is inclusive so that day 30 is counted here.
        'D11~D30#': [count(due_all & (overdue_days >= 11) & (overdue_days <= 30))],
        'D30+#': [count(due_all & (overdue_days > 30))],
        '复贷-到期应还': [count(due_repeat)],
        '复贷-到期已还': [count(due_repeat & repeat['finish_pay_time'].notnull())],
        '流入数': [count(due_repeat & (repeat['流入'] == 1))],
        '催回数': [count(due_repeat & (repeat['流入催回'] == 1))],
    })
    self.get_excel(indata=ds2_tmp, sheetname='jietiao_borrow_day', row_add=9, col_add=1)

    # 3. Repayments due in the last 30 days
    window_start = (nowtime + datetime.timedelta(days=-30)).strftime('%Y-%m-%d')

    def recent_day(ts):
        # Day string when ts lies inside the 30-day window, else NaN.
        day = ts.strftime(day_fmt)
        return day if window_start <= day <= nowtime_str else np.nan

    ds3 = indata.copy()
    ds3['还款时间'] = ds3['repay_time'].apply(recent_day).astype(object)
    ds3_grp1 = ds3.groupby(['还款时间'])['user_id'].count().reset_index().rename(columns={'user_id': '应还笔数'})
    ds3_grp2 = ds3.loc[ds3['finish_pay_time'].notnull()].groupby(['还款时间'])['user_id'].count().reset_index().rename(columns={'user_id': '已还笔数'})
    ds3_tmp = pd.merge(ds3_grp1, ds3_grp2, on='还款时间', how='outer')
    # A day with no repayments has no row in ds3_grp2; treat that as 0 repaid.
    ds3_tmp['已还笔数'] = ds3_tmp['已还笔数'].fillna(0)
    ds3_tmp['未还笔数'] = ds3_tmp['应还笔数'] - ds3_tmp['已还笔数']
    ds3_tmp['逾期率'] = ds3_tmp[['未还笔数','应还笔数']].apply(lambda x: x['未还笔数'] / x['应还笔数'] if x['应还笔数'] > 0 else 0, axis=1)
    ds3_tmp = ds3_tmp[['还款时间','应还笔数','未还笔数','逾期率']].sort_values(['还款时间'])
    self.get_excel(indata=ds3_tmp, sheetname='jietiao_borrow_day', row_add=14, col_add=1)

    # 4. Overdue and inflow per day, this month and last month
    ds4 = indata.copy()
    this_month = self.yuqi_liuru(intime=nowtime, ds4=ds4)
    self.get_excel(indata=this_month.sort_values(['还款时间']), sheetname='jietiao_borrow_day', row_add=14, col_add=9)
    last_month = self.yuqi_liuru(intime=lastmonthday, ds4=ds4)
    self.get_excel(indata=last_month.sort_values(['还款时间']), sheetname='jietiao_borrow_day', row_add=47, col_add=9)
    return ds1_tmp, ds2_tmp, ds3_tmp
def yuqi_liuru(self, intime, ds4):
    """Day-by-day overdue and inflow figures from the 1st of the month up to ``intime``.

    For every day ``d`` of ``intime``'s month up to ``intime``:

    * 逾期数: repayments due on or before ``d`` minus those settled on or
      before ``d``
    * 逾期率: 逾期数 divided by the repayments due on or before ``d``
    * 新增逾期: repayments due exactly on ``d`` that have no
      ``finish_pay_time``
    * 流入率: 新增逾期 divided by the repayments due exactly on ``d``

    The input frame is not modified.

    Args:
        intime: last day (``datetime.datetime``) of the range.
        ds4: repayment frame with ``user_id`` and the datetime columns
            ``repay_time`` and ``finish_pay_time``.

    Returns:
        DataFrame with the columns 还款时间, 逾期数, 逾期率, 新增逾期 and 流入率,
        sorted by 还款时间.
    """
    day_fmt = "%Y-%m-%d"
    # Loop invariants: the due day of every record and whether it was settled.
    repay_day = ds4['repay_time'].apply(lambda x: x.strftime(day_fmt))
    paid = ds4['finish_pay_time'].notnull()

    daily_rows = []
    daytime = intime
    # Walk backwards from intime until the month changes. The loop always
    # runs at least once, so daily_rows is never empty.
    while daytime.month == intime.month:
        day_str = daytime.strftime(day_fmt)
        due_so_far = repay_day <= day_str
        due_today = repay_day == day_str
        # Comparing with the day string means midnight at the start of that day.
        settled = paid & (ds4['finish_pay_time'] <= day_str)

        due_total = int(due_so_far.sum())
        overdue = due_total - int((due_so_far & settled).sum())
        due_today_total = int(due_today.sum())
        new_overdue = due_today_total - int((due_today & paid).sum())
        daily_rows.append(pd.DataFrame({
            '还款时间': [day_str],
            '逾期数': [overdue],
            '逾期率': [overdue / due_total if due_total > 0 else 0],
            '新增逾期': [new_overdue],
            # new_overdue > 0 implies due_today_total > 0, so this cannot
            # divide by zero.
            '流入率': [new_overdue / due_today_total if new_overdue > 0 else 0],
        }))
        daytime = daytime - datetime.timedelta(days=1)
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    month_box = pd.concat(daily_rows).sort_values(['还款时间'])
    return month_box
def data_yuqi_time(self, indata):
    """Overdue-bucket, inflow and recovery counts per loan date (放款日期).

    Relies on the module-level ``nowtime``.

    Fixes over the previous version: the D3 share is computed from 笔数D3 (it
    read 笔数D10, a column not even selected for that row), and the calendar
    fill uses the 放款日期 column of ``indata`` (it referenced an undefined
    ``uer_data`` and the non-existent columns 申请时间 / 放款时间).

    Args:
        indata: frame with ``user_id``, ``放款日期`` (``%Y-%m-%d`` strings),
            ``当前逾期天数``, ``流入`` and ``流入催回``.

    Returns:
        DataFrame with one row per loan date between the first loan date and
        ``nowtime``; dates without loans are zero-filled.
    """
    def pct(numerator, denominator):
        # Two-decimal percentage string; '0' when the denominator is not positive.
        return '{x:.2f}'.format(x = 100 * numerator / denominator) if denominator > 0 else '0'

    ds = indata.copy()
    overdue_days = ds['当前逾期天数']
    # Loans per loan date
    cal1 = ds.groupby(['放款日期'])['user_id'].count().reset_index().rename(columns={'user_id': '放款笔数'})
    # Overdue buckets: D3 = [3, 10), D10 = [10, 30), M1 = [30, 60), M2+ = [60, inf)
    ds['笔数D3'] = np.where((overdue_days >= 3) & (overdue_days < 10), 1, 0)
    ds['笔数D10'] = np.where((overdue_days >= 10) & (overdue_days < 30), 1, 0)
    ds['笔数M1'] = np.where((overdue_days >= 30) & (overdue_days < 60), 1, 0)
    ds['笔数M2+'] = np.where(overdue_days >= 60, 1, 0)
    # One per-date sum for every bucket, plus inflow and recovered counts.
    count_columns = ['笔数D3', '笔数D10', '笔数M1', '笔数M2+', '流入', '流入催回']
    sums = [ds.groupby(['放款日期'])[name].sum().reset_index() for name in count_columns]
    tmp = self.merge_ds(ds_list=[cal1] + sums, on_var='放款日期', how='outer')

    # Shares of the loans made that day; recovery is a share of the inflow.
    share_columns = [
        ('笔数D3占比', '笔数D3', '放款笔数'),
        ('笔数D10占比', '笔数D10', '放款笔数'),
        ('笔数M1占比', '笔数M1', '放款笔数'),
        ('笔数M2+占比', '笔数M2+', '放款笔数'),
        ('流入笔数占比', '流入', '放款笔数'),
        ('催回笔数占比', '流入催回', '流入'),
    ]
    for target, numerator, denominator in share_columns:
        tmp[target] = [pct(n, d) for n, d in zip(tmp[numerator], tmp[denominator])]
    tmp = tmp[['放款日期','放款笔数','笔数D3','笔数D3占比','笔数D10','笔数D10占比','笔数M1','笔数M1占比','笔数M2+','笔数M2+占比','流入','流入笔数占比','流入催回','催回笔数占比']]
    tmp = self.char2num(indata=tmp, varlist=['笔数D3占比','笔数D10占比','笔数M1占比','笔数M2+占比','流入笔数占比','催回笔数占比'])

    # Add a zero-filled row for every loan date in range that has no loans.
    first_day = pd.to_datetime(ds['放款日期']).min()
    if pd.notnull(first_day):
        span_days = int((nowtime - first_day) / np.timedelta64(1, 'D'))
        all_days = [(first_day + datetime.timedelta(days=diff)).strftime("%Y-%m-%d") for diff in range(span_days)]
        format_month = pd.DataFrame({'放款日期': pd.Series(all_days, dtype=object)})
        tmp = pd.merge(tmp, format_month, on='放款日期', how='outer')
    tmp = tmp.fillna(0)
    return tmp
'''逾期用户进件城市统计'''
def data_huikuan_city(self, indata=None, cutoff=None):
    """Per-city statistics for the repayment section.

    The previous body was a verbatim copy of ``data_jinjian_city`` but the
    signature took no ``indata`` / ``cutoff``, so any call failed with
    ``NameError``. The parameters are now accepted, matching
    ``data_huikuan_age``, and the work is delegated to ``data_jinjian_city``
    so the two cannot drift apart.

    NOTE(review): the section heading suggests this should describe overdue
    users. The copied logic computes application pass rates. Confirm the
    intended metric.

    Args:
        indata: frame passed through to ``data_jinjian_city``.
        cutoff: look-back window in days, passed through unchanged.

    Returns:
        Whatever ``data_jinjian_city(indata, cutoff)`` returns: the tuple
        ``(all_cities, top)``.

    Raises:
        TypeError: if ``indata`` or ``cutoff`` is not supplied.
    """
    if indata is None or cutoff is None:
        raise TypeError('data_huikuan_city() requires both indata and cutoff')
    return self.data_jinjian_city(indata=indata, cutoff=cutoff)
'''逾期用户年龄统计'''
def data_huikuan_age(self, indata, cutoff):
    """Per-age statistics for the repayment section.

    The previous body was a verbatim copy of ``data_jinjian_age``. It now
    delegates to that method so the two cannot drift apart.

    NOTE(review): the section heading suggests this should describe overdue
    users. The copied logic computes application pass rates. Confirm the
    intended metric.

    Args:
        indata: frame passed through to ``data_jinjian_age``.
        cutoff: look-back window in days, passed through unchanged.

    Returns:
        Whatever ``data_jinjian_age(indata, cutoff)`` returns.
    """
    return self.data_jinjian_age(indata=indata, cutoff=cutoff)
'''逾期用户性别统计'''
def data_huikuan_gender(self,indata,cutoff):
    """Summarise application and approval counts per sex for a recent window.

    A row counts as "passed" when its ``loan_time`` is not null. The ``sex``
    value is fetched with ``self.sql_query`` from ``cl_user_base_info`` and
    joined on ``user_id``. Only rows whose ``create_time`` lies within
    ``cutoff`` days of the module-level ``nowtime_str`` and whose sex is
    known are kept.

    Args:
        indata: DataFrame with at least ``user_id``, ``loan_time`` and
            ``create_time`` columns (``create_time`` must be datetime-like,
            since it is subtracted from a timestamp).
        cutoff: Window length in days, counted back from ``nowtime_str``.

    Returns:
        DataFrame with columns ``sex``, ``pass_number``, ``apply_number``,
        ``apply_ratio%``, ``pass_ratio%`` and ``pass_rate%``. The three
        percentage columns are strings formatted with two decimals.
    """
    ds1 = indata.copy()
    ds1['pass'] = np.where(ds1['loan_time'].notnull(), 1, 0)
    info_query = self.sql_query('select user_id,sex from cl_user_base_info')
    ds1 = pd.merge(ds1, info_query, on='user_id', how='left')

    # Build the window mask from the merged frame itself. The merge produces
    # a fresh 0..n-1 index (and may duplicate rows), so a mask derived from
    # `indata` would be misaligned whenever the two frames differ.
    days_since_create = (pd.to_datetime(nowtime_str) - ds1['create_time']) / np.timedelta64(1, 'D')
    keep = (days_since_create <= cutoff) & ds1['sex'].notnull()
    ds1_tmp = ds1.loc[keep].reset_index(drop=True)

    # pass_number = rows with a loan, apply_number = all rows in the group.
    tmp = ds1_tmp.groupby(['sex'])['pass'].agg(['sum', 'count']).reset_index()
    tmp = tmp.rename(columns={'sum': 'pass_number', 'count': 'apply_number'})

    # Totals are loop-invariant: compute them once instead of once per row.
    total_apply = tmp['apply_number'].sum()
    total_pass = tmp['pass_number'].sum()

    def _pct(numerator, denominator):
        # Two-decimal percentage string; '0.00' guards division by zero.
        if denominator > 0:
            return '{x:.2f}'.format(x=100 * numerator / denominator)
        return '0.00'

    # List comprehensions also behave correctly when `tmp` has no rows.
    tmp['apply_ratio%'] = [_pct(n, total_apply) for n in tmp['apply_number']]
    tmp['pass_ratio%'] = [_pct(n, total_pass) for n in tmp['pass_number']]
    tmp['pass_rate%'] = [_pct(p, a) for p, a in zip(tmp['pass_number'], tmp['apply_number'])]
    return tmp[['sex','pass_number','apply_number','apply_ratio%','pass_ratio%','pass_rate%']]
☆ 清理数据统计模块
#################################4.清理信息#################################
def qingli_dayReport(self,indata):
    """Write the clearing ("清理") daily-report tables to the report workbook.

    Three tables are built from ``indata`` and handed to ``self.get_excel``
    for the sheet ``jietiao_repay_day``:

    1. cleared count per day for the last 7 days (row_add=18, col_add=1);
    2. cleared count per overdue-days bucket (row_add=18, col_add=4);
    3. a one-row summary of inflow/cleared counts for yesterday, the last
       three days, this month and overall, merged with the counts per
       overdue bucket of loans due before ``firstday`` (row_add=3, col_add=1).

    Reads the module-level globals ``nowtime``, ``nowtime_str``,
    ``yestime_str``, ``thismonth_str`` and ``firstday``.

    Args:
        indata: DataFrame with at least ``finish_pay_time``, ``repay_time``
            (both datetime-like) and ``流入`` columns.

    Returns:
        None. The results are only passed to ``self.get_excel``.
    """
    day_fmt = "%Y-%m-%d"
    inflow_cleared = indata['finish_pay_time'].notnull() & (indata['流入'] == 1)

    # ---- 1) cleared items per day over the last 7 days (today included) ----
    # `.copy()` so that the column assignment below targets a real frame, not
    # a slice of the caller's data.
    ds1 = indata.loc[inflow_cleared].copy()
    week_start_str = (nowtime - datetime.timedelta(days=6)).strftime('%Y-%m-%d')

    def _day_if_in_week(ts):
        # ISO date strings compare chronologically, so string comparison is safe.
        day = ts.strftime(day_fmt)
        return day if week_start_str <= day <= nowtime_str else np.nan

    ds1['还款时间'] = ds1['finish_pay_time'].apply(_day_if_in_week)
    ds1_tmp = (ds1.loc[ds1['还款时间'].notnull()]
               .groupby(['还款时间'])['流入'].sum()
               .reset_index()
               .rename(columns={'流入': '清理个数'}))
    self.get_excel(indata=ds1_tmp, sheetname='jietiao_repay_day', row_add=18, col_add=1)

    # ---- 2) cleared items bucketed by days overdue at clearing time ----
    ds2 = indata.loc[inflow_cleared].copy()
    overdue_days = (ds2['finish_pay_time'] - ds2['repay_time']) / np.timedelta64(1, 'D')
    ds2['amount_day'] = np.where(overdue_days <= 3, 'D01~D03',
                        np.where(overdue_days <= 7, 'D04~D07',
                        np.where(overdue_days <= 11, 'D08~D11',
                        np.where(overdue_days <= 15, 'D12~D15',
                        np.where(overdue_days <= 30, 'D16~D30', '>D30')))))
    ds2_tmp = (ds2.groupby(['amount_day'])['流入'].sum()
               .reset_index()
               .rename(columns={'流入': '清理个数'}))
    self.get_excel(indata=ds2_tmp, sheetname='jietiao_repay_day', row_add=18, col_add=4)

    # ---- 3a) inflow / cleared summary keyed on the due date ----
    ds3 = indata.copy()
    ds3['还款时间'] = ds3['repay_time'].apply(lambda x: x.strftime(day_fmt))
    ds3['还款月'] = ds3['repay_time'].apply(lambda x: x.strftime("%Y-%m"))
    ds3 = ds3.loc[(ds3['流入'] == 1) & (ds3['还款时间'] <= nowtime_str)]
    cleared3 = ds3['finish_pay_time'].notnull()

    # NOTE(review): the original compared this string column with `yestime`;
    # every other string comparison here uses the `*_str` globals, so
    # `yestime_str` is used. Confirm it is formatted as %Y-%m-%d.
    due_yesterday = ds3['还款时间'] == yestime_str
    # "Within D3" = due today or in the two preceding days. The original
    # used timedelta(days=-2), which moved the window start two days into
    # the future and made both D3 counts always zero.
    d3_start_str = (nowtime - datetime.timedelta(days=2)).strftime(day_fmt)
    due_within_d3 = ds3['还款时间'] >= d3_start_str
    due_this_month = ds3['还款月'] == thismonth_str

    ds3_tmp = pd.DataFrame({'还款时间': [nowtime_str],
                            '昨日流入': [int(due_yesterday.sum())],
                            '昨日清理': [int((due_yesterday & cleared3).sum())],
                            'D3内流入': [int(due_within_d3.sum())],
                            'D3内清理': [int((due_within_d3 & cleared3).sum())],
                            '本月流入': [int(due_this_month.sum())],
                            '本月清理': [int((due_this_month & cleared3).sum())],
                            '累计流入': [ds3.shape[0]],
                            '累计清理': [int(cleared3.sum())]
                            })

    # ---- 3b) loans due before `firstday`, bucketed by days overdue at `firstday` ----
    ds4 = indata.loc[indata['repay_time'] < firstday].copy()
    cleared4 = ds4['finish_pay_time'].notnull()
    # Loans already repaid before `firstday` get 0 and fall into '已回款'.
    days_at_firstday = np.where(cleared4 & (ds4['finish_pay_time'] < firstday),
                                0,
                                (firstday - ds4['repay_time']) / np.timedelta64(1, 'D'))
    ds4['amount_day'] = np.where(days_at_firstday == 0, '已回款',
                        np.where(days_at_firstday <= 7, 'D01~D07',
                        np.where(days_at_firstday <= 15, 'D08~D15', '>D15')))

    def _bucket_counts(label):
        # (rows in the bucket, rows in the bucket that have been cleared)
        in_bucket = ds4['amount_day'] == label
        return int(in_bucket.sum()), int((in_bucket & cleared4).sum())

    d01_all, d01_cleared = _bucket_counts('D01~D07')
    d08_all, d08_cleared = _bucket_counts('D08~D15')
    d15_all, d15_cleared = _bucket_counts('>D15')
    ds4_tmp = pd.DataFrame({'还款时间': [nowtime_str],
                            'D01~D07': [d01_all],
                            'D01~D07清理': [d01_cleared],
                            'D08~D15': [d08_all],
                            'D08~D15清理': [d08_cleared],
                            '>D15': [d15_all],
                            '>D15清理': [d15_cleared]
                            })

    tmp = pd.merge(ds3_tmp, ds4_tmp, on='还款时间', how='outer')
    self.get_excel(indata=tmp, sheetname='jietiao_repay_day', row_add=3, col_add=1)
五 可视化展示
- 因为考虑到python制图和excel有差异,所以最终的一页报告设计由excel完成,数据部分由python实现,图表使用的数据用公式调用。
六 报告发送
- python 模块:matplotlib/echarts,画图
- python 模块:openpyxl(代码中实际使用 load_workbook 读写),操作excel文件,制作excel报告。
- 定时发邮件:send_email
- 定时群发微信:wechat_get_news
☆ 邮件发送模块
'''发送邮件模块'''
def create_email(self,email_from, email_to, email_Subject, email_text, annex_path, annex_name):
    """Build a multipart e-mail with a plain-text body and one file attachment.

    Args:
        email_from: Display name placed in the ``From`` header.
        email_to: Display name placed in the ``To`` header.
        email_Subject: Subject line.
        email_text: Plain-text body (encoded as UTF-8).
        annex_path: Path of the file to attach; read in binary mode.
        annex_name: File name advertised to the recipient for the attachment.

    Returns:
        The assembled ``MIMEMultipart`` message.

    Raises:
        OSError: If ``annex_path`` cannot be opened or read.
    """
    # Local import: this class is not part of the file's top-level imports.
    from email.mime.application import MIMEApplication

    message = MIMEMultipart()
    message.attach(MIMEText(email_text, 'plain', 'utf-8'))
    # These headers only carry display values; the addresses actually used
    # for delivery are the ones passed to the SMTP call.
    message['From'] = Header(email_from, 'utf-8')
    message['To'] = Header(email_to, 'utf-8')
    message['Subject'] = Header(email_Subject, 'utf-8')

    # Read the attachment inside a context manager so the handle is closed.
    with open(annex_path, 'rb') as annex_file:
        annex_bytes = annex_file.read()

    # MIMEApplication base64-encodes the bytes and emits a single
    # `Content-Type: application/octet-stream` header. Assigning Content-Type
    # on a MIMEText part appends a second header instead of replacing it.
    attachment = MIMEApplication(annex_bytes, _subtype='octet-stream')
    # add_header quotes the file name and RFC 2231-encodes it when non-ASCII.
    attachment.add_header('Content-Disposition', 'attachment', filename=annex_name)
    message.attach(attachment)
    return message
'''发送邮件模块'''
def send_email(self,sender, password, receiver, msg):
    """Send ``msg`` through the SMTP-over-SSL server ``smtp.mxhichina.com:465``.

    Delivery is best-effort: any failure is reported on stdout/stderr and
    swallowed, so the caller is never interrupted.

    Args:
        sender: Sender mailbox address, also used as the login name.
        password: Password for ``sender``.
        receiver: Recipient address, or a list of addresses.
        msg: Message object exposing ``as_string()``.

    Returns:
        None.
    """
    # Local import: `traceback` is not in the file's visible import block,
    # so the original handler would itself fail with NameError.
    import traceback

    try:
        # The context manager sends QUIT and closes the socket even when
        # login or sendmail raises.
        with smtplib.SMTP_SSL("smtp.mxhichina.com", 465) as server:
            server.ehlo()
            server.login(sender, password)
            server.sendmail(sender, receiver, msg.as_string())
        print("邮件发送成功")
    except Exception:
        # print_exc() writes the traceback itself and returns None, so its
        # result must not be passed to print().
        traceback.print_exc()
        print("邮件发送失败")
☆ 微信发送模块
'''发送微信模块'''
def wechat_get_news(self):
    """Send the report image to one WeChat friend through a wxpy ``Bot``.

    Logs in with ``Bot(console_qr=True, cache_path=True)``, takes the first
    search hit for the hard-coded friend name and sends a hard-coded image
    file. If sending the image fails, a text notice is sent to the same
    friend instead.

    Raises:
        IndexError: If the friend search returns no match.
    """
    bot = Bot(console_qr=True, cache_path=True)
    # Resolve the friend once; both the image and the fallback notice go to
    # the same contact.
    my_friend = bot.friends().search('keep a clear mind耀武')[0]
    try:
        my_friend.send_image(r'C:\Users\A3\Desktop\skr.png')
        print('seccess')
    except Exception:
        # `except Exception` instead of a bare `except`, so Ctrl-C and
        # SystemExit are not swallowed.
        my_friend.send(u"消息发送失败了")
七 执行函数
☆ 主函数
#################################5.主函数#################################
if __name__ == "__main__":
    # Script entry point: load the report workbook, compute the enabled
    # report sections, save the workbook and print the elapsed time.
    ALD = AfterLoanData()

    # ---- module-level globals read by the report methods ----
    _begin = time.time()
    # `_path` has to exist before its first use below. In the original it was
    # only assigned in a commented-out line further down, so a fresh
    # interpreter failed with NameError.
    _path = r'C:\Users\A3\Desktop\2:项目\项目\项目24: 基于python 的全自动贷后指标追踪日报'
    # Raw string: '\日' is not a valid escape sequence; the path value is unchanged.
    workbook = load_workbook(_path + r'\日报数据.xlsx')
    nowtime,nowtime_str,yestime,yestime_str,firstday,firstday_str,thismonth_str,lastmonthday,lastmonthday_str,lastmonth_str = ALD.get_time()

    # ---- 1. application data (disabled) ----
    # data_apply = ALD.data_jinjian()
    # time_jinjain_report = ALD.jinjian_dayReport(indata = data_apply)
    # time_jinjian_day,time_jinjian_week,time_jinjian_month = ALD.data_jinjian_time(indata = data_apply)
    # data_apply.to_excel(r'C:\Users\A3\Desktop\2:项目\项目\项目24: 基于python 的全自动贷后指标追踪日报\data\apply.xlsx')
    # data_apply = pd.read_excel(r'C:\Users\A3\Desktop\2:项目\项目\项目24: 基于python 的全自动贷后指标追踪日报\data\apply.xlsx')
    # time_jinjian_week.to_excel(r'C:\Users\A3\Desktop\2:项目\项目\项目24: 基于python 的全自动贷后指标追踪日报\data\time_jinjian_week.xlsx')
    # time_jinjian_month.to_excel(r'C:\Users\A3\Desktop\2:项目\项目\项目24: 基于python 的全自动贷后指标追踪日报\data\time_jinjian_month.xlsx')
    # city_apply_week,city_apply_week1 = ALD.data_jinjian_city(indata = data_apply,cutoff = 7)
    # city_apply_month,city_apply_month1 = ALD.data_jinjian_city(indata = data_apply,cutoff = 30)
    # city_apply_quarter,city_apply_quarter1 = ALD.data_jinjian_city(indata = data_apply,cutoff = 90)
    # city_apply_halfyear,city_apply_halfyear1 = ALD.data_jinjian_city(indata = data_apply,cutoff = 180)
    # age_apply_week = ALD.data_jinjian_age(indata = data_apply,cutoff = 7)
    # age_apply_month = ALD.data_jinjian_age(indata = data_apply,cutoff = 30)
    # age_apply_quarter = ALD.data_jinjian_age(indata = data_apply,cutoff = 90)
    # age_apply_halfyear = ALD.data_jinjian_age(indata = data_apply,cutoff = 180)
    # gender_apply_week = ALD.data_jinjian_gender(indata = data_apply,cutoff = 7)
    # gender_apply_month = ALD.data_jinjian_gender(indata = data_apply,cutoff = 30)
    # gender_apply_quarter = ALD.data_jinjian_gender(indata = data_apply,cutoff = 90)
    # gender_apply_halfyear = ALD.data_jinjian_gender(indata = data_apply,cutoff = 180)

    # ---- 2. loan data ----
    # Re-enabled: section 4 needs `data_huikuan`, which is derived from this.
    # NOTE(review): the original kept this call disabled together with an
    # Excel cache round-trip; confirm that querying directly is intended.
    data_borrow = ALD.data_fangkuan()
    # data_borrow.to_excel(r'C:\Users\A3\Desktop\2:项目\项目\项目24: 基于python 的全自动贷后指标追踪日报\data\data_borrow.xlsx')
    # data_borrow = pd.read_excel(r'C:\Users\A3\Desktop\2:项目\项目\项目24: 基于python 的全自动贷后指标追踪日报\data\data_borrow.xlsx')

    # ---- 3. repayment data ----
    # Re-enabled: without it the call in section 4 raised NameError.
    data_huikuan = ALD.data_huikuan_time(data_borrow)
    # time_huikuan_today,time_huikuan_fenbu,day_30 = ALD.huikuan_dayReport(indata = data_huikuan)

    # ---- 4. clearing data ----
    data_qingli = ALD.qingli_dayReport(indata = data_huikuan)

    # ---- 5. e-mail / WeChat delivery (disabled) ----
    # email_send = ALD.send_email()
    # wechat_send = ALD.wechat_get_news()

    workbook.save(_path + r'\日报数据.xlsx')
    _end = time.time()
    print('You have finished!\nfanilly use time: {x:.2f}s'.format(x = _end - _begin))
八 项目总结
- 初衷:此项目用于python‘萌宠’项目练手,重在完整的展现一个报告项目的数据整理流程。包括数据获取、处理、挖掘、可视化等模块。实际用时8个工作日,‘萌宠’期可以接受。
- 反思:代码只是执行思维的工具,有一个接触、理解、运用、熟练的过程。重点是coder的思维逻辑是否清晰,能否按照工程流(workflow)、项目流、数据流的方式,层次化、结构化、逻辑化地去执行工作。
- Finally!
- 高效学习有两个很重要的习惯:
①快速进入专注的状态。
②长期保持专注的状态。
- 高效学习有两个很重要的习惯: