【项目实战】:基于python的 p2p 贷后指标全自动日报制作

版权声明:转载请联系博主。 https://blog.csdn.net/sunyaowu315/article/details/82462364

基于python的p2p 贷后指标全自动日报制作

  • author : sunyaowu
  • begin_time : 2018年8月27日
  • end_time : 2018年9月7日
  • 说明 :贷后指标,包括贷放、逾期、清理等内容。

一 项目规划

  • 项目整体框架:
    • ①数据获取
    • ②数据处理及分析
    • ③生成结构化汇总数据,并制作excel单页可视化报告
    • ④定时发送邮件,并通过微信搜索向定向群体发送报告
  • 周期规划:一周左右
  • 预期结果:日报可以上线工作

二 项目代码

  • 代码模块化
  • 函数式编程思想

☆ 导入python包

"""
Created on Mon Aug 27 11:44:35 2018
@贷后全自动日报设计
@begin_time:2018-08-27
@  end_time:2018-09-07
@author: SunYaowu
"""
import pandas as pd   #结构化数据处理
import numpy as np   #矩阵数据处理
import pymysql as pms   #sql模块
import time  
import datetime
from openpyxl import load_workbook   #excel文件读写模块
from email.mime.text import MIMEText   #邮件模块
from email.mime.multipart import MIMEMultipart
from email.header import Header
import smtplib   #传输协议模块
from wxpy import *   #微信
import itchat   #微信

☆ 定义类

#################################☆.主类#################################  
class AfterLoanData:
    """Post-loan daily report: container for the query, statistics and export methods."""

    def __init__(self):
        """Create the report object; no instance state is set up."""

二 数据获取

  • python 模块:pymysql,从mysql数据库提取数据

☆ 数据库query数据模块

    #################################☆.功能模块#################################    
    '''数据库取数模块'''
    # 一个传入sql导出数据的函数
    def sql_query(self,sql):
        """Execute ``sql`` on the configured MySQL database and return the rows.

        Parameters
        ----------
        sql : str
            Statement handed unchanged to ``cursor.execute``.

        Returns
        -------
        pandas.DataFrame
            Built from ``cursor.fetchall()``; with ``DictCursor`` every record
            is a dict, so its keys become the column names.

        Raises
        ------
        Exception
            Connection and execution errors are re-raised (after a rollback)
            rather than being swallowed.
        """
        # Connection settings (the values are redacted placeholders in this listing).
        config = {
          'host':'xxxxxxxx',
          'port':xxxx,
          'db':'pxxxxxn',
          'user':'cxxxxx_xxxxxxx_r',
          'password':'Ixxxxxxxexxxxx',  
          'charset':'utxxxxx4',
          'cursorclass':pms.cursors.DictCursor,
          }
        # Connect outside the try block: when this fails there is no
        # connection object to roll back or close.
        conn = pms.connect(**config)
        try:
            # The with-block closes the cursor on exit, so no explicit close.
            with conn.cursor() as cur:
                cur.execute(sql)
                rows = cur.fetchall()
            conn.commit()
        except Exception:
            conn.rollback()
            raise
        finally:
            conn.close()
        return pd.DataFrame(rows)

三 数据处理

  • python 模块:numpy,pandas,数据规整:筛选、填充、合并
  • 进件情况:data_jinjian,data_jinjian_time,data_jinjian_city,data_jinjian_age,data_jinjian_gender
  • 回款情况:data_huikuan_time,data_huikuan_city,data_huikuan_age,data_huikuan_gender
  • 逾期情况:yuqi_liuru,data_yuqi_time

☆ 其他一些需要多次调用的功能模块

    '''获取用户地址模块'''
    def user_addr(self):
        """Load the region-code table and return a ``{code: city}`` lookup.

        Reads ``code.pkl`` from the module-level ``_path`` directory, keeps the
        rows whose ``coordinate`` equals ``'success'`` and maps the string form
        of ``num`` to ``city_final``.

        Returns
        -------
        dict
            Keys are ``num`` values converted to ``str``; values are the
            matching ``city_final`` entries.
        """
        # Raw string: '\c' is not a valid escape sequence and newer Python
        # versions warn about it; the resulting path text is unchanged.
        # NOTE(review): read_pickle unpickles the file - only load trusted files.
        address_ds = pd.read_pickle(_path + r'\code.pkl')
        # .copy() so the assignment below works on an independent frame
        # instead of a slice of the loaded table.
        address_ds = address_ds.loc[address_ds['coordinate']=='success',].copy()
        address_ds['num'] = address_ds['num'].astype(str)
        # zip pairs each code with its city; dict() turns the pairs into a lookup.
        return dict(zip(address_ds['num'],address_ds['city_final']))
    
    '''合并字段模块'''
    def merge_ds(self,ds_list,on_var,how):
        """Merge a list of DataFrames left to right on the same key.

        Parameters
        ----------
        ds_list : list of pandas.DataFrame
            Frames to combine; must contain at least one frame.
        on_var : str or list
            Key column(s), passed as ``on`` to ``pandas.merge``.
        how : str
            Join type, passed as ``how`` to ``pandas.merge``.

        Returns
        -------
        pandas.DataFrame
            ``ds_list[0]`` merged successively with every following frame.
            A single-element list returns that frame as is.

        Raises
        ------
        ValueError
            If ``ds_list`` is empty.
        """
        if len(ds_list) == 0:
            raise ValueError('merge_ds needs at least one DataFrame')
        merged = ds_list[0]
        for frame in ds_list[1:]:
            merged = pd.merge(merged,frame,on=on_var,how=how)
        return merged
        
    '''excel文件读写模块'''
    def get_excel(self,indata,sheetname,row_add,col_add):
        """Write the cell values of ``indata`` into a sheet of the module-level ``workbook``.

        Only the values are written; the column names are not. Loading and
        saving the workbook are not done here.

        Parameters
        ----------
        indata : pandas.DataFrame
            Table whose values are written.
        sheetname : str
            Name of the worksheet inside ``workbook``.
        row_add, col_add : int
            Offsets added to the zero-based row/column position of each value
            to get the target cell coordinates.
        """
        # Index access replaces the deprecated Workbook.get_sheet_by_name.
        worksheet = workbook[sheetname]
        # Materialise the value array once instead of once per cell.
        cells = indata.values
        n_rows, n_cols = indata.shape
        for row in range(n_rows):
            for col in range(n_cols):
                worksheet.cell(row = row + row_add, column = col + col_add).value = cells[row][col]

    '''字符类型转换模块'''
    def char2num(self,indata,varlist):
        """Return a copy of ``indata`` with the columns in ``varlist`` made numeric.

        Values that cannot be parsed become NaN (``errors='coerce'``); the
        input frame is left untouched.
        """
        converted = indata.copy()
        for column in varlist:
            parsed = pd.to_numeric(converted[column], errors='coerce')
            converted[column] = parsed
        return converted
        
    '''获取时间模块'''
    def get_time(self, now=None):
        """Return the reference dates used by the report.

        Parameters
        ----------
        now : datetime.datetime, optional
            Moment treated as "now"; defaults to ``datetime.datetime.now()``.

        Returns
        -------
        tuple
            ``(nowtime, nowtime_str, yestime, yestime_str, firstday,
            firstday_str, thismonth_str, lastmonthday, lastmonthday_str,
            lastmonth_str)`` where the ``*_str`` day values use ``%Y-%m-%d``
            and the month values use ``%Y-%m``. ``lastmonthday`` is midnight of
            the same day number in the previous month, clamped to that
            month's last day when the day number does not exist there.
        """
        import calendar

        # Today
        nowtime = datetime.datetime.now() if now is None else now
        nowtime_str = nowtime.strftime("%Y-%m-%d")
        # Yesterday
        yestime = nowtime + datetime.timedelta(days=-1)
        yestime_str = yestime.strftime("%Y-%m-%d")
        # First day of this month
        firstday = datetime.datetime(nowtime.year,nowtime.month,1,0,0,0)
        firstday_str = firstday.strftime("%Y-%m-%d")
        # This month
        thismonth_str = nowtime.strftime("%Y-%m")
        # Same day last month
        last_month = nowtime.month - 1
        last_year = nowtime.year
        if last_month == 0:
            last_month = 12
            last_year -= 1
        # e.g. 31 March has no counterpart in February: fall back to the last
        # day of the previous month instead of raising ValueError.
        last_day = min(nowtime.day, calendar.monthrange(last_year, last_month)[1])
        lastmonthday = datetime.datetime(month=last_month, year=last_year, day=last_day)
        lastmonthday_str = lastmonthday.strftime("%Y-%m-%d")
        # Last month
        lastmonth_str = lastmonthday.strftime("%Y-%m")
        return nowtime,nowtime_str,yestime,yestime_str,firstday,firstday_str,thismonth_str,lastmonthday,lastmonthday_str,lastmonth_str

四 数据分析

  • python 模块:pandas,scipy,分组聚合运算,描述性统计分析。

☆ 进件情况统计模块

    #################################1.进件信息#################################

    '''获取进件数据'''
    def data_jinjian(self):
        """Fetch the loan applications from ``cl_borrow`` and flag the funded ones.

        Returns
        -------
        pandas.DataFrame
            The selected ``cl_borrow`` columns plus ``pass``, which is 1 when
            ``loan_time`` is not null and 0 otherwise.

        Raises
        ------
        Exception
            Any query or column-selection error is reported on stdout and
            then re-raised, so a failure can no longer yield a half-processed
            frame.
        """
        sql = 'select user_id,amount,real_amount,service_fee,interest,time_limit,state,address,coordinate,again,create_time,loan_time from cl_borrow'  # loan application records
        columns = ['user_id','amount','real_amount','service_fee','interest','time_limit','again','state','address','coordinate','create_time','loan_time']
        try:
            data_apply = self.sql_query(sql = sql)
            # .copy() so adding the flag column does not write into a slice.
            data_apply = data_apply[columns].copy()
            data_apply['pass'] = np.where(data_apply['loan_time'].notnull(),1,0)
        except Exception:
            print('Oh No,something error!')
            raise
        return data_apply

    '''按进件时间统计1''' 
    def _data_jinjian_time(self,indata):
        """Aggregate application and funding figures per ``apply_time`` value.

        ``indata`` must carry the columns ``apply_time``, ``amount``,
        ``user_id``, ``pass`` and ``create_time``. The result has one row per
        ``apply_time`` with totals for all applications and, separately, for
        repeat applications (every application of a user after that user's
        first one, ordered by ``create_time``).
        """
        _tmp = indata.copy()
        # All applications: amount and count, overall and for pass == 1.
        tmp_grp1 = _tmp.groupby(['apply_time'])['amount'].sum().reset_index().rename(columns = {'amount':'申请金额'})
        tmp_grp2 = _tmp.groupby(['apply_time'])['user_id'].agg(pd.Series.count).reset_index().rename(columns = {'user_id':'申请笔数'})
        tmp_grp3 = _tmp.loc[_tmp['pass'] == 1,].groupby(['apply_time'])['amount'].sum().reset_index().rename(columns = {'amount':'放款金额'})
        tmp_grp4 = _tmp.loc[_tmp['pass'] == 1,].groupby(['apply_time'])['user_id'].agg(pd.Series.count).reset_index().rename(columns = {'user_id':'放款笔数'})
        tmp = self.merge_ds([tmp_grp1,tmp_grp2,tmp_grp3,tmp_grp4],on_var = 'apply_time',how = 'outer')                
        # Funded count / application count, formatted to two decimals as text
        # and converted back to a number on the next line.
        tmp['通过率%'] = tmp[['放款笔数','申请笔数']].apply(lambda x: '{x:.2f}'.format(x = x['放款笔数']/x['申请笔数'])  if x['申请笔数'] >0 else '0.00',axis=1 )
        tmp =self.char2num( indata = tmp , varlist = ['通过率%'] )
             
        # Repeat applications: number each user's applications in create_time
        # order and keep the second and later ones.
        _tmp2 = indata.sort_values(by = ['user_id','create_time'] )
        _tmp2['贷款次数'] = _tmp2.groupby(['user_id']).cumcount()+1
        _tmp2 = _tmp2.loc[_tmp2['贷款次数']>1,]        
        tmp2_grp1 = _tmp2.groupby(['apply_time'])['amount'].sum().reset_index().rename(columns = {'amount':'复贷-申请金额'})
        tmp2_grp2 = _tmp2.groupby(['apply_time'])['user_id'].agg(pd.Series.count).reset_index().rename(columns = {'user_id':'复贷-申请笔数'})
        tmp2_grp3 = _tmp2.loc[_tmp2['pass']==1,].groupby(['apply_time'])['amount'].sum().reset_index().rename(columns = {'amount':'复贷-放款金额'})
        tmp2_grp4 = _tmp2.loc[_tmp2['pass']==1,].groupby(['apply_time'])['user_id'].agg(pd.Series.count).reset_index().rename(columns = {'user_id':'复贷-放款笔数'})
        tmp2 = self.merge_ds([tmp2_grp1,tmp2_grp2,tmp2_grp3,tmp2_grp4 ],on_var = 'apply_time',how = 'outer')

        tmp=pd.merge(tmp,tmp2,on = 'apply_time',how = 'outer')       
        # Unlike '通过率%' above, these two ratios are kept as unformatted numbers.
        tmp['复贷-通过率%'] = tmp[['复贷-放款笔数','复贷-申请笔数']].apply(lambda x: x['复贷-放款笔数'] / x['复贷-申请笔数']  if x['复贷-申请笔数'] > 0 else 0,axis = 1 )
        tmp['复贷通过占比%'] = tmp[['复贷-放款笔数','放款笔数']].apply(lambda x: x['复贷-放款笔数'] / x['放款笔数'] if x['放款笔数'] > 0 else 0,axis = 1 )
        # Fix the output column order.
        tmp = tmp[['apply_time','申请金额','申请笔数','放款金额','放款笔数','通过率%','复贷-申请金额','复贷-申请笔数','复贷-放款金额','复贷-放款笔数','复贷通过占比%','复贷-通过率%']]
        return tmp 

    '''按进件时间统计2'''    
    def data_jinjian_time(self,indata):
        """Build daily, current-week and monthly application summaries.

        Each summary is produced by ``_data_jinjian_time`` and then padded
        with the periods that have no data (filled with 0). The weekly and
        monthly tables are also written to sheet ``'jietiao_apply_day'``
        through ``get_excel``. Relies on the module-level ``nowtime``.

        Parameters
        ----------
        indata : pandas.DataFrame
            Applications with at least ``create_time`` plus the columns
            ``_data_jinjian_time`` needs.

        Returns
        -------
        tuple of pandas.DataFrame
            ``(daily, weekly, monthly)`` tables, keyed by ``'申请时间'``.
        """
        # Calendar from the earliest application up to ``nowtime``. Both ends
        # are taken from ``indata`` rather than from a module-level frame, so
        # the padding always matches the data being summarised.
        first_apply = indata['create_time'].min()
        n_days = int((nowtime - first_apply) / np.timedelta64(1,'D'))
        calendar_days = [first_apply + datetime.timedelta(days = diff) for diff in range(n_days)]

        # Daily
        ds1 = indata.copy()
        ds1['apply_time'] = ds1['create_time'].apply(lambda x: x.strftime("%Y-%m-%d") )
        tmp_1 = self._data_jinjian_time( indata = ds1 )
        # Pad with every calendar day so days without applications still show up.
        format_day = pd.DataFrame({'apply_time': sorted({day.strftime("%Y-%m-%d") for day in calendar_days})})
        tmp_1 = pd.merge(tmp_1,format_day,on = 'apply_time',how = 'outer').rename(columns = {'apply_time':'申请时间'})
        tmp_1 = tmp_1.fillna(0).sort_values(['申请时间'])

        # Weekly: days since the most recent Monday strictly before today.
        ds2 = indata.copy()
        week_cutday = [(nowtime - datetime.timedelta(days = diff)).strftime("%Y-%m-%d") for diff in range(1,8) if (nowtime - datetime.timedelta(days = diff)).weekday() == 0]
        week_start = pd.to_datetime(week_cutday[0])
        ds2['apply_time'] = ds2['create_time'].apply(lambda x: x.strftime("%Y-%m-%d" ) if x >= week_start else np.nan )
        tmp_2 = self._data_jinjian_time( indata = ds2 )
        # Pad with each day of that week up to ``nowtime``.
        format_week = pd.DataFrame({'apply_time': [(week_start + datetime.timedelta(days = diff)).strftime("%Y-%m-%d" ) for diff in range(0,7) if week_start + datetime.timedelta(days = diff) <= nowtime]})
        tmp_2 = pd.merge(tmp_2,format_week,on = 'apply_time',how = 'outer').rename(columns = {'apply_time':'申请时间'})
        tmp_2 = tmp_2.fillna(0).sort_values(['申请时间'])
        # Export the weekly table for the Excel report.
        self.get_excel(indata = tmp_2,sheetname = 'jietiao_apply_day',row_add = 9,col_add = 1)

        # Monthly
        ds3 = indata.copy()
        ds3['apply_time'] = ds3['create_time'].apply(lambda x: x.strftime("%Y-%m") )
        tmp_3 = self._data_jinjian_time( indata=ds3 )
        # Pad with every month touched by the calendar above.
        format_month = pd.DataFrame({'apply_time': sorted({day.strftime("%Y-%m") for day in calendar_days})})
        tmp_3 = pd.merge(tmp_3,format_month,on = 'apply_time',how = 'outer').rename(columns = {'apply_time':'申请时间'})
        tmp_3 = tmp_3.fillna(0).sort_values(['申请时间'])
        # Export the monthly table for the Excel report.
        self.get_excel(indata = tmp_3,sheetname = 'jietiao_apply_day',row_add = 24,col_add = 2)
        return tmp_1,tmp_2,tmp_3

    def jinjian_dayReport(self,indata):
        """Build the application section of the daily report and write it to Excel.

        Produces one row per key day (today, yesterday, same day last month)
        with application/approval counts, month-to-date and all-time totals,
        the same figures for repeat applications, and the matching approval
        rates. Relies on the module-level ``nowtime``, ``nowtime_str``,
        ``yestime_str``, ``lastmonthday``, ``lastmonthday_str``,
        ``thismonth_str`` and ``lastmonth_str``. The table is written to sheet
        ``'jietiao_apply_day'`` at row 3, column 1 and also returned.
        """
        
        # Daily figures: keep the date only for the three key days, NaN otherwise
        # (groupby drops the NaN rows).
        ds1 = indata.copy()       
        ds1['apply_time'] = ds1['create_time'].apply(lambda x: x.strftime("%Y-%m-%d" ) if (x.strftime("%Y-%m-%d" ) == nowtime_str) or (x.strftime("%Y-%m-%d" ) == yestime_str) or (x.strftime("%Y-%m-%d" ) == lastmonthday_str) else np.nan )        
        ds1_grp1 = ds1.groupby(['apply_time'])['user_id'].agg(pd.Series.count).reset_index().rename(columns = {'user_id':'申请笔数'})
        ds1_grp2 = ds1.loc[ds1['pass'] == 1,].groupby(['apply_time'])['user_id'].agg(pd.Series.count).reset_index().rename(columns = {'user_id':'通过笔数'})
        # Month-to-date and all-time totals as of ``nowtime``.
        ds1_tmp = pd.DataFrame({'apply_time':[nowtime_str],
                                '本月申请笔数':[ds1.loc[(ds1['create_time'].apply(lambda x: x.strftime("%Y-%m")) == thismonth_str) & (ds1['create_time'] <= nowtime),'user_id'].shape[0]],
                                '本月通过笔数':[ds1.loc[(ds1['create_time'].apply(lambda x: x.strftime("%Y-%m")) == thismonth_str) & (ds1['create_time'] <= nowtime) & (ds1['pass'] == 1),'user_id'].shape[0]],
                                '历史申请笔数':[ds1.loc[ds1['create_time'] <= nowtime,'user_id'].shape[0]],
                                '历史通过笔数':[ds1.loc[(ds1['create_time'] <= nowtime) & (ds1['pass'] == 1),'user_id'].shape[0]],
                                })
        #ds1_tmp = self.merge_ds([ds1_tmp,ds1_grp1,ds1_grp2],on_var = 'apply_time',how = 'outer')        
        # The same totals as of the same day last month.
        _ds1_tmp = pd.DataFrame({'apply_time':[lastmonthday_str],
                                '本月申请笔数':[ds1.loc[(ds1['create_time'].apply(lambda x: x.strftime("%Y-%m")) == lastmonth_str) & (ds1['create_time'] <= lastmonthday),'user_id'].shape[0]],
                                '本月通过笔数':[ds1.loc[(ds1['create_time'].apply(lambda x: x.strftime("%Y-%m")) == lastmonth_str) & (ds1['create_time'] <= lastmonthday) & (ds1['pass'] == 1),'user_id'].shape[0]],
                                '历史申请笔数':[ds1.loc[ds1['create_time'] <= lastmonthday,'user_id'].shape[0]],
                                '历史通过笔数':[ds1.loc[(ds1['create_time'] <= lastmonthday) & (ds1['pass'] == 1),'user_id'].shape[0]],
                                })            
        ds1_tmp = pd.concat([ds1_tmp,_ds1_tmp])
                        
        # Repeat applications: every application of a user after the first one.
        _ds1 = ds1.copy().sort_values(by = ['user_id','create_time'] )
        _ds1['贷款次数'] = _ds1.groupby(['user_id']).cumcount()+1
        _ds1 = _ds1.loc[_ds1['贷款次数']>1,]       
        _ds1_grp1 = _ds1.groupby(['apply_time'])['user_id'].agg(pd.Series.count).reset_index().rename(columns = {'user_id':'复贷-申请笔数'})
        _ds1_grp2 = _ds1.loc[_ds1['pass']==1,].groupby(['apply_time'])['user_id'].agg(pd.Series.count).reset_index().rename(columns = {'user_id':'复贷-通过笔数'})        
        # All-time repeat totals are only computed for today's row.
        ds2_tmp = pd.DataFrame({'apply_time':[nowtime_str],
                                '复贷-历史申请笔数':[_ds1.loc[_ds1['create_time'] <= nowtime,'user_id'].shape[0]],
                                '复贷-历史通过笔数':[_ds1.loc[(_ds1['create_time'] <= nowtime) & (_ds1['pass'] == 1),'user_id'].shape[0]]
                                })
        
        tmp_day = self.merge_ds([ds1_grp1,ds1_grp2,ds1_tmp,_ds1_grp1,_ds1_grp2,ds2_tmp],on_var = 'apply_time',how = 'outer').rename(columns = {'apply_time':'申请时间'})        

        # Approval rates as four-decimal text; '0.00' when the denominator is
        # missing or not positive.
        tmp_day['通过率'] = tmp_day[['申请笔数','通过笔数']].apply(lambda x: '{x:.4f}'.format(x = x['通过笔数'] / x['申请笔数'])  if x['申请笔数'] > 0 else '0.00',axis = 1 )
        tmp_day['本月通过率'] = tmp_day[['本月申请笔数','本月通过笔数']].apply(lambda x: '{x:.4f}'.format(x = x['本月通过笔数'] / x['本月申请笔数'])  if x['本月申请笔数'] > 0 else '0.00',axis = 1 )
        tmp_day['历史通过率'] = tmp_day[['历史申请笔数','历史通过笔数']].apply(lambda x: '{x:.4f}'.format(x = x['历史通过笔数'] / x['历史申请笔数'])  if x['历史申请笔数'] > 0 else '0.00',axis = 1 )
        tmp_day['复贷-通过率'] = tmp_day[['复贷-申请笔数','复贷-通过笔数']].apply(lambda x: '{x:.4f}'.format(x = x['复贷-通过笔数'] / x['复贷-申请笔数'])  if x['复贷-申请笔数'] > 0 else '0.00',axis = 1 )
        tmp_day['复贷-历史通过率'] = tmp_day[['复贷-历史申请笔数','复贷-历史通过笔数']].apply(lambda x: '{x:.4f}'.format(x =  x['复贷-历史通过笔数'] / x['复贷-历史申请笔数'])  if x['复贷-历史申请笔数'] > 0 else '0.00',axis = 1 )
        # Fix the output column order.
        tmp_day = tmp_day[['申请时间','申请笔数','通过笔数','通过率','本月申请笔数','本月通过笔数','本月通过率','历史申请笔数','历史通过笔数','历史通过率','复贷-申请笔数','复贷-通过笔数','复贷-通过率','复贷-历史申请笔数','复贷-历史通过笔数','复贷-历史通过率']]
        # Export the table for the Excel report.
        self.get_excel(indata = tmp_day,sheetname = 'jietiao_apply_day',row_add = 3,col_add = 1)
        return tmp_day           
        
    '''按进件城市统计'''
    def data_jinjian_city(self,indata,cutoff):
        """Summarise recent applications and approvals per city.

        The city comes from the first six characters of ``id_no`` (queried
        from ``cl_user_base_info``) looked up in ``user_addr()``; applications
        without a matching city are dropped. Relies on the module-level
        ``nowtime_str``.

        Parameters
        ----------
        indata : pandas.DataFrame
            Applications with ``user_id``, ``loan_time`` and ``create_time``.
        cutoff : int or float
            Keep applications created at most this many days before
            ``nowtime_str``.

        Returns
        -------
        tuple of pandas.DataFrame
            ``(all_cities, top10)``, both sorted by ``apply_number``
            descending with numeric ratio columns. With more than ten cities
            ``top10`` holds the first ten plus one ``'其他'`` row aggregating
            the rest; otherwise it equals ``all_cities``.
        """
        out_cols = ['city','pass_number','apply_number','apply_ratio%','pass_ratio%','pass_rate%']
        ratio_cols = ['apply_ratio%','pass_ratio%','pass_rate%']

        ds1 = indata.copy()
        ds1['pass'] = np.where(ds1['loan_time'].notnull(),1,0)
        info_query = self.sql_query('select user_id,id_no from cl_user_base_info')
        ds1 = pd.merge(ds1,info_query,on = 'user_id',how = 'left')

        # Days between the report date and the application. Built from the
        # merged frame so the mask stays aligned with the rows it filters.
        days_since_apply = (pd.to_datetime(nowtime_str ) - ds1['create_time']) / np.timedelta64(1,'D')
        ds1_tmp = ds1.loc[days_since_apply <= cutoff,].reset_index(drop = True)
        # Map the first six id_no characters to a city; users without a
        # base-info row have a missing id_no and get no city.
        addr = self.user_addr()
        ds1_tmp['city'] = ds1_tmp['id_no'].apply(lambda x: addr.get(str(x)[:6],None) if pd.notnull(x) else None )
        ds1_tmp = ds1_tmp.loc[ds1_tmp['city'].notnull(),].reset_index(drop = True)
        # pass_number = approved count, apply_number = application count.
        tmp = ds1_tmp.groupby(['city'])['pass'].agg(['sum','count']).reset_index()
        tmp = tmp.rename(columns = {'sum':'pass_number','count':'apply_number'})

        # Totals are computed once instead of once per row.
        total_apply = tmp['apply_number'].sum()
        total_pass = tmp['pass_number'].sum()
        tmp['apply_ratio%'] = tmp['apply_number'].apply(lambda x: '{x:.2f}'.format(x = 100 * x / total_apply) if total_apply > 0 else '0.00' )
        tmp['pass_ratio%' ] = tmp['pass_number' ].apply(lambda x: '{x:.2f}'.format(x = 100 * x / total_pass) if total_pass > 0 else '0.00' )
        tmp['pass_rate%'  ] = tmp[['pass_number','apply_number']].apply(lambda x: '{x:.2f}'.format(x = 100 * x['pass_number'] / x['apply_number'])  if  x['apply_number' ] > 0 else '0.00' ,axis = 1 )

        tmp = tmp.sort_values(by=['apply_number'],ascending=False).reset_index(drop=True)
        # Convert the formatted ratios to numbers in every case, so the result
        # dtype does not depend on how many cities there are.
        tmp = self.char2num( indata=tmp , varlist=ratio_cols )
        if tmp.shape[0] <= 10:
            return tmp[out_cols] ,tmp[out_cols]

        # Top ten cities plus one aggregated row for the remainder.
        tmp1 = tmp.loc[:9,]
        tmp2 = tmp.loc[10:,]
        rest_apply = tmp2['apply_number'].sum()
        rest_pass = tmp2['pass_number'].sum()
        last_row = pd.DataFrame({
                'city':['其他'],
                'pass_number':[rest_pass] ,
                'apply_number':[rest_apply] ,
                'apply_ratio%':[float('{x:.2f}'.format(x = (rest_apply / total_apply) * 100 if total_apply > 0 else 0))] ,
                'pass_ratio%' :[float('{x:.2f}'.format(x = (rest_pass / total_pass) * 100 if total_pass > 0 else 0))],
                'pass_rate%'  :[float('{x:.2f}'.format(x = (rest_pass / rest_apply) * 100 if rest_apply > 0 else 0))],
                })
        # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
        tmp1 = pd.concat([tmp1,last_row],ignore_index=True)
        return tmp[out_cols] ,tmp1[out_cols]

    '''用户年龄统计'''
    def data_jinjian_age(self,indata,cutoff):
        """Summarise recent applications and approvals per user age.

        ``age`` is queried from ``cl_user_base_info``; applications without
        an age are dropped. Relies on the module-level ``nowtime_str``.

        Parameters
        ----------
        indata : pandas.DataFrame
            Applications with ``user_id``, ``loan_time`` and ``create_time``.
        cutoff : int or float
            Keep applications created at most this many days before
            ``nowtime_str``.

        Returns
        -------
        pandas.DataFrame
            One row per age with ``pass_number``, ``apply_number`` and the
            three ratio columns formatted as two-decimal strings.
        """
        ds1 = indata.copy()
        ds1['pass'] = np.where(ds1['loan_time'].notnull(),1,0)
        info_query = self.sql_query('select user_id,age from cl_user_base_info')
        ds1 = pd.merge(ds1,info_query,on = 'user_id',how = 'left')
        # Days between the report date and the application. Built from the
        # merged frame so the mask stays aligned with the rows it filters.
        days_since_apply = (pd.to_datetime(nowtime_str ) - ds1['create_time']) / np.timedelta64(1,'D')
        ds1_tmp = ds1.loc[days_since_apply <= cutoff,].reset_index(drop = True)
        ds1_tmp = ds1_tmp.loc[ds1_tmp['age'].notnull(),].reset_index(drop = True)
        # pass_number = approved count, apply_number = application count.
        tmp = ds1_tmp.groupby(['age'])['pass'].agg(['sum','count']).reset_index()
        tmp = tmp.rename(columns = {'sum':'pass_number','count':'apply_number'})

        # Totals are computed once instead of once per row.
        total_apply = tmp['apply_number'].sum()
        total_pass = tmp['pass_number'].sum()
        tmp['apply_ratio%'] = tmp['apply_number'].apply(lambda x: '{x:.2f}'.format(x = 100 * x / total_apply) if total_apply > 0 else '0.00' )
        tmp['pass_ratio%' ] = tmp['pass_number' ].apply(lambda x: '{x:.2f}'.format(x = 100 * x / total_pass) if total_pass > 0 else '0.00' )
        tmp['pass_rate%'  ] = tmp[['pass_number','apply_number']].apply(lambda x: '{x:.2f}'.format(x = 100 * x['pass_number'] / x['apply_number'])  if  x['apply_number' ] > 0 else '0.00' ,axis = 1 )
        return tmp[['age','pass_number','apply_number','apply_ratio%','pass_ratio%','pass_rate%']]

    '''用户性别统计'''
    def data_jinjian_gender(self,indata,cutoff):
        """Summarise recent applications and approvals per user gender.

        ``sex`` is queried from ``cl_user_base_info``; applications without
        a value are dropped. Relies on the module-level ``nowtime_str``.

        Parameters
        ----------
        indata : pandas.DataFrame
            Applications with ``user_id``, ``loan_time`` and ``create_time``.
        cutoff : int or float
            Keep applications created at most this many days before
            ``nowtime_str``.

        Returns
        -------
        pandas.DataFrame
            One row per ``sex`` value with ``pass_number``, ``apply_number``
            and the three ratio columns formatted as two-decimal strings.
        """
        ds1 = indata.copy()
        ds1['pass'] = np.where(ds1['loan_time'].notnull(),1,0)
        info_query = self.sql_query('select user_id,sex from cl_user_base_info')
        ds1 = pd.merge(ds1,info_query,on = 'user_id',how = 'left')
        # Days between the report date and the application. Built from the
        # merged frame so the mask stays aligned with the rows it filters.
        days_since_apply = (pd.to_datetime(nowtime_str ) - ds1['create_time']) / np.timedelta64(1,'D')
        ds1_tmp = ds1.loc[days_since_apply <= cutoff,].reset_index(drop = True)
        ds1_tmp = ds1_tmp.loc[ds1_tmp['sex'].notnull(),].reset_index(drop = True)
        # pass_number = approved count, apply_number = application count.
        tmp = ds1_tmp.groupby(['sex'])['pass'].agg(['sum','count']).reset_index()
        tmp = tmp.rename(columns = {'sum':'pass_number','count':'apply_number'})

        # Totals are computed once instead of once per row.
        total_apply = tmp['apply_number'].sum()
        total_pass = tmp['pass_number'].sum()
        tmp['apply_ratio%'] = tmp['apply_number'].apply(lambda x: '{x:.2f}'.format(x = 100 * x / total_apply) if total_apply > 0 else '0.00' )
        tmp['pass_ratio%' ] = tmp['pass_number' ].apply(lambda x: '{x:.2f}'.format(x = 100 * x / total_pass) if total_pass > 0 else '0.00' )
        tmp['pass_rate%'  ] = tmp[['pass_number','apply_number']].apply(lambda x: '{x:.2f}'.format(x = 100 * x['pass_number'] / x['apply_number'])  if  x['apply_number' ] > 0 else '0.00' ,axis = 1 )
        return tmp[['sex','pass_number','apply_number','apply_ratio%','pass_ratio%','pass_rate%']]
    
    #################################2.放款信息#################################
    def data_fangkuan(self):
        """Fetch the repayment records from ``cl_borrow_repay``.

        Returns
        -------
        pandas.DataFrame
            The queried rows with the columns arranged in a fixed order.

        Raises
        ------
        Exception
            Any query or column-selection error is reported on stdout and
            then re-raised, so a failure can no longer yield a half-processed
            frame.
        """
        sql = 'select user_id,amount,repay_time,penalty_amout,penalty_day,create_time,principle,interest,already_paid,finish_pay_time from cl_borrow_repay'  # repayment records
        columns = ['user_id','principle','amount','interest','already_paid','penalty_amout',
                   'create_time','repay_time','finish_pay_time','penalty_day']
        try:
            data_borrow = self.sql_query(sql)
            data_borrow = data_borrow[columns]
        except Exception:
            print('Oh No,something error!')
            raise
        return data_borrow

☆ 回款数据统计模块

    #################################3.回款信息#################################   
    '''逾期用户进件时间统计'''
    
    def data_huikuan_time(self,indata):
        """Derive per-loan repayment and overdue flags.

        Parameters
        ----------
        indata : pandas.DataFrame
            Repayment records with ``user_id``, ``create_time``,
            ``repay_time``, ``finish_pay_time`` and ``penalty_day``.

        Returns
        -------
        pandas.DataFrame
            ``user_id``, ``放款日期`` (``create_time`` as ``%Y-%m-%d``),
            ``repay_time``, ``finish_pay_time`` and the derived columns:

            * ``当前是否还款`` - 1 when ``finish_pay_time`` is set.
            * ``当前是否逾期`` - 1 when not repaid and ``penalty_day`` > 0.
            * ``当前逾期天数`` - 0 when repaid, else ``penalty_day``.
            * ``流入`` - 1 when ``penalty_day`` > 0.
            * ``流入催回`` - 1 when ``流入`` is 1 and ``当前逾期天数`` is 0.
            * ``催回用时`` - days from ``repay_time`` to ``finish_pay_time``
              for recovered loans, ``'未催回'`` for overdue loans not yet
              recovered, 0 otherwise (object column).
        """
        ds = indata.copy()
        ds['放款日期'] = ds['create_time'].apply(lambda x: x.strftime("%Y-%m-%d") )
        repaid = ds['finish_pay_time'].notnull()
        ds['当前是否还款'] = np.where(repaid,1,0)
        ds['当前是否逾期'] = np.where(~repaid & (ds['penalty_day'] > 0),1,0)
        ds['当前逾期天数'] = np.where(repaid,0,ds['penalty_day'])
        ds['流入'] = np.where(ds['penalty_day'] > 0,1,0)
        ds['流入催回'] = np.where((ds['流入'] == 1) & (ds['当前逾期天数'] == 0),1,0)
        days_to_recover = (ds['finish_pay_time'] - ds['repay_time'])/np.timedelta64(1,'D')
        # Build the column as object dtype up front: it mixes day counts with
        # the text marker below, and writing a string into a float column
        # relies on silent upcasting that pandas has deprecated.
        recover_time = pd.Series(np.where(ds['流入催回'] == 1,days_to_recover,0),index = ds.index,dtype = object)
        recover_time[(ds['流入'] == 1) & (ds['流入催回'] == 0)] = '未催回'
        ds['催回用时'] = recover_time
        return ds[['user_id','放款日期','repay_time','finish_pay_time','当前是否还款','当前是否逾期','当前逾期天数','流入','流入催回','催回用时']]

    def huikuan_dayReport(self,indata):
        """Build the repayment section of the daily report and write it to Excel.

        Reads the columns ``user_id``, ``repay_time``, ``finish_pay_time``,
        ``流入``, ``流入催回`` and ``当前逾期天数`` of ``indata`` and relies on the
        module-level ``nowtime``, ``nowtime_str``, ``yestime_str``,
        ``lastmonthday``, ``lastmonthday_str`` and ``thismonth_str``. Five
        tables are written to sheet ``'jietiao_borrow_day'``: key-day
        repayments, the overdue bucket distribution, the last-30-day
        repayment table, and the day-by-day overdue/inflow tables for this
        month and last month.

        Returns
        -------
        tuple of pandas.DataFrame
            ``(key_day_table, overdue_distribution, last_30_days_table)``.
        """
        def key_day(x):
            # Keep the date only for today, yesterday and the same day last month.
            day = x.strftime("%Y-%m-%d" )
            return day if day in (nowtime_str, yestime_str, lastmonthday_str) else np.nan

        def count_by_day(frame, label):
            # Number of loans per key day, in a column named ``label``.
            return frame.groupby(['还款时间'])['user_id'].agg(pd.Series.count).reset_index().rename(columns = {'user_id':label})

        def totals_as_of_now(frame, prefix):
            # One-row month-to-date and all-time totals for loans due by ``nowtime``.
            due = frame['repay_time'] <= nowtime
            this_month = frame['repay_time'].apply(lambda x: x.strftime("%Y-%m")) == thismonth_str
            repaid = frame['finish_pay_time'].notnull()
            return pd.DataFrame({'还款时间':[nowtime_str],
                                 prefix + '本月累计应还':[int((this_month & due).sum())],
                                 prefix + '本月累计流入':[int((this_month & due & (frame['流入'] == 1)).sum())],
                                 prefix + '历史到期应还':[int(due.sum())],
                                 prefix + '历史到期已还':[int((due & repaid).sum())]
                                 })

        # Repayments due on the key days, all loans.
        ds1 = indata.copy()
        ds1['还款时间'] = ds1['repay_time'].apply(key_day)
        ds1_grp1 = count_by_day(ds1, '应还笔数')
        ds1_grp2 = count_by_day(ds1.loc[ds1['finish_pay_time'].notnull(),], '已还笔数')
        ds1_tmp = totals_as_of_now(ds1, '')

        # Repeat loans: every loan of a user after the first, by repay_time.
        _ds1 = indata.copy().sort_values(by = ['user_id','repay_time'] )
        _ds1['贷款次数'] = _ds1.groupby(['user_id']).cumcount()+1
        # .copy() so the column added below does not write into a slice.
        _ds1 = _ds1.loc[_ds1['贷款次数']>1,].copy()
        _ds1['还款时间'] = _ds1['repay_time'].apply(key_day)
        _ds1_grp1 = count_by_day(_ds1, '复贷-应还笔数')
        _ds1_grp2 = count_by_day(_ds1.loc[_ds1['finish_pay_time'].notnull(),], '复贷-已还笔数')
        _ds1_tmp = totals_as_of_now(_ds1, '复贷-')

        ds1_tmp = self.merge_ds([ds1_grp1,ds1_grp2,ds1_tmp,_ds1_grp1,_ds1_grp2,_ds1_tmp],on_var = '还款时间',how = 'outer')
        # Make sure the same-day-last-month row exists even without data.
        format_month = pd.DataFrame({'还款时间': [lastmonthday_str]})
        ds1_tmp = pd.merge(ds1_tmp,format_month,on='还款时间',how='outer')
        ds1_tmp = ds1_tmp.fillna(0).sort_values(['还款时间'])
        ds1_tmp = ds1_tmp[['还款时间','应还笔数','已还笔数','本月累计应还','本月累计流入','历史到期应还','历史到期已还','复贷-应还笔数','复贷-已还笔数','复贷-本月累计应还','复贷-本月累计流入','复贷-历史到期应还','复贷-历史到期已还']]

        # Export the key-day table for the Excel report.
        self.get_excel(indata = ds1_tmp,sheetname = 'jietiao_borrow_day',row_add = 3,col_add = 1)

        # Overdue distribution of the loans due by ``nowtime``.
        ds2 = indata.copy()
        due = ds2['repay_time'] <= nowtime
        overdue_days = ds2['当前逾期天数']
        repeat_due = _ds1['repay_time'] <= nowtime
        ds2_tmp = pd.DataFrame({'还款时间':[nowtime_str],
                                '历史到期应还':[int(due.sum())],
                                '历史到期已还':[int((due & ds2['finish_pay_time'].notnull()).sum())],
                                'C':[int((due & (overdue_days == 0)).sum())],
                                'D1~D3#':[int((due & (overdue_days > 0) & (overdue_days < 4)).sum())],
                                'D4~D10#':[int((due & (overdue_days >= 4) & (overdue_days < 11)).sum())],
                                # Upper bound is inclusive: a loan exactly 30 days
                                # overdue belongs here, otherwise it would fall
                                # between this bucket and 'D30+#'.
                                'D11~D30#':[int((due & (overdue_days >= 11) & (overdue_days <= 30)).sum())],
                                'D30+#':[int((due & (overdue_days > 30)).sum())],
                                '复贷-到期应还':[int(repeat_due.sum())],
                                '复贷-到期已还':[int((repeat_due & _ds1['finish_pay_time'].notnull()).sum())],
                                # NOTE(review): the next two counts use the repeat-loan
                                # subset, as the original did - confirm that is intended.
                                '流入数':[int((repeat_due & (_ds1['流入'] == 1)).sum())],
                                '催回数':[int((repeat_due & (_ds1['流入催回'] == 1)).sum())]
                                })
        # Export the overdue distribution for the Excel report.
        self.get_excel(indata = ds2_tmp,sheetname = 'jietiao_borrow_day',row_add = 9,col_add = 1)

        # Repayments due in the last 30 days, per day.
        ds3 = indata.copy()
        window_start = (nowtime + datetime.timedelta(days=-30)).strftime('%Y-%m-%d')
        ds3['还款时间'] = ds3['repay_time'].apply(lambda x: x.strftime("%Y-%m-%d" ) if window_start <= x.strftime("%Y-%m-%d" ) <= nowtime_str else np.nan )
        ds3_grp1 = count_by_day(ds3, '应还笔数')
        ds3_grp2 = count_by_day(ds3.loc[ds3['finish_pay_time'].notnull(),], '已还笔数')
        ds3_tmp = pd.merge(ds3_grp1,ds3_grp2,on = '还款时间',how = 'outer')
        # Days on which nothing has been repaid are absent from ds3_grp2; count
        # them as 0 repaid so the unpaid count and rate are not NaN.
        ds3_tmp['已还笔数'] = ds3_tmp['已还笔数'].fillna(0)
        ds3_tmp['未还笔数'] = ds3_tmp['应还笔数'] - ds3_tmp['已还笔数']
        ds3_tmp['逾期率']  = ds3_tmp[['未还笔数','应还笔数']].apply(lambda x: x['未还笔数'] / x['应还笔数'] if x['应还笔数'] > 0 else 0,axis = 1)
        ds3_tmp = ds3_tmp[['还款时间','应还笔数','未还笔数','逾期率']].sort_values(['还款时间'])
        # Export the 30-day table for the Excel report.
        self.get_excel(indata = ds3_tmp,sheetname = 'jietiao_borrow_day',row_add = 14,col_add = 1)

        # Day-by-day overdue and inflow figures for this month and last month.
        ds4 = indata.copy()
        this_month = self.yuqi_liuru(intime = nowtime,ds4 = ds4)
        self.get_excel(indata = this_month.sort_values(['还款时间']),sheetname = 'jietiao_borrow_day',row_add = 14,col_add = 9)

        last_month = self.yuqi_liuru(intime = lastmonthday,ds4 = ds4)
        self.get_excel(indata = last_month.sort_values(['还款时间']),sheetname = 'jietiao_borrow_day',row_add = 47,col_add = 9)

        return ds1_tmp,ds2_tmp,ds3_tmp
    
    def yuqi_liuru(self,intime,ds4):
        """Compute overdue and inflow figures for each day of a month up to ``intime``.

        Walks backwards one day at a time from ``intime`` to the first day of
        its month. ``ds4`` is only read, never modified.

        Parameters
        ----------
        intime : datetime.datetime
            Last day to report.
        ds4 : pandas.DataFrame
            Repayment records with ``user_id``, ``repay_time`` and
            ``finish_pay_time``.

        Returns
        -------
        pandas.DataFrame
            One row per day, sorted by ``还款时间``, with the columns
            ``还款时间``, ``逾期数`` (loans due by that day minus those settled
            by that day), ``逾期率``, ``新增逾期`` (due that day minus those of
            them that are repaid) and ``流入率``.
        """
        columns = ['还款时间','逾期数','逾期率','新增逾期','流入率']
        # Loop-invariant pieces, computed once instead of once per day.
        repay_day = ds4['repay_time'].apply(lambda x: x.strftime("%Y-%m-%d" ) )
        finish_time = ds4['finish_pay_time']
        repaid = finish_time.notnull()

        rows = []
        daytime = intime
        while daytime.month == intime.month:
            day_str = daytime.strftime("%Y-%m-%d" )
            due = repay_day <= day_str
            due_today = repay_day == day_str
            # Settled by this day: the bound is midnight at the start of the day.
            settled = repaid & (finish_time <= pd.Timestamp(day_str))

            n_due = int(due.sum())
            n_overdue = n_due - int((due & settled).sum())
            n_due_today = int(due_today.sum())
            n_new_overdue = n_due_today - int((due_today & repaid).sum())
            rows.append({'还款时间': day_str,
                         '逾期数': n_overdue,
                         '逾期率': n_overdue / n_due if n_due > 0 else 0,
                         '新增逾期': n_new_overdue,
                         '流入率': n_new_overdue / n_due_today if n_new_overdue > 0 else 0,
                         })
            daytime = daytime - datetime.timedelta(days = 1)
        # A single DataFrame built at the end replaces the per-day
        # DataFrame.append calls (append was removed in pandas 2.0).
        month_box = pd.DataFrame(rows, columns = columns)
        return month_box.sort_values(['还款时间'])
      
    def data_yuqi_time(self,indata):
        """Summarise overdue buckets per loan date.

        Groups ``indata`` by ``放款日期`` and reports, per date, the number of
        loans, the number of loans in each overdue bucket (D3, D10, M1, M2+),
        the inflow / collected-back counts and the matching percentage columns.

        Args:
            indata: DataFrame providing the columns ``放款日期``, ``user_id``,
                ``当前逾期天数``, ``流入``, ``流入催回`` and ``申请时间``.

        Returns:
            DataFrame with one row per ``放款日期``. Every calendar day from the
            earliest ``申请时间`` up to (excluding) the module-level ``nowtime``
            is added if missing, and missing values are filled with 0.
        """
        key = '放款日期'
        ds = indata.copy()
        overdue_days = ds['当前逾期天数']

        def _pct(num, den):
            # Percentage as a 2-decimal string; '0' when the denominator is not positive.
            return '{x:.2f}'.format(x = 100 * num / den) if den > 0 else '0'

        # Number of loans per loan date.
        parts = [ds.groupby([key])['user_id'].count().rename('放款笔数').reset_index()]

        # One 0/1 flag column per overdue bucket, summed per loan date.
        buckets = [
            ('笔数D3',  (overdue_days >= 3)  & (overdue_days < 10)),
            ('笔数D10', (overdue_days >= 10) & (overdue_days < 30)),
            ('笔数M1',  (overdue_days >= 30) & (overdue_days < 60)),
            ('笔数M2+', overdue_days >= 60),
        ]
        for col, mask in buckets:
            ds[col] = np.where(mask,1,0)
            parts.append(ds.groupby([key])[col].sum().reset_index())

        # Inflow count and collected-back count per loan date.
        for col in ('流入','流入催回'):
            parts.append(ds.groupby([key])[col].sum().reset_index())

        tmp = self.merge_ds( ds_list = parts,on_var = key,how = 'outer')

        # (output column, numerator, denominator). The D3 ratio previously read
        # the D10 column, which is not even present in the selected sub-frame.
        ratio_specs = [
            ('笔数D3占比',  '笔数D3',  '放款笔数'),
            ('笔数D10占比', '笔数D10', '放款笔数'),
            ('笔数M1占比',  '笔数M1',  '放款笔数'),
            ('笔数M2+占比', '笔数M2+', '放款笔数'),
            ('流入笔数占比', '流入',    '放款笔数'),
            ('催回笔数占比', '流入催回', '流入'),
        ]
        for out_col, num_col, den_col in ratio_specs:
            tmp[out_col] = [_pct(n, d) for n, d in zip(tmp[num_col], tmp[den_col])]

        tmp = tmp[[key,'放款笔数','笔数D3','笔数D3占比','笔数D10','笔数D10占比','笔数M1','笔数M1占比','笔数M2+','笔数M2+占比','流入','流入笔数占比','流入催回','催回笔数占比']]
        tmp = self.char2num( indata=tmp, varlist=[spec[0] for spec in ratio_specs])

        # Pad the table with every theoretical date so days without loans still
        # appear. The calendar must use the same key column as ``tmp``
        # (it used to be built under '放款时间', which ``tmp`` does not have).
        # NOTE(review): the calendar holds '%Y-%m-%d' strings, so this assumes
        # ``放款日期`` is stored as such strings too - confirm against the caller.
        first_day = ds['申请时间'].min()
        if pd.notnull(first_day):
            n_days = int((nowtime - first_day) / np.timedelta64(1,'D'))
            calendar = pd.DataFrame({
                key: [(first_day + datetime.timedelta(days=diff)).strftime("%Y-%m-%d") for diff in range(n_days)]
            })
            tmp = pd.merge(tmp,calendar,on=key,how='outer')
        tmp = tmp.fillna(0)
        return tmp

    '''逾期用户进件城市统计'''    
    def data_huikuan_city(self,indata=None,cutoff=None):
        """Apply/pass statistics by city for records created in the last ``cutoff`` days.

        The city is looked up from the first six characters of ``id_no`` in the
        mapping returned by ``self.user_addr()``; rows without a known city are
        dropped.

        Args:
            indata: DataFrame providing ``user_id``, ``loan_time`` and
                ``create_time``. Required (the default only keeps the old
                zero-argument signature importable).
            cutoff: Window size in days, measured back from the module-level
                ``nowtime_str``. Required.

        Returns:
            Tuple ``(all_cities, top10)``. Both frames have the columns ``city``,
            ``pass_number``, ``apply_number``, ``apply_ratio%``, ``pass_ratio%``
            and ``pass_rate%`` and are sorted by ``apply_number`` descending.
            When more than 10 cities exist, ``top10`` holds the first 10 plus an
            aggregated '其他' row; otherwise it is the same frame as ``all_cities``.

        Raises:
            TypeError: if ``indata`` or ``cutoff`` is not supplied.
        """
        if indata is None or cutoff is None:
            raise TypeError("data_huikuan_city() requires both 'indata' and 'cutoff'")

        out_cols = ['city','pass_number','apply_number','apply_ratio%','pass_ratio%','pass_rate%']
        pct_cols = ['apply_ratio%','pass_ratio%','pass_rate%']

        def _pct(num, den):
            # Percentage as a 2-decimal string; '0.00' when the denominator is not positive.
            return '{x:.2f}'.format(x = 100 * num / den) if den > 0 else '0.00'

        ds1 = indata.copy()
        ds1['pass'] = np.where(ds1['loan_time'].notnull(),1,0)
        info_query = self.sql_query('select user_id,id_no from cl_user_base_info')
        ds1 = pd.merge(ds1,info_query,on = 'user_id',how = 'left')

        # Build the window mask from the merged frame itself: the merge returns
        # a fresh index, so a mask built from ``indata`` may not align with it.
        days_since_apply = (pd.to_datetime(nowtime_str) - ds1['create_time']) / np.timedelta64(1,'D')
        ds1_tmp = ds1.loc[days_since_apply <= cutoff].reset_index(drop = True)

        # Map the id-number prefix to a city; users without an id_no (no match
        # in the left merge) get no city and are excluded below.
        addr = self.user_addr()
        ds1_tmp['city'] = ds1_tmp['id_no'].apply(lambda x: addr.get(str(x)[:6],None) if pd.notnull(x) else None)
        ds1_tmp = ds1_tmp.loc[ds1_tmp['city'].notnull()].reset_index(drop = True)

        # apply_number = applications, pass_number = applications that were lent.
        tmp = ds1_tmp.groupby(['city'])['pass'].agg(['sum','count']).reset_index()
        tmp = tmp.rename(columns = {'sum':'pass_number','count':'apply_number'})

        apply_total = tmp['apply_number'].sum()
        pass_total = tmp['pass_number'].sum()
        tmp['apply_ratio%'] = [_pct(v, apply_total) for v in tmp['apply_number']]
        tmp['pass_ratio%' ] = [_pct(v, pass_total) for v in tmp['pass_number']]
        tmp['pass_rate%'  ] = [_pct(p, a) for p, a in zip(tmp['pass_number'], tmp['apply_number'])]

        tmp = tmp.sort_values(by=['apply_number'],ascending=[False]).reset_index(drop=True)

        if tmp.shape[0] <= 10:
            # Convert the percent columns here as well, so both branches return
            # the same column types.
            tmp = self.char2num( indata=tmp, varlist=pct_cols )
            return tmp[out_cols], tmp[out_cols]

        # Top 10 cities plus one aggregated row for everything else.
        top = tmp.iloc[:10]
        rest = tmp.iloc[10:]
        rest_pass = rest['pass_number'].sum()
        rest_apply = rest['apply_number'].sum()
        last_row = pd.DataFrame({
                'city':['其他'],
                'pass_number':[rest_pass],
                'apply_number':[rest_apply],
                'apply_ratio%':[float(_pct(rest_apply, apply_total))],
                'pass_ratio%' :[float(_pct(rest_pass, pass_total))],
                'pass_rate%'  :[float(_pct(rest_pass, rest_apply))],
                })
        # pd.concat instead of DataFrame.append (removed in pandas 2.0).
        tmp1 = pd.concat([top, last_row], ignore_index=True)
        tmp1 = self.char2num( indata=tmp1, varlist=pct_cols )
        tmp  = self.char2num( indata=tmp , varlist=pct_cols )
        return tmp[out_cols], tmp1[out_cols]
 
    '''逾期用户年龄统计'''
    def data_huikuan_age(self,indata,cutoff):
        """Apply/pass statistics by age for records created in the last ``cutoff`` days.

        Args:
            indata: DataFrame providing ``user_id``, ``loan_time`` and ``create_time``.
            cutoff: Window size in days, measured back from the module-level
                ``nowtime_str``.

        Returns:
            DataFrame with one row per ``age`` and the columns ``pass_number``,
            ``apply_number``, ``apply_ratio%``, ``pass_ratio%`` and ``pass_rate%``;
            the three percent columns are 2-decimal strings.
        """
        def _pct(num, den):
            # Percentage as a 2-decimal string; '0.00' when the denominator is not positive.
            return '{x:.2f}'.format(x = 100 * num / den) if den > 0 else '0.00'

        ds1 = indata.copy()
        ds1['pass'] = np.where(ds1['loan_time'].notnull(),1,0)
        info_query = self.sql_query('select user_id,age from cl_user_base_info')
        ds1 = pd.merge(ds1,info_query,on = 'user_id',how = 'left')

        # Build the window mask from the merged frame itself: the merge returns
        # a fresh index, so a mask built from ``indata`` may not align with it.
        days_since_apply = (pd.to_datetime(nowtime_str) - ds1['create_time']) / np.timedelta64(1,'D')
        keep = (days_since_apply <= cutoff) & ds1['age'].notnull()
        ds1_tmp = ds1.loc[keep].reset_index(drop = True)

        # apply_number = applications, pass_number = applications that were lent.
        tmp = ds1_tmp.groupby(['age'])['pass'].agg(['sum','count']).reset_index()
        tmp = tmp.rename(columns = {'sum':'pass_number','count':'apply_number'})

        # Totals are computed once instead of once per row.
        apply_total = tmp['apply_number'].sum()
        pass_total = tmp['pass_number'].sum()
        tmp['apply_ratio%'] = [_pct(v, apply_total) for v in tmp['apply_number']]
        tmp['pass_ratio%' ] = [_pct(v, pass_total) for v in tmp['pass_number']]
        tmp['pass_rate%'  ] = [_pct(p, a) for p, a in zip(tmp['pass_number'], tmp['apply_number'])]
        return tmp[['age','pass_number','apply_number','apply_ratio%','pass_ratio%','pass_rate%']]

    '''逾期用户性别统计'''
    def data_huikuan_gender(self,indata,cutoff):
        """Apply/pass statistics by sex for records created in the last ``cutoff`` days.

        Args:
            indata: DataFrame providing ``user_id``, ``loan_time`` and ``create_time``.
            cutoff: Window size in days, measured back from the module-level
                ``nowtime_str``.

        Returns:
            DataFrame with one row per ``sex`` and the columns ``pass_number``,
            ``apply_number``, ``apply_ratio%``, ``pass_ratio%`` and ``pass_rate%``;
            the three percent columns are 2-decimal strings.
        """
        def _pct(num, den):
            # Percentage as a 2-decimal string; '0.00' when the denominator is not positive.
            return '{x:.2f}'.format(x = 100 * num / den) if den > 0 else '0.00'

        ds1 = indata.copy()
        ds1['pass'] = np.where(ds1['loan_time'].notnull(),1,0)
        info_query = self.sql_query('select user_id,sex from cl_user_base_info')
        ds1 = pd.merge(ds1,info_query,on = 'user_id',how = 'left')

        # Build the window mask from the merged frame itself: the merge returns
        # a fresh index, so a mask built from ``indata`` may not align with it.
        days_since_apply = (pd.to_datetime(nowtime_str) - ds1['create_time']) / np.timedelta64(1,'D')
        keep = (days_since_apply <= cutoff) & ds1['sex'].notnull()
        ds1_tmp = ds1.loc[keep].reset_index(drop = True)

        # apply_number = applications, pass_number = applications that were lent.
        tmp = ds1_tmp.groupby(['sex'])['pass'].agg(['sum','count']).reset_index()
        tmp = tmp.rename(columns = {'sum':'pass_number','count':'apply_number'})

        # Totals are computed once instead of once per row.
        apply_total = tmp['apply_number'].sum()
        pass_total = tmp['pass_number'].sum()
        tmp['apply_ratio%'] = [_pct(v, apply_total) for v in tmp['apply_number']]
        tmp['pass_ratio%' ] = [_pct(v, pass_total) for v in tmp['pass_number']]
        tmp['pass_rate%'  ] = [_pct(p, a) for p, a in zip(tmp['pass_number'], tmp['apply_number'])]
        return tmp[['sex','pass_number','apply_number','apply_ratio%','pass_ratio%','pass_rate%']]

☆ 清理数据统计模块

    #################################4.清理信息#################################
    def qingli_dayReport(self,indata):
        """Write the daily clearing (collection) report into sheet 'jietiao_repay_day'.

        Three tables are produced and handed to ``self.get_excel``:
          1. cleared inflow loans per repayment day for the last 7 days,
          2. cleared inflow loans bucketed by days overdue at clearing time,
          3. a one-row summary (yesterday / last 3 days / this month / total
             inflow and clearing) merged with the status of loans that were
             already due before the first day of this month.

        Reads the module-level ``nowtime``, ``nowtime_str``, ``yestime_str``,
        ``thismonth_str`` and ``firstday``.

        Args:
            indata: DataFrame providing ``user_id``, ``repay_time``,
                ``finish_pay_time`` and ``流入``.

        Returns:
            None. The results are only written through ``self.get_excel``.
        """
        def _count(frame, mask):
            # Number of rows of ``frame`` selected by ``mask``.
            return frame.loc[mask,'user_id'].shape[0]

        cleared_inflow = indata['finish_pay_time'].notnull() & (indata['流入'] == 1)

        # --- 1) cleared loans per day during the last week -------------------
        week_start = (nowtime - datetime.timedelta(days=6)).strftime('%Y-%m-%d')

        def _day_if_in_week(ts):
            day = ts.strftime("%Y-%m-%d")
            return day if week_start <= day <= nowtime_str else np.nan

        # .copy() so the column assignment does not target a slice of ``indata``.
        ds1 = indata.loc[cleared_inflow].copy()
        ds1['还款时间'] = ds1['finish_pay_time'].apply(_day_if_in_week)
        ds1_tmp = (ds1.loc[ds1['还款时间'].notnull()]
                      .groupby(['还款时间'])['流入'].sum()
                      .reset_index()
                      .rename(columns = {'流入':'清理个数'}))
        self.get_excel(indata = ds1_tmp,sheetname = 'jietiao_repay_day',row_add = 18,col_add = 1)

        # --- 2) days overdue at the moment of clearing ------------------------
        ds2 = indata.loc[cleared_inflow].copy()
        late_days = (ds2['finish_pay_time'] - ds2['repay_time'])/np.timedelta64(1,'D')
        ds2['amount_day'] = np.select(
            [late_days <= 3, late_days <= 7, late_days <= 11, late_days <= 15, late_days <= 30],
            ['D01~D03','D04~D07','D08~D11','D12~D15','D16~D30'],
            default = '>D30')
        ds2_tmp = ds2.groupby(['amount_day'])['流入'].sum().reset_index().rename(columns = {'流入':'清理个数'})
        self.get_excel(indata = ds2_tmp,sheetname = 'jietiao_repay_day',row_add = 18,col_add = 4)

        # --- 3a) inflow / clearing summary ------------------------------------
        ds3 = indata.copy()
        ds3['还款时间'] = ds3['repay_time'].apply(lambda x: x.strftime("%Y-%m-%d" ))
        ds3['还款月'] = ds3['repay_time'].apply(lambda x: x.strftime("%Y-%m" ))
        ds3 = ds3.loc[(ds3['流入'] == 1) & (ds3['还款时间'] <= nowtime_str)]
        paid3 = ds3['finish_pay_time'].notnull()
        # '还款时间' holds strings, so it is compared with the string form of
        # yesterday (NOTE(review): assumes ``yestime_str`` is '%Y-%m-%d' like
        # ``nowtime_str`` - confirm in get_time).
        due_yesterday = ds3['还款时间'] == yestime_str
        # Window start is two days BEFORE today. Subtracting timedelta(days=-2)
        # moved it two days into the future, so the D3 counts were always 0.
        d3_start = (nowtime - datetime.timedelta(days=2)).strftime("%Y-%m-%d" )
        due_in_d3 = ds3['还款时间'] >= d3_start
        due_this_month = ds3['还款月'] == thismonth_str
        ds3_tmp = pd.DataFrame({'还款时间':[nowtime_str],
                                '昨日流入':[_count(ds3, due_yesterday)],
                                '昨日清理':[_count(ds3, due_yesterday & paid3)],
                                'D3内流入':[_count(ds3, due_in_d3)],
                                'D3内清理':[_count(ds3, due_in_d3 & paid3)],
                                '本月流入':[_count(ds3, due_this_month)],
                                '本月清理':[_count(ds3, due_this_month & paid3)],
                                '累计流入':[ds3.shape[0]],
                                '累计清理':[_count(ds3, paid3)]
                                })

        # --- 3b) loans due before this month: overdue bucket as of month start -
        ds4 = indata.loc[indata['repay_time'] < firstday].copy()
        paid4 = ds4['finish_pay_time'].notnull()
        # 0 marks loans already repaid before the month started; otherwise the
        # number of days between the due date and the first day of the month.
        days_at_month_start = np.where(paid4 & (ds4['finish_pay_time'] < firstday),
                                       0,
                                       (firstday - ds4['repay_time'])/np.timedelta64(1,'D'))
        ds4['amount_day'] = np.where(days_at_month_start == 0,'已回款',
                                   np.where(days_at_month_start <= 7,'D01~D07',
                                            np.where(days_at_month_start <= 15,'D08~D15','>D15')))
        bucket_cells = {'还款时间':[nowtime_str]}
        for label in ('D01~D07','D08~D15','>D15'):
            in_bucket = ds4['amount_day'] == label
            bucket_cells[label] = [_count(ds4, in_bucket)]
            bucket_cells[label + '清理'] = [_count(ds4, in_bucket & paid4)]
        ds4_tmp = pd.DataFrame(bucket_cells)

        tmp = pd.merge(ds3_tmp,ds4_tmp,on = '还款时间',how = 'outer')
        self.get_excel(indata = tmp,sheetname = 'jietiao_repay_day',row_add = 3,col_add = 1)

五 可视化展示

  • 因为考虑到python制图和excel有差异,所以最终的一页报告设计由excel完成,数据部分由python实现,图表使用的数据用公式调用。
    这里写图片描述

六 报告发送

  • python 模块:matplotlib/echarts,画图
  • python 模块:xlwt,操作excel文件,制作excel报告。
  • 定时发邮件:send_email
  • 定时群发微信:wechat_get_news

☆ 邮件发送模块

    '''发送邮件模块'''
    def create_email(self,email_from, email_to, email_Subject, email_text, annex_path, annex_name):
        """Build a multipart e-mail with a plain-text body and one file attachment.

        Args:
            email_from: Display name placed in the ``From`` header (not the
                envelope sender used when sending).
            email_to: Display name placed in the ``To`` header.
            email_Subject: Subject line.
            email_text: Plain-text body, encoded as UTF-8.
            annex_path: Path of the file to attach; it is read in binary mode.
            annex_name: File name shown for the attachment.

        Returns:
            The assembled ``MIMEMultipart`` message.

        Raises:
            OSError: if ``annex_path`` cannot be opened or read.
        """
        # Local import: the module header only imports MIMEText / MIMEMultipart.
        from email.mime.application import MIMEApplication

        message = MIMEMultipart()
        message.attach(MIMEText(email_text, 'plain', 'utf-8'))
        message['From'] = Header(email_from, 'utf-8')
        message['To'] = Header(email_to, 'utf-8')
        message['Subject'] = Header(email_Subject, 'utf-8')

        # Read the attachment through a context manager so the handle is closed.
        with open(annex_path, 'rb') as annex_file:
            # MIMEApplication emits a single 'application/octet-stream'
            # Content-Type and base64-encodes the bytes. Wrapping the bytes in
            # MIMEText and then assigning Content-Type produced two such headers.
            attachment = MIMEApplication(annex_file.read())
        # add_header quotes the file name and RFC 2231-encodes non-ASCII names.
        attachment.add_header('Content-Disposition', 'attachment', filename=annex_name)
        message.attach(attachment)
        return message
    
    '''发送邮件模块'''
    def send_email(self,sender, password, receiver, msg):
        """Send ``msg`` through smtp.mxhichina.com over SSL (port 465).

        Failures are reported on stdout (traceback plus a failure line) instead
        of being raised, so a failed mail does not abort the caller.

        Args:
            sender: Sender mailbox address, also used as the login name.
            password: Password of the sender mailbox.
            receiver: Recipient address(es), passed unchanged to ``sendmail``.
            msg: Message object; its ``as_string()`` output is sent.

        Returns:
            None.
        """
        # Local import: ``traceback`` is not imported at module level, so the
        # error handler used to fail with NameError and hide the real problem.
        import traceback

        try:
            # The context manager sends QUIT and closes the socket even when
            # login or sendmail raises.
            with smtplib.SMTP_SSL("smtp.mxhichina.com", 465) as server:
                server.ehlo()
                server.login(sender, password)
                server.sendmail(sender, receiver, msg.as_string())
            print("邮件发送成功")
        except Exception:
            # print_exc() writes the traceback itself and returns None, so it
            # must not be wrapped in print().
            traceback.print_exc()
            print("邮件发送失败")

☆ 微信发送模块

    '''发送微信模块'''
    def wechat_get_news(self):
        """Send the report image to one WeChat friend via a wxpy ``Bot``.

        Logs in (QR code in the console, cached session), looks the friend up by
        name and sends the image. If sending fails, a short failure text is sent
        to the same friend instead.

        Returns:
            None.
        """
        friend_name = 'keep a clear mind耀武'
        bot = Bot(console_qr=True, cache_path=True)

        # Look the friend up once. When the search has no hit, the fallback
        # could not reach anybody either, so report it and stop.
        matches = bot.friends().search(friend_name)
        if not matches:
            print('wechat friend not found: ' + friend_name)
            return
        my_friend = matches[0]

        try:
            my_friend.send_image(r'C:\Users\A3\Desktop\skr.png')
            print('seccess')
        except Exception:
            # ``except Exception`` instead of a bare ``except`` so Ctrl-C and
            # SystemExit are not swallowed.
            my_friend.send(u"消息发送失败了")

七 执行函数

☆ 主函数

    #################################5.主函数#################################
if __name__ == "__main__":
    # Entry point: load the report workbook, run the enabled report sections
    # and save the workbook again.

    ALD = AfterLoanData()

    # ---- globals ----
    _begin = time.time()
    # Project folder. It must be defined before the workbook is loaded; its only
    # assignment used to be commented out further down, so the script stopped
    # with NameError on the load_workbook line.
    _path = r'C:\Users\A3\Desktop\2:项目\项目\项目24: 基于python 的全自动贷后指标追踪日报'
    # Raw string: '\日' is not a valid escape sequence.
    _report_file = _path + r'\日报数据.xlsx'
    workbook = load_workbook(_report_file)
    nowtime,nowtime_str,yestime,yestime_str,firstday,firstday_str,thismonth_str,lastmonthday,lastmonthday_str,lastmonth_str = ALD.get_time()

    # ---- 1. application data (disabled) ----
#    data_apply = ALD.data_jinjian()
#    time_jinjain_report = ALD.jinjian_dayReport(indata = data_apply)
#    time_jinjian_day,time_jinjian_week,time_jinjian_month = ALD.data_jinjian_time(indata = data_apply)
#    data_apply.to_excel(r'C:\Users\A3\Desktop\2:项目\项目\项目24: 基于python 的全自动贷后指标追踪日报\data\apply.xlsx')
#    data_apply = pd.read_excel(r'C:\Users\A3\Desktop\2:项目\项目\项目24: 基于python 的全自动贷后指标追踪日报\data\apply.xlsx')
#    time_jinjian_week.to_excel(r'C:\Users\A3\Desktop\2:项目\项目\项目24: 基于python 的全自动贷后指标追踪日报\data\time_jinjian_week.xlsx')
#    time_jinjian_month.to_excel(r'C:\Users\A3\Desktop\2:项目\项目\项目24: 基于python 的全自动贷后指标追踪日报\data\time_jinjian_month.xlsx')

#    city_apply_week,city_apply_week1 = ALD.data_jinjian_city(indata = data_apply,cutoff = 7)
#    city_apply_month,city_apply_month1 = ALD.data_jinjian_city(indata = data_apply,cutoff = 30)
#    city_apply_quarter,city_apply_quarter1 = ALD.data_jinjian_city(indata = data_apply,cutoff = 90)
#    city_apply_halfyear,city_apply_halfyear1 = ALD.data_jinjian_city(indata = data_apply,cutoff = 180)

#    age_apply_week = ALD.data_jinjian_age(indata = data_apply,cutoff = 7)
#    age_apply_month = ALD.data_jinjian_age(indata = data_apply,cutoff = 30)
#    age_apply_quarter = ALD.data_jinjian_age(indata = data_apply,cutoff = 90)
#    age_apply_halfyear = ALD.data_jinjian_age(indata = data_apply,cutoff = 180)

#    gender_apply_week = ALD.data_jinjian_gender(indata = data_apply,cutoff = 7)
#    gender_apply_month = ALD.data_jinjian_gender(indata = data_apply,cutoff = 30)
#    gender_apply_quarter = ALD.data_jinjian_gender(indata = data_apply,cutoff = 90)
#    gender_apply_halfyear = ALD.data_jinjian_gender(indata = data_apply,cutoff = 180)

    # ---- 2. loan data ----
    # Enabled again: section 4 needs ``data_huikuan``, which is derived from it.
    data_borrow = ALD.data_fangkuan()
#    data_borrow.to_excel(r'C:\Users\A3\Desktop\2:项目\项目\项目24: 基于python 的全自动贷后指标追踪日报\data\data_borrow.xlsx')
#    data_borrow = pd.read_excel(r'C:\Users\A3\Desktop\2:项目\项目\项目24: 基于python 的全自动贷后指标追踪日报\data\data_borrow.xlsx')

    # ---- 3. repayment data ----
    # Enabled again: without it ``data_huikuan`` is undefined in section 4.
    data_huikuan = ALD.data_huikuan_time(data_borrow)
#    time_huikuan_today,time_huikuan_fenbu,day_30 = ALD.huikuan_dayReport(indata = data_huikuan)

    # ---- 4. clearing data ----
    data_qingli = ALD.qingli_dayReport(indata = data_huikuan)

    # ---- 5. e-mail / WeChat delivery (disabled) ----
#    email_send = ALD.send_email()
#    wechat_send = ALD.wechat_get_news()
    workbook.save(_report_file)
    _end = time.time()
    print('You have finished!\nfanilly use time: {x:.2f}s'.format(x = _end - _begin))

八 项目总结

  • 初衷:此项目用于python‘萌宠’项目练手,重在完整地展现一个报告项目的数据整理流程,包括数据获取、处理、挖掘、可视化等模块。实际用时8个工作日,‘萌宠’期可以接受。
  • 反思:代码只是执行思维的工具,有一个接触、理解、运用、熟练的过程。重点是coder的思维逻辑是否清晰,能否按照工程流(workflow)、项目流、数据流的方式,层次化、结构化、逻辑化地去执行工作。
  • Finally!
    • 高效学习有两个很重要的习惯:
      ①快速进入专注的状态。
      ②长期保持专注的状态。

猜你喜欢

转载自blog.csdn.net/sunyaowu315/article/details/82462364
今日推荐