A summary of commonly used Python helper routines (wrapped as reusable functions)

1. Moving and renaming a file

import datetime
import os
import re
import shutil
import warnings

import pandas as pd
warnings.filterwarnings('ignore')

# Timestamp of this run at hour resolution ('YYYY-MM-DD-HH'); it is used to
# tag the name of the moved file.
now_data = datetime.datetime.now().strftime('%Y-%m-%d-%H')

# Year, month, day and hour as zero-padded strings, obtained with a single
# split instead of re-splitting the same string four times.
Y, M, D, H = now_data.split('-')

# path1: directory that holds the originally downloaded data
# path2: directory the data is moved into
# file_name: base name the data is saved under (file_name_time.csv)

path1 = 'C:/QYJ/Project/Database/chrome'
path2 = 'C:/QYJ/Project/Database/闪迪大师运营'
file_name = '闪迪大师竞店'

def move_file(path1, path2, file_name, timestamp=None):
    """Move one file out of ``path1`` into ``path2`` under a timestamped name.

    The destination name is ``<file_name>_<timestamp>.csv``.

    Args:
        path1: Directory to take the file from.
        path2: Directory to move the file into.
        file_name: Base name of the destination file (without extension).
        timestamp: Suffix appended to ``file_name``. When omitted, the
            module-level ``now_data`` string is used.

    Raises:
        IndexError: If ``path1`` contains no regular file.
    """
    if timestamp is None:
        timestamp = now_data

    # os.listdir returns entries in arbitrary order and also lists
    # sub-directories, so keep regular files only and sort them to make the
    # choice of "first file" deterministic.
    candidates = sorted(
        name for name in os.listdir(path1)
        if os.path.isfile(os.path.join(path1, name))
    )
    if not candidates:
        raise IndexError('no file to move in directory: ' + repr(path1))

    source = os.path.join(path1, candidates[0])
    target = os.path.join(path2, file_name + '_' + timestamp + '.csv')
    shutil.move(source, target)

# Run the move using the directories and base name configured above.
move_file(path1,path2,file_name)

When moving files, some sanity checks can be added (for example, to cope with files that have not been downloaded). An improved version follows:

# path1: directory that holds the originally downloaded data
# path2: directory the data is moved into
# file_name: name of the data file to move (file_name_time.csv)

path1 = 'C:/Users/闪迪大师/Downloads/'
path2 = 'D:/Project/0_dataset/'
file_name = '店铺__流量看板_所有终端_统计时间 2022-05-15[小旺神:xiaowangshen.com].csv'

def move_file(path1, path2, file_name):
    """Move ``file_name`` from ``path1`` to ``path2`` unless it is already there.

    Prints a line starting with ``1`` when the file was moved and a line
    starting with ``0`` when ``path2`` already lists an entry of that name,
    in which case nothing is moved.

    Args:
        path1: Directory the file currently lives in.
        path2: Directory the file should end up in.
        file_name: Name of the file, identical in both directories.
    """
    # Guard clause: leave an existing destination entry untouched.
    if file_name in os.listdir(path2):
        print('0 文件存在:' + file_name)
        return

    origin = '/'.join((path1, file_name))
    destination = '/'.join((path2, file_name))
    shutil.move(origin, destination)
    print('1 移动文件:' + file_name)

2. Deleting empty rows and columns, or the first few rows

# Example: removing empty rows from a spreadsheet.
myshop_goods = pd.read_excel('D:\\Project\\0_dataset\\3_闪迪_商品_销售数据_2022_04_28.xls')

# Clean-up.
# To drop a fixed number of leading rows instead: myshop_goods.drop(range(4), axis=0)
# Drop the rows in which every cell is empty, then renumber the index from 0.
myshop_goods = myshop_goods.dropna(axis=0, how='all')
myshop_goods = myshop_goods.reset_index(drop=True)
# Promote the first remaining row to column names (read_excel's `header`
# argument can select the header row directly at load time).
myshop_goods.columns = list(myshop_goods.iloc[0])
# Everything after the header row is the data, again renumbered from 0.
myshop_goods = myshop_goods.iloc[1:].reset_index(drop=True)

3. Reading files in batches

import warnings
warnings.filterwarnings('ignore')
import  pandas as pd
import re
import os

# Data exploration: load every CSV file under data_path into one DataFrame.
# 1. Collect the names of all entries in the data directory.
data_path = './data/'  # data directory -- adjust as needed
data_names = os.listdir(data_path)  # every entry name inside data_path

# 2. Read each matching file. The frames are gathered in a list and
#    concatenated once, because calling pd.concat inside the loop re-copies
#    all rows accumulated so far on every iteration.
frames = []
for data_name in data_names:
    # Skip anything that is not a CSV file -- change the suffix for other formats.
    if not data_name.endswith('.csv'):
        continue
    print(data_name)
    frames.append(pd.read_csv(os.path.join(data_path, data_name), encoding='gbk'))

# 3. Combine the frames; fall back to an empty DataFrame when nothing
#    matched, since pd.concat raises ValueError on an empty list.
data = pd.concat(frames) if frames else pd.DataFrame([])
# Single-file alternative: data = pd.read_csv('./data/订单表2018-5.csv', encoding='gbk')

4. Getting the date a given number of days earlier

import datetime
def get_before_day(day, before):
    """Return the date ``before`` days earlier than ``day``.

    Args:
        day: Reference date as a ``'YYYY-MM-DD'`` string.
        before: Number of days to go back.

    Returns:
        The shifted date, formatted as ``'YYYY-MM-DD'``.
    """
    date_format = '%Y-%m-%d'
    reference = datetime.datetime.strptime(day, date_format)
    shifted = reference - datetime.timedelta(days=before)
    return shifted.strftime(date_format)

# Example: the date one day before 2022-03-14.
get_before_day('2022-03-14', 1)
Returns: '2022-03-13'

5. Shifting a timestamp by a months/minutes/seconds offset with pandas

import datetime
import pandas as pd
def get_after_day(input_time, months, minutes, seconds):
    """Shift ``input_time`` by the given months, minutes and seconds.

    Args:
        input_time: Value that ``pd.to_datetime`` can parse.
        months: Number of months to add.
        minutes: Number of minutes to add.
        seconds: Number of seconds to add.

    Returns:
        The shifted time, formatted as ``'YYYY-MM-DD HH:MM:SS'``.
    """
    shift = pd.DateOffset(months=months, minutes=minutes, seconds=seconds)
    shifted = pd.to_datetime(input_time) + shift
    return shifted.strftime('%Y-%m-%d %H:%M:%S')

insert image description here

Guess you like

Origin blog.csdn.net/The_dream1/article/details/125096161