import pandas as pd
1. Read and save csv file
# Read
df = pd.read_csv(read_file_path, header=0)
# where read_file_path is a string, which is the path of the file to be read
#For example "../cj_data/query_result.csv"
# header=0 means that the first line contains the field names; you can get the column names through df.columns
# Save
df.to_csv(save_file_path)
2. How to filter a pandas DataFrame based on field values
df = df[  # [] is the indexing (selection) operator
    (df['course_id'] == course_id)  # == tests for equality
    & (df['member_id'] == str(member_id))  # & combines multiple filter conditions
]
3. Saving a run log from a Python script
import logging
import time

# Current date as YYYYMMDD, used to name the log file.
dt = time.strftime("%Y%m%d")
# project_folder must be defined earlier; it is concatenated directly with
# "log/", so it presumably ends with a path separator (confirm at the caller).
# NOTE: logging does not create missing directories; the log/ folder must exist.
logfile = project_folder + "log/log_" + dt + ".txt"
# Set the log level and start writing logs; mode "a" appends to an existing file.
logging.basicConfig(filename=logfile, filemode="a", level=logging.INFO)
logger = logging.getLogger(__name__)
logger.info('write_member_result ...')
logger.info(time.strftime("%Y-%m-%d %H:%M:%S"))
# The log content depends entirely on the logger.info calls. They are similar
# to print, but the output is saved to a file, which helps locate problems.
4. Sort, generate serial number
# Order the rows by lesson start time, earliest first (pandas sort_values).
df = df.sort_values('lesson_start_time', ascending=True)
# Assign serial numbers 1..N to the sorted rows in a single step.
df['lesson_order'] = range(1, df.shape[0] + 1)
5. join operation
# Left join: keep every row of df and attach the matching df2 columns,
# matching rows on the shared key columns member_id and lesson_id.
df3 = df.merge(df2, on=['member_id', 'lesson_id'], how='left')