Python basics - file operation (2)
CSV format file
Comma-separated values, storing tabular data in plain text
Consists of any number of records, separated by newlines
Each record consists of fields, separated by commas or tabs
Each record has the same field sequence
If there is a column name, it is located in the first line of the file
Each record data does not span lines, and there is no blank line
read CSV file
年,制造商,型号,说明,价值
1997,Ford,E350,"ac, abs, moon",3000.00
1999,Chevy,"Venture""ExtendedEdition""","",4900.00
1999,Chevy,"Venture ""Extended Edition, Very Large""","",5000.00
1996,Jeep,Grand Cherokee,"MUST SELL! \nair, moon roof, loaded",4799.00
The data in the CSV file is basically two-dimensional data composed of rows and columns.
It can be processed by the method of two-dimensional list
CSV file "score.csv":
姓名,C,Java,Python,C#
罗明,95,96,85,63
朱佳,75,93,66,85
李思,86,76,96,93
郑君,88,98,76,90
王雪,99,96,91,88
李立,82,66,100,77
# Read "8.2 score.csv" line by line; every record becomes a list of fields.
data_lst = []
with open('8.2 score.csv', 'r', encoding='utf-8') as csv_obj:
    for line in csv_obj:
        # Drop the trailing newline, then cut the record at each comma.
        fields = line.strip().split(',')
        data_lst.append(fields)
print(data_lst)
姓名,C,Java,Python,C#
罗明,95,96,85,63
朱佳,75,93,66,85
李思,86,76,96,93
郑君,88,98,76,90
王雪,99,96,91,88
李立,82,66,100,77
[['姓名', 'C', 'Java', 'Python', 'C#'],
['罗明', '95', '96', '85', '63'],
['朱佳', '75', '93', '66', '85'],
['李思', '86', '76', '96', '93'],
['郑君', '88', '98', '76', '90'],
['王雪', '99', '96', '91', '88'],
['李立', '82', '66', '100', '77']]
def read_csv(filename):
    """Read a comma-separated text file into a two-dimensional list.

    Args:
        filename: Path of a UTF-8 encoded CSV file.

    Returns:
        A list with one element per line of the file. Each element is the
        list of strings obtained by stripping the line and splitting it
        at every comma.

    Note:
        Every comma is treated as a separator, so quoted fields that
        contain commas are split as well.
    """
    with open(filename, 'r', encoding='utf-8') as csv_obj:
        data_lst = [line.strip().split(',') for line in csv_obj]
    # Return only after the file has been read, so data_lst is defined.
    return data_lst
if __name__ == '__main__':
    # Keep the file name in one variable so it is easy to change later.
    csv_name = '8.2 score.csv'
    # Read the file into a two-dimensional list and print that list.
    rows = read_csv(csv_name)
    print(rows)
write CSV file
def write_file(ls, new_file):
    """Write a two-dimensional list to a text file as comma-separated lines.

    Args:
        ls: Iterable of rows; each row is an iterable of cell values.
        new_file: Path of the file to write. It is opened in 'w' mode,
            so existing content is replaced.

    Each row is joined with commas and followed by a newline. Cells are
    converted with str(), so numbers can be written as well as strings.
    Cells are not quoted or escaped.
    """
    with open(new_file, 'w', encoding='utf-8') as file:  # write mode
        for row in ls:
            # write() takes one string; writelines() expects an iterable of
            # lines and would walk a single string character by character.
            file.write(','.join(map(str, row)) + '\n')
if __name__ == '__main__':
    # Score table: a header row followed by one row per student.
    data = [
        ['姓名', 'C', 'Java', 'Python', 'C#'],
        ['罗明', '95', '96', '85', '63'],
        ['朱佳', '75', '93', '66', '85'],
        ['李思', '86', '76', '96', '93'],
        ['郑君', '88', '98', '76', '90'],
        ['王雪', '99', '96', '91', '88'],
        ['李立', '82', '66', '100', '77'],
    ]
    file = 'score_new.csv'
    write_file(data, file)
JSON file
JSON is a cross-language lightweight general data exchange format
JSON is a text format; object keys must be strings enclosed in double quotes
'
{
"name": "李立",
"phone": "13988776655",
"city": "武汉"
}'
dumps()
load()
built-in json library for parsing and encoding JSON data
JSON-encoded
Convert Python object to JSON format data
json.dumps(obj, ensure_ascii=True, indent=None, sort_keys=False)
json.dump(obj,fp, ensure_ascii=True, indent=None,sort_keys=False)
dump(obj, fp) Convert "obj" to a string in JSON format
Write the string to the file object fp
json.dumps(obj, ensure_ascii=True, indent=None, sort_keys=False)
import json
json.dumps() defaults to ensure_ascii=True, which escapes non-ASCII characters such as Chinese as \uXXXX sequences.
Set ensure_ascii=False to keep Chinese output as it is
info = {
'name':'李立', 'phone':'13988776655', 'city':'武汉'}
{
"name": "\u674e\u7acb", "phone": "13988776655", "city": "\u6b66\u6c49"}
print(json.dumps(info, ensure_ascii=False))
{
"name": "李立", "phone": "13988776655", "city": "武汉"}
print(json.dumps(info))
json.dumps(obj, ensure_ascii=True, indent=None, sort_keys=False)
The indent parameter controls pretty-printing of the JSON output. The default value is None (compact output).
Setting it to an integer greater than 0 indents each nesting level by that many spaces, which makes the output more readable.
{
"name": "李立",
"phone": "13988776655",
"city": "武汉"
}
print(json.dumps(info, ensure_ascii=False, indent=4))
import json
info = {
'name':'李立', 'phone':'13988776655', 'city':'武汉'}
json.dumps(obj, ensure_ascii=True, indent=None, sort_keys=False)
The output is not sorted by key by default.
Set sort_keys=True to sort the output in ascending order of the dictionary keys.
{
"name": "李立",
"phone": "13988776655",
"city": "武汉"
}
print(json.dumps(info,ensure_ascii=False,indent=4,sort_keys=True))
{
"city": "武汉",
"name": "李立",
"phone": "13988776655"
}
import json
info = {
'name':'李立', 'phone':'13988776655', 'city':'武汉'}
json.dump(obj,fp, ensure_ascii=True, indent=None, sort_keys=False)
writes JSON data into a file object with write permission
{
"name": "李立",
"phone": "13988776655",
"city": "武汉"
}
# json.dump() needs a writable file object as its second argument and
# returns None, so write to "test.json" instead of printing the result.
with open('test.json', 'w', encoding='utf-8') as fp:
    json.dump(info, fp, ensure_ascii=False, indent=4)
import json
info = {
'name':'李立', 'phone':'13988776655', 'city':'武汉'}
Data in the file "test.json":
File and Folder Operations
Get the current working directory
os.getcwd()
returns the absolute path of the current program working directory
import os
result = os.getcwd()
print(result)
# F:\weiyun\2020
change current working directory
os.chdir()
changes the current working directory
import os
# In a string literal '\\' is parsed as a single '\'; the original note also
# lists the forward-slash form 'D:/testpath/path' as an alternative.
os.chdir('D:\\testpath\\path')
result = os.getcwd()
print(result)
# D:\testpath\path
Get a list of file names
os.listdir()
Gets a list of the names of all files and folders in the specified folder
import os
result = os.listdir('E:/股票数据/data')
print(result)
['600000.csv', '600006.csv', '600007.csv', '600008.csv',
'600009.csv',
'600010.csv', …… , '688399.csv']
create folder
os.mkdir() creates folders
os.makedirs() creates folders recursively
import os
os.mkdir('score')
os.makedirs('score/python/final')
delete empty directory
os.rmdir()
deletes empty directories
os.removedirs() recursively deletes empty directories
import os
os.rmdir('score')
os.removedirs('score/python/final/')
File renaming and deletion
os.rename(oldname, newname) file rename
os.remove(filename)
delete file
os.path.exists(filename)
detect existence
import os
# Rename XRD.txt to xrd.txt and then delete it, reporting each step;
# report instead when XRD.txt is not there.
if not os.path.exists('XRD.txt'):
    print('XRD.txt不存在')
else:
    os.rename('XRD.txt', 'xrd.txt')
    print('XRD.txt更名成功')
    os.remove('xrd.txt')
    print('xrd.txt已经被删除')
Detect files and read data
from os import path
def read_csv(filename):
    """Read a GBK-encoded comma-separated file into a two-dimensional list.

    Each line is stripped of surrounding whitespace and split at every
    comma; the resulting lists are returned in file order.
    """
    rows = []
    with open(filename, 'r', encoding='GBK') as csv_obj:
        for raw_line in csv_obj:
            rows.append(raw_line.strip().split(','))
    return rows
def check_path(filepath, filename):
    """Read a CSV file if both its folder and the file itself exist.

    Args:
        filepath: Folder expected to contain the file.
        filename: Name of the file inside that folder.

    Returns:
        The result of read_csv() for the file when the folder and the file
        both exist; otherwise the message string '路径或文件名不存在'
        ("path or file name does not exist").
    """
    # join() adds a separator only when filepath does not already end with
    # one, so the folder works with or without a trailing slash.
    full_path = path.join(filepath, filename)
    if path.exists(filepath) and path.exists(full_path):
        return read_csv(full_path)
    return '路径或文件名不存在'
if __name__ == "__main__":
    # Folder and file name are kept apart so either can be changed alone.
    stock_dir = 'E:/股票数据/data/'
    stock_csv = '600009.csv'
    print(check_path(stock_dir, stock_csv))
NumPy(Numerical Python )
ndarray: a multi-dimensional array whose elements all share a single data type
ufunc: universal functions for fast element-wise processing of arrays
numpy.genfromtxt()
Get data from text files
and provide more complex operations like missing value handling
numpy.genfromtxt(fname, dtype=<class 'float'>, comments='#',
delimiter=None,
skip_header=0,skip_footer=0, missing_values=None,
filling_values=None, usecols=None, autostrip=False,
max_rows=None, encoding='bytes'…)
numpy.loadtxt(fname, dtype=<class 'float'>, comments='#',
delimiter=None,
converters=None,
skiprows=0, usecols=None,
unpack=False, ndmin=0,
encoding='bytes',max_rows=None)
The file "8.5 score.csv" saves the student's score data, and its data part includes
integers, floating point numbers and missing data (Zheng Jun's C language and VB scores are missing)
姓名,学号,C语言,Java,Python,VB,C++,总分
朱佳,0121701100511,75.2,93,66,85,88,407
李思,0121701100513,86, 76,96,93,67,418
郑君,0121701100514,, 98,76,,89,263
王雪,0121701100515,99, 96,91,88,86,460
罗明,0121701100510,95,96,85,63,91,430
fname: file, string, character sequence or generator
dtype: the data type of the generated array, the default value is float, str means the string
numpy.genfromtxt()
import numpy as np
file = '8.5 score.csv'
data = np.genfromtxt(file, dtype=str, delimiter=',', encoding='utf-8')
print(data)
[['姓名' '学号' 'C语言' 'Java' 'Python' 'VB' 'C++' '总分']
['朱佳' '0121701100511' '75.2' '93' '66' '85' '88' '407']
['李思' '0121701100513' '86' ' 76' '96' '93' '67' '418']
['郑君' '0121701100514' '' ' 98' '76' '' '89' '263']
['王雪' '0121701100515' '99' ' 96' '91' '88' '86' '460']
['罗明' '0121701100510' '95' '96' '85' '63' '91' '430']]
delimiter: used to define how to split data lines, separated by blank characters by default
skip_header: the number of lines skipped at the beginning of the file, the default value is skip_header=0
dtype=None: the type of each column is determined iteratively from the data in that column
import numpy as np
file = '8.5 score.csv'
data = np.genfromtxt(file,dtype=None,delimiter=',',skip_header=1,encoding='utf-8')
print(data)
[('朱佳', 121701100511, 75.2, 93, 66, 85, 88, 407)
('李思', 121701100513, 86. , 76, 96, 93, 67, 418)
('郑君', 121701100514, nan, 98, 76, -1, 89, 263)
('王雪', 121701100515, 99. , 96, 91, 88, 86, 460)
('罗明', 121701100510, 95. , 96, 85, 63, 91, 430)]
filling_values: Replace missing data with the set value as the default value
import numpy as np
file = '8.5 score.csv'
data = np.genfromtxt(file, dtype=None, delimiter=',', filling_values=0, skip_header=1, encoding='utf-8')
print(data)
[('朱佳', 121701100511, 75.2, 93, 66, 85, 88, 407)
('李思', 121701100513, 86. , 76, 96, 93, 67, 418)
('郑君', 121701100514, 0. , 98, 76, 0, 89, 263)
('王雪', 121701100515, 99. , 96, 91, 88, 86, 460)
('罗明', 121701100510, 95. , 96, 85, 63, 91, 430)]
names: one of None, True, a string, or a sequence.
When the value is True, the first line read after skipping skip_header lines is used as the field names.
import numpy as np
file = '8.5 score.csv'
data = np.genfromtxt(file, dtype=None, delimiter=',', names=True, filling_values=0, encoding='utf-8')
print(data[['姓名', '学号', 'Python']]) # to index by several fields, put the field names in a list
[('朱佳', 121701100511, 66)
('李思', 121701100513, 96)
('郑君', 121701100514, 76)
('王雪', 121701100515, 91)
('罗明', 121701100510, 85)]
ufunc function
Universal functions are functions that perform operations on each element of an array.
Operations on arrays can be performed using operation functions, or written as array operation expressions
import numpy as np
a = np.array((1, 2, 3, 4, 5)) # array [ 1 2 3 4 5]
b = np.array((6, 7, 8, 9, 10)) # array [ 6 7 8 9 10]
print(np.add(a, b))
# prints [ 7 9 11 13 15]
print(a + b)
# prints [ 7 9 11 13 15]
[ 7 9 11 13 15]
[ 7 9 11 13 15]
NumPy has built-in functions for random numbers, trigonometric, hyperbolic, exponential and logarithmic functions, arithmetic operations,
complex-number processing, statistics, and more: nearly a hundred mathematical functions that operate quickly on whole arrays.
import numpy as np
a = np.array((1, 2, 3, 4)) # convert the tuple to the array [1 2 3 4]
print(np.sum(a))
# sum of the array elements, prints 10
print(a ** 2)
# square of each element, [ 1 4 9 16]
print(a % 3)
# each element modulo 3, [1 2 0 1]
print(np.sqrt(a)) # square root, [1. 1.41421356 1.73205081 2. ]
print(np.square(a)) # array with each element squared, [ 1 4 9 16]
Statistical Analysis
Descriptive statistics of numerical data mainly include how complete the data is (how many values are present), the minimum
value, maximum value, mean, median, range, standard deviation, variance and covariance.
import numpy as np
# arr is filled with random integers, so the outputs quoted below come from
# one sample run and will differ on each execution.
arr = np.random.randint(100, size=(3, 4))
print(np.max(arr), np.argmax(arr)) # maximum value and its position index, e.g. 98 2
print(np.cumsum(arr)) # running total of the elements, e.g. [ 35 92 190 287 314 378 460 536 565 656 711 808]
print(np.mean(arr)) # mean, e.g. 67.33333333333333
print(np.median(arr)) # median, e.g. 70.0
array slice
Read the file and return an array, and slice it using
data[row index or slice, column index or slice]
import numpy as np
file = '8.5 score.csv'
data = np.genfromtxt(file, dtype=str, delimiter=',', encoding='utf-8')
print(data[0]) # ['姓名' '学号' 'C语言' 'Java' 'Python' 'VB' 'C++' '总分']
print(data[1:, 0]) # ['朱佳' '李思' '郑君' '王雪' '罗明']
print(data[0, 2:-1]) # ['C语言' 'Java' 'Python' 'VB' 'C++']
print(data[1:, 2:-1])
print(data[:, 0::7])
[['75.2' '93' '66' '85' '88']
['86' ' 76' '96' '93' '67']
[' ' ' 98' '76' ' ' '89']
['99' ' 96' '91' '88' '86']
['95' '96' '85' '63' '91']]
[['姓名' '总分']
['朱佳' '407']
['李思' '418']
['郑君' '263']
['王雪' '460']
['罗明' '430']]
Those who mourn are burned with flames, and the fallen are unforgivable. Wings of eternal burning, take me out of the sinking of the mortal world.