Python basics - file operation (2)

Python basics - file operation (2)

CSV format file

Comma-separated values, storing tabular data in plain text
Consists of any number of records, separated by newlines
Each record consists of fields, separated by commas or tabs

Each record has the same field sequence
If there is a column name, it is located in the first line of the file
Each record data does not span lines, and there is no blank line

insert image description here
read CSV file

年,制造商,型号,说明,价值
1997,Ford,E350,"ac, abs, moon",3000.00
1999,Chevy,"Venture""ExtendedEdition""","",4900.00
1999,Chevy,"Venture ""Extended Edition, Very Large""","",5000.00
1996,Jeep,Grand Cherokee,"MUST SELL! \nair, moon roof, loaded",4799.00

The data in the CSV file is basically two-dimensional data composed of rows and columns.
It can be processed by the method of two-dimensional list
CSV file "score.csv":

姓名,C,Java,Python,C#
罗明,95,96,85,63
朱佳,75,93,66,85
李思,86,76,96,93
郑君,88,98,76,90
王雪,99,96,91,88
李立,82,66,100,77
# Read the CSV file line by line and split each line on commas,
# collecting the rows into a two-dimensional list.
with open('8.2 score.csv', 'r', encoding='utf-8') as csv_obj:
    data_lst = []
    for line in csv_obj:
        # strip() removes the trailing newline before the line is split
        data_lst.append(line.strip().split(','))
print(data_lst)
姓名,C,Java,Python,C#
罗明,95,96,85,63
朱佳,75,93,66,85
李思,86,76,96,93
郑君,88,98,76,90
王雪,99,96,91,88
李立,82,66,100,77
[['姓名', 'C', 'Java', 'Python', 'C#'],
['罗明', '95', '96', '85', '63'],
['朱佳', '75', '93', '66', '85'],
['李思', '86', '76', '96', '93'],
['郑君', '88', '98', '76', '90'],
['王雪', '99', '96', '91', '88'],
['李立', '82', '66', '100', '77']]
def read_csv(filename):
    """
    Read a CSV file into a two-dimensional list.

    Each line of the file is stripped of surrounding whitespace and split on
    commas; the resulting list of fields becomes one element of the result.
    Quoted fields that themselves contain commas are not treated specially.

    :param filename: name of the UTF-8 encoded CSV file to read
    :return: list of rows, each row being a list of field strings
    """
    with open(filename, 'r', encoding='utf-8') as csv_obj:
        data_lst = [line.strip().split(',') for line in csv_obj]
    return data_lst
if __name__ == '__main__':
    file = '8.2 score.csv'  # keep the file name in a variable so the program is easy to extend and modify
    data = read_csv(file)   # read the file and convert it to a two-dimensional list
    print(data)             # print the list

write CSV file

def write_file(ls, new_file):
    """
    Write a two-dimensional list to a text file in CSV form.

    The strings of every inner list are joined with commas and written as one
    line, followed by a newline character.

    :param ls: two-dimensional list whose inner lists contain strings
    :param new_file: name of the file to write (created or overwritten)
    """
    with open(new_file, 'w', encoding='utf-8') as file:  # write mode
        # writelines() expects an iterable of lines, so feed it one line per row
        file.writelines(','.join(row) + '\n' for row in ls)
if __name__ == '__main__':
    # Sample data: a header row followed by one score record
    data = [['姓名', 'C', 'Java', 'Python', 'C#'], ['李立', '82', '66', '100', '77']]
    file = 'score_new.csv'
    write_file(data, file)

JSON file

JSON is a lightweight, language-independent, general-purpose data exchange format
JSON is a text format; every key must be a string enclosed in double quotes

'
{
    
    
"name": "李立",
"phone": "13988776655",
"city": "武汉"
}'

Python's built-in json library parses and encodes JSON data,
for example with dumps() and load()

JSON-encoded

Convert Python object to JSON format data

json.dumps(obj, ensure_ascii=True, indent=None, sort_keys=False)
json.dump(obj,fp, ensure_ascii=True, indent=None,sort_keys=False)

dump(obj, fp) Convert "obj" to a string in JSON format
Write the string to the file object fp

json.dumps(obj, ensure_ascii=True, indent=None, sort_keys=False)
import json
ensure_ascii defaults to True, which converts non-ASCII characters such as Chinese into \uXXXX Unicode escape sequences.
Set ensure_ascii=False to keep Chinese output as it is

info = {'name': '李立', 'phone': '13988776655', 'city': '武汉'}
# Default ensure_ascii=True: non-ASCII characters are written as \uXXXX escapes
print(json.dumps(info))
# {"name": "\u674e\u7acb", "phone": "13988776655", "city": "\u6b66\u6c49"}
# ensure_ascii=False: Chinese characters are output unchanged
print(json.dumps(info, ensure_ascii=False))
# {"name": "李立", "phone": "13988776655", "city": "武汉"}

json.dumps(obj, ensure_ascii=True, indent=None, sort_keys=False)
The indent parameter can be used to format the JSON output. Its default value is None. Setting it
to an integer greater than 0 gives the number of spaces per indentation level, which makes the output more readable.

import json
info = {'name': '李立', 'phone': '13988776655', 'city': '武汉'}
# indent=4 pretty-prints the JSON with four spaces per nesting level
print(json.dumps(info, ensure_ascii=False, indent=4))
# {
#     "name": "李立",
#     "phone": "13988776655",
#     "city": "武汉"
# }

json.dumps(obj, ensure_ascii=True, indent=None, sort_keys=False)
The keys are not sorted by default.
You can set sort_keys=True to sort the converted result in ascending order of the dictionary keys

import json
info = {'name': '李立', 'phone': '13988776655', 'city': '武汉'}
# Without sort_keys the keys keep the order in which the dictionary holds them
print(json.dumps(info, ensure_ascii=False, indent=4))
# {
#     "name": "李立",
#     "phone": "13988776655",
#     "city": "武汉"
# }
# sort_keys=True sorts the output by key in ascending order
print(json.dumps(info, ensure_ascii=False, indent=4, sort_keys=True))
# {
#     "city": "武汉",
#     "name": "李立",
#     "phone": "13988776655"
# }

json.dump(obj,fp, ensure_ascii=True, indent=None, sort_keys=False)
writes JSON data into a file object with write permission

import json
info = {'name': '李立', 'phone': '13988776655', 'city': '武汉'}
# json.dump() requires a writable file object as its second argument and
# returns None, so the data is written to the file rather than printed.
with open('test.json', 'w', encoding='utf-8') as fp:
    json.dump(info, fp, ensure_ascii=False, indent=4)
# Data in the file "test.json":
# {
#     "name": "李立",
#     "phone": "13988776655",
#     "city": "武汉"
# }

File and Folder Operations

Get the current working directory

os.getcwd()
returns the absolute path of the current program working directory

import os
# Fetch the absolute path of the program's current working directory
result = os.getcwd()
print(result)
# Example output: F:\weiyun\2020

change current working directory

os.chdir()
changes the current working directory

import os
# '\\' in the string literal is parsed as a single '\'; the path may also be written as 'D:/testpath/path'
os.chdir('D:\\testpath\\path')
result = os.getcwd()
print(result)
# Example output: D:\testpath\path

Get a list of file names

os.listdir()
Gets a list of the names of all files and folders in the specified folder

import os
# Collect the names of all files and folders inside the given folder
result = os.listdir('E:/股票数据/data')
print(result)
['600000.csv', '600006.csv', '600007.csv', '600008.csv', 
'600009.csv', 
'600010.csv',  …… , '688399.csv']

create folder

os.mkdir() creates folders
os.makedirs() creates folders recursively

import os
os.mkdir('score')  # create a single folder
os.makedirs('score/python/final')  # create the folder together with any missing parent folders

delete empty directory

os.rmdir()
deletes empty directories
os.removedirs() recursively deletes empty directories

import os
# NOTE(review): the two calls look like independent examples. Run in sequence,
# rmdir('score') raises OSError while 'score' still contains 'python/final',
# and if it succeeds the path passed to removedirs() no longer exists. Confirm intent.
os.rmdir('score')  # delete one empty directory
os.removedirs('score/python/final/')  # delete the leaf directory, then each parent directory that has become empty

File renaming and deletion

os.rename(oldname, newname) file rename
os.remove(filename)
delete file
os.path.exists(filename)
detect existence

import os
# Rename the file and then delete it, but only when it exists
if os.path.exists('XRD.txt'):
    os.rename('XRD.txt', 'xrd.txt')
    print('XRD.txt更名成功')
    os.remove('xrd.txt')
    print('xrd.txt已经被删除')
else:
    print('XRD.txt不存在')

Detect files and read data

from os import path
def read_csv(filename):
    """
    Read a GBK-encoded CSV file into a two-dimensional list.

    Each line is stripped of surrounding whitespace and split on commas.

    :param filename: name of the GBK-encoded CSV file to read
    :return: list of rows, each row being a list of field strings
    """
    with open(filename, 'r', encoding='GBK') as csv_obj:
        data_lst = [line.strip().split(',') for line in csv_obj]
    return data_lst
def check_path(filepath, filename):
    """
    Read a CSV file after checking that its folder and the file itself exist.

    :param filepath: folder that should contain the file
    :param filename: name of the CSV file inside ``filepath``
    :return: the two-dimensional list produced by ``read_csv`` when both the
             folder and the file exist, otherwise the message string
             '路径或文件名不存在'
    """
    # path.join() inserts the separator only when needed, so the folder may be
    # given with or without a trailing slash
    full_name = path.join(filepath, filename)
    if path.exists(filepath) and path.exists(full_name):
        return read_csv(full_name)
    return '路径或文件名不存在'
if __name__ == "__main__":
    data_path = 'E:/股票数据/data/'
    data_file = '600009.csv'
    # data is either the parsed rows or the "not found" message string
    data = check_path(data_path, data_file)
    print(data)

NumPy(Numerical Python )

Provides ndarray, a multi-dimensional array of a single data type,
and ufunc, universal functions for fast processing of arrays
insert image description here

numpy.genfromtxt()

Get data from text files
and provide more complex operations like missing value handling

numpy.genfromtxt(fname, dtype=<class 'float'>, comments='#',
delimiter=None,
skip_header=0,skip_footer=0, missing_values=None, 
filling_values=None, usecols=None, autostrip=False, 
max_rows=None, encoding='bytes')
numpy.loadtxt(fname, dtype=<class 'float'>, comments='#', 
delimiter=None,
converters=None,
skiprows=0, usecols=None, 
unpack=False, ndmin=0, 
encoding='bytes',max_rows=None)

The file "8.5 score.csv" saves the student's score data, and its data part includes
integers, floating point numbers and missing data (Zheng Jun's C language and VB scores are missing)

姓名,学号,C语言,Java,Python,VB,C++,总分
朱佳,0121701100511,75.2,93,66,85,88,407
李思,0121701100513,86, 76,96,93,67,418
郑君,0121701100514,, 98,76,,89,263
王雪,0121701100515,99, 96,91,88,86,460
罗明,0121701100510,95,96,85,63,91,430

fname: file, string, character sequence or generator
dtype: the data type of the generated array, the default value is float, str means the string
numpy.genfromtxt()

import numpy as np
file = '8.5 score.csv'
# dtype=str reads every field, including the header row, as a string
data = np.genfromtxt(file, dtype=str, delimiter=',', encoding='utf-8')
print(data)
[['姓名' '学号' 'C语言' 'Java' 'Python' 'VB' 'C++' '总分']
['朱佳' '0121701100511' '75.2' '93' '66' '85' '88' '407']
['李思' '0121701100513' '86' ' 76' '96' '93' '67' '418']
['郑君' '0121701100514' '' ' 98' '76' '' '89' '263']
['王雪' '0121701100515' '99' ' 96' '91' '88' '86' '460']
['罗明' '0121701100510' '95' '96' '85' '63' '91' '430']]

delimiter: used to define how to split data lines, separated by blank characters by default
skip_header: the number of lines skipped at the beginning of the file, the default value is skip_header=0
dtype=None: the type of each column is determined iteratively from the data in that column of every line

import numpy as np
file = '8.5 score.csv'
# skip_header=1 skips the column-name line; dtype=None lets each column's type be determined from its data
data = np.genfromtxt(file,dtype=None,delimiter=',',skip_header=1,encoding='utf-8')
print(data)
[('朱佳', 121701100511, 75.2, 93, 66, 85, 88, 407)
('李思', 121701100513, 86. , 76, 96, 93, 67, 418)
('郑君', 121701100514, nan, 98, 76, -1, 89, 263)
('王雪', 121701100515, 99. , 96, 91, 88, 86, 460)
('罗明', 121701100510, 95. , 96, 85, 63, 91, 430)]

filling_values: Replace missing data with the set value as the default value

import numpy as np
file = '8.5 score.csv'
# filling_values=0 substitutes 0 wherever a field is missing
data = np.genfromtxt(file, dtype=None, delimiter=',', filling_values=0, skip_header=1, encoding='utf-8')  
print(data)
[('朱佳', 121701100511, 75.2, 93, 66, 85, 88, 407)
('李思', 121701100513, 86. , 76, 96, 93, 67, 418)
('郑君', 121701100514, 0. , 98, 76, 0, 89, 263)
('王雪', 121701100515, 99. , 96, 91, 88, 86, 460)
('罗明', 121701100510, 95. , 96, 85, 63, 91, 430)]

names: The value is one of None, True, a string or a sequence.
When the value is True, the first line read after skipping skip_header lines is used as the field names

import numpy as np
file = '8.5 score.csv'
# names=True takes the field names from the first line that is read
data = np.genfromtxt(file, dtype=None, delimiter=',', names=True, filling_values=0, encoding='utf-8')
print(data[['姓名', '学号', 'Python']])  # when indexing by several fields, put the field names in a list
[('朱佳', 121701100511, 66) 
('李思', 121701100513, 96)
('郑君', 121701100514, 76) 
('王雪', 121701100515, 91)
('罗明', 121701100510, 85)]

ufunc function

Universal functions are functions that perform operations on each element of an array.
Operations on arrays can be performed using operation functions, or written as array operation expressions

import numpy as np
a = np.array((1, 2, 3, 4, 5))   # array [ 1 2 3 4 5]
b = np.array((6, 7, 8, 9, 10))  # array [ 6 7 8 9 10]
print(np.add(a, b))             # prints [ 7 9 11 13 15]
print(a + b)                    # prints [ 7 9 11 13 15]
[ 7  9 11 13 15]
[ 7  9 11 13 15]

insert image description here

NumPy has built-in random number, trigonometric, hyperbolic, exponential and logarithmic functions, arithmetic operations,
complex number processing, statistics and more. Nearly a hundred mathematical functions can quickly perform various operations on data

import numpy as np
a = np.array((1, 2, 3, 4))  # convert the tuple to the array [1 2 3 4]
print(np.sum(a))      # sum of the array elements, prints 10
print(a ** 2)          # square of every element, [ 1 4 9 16]
print(a % 3)           # every element modulo 3, [1 2 0 1]
print(np.sqrt(a)) # square root, [1. 1.41421356 1.73205081 2. ]
print(np.square(a))   # array of every element raised to the power 2, [ 1 4 9 16]

Statistical Analysis

The descriptive statistics of digital features mainly include calculating the complete situation, minimum
value, maximum value, mean, median, range, standard deviation, variance and covariance of digital data, etc.

insert image description here

The descriptive statistics of digital features mainly include calculating the complete situation, minimum
value, maximum value, mean, median, range, standard deviation, variance and covariance of digital data, etc.

import numpy as np
# The array is filled with random integers, so the sample outputs quoted below differ from run to run
arr = np.random.randint(100, size=(3, 4))
print(np.max(arr), np.argmax(arr))  # maximum value and its position index, e.g. 98 2
print(np.cumsum(arr))  # running total of the array elements, e.g. [ 35 92 190 287 314 378 460 536 565 656 711 808]
print(np.mean(arr))  # mean value, e.g. 67.33333333333333
print(np.median(arr))  # median, e.g. 70.0

array slice

Read the file and return an array, and slice it using
data[row index or slice, column index or slice]


import numpy as np
file = '8.5 score.csv'
# dtype=str keeps the header row and all fields as strings
data = np.genfromtxt(file, dtype=str, delimiter=',', encoding='utf-8')
print(data[0]) # first row: ['姓名' '学号' 'C语言' 'Java' 'Python' 'VB' 'C++' '总分']
print(data[1:, 0])   # first column from row 1 on: ['朱佳' '李思' '郑君' '王雪' '罗明']
print(data[0, 2:-1]) # first row, column 2 up to but excluding the last: ['C语言' 'Java' 'Python' 'VB' 'C++']
print(data[1:, 2:-1])  # rows from 1 on, column 2 up to but excluding the last
print(data[:, 0::7])  # every row, columns 0 and 7
[['75.2' '93' '66' '85' '88']
['86' ' 76' '96' '93' '67']
['' ' 98' '76' '' '89']
['99' ' 96' '91' '88' '86']
['95' '96' '85' '63' '91']]
[['姓名' '总分']
['朱佳' '407']
['李思' '418']
['郑君' '263']
['王雪' '460']
['罗明' '430']]

Those who mourn are burned with flames, and the fallen are unforgivable. Wings of eternal burning, take me out of the sinking of the mortal world.

Guess you like

Origin blog.csdn.net/weixin_50804299/article/details/129349158