Python学习系列(六)——数据统计分析numpy
——————————————————————————
可能这属于python的进阶了吧,用了这俩库后发现python真好使,是真的好使,各种功能太爽了。
创建数组的方法。
import numpy as np
def createarray():
    """Build the five sample arrays used by the array-creation demo.

    Returns:
        A tuple ``(a, b, c, d, e)``:
        a -- integers 0..9 (1-D),
        b -- integers 0..17 (1-D),
        c -- a 2x5 array filled with ones,
        d -- integers 0..9 built with ``np.arange``,
        e -- 5 evenly spaced floats from 0 to 10 inclusive.
    """
    # np.arange(start, stop, step) yields the same values as filling a
    # Python list from range() and converting it afterwards.
    a = np.arange(0, 10, 1)
    b = np.arange(0, 18, 1)
    c = np.repeat(1, 10).reshape(2, 5)
    d = np.arange(10)
    e = np.linspace(0, 10, 5)
    return (a, b, c, d, e)
def onedimensionarrayinfo(a):
    """Print the shape, maximum, mean and index of the maximum of ``a``."""
    facts = (
        a.shape,
        a.max(),
        a.mean(),
        a.argmax(),  # index of the largest element
    )
    for fact in facts:
        print(fact)
def twodimensionarrayinfo(b, rows=3):
    """Reshape ``b`` into ``rows`` rows and print a few facts about the result.

    Args:
        b: array whose number of elements is divisible by ``rows``.
        rows: number of rows of the 2-D view. Defaults to 3, so an
            18-element input is shown as 3x6.

    Prints the 2-D array, its shape, its size, the per-column maxima,
    the per-row maxima, and finally the array flattened back to 1-D.
    """
    b = b.reshape(rows, -1)  # -1 lets NumPy infer the column count
    print(b)
    print(b.shape)
    print(b.size)
    print(b.max(axis=0))  # reduces over rows: one maximum per column
    print(b.max(axis=1))  # reduces over columns: one maximum per row
    b = b.flatten()
    print(b)
def repeatarrayinfo(c):
    """Print ``c`` (the demo passes the array built with ``np.repeat``)."""
    rendered = str(c)
    print(rendered)
def arangearrayinfo(d):
    """Print ``d`` (the demo passes the array built with ``np.arange``)."""
    rendered = str(d)
    print(rendered)
def linspacearrayinfo(d):
    """Print ``d`` (the demo passes the array built with ``np.linspace``)."""
    rendered = str(d)
    print(rendered)
if __name__ == '__main__':
    # Pair each sample array with the function that reports on it; the
    # order matches the tuple returned by createarray().
    Array = createarray()
    reporters = (
        onedimensionarrayinfo,
        twodimensionarrayinfo,
        repeatarrayinfo,
        arangearrayinfo,
        linspacearrayinfo,
    )
    for report, sample in zip(reporters, Array):
        report(sample)
矩阵常规操作。
import numpy as np
def createarray():
    """Return the integers 0..99 arranged as a 10x10 matrix."""
    return np.arange(100).reshape(10, 10)
# Row and column operations
def arrayRWoperate(a):
    """Print row/column selections and split-then-stack results for ``a``.

    The code indexes row 5 and column 5 and splits each axis into 10 equal
    parts, so ``a`` needs at least 6 rows and 6 columns and both dimensions
    must be divisible by 10.
    """
    print(a.shape)
    print("array is " + str(a))

    sixth_row = a[5]
    print("line i element is " + str(sixth_row))
    sixth_column = a[:, 5]
    print("column j element is " + str(sixth_column))

    column_parts = np.split(a, 10, axis=1)  # ten equal blocks of columns
    row_parts = np.split(a, 10, axis=0)     # ten equal blocks of rows

    rows_joined = np.vstack(row_parts[:2])       # first two row blocks, stacked vertically
    columns_joined = np.hstack(column_parts[:2])  # first two column blocks, side by side
    print(rows_joined)
    print(columns_joined)
# Matrix transpose and flips
def rotateoperate(a):
    """Print the transpose, the left-right flip and the up-down flip of ``a``."""
    views = (
        ("Matrix transpose ", a.T),         # transpose
        ("Turn around ", np.fliplr(a)),     # mirror left <-> right
        ("Upside down ", np.flipud(a)),     # mirror top <-> bottom
    )
    for caption, view in views:
        print(caption + str(view))
# Matrix normalization (min-max scaling)
def standardized(a):
    """Min-max scale every column of ``a`` into [0, 1], print and return it.

    Each element becomes ``(value - column_min) / (column_max - column_min)``
    and stays at its original (row, column) position.

    Args:
        a: 2-D array-like of numbers.

    Returns:
        A float array with the same shape as ``a``. A column whose values
        are all equal has no range to scale by and is returned as zeros.
    """
    data = np.asarray(a, dtype=float)
    col_min = data.min(axis=0)
    span = data.max(axis=0) - col_min
    # Avoid 0/0 on constant columns: the numerator is already 0 there, so
    # dividing by 1 yields 0 instead of nan.
    safe_span = np.where(span == 0, 1.0, span)
    # Everything is computed locally (no module-level accumulator), so
    # repeated calls do not affect each other.
    final = (data - col_min) / safe_span
    print(final)
    return final
if __name__ == '__main__':
    # Run every matrix demo on the same 10x10 sample matrix.
    array = createarray()
    for demo in (arrayRWoperate, rotateoperate, standardized):
        demo(array)
随机数操作。
import numpy as np
def randomUse():
    """Print samples from several ``np.random`` helpers and a shuffled range."""
    rng = np.random
    draws = (
        rng.rand(),              # a single float in [0, 1)
        rng.rand(5),             # five floats in [0, 1)
        rng.randint(0, 12, 5),   # five integers from 0 to 11 (12 is excluded)
        rng.random_sample(6),    # six floats in [0, 1)
        rng.uniform(0, 1),       # a single float drawn uniformly from [0, 1)
    )
    for draw in draws:
        print(draw)

    arr = np.arange(10)
    rng.shuffle(arr)  # shuffles in place
    # 1000 normal samples with mean 85 and standard deviation 4; they are
    # drawn but never displayed.
    s = rng.normal(85, 4, 1000)
    print(arr)
# Estimate pi
def monte_carlo(number=100000):
    """Estimate pi by Monte Carlo sampling of the unit square.

    Draws ``number`` points uniformly from [0, 1) x [0, 1) and counts how
    many satisfy ``x**2 + y**2 <= 1``; that fraction approximates pi / 4.

    Args:
        number: how many random points to draw. Must be positive.

    Returns:
        The estimate of pi as a float (it is also printed).

    Raises:
        ValueError: if ``number`` is not positive.
    """
    if number <= 0:
        raise ValueError("number must be a positive integer")
    # One row per point: column 0 holds x, column 1 holds y. Drawing all
    # points in a single call replaces the Python-level loop.
    points = np.random.uniform(0, 1, size=(number, 2))
    count = np.count_nonzero((points ** 2).sum(axis=1) <= 1)
    estimate = 4 * count / number
    print(estimate)
    return estimate
if __name__ == '__main__':
    # Run the random-number tour first, then the pi estimate.
    for demo in (randomUse, monte_carlo):
        demo()
数据统计。
import numpy as np
def createarray():
    """Return a 10x10 matrix holding the integers 0 through 99 in row order."""
    side = 10
    flat = np.arange(side * side)
    return flat.reshape(side, side)
def statisticdata(a, label):
    """Print summary statistics of ``a`` and a sample correlation matrix.

    Printed, in order: max, min, mean, standard deviation and variance of
    ``a``; the index of the first element equal to ``label`` (indices
    separated by spaces) or a "not found" message; the number of elements
    strictly greater than ``label``; and the correlation-coefficient matrix
    of two built-in sample series.

    Args:
        a: numeric NumPy array.
        label: value to locate in ``a`` and to use as the comparison threshold.
    """
    print("max value is " + str(np.amax(a)))
    print("min value is " + str(np.amin(a)))
    print("mean value is " + str(np.mean(a)))           # mean
    print("standard deviation is " + str(np.std(a)))    # standard deviation
    print("variance is " + str(np.var(a)))              # variance

    # argwhere returns one index row per match, in row-major order, so the
    # first row is the first occurrence. An absent label gives zero rows.
    matches = np.argwhere(a == label)
    if len(matches):
        print(" ".join(str(index) for index in matches[0]))
    else:
        print("label " + str(label) + " not found")

    print(np.count_nonzero(a > label))  # how many elements exceed label

    bhp = np.array([
        93.72, 95.64, 94.56, 93.3, 93.93, 92.39,
        92.11, 92.36, 91.76, 93.91, 94.6, 93.27, 94.43, 96.02,
        95.76, 94.47, 94.34, 92.22, 88.31, 89.59, 89.02, 86.95,
        84.88, 87.38, 88.56, 89.59, 88.71, 90.02, 91.26, 90.67,
    ])
    vale = np.array([
        34.37, 35.13, 35.14, 35.31, 35.57, 35.03,
        33.44, 33.94, 34.21, 34.27, 34.23, 33.76, 34.32, 34.87,
        34.5, 33.23, 33.29, 32.88, 31.91, 32.17, 32.44, 31.91,
        31.04, 31.51, 32.14, 32.42, 32.25, 32.7, 32.36, 32.34,
    ])
    corrc = np.corrcoef(bhp, vale)  # correlation coefficients of the two series
    print(corrc)
if __name__ == '__main__':
    # Look up 47 in the 10x10 sample matrix and print its statistics.
    label, Array = 47, createarray()
    statisticdata(Array, label)