Python学习系列(六)——数据统计分析numpy

Python学习系列(六)——数据统计分析numpy

——————————————————————————

    可能这属于 Python 的进阶内容了吧。用了这俩库后发现 Python 真好使,是真的好使,各种功能太爽了。

    创建数组的方法。

import numpy as np 

def createarray():
    """Build the five sample arrays used by the array-creation demos.

    Returns:
        A 5-tuple ``(a, b, c, d, e)`` where ``a`` holds the integers 0..9
        and ``b`` the integers 0..17 (both converted from Python lists),
        ``c`` is a 2x5 array of ones built with ``np.repeat``, ``d`` is
        ``np.arange(10)`` and ``e`` is ``np.linspace(0, 10, 5)``.
    """
    # range(start, stop, step): stop is excluded. The list is then
    # converted to a NumPy array.
    first = np.array([value for value in range(0, 10, 1)])
    second = np.array([value for value in range(0, 18, 1)])

    # Ten ones, laid out as 2 rows x 5 columns.
    ones = np.repeat(1, 10).reshape(2, 5)

    counting = np.arange(10)

    spaced = np.linspace(0, 10, 5)
    return (first, second, ones, counting, spaced)

def onedimensionarrayinfo(a):
    """Print the shape, maximum, mean and arg-max of array ``a``, one per line."""
    print(a.shape)
    # max / mean / argmax are called in this order; argmax is the index
    # of the largest element.
    for method_name in ("max", "mean", "argmax"):
        print(getattr(a, method_name)())

def twodimensionarrayinfo(b):
    """Reshape ``b`` to 3x6 and print it together with a few summaries.

    ``b`` must hold exactly 18 elements, otherwise ``reshape(3, 6)`` raises.
    Prints, in order: the 3x6 grid, its shape, its element count, the
    per-column maxima, the per-row maxima and the flattened array.
    """
    grid = b.reshape(3, 6)
    print(grid)
    print(grid.shape)
    print(grid.size)
    # axis=0 reduces down the rows    -> one maximum per column (6 values);
    # axis=1 reduces across the columns -> one maximum per row (3 values).
    for axis in (0, 1):
        print(grid.max(axis=axis))
    flat = grid.flatten()  # back to one dimension
    print(flat)

def repeatarrayinfo(c):
    """Print ``c`` (the demo passes the array built with ``np.repeat``)."""
    print(str(c))

def arangearrayinfo(d):
    """Print ``d`` (the demo passes the array built with ``np.arange``)."""
    text = str(d)
    print(text)

def linspacearrayinfo(d):
    """Print ``d`` (the demo passes the array built with ``np.linspace``)."""
    print(str(d))

if __name__ == '__main__':
    Array = createarray()
    # Each printer is paired with the array at the same position in the
    # tuple returned by createarray(); they run in tuple order.
    printers = (
        onedimensionarrayinfo,
        twodimensionarrayinfo,
        repeatarrayinfo,
        arangearrayinfo,
        linspacearrayinfo,
    )
    for show, data in zip(printers, Array):
        show(data)

    矩阵常规操作。

import numpy as np 

def createarray():
    """Return a 10x10 matrix holding the integers 0..99 in row-major order."""
    return np.arange(100).reshape(10, 10)

def arrayRWoperate(a):
    """Demonstrate row/column access, splitting and stacking on matrix ``a``.

    ``a`` is indexed at row 5 and column 5 and split into ten equal parts
    along each axis, so both dimensions must be divisible by 10 (the demo
    passes a 10x10 matrix).
    """
    print(a.shape)
    # print() joins its arguments with a single space, which reproduces
    # the "label " + str(value) layout.
    print("array is", a)
    print("line i element is", a[5])
    print("column j element is", a[:, 5])

    column_blocks = np.split(a, 10, axis=1)  # ten equal slices along axis 1
    row_blocks = np.split(a, 10, axis=0)     # ten equal slices along axis 0
    stacked_rows = np.vstack((row_blocks[0], row_blocks[1]))        # stack vertically
    stacked_cols = np.hstack((column_blocks[0], column_blocks[1]))  # stack horizontally
    print(stacked_rows)
    print(stacked_cols)

def rotateoperate(a):
    """Print the transpose, left-right flip and up-down flip of matrix ``a``.

    ``np.fliplr`` and ``np.flipud`` mirror the matrix (reverse the column
    order / row order respectively); they are not rotations.
    """
    views = (
        ("Matrix transpose", lambda m: m.T),
        ("Turn around", np.fliplr),   # reverse the order of the columns
        ("Upside down", np.flipud),   # reverse the order of the rows
    )
    for caption, transform in views:
        # print() inserts the single space between caption and matrix.
        print(caption, transform(a))

def standardized(a):
    """Min-max normalise every column of the 2-D array ``a`` to [0, 1].

    Each element becomes ``(x - col_min) / (col_max - col_min)``, where the
    minimum and maximum are taken over the element's own column. The result
    has the same shape and layout as ``a``.

    Compared with the earlier loop-based version this:
      * keeps no state between calls (the old module-level list grew on
        every call, so a second call failed at the reshape);
      * keeps each value at its original (row, column) position (the old
        code collected values column by column but reshaped row-major,
        which transposed or scrambled the output);
      * maps constant columns to 0.0 instead of failing on a 0/0 division.

    Args:
        a: 2-D array-like of numbers.

    Returns:
        A float ``ndarray`` with the normalised values. It is also printed.

    Raises:
        ValueError: if ``a`` is not two-dimensional.
    """
    data = np.asarray(a, dtype=float)
    if data.ndim != 2:
        raise ValueError("standardized() expects a 2-D array")

    if data.size == 0:
        # Nothing to scale; min()/max() would raise on an empty axis.
        final = data.copy()
    else:
        col_min = data.min(axis=0)
        span = data.max(axis=0) - col_min
        # A constant column has zero range; divide by 1 so it becomes 0.0.
        safe_span = np.where(span == 0, 1.0, span)
        final = (data - col_min) / safe_span

    print(final)
    return final

if __name__ == '__main__':
    array = createarray()
    # Run the three matrix demos on the same matrix, in this order.
    for demo in (arrayRWoperate, rotateoperate, standardized):
        demo(array)

    随机数操作。

import numpy as np 

def randomUse():
    """Print samples drawn from several ``np.random`` generators.

    The draws are made in a fixed order from NumPy's global random state,
    so the output is reproducible after ``np.random.seed(...)``.
    """
    rng = np.random
    print(rng.rand())               # one float in [0, 1)
    print(rng.rand(5))              # five floats in [0, 1)
    print(rng.randint(0, 12, 5))    # five ints from 0 to 11 (12 is excluded)
    print(rng.random_sample(6))     # six floats in [0, 1)
    print(rng.uniform(0, 1))        # one float drawn uniformly from [0, 1)

    deck = np.arange(10)
    rng.shuffle(deck)               # shuffles in place

    # 1000 draws from a normal distribution with mean 85 and standard
    # deviation 4 (the second argument is the standard deviation, not the
    # variance). The samples are not used, but the draw still advances
    # the global random state.
    samples = rng.normal(85, 4, 1000)
    print(deck)

def monte_carlo(number=100000):
    """Estimate pi by Monte Carlo sampling and print the estimate.

    ``number`` points are drawn uniformly from the unit square. The share
    that falls inside the quarter unit circle (``x**2 + y**2 <= 1``)
    approximates pi / 4, so four times that share approximates pi.

    Args:
        number: how many random points to draw. Defaults to 100000, the
            value that used to be hard-coded.

    Returns:
        The estimate of pi as a float. It is also printed.

    Raises:
        ValueError: if ``number`` is not positive.
    """
    if number <= 0:
        raise ValueError("number must be a positive integer")

    # One (x, y) pair per row, drawn in a single vectorised call instead
    # of two scalar draws per Python loop iteration.
    points = np.random.uniform(0, 1, size=(number, 2))
    inside = np.count_nonzero((points ** 2).sum(axis=1) <= 1)

    estimate = 4 * int(inside) / number
    print(estimate)
    return estimate

if __name__ == '__main__':
    # Run the random-number demos in order.
    for demo in (randomUse, monte_carlo):
        demo()

    数据统计。

import numpy as np 

def createarray():
    """Return the integers 0..99 arranged as a 10x10 matrix."""
    values = np.arange(100)
    return np.reshape(values, (10, 10))

def statisticdata(a, label):
    """Print summary statistics of ``a`` and a correlation-coefficient demo.

    Prints, in order: maximum, minimum, mean, standard deviation and
    variance of ``a``; the (row, column) of the first element equal to
    ``label`` (or a "not found" message when there is none); the number of
    elements strictly greater than ``label``; and the correlation matrix
    of two built-in sample series.

    Args:
        a: numeric array with at least two dimensions (the position is
            reported from its first two index arrays).
        label: value to look up in ``a`` and to use as the threshold.
    """
    print("max value is " + str(np.amax(a)))
    print("min value is " + str(np.amin(a)))
    print("mean value is " + str(np.mean(a)))
    print("standard deviation is " + str(np.std(a)))
    print("variance is " + str(np.var(a)))

    pos = np.where(a == label)
    if pos[0].size:
        # First match in row-major order.
        print(str(pos[0][0]) + "  " + str(pos[1][0]))
    else:
        # Indexing pos[0][0] would raise IndexError when nothing matches.
        print("label " + str(label) + " not found")
    print(np.count_nonzero(a > label))  # how many elements exceed label

    # Two fixed sample series of 30 values each, used only to demonstrate
    # np.corrcoef; they are independent of ``a`` and ``label``.
    bhp = np.array([
        93.72, 95.64, 94.56, 93.3, 93.93, 92.39,
        92.11, 92.36, 91.76, 93.91, 94.6, 93.27, 94.43, 96.02,
        95.76, 94.47, 94.34, 92.22, 88.31, 89.59, 89.02, 86.95,
        84.88, 87.38, 88.56, 89.59, 88.71, 90.02, 91.26, 90.67,
    ])

    vale = np.array([
        34.37, 35.13, 35.14, 35.31, 35.57, 35.03,
        33.44, 33.94, 34.21, 34.27, 34.23, 33.76, 34.32, 34.87,
        34.5, 33.23, 33.29, 32.88, 31.91, 32.17, 32.44, 31.91,
        31.04, 31.51, 32.14, 32.42, 32.25, 32.7, 32.36, 32.34,
    ])

    corrc = np.corrcoef(bhp, vale)  # 2x2 correlation-coefficient matrix
    print(corrc)

if __name__ == '__main__':
    # Build the 10x10 sample matrix, then report its statistics using 47
    # as both the value to locate and the comparison threshold.
    Array = createarray()
    label = 47
    statisticdata(Array, label)

猜你喜欢

转载自blog.csdn.net/asd20172016/article/details/81431728