Python Data Analysis Notes - Chapter 3: Statistics and Linear Algebra

2. Simple descriptive statistics with NumPy

import numpy as np
from scipy.stats import scoreatpercentile

# Load column 1 of the CSV file, skipping the header row.
data = np.loadtxt('mdrtb_2012.csv', delimiter=',', usecols=(1,), skiprows=1, unpack=True)
# Maximum: ndarray method vs. module-level function.
print('Max method', data.max())
print('Max function', np.max(data))
# Minimum
print('Min method', data.min())
print('Min function', np.min(data))
# Mean
print('Mean method', data.mean())
print('Mean function', np.mean(data))
# Standard deviation
print('Std method', data.std())
print('Std function', np.std(data))
# Median, cross-checked against the 50th percentile from SciPy.
print('Median', np.median(data))
print('Score at percentile 50', scoreatpercentile(data, 50))

3. Linear algebra with NumPy

import numpy as np

# 1. Matrix inverse
# Plain ndarrays with the @ operator are used instead of np.mat, which was
# removed in NumPy 2.0.
A = np.array([[2, 4, 6], [4, 2, 6], [10, -4, 18]])
# Alternative test matrix, taken from an MIT open-course lecture:
# A = np.array([[1, 0, 0], [-3, 1, 0], [0, 0, 1]])
print('A\n', A)

# Compute the inverse of A.
inverse = np.linalg.inv(A)
print('inverse of A\n', inverse)
# Verify by multiplication: A @ inverse should be (close to) the identity.
print('Check\n', A @ inverse)
print('Error\n', A @ inverse - np.eye(3))

# 2. Solving a system of linear equations

A = np.array([[1, -2, 1], [0, 2, -8], [-4, 5, 9]])
print("A\n", A)
b = np.array([0, 8, -9])
print("b\n", b)

# Call solve() to solve the linear system A x = b.
x = np.linalg.solve(A, b)
print("solution", x)
# Verify with dot(): the product should reproduce b.
print("check\n", np.dot(A, x))

4. Calculating eigenvalues and eigenvectors

import numpy as np
# A plain ndarray is used instead of np.mat, which was removed in NumPy 2.0.
A = np.array([[3, -2], [1, 0]])
print("A\n", A)

print("Eigenvalues", np.linalg.eigvals(A))
# eig() returns a tuple holding the eigenvalues and the eigenvectors;
# each eigenvector is a column of the second array.
eigenvalues, eigenvectors = np.linalg.eig(A)
print("first tuple of eig", eigenvalues)
print("second tuple of eig\n", eigenvectors)

# Check A v = lambda v for every pair; iterating over the transpose walks
# the eigenvector columns one at a time.
for eigenvalue, eigenvector in zip(eigenvalues, eigenvectors.T):
    print("left", np.dot(A, eigenvector))
    print("right", eigenvalue * eigenvector)
    print()

5. NumPy random numbers
(1) A gambling game with the binomial distribution

import numpy as np
from matplotlib.pyplot import plot, show
# Simulate a betting game over 10000 rounds with a starting balance of 1000.
# Every round draws from binomial(9, 0.5): a draw below 5 loses one unit,
# a draw from 5 to 9 wins one unit.
cash = np.zeros(10000)
cash[0] = 1000
outcome = np.random.binomial(9, 0.5, size=len(cash))
# Element 0 of `cash` is the starting balance, so only outcome[1:] is played.
played = outcome[1:]
# Draws of 10 or more are not expected from 9 trials; fail loudly with the
# offending value (formatted explicitly, since str + ndarray is an error).
unexpected = played[played >= 10]
if unexpected.size:
    raise AssertionError("Unexpected outcome " + str(unexpected[0]))
# Running balance = starting balance + cumulative sum of the +/-1 steps.
steps = np.where(played < 5, -1, 1)
cash[1:] = cash[0] + np.cumsum(steps)
print(outcome.min(), outcome.max())
plot(np.arange(len(cash)), cash)
show()

(2) Sampling the normal distribution

import numpy as np
import matplotlib.pyplot as plt
N = 10000
normal_values = np.random.normal(size=N)
# Histogram with sqrt(N) bins. density=True normalises the bars so they can
# be compared with the PDF below; it replaces the `normed` keyword, which was
# removed in Matplotlib 3.1.
_, bins, _ = plt.hist(normal_values, int(np.sqrt(N)), density=True, lw=1)
sigma = 1
mu = 0
# Overlay the normal probability density function evaluated at the bin edges.
plt.plot(bins, 1 / (sigma * np.sqrt(2 * np.pi)) * np.exp(-(bins - mu) ** 2 / (2 * sigma ** 2)), lw=2)
plt.show()

(3) Normality tests with SciPy

import numpy as np
from scipy.stats import shapiro
from scipy.stats import anderson
from scipy.stats import normaltest

# Read the flu-trends data: column 1 only, header skipped, empty cells
# converted to 0.
flutrends = np.loadtxt(
    "goog_flutrends.csv",
    delimiter=',',
    usecols=(1,),
    skiprows=1,
    converters={1: lambda s: float(s or 0)},
    unpack=True,
)
N = len(flutrends)
# Two reference samples of the same length: normal draws and all zeros.
normal_values = np.random.normal(size=N)
zero_values = np.zeros(N)

samples = (normal_values, zero_values, flutrends)
checks = (
    (shapiro, ("Normal Value Shapiro", "Zero Shapiro", "Flu Shapiro")),
    (anderson, ("Normal Value Anderson", "Zero Anderson", "Flu Anderson")),
    (normaltest, ("Normal Value normaltest", "Zero normaltest", "Flu normaltest")),
)
# Run every test on every sample, with a blank line between test groups.
for position, (run_test, labels) in enumerate(checks):
    if position:
        print()
    for label, sample in zip(labels, samples):
        print(label, run_test(sample))

(4) Creating a NumPy masked array

import numpy as np
try:
    # The sample images moved to scipy.datasets; scipy.misc.face/ascent were
    # removed in SciPy 1.12.
    from scipy.datasets import ascent, face
except ImportError:
    # Older SciPy releases only provide them in scipy.misc.
    from scipy.misc import ascent, face
import matplotlib.pyplot as plt

# Keep the image under its own name so the face() function is not shadowed.
image = face()
# Random 0/1 mask with the same shape as the image; 1 marks a masked element.
random_mask = np.random.randint(0, 2, size=image.shape)

# Top-left: the original image.
plt.subplot(221)
plt.title("Original")
plt.imshow(image)
plt.axis('off')

masked_array = np.ma.array(image, mask=random_mask)

# Top-right: the image with the random mask applied.
plt.subplot(222)
plt.title("Masked")
plt.imshow(masked_array)
plt.axis("off")

# Bottom-left: element-wise logarithm of the original image.
plt.subplot(223)
plt.title("Log")
plt.imshow(np.log(image))
plt.axis("off")

# Bottom-right: element-wise logarithm of the masked image.
plt.subplot(224)
plt.title("Log Masked")
plt.imshow(np.log(masked_array))
plt.axis("off")

plt.show()

Ignoring negative and extreme values


import numpy as np
#from matplotlib.finance import _quotes_historical_yahoo
from datetime import date
import sys
import matplotlib.pyplot as plt

# Load column 1 of the CSV file, skipping the header row.
salary = np.loadtxt("MLB2008.csv", delimiter=",", usecols=(1,), skiprows=1, unpack=True)
# Indices that are multiples of 3.
triples = np.arange(0, len(salary), 3)
print("Triples", triples[:10], "...")

signs = np.ones(len(salary))
print("Signs", signs[:10], "...")
# Negate the elements whose index is a multiple of 3.
signs[triples] = -1
print("Signs", signs[:10], "...")

# np.ma.log masks the entries made negative above instead of returning nan.
ma_log = np.ma.log(salary * signs)
print("Masked logs", ma_log[:10], "...")
# Ignore extreme values: mask everything further than one standard deviation
# from the mean.
dev = salary.std()
avg = salary.mean()
inside = np.ma.masked_outside(salary, avg - dev, avg + dev)
print("Inside", inside[:10], "...")

# Top: the raw salaries.
plt.subplot(311)
plt.title("Original")
plt.plot(salary)

# Middle: exp() of the masked logs.
plt.subplot(312)
plt.title("Log Masked")
plt.plot(np.exp(ma_log))

# Bottom: salaries with the extreme values masked out.
plt.subplot(313)
plt.title("Not Extreme")
plt.plot(inside)

plt.show()

Getting started with matplotlib plotting

import matplotlib.pyplot as plt
import numpy as np
# Horizontal axis: evenly spaced samples from the start point 0 to the end
# point 20.
xs = np.linspace(0, 20)
solid_line = .5 + xs
dashed_line = 1 + 2 * xs
plt.plot(xs, solid_line)
plt.plot(xs, dashed_line, "--")
# Save the figure to a file.
plt.savefig('1.png')
# Display the figure.
plt.show()

Guess you like

Origin blog.csdn.net/qq_28467367/article/details/89496157