Finding the correlation coefficient in Python

Given two data sequences, compute their covariance and their linear correlation coefficient.

1: Use numpy

import random
import numpy as np
# Two samples of 20 random integers each, drawn from 0..10 inclusive
a = [random.randint(0, 10) for _ in range(20)]
b = [random.randint(0, 10) for _ in range(20)]
# Stack the samples as the two rows of a 2 x 20 array; with their default
# settings np.cov and np.corrcoef treat each row as one variable.
ab = np.array([a, b])
# 2 x 2 covariance matrix: variances on the diagonal, cov(a, b) off it
print(np.cov(ab))
# 2 x 2 correlation matrix: the off-diagonal entry is corr(a, b)
print(np.corrcoef(ab))

2: Use pandas

import pandas as pd
# Compute the covariance and the correlation coefficient with pandas.
# ab stores one sample per row, so it is transposed to get one sample per
# DataFrame column.
dfab = pd.DataFrame(ab.T, columns=['A', 'B'])
# Covariance between columns A and B
print(dfab['A'].cov(dfab['B']))
# Correlation coefficient between columns A and B
print(dfab['A'].corr(dfab['B']))

3: Use plain Python functions

import random
import math
# Two samples of 20 random integers each, drawn from 0..10 inclusive
a = [random.randint(0, 10) for _ in range(20)]
b = [random.randint(0, 10) for _ in range(20)]

# Calculate the average
def mean(x):
  """Return the arithmetic mean of the values in x.

  Raises ZeroDivisionError when x is empty.
  """
  total = sum(x)
  count = len(x)
  return total / count

# Subtract the mean from every data point
def de_mean(x):
  """Return a new list holding each element's deviation from the mean of x."""
  center = mean(x)
  deviations = []
  for value in x:
    deviations.append(value - center)
  return deviations

# Helper functions: dot product and sum of squares
def dot(v, w):
  """Return the dot product v_1 * w_1 + ... + v_n * w_n.

  Elements are paired with zip, so any extra elements of the longer
  input are ignored.
  """
  products = [v_i * w_i for v_i, w_i in zip(v, w)]
  return sum(products)

def sum_of_squares(v):
  """Return v_1 * v_1 + ... + v_n * v_n, the dot product of v with itself."""
  squared_total = dot(v, v)
  return squared_total

# Variance
def variance(x):
  """Return the sample variance of x.

  This is the sum of squared deviations from the mean divided by
  n - 1, where n is the number of elements.

  Raises ZeroDivisionError when x has fewer than two elements.
  """
  degrees_of_freedom = len(x) - 1
  squared_deviation_total = sum_of_squares(de_mean(x))
  return squared_deviation_total / degrees_of_freedom

# Standard deviation
def standard_deviation(x):
  """Return the sample standard deviation of x, the square root of variance(x)."""
  spread_squared = variance(x)
  return math.sqrt(spread_squared)

# Covariance
def covariance(x, y):
  """Return the sample covariance of the paired sequences x and y.

  The covariance is the sum of the products of the paired deviations
  from each sequence's mean, divided by n - 1.

  Raises:
    ValueError: if x and y have different lengths. Without this check
      the pairing would silently stop at the shorter input while n and
      the means were still taken from the full sequences, yielding a
      meaningless number.
    ZeroDivisionError: if the sequences have fewer than two elements.
  """
  n = len(x)
  if len(y) != n:
    raise ValueError(
        "covariance requires x and y to have the same length "
        "(got %d and %d)" % (n, len(y)))
  return dot(de_mean(x), de_mean(y)) / (n - 1)

# Correlation coefficient
def correlation(x, y):
  """Return the correlation coefficient of x and y.

  The result is covariance(x, y) divided by the standard deviation of
  x and then by the standard deviation of y. Unless both standard
  deviations are strictly positive, 0 is returned instead of dividing.
  """
  spread_x = standard_deviation(x)
  spread_y = standard_deviation(y)
  # Guard: a sample with no variation has no defined correlation; report 0.
  if not (spread_x > 0 and spread_y > 0):
    return 0
  return covariance(x, y) / spread_x / spread_y

# Show both samples, then each sample's standard deviation, then the
# correlation between the two samples.
for sample in (a, b):
  print(sample)
for sample in (a, b):
  print(standard_deviation(sample))
print(correlation(a, b))

4: Use R, SPSS, or Excel

 

Guess you like

Origin www.cnblogs.com/marszhw/p/12175454.html