Computing the linear correlation coefficient between two data sequences.
1: Use numpy
import random

import numpy as np

# Two random integer sequences of length 20, each value drawn from [0, 10].
a = [random.randint(0, 10) for _ in range(20)]
b = [random.randint(0, 10) for _ in range(20)]

# First construct a matrix: the two sequences become the rows of a 2 x 20
# array, which is the layout np.cov / np.corrcoef expect by default
# (one variable per row, one observation per column).
ab = np.array([a, b])

# Covariance matrix (2 x 2): variances on the diagonal, cov(a, b) off it.
print(np.cov(ab))

# Correlation-coefficient matrix (2 x 2): the off-diagonal entries are the
# correlation coefficient between a and b.
print(np.corrcoef(ab))
2: Use pandas
import pandas as pd

# Covariance and correlation coefficient using pandas.
# A DataFrame is used as the data structure to simplify the calculation.
# `ab` is the 2 x N matrix built in the NumPy section; it is transposed so
# that each sequence becomes one column (A and B) with one row per sample.
dfab = pd.DataFrame(ab.T, columns=['A', 'B'])

# Covariance between columns A and B.
print(dfab.A.cov(dfab.B))

# Correlation coefficient between columns A and B.
print(dfab.A.corr(dfab.B))
3: Use plain Python functions (standard library only)
import random
import math

# Two random integer sequences of length 20, each value drawn from [0, 10].
a = [random.randint(0, 10) for _ in range(20)]
b = [random.randint(0, 10) for _ in range(20)]


# Calculate the average
def mean(x):
    """Return the arithmetic mean of the sequence x.

    Raises ZeroDivisionError if x is empty.
    """
    return sum(x) / len(x)


# Difference between each data point and the mean
def de_mean(x):
    """Return a list of the deviations of each element of x from mean(x)."""
    x_bar = mean(x)
    return [x_i - x_bar for x_i in x]


# Helper functions: dot product and sum of squares
def dot(v, w):
    """Return the sum of the element-wise products of v and w.

    Elements are paired with zip, so any surplus elements of the longer
    sequence are ignored.
    """
    return sum(v_i * w_i for v_i, w_i in zip(v, w))


def sum_of_squares(v):
    """Return the sum of the squares of the elements of v."""
    return dot(v, v)


# Variance
def variance(x):
    """Return the sample variance of x (divides by n - 1).

    Raises ZeroDivisionError if x has exactly one element (and, via mean,
    if x is empty).
    """
    n = len(x)
    deviations = de_mean(x)
    return sum_of_squares(deviations) / (n - 1)


# Standard deviation
def standard_deviation(x):
    """Return the sample standard deviation of x (square root of variance)."""
    return math.sqrt(variance(x))


# Covariance
def covariance(x, y):
    """Return the sample covariance of x and y (divides by n - 1).

    Raises ValueError if x and y differ in length: the deviations are
    paired element by element, so a silent truncation would produce a
    meaningless result.
    """
    n = len(x)
    if n != len(y):
        raise ValueError("x and y must have the same length")
    return dot(de_mean(x), de_mean(y)) / (n - 1)


# The correlation coefficient
def correlation(x, y):
    """Return the correlation coefficient of x and y.

    Computed as covariance(x, y) divided by both standard deviations.
    If either sequence has zero standard deviation the coefficient is
    undefined; 0 is returned in that case.
    """
    stdev_x = standard_deviation(x)
    stdev_y = standard_deviation(y)
    if stdev_x > 0 and stdev_y > 0:
        return covariance(x, y) / stdev_x / stdev_y
    else:
        # No variation in at least one sequence: report "no correlation".
        return 0


print(a)
print(b)
print(standard_deviation(a))
print(standard_deviation(b))
print(correlation(a, b))
4: Other tools: R, SPSS, Excel