# NumPy random-sampling examples.
import numpy as np

# rand(d0, d1, ..., dn): a d0 x d1 x ... x dn array of uniform samples
# drawn from the half-open interval [0, 1).
a = np.random.rand(3, 2, 2)  # 3 x 2 x 2 array of uniforms

# randn(d0, d1, ..., dn): samples from the standard normal N(0, 1).
# To obtain samples from N(mu, sigma^2), transform each generated value x
# as sigma * x + mu.
b = np.random.randn(3, 2)  # 3 x 2 array of N(0, 1) samples

# randint(low[, high, size]): random integers in the half-open interval
# [low, high).  `size` may be an int or a shape tuple/list; with a single
# argument the range is [0, low).
c = np.random.randint(3, size=[2, 3, 4])  # 2 x 3 x 4 array of ints in [0, 3)

# random_integers(low[, high, size]) was similar to randint but used the
# CLOSED interval [low, high]; it is deprecated/removed in modern NumPy —
# use randint(low, high + 1) instead.

# random_sample([size]): random floats in the half-open interval [0.0, 1.0).
# For samples in an arbitrary interval [lo, hi), use
# (hi - lo) * random_sample(size) + lo.
d = np.random.random_sample(5)
# scikit-learn random dataset generators.
#
# The generators live in sklearn.datasets and, unlike plain NumPy sampling,
# produce data shaped for particular machine-learning model families:
#   1. make_regression          — regression data
#   2. make_classification (or make_hastie_10_2 /
#      make_multilabel_classification) — classification data
#   3. make_blobs               — clustering data
#   4. make_gaussian_quantiles  — grouped multivariate-normal data
import numpy as np
import matplotlib.pyplot as plt

# NOTE(review): sklearn.datasets.samples_generator was deprecated and removed
# (sklearn >= 0.24); import the generators from sklearn.datasets directly.
from sklearn.datasets import (
    make_regression,
    make_classification,
    make_blobs,
    make_gaussian_quantiles,
)

# 1. Regression data.
# Key parameters: n_samples (number of samples), n_features (features per
# sample), noise (std of Gaussian noise on the output), coef (if True, also
# return the true regression coefficients).
# X is the feature matrix, y the target, coef the true coefficient:
# 1000 samples, 1 feature each.
X, y, coef = make_regression(n_samples=1000, n_features=1, noise=10, coef=True)
plt.scatter(X, y, color='black')
plt.plot(X, X * coef, color='blue', linewidth=3)
plt.xticks(())
plt.yticks(())
plt.show()

# 2. Classification data (three classes).
# Key parameters: n_samples, n_features, n_redundant (redundant features),
# n_classes (number of output classes).
# X1 holds the sample features, Y1 the class labels: 400 samples, 2 features,
# 3 classes, no redundant features, one cluster per class.
X1, Y1 = make_classification(n_samples=400, n_features=2, n_redundant=0,
                             n_clusters_per_class=1, n_classes=3)
plt.scatter(X1[:, 0], X1[:, 1], marker='o', c=Y1)
plt.show()

# 3. Clustering data.
# Key parameters: n_samples, n_features, centers (number of clusters, or
# explicit cluster centers), cluster_std (per-cluster standard deviation —
# how tightly each cluster is packed).
# 1000 samples, 2 features, 3 clusters centered at [-1, -1], [1, 1], [2, 2]
# with standard deviations 0.4, 0.5, 0.2 respectively.
X, y = make_blobs(n_samples=1000, n_features=2,
                  centers=[[-1, -1], [1, 1], [2, 2]],
                  cluster_std=[0.4, 0.5, 0.2])
plt.scatter(X[:, 0], X[:, 1], marker='o', c=y)
plt.show()

# 4. Grouped normally-distributed data.
# Key parameters: n_samples, n_features (dimension of the normal
# distribution), mean (feature means), cov (covariance coefficient: the
# covariance matrix is cov * identity), n_classes (number of quantile groups
# the samples are split into).
# 2-D normal data with mean [1, 2] and covariance coefficient 2, divided by
# quantile into 3 groups, 1000 samples.
X1, Y1 = make_gaussian_quantiles(n_samples=1000, n_features=2, n_classes=3,
                                 mean=[1, 2], cov=2)
plt.scatter(X1[:, 0], X1[:, 1], marker='o', c=Y1)
plt.show()