Generating random data

import numpy as np
# (1) rand(d0, d1, ..., dn) generates a d0 x d1 x ... x dn array of values drawn uniformly from [0, 1)
np.random.rand(3,2,2)  # generates a 3 x 2 x 2 array

    array([[[0.10141273, 0.97087629],
            [0.57045156, 0.62780166]],

           [[0.15425975, 0.21828791],
            [0.03630166, 0.60174227]],

           [[0.20345412, 0.51719419],
            [0.77047215, 0.67555402]]])


# randn(d0, d1, ..., dn) also generates a d0 x d1 x ... x dn array, but its values are drawn from the standard normal distribution N(0, 1).
np.random.randn(3,2)  # outputs a 3 x 2 array of samples from N(0, 1); to get samples from N(μ, σ²) instead, simply transform each generated value x as σx + μ

    
Out[3]:
array([[ 0.34949238, -1.39017794],
       [ 1.27691143, -0.71375712],
       [-0.56303407,  0.96331818]])
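
# As the comment above notes, samples from N(μ, σ²) can be obtained by rescaling standard-normal samples as σx + μ.
# A minimal sketch of that rescaling (μ = 5 and σ = 2 are just illustrative values):
import numpy as np
mu, sigma = 5.0, 2.0         # illustrative mean and standard deviation
x = np.random.randn(3, 2)    # samples from N(0, 1)
samples = sigma * x + mu     # now distributed as N(5, 4)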


# randint(low[, high, size]) generates random integers; size can be an integer, matrix dimensions, or tensor dimensions, and the values lie in the half-open interval [low, high)
np.random.randint(3, size=[2,3,4])

    
Out[4]:
array([[[2, 1, 0, 0],
        [2, 0, 0, 1],
        [1, 1, 1, 0]],

       [[1, 0, 0, 1],
        [2, 2, 2, 2],
        [0, 0, 0, 1]]])


# random_integers(low[, high, size]) is similar to randint above, except that the range is the closed interval [low, high]
# random_sample([size]) returns random floats in the half-open interval [0.0, 1.0); to get values in another interval [a, b), rescale as (b - a) * random_sample(size) + a
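# A minimal sketch of the [a, b) rescaling described above (a = 2 and b = 5 are just illustrative values):
import numpy as np
a, b = 2.0, 5.0                                          # illustrative interval bounds
values = (b - a) * np.random.random_sample((2, 3)) + a   # 2 x 3 array of uniform samples in [2.0, 5.0)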
# Random data generation APIs in scikit-learn
# The random data generation APIs in sklearn live in the datasets module; compared with numpy, they can generate data tailored to a particular kind of machine learning model. The common APIs are:
# 1. make_regression generates data for regression models
# 2. make_hastie_10_2, make_classification or make_multilabel_classification generate data for classification models (a brief make_hastie_10_2 sketch follows this list)
# 3. make_blobs generates data for clustering models
# 4. make_gaussian_quantiles generates grouped multidimensional normal distribution data
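
# Of the classification generators in item 2, only make_classification is demonstrated below.
# A minimal sketch of make_hastie_10_2 (the sample count of 2000 is just an illustrative choice):
from sklearn.datasets import make_hastie_10_2
# 2000 samples, each with 10 standard-normal features; the binary label y takes values +1 and -1
X, y = make_hastie_10_2(n_samples=2000)
print(X.shape, y.shape)  # (2000, 10) (2000,)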


# 1. Random regression model data
# make_regression generates data for regression models. The key parameters are n_samples (number of samples to generate), n_features (number of features per sample), noise (random noise added to the samples) and coef (whether to also return the regression coefficients).
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.datasets.samples_generator import make_regression
# X are the sample features, y is the sample output, coef the regression coefficients; 1000 samples, 1 feature per sample
X, y, coef = make_regression(n_samples=1000, n_features=1, noise=10, coef=True)
# plot
plt.scatter(X, y, color='black')
plt.plot(X, X*coef, color='blue', linewidth=3)
plt.xticks(())
plt.yticks(())
plt.show()


# 2. Random classification model data
# make_classification generates data for a three-class classification model. The key parameters are n_samples (number of samples to generate), n_features (number of features per sample), n_redundant (number of redundant features) and n_classes (number of output classes).
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.datasets.samples_generator import make_classification
# X1 are the sample features, Y1 the sample class labels; 400 samples, 2 features per sample, 3 output classes, no redundant features, one cluster per class
X1, Y1 = make_classification(n_samples=400, n_features=2, n_redundant=0, n_clusters_per_class=1, n_classes=3)
plt.scatter(X1[:,0], X1[:,1], marker='o', c=Y1)
plt.show()


# 3. Random clustering model data
# make_blobs generates data for clustering models. The key parameters are n_samples (number of samples to generate), n_features (number of features per sample), centers (the number of cluster centers, or custom cluster centers) and cluster_std (the per-cluster standard deviation, representing how tightly each cluster is concentrated).
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.datasets.samples_generator import make_blobs
# X are the sample features, y the sample cluster labels; 1000 samples, 2 features per sample, 3 clusters with centers [-1,-1], [1,1], [2,2] and cluster standard deviations [0.4, 0.5, 0.2]
X, y = make_blobs(n_samples=1000, n_features=2, centers=[[-1,-1], [1,1], [2,2]], cluster_std=[0.4, 0.5, 0.2])
plt.scatter(X[:,0], X[:,1], marker='o', c=y)
plt.show()


# 4. Grouped normal distribution data
# make_gaussian_quantiles generates grouped multidimensional normal distribution data. The key parameters are n_samples (number of samples to generate), n_features (dimension of the normal distribution), mean (mean of the features), cov (covariance coefficient of the samples) and n_classes (number of groups the data is split into by quantile of the normal distribution).
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.datasets import make_gaussian_quantiles
# generate a two-dimensional normal distribution split into 3 groups by quantile; 1000 samples, feature means 1 and 2, covariance coefficient 2
X1, Y1 = make_gaussian_quantiles(n_samples=1000, n_features=2, n_classes=3, mean=[1,2], cov=2)
plt.scatter(X1[:,0], X1[:,1], marker='o', c=Y1)

 
