Basic operations for processing data with Plotly

import pandas as pd
# Load the .csv data
df = pd.read_csv(".csv")
# View the first five rows
df.head()
# Check the summary statistics
df.describe()
# Look at the shape of the data
df.shape
# Look at which columns the data set contains
df.columns
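As a quick, self-contained illustration of what these inspection calls return (the df_demo DataFrame below, its column names, and its values are made up for this sketch and are not part of the original data set):

import pandas as pd

# df_demo is a tiny, made-up DataFrame used only to illustrate the calls above
df_demo = pd.DataFrame({"TARGET": [0, 1, 0, 0, 1],
                        "FLAG_OWN_CAR": ["Y", "N", "Y", "N", "Y"],
                        "AMT_INCOME": [120000, 90000, 150000, 60000, 200000]})
print(df_demo.head())      # the first rows of the table
print(df_demo.describe())  # count, mean, std, min, quartiles, max of the numeric columns
print(df_demo.shape)       # (5, 3): five rows, three columns
print(df_demo.columns)     # the column labels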

# Data visualization
import matplotlib.pyplot as plt
import seaborn as sns
# When using a Jupyter notebook:
# import warnings
# warnings.filterwarnings("ignore")
# %matplotlib inline

# Create a custom figure
plt.figure(num=None, figsize=None, dpi=None, facecolor=None, edgecolor=None, frameon=True)
# Title
plt.title("")
# Show the distribution of the data
sns.distplot(df[""])
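Newer seaborn releases deprecate distplot; here is a filled-in sketch of the same step using its replacement, histplot, with example values for the figure size and title and the hypothetical df_demo column from the earlier sketch:

import matplotlib.pyplot as plt
import seaborn as sns

plt.figure(figsize=(8, 6), dpi=100)            # example size and resolution
plt.title("Income distribution (example)")     # example title
sns.histplot(df_demo["AMT_INCOME"], kde=True)  # histogram plus density curve
plt.show()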

# !pip install plotly   # install the Plotly library (run in a notebook)
# Import the plotting libraries
import plotly.offline as offline
import plotly.graph_objs as go
import plotly.offline as py
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected=True)
offline.init_notebook_mode()
# value_counts() quickly shows how many distinct values a column has and how many times each value appears
temp = df[""].value_counts()
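For instance, on the hypothetical FLAG_OWN_CAR column from the df_demo sketch above, value_counts() lists each distinct value with its frequency (a sketch, not the real data):

counts = df_demo["FLAG_OWN_CAR"].value_counts()
print(counts)                          # Y appears 3 times, N appears 2 times
print((counts / counts.sum()) * 100)   # the same counts as percentages, which is what the bar chart below plots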
# Draw a bar chart to see the proportion of each value
trace = [go.Bar(x=temp.index, y=(temp / temp.sum()) * 100)]
# Set the fonts and colors of the chart
# (the font sizes and RGB colors were left blank in the original; the values here are examples)
layout = go.Layout(
    title="",
    xaxis=dict(title='',
               tickfont=dict(size=14, color='rgb(107, 107, 107)')),
    yaxis=dict(title='',
               titlefont=dict(size=16, color='rgb(107, 107, 107)'),
               tickfont=dict(size=14, color='rgb(107, 107, 107)')))
# Display the figure
fig = go.Figure(data=trace, layout=layout)
iplot(fig, filename='')

# Draw a pie chart
trace = [go.Pie(labels=temp.index, values=temp.values)]
# Set the figure title
layout = go.Layout(title='')
# Display the figure
fig = go.Figure(data=trace, layout=layout)
iplot(fig)

# Draw a donut-style pie chart
trace = [go.Pie(labels=temp.index, values=temp.values, hole=0.6)]

temp1 = df["FLAG_OWN_CAR"].value_counts()
temp2 = df["FLAG_OWN_REALTY"].value_counts()
# Draw two pie charts side by side
trace = [go.Pie(labels=temp1.index, values=temp1.values, domain={"x": [0, .48]}, hole=0.6),
         go.Pie(labels=temp2.index, values=temp2.values, domain={"x": [0.5, 1]}, hole=0.6)]
# Set the fonts, annotation text, and title of the figure
# (annotation sizes, text, and coordinates were left blank in the original; the values here are examples)
layout = go.Layout(
    title='',
    annotations=[{"font": {"size": 20},
                  "showarrow": False,
                  "text": "FLAG_OWN_CAR",
                  "x": 0.20,  # x/y coordinates of the label
                  "y": 0.5},
                 {"font": {"size": 20},
                  "showarrow": False,
                  "text": "FLAG_OWN_REALTY",
                  "x": 0.80,
                  "y": 0.5}])
# Display the figure
fig = go.Figure(data=trace, layout=layout)
iplot(fig)

# Count, for each value in temp.index, how many rows have TARGET == 1 and TARGET == 0
import numpy as np   # needed for np.sum and np.array below
temp_y1 = []
temp_y0 = []
for val in temp.index:
    temp_y1.append(np.sum(df["TARGET"][df["TYPE"] == val] == 1))
    temp_y0.append(np.sum(df["TARGET"][df["TYPE"] == val] == 0))
temp_y1 = np.array(temp_y1)
temp_y0 = np.array(temp_y0)

# Drop the columns that contain missing values
df_drop = df.dropna(axis=1)
df_drop.head()

# Encode categorical features into numerical form
from sklearn import preprocessing
# Find the non-numeric columns
categorical_feats = [f for f in df_drop.columns if df_drop[f].dtype == 'object']
# Encode each non-numeric column
for col in categorical_feats:
    lb = preprocessing.LabelEncoder()
    lb.fit(list(df_drop[col].values.astype('str')))
    df_drop[col] = lb.transform(list(df_drop[col].values.astype('str')))
df_drop.head()
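To make the encoding step concrete, here is a minimal self-contained sketch of what LabelEncoder does to a single categorical column (the values are made up):

from sklearn import preprocessing

lb = preprocessing.LabelEncoder()
owns_car = ["Y", "N", "Y", "N", "Y"]   # made-up categorical values
lb.fit(owns_car)
print(list(lb.classes_))               # ['N', 'Y']: the categories, sorted alphabetically
print(list(lb.transform(owns_car)))    # [1, 0, 1, 0, 1]: each category replaced by an integer code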
# Split the data
# Drop the ID column
df_drop1 = df_drop.drop("ID", axis=1)
# Extract the feature data and the training target (the target column name is left blank in the original)
data_X = df_drop1.drop("", axis=1)
data_y = df_drop1[""]
# Split into a training data set and a test data set
from sklearn import model_selection
train_x, test_x, train_y, test_y = model_selection.train_test_split(
    data_X.values, data_y.values, test_size=0.8, random_state=0)
# Prediction model
from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier()   # build the model
model.fit(train_x, train_y)        # train the model
from sklearn import metrics
y_pred = model.predict(test_x)                        # predict on the test set
metrics.accuracy_score(test_y, y_pred)                # evaluate the predictions
print(metrics.classification_report(test_y, y_pred))
# Column names of the extracted features
features = data_X.columns.values
# Pair each feature with its importance and sort in ascending order of importance
x, y = (list(x) for x in zip(*sorted(zip(model.feature_importances_, features), reverse=False)))
# Draw a horizontal bar chart of the feature importances
trace2 = go.Bar(x=x, y=y,
                marker=dict(color=x, colorscale='Viridis', reversescale=True),
                name='', orientation='h')
# Set the figure title and fonts
layout = dict(title='', width=900, height=2000,
              yaxis=dict(showgrid=False, showline=False, showticklabels=True),
              margin=dict(l=300))
# Display the figure
fig1 = go.Figure(data=[trace2])
fig1['layout'].update(layout)
iplot(fig1, filename='plots')

from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
# Seven algorithms to compare
models = [LogisticRegression(solver='lbfgs'),        # logistic regression
          RandomForestClassifier(n_estimators=100),  # random forest
          DecisionTreeClassifier(),                  # decision tree
          MLPClassifier(max_iter=100),               # multilayer perceptron
          AdaBoostClassifier(),                      # adaptive boosting (AdaBoost)
          BaggingClassifier(),                       # bagging
          GradientBoostingClassifier()]              # gradient boosting
model_name = ['LogisticRegression', 'RandomForestClassifier', 'DecisionTreeClassifier',
              'MLPClassifier', 'AdaBoostClassifier', 'BaggingClassifier', 'GradientBoostingClassifier']
acc = []     # accuracy of each algorithm
f1 = []      # F1 score of each algorithm
recall = []  # recall of each algorithm
for model in models:
    # Train each algorithm and record its scores on the test set
    model.fit(train_x, train_y)
    acc.append(model.score(test_x, test_y))
    y_pred = model.predict(test_x)
    f1.append(metrics.f1_score(test_y, y_pred))
    recall.append(metrics.recall_score(test_y, y_pred))
# Show the evaluation results of each algorithm
pd.DataFrame({"name": model_name, "acc": acc, "f1": f1, "recall": recall})
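To read the comparison more easily, the summary table can be sorted by accuracy; a small follow-up sketch using the same names as the DataFrame built above:

results = pd.DataFrame({"name": model_name, "acc": acc, "f1": f1, "recall": recall})
print(results.sort_values("acc", ascending=False))   # highest-accuracy model first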


Origin www.cnblogs.com/wwj99/p/12047941.html