"""Exploratory script for the red-card data set.

Loads ``redcard.csv.gz``, adds a ``weight_class`` column that bins the
``weight`` column into five quantile-based categories, prints the first rows,
and, when executed as a script, writes a pandas-profiling HTML report to
``./example.html``.
"""
from __future__ import absolute_import, division, print_function

import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.pyplot import GridSpec
import seaborn as sns
import numpy as np
import pandas as pda
import os
import sys
from tqdm import tqdm
import warnings

# Installed before the remaining imports, as in the original statement order,
# so the filter is already active while those modules are being imported.
warnings.filterwarnings("ignore")
sns.set_context("poster", font_scale=1.3)

import missingno as msno
import pandas_profiling
from sklearn.datasets import make_blobs
import time

# Read the data.
data = pda.read_csv("redcard.csv.gz", compression="gzip")

print("=============多变量分析=========")

# Disabled exploratory code, kept for reference:
# from pandas.tools.plotting import scatter_matrix
# fig,ax=plt.subplots(figsize=(10,10))
# scatter_matrix(players[["height","weight","skinone"]],alpha=0.2,diagonal="hist",ax=ax)
# players=pda.read_csv("raw_players.csv.gz")
# players=players[players["rater1"].notnull()]
# print(players.head())

# Labels for the quantile bins, ordered from lightest to heaviest.
weight_categories = [
    "vlow_weight",
    "low_weight",
    "mid_weight",
    "high_weight",
    "vhigh_weight",
]

# One quantile bin per label, so the number of bins always matches the number
# of labels.
data["weight_class"] = pda.qcut(
    data["weight"],
    q=len(weight_categories),
    labels=weight_categories,
)
print(data.head())

# Run from PyCharm on Windows; when it finishes, open example.html in a browser.
# NOTE(review): the guard presumably exists because the profiler may start
# worker processes that re-import this module on Windows -- confirm.
if __name__ == '__main__':
    pfr = pandas_profiling.ProfileReport(data)
    pfr.to_file("./example.html")