Summary of pandas DataFrame operation techniques

# Font used for Chinese text: load SimHei and make it the seaborn default font.
# NOTE(review): the font path is Windows-specific, and FontProperties is
# presumably imported from matplotlib.font_manager elsewhere - confirm.
myfont = FontProperties(fname=r'C:\Windows\Fonts\simhei.ttf', size=14)
import seaborn as sns
sns.set(font=myfont.get_name())

# Load the spreadsheet, keep rows whose 'time' lies strictly between 0 and
# 4320, and drop the columns listed in drop_feat.
# read_excel takes no `sep` argument (a delimiter only applies to text
# formats), and the context manager closes the handle once the data is loaded.
with open(".xlsx", "rb") as file:
    data = pd.read_excel(file)
data = data.loc[(data['time'] > 0) & (data['time'] < 4320)]
# NOTE(review): the `...` is presumably a placeholder for elided column names.
drop_feat = ["Number",...,"Status"]
feat = [i for i in data.columns if i not in drop_feat]
data = data[feat]
print(data.isnull().sum() / len(data))  # proportion of missing values per column

# Filter the loaded data down to the level III / IV rows, then aggregate per
# (period, category). The filter must be live code: `ddie` is used below and
# was previously assigned only inside a comment, which raised NameError.
ddie = data.loc[(data['level'] == "Ⅲ") | (data['level'] == "Ⅳ")]
data = ddie.groupby(['period', 'category']).mean().reset_index()  # per-group column means
dataForsize = ddie.groupby(['period', 'category']).size().reset_index()  # row count per group, stored in column 0

# Select the rows of interest from both aggregates, then collapse them by 'hang'.
# NOTE(review): the empty column name and empty match values ('') look like
# placeholders - replace them with the real column / category names.
dmean1 = data.loc[(data['class'] == '') | (data[''] == '')]
dsize1 = dataForsize.loc[(dataForsize['class'] == 'system') | (dataForsize[''] == '')]
dmean1 = dmean1.groupby(['hang']).mean()  # average after grouping
# Sum the per-group counts held in column 0; .sum() replaces .agg(sum), which
# passed the Python builtin and is deprecated by newer pandas releases.
dsize1 = dsize1.groupby(['hang'])[0].sum()

# For a continuous x-axis one could instead use: xx = list(range(0, 24))
y1 = dmean1["time"]
# The index labels of y1 serve as the (possibly non-contiguous) x coordinates.
# Read them through the public accessor instead of the private _index._data
# attributes, which were originally found by inspecting the object in a debugger.
x1 = y1.index.values

# Plot the mean 'time' per group and annotate every point with its sample count.
plt.figure(figsize=(16, 5))
plt.plot(x1, y1, color='blue')
# NOTE(review): x3 / y3 are not defined in the visible part of this file;
# define the second series before this line or remove the call.
plt.plot(x3, y3, color='red')
# x1 holds index labels (possibly non-contiguous), so the count for each point
# is looked up in dsize1 by label rather than by position.
for _x, _y in zip(x1, y1):
    plt.text(_x, _y, dsize1.loc[_x], color='blue', fontsize=12)
plt.xticks(np.arange(24))
label = ["Other"]
plt.legend(label, loc=0, ncol=2)
plt.xlabel("0-23 hours")
plt.ylabel("Duration")
plt.show()

Guess you like

Origin http://43.154.161.224:23101/article/api/json?id=325184812&siteId=291194637