matplotlib简单操作(二)

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2018/8/11 13:32
# @Author  : limingyu
# @Site    : 
# @File    : Test_Matplotlib_fandango2.py
# @Software: PyCharm
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Load the Fandango score-comparison data set from the working directory.
reviews = pd.read_csv("fandango_score_comparison.csv")

# Columns holding the ratings from each review source.
cols = [
    'RT_user_norm',
    'Metacritic_user_nom',
    'IMDB_norm',
    'Fandango_Ratingvalue',
    'Fandango_Stars',
]
# Restrict the frame to just those rating columns.
norm_reviews = reviews[cols]

# Show the first five rows. Sample output recorded by the original author:
#   RT_user_norm       ...        Fandango_Stars
#0           4.3       ...                   5.0
#1           4.0       ...                   5.0
#2           4.5       ...                   5.0
#3           4.2       ...                   5.0
#4           1.4       ...                   3.5
print(norm_reviews.head(5))


# Every individual Fandango rating value, as a raw array.
# Sample output: [4.5 4.5 4.5 4.5 3.  4....]
fandango_score = norm_reviews['Fandango_Ratingvalue'].values
print(fandango_score)

# How many times each distinct Fandango rating occurs (most frequent first).
# Sample output:
#4.1    16
#4.2    12
#3.9    12
#4.3    11
#3.7     9...
fandango_distribution = norm_reviews['Fandango_Ratingvalue'].value_counts()
print(fandango_distribution)

# Re-order the counts by rating value, ascending.
# Sample output:
#2.7     2
#2.8     2
#2.9     5
#3.0     4
fandango_distribution = fandango_distribution.sort_index()
print(fandango_distribution)

# Same frequency table for the IMDB ratings, already ordered by rating value.
imdb_distribution = norm_reviews['IMDB_norm'].value_counts().sort_index()
print(imdb_distribution)

# Plot a histogram of the Fandango ratings.
fig, ax = plt.subplots()
# hist() groups values into bins (10 by default). Binning collapses a variable
# with many distinct values into a handful of ranges so the x-axis stays
# readable.
# Alternatives to the call below:
#   ax.hist(norm_reviews['Fandango_Ratingvalue'])            -> default bins
#   ax.hist(norm_reviews['Fandango_Ratingvalue'], bins=20)   -> 20 bins
# Here: 20 bins, with the x-axis limited to the interval [4, 5].
ax.hist(norm_reviews['Fandango_Ratingvalue'], bins=20, range=[4, 5])
plt.show()

 

猜你喜欢

转载自blog.csdn.net/mingyuli/article/details/81586154