007. Python 科学计算库 Matplotlib（下）

测试数据：fandango_scores.csv

bar

import matplotlib.pyplot as plt
import pandas as pd
from numpy import arange

# Draw a basic vertical bar chart of the rating columns for the first film.
reviews = pd.read_csv('fandango_scores.csv')
# Numeric rating columns; each one becomes a single bar.
num_cols = ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']
cols = ['FILM'] + num_cols
norm_reviews = reviews[cols]
# Bar heights are the values of num_cols in the row labelled 0 (the first
# row of the CSV, since read_csv assigns a default 0-based index).
# DataFrame.ix was removed in pandas 1.0; .loc is the label-based equivalent.
bar_heights = norm_reviews.loc[0, num_cols].values
print(bar_heights)  # e.g. [4.3 3.55 3.9 4.5 5.0]
# x coordinate of the LEFT edge of each bar.
bar_positions = arange(5) + 0.75
print(bar_positions)  # [0.75 1.75 2.75 3.75 4.75]
fig, ax = plt.subplots()
# Each bar is 0.5 wide. align='edge' makes bar_positions the left edges, as
# intended above; matplotlib >= 2.0 would otherwise centre the bars on them.
ax.bar(bar_positions, bar_heights, width=0.5, align='edge')
plt.show()

import matplotlib.pyplot as plt
import pandas as pd
from numpy import arange

# Draw a labelled vertical bar chart of the rating columns for the first film.
reviews = pd.read_csv('fandango_scores.csv')
# Numeric rating columns; each one becomes a single bar and a tick label.
num_cols = ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']
cols = ['FILM'] + num_cols
norm_reviews = reviews[cols]
# DataFrame.ix was removed in pandas 1.0; .loc is the label-based equivalent.
bar_heights = norm_reviews.loc[0, num_cols].values
# Left edge of each bar; with a width of 0.5 the bars are centred on 1..5.
bar_positions = arange(5) + 0.75
# By default matplotlib picks its own numeric x ticks. We only want one tick
# under each bar, so the ticks are moved to the bar centres [1, 2, 3, 4, 5].
tick_positions = range(1, 6)
# Create a figure with a single set of axes.
fig, ax = plt.subplots()

# align='edge' keeps bar_positions as left edges so the bars line up with the
# ticks; matplotlib >= 2.0 would otherwise centre the bars on 0.75, 1.75, ...
ax.bar(bar_positions, bar_heights, width=0.5, align='edge')
ax.set_xticks(tick_positions)
# Label the x ticks with the column names, rotated to avoid overlap.
ax.set_xticklabels(num_cols, rotation=45)

ax.set_xlabel('Rating Source')
ax.set_ylabel('Average Rating')
ax.set_title('Average User Rating For Avengers: Age of Ultron (2015)')
plt.show()

barh

import matplotlib.pyplot as plt
import pandas as pd
from numpy import arange

# Draw a labelled horizontal bar chart of the rating columns for the first film.
reviews = pd.read_csv('fandango_scores.csv')
# Numeric rating columns; each one becomes a single bar and a tick label.
num_cols = ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']
cols = ['FILM'] + num_cols
norm_reviews = reviews[cols]
# For a horizontal chart the ratings are the bar lengths along the x axis.
# DataFrame.ix was removed in pandas 1.0; .loc is the label-based equivalent.
bar_widths = norm_reviews.loc[0, num_cols].values
# Bottom edge of each bar; with a thickness of 0.5 the bars are centred on 1..5.
bar_positions = arange(5) + 0.75
# One y tick per bar, placed at the bar centres [1, 2, 3, 4, 5].
tick_positions = range(1, 6)
# Create a figure with a single set of axes.
fig, ax = plt.subplots()
# barh draws horizontal bars. align='edge' keeps bar_positions as bottom edges
# so the bars line up with the ticks; matplotlib >= 2.0 would otherwise centre
# the bars on 0.75, 1.75, ...
ax.barh(bar_positions, bar_widths, height=0.5, align='edge')

ax.set_yticks(tick_positions)
# Label the y ticks with the column names.
ax.set_yticklabels(num_cols)

ax.set_ylabel('Rating Source')
ax.set_xlabel('Average Rating')
ax.set_title('Average User Rating For Avengers: Age of Ultron (2015)')
plt.show()

scatter

import matplotlib.pyplot as plt
import pandas as pd

# Scatter plot comparing two rating columns across all films.
reviews = pd.read_csv('fandango_scores.csv')
cols = [
    'FILM',
    'RT_user_norm',
    'Metacritic_user_nom',
    'IMDB_norm',
    'Fandango_Ratingvalue',
    'Fandango_Stars',
]
norm_reviews = reviews[cols]
# A scatter plot lets us visualise many (x, y) points at once.
fandango_scores = norm_reviews['Fandango_Ratingvalue']
rt_scores = norm_reviews['RT_user_norm']
fig, ax = plt.subplots()
# x: Fandango rating value, y: Rotten Tomatoes user rating.
ax.scatter(fandango_scores, rt_scores)
ax.set_ylabel('Rotten Tomatoes')
ax.set_xlabel('Fandango')
plt.show()

import matplotlib.pyplot as plt
import pandas as pd

# Two stacked scatter plots of the same pair of columns with the axes swapped.
reviews = pd.read_csv('fandango_scores.csv')
cols = [
    'FILM',
    'RT_user_norm',
    'Metacritic_user_nom',
    'IMDB_norm',
    'Fandango_Ratingvalue',
    'Fandango_Stars',
]
norm_reviews = reviews[cols]
fandango_scores = norm_reviews['Fandango_Ratingvalue']
rt_scores = norm_reviews['RT_user_norm']

fig = plt.figure(figsize=(5, 10))
# Lay out a 2-row, 1-column grid and take both cells.
ax1 = fig.add_subplot(211)
ax2 = fig.add_subplot(212)

# Top: Fandango on x, Rotten Tomatoes on y.
ax1.scatter(fandango_scores, rt_scores)
ax1.set_xlabel('Fandango')
ax1.set_ylabel('Rotten Tomatoes')

# Bottom: the same data with the axes swapped.
ax2.scatter(rt_scores, fandango_scores)
ax2.set_xlabel('Rotten Tomatoes')
ax2.set_ylabel('Fandango')
plt.show()

hist

import pandas as pd

# Print the frequency distribution of two rating columns, ordered by rating.
reviews = pd.read_csv('fandango_scores.csv')
cols = [
    'FILM',
    'RT_user_norm',
    'Metacritic_user_nom',
    'IMDB_norm',
    'Fandango_Ratingvalue',
    'Fandango_Stars',
]
norm_reviews = reviews[cols]
# value_counts() returns the count of each unique value, most frequent first
# (NA values are excluded by default). sort_index() then reorders the result
# by its labels, which here are the rating values themselves.
fandango_distribution = norm_reviews['Fandango_Ratingvalue'].value_counts().sort_index()
imdb_distribution = norm_reviews['IMDB_norm'].value_counts().sort_index()

print(fandango_distribution)
print("-------------------------------")
print(imdb_distribution)

import matplotlib.pyplot as plt
import pandas as pd

# Histogram of one rating column restricted to a sub-range.
reviews = pd.read_csv('fandango_scores.csv')
cols = [
    'FILM',
    'RT_user_norm',
    'Metacritic_user_nom',
    'IMDB_norm',
    'Fandango_Ratingvalue',
    'Fandango_Stars',
]
norm_reviews = reviews[cols]

rt_scores = norm_reviews['RT_user_norm']
fig, ax = plt.subplots()
# Split the (4, 5) interval into 20 equal-width bins and count only the
# RT_user_norm values that fall inside it. If range is omitted the full data
# span is used; if bins is omitted matplotlib defaults to 10 bins.
ax.hist(rt_scores, bins=20, range=(4, 5))
plt.show()

import matplotlib.pyplot as plt
import pandas as pd

# Two stacked histograms on identical axes so the distributions can be compared.
reviews = pd.read_csv('fandango_scores.csv')
cols = [
    'FILM',
    'RT_user_norm',
    'Metacritic_user_nom',
    'IMDB_norm',
    'Fandango_Ratingvalue',
    'Fandango_Stars',
]
norm_reviews = reviews[cols]

fig = plt.figure(figsize=(5, 10))
# Lay out a 2-row, 1-column grid and take both cells.
ax1 = fig.add_subplot(211)
ax2 = fig.add_subplot(212)

# Top: Fandango rating values, 20 bins over the 0-5 scale.
ax1.hist(norm_reviews['Fandango_Ratingvalue'], range=(0, 5), bins=20)
ax1.set_title('Distribution of Fandango Ratings')
# Fix the y-axis limits so both histograms share the same vertical scale.
ax1.set_ylim(0, 50)

# Bottom: Rotten Tomatoes user ratings, same binning and y limits.
ax2.hist(norm_reviews['RT_user_norm'], range=(0, 5), bins=20)
ax2.set_title('Distribution of Rotten Tomatoes Ratings')
ax2.set_ylim(0, 50)

plt.show()

boxplot

import matplotlib.pyplot as plt
import pandas as pd

# Box-and-whisker plot of a single rating column.
reviews = pd.read_csv('fandango_scores.csv')
cols = [
    'FILM',
    'RT_user_norm',
    'Metacritic_user_nom',
    'IMDB_norm',
    'Fandango_Ratingvalue',
    'Fandango_Stars',
]
norm_reviews = reviews[cols]

rt_scores = norm_reviews['RT_user_norm']
fig, ax = plt.subplots()
# boxplot draws one box per column (or per vector in a sequence) of its input.
# The box spans the lower to the upper quartile with a line at the median;
# the whiskers extend from the box to show the range of the data, and flier
# points are the values that lie beyond the ends of the whiskers.
ax.boxplot(rt_scores)
ax.set_xticklabels(['Rotten Tomatoes'])
# Show the full 0-5 rating scale on the y axis.
ax.set_ylim(0, 5)
plt.show()

import matplotlib.pyplot as plt
import pandas as pd

# Side-by-side box plots, one per rating column.
reviews = pd.read_csv('fandango_scores.csv')
cols = [
    'FILM',
    'RT_user_norm',
    'Metacritic_user_nom',
    'IMDB_norm',
    'Fandango_Ratingvalue',
    'Fandango_Stars',
]
norm_reviews = reviews[cols]

# Columns to compare; each becomes one box, in this order.
num_cols = ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue']
rating_matrix = norm_reviews[num_cols].values
fig, ax = plt.subplots()
# Passing a 2-D array draws one box per column.
ax.boxplot(rating_matrix)
# Label each box with its column name, rotated vertically to avoid overlap.
ax.set_xticklabels(num_cols, rotation=90)
# Show the full 0-5 rating scale on the y axis.
ax.set_ylim(0, 5)
plt.show()

007.python科学计算库matplotlib(下)

测试数据 fandango_scores.csv

bar

barh

scatter

hist

boxplot

猜你喜欢