使用 matplotlib 进行数据可视化

#!/usr/bin/env python
# -*- coding:utf-8 -*-
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Load the unemployment data and parse the DATE column into datetimes.
unrate = pd.read_csv('unrate.csv')
unrate['DATE'] = pd.to_datetime(unrate['DATE'])
print(unrate.head(12))  # show the first 12 rows

# Line chart of the first 12 rows: DATE on the x-axis, VALUE on the y-axis.
first_twelve = unrate.iloc[:12]
dates, values = first_twelve['DATE'], first_twelve['VALUE']
plt.plot(dates, values)
plt.title('Monthly Unemployment Trends,1948')
plt.xlabel('Month')
plt.ylabel('Unemployment Rate')
plt.xticks(rotation=45)  # tilt the x tick labels by 45 degrees
plt.show()
# Subplots: add_subplot(rows, cols, index) places an axes on a grid.
fig = plt.figure(figsize=(3, 3))  # figsize fixes the figure dimensions
ax1 = fig.add_subplot(211)
ax2 = fig.add_subplot(212)
# Third axes is positioned on a separate 4x3 grid and is never drawn on.
ax3 = fig.add_subplot(4, 3, 6)
random_x = np.random.randint(1, 5, size=5)  # five random integers in [1, 5)
ax1.plot(random_x, np.arange(5))
ax2.plot(np.arange(0, 30, 3), np.arange(10))
plt.show()
# Two curves on one figure, using the month number taken from DATE as x.
unrate['MONTH'] = unrate['DATE'].dt.month
fig = plt.figure(figsize=(6, 3))
for rows, colour in ((slice(0, 12), 'red'), (slice(12, 24), 'blue')):
    chunk = unrate[rows]
    plt.plot(chunk['MONTH'], chunk['VALUE'], c=colour)
plt.show()
# Several curves on one figure: one colour and legend entry per 12-row slice.
fig = plt.figure(figsize=(10, 6))
colors = ['red', 'blue', 'green', 'orange', 'black']
for offset, colour in enumerate(colors):
    rows = unrate.iloc[offset * 12:(offset + 1) * 12]
    plt.plot(rows['MONTH'], rows['VALUE'], c=colour, label=str(1948 + offset))
plt.title('Monthly Unemployment Trends,1948-1952')
plt.xlabel('Month,Integer')
plt.ylabel('Unemployment Rate,Percent')
plt.legend(loc='upper left')  # legend maps each colour to its label
plt.show()

from numpy import arange

# Bar chart of the first film's scores across the rating sources.
reviews = pd.read_csv('fandango_scores.csv')
cols = ['FILM', 'RT_user_norm', 'Metacritic_user_nom',
        'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']
norm_reviews = reviews[cols]
print(norm_reviews[:1])  # first row of the selected columns
num_cols = ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm',
            'Fandango_Ratingvalue', 'Fandango_Stars']  # score columns, one per bar
# DataFrame.ix was removed in pandas 1.0; .loc does the same label-based
# lookup (row label 0, the listed columns).
bar_heights = norm_reviews.loc[0, num_cols].values  # bar heights
print(bar_heights)
bar_positions = arange(len(num_cols)) + 0.75  # x position of each bar
print(bar_positions)
fig, ax = plt.subplots()  # fig is the whole figure, ax is the plotting area
# Third positional argument is the bar width; centre each bar on its position
# so the ticks set below line up with the bars.
ax.bar(bar_positions, bar_heights, 0.3, align='center')
# Pin one tick per bar before labelling; without this the labels are attached
# to the automatically chosen tick positions instead of the bars.
ax.set_xticks(bar_positions)
ax.set_xticklabels(num_cols, rotation=45)  # tilt the labels by 45 degrees
ax.set_xlabel('Rating Source')
ax.set_ylabel('Average Rating')
ax.set_title('Average User Rating For Avengers: Age of Ultron (2015)')
plt.show()
# Scatter plot: Fandango scores on the x-axis against Rotten Tomatoes user
# scores on the y-axis.
fig, ax = plt.subplots()
ax.scatter(norm_reviews['Fandango_Ratingvalue'], norm_reviews['RT_user_norm'])
ax.set_xlabel('Fandango')
ax.set_ylabel('Rotten Tomatoes')  # fixed typo: was 'Rottten Tomatoes'
plt.show()

# Same scatter plot, drawn in the top cell of a 2-row, 1-column layout.
fig = plt.figure(figsize=(5, 10))
ax1 = fig.add_subplot(211)
fandango_scores = norm_reviews['Fandango_Ratingvalue']
rt_scores = norm_reviews['RT_user_norm']
ax1.scatter(fandango_scores, rt_scores)
ax1.set_ylabel('Rotten Tomatoes')
ax1.set_xlabel('Fandango')
plt.show()

# Reload the scores, this time keeping only the name and four score columns.
reviews = pd.read_csv('fandango_scores.csv')
cols = ['FILM', 'RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue']
norm_reviews = reviews[cols]
print(norm_reviews.head(5))  # first five rows

# Frequency of each distinct score, ordered by score value.
fandango_distribution = norm_reviews['Fandango_Ratingvalue'].value_counts().sort_index()
imdb_distribution = norm_reviews['IMDB_norm'].value_counts().sort_index()
print(fandango_distribution)
print(imdb_distribution)

# Three histograms of the same column, all drawn onto the same axes.
fig, ax = plt.subplots()
fandango_ratings = norm_reviews['Fandango_Ratingvalue']
ax.hist(fandango_ratings)  # default binning
ax.hist(fandango_ratings, bins=20)  # bins: number of intervals
ax.hist(fandango_ratings, bins=20, range=(4, 5))  # range: lower and upper bound of the bins
plt.show()
# Four stacked histograms, one per rating source, sharing bins and y-limits.
fig = plt.figure(figsize=(5, 20))
ax1, ax2, ax3, ax4 = [fig.add_subplot(4, 1, row) for row in range(1, 5)]
panels = [
    (ax1, 'Fandango_Ratingvalue', 'Distribution of Fandango Ratings'),
    (ax2, 'RT_user_norm', 'Distribution of Rotten Tomatoes Ratings'),
    (ax3, 'Metacritic_user_nom', 'Distribution of Metacritic Ratings'),
    (ax4, 'IMDB_norm', 'Distribution of IMDB Ratings'),
]
for axis, column, heading in panels:
    axis.hist(norm_reviews[column], bins=20, range=(0, 5))
    axis.set_title(heading)
    axis.set_ylim(0, 50)  # same y-axis limits on every panel
plt.show()

猜你喜欢

转载自www.cnblogs.com/lifengwu/p/9818292.html