import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
# Import data: read the CSV, parse the dates, and use the 'date' column as the index.
df = pd.read_csv(
    'fcc-forum-pageviews.csv',
    index_col='date',
    parse_dates=True,
)

# Clean data.
# Compute the page-view thresholds that bound the bottom 2.5% and the
# top 2.5% of the dataset.
low_threshold = df["value"].quantile(0.025)
high_threshold = df["value"].quantile(0.975)

# Filter the data: keep only rows whose page views lie between the two
# thresholds (both ends inclusive).
condition = df["value"].between(low_threshold, high_threshold)
df = df.loc[condition]
def draw_line_plot():
    """Draw the cleaned page views as a red line chart and save it.

    Plots the ``value`` column of the module-level ``df`` against its index
    and writes the result to ``line_plot.png``.

    Returns:
        The matplotlib figure containing the line chart.
    """
    # Draw line plot
    fig, ax = plt.subplots(figsize=(15, 5))
    ax.plot(df.index, df['value'], color='r')

    # Title and axis labels are applied in a single call.
    ax.set(
        title='Daily freeCodeCamp Forum Page Views 5/2016-12/2019',
        xlabel='Date',
        ylabel='Page Views',
    )

    # Save image and return fig (don't change this part)
    fig.savefig('line_plot.png')
    return fig
def draw_bar_plot():
    """Draw a grouped bar chart of mean page views per month, grouped by year.

    Works on a copy of the module-level ``df``: the index's year and month
    are extracted, the ``value`` column is averaged per (year, month), and
    the result is pivoted so each year is a group of bars with one bar per
    month. The chart is written to ``bar_plot.png``.

    Returns:
        The matplotlib figure containing the bar chart.
    """
    # Copy and modify data for monthly bar plot
    df_bar = df.copy()
    df_bar['Month'] = df_bar.index.month
    df_bar['Year'] = df_bar.index.year

    # Rows become years and columns become month numbers; each cell is the
    # mean of 'value' for that year/month pair.
    df_bar = df_bar.groupby(['Year', 'Month'])['value'].mean().unstack()

    # Draw bar plot
    ax = df_bar.plot.bar(
        legend=True,
        figsize=(10, 5),
        ylabel="Average Page Views",
        xlabel='Years',
    )
    fig = ax.figure

    # Build the legend labels from the month columns that actually exist, so
    # a month missing from the data cannot shift every later label, and set
    # the legend on this chart's own axes instead of pyplot's "current" axes.
    month_names = [
        'January', 'February', 'March', 'April', 'May', 'June',
        'July', 'August', 'September', 'October', 'November', 'December',
    ]
    ax.legend([month_names[month - 1] for month in df_bar.columns])
    # NOTE(review): the project brief may also ask for a legend title
    # (e.g. "Months"); none was set originally - confirm before adding one.

    # Save image and return fig (don't change this part)
    fig.savefig('bar_plot.png')
    return fig
def draw_box_plot():
    """Draw side-by-side box plots of page views by year and by month.

    Uses a copy of the module-level ``df`` with the date index moved back
    into a ``date`` column, adds ``year`` and ``month`` columns, and draws
    two seaborn box plots on one figure. The figure is written to
    ``box_plot.png``.

    Returns:
        The matplotlib figure containing both box plots.
    """
    # Prepare data for box plots: turn the date index into a regular column
    # and derive the year and the abbreviated month name from it.
    df_box = df.reset_index()
    df_box['year'] = [stamp.year for stamp in df_box['date']]
    # NOTE: '%b' output depends on the active locale; the month order used
    # below assumes English abbreviations.
    df_box['month'] = [stamp.strftime('%b') for stamp in df_box['date']]

    # Draw box plots (using Seaborn)
    fig, (year_ax, month_ax) = plt.subplots(1, 2, figsize=(32, 10), dpi=100)

    # Left panel: one box per year.
    sns.boxplot(x="year", y="value", data=df_box, ax=year_ax)
    year_ax.set(
        title="Year-wise Box Plot (Trend)",
        xlabel="Year",
        ylabel="Page Views",
    )

    # Right panel: one box per month, forced into calendar order.
    month_order = [
        "Jan", "Feb", "Mar", "Apr", "May", "Jun",
        "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
    ]
    sns.boxplot(x="month", y="value", data=df_box, order=month_order, ax=month_ax)
    month_ax.set(
        title="Month-wise Box Plot (Seasonality)",
        xlabel="Month",
        ylabel="Page Views",
    )

    # Save image and return fig (don't change this part)
    fig.savefig('box_plot.png')
    return fig
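# Project: boilerplate-page-view-time-series-visualizer
# Reposted from blog.csdn.net/u010095372/article/details/129907658
# (Blog page navigation labels omitted from code: "You may also like",
# "Today's recommendations", "Weekly ranking".)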