Python Data Analysis Project - Bike-Sharing Data Analysis

Background

  • With China's rapid economic development, the urban population has grown dramatically, bringing with it a series of problems such as traffic congestion and environmental damage. Developing public transport is a good solution to the problems we now face. Bicycles are flexible, low-carbon, and environmentally friendly: if bicycles could replace today's motor vehicles, roads would be less crowded, travel would become far more efficient, car exhaust emissions would drop sharply, and environmental quality would improve. At the same time, bike sharing emerged to solve the "last kilometer" walk from the subway station to the office or from the bus stop to home. Shared bicycles effectively relieve the familiar frustrations: "walking is tiring, buses are crowded, driving means traffic jams, and taxis are expensive." Seemingly overnight, shared bicycles appeared on streets everywhere in Beijing, Shanghai, Guangzhou, and Shenzhen, and even in some second-tier cities. On September 26, 2016, ofo announced a strategic investment of tens of millions of dollars from Didi, with the two sides planning in-depth cooperation in the bike-sharing field; Mobike then completed a $215 million (about 1.5 billion yuan) Series D financing round in January 2017. As bike sharing grew more popular in China, a phone screenshot recently went viral online: in it, the icons of 24 bike-sharing apps fill the entire screen, truly "one picture that sums up the bike-sharing competition." On the streets, as if overnight, bike sharing has reached the point of "flooding," with shared bicycles of every color lining the roadsides of major cities. As the sharing economy continues to develop, it is gradually changing people's daily lives, and the spirit of sharing has become widely accepted.

Data Sources

Data Link: https://pan.baidu.com/s/11Gpq2-0z_E0ilNTm7Wt7vw
extraction code: 1whs
Copy this text and open the Baidu Netdisk mobile app for more convenient access.

import pandas as pd
import calendar
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sn
import numpy as np
from pylab import mpl

# Make 'Simhei' the sans-serif font so the Chinese titles and axis labels set by
# the plotting functions below can be drawn.
# NOTE(review): presumably requires the SimHei font to be installed - verify on
# the target machine.
mpl.rcParams['font.sans-serif'] = ['Simhei']


def collect_and_process_data():
    """Load ``train.csv``, derive calendar features, plot and drop outliers.

    Steps:
      1. Read ``train.csv`` from the current working directory.
      2. Split the text ``datetime`` column into ``date`` and ``hour`` and
         derive ``weekday`` / ``month`` names via ``calendar.day_name`` and
         ``calendar.month_name`` (dates must match ``%Y/%m/%d``).
      3. Map the ``season`` codes 1-4 to season names and convert the
         grouping columns to pandas categoricals.
      4. Save four box plots of ``count`` to ``Abnormal_value_analysis.png``.
      5. Keep only the rows whose ``count`` lies within three standard
         deviations of the mean, write them to ``processed_data.csv`` and
         return them.

    Returns:
        pandas.DataFrame: the filtered rows, without the original
        ``datetime`` column.
    """
    bikedata = pd.read_csv("train.csv")

    # --- Feature extraction ---
    # The date is the first whitespace-separated token of "datetime".
    bikedata['date'] = bikedata['datetime'].apply(lambda stamp: stamp.split()[0])
    # The hour is the part of the second token before the first ":" (kept as text).
    bikedata['hour'] = bikedata['datetime'].apply(
        lambda stamp: stamp.split()[1].split(":")[0])

    # Weekday name from the parsed date.
    bikedata['weekday'] = bikedata['date'].apply(
        lambda dateString: calendar.day_name[datetime.strptime(dateString, '%Y/%m/%d').weekday()])
    # Month name from the parsed date.
    bikedata["month"] = bikedata['date'].apply(
        lambda dateString: calendar.month_name[datetime.strptime(dateString, '%Y/%m/%d').month])

    # --- Conversion ---
    # Replace the numeric season codes with season names.
    bikedata["season"] = bikedata['season'].map({1: "Spring", 2: "Summer", 3: "Fall", 4: "Winter"})
    # Treat the grouping variables as categoricals.
    varlist = ['hour', 'weekday', 'month', 'season', 'holiday', 'workingday']
    for column in varlist:
        bikedata[column] = bikedata[column].astype('category')

    # Hour labels are strings, so their default category order is lexicographic
    # ("0", "1", "10", ..., "2", ...). Reorder the categories numerically so
    # plots and group-bys run from hour 0 upwards; the stored values are unchanged.
    hour_labels = list(bikedata['hour'].cat.categories)
    if all(label.isdigit() for label in hour_labels):
        bikedata['hour'] = bikedata['hour'].cat.reorder_categories(
            sorted(hour_labels, key=int))

    # The raw timestamp is no longer needed once its parts are extracted.
    bikedata.drop('datetime', axis=1, inplace=True)

    # --- Outlier inspection: box plots of "count" overall and per group ---
    fig, axes = plt.subplots(nrows=2, ncols=2)
    fig.set_size_inches(12, 12)
    sn.boxplot(data=bikedata, y='count', orient='v', ax=axes[0][0])
    sn.boxplot(data=bikedata, y='count', x='season', orient='v', ax=axes[0][1])
    sn.boxplot(data=bikedata, y='count', x='hour', orient='v', ax=axes[1][0])
    sn.boxplot(data=bikedata, y='count', x='workingday', orient='v', ax=axes[1][1])

    axes[0][0].set(ylabel='骑行人数', title='骑行人数')
    axes[0][1].set(xlabel='季节', ylabel='骑行人数', title='不同季节的骑行人数')
    axes[1][0].set(xlabel='时间', ylabel='骑行人数', title='一天内不同时间骑行人数')
    axes[1][1].set(xlabel='工作日', ylabel='骑行人数', title='工作日骑行人数')
    plt.savefig('Abnormal_value_analysis.png')

    # --- Outlier removal: keep rows within 3 standard deviations of the mean ---
    bikedata1 = bikedata[np.abs(bikedata["count"] - bikedata["count"].mean()) <= (3 * bikedata["count"].std())]
    bikedata1.to_csv('processed_data.csv')
    return bikedata1


def Data_Analysis_and_Visualization_month(bikedata):
    """Plot the mean ``count`` per month as a bar chart.

    The chart is saved to ``result1.png`` and then shown with ``plt.show()``.

    Args:
        bikedata: DataFrame with a ``month`` column holding the month names
            listed in ``sortOrder`` below and a numeric ``count`` column.
    """
    # Mean count per month. pandas has no ``pd.bikedata``; the aggregated
    # Series has to be wrapped in ``pd.DataFrame`` before ``reset_index``.
    monthAggregated = pd.DataFrame(bikedata.groupby("month")["count"].mean()).reset_index()

    fig, axes = plt.subplots()
    fig.set_size_inches(12, 20)
    # Bars follow calendar order via ``order=``, so the rows need no pre-sorting.
    sortOrder = ["January", "February", "March", "April", "May", "June", "July", "August", "September", "October",
                 "November", "December"]
    sn.barplot(data=monthAggregated, x="month", y="count", order=sortOrder, ax=axes)
    axes.set(xlabel="月份", ylabel="平均骑行人数", title="不同月份的骑行人数")
    plt.savefig('result1.png')
    plt.show()


def Data_Analysis_and_Visualization_hour(bikedata):
    """Plot the mean ``count`` per hour, one line per weekday.

    The aggregated table is printed, and the chart is saved to
    ``result2.png`` and then shown with ``plt.show()``.

    Args:
        bikedata: DataFrame with ``hour``, ``weekday`` and numeric ``count``
            columns; ``weekday`` is expected to hold the names listed in
            ``hueOrder`` below.
    """
    # Mean count for every (hour, weekday) combination.
    hourAggregated = pd.DataFrame(bikedata.groupby(["hour", "weekday"])["count"].mean()).reset_index()
    print(hourAggregated)

    # Hour labels may be strings, whose natural order is lexicographic
    # ("0", "1", "10", ..., "2", ...). Sort them numerically for the x-axis and
    # fall back to the default order if a label is not numeric.
    try:
        hourOrder = sorted(hourAggregated["hour"].unique(), key=int)
    except (TypeError, ValueError):
        hourOrder = None

    fig1, ax1 = plt.subplots()
    fig1.set_size_inches(12, 20)
    hueOrder = ["Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday"]
    sn.pointplot(data=hourAggregated, x="hour", y="count", hue="weekday",
                 order=hourOrder, hue_order=hueOrder, ax=ax1)
    ax1.set(xlabel="时间", ylabel="骑行人数", title="一周内不同时间的骑行人数")

    plt.savefig("result2.png")
    plt.show()


# Entry point
def main():
    """Run the pipeline: prepare the data, then draw the month and hour charts."""
    # Load, clean and persist the data set.
    cleaned = collect_and_process_data()
    # Render each analysis chart from the same cleaned frame, month first.
    for render in (Data_Analysis_and_Visualization_month,
                   Data_Analysis_and_Visualization_hour):
        render(cleaned)


if __name__ == '__main__':
    main()

Visualization results:

Here Insert Picture Description
Here Insert Picture Description
Here Insert Picture Description
Source link: https://pan.baidu.com/s/1du9KnHUTn6Z3N9-Jx-BelA
extraction code: okek
Copy this text and open the Baidu Netdisk mobile app for more convenient access.

Published 38 original articles · won praise 13 · views 4362

Guess you like

Origin blog.csdn.net/YanWenCheng_/article/details/94197268