Using pandas and matplotlib to plot data from an Excel file as bar charts

Zero, the experiment requirements

1. Crawl the data
Here Insert Picture Description
2. Visualize the data: the horizontal axis must be the bidding date, and the vertical axis shows the value of each of the other attributes

First, obtaining the data

The experimental data used is shown below; right-click the picture and open it in a new tab to view it at full size
Here Insert Picture Description
The crawler code that produces the .xlsx file is as follows:

import re
import requests
import pandas as pd
from bs4 import BeautifulSoup
# Crawl the announcement index pages, pull the bidding figures out of every
# matching announcement, and export one row per announcement to Excel.

# Seconds to wait for the server; without a timeout a stalled connection
# would hang the script forever.
REQUEST_TIMEOUT = 10

# Column headers, listed in the order the figures are zipped against them.
title = ['竞价日期', '配置增量指标', '个人增量指标', '单位增量指标', '个人最低成交价', '单位最低成交价', '个人最低成交价个数', '单位最低成交价个数', '个人平均成交价', '单位平均成交价']

# Compiled once instead of on every announcement.
# link_pattern selects the announcement links on an index page.
link_pattern = re.compile('增量指标竞价情况')
# record_pattern picks the date and each labelled figure out of the page text.
record_pattern = re.compile(r'\d+\-\d+\-\d+|配置增量指标\d+|个人增量指标\d+|单位增量指标\d+|最低成交价[\u4e00-\u9fa5]+\d+\元\D\单位\d+|成交\d+|平均成交价[\u4e00-\u9fa5]+\d+\元\D\单位\d+')
# number_pattern reduces the joined matches to the bare date and integers.
number_pattern = re.compile(r'\d+\-\d+\-\d+|\d+')

wang = []
for k in range(2, 5):  # index_2.html .. index_4.html
    url = "http://xkctk.hangzhou.gov.cn/tzgg/index_{0}.html".format(k)
    r = requests.get(url, timeout=REQUEST_TIMEOUT)
    r.encoding = "utf-8"
    soup = BeautifulSoup(r.text, "html.parser")
    for link in soup.find_all('a', string=link_pattern):
        rr = requests.get(link.get("href"), timeout=REQUEST_TIMEOUT)
        rr.encoding = "utf-8"
        soupp = BeautifulSoup(rr.text, "html.parser")
        l = "".join(record_pattern.findall(soupp.text))
        number = number_pattern.findall(l)
        # zip() stops at the shorter sequence, so a page with fewer figures
        # simply yields a record with fewer keys.
        wang.append(dict(zip(title, number)))

# Passing columns= fixes the column order and guarantees every header exists
# even when no record supplied it (those cells are left empty).
df = pd.DataFrame(wang, columns=title)

# The regex yields strings; store every column except the date as numbers so
# the spreadsheet holds numeric cells rather than text.
value_columns = title[1:]
df[value_columns] = df[value_columns].apply(pd.to_numeric)

# The workbook is only written to disk when the writer is closed; the
# context manager guarantees that happens. 'news' is the file name.
with pd.ExcelWriter('news.xlsx') as writer:
    df.to_excel(writer, index=False)  # export to Excel
# Second copy at a fixed local path (this one keeps the index column).
df.to_excel('C:/暂用/news.xlsx')

Second, code for plotting a single bar chart

A separate chart of one attribute against the date

"""个人最低成交价/日期"""
import matplotlib.pyplot as plt
import pandas as pd

# Work around rendering problems with Chinese text and the minus sign.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})

# Load the sheet named Sheet1 from news.xlsx.
df = pd.read_excel("news.xlsx", sheet_name="Sheet1")
fig = plt.figure()

# One bar per row: bidding date on the x axis, bar height from the
# '个人最低成交价' column.
plt.bar(x=df['竞价日期'], height=df['个人最低成交价'])
plt.title('个人最低成交价/日期')
plt.xlabel('日期', size=10)
plt.ylabel('个人最低成交价')
plt.show()

Here Insert Picture Description

Third, code for drawing grouped bar charts with value labels

Because the values differ greatly in magnitude (for example, the lowest transaction prices versus the number of bids at the lowest price), the code is split into two scripts, each shown separately.
The first .py file:

"""配置增量指标/日期"""
import matplotlib.pyplot as plt
import pandas as pd

# Every date gets a slot 0.8 wide, shared by the n series drawn in this script.
total_width = 0.8
n = 4
# Width of one bar inside a date's slot.
width = total_width / n


# Helper that writes each bar's value above the bar.
def autolabel(rects):
    """Label every bar in *rects* with its height.

    Each element must provide ``get_x()``, ``get_width()`` and
    ``get_height()``. The text is the height converted to ``float``, drawn on
    the current pyplot axes 1000 data units above the bar top. The horizontal
    position is the bar's left edge plus ``width / n - 0.1``, where ``n`` is
    the module-level series count.
    """
    for bar in rects:
        bar_height = bar.get_height()
        label_x = bar.get_x() + bar.get_width() / n - 0.1
        label_y = bar_height + 1000
        plt.text(label_x, label_y, str(float(bar_height)))


# Work around rendering problems with Chinese text and the minus sign.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

df = pd.read_excel("news.xlsx", "Sheet1")
fig = plt.figure()

# One slot per data row. Derived from the sheet rather than hard-coded to 8
# rows, so the chart still draws when the crawl returns a different number
# of announcements (plt.bar rejects x and height of different lengths).
x = list(range(len(df)))

# (column to plot, face colour, whether this series supplies the tick labels)
series = [
    ('个人最低成交价', 'b', False),  # blue
    ('单位最低成交价', 'g', True),   # green; the dates are shown under this bar
    ('个人平均成交价', 'r', False),  # red
    ('单位平均成交价', 'c', False),  # cyan
]

bars = []
for column, colour, with_ticks in series:
    # Shift right by one bar width before each series so that the bars of
    # one date sit side by side instead of overlapping.
    x = [pos + width for pos in x]
    bars.append(plt.bar(
        x, df[column], width=width, label=column, fc=colour,
        # tick_label=None is the default and leaves the ticks untouched.
        tick_label=df['竞价日期'] if with_ticks else None,
    ))

# Label the bars only after every series has been drawn.
for container in bars:
    autolabel(container)

plt.title(u'x/日期')
plt.xlabel('日期', size=10)
plt.ylabel(u'各项指标')
plt.legend()
plt.show()

Here Insert Picture Description
The second .py file:

"""配置增量指标/日期"""
import matplotlib.pyplot as plt
import pandas as pd

# Every date gets a slot 0.8 wide, shared by the n series drawn in this script.
total_width = 0.8
n = 5
# Width of one bar inside a date's slot.
width = total_width / n


# Helper that writes each bar's value above the bar.
def autolabel(rects):
    """Label every bar in *rects* with its height.

    Each element must provide ``get_x()``, ``get_width()`` and
    ``get_height()``. The text is the height converted to ``float``, drawn on
    the current pyplot axes 10 data units above the bar top. The horizontal
    position is the bar's left edge plus ``width / n``, where ``n`` is the
    module-level series count.
    """
    for bar in rects:
        bar_height = bar.get_height()
        # Text anchor: (left edge + bar width / n, bar top + 10).
        label_x = bar.get_x() + bar.get_width() / n
        label_y = bar_height + 10
        plt.text(label_x, label_y, str(float(bar_height)))


# Work around rendering problems with Chinese text and the minus sign.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

df = pd.read_excel("news.xlsx", "Sheet1")
fig = plt.figure()

# One slot per data row. Derived from the sheet rather than hard-coded to 8
# rows, so the chart still draws when the crawl returns a different number
# of announcements (plt.bar rejects x and height of different lengths).
x = list(range(len(df)))

# (column to plot, face colour, whether this series supplies the tick labels)
series = [
    ('配置增量指标', 'y', False),
    ('个人增量指标', 'r', False),
    ('单位增量指标', 'c', True),   # the dates are shown under this bar
    ('个人最低成交价个数', 'g', False),
    ('单位最低成交价个数', 'b', False),
]

bars = []
for index, (column, colour, with_ticks) in enumerate(series):
    # The first series is drawn at the base positions; each later series is
    # shifted right by one bar width so the bars of a date do not overlap.
    if index > 0:
        x = [pos + width for pos in x]
    bars.append(plt.bar(
        x, df[column], width=width, label=column, fc=colour,
        # tick_label=None is the default and leaves the ticks untouched.
        tick_label=df['竞价日期'] if with_ticks else None,
    ))

# Label the bars only after every series has been drawn.
for container in bars:
    autolabel(container)

plt.title(u'x/日期')
plt.xlabel('日期', size=10)
plt.ylabel(u'各项指标')
plt.legend()
plt.show()

Here Insert Picture Description

Published 131 original articles · won praise 81 · views 60000 +

Guess you like

Origin blog.csdn.net/weixin_43469047/article/details/102994742