1. Pygal (chart type Bar)
The Python visualization package Pygal is used to generate scalable vector graphics (SVG) files.
Official pygal documentation: [www.pygal.org/en/stable/](http://www.pygal.org/en/stable/)
1. Install pygal
pip install pygal -i https://pypi.tuna.tsinghua.edu.cn/simple
2. Simple python chart
import pygal
# Per the pygal docs, each call on the chart supplies one series of values;
# render() returns the SVG markup instead of writing a file.
pygal.Bar()(1, 3, 3, 7)(1, 6, 6, 4).render()
Generate svg chart
# Build a bar chart from two series of values and write it to simple.svg.
pygal.Bar()(1, 3, 3, 7)(1, 6, 6, 4).render_to_file("simple.svg")
Open the generated simple.svg file (for example in a web browser) to see the chart.
3. Make a chart with multiple series (Bar)
import pygal
# Bar chart with two named series, written to wsd.svg.
bar_chart = pygal.Bar()
bar_chart.add("大标题", [1, 3, 3, 7])
bar_chart.add("小标题", [1, 6, 6, 4])
bar_chart.render_to_file("wsd.svg")
4. Stacked Bar (StackedBar)
import pygal
# Stacked bar chart with the same two series, written to wsd.svg.
stacked_chart = pygal.StackedBar()
for title, values in (("大标题", [1, 3, 3, 7]), ("小标题", [1, 6, 6, 4])):
    stacked_chart.add(title, values)
stacked_chart.render_to_file("wsd.svg")
5. Make the chart above horizontal (HorizontalStackedBar)
import pygal
# Horizontal variant of the stacked bar chart, written to wsd.svg.
horizontal_chart = pygal.HorizontalStackedBar()
horizontal_chart.add("大标题", [1, 3, 3, 7])
horizontal_chart.add("小标题", [1, 6, 6, 4])
horizontal_chart.render_to_file("wsd.svg")
2. Pygal (various chart types)
1. Basic simple line chart (Line)
import pygal
# Line chart with two named series, written to wsd.svg.
line_chart = pygal.Line()
for title, values in (("大标题", [1, 3, 3, 7]), ("小标题", [1, 6, 6, 4])):
    line_chart.add(title, values)
line_chart.render_to_file("wsd.svg")
2、Horizontal Line
The same graphic but horizontal, with the value range set to 0-10.
import pygal
# Horizontal line chart with two named series.
horizontal_line = pygal.HorizontalLine()
horizontal_line.add("大标题", [1, 3, 3, 7])
horizontal_line.add("小标题", [1, 6, 6, 4])
# Pin the value axis to the interval 0-10.
horizontal_line.range = [0, 10]
horizontal_line.render_to_file("wsd.svg")
3、Stacked
Same graphics but with stacked values and fill rendering
import pygal
# Stacked line chart; fill=True asks pygal to fill the series when rendering.
stacked_line = pygal.StackedLine(fill=True)
for title, values in (("大标题", [1, 3, 3, 7]), ("小标题", [1, 6, 6, 4])):
    stacked_line.add(title, values)
# Pin the value axis to the interval 0-10.
stacked_line.range = [0, 10]
stacked_line.render_to_file("wsd.svg")
4、Time
For time-dependent graphs, simply format the label or use a variant of the xy graph
import pygal
from datetime import datetime
# The dates shown on the x axis, one per value of the "Visits" series.
visit_dates = [
    datetime(2013, 1, 2),
    datetime(2013, 1, 12),
    datetime(2013, 2, 2),
    datetime(2013, 2, 22),
]
# x_label_rotation=20 rotates the x-axis labels 20 degrees to the right;
# negative values, as used here, rotate them to the left.
date_chart = pygal.Line(x_label_rotation=-20)
# Format every date as YYYY-MM-DD for its label.
date_chart.x_labels = map(lambda d: d.strftime('%Y-%m-%d'), visit_dates)
date_chart.add("Visits", [300, 412, 823, 672])
date_chart.render_to_file("line-time.svg")
A lambda is an expression that creates an anonymous function. For example, take this ordinary function
def sum(x, y):
    """Return x + y.

    NOTE: the name shadows the built-in sum(); it is kept because the
    surrounding text refers to this function by that name.
    """
    return x + y
It can be written like this as a lambda expression
p = lambda x, y: x + y
5、Histogram
Basic
The histogram is a special bar. It takes three values for the bar graph: the height of the ordinate, the start of the abscissa, and the end of the abscissa.
import pygal
# Every bar is a (height, x_start, x_end) triple.
wide_bars = [(5, 0, 10), (4, 5, 13), (2, 0, 15)]
narrow_bars = [(10, 1, 2), (12, 4, 4.5), (8, 11, 13)]

hist = pygal.Histogram()
hist.add('Wide bars', wide_bars)
hist.add('Narrow bars', narrow_bars)
hist.render_to_file("histogram-basic.svg")
6、Scatter Plot
Basic XY chart drawing cosine curves. To get a scatter plot instead, disable the line between points by creating the chart with pygal.XY(stroke=False).
import pygal
from math import cos
# Sample points from -5.0 up to 4.5 in steps of 0.5.
samples = [x / 10. for x in range(-50, 50, 5)]

xy_chart = pygal.XY()
xy_chart.title = 'XY Cosinus'
# The two cosine curves, one along each axis.
xy_chart.add('x = cos(y)', [(cos(t), t) for t in samples])
xy_chart.add('y = cos(x)', [(t, cos(t)) for t in samples])
# Four straight segments at x = +/-1 and y = +/-1.
for label, segment in (
        ('x = 1', [(1, -5), (1, 5)]),
        ('x = -1', [(-1, -5), (-1, 5)]),
        ('y = 1', [(-5, 1), (5, 1)]),
        ('y = -1', [(-5, -1), (5, -1)])):
    xy_chart.add(label, segment)
xy_chart.render_to_file("xy-basic.svg")
7、Pie
Simple pie chart
import pygal
# One slice per browser; the numbers are percentages.
browser_shares = (
    ('IE', 19.5),
    ('Firefox', 36.6),
    ('Chrome', 36.3),
    ('Safari', 4.5),
    ('Opera', 2.3),
)

pie_chart = pygal.Pie()
pie_chart.title = 'Browser usage in February 2012 (in %)'
for browser, share in browser_shares:
    pie_chart.add(browser, share)
pie_chart.render_to_file("pie-basic.svg")
8、Multi-series pie
The same pie chart, but divided into subcategories
import pygal
# Each browser contributes a list of values, so its slice is subdivided.
shares_by_version = (
    ('IE', [5.7, 10.2, 2.6, 1]),
    ('Firefox', [.6, 16.8, 7.4, 2.2, 1.2, 1, 1, 1.1, 4.3, 1]),
    ('Chrome', [.3, .9, 17.1, 15.3, .6, .5, 1.6]),
    ('Safari', [4.4, .1]),
    ('Opera', [.1, 1.6, .1, .5]),
)

pie_chart = pygal.Pie()
pie_chart.title = 'Browser usage by version in February 2012 (in %)'
for browser, shares in shares_by_version:
    pie_chart.add(browser, shares)
pie_chart.render_to_file("pie-multi-series.svg")
9、Radar
Simple Kiviat diagram
import pygal
# Radar (Kiviat) chart with two named series.
radar_chart = pygal.Radar()
radar_chart.add("大标题", [1, 3, 3, 7])
radar_chart.add("小标题", [1, 6, 6, 4])
# Pin the value axis to the interval 0-10.
radar_chart.range = [0, 10]
radar_chart.render_to_file("wsd.svg")
10、Box
Extremes (default)
import pygal
# Benchmark scores per browser; each list becomes one box.
benchmark_scores = (
    ('Chrome', [6395, 8212, 7520, 7218, 12464, 1660, 2123, 8607]),
    ('Firefox', [7473, 8099, 11700, 2651, 6361, 1044, 3797, 9450]),
    ('Opera', [3472, 2933, 4203, 5229, 5810, 1828, 9013, 4669]),
    ('IE', [43, 41, 59, 79, 144, 136, 34, 102]),
)

box_plot = pygal.Box()
box_plot.title = 'V8 benchmark results'
for browser, scores in benchmark_scores:
    box_plot.add(browser, scores)
box_plot.render_to_file("box-extremes.svg")
11、Dot
import pygal
# Dot chart with two named series; x-axis labels are rotated by 30 degrees.
dot_chart = pygal.Dot(x_label_rotation=30)
for title, values in (("大标题", [1, 3, 3, 7]), ("小标题", [1, 6, 6, 4])):
    dot_chart.add(title, values)
# Pin the value range to the interval 0-10.
dot_chart.range = [0, 10]
dot_chart.render_to_file("wsd.svg")
12、Funnel
Funnel chart
import pygal
# One x label per benchmark; every series lists its scores in the same order.
benchmark_names = [
    'Richards', 'DeltaBlue', 'Crypto', 'RayTrace',
    'EarleyBoyer', 'RegExp', 'Splay', 'NavierStokes',
]

funnel_chart = pygal.Funnel()
funnel_chart.title = 'V8 benchmark results'
funnel_chart.x_labels = benchmark_names
for browser, scores in (
        ('Opera', [3472, 2933, 4203, 5229, 5810, 1828, 9013, 4669]),
        ('Firefox', [7473, 8099, 11700, 2651, 6361, 1044, 3797, 9450]),
        ('Chrome', [6395, 8212, 7520, 7218, 12464, 1660, 2123, 8607])):
    funnel_chart.add(browser, scores)
funnel_chart.render_to_file('funnel-basic.svg')
13、SolidGauge
import pygal
def percent_formatter(x):
    """Format a value as a percentage."""
    return '{:.10g}%'.format(x)


def dollar_formatter(x):
    """Format a value as a dollar amount."""
    return '{:.10g}$'.format(x)


gauge = pygal.SolidGauge(inner_radius=0.70)
# Percent formatting is the chart-wide default ...
gauge.value_formatter = percent_formatter
# ... and 'Series 1' overrides it with the dollar formatter.
gauge.add(
    'Series 1',
    [{'value': 225000, 'max_value': 1275000}],
    formatter=dollar_formatter)
gauge.add('Series 2', [{'value': 110, 'max_value': 100}])
gauge.add('Series 3', [{'value': 3}])
gauge.add('Series 4', [{'value': 51, 'max_value': 100},
                       {'value': 12, 'max_value': 100}])
gauge.add('Series 5', [{'value': 79, 'max_value': 100}])
gauge.add('Series 6', 99)
gauge.add('Series 7', [{'value': 100, 'max_value': 100}])
gauge.render_to_file('solidgauge-normal.svg')
14、Gauge
Gauge chart
import pygal
# One benchmark score per browser.
deltablue_scores = (
    ('Chrome', 8212),
    ('Firefox', 8099),
    ('Opera', 2933),
    ('IE', 41),
)

gauge_chart = pygal.Gauge(human_readable=True)
gauge_chart.title = 'DeltaBlue V8 benchmark results'
# Pin the dial to the interval 0-10000.
gauge_chart.range = [0, 10000]
for browser, score in deltablue_scores:
    gauge_chart.add(browser, score)
gauge_chart.render_to_file('gauge-basic.svg')
15、Maps
World map
installation
pip install pygal_maps_world
Countries
import pygal
# Country codes grouped by the first letter of the code.
country_groups = (
    ('C countries', ['cn', 'ca', 'ch', 'cg']),
    ('F countries', ['fr', 'fi']),
    ('M countries', ['ma', 'mc', 'md', 'me', 'mg',
                     'mk', 'ml', 'mm', 'mn', 'mo',
                     'mr', 'mt', 'mu', 'mv', 'mw',
                     'mx', 'my', 'mz']),
    ('U countries', ['ua', 'ug', 'us', 'uy', 'uz']),
)

worldmap_chart = pygal.maps.world.World()
worldmap_chart.title = 'Some countries'
for group_name, country_codes in country_groups:
    worldmap_chart.add(group_name, country_codes)
worldmap_chart.render_to_file('world-map-countries.svg')
16、Continents
Access continents
import pygal
# (legend label, continent key) pairs; every continent gets the value 1.
continents = (
    ('Asia', 'asia'),
    ('Europe', 'europe'),
    ('Africa', 'africa'),
    ('North america', 'north_america'),
    ('South america', 'south_america'),
    ('Oceania', 'oceania'),
    ('Antartica', 'antartica'),
)

supra = pygal.maps.world.SupranationalWorld()
for label, key in continents:
    supra.add(label, [(key, 1)])
supra.render_to_file('world-map-continents.svg')
3. Rolling dice
Analyze the probability of each result and draw a histogram
1. Create the source file (the later examples import it)
from random import randint
class Die:
    """A single die with a configurable number of sides."""

    def __init__(self, num_sides=6):
        """Create a die; it has six sides unless num_sides says otherwise."""
        self.num_sides = num_sides

    def roll(self):
        """Return a random integer from 1 to the number of sides, inclusive."""
        return randint(1, self.num_sides)
2. Roll one die
from Pygal.示例.die import Die
import pygal
# Create a die.
die = Die()

# Roll the die 1000 times and keep every result.
results = [die.roll() for _ in range(1000)]
print(results)

# Count how often each face came up.
faces = range(1, die.num_sides + 1)
frequencies = [results.count(face) for face in faces]
print(frequencies)

# Visualize the counts as a bar chart.
hist = pygal.Bar()
hist.title = '掷色子1000次的结果'
# Derive the labels from the die so they stay correct if its size changes.
hist.x_labels = [str(face) for face in faces]
hist.x_title = 'Result'
hist.y_title = '概率'
hist.add('D6', frequencies)
hist.render_to_file('die_visual.svg')
Open the file in a browser and hover the mouse over a bar: the series title "D6" is displayed together with the x-axis and y-axis values.
The frequencies of the six numbers are about the same (in theory each has probability 1/6; the more rolls there are, the more obvious the trend becomes)
3. Roll two dice at the same time
Just change the code a little: instantiate a second die
from Pygal.示例.die import Die
import pygal
# Create two dice.
die_1 = Die()
die_2 = Die()

# Roll both dice 1000 times and keep the sum of every roll.
results = [die_1.roll() + die_2.roll() for _ in range(1000)]
print(results)

# Count how often each possible sum came up; the smallest sum is 2.
max_result = die_1.num_sides + die_2.num_sides
totals = range(2, max_result + 1)
frequencies = [results.count(total) for total in totals]
print(frequencies)

# Visualize the counts as a bar chart.
hist = pygal.Bar()
hist.title = '掷色子1000次的结果'
# Derive the labels from the dice so they stay correct if their sizes change.
hist.x_labels = [str(total) for total in totals]
hist.x_title = 'Result'
hist.y_title = '概率'
hist.add('D6 + D6', frequencies)
hist.render_to_file('die_visualc.svg')
It can be seen from the figure that a sum of 7 occurs most often, while sums of 2 and 12 occur least often. There is only one way to roll 2, namely (1, 1), and only one way to roll 12, namely (6, 6); 7 can be rolled in 6 ways: (1, 6), (2, 5), (3, 4), (4, 3), (5, 2), (6, 1). Every other sum has fewer combinations than 7, so the probability of rolling 7 is the highest.
4. Roll two dice at the same time (one six-sided, one ten-sided)
from Pygal.示例.die import Die
import pygal
# Create a six-sided die and a ten-sided die.
die_1 = Die()
die_2 = Die(10)

# Roll both dice many times and keep the sum of every roll.
num_rolls = 50000
results = [die_1.roll() + die_2.roll() for _ in range(num_rolls)]
print(results)

# Count how often each possible sum came up; the smallest sum is 2.
max_result = die_1.num_sides + die_2.num_sides
totals = range(2, max_result + 1)
frequencies = [results.count(total) for total in totals]
print(frequencies)

# Visualize the counts as a bar chart.
hist = pygal.Bar()
# The title is built from num_rolls so it cannot drift from the real count.
hist.title = '掷色子{}次的结果'.format(num_rolls)
hist.x_labels = list(totals)
hist.x_title = 'Result'
hist.y_title = '概率'
# The second die has ten sides, so the series is D6 + D10.
hist.add('D6 + D10', frequencies)
hist.render_to_file('die_visualcc.svg')
4. Processing CSV files with Python
CSV (Comma-Separated Values) is a plain-text format that can be opened and viewed in Excel; because it is plain text, any editor can open it. Unlike an Excel file, in a CSV file:
- Values have no type, all values are strings
- Cannot specify styles such as font color
- The width and height of the cell cannot be specified, and the cells cannot be merged
- No multiple sheets
- Cannot embed image chart
In a CSV file, a comma `,` is the separator between two cells. For example, `a,,c` represents cell `a` and cell `c` with an empty cell between them, and so on.
Not every comma is a boundary between cells (a comma inside a quoted value is part of the data). So even though CSV is a plain text file, it is still best to process it with a dedicated module. Python has a built-in csv module. First look at a simple example.
1. Read data from CSV file
import csv
filename = 'F:/Jupyter Notebook/matplotlib_pygal_csv_json/sitka_weather_2014.csv'
# newline='' lets the csv module handle line endings itself, as its docs recommend.
with open(filename, newline='') as f:
    reader = csv.reader(f)
    # Convert the reader to a list so that all rows are printed at once.
    print(list(reader))
The reader object cannot be printed directly; it has to be converted with list() first. The resulting outer list (call it data) holds one inner list per row, something like this
[['name', 'age'], ['Bob', '14'], ['Tom', '23'], ...]
So after data = list(reader) we can access Bob's age with data[1][1]. The rows can also be traversed in a for loop as follows
import csv
filename = 'F:/Jupyter Notebook/matplotlib_pygal_csv_json/sitka_weather_2014.csv'
# newline='' lets the csv module handle line endings itself, as its docs recommend.
with open(filename, newline='') as f:
    reader = csv.reader(f)
    for row in reader:
        # Line numbers start from 1.
        print(reader.line_num, row)
Part of the output
1 ['AKST', 'Max TemperatureF', ...]
2 ['2014-1-1', '46', '42', '37', '40', '38', '36', '97', '138', ...]
...
The number in front is the line number, obtained from reader.line_num; it starts from 1.
Note that the reader can only be traversed once. Since the reader is an iterator, you can use the built-in next() function to get one row at a time.
import csv
filename = 'F:/Jupyter Notebook/matplotlib_pygal_csv_json/sitka_weather_2014.csv'
# newline='' lets the csv module handle line endings itself, as its docs recommend.
with open(filename, newline='') as f:
    reader = csv.reader(f)
    # Read one row; the reader no longer yields it afterwards.
    head_row = next(reader)
    for row in reader:
        # Line numbers now start from 2.
        print(reader.line_num, row)
2. Write data to the csv file
There is a reader for reading and, of course, a writer for writing. You can write one row at a time or several rows at once.
import csv
# Numbers and numeric strings are both accepted as cell values.
datas = [['name', 'age'],
         ['Bob', 14],
         ['Tom', 23],
         ['Jerry', '18']]
with open('example.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    # Write one row at a time ...
    for row in datas:
        writer.writerow(row)
    # ... or write several rows in one call. This deliberately writes the
    # same rows a second time to demonstrate both methods.
    writer.writerows(datas)
If newline='' is not specified, an extra blank line is written after every row (this happens on Windows). The above code generates the following.
name,age
Bob,14
Tom,23
Jerry,18
name,age
Bob,14
Tom,23
Jerry,18
3. DictReader and DictWriter objects
Use DictReader to get data like a dictionary, using the first row of the table (usually the header) as the key. You can access the data corresponding to a certain key in each row.
import csv
filename = 'F:/Jupyter Notebook/matplotlib_pygal_csv_json/sitka_weather_2014.csv'
# newline='' lets the csv module handle line endings itself, as its docs recommend.
with open(filename, newline='') as f:
    reader = csv.DictReader(f)
    for row in reader:
        # 'Max TemperatureF' is one of the values in the first row of the
        # table, used here as the key.
        max_temp = row['Max TemperatureF']
        print(max_temp)
Using the DictWriter class, you can write data in the form of a dictionary, and the key is also a header (the first row of the table).
import csv
headers = ['name', 'age']
datas = [
    {'name': 'Bob', 'age': 23},
    {'name': 'Jerry', 'age': 44},
    {'name': 'Tom', 'age': 15},
]
with open('example.csv', 'w', newline='') as f:
    # The headers are passed in here and become the first row of the file.
    writer = csv.DictWriter(f, fieldnames=headers)
    writer.writeheader()
    # Write one row at a time ...
    for row in datas:
        writer.writerow(row)
    # ... or write several rows in one call. This deliberately writes the
    # same rows a second time to demonstrate both methods.
    writer.writerows(datas)
4. Plot the maximum temperature of each day
import csv
from matplotlib import pyplot as plt
from datetime import datetime
# Needed to display Chinese labels correctly.
plt.rcParams['font.sans-serif'] = ['SimHei']
# Needed to display the minus sign correctly.
plt.rcParams['axes.unicode_minus'] = False

filename = 'Python-sitka_weather_2014.csv'
# newline='' lets the csv module handle line endings itself, as its docs recommend.
with open(filename, newline='') as f:
    # csv.reader() takes the file object and creates a reader tied to that file.
    reader = csv.reader(f)
    # Consume the first row (the header) so that only data rows remain.
    header_row = next(reader)
    # int() turns the text in column 1 into numbers that matplotlib can plot.
    highs = [int(row[1]) for row in reader]
print(highs)

# Plot the data.
fig = plt.figure(dpi=128, figsize=(16, 9))
plt.plot(highs, c='red')

# Format the plot.
plt.title('2014年最高气温', fontsize=24)
plt.xlabel('', fontsize=16)
plt.ylabel('最高气温', fontsize=16)
plt.tick_params(axis='both', which='major', labelsize=16)
plt.show()
5. Plot the maximum and minimum temperature of each day
import csv
from matplotlib import pyplot as plt
from datetime import datetime
# Needed to display Chinese labels correctly.
plt.rcParams['font.sans-serif'] = ['SimHei']
# Needed to display the minus sign correctly.
plt.rcParams['axes.unicode_minus'] = False

filename = 'Python-sitka_weather_2014.csv'
dates, highs, lows = [], [], []
# newline='' lets the csv module handle line endings itself, as its docs recommend.
with open(filename, newline='') as f:
    # csv.reader() takes the file object and creates a reader tied to that file.
    reader = csv.reader(f)
    # Consume the first row (the header) so that only data rows remain.
    header_row = next(reader)
    for row in reader:
        # Accept dates written as 2014/1/1 as well as 2014-1-1 by
        # normalizing the separator before parsing.
        current_date = datetime.strptime(row[0].replace('-', '/'), "%Y/%m/%d")
        dates.append(current_date)
        # int() turns the text into numbers that matplotlib can plot.
        highs.append(int(row[1]))
        lows.append(int(row[3]))

# Plot the data: highs in red, lows in blue, with the band between them shaded.
fig = plt.figure(dpi=128, figsize=(16, 9))
plt.plot(dates, highs, c='red', alpha=0.5)
plt.plot(dates, lows, c='blue', alpha=0.5)
plt.fill_between(dates, highs, lows, facecolor='blue', alpha=0.1)

# Format the plot; the chart shows both highs and lows, so the labels say so.
plt.title('2014年最高气温和最低气温', fontsize=24)
plt.xlabel('', fontsize=16)
# Draw the date labels slanted.
fig.autofmt_xdate()
plt.ylabel('气温', fontsize=16)
plt.tick_params(axis='both', which='major', labelsize=16)
plt.show()
# plt.savefig('hish.png')