Quickly master the data visualization tool pyecharts

Introduction to Python data visualization library pyecharts

Drawing tools:

Use Baidu's open source pyecharts library

For details, refer to the official pyecharts documentation.


Data preprocessing

Module installation

pip install pyecharts

Import module

import pandas as pd
# Load the spreadsheet 'taobao.xlsx' into the DataFrame that the following steps clean up.
df = pd.read_excel('taobao.xlsx')

Deduplication

# Drop rows that are exact duplicates of an earlier row.
df.drop_duplicates(inplace=True)
# Drop rows whose values repeat in the selected columns. '列名' ("column name")
# is a placeholder: replace it with the real column names before running.
# drop_duplicates returns a new DataFrame unless inplace=True is given, so
# without it the de-duplicated result would simply be discarded.
df.drop_duplicates(subset=['列名','列名'], inplace=True)

Processing geographic location

# Keep only the part of each location string that precedes the first space.
location_list = [entry.split(' ')[0] for entry in df['location']]
df['location'] = location_list

Process sales

def _parse_sales(raw):
    """Convert one raw sales string into an integer count.

    The last three characters are dropped (presumably a fixed text suffix
    after the number -- confirm against the data) and any '+' is removed.
    A value containing '万' (ten thousand) is scaled by 10000 and rounded,
    so float error cannot truncate the count by one. Any other value is
    converted with int(); text that is not a plain integer is returned
    unchanged, which is what happened to every non-'万' value before.
    """
    text = raw[:-3].replace('+', '')
    if '万' in text:
        return round(float(text.replace('万', '')) * 10000)
    try:
        return int(text)
    except ValueError:
        return text


sales_list = [_parse_sales(sale) for sale in df['sales']]

df['sales'] = sales_list

Make a chart

### Import modules

import jieba
import pandas as pd
from pyecharts import options as opts
from pyecharts.globals import ThemeType
from pyecharts.globals import SymbolType
from pyecharts.charts import Pie, Bar, Map, WordCloud, Page

2.1 Word Cloud

Two methods:

  1. The word cloud built into pyecharts
  2. The wordcloud module (recommended)

Method 1:

# `import jieba` alone does not load the `analyse` subpackage, so import it
# explicitly before calling jieba.analyse.*.
import jieba.analyse

stop_words_txt = 'stop_words.txt'
# Load the stop words, i.e. the words to filter out.
jieba.analyse.set_stop_words(stop_words_txt)
# TextRank keyword extraction; only words with certain parts of speech are kept.
# topK is the number of highest-weighted keywords to return (default 20).
# withWeight controls whether the weights are returned as well (default False).
# NOTE(review): `df1` is not defined in the visible snippets; presumably it is a
# DataFrame with a `comment` text column -- confirm before running.
keywords_count_list = jieba.analyse.textrank(' '.join(df1.comment), topK=100, withWeight=True)
print(keywords_count_list)
word_cloud = (
    WordCloud()
        .add("", keywords_count_list, word_size_range=[5, 50],
             shape=SymbolType.TRIANGLE,
            )
        .set_global_opts(title_opts=opts.TitleOpts(title="这里输入标题"))
)
# The line below renders the chart to an HTML file in the current folder.
#     word_cloud.render('WordCloud.html')

Method 2 (recommended; can be customized):

pip install wordcloud

import jieba
import numpy as np
import matplotlib.pyplot as plt

from PIL import Image
from wordcloud import WordCloud


# Read the source text. Without this step `text` is undefined when it is
# segmented below.
# NOTE(review): '1.txt' is the sample path from the original commented-out
# line; point it at your own corpus.
with open('1.txt', encoding='utf-8') as text_file:
    text = text_file.read()

# Chinese word segmentation: join the tokens with spaces so WordCloud can split them.
text = ' '.join(jieba.cut(text))

# Build the word cloud, using the picture as a mask.
# "input_picture" and 'output_picture' are placeholders for real image paths.
mask = np.array(Image.open("input_picture"))
# Raw string: the Windows path contains backslashes that would otherwise be
# read as (invalid) escape sequences.
wc = WordCloud(mask=mask, font_path=r'C:\Windows\Fonts\SimHei.ttf', mode='RGBA').generate(text)

# Display the word cloud
# plt.imshow(wc, interpolation='bilinear')
# plt.axis("off")
# plt.show()

# Save to a file
# NOTE(review): the output name presumably needs an image extension such as
# .png for the save to succeed -- confirm.
wc.to_file('output_picture')

2.2 Bar Chart

Basic bar chart:

# Faker is the sample-data helper bundled with pyecharts; import it here because
# this snippet uses it before any other import of it.
from pyecharts.faker import Faker

# Basic bar chart: one series ("商家A") plotted over Faker's sample day labels.
bar = (
    Bar()
    .add_xaxis(Faker.days_attrs)
    .add_yaxis("商家A", Faker.days_values)
    .set_global_opts(
        title_opts=opts.TitleOpts(title="Bar-DataZoom(slider+inside)"),
    )
#     .render("bar_datazoom_both.html")
)

Horizontal bar chart:

# Fragment, not a standalone statement: chain these two calls onto a Bar() builder.
# Presumably reversal_axis() swaps the axes so the bars run horizontally (per the
# heading); the series option then places each value label to the right of its bar.
.reversal_axis()
.set_series_opts(label_opts=opts.LabelOpts(position="right"))

Bar chart with a zoom slider:

# Fragment: presumably meant to be passed as a keyword argument to
# set_global_opts() of the bar chart -- confirm against the pyecharts docs.
datazoom_opts=[opts.DataZoomOpts()]

2.3 Pie Chart

The data comes from: standard_goods_comments.xlsx

The cup-size counts are used for the display here:

[('B', 1909), ('C', 810), ('A', 696), ('D', 259)]

Multiple pie charts, one per cup size:

from pyecharts import options as opts
from pyecharts.charts import Pie
from pyecharts.commons.utils import JsCode


# JavaScript label formatter: prints "<name> : <value>%"; the 'other' slice gets
# three leading newlines so its label sits lower than the main one.
fn = """
    function(params) {
        if(params.name == 'other')
            return '\\n\\n\\n' + params.name + ' : ' + params.value + '%';
        return params.name + ' : ' + params.value + '%';
    }
    """


def new_label_opts():
    """Return centered label options whose text is produced by the ``fn`` formatter."""
    js_formatter = JsCode(fn)
    return opts.LabelOpts(position="center", formatter=js_formatter)


# Review counts per cup size (the sample data shown earlier); their sum is the
# denominator for every percentage below.
cup_counts = {'A_cup': 696, 'B_cup': 1909, 'C_cup': 810, 'D_cup': 259}
total_cup = sum(cup_counts.values())


def _cup_vs_other(cup_name):
    """Return [[cup_name, pct], ['other', 100 - pct]] with whole-number percentages.

    Rounding happens after scaling to percent, so values such as
    7.000000000000001 never reach the label, and both slices always add up
    to exactly 100.
    """
    pct = round(cup_counts[cup_name] / total_cup * 100)
    return [[cup_name, pct], ['other', 100 - pct]]


# One ring per cup size, laid out on a 2x2 grid.
pie = Pie()
for _cup_name, _center in (
    ('A_cup', ["20%", "30%"]),
    ('B_cup', ["55%", "30%"]),
    ('C_cup', ["20%", "70%"]),
    ('D_cup', ["55%", "70%"]),
):
    pie.add(
        "",
        _cup_vs_other(_cup_name),
        center=_center,
        radius=[60, 80],
        label_opts=new_label_opts(),
    )
pie.set_global_opts(
    title_opts=opts.TitleOpts(title="Cup-多饼图"),
    legend_opts=opts.LegendOpts(
        type_="scroll", pos_top="20%", pos_left="80%", orient="vertical"
    ),
)
# pie.render("mutiple_pie.html")

2.3.1 Rose diagram

Epidemic display:

from pyecharts import options as opts
from pyecharts.charts import Pie
from pyecharts.faker import Faker


# Two rose charts side by side over the same sample labels: the left one uses
# the values 10..70 with rosetype "radius" and hidden labels, the right one
# uses the same values in reverse order with rosetype "area".
v = Faker.choose()
rising_values = range(10, 80, 10)
falling_values = rising_values[::-1]

pie = Pie()
pie.add(
    "",
    list(map(list, zip(v, rising_values))),
    radius=["30%", "75%"],
    center=["25%", "50%"],
    rosetype="radius",
    label_opts=opts.LabelOpts(is_show=False),
)
pie.add(
    "",
    list(map(list, zip(v, falling_values))),
    radius=["30%", "75%"],
    center=["75%", "50%"],
    rosetype="area",
)
pie.set_global_opts(title_opts=opts.TitleOpts(title="Pie-玫瑰图示例"))

2.4 Map

from pyecharts import options as opts
from pyecharts.charts import Map
from pyecharts.faker import Faker

# Per-province store counts shown on the "china" map.
store_counts = [['广东', 100], ['广西', 100], ['湖南', 19]]

# NOTE(review): the name `map` shadows Python's built-in map(); it is kept
# because later code may refer to it, but a different name would be safer.
map = Map()
map.add("店铺数量", store_counts, "china")
map.set_global_opts(
    title_opts=opts.TitleOpts(title="商家店铺地址分布图"),
    visualmap_opts=opts.VisualMapOpts(max_=200),
)

2.5 Liquid Fill Chart

Weather example:

from pyecharts import options as opts
from pyecharts.charts import Liquid

liquid_chart = Liquid()
# Per the original note: the first value is the one displayed, the second is the
# amount of water.
liquid_chart.add("lq", [0.45, 0.5])
liquid_chart.set_global_opts(title_opts=opts.TitleOpts(title="今日湿度"))
# As in the original chained form, `liquid` receives whatever render() returns,
# not the chart object itself.
liquid = liquid_chart.render("liquid_base.html")

Integrated chart

Multi-chart integration

# Call Page.save_resize_html on the page file, passing the layout config file.
# NOTE(review): presumably 'page_draggable_layout.html' must already have been
# rendered and 'chart_config.json' saved from the draggable page -- confirm
# against the pyecharts documentation.
Page.save_resize_html('page_draggable_layout.html',cfg_file= 'chart_config.json')

Reference documents:

  1. Quickly master the basic operations of pyecharts commonly used charts in 5 minutes
  2. pyecharts official document

Recommended reading:

  1. Use xpath to crawl data
  2. jupyter notebook use
  3. BeautifulSoup crawls the top 250 Douban movies
  4. An article takes you to master the requests module
  5. Python web crawler basics-BeautifulSoup

Guess you like

Origin blog.csdn.net/qq_45176548/article/details/112221932