Pyecharts Amazon Order Visualization

This article uses three months of order data from a clothing seller on the Amazon platform and visualizes it with pyecharts.

Import and preprocess the data

import pandas as pd
import numpy as np
from pyecharts import options as opts
from pyecharts.globals import ThemeType
from pyecharts.charts import Line,Bar,Map,PictorialBar,Pie,WordCloud,Page
from pyecharts.commons.utils import JsCode
import requests
from collections import Counter
# Load the seller's order report workbook and print the column / dtype summary.
orders = pd.read_excel('亚马逊入驻商订单报表.xlsx')
orders.info()

insert image description here

# Load the market lookup workbook (state names and abbreviations) and print its summary.
mkt = pd.read_excel('市场.xlsx')
mkt.info()

insert image description here

# Drop the records whose order timestamp is empty.
orders.dropna(inplace=True, subset=['下单时间'])

# Parse the order timestamp into a datetime column; the day, weekday and hour
# fields are extracted from it.
orders['date'] = pd.to_datetime(orders['下单时间'], utc=False)

# Replace non-breaking spaces (\xa0) in the English state names with plain spaces.
mkt['美国州名英文'] = mkt['美国州名英文'].replace('\xa0', ' ', regex=True)
# 对配送州字段进行处理,原始数据中既有州缩写也有全称,统一为全称呼;
def states(s):
    """Map a raw shipping-state value to the full state name listed in ``mkt``.

    The raw column mixes full names and abbreviations. The value is
    upper-cased and stripped of dots and surrounding whitespace, then looked
    up first against the full English names (``mkt.美国州名英文``) and, failing
    that, against the abbreviations (``mkt.州名简写``). Both comparisons are
    case-insensitive.

    Args:
        s: Raw state value from the orders table.

    Returns:
        The matching entry of ``mkt.美国州名英文``. Missing values (NaN/None)
        are returned unchanged.

    Raises:
        IndexError: If the value matches neither a full name nor an
            abbreviation. The exception type is the same one the previous
            positional lookup raised, now with a message naming the value.
    """
    # A missing value has nothing to resolve; hand it back instead of failing
    # on ``.upper()``.
    if pd.isna(s):
        return s

    key = s.upper().replace('.', '').strip()
    full_names = mkt.美国州名英文

    by_name = full_names[full_names.str.upper() == key]
    if not by_name.empty:
        return by_name.iloc[0]

    # Normalise the abbreviation column the same way as the key so that case
    # or padding differences in the lookup table cannot cause a miss.
    abbreviations = mkt.州名简写.astype(str).str.strip().str.upper()
    by_abbreviation = full_names[abbreviations == key]
    if by_abbreviation.empty:
        raise IndexError('unrecognised shipping state: {!r}'.format(s))
    return by_abbreviation.iloc[0]
# Resolve every raw state value to its full name, then repair the names whose
# second word is spelled in lower case.
orders['配送州'] = orders['配送州'].apply(states)
orders['配送州'] = (
    orders['配送州']
    .str.replace('South dakota', 'South Dakota')
    .str.replace('New mexico', 'New Mexico')
    .str.replace('South carolina', 'South Carolina')
    .str.replace('New hampshire', 'New Hampshire')
    .str.replace('New jersey', 'New Jersey')
)

# Assemble the analysis table used by all charts: identifiers, quantity and
# price, the derived revenue, the parts of the order timestamp, and the state.
data = pd.DataFrame({
    '订单号': orders['订单ID'],
    '用户': orders['买家姓名'],
    '产品': orders['产品名称'],
    '数量': orders['产品数量'],
    '单价': orders['产品价格'],
    '销售额': orders['产品数量'] * orders['产品价格'],  # quantity x unit price
    '日期': orders['date'].dt.day,
    '星期': orders['date'].dt.day_name(),
    '时间': orders['date'].dt.hour,
    '配送州': orders['配送州'],
})
data.head()

insert image description here

Time attributes

Order volume and customer unit price in each time period

# Custom chart background: a vertical ECharts gradient at 20% opacity.
bg_color_js = " ".join((
    "new echarts.graphic.LinearGradient(0, 0, 0, 1,",
    "[{offset: 0, color: 'rgba(128, 255, 165, 0.2)'}, {offset: 1, color: 'rgba(1, 191, 236, 0.2)'}], false)",
))

# Series colour: the same vertical gradient, written without an alpha component.
color_js = "\n    ".join((
    "new echarts.graphic.LinearGradient(0, 0, 0, 1,",
    "[{offset: 0, color: 'rgba(128, 255, 165)'}, {offset: 1, color: 'rgba(1, 191, 236)'}], false)",
))


# Per-hour aggregates: number of order rows and total revenue, indexed by the
# hour of day taken from the order timestamp.
hour_df = data.groupby('时间').agg({'订单号': 'count', '销售额': 'sum'})
hour_df.columns = ['订单量', '销售额']

# Average order value, rounded to 2 decimals. It is kept numeric (not formatted
# into strings) so it sorts and compares as a number and the chart receives
# numeric values, matching how geo_df computes the same metric.
hour_df['平均客单价'] = (hour_df['销售额'] / hour_df['订单量']).round(2)
hour_df.head()

insert image description here

def hour_view():
    """Build the combined hourly chart from ``hour_df``.

    Order volume is drawn as a smoothed line on the left axis (index 0) and
    the average order value as gradient bars on an added right-hand axis
    (index 1).

    Returns:
        The Line chart with the Bar chart overlapped onto it. In a notebook,
        call ``.render_notebook()`` on the result to display it.
    """
    def hour_labels():
        # One category label per hour bucket, e.g. "7点".
        return ['{}点'.format(hour) for hour in hour_df.index.tolist()]

    volume_line = Line(
        init_opts=opts.InitOpts(bg_color=JsCode(bg_color_js), chart_id='hour_chart')
    )
    volume_line.add_xaxis(hour_labels())
    volume_line.add_yaxis(
        '订单量',
        hour_df.订单量.tolist(),
        yaxis_index=0,
        is_smooth=True,
        symbol='circle',
        is_symbol_show=False,
        linestyle_opts=opts.LineStyleOpts(color='#04c1ea', width=3),
        itemstyle_opts=opts.ItemStyleOpts(color='#04c1ea'),
    )
    # Second value axis on the right for the average order value; its axis
    # line and tick marks are hidden.
    volume_line.extend_axis(
        yaxis=opts.AxisOpts(
            name='平均客单价',
            min_=15,
            position="right",
            axisline_opts=opts.AxisLineOpts(is_show=False),
            axistick_opts=opts.AxisTickOpts(is_show=False),
        )
    )
    volume_line.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
    volume_line.set_global_opts(
        title_opts=opts.TitleOpts(title='各时间段订单量和客单价', pos_left='center'),
        legend_opts=opts.LegendOpts(is_show=False),
        tooltip_opts=opts.TooltipOpts(trigger='axis', axis_pointer_type='cross'),
        yaxis_opts=opts.AxisOpts(
            name='订单量',
            axisline_opts=opts.AxisLineOpts(is_show=False),
            axistick_opts=opts.AxisTickOpts(is_show=False),
            splitline_opts=opts.SplitLineOpts(
                is_show=True,
                linestyle_opts=opts.LineStyleOpts(color='#E0E6F1'),
            ),
        ),
    )

    value_bars = Bar()
    value_bars.add_xaxis(hour_labels())
    value_bars.add_yaxis(
        '平均客单价',
        hour_df.平均客单价.tolist(),
        yaxis_index=1,
        itemstyle_opts=opts.ItemStyleOpts(color=JsCode(color_js), opacity=0.7),
        label_opts=opts.LabelOpts(is_show=False),
    )

    return volume_line.overlap(value_bars)


hour_view()

insert image description here
Order volume peaks between 7:00 and 11:00, which differs from the habits of domestic (Chinese) shoppers; the
three hours with the highest average order value are 13:00, 6:00, and 5:00.

Weekly order volume distribution

# Weekday names as an ordered categorical so that sorting follows the calendar
# (Monday to Sunday) instead of the alphabet.
cat_day_of_week = pd.api.types.CategoricalDtype(
    categories=['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'],
    ordered=True,
)

# Number of order rows per weekday.
week_df = data.groupby('星期')['订单号'].count().reset_index()
week_df['星期'] = week_df['星期'].astype(cat_day_of_week)
week_df = week_df.sort_values(by=['星期'])
week_df

insert image description here

def week_view():
    """Build the filled line chart of order counts per weekday from ``week_df``.

    The area under the smoothed line is filled with the shared gradient, the
    category axis has no boundary gap, and the value axis starts at 180.

    Returns:
        The configured Line chart. In a notebook, call ``.render_notebook()``
        on the result to display it.
    """
    value_axis = opts.AxisOpts(
        axisline_opts=opts.AxisLineOpts(is_show=False),
        axistick_opts=opts.AxisTickOpts(is_show=False),
        min_=180,
        splitline_opts=opts.SplitLineOpts(
            is_show=True,
            linestyle_opts=opts.LineStyleOpts(color='#E0E6F1'),
        ),
    )

    area_line = Line(
        init_opts=opts.InitOpts(bg_color=JsCode(bg_color_js), chart_id='week_chart')
    )
    area_line.add_xaxis(week_df.星期.tolist())
    area_line.add_yaxis(
        '订单量',
        week_df.订单号.tolist(),
        is_smooth=True,
        symbol='circle',
        is_symbol_show=False,  # hide the point markers
        linestyle_opts=opts.LineStyleOpts(color="#fff"),
        areastyle_opts=opts.AreaStyleOpts(color=JsCode(color_js), opacity=1),
        itemstyle_opts=opts.ItemStyleOpts(color="#5aecbb"),
    )
    area_line.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
    area_line.set_global_opts(
        title_opts=opts.TitleOpts(title='各周段订单量', pos_left='center'),
        legend_opts=opts.LegendOpts(is_show=False),
        tooltip_opts=opts.TooltipOpts(trigger='axis', axis_pointer_type='cross'),
        # No boundary gap: the first and last categories sit on the plot edges.
        xaxis_opts=opts.AxisOpts(boundary_gap=False),
        yaxis_opts=value_axis,
    )
    return area_line


week_view()

insert image description here
Wednesday and Friday are the peak periods for orders, while Tuesday is the lowest for the week.

State orders

# Per-state aggregates: number of order rows and total revenue, sorted so the
# state with the most orders comes first.
geo_df = data.groupby(['配送州']).agg({'订单号': 'count', '销售额': 'sum'}).reset_index()
geo_df.columns = ['配送州', '订单量', '销售额']
geo_df.sort_values(['订单量'], ascending=False, inplace=True)

# (state, order count) pairs for the map series, built column-wise instead of
# with a row-by-row iterrows loop.
data_pair = list(zip(geo_df['配送州'].tolist(), geo_df['订单量'].tolist()))

# Running share of all orders (in the sorted order above) and average order
# value, both rounded to 2 decimals.
geo_df['累计'] = (geo_df['订单量'].cumsum() / geo_df['订单量'].sum()).round(2)
geo_df['平均客单价'] = (geo_df['销售额'] / geo_df['订单量']).round(2)
geo_df.head()

insert image description here

Orders and cumulative distribution of orders by state

def pro_ord_view():
    """Build the per-state chart of order counts with the cumulative share.

    Order counts from ``geo_df`` are drawn as gradient bars on the left axis
    (index 0); the cumulative share column is drawn as a smoothed line on an
    added right-hand axis (index 1). State labels are rotated by -45 degrees.

    Returns:
        The Bar chart with the Line chart overlapped onto it. In a notebook,
        call ``.render_notebook()`` on the result to display it.
    """
    count_bars = Bar(init_opts=opts.InitOpts(chart_id='pro_ord_chart'))
    count_bars.add_xaxis(geo_df.配送州.tolist())
    count_bars.add_yaxis(
        '订单量',
        geo_df.订单量.tolist(),
        yaxis_index=0,
        itemstyle_opts=opts.ItemStyleOpts(color=JsCode(color_js), opacity=0.7),
    )
    # Right-hand axis for the cumulative share; axis line and ticks hidden.
    count_bars.extend_axis(
        yaxis=opts.AxisOpts(
            name='订单累计比',
            position="right",
            axisline_opts=opts.AxisLineOpts(is_show=False),
            axistick_opts=opts.AxisTickOpts(is_show=False),
        )
    )
    count_bars.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
    count_bars.set_global_opts(
        title_opts=opts.TitleOpts(title='各州订单及订单累计分布', pos_left='center'),
        legend_opts=opts.LegendOpts(is_show=False),
        tooltip_opts=opts.TooltipOpts(trigger='axis', axis_pointer_type='cross'),
        yaxis_opts=opts.AxisOpts(
            name='订单数',
            axisline_opts=opts.AxisLineOpts(is_show=False),
            axistick_opts=opts.AxisTickOpts(is_show=False),
            splitline_opts=opts.SplitLineOpts(
                is_show=True,
                linestyle_opts=opts.LineStyleOpts(color='#E0E6F1'),
            ),
        ),
        xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-45)),
    )

    share_line = Line()
    share_line.add_xaxis(geo_df.配送州.tolist())
    share_line.add_yaxis(
        '订单累计比',
        geo_df.累计.tolist(),
        is_smooth=True,
        is_symbol_show=False,
        symbol='circle',
        yaxis_index=1,
        linestyle_opts=opts.LineStyleOpts(color='#04c1ea', width=3),
        itemstyle_opts=opts.ItemStyleOpts(color='#04c1ea'),
        label_opts=opts.LabelOpts(is_show=False),
    )

    return count_bars.overlap(share_line)


pro_ord_view()

insert image description here

# Download the USA GeoJSON used to register the custom map. A timeout keeps a
# stalled connection from hanging the script, and raise_for_status() turns an
# HTTP error page into an immediate, explicit failure instead of a JSON
# decoding error (or a broken map) later on.
_geo_response = requests.get(
    url="https://echarts.apache.org/examples/data/asset/geo/USA.json",
    timeout=30,
)
_geo_response.raise_for_status()
GEO_data = _geo_response.json()

# JavaScript object literal passed as the third argument of
# echarts.registerMap: it repositions and resizes Alaska, Hawaii and
# Puerto Rico on the map (the JS comments inside are part of the string).
area_move = """{
        Alaska: {              // 把阿拉斯加移到美国主大陆左下方
            left: -128,
            top: 25,
            width: 15
        },
        Hawaii: {
            left: -110,        // 夏威夷
            top: 25,
            width: 5
        },
        'Puerto Rico': {       // 波多黎各
            left: -76,
            top: 26,
            width: 2
        }
    }"""
def pro_map_view():
    """Build the choropleth map of order counts per US state.

    Registers the downloaded ``GEO_data`` GeoJSON with ECharts under the name
    ``'USA'`` (with the ``area_move`` overrides) and colours each state from
    ``data_pair`` using a piecewise visual map.

    Returns:
        The configured Map chart. In a notebook, call ``.render_notebook()``
        on the result to display it.
    """
    import json  # local import: only this chart serialises the GeoJSON

    # Embed the GeoJSON as real JSON. Formatting the dict directly would emit
    # its Python repr (None/True/False, Python-style escapes), which is not
    # guaranteed to be valid JavaScript.
    register_usa_js = """echarts.registerMap('USA', {}, {});""".format(
        json.dumps(GEO_data), area_move
    )

    # Order-count buckets shown in the legend, largest first.
    order_buckets = [
        {'min': 101},
        {'min': 61, 'max': 100},
        {'min': 31, 'max': 60},
        {'min': 11, 'max': 30},
        {'min': 1, 'max': 10},
    ]

    # Named usa_map rather than map so the builtin is not shadowed.
    usa_map = (
        Map(init_opts=opts.InitOpts(chart_id='pro_map_chart'))
        .add_js_funcs(register_usa_js)
        .add(
            '订单量',
            data_pair=data_pair,
            maptype='USA',
            is_roam=False,  # no mouse zoom / pan
            is_map_symbol_show=False,  # hide the per-region marker symbols
            zoom=1.1,  # zoom factor of the current view
            label_opts=opts.LabelOpts(is_show=False),
        )
        .set_global_opts(
            legend_opts=opts.LegendOpts(is_show=False),
            title_opts=opts.TitleOpts(title="美国各州订单量分布", pos_left='center'),
            visualmap_opts=opts.VisualMapOpts(
                is_piecewise=True,
                pos_left='2%',
                pos_top='65%',
                range_text=['订单量', ''],  # text at the two ends of the legend
                pieces=order_buckets,
                range_color=["#CCD3D9", "#E6B6C2", "#D4587A", "#DC364C"],
            ),
        )
    )
    return usa_map


pro_map_view()

insert image description here

Order price distribution by state

# State and average order value only, highest average first.
pro_price = geo_df[['配送州', '平均客单价']].sort_values(by='平均客单价', ascending=False)
pro_price.head()

insert image description here

def pro_price_view():
    """Build the bar chart of the average order value per state from ``pro_price``.

    The value axis is limited to the integer range just below the smallest
    and just above the largest average; state labels are rotated by -45
    degrees.

    Returns:
        The configured Bar chart. In a notebook, call ``.render_notebook()``
        on the result to display it.
    """
    averages = pro_price.平均客单价
    value_axis = opts.AxisOpts(
        min_=int(averages.min() - 1),
        max_=int(averages.max() + 1),
        axisline_opts=opts.AxisLineOpts(is_show=False),
        axistick_opts=opts.AxisTickOpts(is_show=False),
        splitline_opts=opts.SplitLineOpts(
            is_show=True,
            linestyle_opts=opts.LineStyleOpts(color='#E0E6F1'),
        ),
    )

    price_bars = Bar(init_opts=opts.InitOpts(chart_id='pro_price_chart'))
    price_bars.add_xaxis(pro_price.配送州.tolist())
    price_bars.add_yaxis(
        '平均客单价',
        averages.tolist(),
        yaxis_index=0,
        itemstyle_opts=opts.ItemStyleOpts(color=JsCode(color_js)),
    )
    price_bars.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
    price_bars.set_global_opts(
        title_opts=opts.TitleOpts(title='各州平均客单价', pos_left='center'),
        legend_opts=opts.LegendOpts(is_show=False),
        tooltip_opts=opts.TooltipOpts(trigger='axis', axis_pointer_type='shadow'),
        yaxis_opts=value_axis,
        xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-45)),
    )
    return price_bars


pro_price_view()

Product attributes

gender attribute

Judging from the keywords in the product names, 93% of the products are women's items.

# Classify each product title by gender keyword: f counts women's items,
# m counts men's items. "WOMEN"/"GIRL" is tested first because "WOMEN" itself
# contains "MEN".
# NOTE(review): 'MEN' is a plain substring test, so titles containing words
# such as "GARMENT" would also count as men's — confirm against the data.
f, m = 0, 0
for i in data['产品']:
    # Missing titles (NaN) are not strings and carry no keyword; skip them.
    if not isinstance(i, str):
        continue
    title = i.upper()
    if 'WOMEN' in title or 'GIRL' in title:
        f += 1
    elif 'MEN' in title:
        m += 1

# Percentage split among the classified titles; falls back to 0 when no title
# matched any keyword instead of dividing by zero.
classified = f + m
f_p = round(f / classified * 100) if classified else 0
m_p = round(m / classified * 100) if classified else 0

# SVG path outlines used as PictorialBar symbols: gender_view draws index 0
# for the men's percentage and index 1 for the women's percentage.
symbols = [
    'path://M18.2629891,11.7131596 L6.8091608,11.7131596 C1.6685112,11.7131596 0,13.032145 0,18.6237673 L0,34.9928467 C0,38.1719847 4.28388932,38.1719847 4.28388932,34.9928467 L4.65591984,20.0216948 L5.74941883,20.0216948 L5.74941883,61.000787 C5.74941883,65.2508314 11.5891201,65.1268798 11.5891201,61.000787 L11.9611506,37.2137775 L13.1110872,37.2137775 L13.4831177,61.000787 C13.4831177,65.1268798 19.3114787,65.2508314 19.3114787,61.000787 L19.3114787,20.0216948 L20.4162301,20.0216948 L20.7882606,34.9928467 C20.7882606,38.1719847 25.0721499,38.1719847 25.0721499,34.9928467 L25.0721499,18.6237673 C25.0721499,13.032145 23.4038145,11.7131596 18.2629891,11.7131596 M12.5361629,1.11022302e-13 C15.4784742,1.11022302e-13 17.8684539,2.38997966 17.8684539,5.33237894 C17.8684539,8.27469031 15.4784742,10.66467 12.5361629,10.66467 C9.59376358,10.66467 7.20378392,8.27469031 7.20378392,5.33237894 C7.20378392,2.38997966 9.59376358,1.11022302e-13 12.5361629,1.11022302e-13',
    'path://M28.9624207,31.5315864 L24.4142575,16.4793596 C23.5227152,13.8063773 20.8817445,11.7111088 17.0107398,11.7111088 L12.112691,11.7111088 C8.24168636,11.7111088 5.60080331,13.8064652 4.70917331,16.4793596 L0.149791395,31.5315864 C-0.786976655,34.7595013 2.9373074,35.9147532 3.9192135,32.890727 L8.72689855,19.1296485 L9.2799493,19.1296485 C9.2799493,19.1296485 2.95992025,43.7750224 2.70031069,44.6924335 C2.56498417,45.1567684 2.74553639,45.4852068 3.24205501,45.4852068 L8.704461,45.4852068 L8.704461,61.6700801 C8.704461,64.9659872 13.625035,64.9659872 13.625035,61.6700801 L13.625035,45.360657 L15.5097899,45.360657 L15.4984835,61.6700801 C15.4984835,64.9659872 20.4191451,64.9659872 20.4191451,61.6700801 L20.4191451,45.4852068 L25.8814635,45.4852068 C26.3667633,45.4852068 26.5586219,45.1567684 26.4345142,44.6924335 C26.1636859,43.7750224 19.8436568,19.1296485 19.8436568,19.1296485 L20.3966199,19.1296485 L25.2043926,32.890727 C26.1862111,35.9147532 29.9105828,34.7595013 28.9625083,31.5315864 L28.9624207,31.5315864 Z M14.5617154,0 C17.4960397,0 19.8773132,2.3898427 19.8773132,5.33453001 C19.8773132,8.27930527 17.4960397,10.66906 14.5617154,10.66906 C11.6274788,10.66906 9.24611767,8.27930527 9.24611767,5.33453001 C9.24611767,2.3898427 11.6274788,0 14.5617154,0 L14.5617154,0 Z',
]
def gender_view():
    """Build the pictorial bar comparing men's and women's product shares.

    Two series are stacked visually: the first holds the actual percentages
    (``m_p`` and ``f_p``) clipped into the figure outlines from ``symbols``,
    the second is a fixed-100 translucent copy of the same outlines that
    serves as the background.

    Returns:
        The configured PictorialBar chart. In a notebook, call
        ``.render_notebook()`` on the result to display it.
    """
    def figure(value, outline, colour):
        # One data item: the outline spans the full 0-100 range and carries
        # its own colour.
        return {
            "value": value,
            "symbol": outline,
            'symbolBoundingData': 100,
            "itemStyle": {"normal": {"color": colour}},
        }

    pbar = PictorialBar(
        init_opts=opts.InitOpts(bg_color=JsCode(bg_color_js), chart_id='gender_chart')
    )
    pbar.add_xaxis([0, 1])

    # Foreground: the values to display.
    pbar.add_yaxis(
        "",
        [
            figure(m_p, symbols[0], 'rgba(105,204,230)'),
            figure(f_p, symbols[1], 'rgba(255,130,130)'),
        ],
        label_opts=opts.LabelOpts(
            is_show=True,
            position='inside',
            font_family='Arial',
            font_weight='bolder',
            font_size=40,
            formatter='{c}%',
        ),
        is_symbol_clip=True,
    )

    # Background: the same figures at a fixed value of 100.
    pbar.add_yaxis(
        "",
        [
            figure(100, symbols[0], 'rgba(105,204,230,0.40)'),
            figure(100, symbols[1], 'rgba(255,130,130,0.40)'),
        ],
        category_gap='35%',  # gap between the two figures
        label_opts=opts.LabelOpts(is_show=False),
        is_symbol_clip=True,
    )

    pbar.set_global_opts(
        title_opts=opts.TitleOpts(
            title="男款商品 VS 女款商品",
            subtitle='依据订单商品名称中的关键词判断, 如“women”,“girl”等。',
            pos_left='center',
        ),
        tooltip_opts=opts.TooltipOpts(is_show=False),  # no tooltip on hover
        legend_opts=opts.LegendOpts(is_show=False),
        xaxis_opts=opts.AxisOpts(is_show=False),
        yaxis_opts=opts.AxisOpts(is_show=False, max_=100),
    )
    return pbar


gender_view()

insert image description here

size and color

Which clothing sizes are bought most often?
Which colors are the most popular?

# Tokenise the product titles: turn brackets, commas and non-breaking spaces
# into spaces, unify two spellings, then split on single spaces.
_TITLE_REPLACEMENTS = (
    ('(', ' '),
    (')', ' '),
    (',', ' '),
    ('\xa0', ' '),
    ('T Shirt', 'T-Shirt'),
    ("Women's", 'Womens'),
)

word_list = []
for item in data['产品']:
    try:
        text = item
        for old, new in _TITLE_REPLACEMENTS:
            text = text.replace(old, new)
        word_list.extend(text.split(' '))
    except AttributeError:
        # Values without string methods (e.g. missing titles) are skipped.
        pass

# Count how often each clothing size appears among the title tokens.
# Tokens are matched case-insensitively, and the upper-cased label is what
# gets counted, so "xl" and "XL" land in the same bucket instead of being
# reported as two different sizes.
size_labels = {'L', 'XL', '2XL', '3XL', 'M', 'S', 'XS', '4XL'}
size_list = [word.upper() for word in word_list if word.upper() in size_labels]

c = Counter(size_list)
c

insert image description here

# Count how often each colour word appears among the title tokens
# (exact, case-sensitive match).
color_list = [
    word
    for word in word_list
    if word in ['Black', 'Blue', 'Green', 'Grey', 'White', 'Yellow', 'Purple', 'Pink']
]

c1 = Counter(color_list)
c1

insert image description here

def size_col_view():
    """Build the two side-by-side ring charts for sizes and colours.

    The left ring shows the ten most common entries of the size counter
    ``c``, the right ring those of the colour counter ``c1``. Three titles are
    placed manually: one heading and one label in the centre of each ring.

    Returns:
        The configured Pie chart. In a notebook, call ``.render_notebook()``
        on the result to display it.
    """
    def heading(text, left, top, size):
        # Raw title option positioned by percentages.
        return {
            'text': text,
            'left': left,
            'top': top,
            'textStyle': {'color': '#282828', 'fontSize': size},
        }

    def drop_shadow():
        # Shadow settings shared by both rings (a fresh dict per ring).
        return {
            'shadowColor': 'rgba(0, 0, 0, .5)',  # shadow colour
            'shadowBlur': 5,  # shadow size
            'shadowOffsetY': 5,  # vertical shadow offset
            'shadowOffsetX': 5,  # horizontal shadow offset
        }

    size_style = drop_shadow()
    size_style['opacity'] = '0.7'  # only the size ring is translucent

    pie = Pie(init_opts=opts.InitOpts(chart_id='size_col_chart'))
    pie.add(
        "",
        c.most_common(10),
        radius=["30%", "50%"],
        center=["25%", "50%"],
        label_opts=opts.LabelOpts(is_show=True, formatter='{b}:{d}%'),
        itemstyle_opts={'normal': size_style},
    )
    pie.add(
        "",
        c1.most_common(10),
        radius=["30%", "50%"],
        center=["75%", "50%"],
        label_opts=opts.LabelOpts(is_show=True, formatter='{b}:{d}%'),
        itemstyle_opts={'normal': drop_shadow()},
    )
    pie.set_global_opts(
        title_opts=[
            heading('商品属性', 'center', '5%', 20),
            heading('SIZE', '23%', '48%', 17),
            heading('COLOR', '72%', '48%', 17),
        ],
        tooltip_opts=opts.TooltipOpts(is_show=False),
        legend_opts=opts.LegendOpts(is_show=False),
        visualmap_opts=opts.VisualMapOpts(
            is_show=False,
            max_=300,
            range_color=['rgb(1, 191, 236)', 'rgb(128, 255, 165)'],
        ),
    )
    return pie


size_col_view()

insert image description here

# The 100 most frequent title tokens for the word cloud. Empty tokens (left by
# splitting on consecutive spaces) are filtered out explicitly instead of
# assuming they are always the single most frequent entry.
c2 = Counter(word for word in word_list if word).most_common(100)

def cloud_view():
    """Build the word cloud of the most frequent product-title tokens.

    Uses the ``(word, count)`` pairs in ``c2`` and the image file
    ``amazon.jpg`` as the mask.

    Returns:
        The configured WordCloud chart. In a notebook, call
        ``.render_notebook()`` on the result to display it.
    """
    # The parenthesised chain is now closed; the previous version never closed
    # the opening "(" and therefore could not be parsed at all.
    cloud = (
        WordCloud(init_opts=opts.InitOpts(chart_id='cloud_chart'))
        .add(
            '',
            c2,
            mask_image='amazon.jpg',
            width='900px',
            height='900px',
            word_size_range=[10, 50],
            word_gap=10,
        )
    )
    # NOTE: according to the original author, the first run may display a
    # blank chart; running it once more makes the cloud appear.
    return cloud


cloud_view()

insert image description here

# Dashboard title
def title_view(title = '亚马逊订单可视化'):
    """Build an otherwise empty Pie chart that only shows the dashboard title.

    Args:
        title: Text to display, centred horizontally and vertically.

    Returns:
        The configured Pie chart. In a notebook, call ``.render_notebook()``
        on the result to display it.
    """
    banner = Pie(init_opts=opts.InitOpts(chart_id='title_chart'))
    banner.set_global_opts(
        title_opts=opts.TitleOpts(
            title=title,
            title_textstyle_opts=opts.TextStyleOpts(font_size=55),
            pos_left='center',
            pos_top='middle',
        ),
    )
    return banner


title_view()

Generate the dashboard

When building each sub-chart, set chart_id in the chart's initial configuration (opts.InitOpts).
Otherwise pyecharts generates a random chart_id for the chart on every run, so the "cid" values stored in the saved JSON file no longer match and the layout file cannot be reused.
insert image description here
Use the Page class with the draggable layout to arrange the dashboard by drag and drop, then click "Save Config" in the upper-left corner of the page to generate the chart_config.json file.

import os  # only needed here, to check whether the saved layout file exists

# Step 1: render all charts into one draggable page. Open the generated HTML
# in a browser, arrange the charts, and click "Save Config" to produce
# chart_config.json.
page = Page(layout=Page.DraggablePageLayout, page_title="亚马逊订单数据分析")
page.add(
    title_view(),
    hour_view(),
    week_view(),
    pro_ord_view(),
    pro_map_view(),
    pro_price_view(),
    gender_view(),
    size_col_view(),
    cloud_view(),
)
page.render('亚马逊订单拖拽图.html')

# Step 2: apply the saved layout. chart_config.json only exists after the
# manual browser step above, so on a first run this step is skipped with a
# hint instead of failing on the missing file.
if os.path.exists('chart_config.json'):
    a = page.save_resize_html('亚马逊订单拖拽图.html', cfg_file='chart_config.json', dest='亚马逊订单可视化.html')
else:
    a = None
    print("chart_config.json not found: arrange the charts in the rendered page, "
          "click 'Save Config', place the file next to this script and run this step again.")

insert image description here

Recommended

Reposted from: blog.csdn.net/qq_45694768/article/details/124829338