A small program for WeChat friend statistics

This small project is based on the article "WeChat Friends Data Analysis" (author name rendered by the translator as "more than the country").

I. Features:

  This article describes how to collect data through the WeChat web client in order to retrieve the profile data of your WeChat friends and run
some simple analyses on it. Features include:
  1. Crawl the friend list, show each friend's nickname, gender, region and signature, and save them as an xlsx file
  2. Compute the geographical distribution of friends and display it both as a word cloud and on a map

II. Install the runtime environment and the required libraries.

  1. Python 3.x (using the Spyder editor that ships with Anaconda)
  2. Python libraries to install:
  • Run the following commands in the Anaconda Prompt. Upgrade pip before installing anything:
python -m pip install --upgrade pip
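  • Install wxpy: pip install wxpy
     
  • Install PIL (Pillow): pip install pillow
     
  • Install pyecharts: pip install pyecharts
    (the code below uses the pyecharts 0.5.x API, e.g. "from pyecharts import Map"; if a newer release gets installed, pin it with pip install pyecharts==0.5.11)
     
  • Install itchat: pip install itchat
     
  • Install jieba: pip install jieba
     
  • Install pandas: pip install pandas
     
  • Install numpy: pip install numpy
  • Install wordcloud: pip install wordcloud
  • openpyxl and matplotlib are also imported by the code; if they are not already present: pip install openpyxl matplotlib
  • Install the map data packages: pip install echarts-china-provinces-pypkg
    •   pip install echarts-countries-pypkg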

 

The code is as follows:

# -*- coding: utf-8 -*-
"""
Created on Wed Jun 5 11:44:42 2019

@author: lenovo
"""
import matplotlib.pyplot as plt
import openpyxl
import pandas as pd
from pyecharts import Map
from wordcloud import WordCloud
from wxpy import *


def connect_in():
    """Log in to the web version of WeChat and return all friends.

    A wxpy ``Bot`` is created with ``cache_path=True`` (cached login mode;
    the first login is done by scanning a QR code), then its friend list
    is fetched.

    Returns:
        The value of ``bot.friends()``: the collection of the logged-in
        user's WeChat friends.
    """
    bot = Bot(cache_path=True)
    return bot.friends()

def analyse_friends(friend_all, top_provinces=10, top_cities=100):
    """Print a text summary of the friend list.

    Args:
        friend_all: Friend collection exposing ``stats_text`` (as returned
            by ``connect_in``).
        top_provinces: How many of the most common provinces to include.
        top_cities: How many of the most common cities to include.
    """
    # Forward the caller's limits; the earlier version ignored both
    # parameters and always asked for 30 provinces / 500 cities.
    data = friend_all.stats_text(
        total=True,
        sex=True,
        top_provinces=top_provinces,
        top_cities=top_cities,
    )
    print(data)

def get_column_title():
    """Return the list of column titles for the header row.

    There are no default titles, so the list is empty. A new list is
    created on every call.
    """
    column_titles = []
    # The earlier version returned the misspelled name ``colomn_titles``,
    # which raised NameError.
    return column_titles

def data_dict_to_list(friend_all):
    """Flatten the friends' raw data dictionaries into a list of rows.

    Args:
        friend_all: Iterable of friend objects, each exposing a ``raw``
            dictionary.

    Returns:
        A list of lists. The first row is the header
        (``NickName``, ``Sex``, ``City``, ``Province``, ``Signature``,
        ``HeadImgUrl``, ``HeadImgFlag``); each following row holds those
        fields for one friend, in the same order. A field missing from
        ``raw`` is ``None``. The numeric ``Sex`` code is replaced by a text
        label; any code other than 0, 1 or 2 becomes ``None``.
    """
    fields = ['NickName', 'Sex', 'City', 'Province', 'Signature',
              'HeadImgUrl', 'HeadImgFlag']
    # Labels for the numeric gender code: 1 -> male, 2 -> female,
    # 0 -> other. The labels for 1 and 2 had been reduced to empty
    # strings, which made the two genders indistinguishable.
    sex_labels = {1: "男", 2: "女", 0: "其它"}
    sex_index = fields.index('Sex')

    data_lis = [list(fields)]
    for a_friend in friend_all:
        raw = a_friend.raw
        row = [raw.get(field) for field in fields]
        row[sex_index] = sex_labels.get(row[sex_index])
        data_lis.append(row)
    return data_lis
    
def data_lis_savein_excel(data_lis=None, filename='wechat_data',
                          sheet_title='wechat1'):
    """Write a list of rows to an Excel 2007 (.xlsx) workbook.

    Args:
        data_lis: List whose elements are lists (one inner list per row),
            for example::

                [["name", "price", "publisher", "language"],
                 ["Dark Time", "32.4", "Posts and Telecom Press", "Chinese"]]

            ``None`` (the default) is treated as no rows.
        filename: Output path without extension; ``.xlsx`` is appended.
        sheet_title: Title given to the active worksheet.

    Returns:
        The full file name that was written (``filename + '.xlsx'``).
    """
    rows = [] if data_lis is None else data_lis

    wb = openpyxl.Workbook()
    sheet = wb.active
    sheet.title = sheet_title
    file_name = filename + '.xlsx'

    # openpyxl cells are 1-indexed.
    for row_idx, row in enumerate(rows, start=1):
        for col_idx, item in enumerate(row, start=1):
            # Values are stored as text; None is left as an empty cell so
            # it is read back as a missing value instead of the text "None".
            value = None if item is None else str(item)
            sheet.cell(row=row_idx, column=col_idx, value=value)

    wb.save(file_name)
    # Report success before returning; the earlier version placed this
    # after the return statement, so it never ran.
    print("write data successfully!")
    return file_name

def count_sing(file_name, sheet_name='wechat1', column_name='NickName'):
    """Print summary statistics for a single column of the workbook.

    Prints the column name with its non-null count, followed by the
    output of ``describe()`` for that column.

    Args:
        file_name: Path of the .xlsx file to read.
        sheet_name: Name of the worksheet to load.
        column_name: Column to summarize.

    Raises:
        KeyError: If ``column_name`` is not a column of the sheet.
    """
    # pandas takes the path directly, so no file handle is left open if
    # reading fails. The keyword is ``sheet_name``; the old ``sheetname``
    # spelling is no longer accepted by pandas.
    data = pd.read_excel(file_name, sheet_name=sheet_name)
    column = data[column_name]
    print(column_name + '\t' + str(column.count()))
    print(column.describe())

def wordcloud_show(file_name, sheet_name='wechat1', column_name='City'):
    """Display a word cloud of one column using wordcloud + matplotlib.

    Args:
        file_name: Path of the .xlsx file to read.
        sheet_name: Name of the worksheet to load.
        column_name: Column whose values become the words of the cloud.
    """
    data = pd.read_excel(file_name, sheet_name=sheet_name)
    # Missing cells become "0"; everything is cast to str so that numeric
    # cells cannot break the join below.
    word_list = data[column_name].fillna('0').astype(str).tolist()
    new_text = ' '.join(word_list)

    # The font file is passed explicitly; presumably it is needed to
    # render Chinese place names (assumption, not shown in this file).
    cloud = WordCloud(font_path='simhei.ttf',
                      background_color="black").generate(new_text)
    plt.imshow(cloud)
    plt.axis("off")
    plt.show()
    
def save_wordcloud_to_html(save_road, file_name, sheet_name='wechat1',
                           column_name='City'):
    """Build a pyecharts word cloud of one column and save it as HTML.

    Args:
        save_road: Output path prefix; the chart is written to
            ``save_road + '.html'``.
        file_name: Path of the .xlsx file to read.
        sheet_name: Name of the worksheet to load.
        column_name: Column whose value frequencies size the words.
    """
    # Imported locally on purpose: this is the pyecharts chart class, not
    # wordcloud.WordCloud used elsewhere in the file.
    # NOTE(review): ``from pyecharts import WordCloud`` is the import path
    # this file uses; confirm the installed pyecharts still provides it.
    from pyecharts import WordCloud

    data = pd.read_excel(file_name, sheet_name=sheet_name)
    # Frequency of every value in the column, with missing cells counted
    # under the placeholder "NAN".
    count_city = data[column_name].fillna('NAN').value_counts()
    name = count_city.index.tolist()
    value = count_city.tolist()

    chart = WordCloud(width=1300, height=620)
    chart.add("", name, value, word_size_range=[20, 100])
    chart.show_config()
    chart.render(save_road + '.html')

def shou_data_in_countrymap(save_road, file_name, sheet_name='wechat1',
                            column_name='Province'):
    """Render the friends' provincial distribution on a map of China.

    Args:
        save_road: Output path prefix; the chart is written to
            ``save_road + 'map1' + '.html'``.
        file_name: Path of the .xlsx file to read.
        sheet_name: Name of the worksheet to load.
        column_name: Column holding the province of each friend.
    """
    data = pd.read_excel(file_name, sheet_name=sheet_name)
    # Frequency of every value in the column, with missing cells counted
    # under the placeholder "nAN".
    count_province = data[column_name].fillna('nAN').value_counts()
    value = count_province.tolist()
    attr = count_province.index.tolist()

    # Named ``chart`` rather than ``map`` to avoid shadowing the builtin.
    chart = Map("Provincial distribution of WeChat friends",
                width=1200, height=600)
    chart.add("", attr, value, maptype='china', is_visualmap=True,
              visual_text_color='#000',
              is_label_show=True)  # show province names on the map
    chart.show_config()
    chart.render(save_road + 'map1' + '.html')
    

def main():
    """Run the whole pipeline: fetch friends, export them, analyse, chart."""
    friends_data = connect_in()
    data_ls = data_dict_to_list(friends_data)
    file_name = data_lis_savein_excel(data_ls)
    analyse_friends(friends_data)
    count_sing(file_name)
    wordcloud_show(file_name)
    # NOTE: machine-specific output prefix; the HTML files are written to
    # this string with '.html' / 'map1.html' appended.
    save_road = r'C:\Users\lenovo\AppData\Local\Programs\Python\Python37'

    save_wordcloud_to_html(save_road, file_name)
    shou_data_in_countrymap(save_road, file_name)


# Only run when executed as a script, so importing this module does not
# start a WeChat login.
if __name__ == "__main__":
    main()

Result:

 

 

 

 

Guess you like

Source: www.cnblogs.com/DXL123/p/10978901.html