This small project is based on "WeChat Friends Data Analysis" - more than the country
I. Features:
This article describes how to collect data through the WeChat web client in order to retrieve the personal information of your WeChat friends, and
to run some simple data analysis on it. The features include:
1. Crawl the friend list, show each friend's nickname, gender, region and signature, and save the result as an .xlsx file
2. Compute the geographic distribution of your friends, and display it both as a word cloud and on a map
II. Set up the runtime environment and install the required libraries:
- Python 3.x (using the Spyder editor that ships with Anaconda)
- Python libraries to install:
- Run the following commands in the Anaconda Prompt. Upgrade pip before installing anything, as follows:
python -m pip install --upgrade pip
- Install wxpy: pip install wxpy
- Install PIL (Pillow): pip install pillow
- Install pyecharts: pip install pyecharts
- Install itchat: pip install itchat
- Install jieba: pip install jieba
- Install pandas: pip install pandas
- Install numpy: pip install numpy
- Install wordcloud: pip install wordcloud
- Install the China province map data package: pip install echarts-china-provinces-pypkg
- Install the country map data package: pip install echarts-countries-pypkg
The code is shown below:
# -*- coding: utf-8 -*-
"""Collect WeChat friend data through the web client and analyse it.

Created on Wed Jun  5 11:44:42 2019

@author: lenovo

The script logs in to the web version of WeChat, dumps the friend list to an
.xlsx workbook, prints simple statistics, and renders a word cloud and a
province map from the saved workbook.

NOTE(review): the chart code uses the pyecharts 0.5.x style API
(``from pyecharts import Map``, ``add(..., is_visualmap=...)``,
``show_config()``).  Newer pyecharts releases expose charts differently --
confirm the installed version before running.
"""
from wxpy import Bot
import openpyxl
import pandas as pd
from wordcloud import WordCloud
import matplotlib.pyplot as plt
# pyecharts also ships a ``WordCloud`` class; alias it so it cannot shadow
# the one imported from the ``wordcloud`` package above.
from pyecharts import Map
from pyecharts import WordCloud as EchartsWordCloud

# Keys read from each friend's raw data dictionary; also used as the header
# row of the exported table.
_FRIEND_FIELDS = (
    'NickName',
    'Sex',
    'City',
    'Province',
    'Signature',
    'HeadImgUrl',
    'HeadImgFlag',
)

# Display labels for the numeric ``Sex`` code found in the raw friend data.
_SEX_LABELS = {1: "男", 2: "女", 0: "其它"}


def connect_in():
    """Log in to the web version of WeChat and return all friends.

    The bot is created with ``cache_path=True`` so the login (QR-code scan)
    can be cached between runs.

    Returns:
        The friend collection returned by ``bot.friends()``.
    """
    bot = Bot(cache_path=True)
    return bot.friends()


def analyse_friends(friend_all, top_provinces=10, top_cities=100):
    """Print the summary text statistics of the friend collection.

    Args:
        friend_all: Friend collection as returned by :func:`connect_in`.
        top_provinces: Number of top provinces to include in the report.
        top_cities: Number of top cities to include in the report.
    """
    # The limits are passed through; previously they were ignored in favour
    # of hard-coded values.
    data = friend_all.stats_text(
        total=True,
        sex=True,
        top_provinces=top_provinces,
        top_cities=top_cities,
    )
    print(data)


def get_column_title():
    """Return the list of column titles (empty by default)."""
    column_titles = []
    return column_titles


def data_dict_to_list(friend_all):
    """Flatten the friends' raw data dictionaries into a list of rows.

    Args:
        friend_all: Iterable of friend objects exposing a ``raw`` dict.

    Returns:
        A list of lists.  The first row is the header (``NickName``, ``Sex``,
        ``City``, ``Province``, ``Signature``, ``HeadImgUrl``,
        ``HeadImgFlag``); each following row holds one friend's values, with
        missing keys as ``None`` and the ``Sex`` code mapped to a label.
    """
    data_lis = [list(_FRIEND_FIELDS)]
    for a_friend in friend_all:
        raw = a_friend.raw
        row = []
        for field in _FRIEND_FIELDS:
            value = raw.get(field)
            if field == 'Sex':
                # Translate the numeric code; unknown codes become None.
                value = _SEX_LABELS.get(value)
            row.append(value)
        data_lis.append(row)
    return data_lis


def data_lis_savein_excel(data_lis=None, filename='wechat_data',
                          sheet_title='wechat1'):
    """Write a list of rows into an .xlsx workbook.

    Args:
        data_lis: List whose elements are lists (one per row), e.g.
            ``[["name", "price"], ["Dark time", "32.4"]]``.  ``None`` is
            treated as an empty table.
        filename: Output file name (may include a path) WITHOUT extension;
            ``.xlsx`` is appended.
        sheet_title: Title given to the worksheet.

    Returns:
        The name of the file that was written.
    """
    rows = [] if data_lis is None else data_lis
    wb = openpyxl.Workbook()
    sheet = wb.active
    sheet.title = sheet_title
    file_name = filename + '.xlsx'
    # openpyxl rows and columns are 1-based.
    for row_no, row in enumerate(rows, start=1):
        for col_no, value in enumerate(row, start=1):
            # Every cell is stored as text, so None is written as "None".
            sheet.cell(row=row_no, column=col_no, value=str(value))
    wb.save(file_name)
    # Report success before returning; the message used to be unreachable.
    print("write data successfully!")
    return file_name


def _read_column(file_name, sheet_name, column_name, fill_value):
    """Return one worksheet column as strings, with blanks replaced."""
    data = pd.read_excel(file_name, sheet_name=sheet_name)
    return data[column_name].fillna(fill_value).astype(str)


def count_sing(file_name, sheet_name='wechat1', column_name='NickName'):
    """Print the statistics of a single column.

    Prints the column name with its non-empty count, followed by the
    ``describe()`` summary of the column.
    """
    data = pd.read_excel(file_name, sheet_name=sheet_name)
    column = data[column_name]
    print(column_name + '\t' + str(column.count()))
    print(column.describe())


def wordcloud_show(file_name, sheet_name='wechat1', column_name='City'):
    """Show a word cloud of one column using matplotlib + wordcloud.

    Empty cells are replaced with ``"0"`` before the column values are
    joined into the text the cloud is generated from.
    """
    word_list = _read_column(file_name, sheet_name, column_name, '0').tolist()
    new_text = ' '.join(word_list)
    # simhei.ttf is used so that Chinese characters can be rendered.
    cloud = WordCloud(
        font_path='simhei.ttf',
        background_color="black",
    ).generate(new_text)
    plt.imshow(cloud)
    plt.axis("off")
    plt.show()


def save_wordcloud_to_html(save_road, file_name, sheet_name='wechat1',
                           column_name='City'):
    """Render a pyecharts word cloud of one column to an HTML file.

    Args:
        save_road: Output path prefix; ``.html`` is appended directly to it.
        file_name: Workbook to read.
        sheet_name: Worksheet to read.
        column_name: Column whose value frequencies are plotted.  Empty
            cells are counted under the label ``"NAN"``.
    """
    cities = _read_column(file_name, sheet_name, column_name, 'NAN')
    count_city = cities.value_counts()
    name = count_city.index.tolist()
    value = count_city.tolist()
    cloud = EchartsWordCloud(width=1300, height=620)
    cloud.add("", name, value, word_size_range=[20, 100])
    cloud.show_config()
    cloud.render(save_road + '.html')


def shou_data_in_countrymap(save_road, file_name, sheet_name='wechat1',
                            column_name='Province'):
    """Render the distribution of friends on a map of China to HTML.

    Args:
        save_road: Output path prefix; ``map1.html`` is appended directly
            to it.
        file_name: Workbook to read.
        sheet_name: Worksheet to read.
        column_name: Column holding the province names.  Empty cells are
            counted under the label ``"NAN"``.
    """
    provinces = _read_column(file_name, sheet_name, column_name, 'NAN')
    count_province = provinces.value_counts()
    value = count_province.tolist()
    attr = count_province.index.tolist()
    china_map = Map(
        "provincial distribution of micro-channel friends",
        width=1200,
        height=600,
    )
    # is_label_show=True displays the province names on the map.
    china_map.add(
        "",
        attr,
        value,
        maptype='china',
        is_visualmap=True,
        visual_text_color='#000',
        is_label_show=True,
    )
    china_map.show_config()
    china_map.render(save_road + 'map1' + '.html')


def main():
    """Run the whole pipeline: fetch, export, summarise and plot."""
    friends_data = connect_in()
    data_ls = data_dict_to_list(friends_data)
    file_name = data_lis_savein_excel(data_ls)
    # Explicit limits keep the report size the script has always printed.
    analyse_friends(friends_data, top_provinces=30, top_cities=500)
    count_sing(file_name)
    wordcloud_show(file_name)
    save_road = r'C:\Users\lenovo\AppData\Local\Programs\Python\Python37'
    save_wordcloud_to_html(save_road, file_name)
    shou_data_in_countrymap(save_road, file_name)


if __name__ == "__main__":
    main()
Result: