美国2012年总统候选人政治献金数据分析
导入包
In [1]:
import numpy as np
import pandas as pd from pandas import Series,DataFrame
方便大家操作,将月份和参选人以及所在政党进行定义
In [2]:
months = {'JAN' : 1, 'FEB' : 2, 'MAR' : 3, 'APR' : 4, 'MAY' : 5, 'JUN' : 6, 'JUL' : 7, 'AUG' : 8, 'SEP' : 9, 'OCT': 10, 'NOV': 11, 'DEC' : 12} of_interest = ['Obama, Barack', 'Romney, Mitt', 'Santorum, Rick', 'Paul, Ron', 'Gingrich, Newt'] parties = { 'Bachmann, Michelle': 'Republican', 'Romney, Mitt': 'Republican', 'Obama, Barack': 'Democrat', "Roemer, Charles E. 'Buddy' III": 'Reform', 'Pawlenty, Timothy': 'Republican', 'Johnson, Gary Earl': 'Libertarian', 'Paul, Ron': 'Republican', 'Santorum, Rick': 'Republican', 'Cain, Herman': 'Republican', 'Gingrich, Newt': 'Republican', 'McCotter, Thaddeus G': 'Republican', 'Huntsman, Jon': 'Republican', 'Perry, Rick': 'Republican' }
读取文件
In [3]:
table = pd.read_csv('data/usa_election.txt') table.head()
Out[3]:
In [8]:
#使用map函数+字典,新建一列各个候选人所在党派party
table['party'] = table['cand_nm'].map(parties) table.head()
Out[8]:
In [10]:
#party这一列中有哪些元素
table['party'].unique()
Out[10]:
In [ ]:
#使用value_counts()函数,统计party列中各个元素出现次数,value_counts()是Series中的,无参,返回一个带有每个元素出现次数的Series
In [11]:
table['party'].value_counts()
Out[11]:
In [12]:
#使用groupby()函数,查看各个党派收到的政治献金总数contb_receipt_amt
table.groupby(by='party')['contb_receipt_amt'].sum()
Out[12]:
In [13]:
#查看具体每天各个党派收到的政治献金总数contb_receipt_amt 。使用groupby([多个分组参数])
table.groupby(by=['party','contb_receipt_dt'])['contb_receipt_amt'].sum()
Out[13]:
In [14]:
def trasform_date(d):
day,month,year = d.split('-') month = months[month] return "20"+year+'-'+str(month)+'-'+day
In [17]:
#将表中日期格式转换为'yyyy-mm-dd'。日期格式,通过函数加map方式进行转换
table['contb_receipt_dt'] = table['contb_receipt_dt'].apply(trasform_date)
In [18]:
table.head()
Out[18]:
In [19]:
#查看老兵(捐献者职业)DISABLED VETERAN主要支持谁 :查看老兵们捐赠给谁的钱最多
table['contbr_occupation'] == 'DISABLED VETERAN'
Out[19]:
In [21]:
old_bing_df = table.loc[table['contbr_occupation'] == 'DISABLED VETERAN']
In [22]:
old_bing_df.groupby(by='cand_nm')['contb_receipt_amt'].sum()
Out[22]:
In [23]:
table['contb_receipt_amt'].max()
Out[23]:
In [24]:
#找出候选人的捐赠者中,捐赠金额最大的人的职业以及捐献额 .通过query("查询条件来查找捐献人职业")
table.query('contb_receipt_amt == 1944042.43')
Out[24]: