Step 1. Import the necessary libraries
In [30]:
import pandas as pd
Step 2. Import the dataset from this address.
Step 3. Assign it to a variable called df.
In [31]:
# Location of the weekly investor flow-of-funds CSV.
url = 'https://raw.githubusercontent.com/datasets/investor-flow-of-funds-us/master/data/weekly.csv'
# Download and parse the CSV into a DataFrame.
df = pd.read_csv(filepath_or_buffer=url)
# Preview the first five rows.
df.head(n=5)
Out[31]:
Date Total Equity Domestic Equity World Equity Hybrid Total Bond Taxable Bond Municipal Bond Total
0 2012-12-05 -7426 -6060 -1367 -74 5317 4210 1107 -2183
1 2012-12-12 -8783 -7520 -1263 123 1818 1598 219 -6842
2 2012-12-19 -5496 -5470 -26 -73 103 3472 -3369 -5466
3 2012-12-26 -4451 -4076 -375 550 2610 3333 -722 -1291
4 2013-01-02 -11156 -9622 -1533 -158 2383 2103 280 -8931
Step 4. What is the frequency of the dataset?
In [32]:
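# Weekly: consecutive dates are typically 7 days apart (e.g. 2012-12-05, 2012-12-12), although some weeks are missing.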
Step 5. Set the column Date as the index.
In [33]:
# Promote the 'Date' column to the row index; set_index returns a new frame,
# which is bound back to df.
df = df.set_index(keys='Date')
# Preview the first five rows to confirm 'Date' is now the index.
df.head(n=5)
Out[33]:
Total Equity Domestic Equity World Equity Hybrid Total Bond Taxable Bond Municipal Bond Total
Date
2012-12-05 -7426 -6060 -1367 -74 5317 4210 1107 -2183
2012-12-12 -8783 -7520 -1263 123 1818 1598 219 -6842
2012-12-19 -5496 -5470 -26 -73 103 3472 -3369 -5466
2012-12-26 -4451 -4076 -375 550 2610 3333 -722 -1291
2013-01-02 -11156 -9622 -1533 -158 2383 2103 280 -8931
Step 6. What is the type of the index?
In [34]:
df.index
# The index holds the dates as plain strings, so its dtype is 'object'
# (it is not a DatetimeIndex yet).
Out[34]:
Index([u'2012-12-05', u'2012-12-12', u'2012-12-19', u'2012-12-26',
u'2013-01-02', u'2013-01-09', u'2014-04-02', u'2014-04-09',
u'2014-04-16', u'2014-04-23', u'2014-04-30', u'2014-05-07',
u'2014-05-14', u'2014-05-21', u'2014-05-28', u'2014-06-04',
u'2014-06-11', u'2014-06-18', u'2014-06-25', u'2014-07-02',
u'2014-07-09', u'2014-07-30', u'2014-08-06', u'2014-08-13',
u'2014-08-20', u'2014-08-27', u'2014-09-03', u'2014-09-10',
u'2014-11-05', u'2014-11-12', u'2014-11-19', u'2014-11-25',
u'2015-01-07', u'2015-01-14', u'2015-01-21', u'2015-01-28',
u'2015-02-04', u'2015-02-11', u'2015-03-04', u'2015-03-11',
u'2015-03-18', u'2015-03-25', u'2015-04-01', u'2015-04-08'],
dtype='object', name=u'Date')
Step 7. Set the index to a DatetimeIndex type
df.index = pd.to_datetime(df.index)
type(df.index)
In [35]:
# Parse the date strings of the current index and install the parsed
# values as the new index.
df.index = pd.to_datetime(arg=df.index)
# Show the class of the index after conversion.
type(df.index)
Out[35]:
pandas.tseries.index.DatetimeIndex
Step 8. Change the frequency to monthly, sum the values and assign it to monthly.
In [36]:
# Downsample the weekly rows into calendar-month bins ('M' = month end) and
# add up the flows in each bin.
# min_count=1 makes a month with no observations sum to NaN; without it,
# recent pandas versions sum an empty bin to 0, so the empty months could not
# be told apart from real data and dropped afterwards.
monthly = df.resample('M').sum(min_count=1)
monthly
Out[36]:
Total Equity Domestic Equity World Equity Hybrid Total Bond Taxable Bond Municipal Bond Total
Date
2012-12-31 -26156.0 -23126.0 -3031.0 526.0 9848.0 12613.0 -2765.0 -15782.0
2013-01-31 3661.0 -1627.0 5288.0 2730.0 12149.0 9414.0 2735.0 18540.0
2013-02-28 NaN NaN NaN NaN NaN NaN NaN NaN
2013-03-31 NaN NaN NaN NaN NaN NaN NaN NaN
2013-04-30 NaN NaN NaN NaN NaN NaN NaN NaN
2013-05-31 NaN NaN NaN NaN NaN NaN NaN NaN
2013-06-30 NaN NaN NaN NaN NaN NaN NaN NaN
2013-07-31 NaN NaN NaN NaN NaN NaN NaN NaN
2013-08-31 NaN NaN NaN NaN NaN NaN NaN NaN
2013-09-30 NaN NaN NaN NaN NaN NaN NaN NaN
2013-10-31 NaN NaN NaN NaN NaN NaN NaN NaN
2013-11-30 NaN NaN NaN NaN NaN NaN NaN NaN
2013-12-31 NaN NaN NaN NaN NaN NaN NaN NaN
2014-01-31 NaN NaN NaN NaN NaN NaN NaN NaN
2014-02-28 NaN NaN NaN NaN NaN NaN NaN NaN
2014-03-31 NaN NaN NaN NaN NaN NaN NaN NaN
2014-04-30 10842.0 1048.0 9794.0 4931.0 8493.0 7193.0 1300.0 24267.0
2014-05-31 -2203.0 -8720.0 6518.0 3172.0 13767.0 10192.0 3576.0 14736.0
2014-06-30 2319.0 -6546.0 8865.0 4588.0 9715.0 7551.0 2163.0 16621.0
2014-07-31 -7051.0 -11128.0 4078.0 2666.0 7506.0 7026.0 481.0 3122.0
2014-08-31 1943.0 -5508.0 7452.0 1885.0 1897.0 -1013.0 2910.0 5723.0
2014-09-30 -2767.0 -6596.0 3829.0 1599.0 3984.0 2479.0 1504.0 2816.0
2014-10-31 NaN NaN NaN NaN NaN NaN NaN NaN
2014-11-30 -2753.0 -7239.0 4485.0 729.0 14528.0 11566.0 2962.0 12502.0
2014-12-31 NaN NaN NaN NaN NaN NaN NaN NaN
2015-01-31 3471.0 -1164.0 4635.0 1729.0 7368.0 2762.0 4606.0 12569.0
2015-02-28 5508.0 3509.0 1999.0 1752.0 9099.0 7443.0 1656.0 16359.0
2015-03-31 5691.0 -8176.0 13867.0 2829.0 9138.0 7267.0 1870.0 17657.0
2015-04-30 379.0 -4628.0 5007.0 970.0 423.0 514.0 -91.0 1772.0
Step 9. You will notice that resampling filled the DataFrame with NaN for the months that don’t have any data. Let’s drop these rows.
In [37]:
# Discard every row (month) that contains a missing value.
monthly = monthly.dropna(axis=0, how='any')
monthly
Out[37]:
Total Equity Domestic Equity World Equity Hybrid Total Bond Taxable Bond Municipal Bond Total
Date
2012-12-31 -26156.0 -23126.0 -3031.0 526.0 9848.0 12613.0 -2765.0 -15782.0
2013-01-31 3661.0 -1627.0 5288.0 2730.0 12149.0 9414.0 2735.0 18540.0
2014-04-30 10842.0 1048.0 9794.0 4931.0 8493.0 7193.0 1300.0 24267.0
2014-05-31 -2203.0 -8720.0 6518.0 3172.0 13767.0 10192.0 3576.0 14736.0
2014-06-30 2319.0 -6546.0 8865.0 4588.0 9715.0 7551.0 2163.0 16621.0
2014-07-31 -7051.0 -11128.0 4078.0 2666.0 7506.0 7026.0 481.0 3122.0
2014-08-31 1943.0 -5508.0 7452.0 1885.0 1897.0 -1013.0 2910.0 5723.0
2014-09-30 -2767.0 -6596.0 3829.0 1599.0 3984.0 2479.0 1504.0 2816.0
2014-11-30 -2753.0 -7239.0 4485.0 729.0 14528.0 11566.0 2962.0 12502.0
2015-01-31 3471.0 -1164.0 4635.0 1729.0 7368.0 2762.0 4606.0 12569.0
2015-02-28 5508.0 3509.0 1999.0 1752.0 9099.0 7443.0 1656.0 16359.0
2015-03-31 5691.0 -8176.0 13867.0 2829.0 9138.0 7267.0 1870.0 17657.0
2015-04-30 379.0 -4628.0 5007.0 970.0 423.0 514.0 -91.0 1772.0
Step 10. Good, now we have the monthly data. Now change the frequency to year.
In [38]:
# Aggregate the monthly totals into yearly bins labelled by the year's
# start date (January 1st) and sum each bin.
year = monthly.resample(rule='AS-JAN').sum()
year
Out[38]:
Total Equity Domestic Equity World Equity Hybrid Total Bond Taxable Bond Municipal Bond Total
Date
2012-01-01 -26156.0 -23126.0 -3031.0 526.0 9848.0 12613.0 -2765.0 -15782.0
2013-01-01 3661.0 -1627.0 5288.0 2730.0 12149.0 9414.0 2735.0 18540.0
2014-01-01 330.0 -44689.0 45021.0 19570.0 59890.0 44994.0 14896.0 79787.0
2015-01-01 15049.0 -10459.0 25508.0 7280.0 26028.0 17986.0 8041.0 48357.0