pandas时间序列学习笔记

创建一个时间序列

# Build a DatetimeIndex of seven consecutive dates starting at 2017-01-01
seven_days = pd.date_range('2017-1-1', periods=7)

# Print each date's weekday number (Monday=0 ... Sunday=6) and weekday name.
# Timestamp.weekday_name was removed in pandas 1.0; day_name() is its
# replacement and returns the same English names.
for day in seven_days:
    print(day.dayofweek, day.day_name())
<script.py> output:
    6 Sunday
    0 Monday
    1 Tuesday
    2 Wednesday
    3 Thursday
    4 Friday
    5 Saturday
# Load the air-quality readings from the CSV file
data = pd.read_csv('nyc.csv')

# Summarise the raw frame; the recorded output lists 'date' as object dtype
print(data.info())

# Parse the 'date' column into datetime values
data['date'] = pd.to_datetime(data['date'])

# Move the parsed dates out of the columns and into the index
data = data.set_index('date')

# Summarise again to confirm the index is now a DatetimeIndex
print(data.info())

# Draw one subplot per remaining column
data.plot(subplots=True)
plt.show()
<script.py> output:
    <class 'pandas.core.frame.DataFrame'>
    RangeIndex: 6317 entries, 0 to 6316
    Data columns (total 4 columns):
    date     6317 non-null object
    ozone    6317 non-null float64
    pm25     6317 non-null float64
    co       6317 non-null float64
    dtypes: float64(3), object(1)
    memory usage: 197.5+ KB
    None
    <class 'pandas.core.frame.DataFrame'>
    DatetimeIndex: 6317 entries, 1999-07-01 to 2017-03-31
    Data columns (total 3 columns):
    ozone    6317 non-null float64
    pm25     6317 non-null float64
    co       6317 non-null float64
    dtypes: float64(3)
    memory usage: 197.4 KB
    None

# Collect one price column per year, then join them side by side with a
# single concat call. Concatenating inside the loop would re-copy the
# growing frame on every pass and needs an empty DataFrame as a seed.
yearly_prices = []
for year in ['2013', '2014', '2015']:
    # Select that year's rows by label (assumes yahoo has a DatetimeIndex --
    # TODO confirm) and drop the dates so the years line up by row position.
    price_per_year = yahoo.loc[year, ['price']].reset_index(drop=True)
    # Name the column after the year so each year is a separate line in the plot
    yearly_prices.append(price_per_year.rename(columns={'price': year}))
prices = pd.concat(yearly_prices, axis=1)

# Plot prices
prices.plot()
plt.show()

asfreq()

给已经存在的时间序列调整时间间隔

# Summarise co before its frequency is changed
print(co.info())

# Convert to calendar-daily ('D') and then to monthly ('M') frequency,
# plotting after each conversion. The monthly step runs on the frame that
# was already converted to daily, exactly as in the step-by-step version.
for freq in ('D', 'M'):
    co = co.asfreq(freq)
    co.plot(subplots=True)
    plt.show()

shift(),滞后函数

等价于r里面的lag()
periods参数指定滞后阶数
\(x_t\)/\(x_{t-1}\)

diff()求差分

\(x_t\)-\(x_{t-1}\)

# Load the price data with 'Date' parsed as the index, then convert it to
# business-daily ('B') frequency
google = pd.read_csv(
    'google.csv', parse_dates=['Date'], index_col='Date'
).asfreq('B')

# Add two copies of Close moved by 90 periods in opposite directions:
# 'lagged' uses periods=-90 and 'shifted' uses periods=90
close = google['Close']
google = google.assign(
    lagged=close.shift(periods=-90),
    shifted=close.shift(periods=90),
)

# Plot all columns on one set of axes
google.plot()
plt.show()

猜你喜欢

转载自www.cnblogs.com/gaowenxingxing/p/12173017.html