import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")
%matplotlib inline
from statsmodels.tsa.seasonal import seasonal_decompose
from datetime import datetime, timedelta
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from random import gauss
from random import seed
import scipy.stats as stats
import math
import os
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.seasonal import seasonal_decompose
#from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.arima_model import ARIMA
# Style Dictionary to standarize plotting
# scheme between different python scripts
styledict = {'xtick.labelsize':21,
'xtick.major.size':9,
'xtick.major.width':1,
'ytick.labelsize':21,
'ytick.major.size':9,
'ytick.major.width':1,
'legend.framealpha':0.0,
'legend.fontsize':15,
'axes.labelsize':22,
'axes.titlesize':22,
'axes.linewidth':2,
'figure.figsize':(12,8),
'savefig.format':'jpg'}
plt.rcParams.update(**styledict)
# define font style for text
font = {'family': 'serif',
'color': 'blue',
'weight': 'normal',
'size': 12,
}
# Go to working directory
os.chdir("/users/sshen/climstats")
#Python code for plotting: Fig. 7.1: The Keeling curve
os.chdir("/users/sshen/climstats")
co2m = pd.read_csv('data/co2_mm_mlo.csv', skiprows = 51)
print(co2m.head(5))
df = pd.DataFrame(co2m)
print(df.shape)
plt.plot(co2m['decimal date'], co2m['average'], color = 'r')
plt.plot(co2m['decimal date'], co2m['interpolated'],
color = 'k')
plt.title('Monthly Atmospheric CO\N{SUBSCRIPT TWO} \
at Mauna Loa Observatory', pad = 15)
plt.ylabel('Parts Per Million [ppm]',
labelpad = 15)
# text inside the figure
plt.text(1958,410,
s='Scripps Institution of Oceanography', size = 18)
plt.text(1958,400,
s='NOAA Global Monitoring Laboratory', size = 18)
# save figure
plt.savefig("fig0701.eps")
year month decimal date average interpolated trend ndays Unnamed: 7 0 1958 3 1958.2027 315.70 314.43 -1 -9.99 -0.99 1 1958 4 1958.2877 317.45 315.16 -1 -9.99 -0.99 2 1958 5 1958.3699 317.51 314.71 -1 -9.99 -0.99 3 1958 6 1958.4548 317.24 315.14 -1 -9.99 -0.99 4 1958 7 1958.5370 315.86 315.18 -1 -9.99 -0.99 (749, 8)
os.chdir("/users/sshen/climstats")
co2m = pd.read_csv('data/co2_mm_mlo.csv', skiprows = 51)
# get date into correct format in order to plot time series:
date_index = []
# convert decimal date into datetime object, and store
df_co2 = df
for i in df_co2['decimal date']:
start = float(i)
year = int(float(start))
rem = start - year
base = datetime(year, 1, 1)
result = base + timedelta(seconds=(base.replace(year=
base.year + 1) - base).total_seconds() * rem)
date_index.append(result)
date_index = pd.to_datetime(date_index)
# Create a time series starting from March 1958 to July 2020 with monthly frequency
from statsmodels.tsa.seasonal import seasonal_decompose
os.chdir("/users/sshen/climstats")
co2m = pd.read_csv('data/co2_mm_mlo.csv', skiprows = 51)
data_values = ser = pd.Series(co2m['average'])
ser.index = date_index
# get observed data in correct format
# convert values from string to float):
data_values = [float(i) for i in ser]
co2 = ser
co2.index = pd.date_range('1958-03-17', '2020-08-17', freq = 'M')
#pd.date_range(start='1958-03', end='2020-08', freq='M')
# Display the time series
print(co2.head())
# Decompose the time series into trend, seasonal, and residual components
decomposition = sm.tsa.seasonal_decompose(co2, model='additive')
# Plot the decomposed time series
plt.figure(figsize=(11, 16))
plt.subplot(411)
plt.plot(co2)
plt.title(r'Monthly Atmospheric CO$_2$ [ppm] at Mauna Loa Observatory')
plt.ylabel('Observed Data')
plt.subplot(412)
plt.plot(decomposition.trend)
plt.ylabel('Trend')
plt.subplot(413)
plt.plot(decomposition.seasonal)
plt.ylabel('Seasonality')
plt.subplot(414)
plt.plot(decomposition.resid)
plt.ylabel('Random')
plt.tight_layout()
plt.savefig("fig0702.eps")
plt.show()
#you can also use a simple command to make the plots
#decomposition.plot()
#but the figures do not look as nice
1958-03-31 315.70 1958-04-30 317.45 1958-05-31 317.51 1958-06-30 317.24 1958-07-31 315.86 Freq: M, Name: average, dtype: float64
train_series = ser
from statsmodels.tsa.api import SimpleExpSmoothing
#Use the data "train_series" and "ser" from
# the code for Fig. 7.2
# Fit Holt Winter's Exponential Smoothing model,
# with additive trend and season, then make
# a forecast
model = ExponentialSmoothing(train_series, seasonal_periods = 12, trend='add', seasonal='add', freq='M')
model_fit = model.fit()
# forecast 48 months
pred = model_fit.forecast(48)
s = seasonal_decompose(ser, model='additive')
# set up plot
fig, ax = plt.subplots()
# labels
plt.title('Observed (red) and ETS Forecast (blue)', pad=15)
plt.ylabel('Parts Per Million [ppm]', labelpad = 15)
# plot observed and forecast lines
plt.plot(s.observed, color='red', linewidth=2)
plt.plot(pred, linewidth = 2)
# save figure
plt.savefig("fig0703.eps")
plt.show()
from statsmodels.tsa.seasonal import seasonal_decompose
# Read the data
tda = pd.read_csv('data/StPaulStn.txt', header=None, delim_whitespace=True)
# Extracting Tmin and dates
t1 = tda.iloc[:, 7] # Assuming the date is in column 8 (0-indexed)
tmin = tda.iloc[:, 11] # Assuming Tmin is in column 12 (0-indexed)
# Selecting the date range: 1 Jan 1941-31 Dec 1950
print(tda.iloc[1461:1462, :])
#GHCND:USW00014927 ST. PAUL AIRPORT MN US 19410101
print(tda.iloc[5111:5112, :])
#GHCND:USW00014927 ST. PAUL AIRPORT MN US 19501213
ta = t1[1461:5111]
da = tmin[1461:5111]
# Convert date string to datetime
ta = pd.to_datetime(ta, format='%Y%m%d')
# Create a time series
tmin_ts = pd.Series(data=da.values, index=ta)
# Decompose the time series
decomposition = sm.tsa.seasonal_decompose(tmin_ts, model='additive', period=365)
#Plot the data and decompostions by a simple command
#decomposition.plot()
#Or using the following for nice figures
# Plot the decomposed time series
plt.figure(figsize=(11, 15))
plt.subplot(411)
plt.plot(decomposition.observed, label='Original')
plt.legend(loc='best')
plt.subplot(412)
plt.plot(decomposition.trend, label='Trend')
plt.legend(loc='best')
plt.subplot(413)
plt.plot(decomposition.seasonal, label='Seasonal')
plt.legend(loc='best')
plt.subplot(414)
plt.plot(decomposition.resid, label='Residual')
plt.legend(loc='best')
plt.tight_layout()
plt.savefig("fig0704.eps") #save the figure
plt.show()
0 1 2 3 4 5 6 7 8 \ 1461 GHCND:USW00014927 ST. PAUL DOWNTOWN AIRPORT MN US 19410101 11.7 9 10 11 12 1461 -9999.0 1.7 -1.1 -9999.0 0 1 2 3 4 5 6 7 8 \ 5111 GHCND:USW00014927 ST. PAUL DOWNTOWN AIRPORT MN US 19501231 0.0 9 10 11 12 5111 -9999.0 3.9 -6.1 -9999.0
#Another way: This code is printed in the book
os.chdir("/users/sshen/climstats")
#Read .txt data into Pandas DataFrame
dat = pd.read_fwf('data/StPaulStn.txt', header=None)
print(dat.iloc[1461:1462, :])
#GHCND:USW00014927 ST. PAUL AIRPORT MN US 19410101
print(dat.iloc[5111:5112, :])
#GHCND:USW00014927 ST. PAUL AIRPORT MN US 19501213
print(dat.shape)
Tmin = dat[1461:5111][11] #Extract Tmin data from column 12
Tmin.index = dat.iloc[1461:5111, 7]
# Convert date string to datetime
ta = pd.to_datetime(ta, format='%Y%m%d')
# Create a time series
tmin_ts = pd.Series(data=da.values, index=ta)
ets = sm.tsa.seasonal_decompose(tmin_ts, model='additive', period=365)
ets.plot() #plot the deomposition
plt.savefig("fig0704.eps") #save the figure
plt.show()
0 1 2 3 4 5 6 7 8 \ 1461 GHCND:USW00014927 ST. PAUL DOWNTOWN AIRPORT MN US 19410101 11.7 9 10 11 12 1461 -9999.0 1.7 -1.1 -9999.0 0 1 2 3 4 5 6 7 8 \ 5111 GHCND:USW00014927 ST. PAUL DOWNTOWN AIRPORT MN US 19501231 0.0 9 10 11 12 5111 -9999.0 3.9 -6.1 -9999.0 (7589, 13)
seed(1)# set seed to ensure the same simulation result
# create the white noise series
white_noise = [gauss(0.0, 1.0) for i in range(500)]
white_noise_ser = pd.Series(white_noise)
# set up plot
fig, ax = plt.subplots(figsize=(12,6))
# white noise
plt.plot(white_noise_ser, 'k')
#plt.plot(white_noise_ser, color='black')
# add labels
plt.title('A Simulated Time Series of White Noise',
pad = 15)
plt.ylabel(r'$W_{t}$', labelpad = 15)
plt.xlabel('Discrete Time Steps', labelpad = 15)
plt.savefig("fig0705.eps") # save figure
plt.show()
#set up the plot layout: two panels in a row
fig, ax = plt.subplots(1,2, figsize = (12, 5))
#histogram set up
ax[0].set_xlim(-4,4)
ax[0].set_ylim(0,.5)
ax[0].set_title('Histogram of a White Noise Time Series',
pad = 20, size = 18)
ax[0].set_xlabel(r'$W_{t}$', labelpad = 15, size = 20)
ax[0].set_ylabel('Density', labelpad = 15, size = 20)
#Plot the histogram of white noise
ax[0].hist(white_noise_ser, density=True,
edgecolor='black', fc='None')
#Fit the histogram to the standard normal distribution
mu = 0
variance = 1
sigma = math.sqrt(variance)
x = np.linspace(-4,4,100)
ax[0].plot(x, stats.norm.pdf(x, mu, sigma), color='blue')
#Plot ACF
sm.graphics.tsa.plot_acf(white_noise_ser,
ax=ax[1], color = 'k')
#Add labels and ticks for ACF
ax[1].set_title('Auto-correlation of White Noise',
pad = 20, size = 20)
ax[1].set_xlabel('Lag', labelpad = 15, size = 20)
ax[1].set_ylabel('ACF', labelpad = 15, size = 20)
ax[1].set_xticks([0,5,10,15,20,25])
plt.tight_layout()
plt.savefig("fig0706.pdf") # save figure
plt.show()
#Python plot Fig. 7.7(a): Random walk time series
#set seed to ensure the same simulation result
seed(100)
# Fig. 7.7(a): Five realizations with different sigma values
n = 1000 #Total number of time steps
m = 5 #Number of time series realizations
drift = 0.05 #Drift delta = 0.05
sd = [0.1, 0.4, 0.7, 1.0, 1.5] #SD sigma
#generate the random walk time series data
X = np.zeros((m,n))
for i in range(m):
white_noise = [gauss(0.0, sd[i]) for k in range(n)]
for j in range(n-1):
X[i, j+1] = drift + X[i,j] + white_noise[j]
# plot the five time series realizations
seed(100) #set seed
fig, ax = plt.subplots() #set up plot
#plot five random walks
plt.plot(X[0,:], color='black', linewidth=2,
label=r'$\sigma$ = 0.1')
plt.plot(X[1,:], color='blue', linewidth=2,
label=r'$\sigma$ = 0.4')
plt.plot(X[2,:], color='red', linewidth=2,
label=r'$\sigma$ = 0.7')
plt.plot(X[3,:], color='orange', linewidth=2,
label=r'$\sigma$ = 1.0')
plt.plot(X[4,:], color='brown', linewidth=2,
label=r'$\sigma$ = 1.5')
#add legend
fig.legend(loc=(.15,.65), fontsize = 18)
blue_series = X[1,:]
#add labels
plt.title(r'Five RW realizations: $\delta = 0.05$',
pad = 15)
plt.ylabel(r'$X_{t}$', labelpad=15)
plt.xlabel('Time steps: t', labelpad=15)
plt.savefig("fig0707a.eps") # save figure
plt.show()
# Fig. 7.7(b): 100 realizations
# random walk to show variance increasing with time
seed(100)# set seed
n = 1000 #Total number of time steps
m = 100 #Number of time series realizations
drift = 0 #Drift delta = 0
sd = np.ones(m) #SD sigma is the same for all realizations
#Generate the random walk time series data
X = np.zeros((m,n))
for i in range(m):
white_noise = [gauss(0.0, sd[i]) for k in range(n)]
for j in range(n-1):
X[i, j+1] = drift + X[i,j] + white_noise[j]
# plot the time series realizations
seed(100)# set seed
# set up plot
fig, ax = plt.subplots()
# random walks
for i in range(m):
plt.plot(X[i,:], color='black', linewidth=1.5)
#sd of cols of X
col_sd = np.std(X,axis=0)
plt.plot(col_sd, color='red', linewidth=2,
label='Standard deviation of the simulations: sd(t)')
plt.plot(-col_sd, color='red', linewidth=2)
#theoretical sd
theo_sd = np.sqrt(np.arange(1,n+1))
plt.plot(theo_sd, color='blue', linewidth=2,
label=r'Theoretical formula: SD=$\sqrt{t}$')
plt.plot(-theo_sd, color='blue', linewidth=2)
#Add legend
fig.legend(loc=(.16,.79), fontsize = 18)
#Add labels
plt.title(r'100 RW Realizations: $\delta = 0, \sigma = 1$',
pad=15)
plt.ylabel(r'$X_{t}$', labelpad=10)
plt.xlabel('Time steps: t', labelpad=15)
ax.set_ylim(-100,110)
ax.set_yticks([-100,-50,0,50,100])
plt.savefig("fig0708b.eps") # save figure
plt.show()
# Stationarity test for the blue time series in Fig. 7.7:
adf_test = adfuller(blue_series, autolag='AIC')
print('p-value = ', adf_test[1])
#p-value = 0.5653243650129005
# note: the p-value is large,
# meaning that we keep the null hypothesis of the ADF test:
# the blue time series is non-stationary.
p-value = 0.5653243650129005
#Stationarity test for white noise:
adf_test = adfuller(white_noise, autolag='AIC')
print('p-value = ', adf_test[1])
#p-value = 0.0
# note: the p-value is small,
# meaning that we reject the null hypothesis of the ADF test:
# the white noise time series is stationary.
p-value = 0.0
seed(101)# set seed
n = 124
# white noise
w = [gauss(0,1) for k in range(n)]
ma1_uniform = np.zeros(n)
ma1_uneven = np.zeros(n)
ma5_uniform = np.zeros(n)
#generate three MA time series
for t in range(4,n):
ma1_uniform[t] = .5*w[t] + .5*w[t-1]
ma1_uneven[t] = .9*w[t] + .1*w[t-1]
ma5_uniform[t] = (w[t] + w[t-1] + w[t-2]
+ w[t-3] + w[t-4]) / 5
# plot the time series realizations
seed(101)# set seed
# set up plot
fig, ax = plt.subplots(figsize = (12,6))
plt.plot(ma1_uniform[4:n], color='black',
label='MA(1): a = 0.5, b = 0.5')
plt.plot(ma1_uneven[4:n], color='blue',
label='MA(1): a = 0.9, b = 0.1')
plt.plot(ma5_uniform[4:n], color='red',
label='MA(5): Uniform weight = 1/5')
# add legends
fig.legend(loc=(.16,.71), prop={"size":16})
# add labels
plt.title('Realizations of MA time series',
pad = 15)
plt.ylabel(r'$X_{t}$', labelpad=15)
plt.xlabel('Time steps', labelpad=15)
ax.set_ylim(-2,4)
plt.savefig("fig0708.eps")# save figure
plt.show()
seed(791)# set seed
n = 121 # total number of time steps
m = 4 # number of time series realizations
lam = .9 # decay parameter
X = np.zeros((m,n)) # matrix for data
X[:,0] = 4 #set the initial condition at t=0
#generate the time series data
for i in range(m):
for j in range(n-1):
X[i,j+1] = X[i,j]*lam + gauss(0,.25)
# plot the realizations and their mean
fig, ax = plt.subplots(figsize = (12,6)) # set up plot
# moving average time series
plt.plot(X[0,:], color='purple', linewidth=2)
plt.plot(X[1,:], color='blue', linewidth=2)
plt.plot(X[2,:], color='red', linewidth=2)
plt.plot(X[3,:], color='orange', linewidth=2)
#plot the mean of above series
col_means = np.mean(X, axis=0)
plt.plot(col_means, color='black', linewidth=2)
#add labels
plt.title('Realizations of AR(1) time series and their mean',
pad = 15)
plt.ylabel(r'$X_{t}$', labelpad=15)
plt.xlabel('Time steps', labelpad=15)
ax.set_ylim(-2,4)
plt.savefig("fig0709.eps")# save figure
plt.show()
seed(82) #set seed to ensure the same result
n = 481 # total number of time steps
m = 2 # number of time series realizations
lam = [0.9, 0.6] # decay parameter
X = np.zeros((m,n)) # placeholder for data matrix
X[:,0] = 4
# simulate the time series data
for k in range(m):
for i in range(n-1):
X[k,i+1] = X[k,i]*lam[k] + gauss(0,0.25)
# plot the auto-correlation function
fig, ax = plt.subplots(1,2, figsize = (14,6))
# ACF
sm.graphics.tsa.plot_acf(X[0,:], ax=ax[0],
lags=36, color = 'k')
sm.graphics.tsa.plot_acf(X[1,:], ax=ax[1], lags=36,
vlines_kwargs={'color':'red'}, color = 'red')
#add labels
ax[0].set_title('Auto−correlation function of AR(1)',
pad = 15)
ax[0].set_xlabel('Time lag', labelpad=15)
ax[0].set_ylabel('ACF', labelpad = -5)
ax[0].set_xticks([0,5,15,25,35])
ax[0].set_yticks([-0.2, 0.2, 0.6,1.0])
ax[0].annotate(r'Decay parameter $\lambda$ = 0.9',
xy=(5,.8), size = 22)
ax[1].set_title('Auto−correlation function of AR(1)',
pad = 15)
ax[1].set_xlabel('Time lag', labelpad=15)
ax[1].set_ylabel('ACF', labelpad=10)
ax[1].set_xticks([0,5,15,25,35])
ax[1].set_yticks([0, 0.4,0.8])
ax[1].annotate(r'Decay parameter $\lambda$ = 0.6',
xy=(5,.8), color='red', size = 22)
plt.tight_layout()
plt.savefig("fig0710.pdf")# save figure
plt.show()
## NOTE: Run the code for Figs. 7.1 and 7.2 first ##
from statsmodels.tsa.arima.model import ARIMA
# set up series data variable
data_values = [float(i) for i in ser]
data_dates = pd.date_range('1958-03-17',
'2020-08-17', freq = 'M')
co2_series = pd.Series(data_values,
index=data_dates)
# Fit AR(1) model
AR1_model = ARIMA(co2_series, order=(1,0,0))
AR1_model_fit = AR1_model.fit()
AR1_residuals = AR1_model_fit.resid
AR1_fit = pd.Series(co2_series.values -
AR1_residuals.values, data_dates)
# Fit MA(1) model
MA1_model = ARIMA(co2_series, order=(0,0,1))
MA1_model_fit = MA1_model.fit()
MA1_residuals = MA1_model_fit.resid
MA1_fit = pd.Series(co2_series.values -
MA1_residuals.values, data_dates)
# drop first values of models
AR1_fit.drop(AR1_fit.index[0], inplace=True)
MA1_fit.drop(MA1_fit.index[0], inplace=True)
fig, ax = plt.subplots()#plot setup
#prepare labels
plt.title(r'Monthly Atmospheric CO$_2$ at Mauna Loa Observatory',
pad = 15)
plt.ylabel('Parts Per Million [ppm]', labelpad = 15)
#plot observed line
plt.plot(ser, color='red', linewidth=2,
linestyle='-', label='Observed data')
#plot AR(1) model
plt.plot(AR1_fit, color='black', linestyle='--',
linewidth=1, label='AR(1) model fitting')
#plot MA(1) model
plt.plot(MA1_fit, color='blue', linestyle='--',
linewidth=2, label='MA(1) model fitting')
#add legend
plt1 = plt.legend(loc=(.10,.7), fontsize = 18)
plt1.get_texts()[0].set_color('r')
plt1.get_texts()[2].set_color('b')
plt.savefig("fig0711.pdf")# save figure
plt.show()