Chapter 8: Spectral Analysis and Filtering of Time Series


The Python code was developed based on the R code written by
Samuel Shen, Distinguished Professor, San Diego State University, California, USA

Kaelia Okamura composed and finalized the code for this chapter.



In [1]:
import pandas as pd
import numpy as np
import math
from random import seed
import statistics as stats
import scipy.stats as scistats
from scipy.stats import norm
import matplotlib.pyplot as plt
import matplotlib.colors as color
import datetime
import os
import sys
import statsmodels.api as sm
from pandas import read_table
from netCDF4 import Dataset as ds
import shapely
import warnings
warnings.filterwarnings("ignore")

from matplotlib import cm as cm1
import netCDF4 as nc
from matplotlib.colors import LinearSegmentedColormap
from mpl_toolkits.basemap import Basemap, cm, shiftgrid, addcyclic
from matplotlib.patches import Polygon
from scipy.ndimage import gaussian_filter
In [2]:
# Style Dictionary to standarize plotting 
# scheme between different python scripts 
styledict = {'xtick.labelsize':21,
             'xtick.major.size':9,
             'xtick.major.width':1,
             'ytick.labelsize':21,
             'ytick.major.size':9,
             'ytick.major.width':1,
             'legend.framealpha':0.0,
             'legend.fontsize':15,
             'axes.labelsize':22,
             'axes.titlesize':22,
             'axes.linewidth':2,
             'figure.figsize':(12,8),
             'savefig.format':'jpg'}
plt.rcParams.update(**styledict)

# define font style for text
font = {'family': 'serif',
        'color':  'blue',
        'weight': 'normal',
        'size': 12,
        }
In [3]:
# Change current working directory to where the data is stored
os.chdir("/Users/sshen/climstats/")

Figure 8.1: Waves of different amplitudes, periods and phases¶

In [4]:
t = np.linspace(0,2,2000)
# A=1, tau=1, phi=0
y1 = np.sin(2*np.pi*t)
# A=1, tau=0.5, phi=0
y2 = np.sin(2*np.pi*t/0.5)
# A=0.5, tau=1, phi=pi/2
y3 = 0.5 * np.sin(2*np.pi*t - np.pi/2)
# A=0.5, tau=1, phi=pi/6
y4 = 0.5 * np.sin(2*np.pi*t/0.5 + np.pi/6)

fig, ax = plt.subplots(2, 2, figsize=(13, 8))
# plot t,y1
ax[0, 0].plot(t, y1, color = 'k', linewidth = 2)
ax[0, 0].set_title("Function T(t): \
A=1, $\u03C4$=1, $\phi$=0", 
                   pad=15, size = 18)
ax[0,0].set_ylabel("T(t)", labelpad=10, size = 18)
ax[0,0].tick_params(labelsize=15)
ax[0,0].set_yticks([-1.0,0.0,1.0])
# plot t,y2
ax[0, 1].plot(t, y2, color = 'red', linewidth = 2)
ax[0, 1].set_title("Function T(t): A=1, \
$\u03C4$=0.5, $\phi$=0",
                  pad=16, size = 18)
ax[0,1].set_ylabel("T(t)", labelpad=10, size = 18)
ax[0,1].tick_params(labelsize=15)
ax[0,1].set_yticks([-1.0,0.0,1.0])
# plot t,y3
ax[1, 0].plot(t, y3, color = 'blue', linewidth = 2)
ax[1, 0].set_title("Function T(t): A=0.5, \
$\u03C4$=1, $\phi=\dfrac{\pi}{2}$",
                  pad=18, size = 18)
ax[1,0].set_xlabel("Time t", labelpad=15, size = 18)
ax[1,0].set_ylabel("T(t)", labelpad=10, size = 18)
ax[1,0].tick_params(labelsize=15)
ax[1,0].set_yticks([-1.0,0.0,1.0])
# plot t,y4
ax[1, 1].plot(t, y4, color = 'purple', linewidth = 2)
ax[1, 1].set_title('Function T(t): A=0.5, \
$\u03C4$=0.5, $\phi=\dfrac{\pi}{6}$',
                  pad=18, size = 18)
ax[1,1].set_xlabel("Time t", labelpad=15, size = 18)
ax[1,1].set_ylabel("T(t)", labelpad=10, size = 18)
ax[1,1].tick_params(labelsize=15)
ax[1,1].set_yticks([-1.0,0.0,1.0])
fig.tight_layout(pad=3.0)
plt.savefig("fig0801.eps") # save the figure

Figure 8.2: Superposition of several harmonics¶

In [5]:
fig, ax = plt.subplots(figsize=(12, 6))
plt.plot(t, y1 + y2 + 2*y3 + 2*y4, color = 'b', linewidth=2)
plt.title("Superposition of several harmonics", pad=15)
plt.xlabel("Time t", labelpad = 15)
plt.ylabel("T(t)", labelpad = 15)
plt.yticks([-3,-2,-1,0,1], rotation = 90)
plt.xticks([0,0.5,1.0,1.5,2.0])
plt.savefig("fig0802.eps") # save figure
plt.show()

Python code for the discrete sine transform (DST)¶

In [6]:
import numpy as np
import math
M = 5
time = np.linspace(1, M, M)-1/2
freq = np.linspace(1, M, M)-1/2
time_freq = np.outer(time, freq)
# Construct a sine transform matrix
Phi = np.sqrt(2/M)*np.sin((math.pi/M)*time_freq)
# Verify Phi is an orthogonal matrix
abs(np.dot(Phi,Phi.transpose()).round(2))
# The output is an identity matrix

# Given time series data
ts = time**2
# DST transform
ts_dst = np.dot(Phi.transpose(),ts)
# Inverse DST
Recon = np.dot(Phi, ts_dst)
# Verify Recon = ts
Recon.transpose()
#array([ 0.25,  2.25,  6.25, 12.25, 20.25])
Out[6]:
array([ 0.25,  2.25,  6.25, 12.25, 20.25])

Figure 8.3: Polar expression of a complex number¶

In [7]:
r = 10
bb = 1.4*r
t = np.linspace(0,2*np.pi,200)
x = r*np.cos(t)
y = r*np.sin(t)
aa = 1.4*r
x1 = r*np.cos(np.pi/3)
y1 = r*np.sin(np.pi/3)
t2 = np.linspace(0,np.pi/3,10)
x2 = 0.22*r * np.cos(t2)
y2 = 0.22*r * np.sin(t2)

import pylab as pl
from matplotlib import collections  as mc
fig, ax = plt.subplots(figsize=(10, 10))
ax.plot(x,y,color='k')# plot circle
ax.set_xlim(-14,14)
ax.set_ylim(-14,14)
ax.axis('off')# hide axis
# add arrows
ax.annotate("", xy=(14,0), xytext=(-13, 0),
    arrowprops=dict(width=0.5, color = 'k'))
ax.annotate("", xy=(0,14), xytext=(0, -14),
    arrowprops=dict(width=0.5, color = 'k'))
ax.plot(x2,y2,color='k')# plot angle
# plot line segments
segments = [[(x1, 0),(x1,y1)], [(0,0),(x1,y1)]]
ls = mc.LineCollection(segments, colors='k', linewidths=2)
ax.add_collection(ls)
# add text annotations
ax.text(10.3,-1.5, "Real Axis", size = 18)
ax.text(0.5,12.3, "Imaginary Axis", size = 18)
ax.text(-1,-1, "0", size = 18)
ax.text(1,-1.2, r"x=rcos$\theta$", size = 18)
ax.text(5.2,3, r"y=rsin$\theta$", size = 18)
ax.text(2, 4.7, "r", size = 18)
ax.text(2, 1.3, r"$\theta$", size = 18)
ax.text(5, 9, "(x,y)", size = 18)
ax.text(4.5, 10.5, 
 r"z = re$^{i\theta}$ = rcos$\theta$ + $i$ rsin$\theta$", 
        size = 18)
plt.savefig("fig0803.eps") # save figure
plt.show()

Figure 8.4: The unitary DFT matrix¶

In [8]:
M = 200
i = complex(0,1)#construct the imaginary unit
time_freq = np.outer(np.linspace(0,M-1,M), 
                     np.linspace(0,M-1,M))
#Construct the unitary DFT matrix U
U = np.exp(i*2*np.pi*time_freq/M) / np.sqrt(M)
Ure = np.real(U)# get real part of U
Uim = np.imag(U)# get imaginary part of U
In [9]:
# plot the real part of U
fig, ax = plt.subplots(figsize=(10, 9))
plt.contourf(np.linspace(0,M-1,M),np.linspace(0,M-1,M), 
             Ure, cmap = "autumn")
plt.title("Real Part of the DFT Matrix", pad = 10)
plt.xlabel("t", labelpad = 10)
plt.ylabel("k", labelpad = 10)
plt.yticks([0,50,100,150])
# add color bar
plt.colorbar(ticks = [-0.08,-0.04,0,0.04,0.08])
plt.savefig("fig0804a.png") # save figure
plt.show()
In [10]:
# plot the imaginary part of U
fig, ax = plt.subplots(figsize=(10,9))
plt.contourf(np.linspace(0,M-1,M),np.linspace(0,M-1,M), 
             Uim, cmap = "rainbow")
plt.title("Imaginary Part of the DFT Matrix", pad = 10)
plt.xlabel("t", labelpad = 10)
plt.ylabel("k", labelpad = 10)
plt.yticks([0,50,100,150])
# add color bar
plt.colorbar(ticks = [-0.08,-0.04,0,0.04,0.08])
plt.savefig("fig0804b.png")# save figure
plt.show()

An example of DFT and iDFT¶

In [11]:
M = 9
ts = (np.arange(1, M+1))**(1/2)
np.set_printoptions(precision=2)
print(ts)
#[1.   1.41 1.73 2.   2.24 ...
i = complex(0,1)#construct the imaginary unit
time_freq = np.outer(np.linspace(0,M-1,M), 
                     np.linspace(0,M-1,M))
#Construct the unitary DFT matrix U
U = np.exp(i*2*np.pi*time_freq/M) / np.sqrt(M)
#DFT of ts
ts_dft = np.dot(U.conj().transpose(), ts) 
#Inverse DFT for reconstruction
ts_rec = np.dot(U, ts_dft)
np.set_printoptions(precision=2)
print(ts_rec)
#[1.  -5.83e-15j 1.41-2.43e-15j 1.73-3.05e-16j ...
[1.   1.41 1.73 2.   2.24 2.45 2.65 2.83 3.  ]
[1.  -5.83e-15j 1.41-2.43e-15j 1.73-3.05e-16j 2.  -2.83e-15j
 2.24+4.44e-16j 2.45+1.44e-15j 2.65+4.11e-16j 2.83+4.16e-16j
 3.  +2.89e-15j]

Figure 8.5: Real and Imaginary parts of DFT basis functions¶

In [12]:
M = 200
i = complex(0,1)#construct the imaginary unit
time_freq = np.outer(np.linspace(0,M-1,M), 
                     np.linspace(0,M-1,M))
#Construct the unitary DFT matrix U
U = np.exp(i*2*np.pi*time_freq/M) / np.sqrt(M)
Ure = np.real(U)# get real part of U
Uim = np.imag(U)# get imaginary part of U
time = np.linspace(0,M-1,M)
heights = [0.92,0.7, 0.7, 1.16,
           0.92,0.7, 0.7, 1.16]
widths = [4,3.3]
In [13]:
fig, ax = plt.subplots(4, 2, figsize=(13, 8), sharex=True)
fig.subplots_adjust(hspace=0)
fig.subplots_adjust(wspace=0)
# real k=3
ax[0,0].plot(time, Ure[3,:], 'k-')
ax[0,0].set_title("Real part of DFT harmonics",
                  pad=10, size = 17)
ax[0,0].set_ylabel("k=3", labelpad = 40, size = 17)
ax[0,0].axes.yaxis.set_ticks([])
# imaginary k=3
ax[0,1].plot(time, Uim[3,:], 'k-')
ax[0,1].set_title("Imaginary part of DFT harmonics",
                  pad=10, size = 17)
ax[0,1].axes.yaxis.set_ticks([])
# real k=2
ax[1,0].plot(time, Ure[2,:], 'k-')
ax[1,0].set_ylabel("k=2", labelpad = 40, size = 17)
ax[1,0].axes.yaxis.set_ticks([])
# imaginary k=2
ax[1,1].plot(time, Uim[2,:], 'k-')
ax[1,1].axes.yaxis.set_ticks([])
# real k=1
ax[2,0].plot(time, Ure[1,:], 'k-')
ax[2,0].set_ylabel("k=1", labelpad = 40, size = 17)
ax[2,0].axes.yaxis.set_ticks([])
# imaginary k=1
ax[2,1].plot(time, Uim[1,:], 'k-')
ax[2,1].axes.yaxis.set_ticks([])
# real k=0
ax[3,0].plot(time, Ure[0,:], 'k-')
ax[3,0].set_xlabel("t", size = 17, labelpad = 10)
ax[3,0].set_ylabel("k=0", labelpad = 15, size = 17)
ax[3,0].set_ylim(-0.1,0.1)
ax[3,0].tick_params(axis = 'y',labelsize = 15,
                    labelrotation=90)
ax[3,0].tick_params(axis = 'x',labelsize = 15)
ax[3,0].set_yticks([-0.1,0,0.1])
# imaginary k=0
ax[3,1].plot(time, Uim[0,:], 'k-')
ax[3,1].set_xlabel("t", size = 17, labelpad = 10)
ax[3,1].axes.yaxis.set_ticks([])
ax[3,1].tick_params(axis = 'x',labelsize = 15)
plt.savefig("fig0805.eps") # save figure
plt.show()

Figure 8.6: Tokyo monthly temperature: 2011-2020¶

In [14]:
import netCDF4 as nc
# read the data file from the folder named "data"
ncep ='data/air.mon.mean.nc' # file name and path
nc = nc.Dataset(ncep)
# get detailed description of dataset
print(nc)
# define variables
Lon = nc.variables['lon'][:]
Lat1 = nc.variables['lat'][:]
Time = nc.variables['time'][:]
NcepT = nc.variables['air'][:]
# 1684 months from Jan 1880-June 2019
print(np.shape(NcepT))
print(Lat1[22])
print(Lon[56])
<class 'netCDF4._netCDF4.Dataset'>
root group (NETCDF4_CLASSIC data model, file format HDF5):
    description: Data from NCEP initialized reanalysis (4x/day).  These are the 0.9950 sigma level values
    platform: Model
    Conventions: COARDS
    NCO: 20121012
    history: Thu May  4 20:11:16 2000: ncrcat -d time,0,623 /Datasets/ncep.reanalysis.derived/surface/air.mon.mean.nc air.mon.mean.nc
Thu May  4 18:11:50 2000: ncrcat -d time,0,622 /Datasets/ncep.reanalysis.derived/surface/air.mon.mean.nc ./surface/air.mon.mean.nc
Mon Jul  5 23:47:18 1999: ncrcat ./air.mon.mean.nc /Datasets/ncep.reanalysis.derived/surface/air.mon.mean.nc /dm/dmwork/nmc.rean.ingest/combinedMMs/surface/air.mon.mean.nc
/home/hoop/crdc/cpreanjuke2farm/cpreanjuke2farm Mon Oct 23 21:04:20 1995 from air.sfc.gauss.85.nc
created 95/03/13 by Hoop (netCDF2.3)
Converted to chunked, deflated non-packed NetCDF4 2014/09
    title: monthly mean air.sig995 from the NCEP Reanalysis
    dataset_title: NCEP-NCAR Reanalysis 1
    References: http://www.psl.noaa.gov/data/gridded/data.ncep.reanalysis.derived.html
    dimensions(sizes): lat(73), lon(144), time(878)
    variables(dimensions): float32 lat(lat), float32 lon(lon), float64 time(time), float32 air(time, lat, lon)
    groups: 
(878, 73, 144)
35.0
140.0
In [15]:
# change dates from Julian to standard
m1 = []
for t in ((Time/24).filled()):
    day = t
    start = datetime.date(1800,1,1)
    delta = datetime.timedelta(t)
    offset = start + delta
    m = offset.strftime('%Y-%m-%d')
    m1.append(m)
print(m1[756][0:4]) # print out year
print(m1[756][5:7]) # print out month
2011
01
In [16]:
# crop NcepT for years 2011-2020
TokyoT = NcepT[756:876, 28, 55]
M = len(TokyoT)
t1 = np.linspace(2011, 2020, M)
In [17]:
fig, ax = plt.subplots()
ax.plot(t1, TokyoT, 'k-', linewidth = 2)
ax.set_title("NCEP Monthly Tokyo Temperature: 2011-2020",
            pad = 15)
ax.set_xlabel("Time [month]", labelpad = 15)
ax.set_ylabel("Temperature [$\degree$C]", labelpad = 15)
ax.set_xticks([2012, 2014, 2016, 2018, 2020])
ax.set_yticks([22,24,26,28])
# save figure
plt.savefig("fig0806.eps")
plt.show()

Figure 8.7: Periodogram of Tokyo temperature data¶

In [18]:
from numpy.fft import fft, ifft
In [19]:
Tokyo_FFT = np.fft.fft(TokyoT - np.mean(TokyoT))
kk = np.linspace(0,59,60)
f_mon = kk/M
f_year = 12*kk/M
tau = 120/kk
Mod_squared = ((np.sqrt(np.real(Tokyo_FFT)**2 + 
                np.imag(Tokyo_FFT)**2))[0:60]**2)/100
In [20]:
fig, ax = plt.subplots(2, 2, figsize = (13,12))
fig.subplots_adjust(hspace=0.35)
fig.subplots_adjust(wspace=0.35)
# top left
ax[0,0].plot(kk, Mod_squared, 'k-')
ax[0,0].set_title("Periodogram of monthly Tokyo temp", 
                  pad = 10, size = 15)
ax[0,0].set_xlabel("k", size = 15)
ax[0,0].set_ylabel("Spectral Power", labelpad = 10, 
                   size = 15)
ax[0,0].tick_params(labelsize = 15)
# top right
ax[0,1].plot(f_year, Mod_squared, 'k-')
ax[0,1].set_title("Periodogram in terms of year", 
                  pad = 10, size = 15)
ax[0,1].set_xlabel("Cycles per year", 
                   size = 15, labelpad = 10)
ax[0,1].set_ylabel("Spectral Power", 
                   labelpad = 10, size = 15)
ax[0,1].tick_params(labelsize = 15)
# bottom left
ax[1,0].plot(f_mon, Mod_squared, 'k-')
ax[1,0].set_title("Periodogram in terms of month", 
                  pad = 10, size = 15)
ax[1,0].set_xlabel("Cycles per month", 
                   size = 15, labelpad = 10)
ax[1,0].set_ylabel("Spectral Power", 
                   labelpad = 10, size = 15)
ax[1,0].tick_params(labelsize = 15)
# bottom right
ax[1,1].plot(tau, Mod_squared, 'k-')
ax[1,1].set_title("Periodogram in terms of period", 
                  pad = 10, size = 15)
ax[1,1].set_xlabel("Period in Month (in log scale)", 
                   size = 15, labelpad = 10)
ax[1,1].set_ylabel("Spectral Power", 
                   labelpad = 10, size = 15)
ax[1,1].tick_params(labelsize = 15)
plt.savefig("fig0807.eps") # save figure
plt.show()

Python code to verify Parseval's identity¶

In [21]:
import numpy as np
from numpy.fft import fft, ifft
M = 8
X = np.random.normal(0, 1,  M)#Time series data
DFT_X = fft(X)/np.sqrt(M) #Compute DFT using FFT
np.linalg.norm(X) - np.linalg.norm(DFT_X )
#4.440892098500626e-16
Out[21]:
2.220446049250313e-16

Figure 8.8: Approximate a function by a partial sum of Fourier series¶

In [22]:
i = complex(0,1)
T = 2
t = np.linspace(-T/2, T/2, 401)
# define the original function x(t)
# all (-) values = -4, all (+) values = 4
condlist = [t < 0, t > 0]
choicelist = [-4, 4]
xt = np.select(condlist, choicelist)
J = [3, 10, 100]
# plot x(t)
plt.plot(t,xt, color = 'red', linewidth = 6)
plt.title('Approximate x(t) by a finite sum of Fourier series', 
          pad = 15)
plt.xlabel("t", labelpad = 15)
plt.ylabel("x(t)", labelpad = 10)
plt.xticks([-1.0,-0.5,0.0,0.5,1.0])
# plot the sums
Fcol = ('brown', 'blue', 'black')
for j in range(3):
    k = np.linspace(-J[j],J[j],2*J[j]+1)
    a = 8/(i*np.pi*(2*k-1))
    b = np.exp(i * np.pi * np.outer(2*k-1, t))
    RK = np.sum((a[:, np.newaxis] * b), axis=0)
    plt.plot(t, np.real(RK), color = Fcol[j], linewidth = 2)
    plt.legend(['Function x(t)','Sum of 7 terms', 
     'Sum of 21 terms', 'Sum of 201 terms'], fontsize = 20)
    
plt.savefig("fig0808.eps") # save the figure

Fig. 8.9: Data, signal, and noise for a DST filter: M = 51¶

In [23]:
from numpy.random import normal
fig, ax = plt.subplots(figsize = (12,7))
seed(102)# set seed to ensure the same simulation result
M = 51
t = np.linspace(0, 10, M)
ys = 10*np.sin(t)
yn = np.random.normal(0, 3, M)
yd = ys + yn
plt.plot(t, ys, color = 'blue', linewidth = 5,
        label = 'Signal')
plt.plot(t, yn, color = 'brown',linewidth = 1.5,
        label ='Noise')
plt.plot(t, yd, 'o-', color = 'k', linewidth = 3,
        label='Data = Signal + Noise')
plt.title('Data, signal, and noise for a DST filter')
plt.ylim(-20, 20)
plt.xlabel('t', fontsize = 20)
plt.ylabel('y', fontsize = 20)
plt.legend(loc='lower left', fontsize = 20)
plt.savefig("fig0809.eps") # save the figure
plt.show()