Chapter 6: Covariance Matrices, EOFs, and PCs


The Python code was developed based on the R code written by
Samuel Shen, Distinguished Professor, San Diego State University, California, USA

Kaelia Okamura, Briana Ramirez, and Danielle Lafarga contributed to the Python code development.
Kaelia Okamura composed and finalized the code.



In [1]:
import pandas as pd
import numpy as np
import math
import statistics as stats
import scipy.stats as scistats
import matplotlib.pyplot as plt
import matplotlib.colors as color
import os
import sys
import statsmodels.api as sm
from pandas import read_table
from netCDF4 import Dataset as ds
import shapely
import warnings
warnings.filterwarnings("ignore")

from matplotlib import cm as cm1
from numpy.linalg import multi_dot
import netCDF4 as nc
from matplotlib.colors import LinearSegmentedColormap
from mpl_toolkits.basemap import Basemap, cm, shiftgrid, addcyclic
from matplotlib.patches import Polygon
from scipy.ndimage import gaussian_filter
In [2]:
# Style Dictionary to standarize plotting 
# scheme between different python scripts 
styledict = {'xtick.labelsize':21,
             'xtick.major.size':9,
             'xtick.major.width':1,
             'ytick.labelsize':21,
             'ytick.major.size':9,
             'ytick.major.width':1,
             'legend.framealpha':0.0,
             'legend.fontsize':15,
             'axes.labelsize':22,
             'axes.titlesize':22,
             'axes.linewidth':2,
             'figure.figsize':(12,8),
             'savefig.format':'jpg'}
plt.rcParams.update(**styledict)

# define font style for text
font = {'family': 'serif',
        'color':  'blue',
        'weight': 'normal',
        'size': 12,
        }
In [3]:
#Go to the working directory
import os
os.chdir("/Users/sshen/climstats")

Figure 6.1a: SAT covariance plot along a zonal band¶

In [4]:
import netCDF4 as nc
#import .nc data
nc = nc.Dataset("data/air.mon.anom.nc")
# extrat data for variables
lon = nc.variables['lon'][:]
lat = nc.variables['lat'][:]
time = nc.variables['time'][:]
sat = nc.variables['air'][:] 
#sat = surface air temp anomalies: mon, 5deg grid
# 1674 months from Jan 1880-June 2019: 139yrs 6mons
print(np.shape(sat)) #(1674, 36, 72): mon, lat, lon
#print(nc) 
Dec = list(np.arange(11, 1674, 12))#Dec index
Decsat = sat[Dec, :, :] #extract Dec data
# space-time data matrix STsat
N = 72 * 36
P = len(Dec)
STsat = np.zeros((N,P))
for k in range(P):
    STsat[:,k] = Decsat[k,:,:].flatten()
print(STsat.shape)#(2592, 139) 139 Decembers
#print(STsat[0:4, 0:4])

#Space-time dataframe with lat, lon, time
# repeat each value in lat 72 times
LAT = np.repeat(lat,72).reshape(2592,1) 
# repeat lon 36 times
LON = np.tile(lon,36).reshape(2592,1)
STanom0 = np.hstack((LAT, LON, STsat))
yrlist = list(np.arange(1880, 2019))
cnames = ["LAT", "LON"] + yrlist
STanom = pd.DataFrame(STanom0, columns=cnames)

#Select the zonal band
n1 = np.where((STanom["LAT"] > -4) & (STanom["LAT"] < 0)\
          & (STanom["LON"] > 0) & (STanom["LON"] < 360))
P1 = 81 #Dec 1961 in STsat space-time data matrix
P2 = 130 #Dec 2010
P = P2 - P1 + 1 #50 years
print(STsat.shape) #The space-time data
da0 = STsat[n1,P1:(P2 + 1)]
da1 = da0[0,:,:]
(1674, 36, 72)
(2592, 139)
(2592, 139)
In [5]:
STanom #Display the space-time data frame
Out[5]:
LAT LON 1880 1881 1882 1883 1884 1885 1886 1887 ... 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018
0 -87.5 2.5 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 ... -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36
1 -87.5 7.5 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 ... -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36
2 -87.5 12.5 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 ... -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36
3 -87.5 17.5 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 ... -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36
4 -87.5 22.5 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 ... -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2587 87.5 337.5 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 ... -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36
2588 87.5 342.5 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 ... -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36
2589 87.5 347.5 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 ... -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36
2590 87.5 352.5 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 ... -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36
2591 87.5 357.5 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 ... -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36 -9.969210e+36

2592 rows × 141 columns

In [6]:
# define AreaFrac as a float for future manipulation
AreaFrac = np.sqrt(np.cos(LAT[n1]*np.pi/180))
#AreaFrac = float(np.sqrt(np.cos(Lat1[0]*np.pi/180)))

# subtract row means from dat1 to get math anomalies
dat2 = da1 - da1.mean(axis=1, keepdims=True)
# multiply each value by AreaFrac
Banddat = AreaFrac * dat2
#print(Banddat.shape)
covBand = np.dot(Banddat, Banddat.transpose())
# check max and min values
print(np.max(covBand))
print(np.min(covBand))
covBand62 = covBand #To be used for Fig 6.2 later
90.67207862062392
-6.18168468483208
In [7]:
#plot the figure: Fig. 6.1a
int = np.linspace(-7,92,81) # define levels for plot
fig, ax = plt.subplots()
# define color bar
myColMap = LinearSegmentedColormap.from_list(name='my_list', 
            colors=['navy','darkgreen','lime','yellow',
                 'pink','red','maroon'], N=100)
# plot covariance plot 
Lon1 = np.arange(2.5, 360, 5)
colormap = plt.contourf(Lon1,Lon1,covBand, 
                        levels = int, cmap = myColMap)
plt.title("Covariance of SAT Anomalies on \
Zonal Band 2.5$\degree$S",
          size = 21, pad = 15)
plt.xlabel("Longitude", labelpad = 15)
plt.ylabel("Longitude", labelpad = 15)
# plot color bar
cbar = plt.colorbar(colormap, shrink=0.9, pad = 0.05)
plt.text(372, 348, '[$\degree$C]$^2$', size = 20)
cbar.set_ticks([0,20,40,60,80])
plt.savefig("fig0601a.eps")# save figure

Figure 6.1b: SAT covariance plot along a meridional band¶

In [8]:
#Covert missing values into NaN
STanom[STanom < -999] =  pd.NA
STanom.iloc[0:2, 0:5]
Out[8]:
LAT LON 1880 1881 1882
0 -87.5 2.5 NaN NaN NaN
1 -87.5 7.5 NaN NaN NaN
In [9]:
plt.figure(figsize=(9, 7))

n1 = np.where((STanom.iloc[:, 0] > -90) & (STanom.iloc[:, 0] < 90) 
              & (STanom.iloc[:, 1] > 0) & (STanom.iloc[:, 1] < 4))
n1 = np.concatenate(n1)

P1 = 83  # Time index for Dec 1961
P2 = 133  # Dec 2010
P = P2 - P1   # =50 Decembers
dat1 = STanom.iloc[n1, P1:P2]  # 1961-2010
Lat1 = STanom.iloc[n1, 0]
Lon1 = STanom.iloc[n1, 1]

AreaFac = np.sqrt(np.cos(Lat1 * np.pi / 180))
dat2 = dat1.transpose() - np.array(np.mean(dat1, axis=1))
Banddat = AreaFac[:, np.newaxis] * dat2.transpose()
covBand = np.dot(Banddat, Banddat.T)

#print(dat2)
# Display max and min of covariance matrix
print("Max:", np.nanmax(covBand))
print("Min:", np.nanmin(covBand))

# Creating the filled contour plot
intervals = np.linspace(-35, 120, 81)
myColMap = LinearSegmentedColormap.from_list(name='my_list', 
            colors=['navy','darkgreen','lime','yellow',
                 'pink','red','maroon'], N=100)

ticks = np.arange(-90, 91, 30)
plt.contourf(Lat1, Lat1, covBand, levels=intervals, cmap=myColMap)

clb = plt.colorbar(label='', ticks=np.arange(-40, 120, 20),
            pad = 0.05, shrink=0.9)
clb.ax.set_title('[°C]²')
plt.title("SAT Meridional Covariance: 2.5°E")
plt.xlabel("Latitude")
plt.ylabel("Latitude")
plt.xticks(ticks)
plt.yticks(ticks)
plt.grid()
plt.show()
Max: 116.8833093892827
Min: -34.57828535281515

Figure 6.1c: Positions of the zonal band in Fig. 6.1a and meridional band in Fig. 6.1b indicated by the thick dashed black lines¶

In [10]:
sat.shape
mapmat = sat[1416, :, :]
sat[1410:1419, 0, 0]
Out[10]:
masked_array(data=[--, --, --, --, --, --, --, --, --],
             mask=[ True,  True,  True,  True,  True,  True,  True,  True,
                    True],
       fill_value=-9.96921e+36,
            dtype=float32)
In [11]:
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap
import matplotlib.colors as mcolors
import netCDF4 as nc

plt.figure(figsize=(12, 6))

#import .nc data
nc = nc.Dataset("data/air.mon.anom.nc")
# extrat data for variables
lon = nc.variables['lon'][:]
lat = nc.variables['lat'][:]
time = nc.variables['time'][:]
sat = nc.variables['air'][:] #The temp anomalies data

# sat is a 3D array, and lon, lat are longitude and latitude 
mapmat = sat[1415, :, :] #Dec 1997
mapmat = np.clip(mapmat, -5, 5)

# Create the color palette
rgb_palette = mcolors.LinearSegmentedColormap.from_list(
    "custom", ['black', 'blue', 'darkgreen', 'green', 'white', 
               'yellow', 'pink', 'red', 'maroon'])

# Initialize the Basemap
m = Basemap(projection='cyl', llcrnrlat=-90, 
            urcrnrlat=90, llcrnrlon=0, urcrnrlon=360)

# Create meshgrid for plotting
lon, lat = np.meshgrid(lon, lat)
x, y = m(lon, lat)

# Plot the data
cf = m.contourf(x, y, mapmat, levels=np.linspace(-5, 5, 31), 
               cmap=myColMap, extend='both')

# Add map features
m.drawcoastlines()
m.drawparallels(np.arange(-90., 91., 30.), 
                labels=[1,0,0,0], fontsize = 15)
m.drawmeridians(np.arange(0., 361., 60.), 
                labels=[0,0,0,1], fontsize = 15)

# Add the two thick dashed lines
plt.plot([0,360], [-2.5, 2.5], 
         linestyle='dashed', color = 'k', linewidth = 5)
plt.plot([2.5, 2.5], [-90, 90], 
         linestyle='dashed', color = 'k', linewidth = 5)

# Title and labels
plt.title("December 1997 SAT Anomalies", fontsize=20)
clb = plt.colorbar(cf, label='', ticks=np.arange(-5, 5.1, 1),
            pad = 0.03, shrink=0.8)
clb.ax.set_title('°C')

# Save the figure
plt.savefig("fig0601c.eps", format='eps')
plt.show()

Figure 6.2: Scree plot of eigenvalues¶

In [12]:
# Python code for Fig. 6.2: Scree plot 
# The data is from Fig. 6.1a's covariance matrix 
values, vectors = np.linalg.eig(covBand62)
lam = values
K = 10
lamK = lam[0:10]

fig, ax = plt.subplots()

ax.plot(np.linspace(1,K,K), 100*lamK/np.sum(lam), 
        'ko-', linewidth = 3)
ax.set_title("Scree Plot of the First 10 Eigenvalues", 
             pad = 15)
ax.set_xlabel("EOF Mode Number", labelpad = 15)
ax.set_ylabel("Percentage of Variance [%]", labelpad = 15)

ax1 = ax.twinx()
ax1.plot(np.linspace(1,K,K),np.cumsum(100*lamK/np.sum(lam)), 
         'bo-', linewidth = 3)
ax1.set_ylabel("Cumulative Variance [%]", 
               color = 'blue', labelpad = 10)
ax1.tick_params(length=6, width=2,
                labelsize=21, color = 'b', labelcolor = 'b')
ax1.set_yticks([60,70,80,90,100])
ax1.spines['right'].set_color('b')

ax.text(4,30, "────  Cumulative Percentage Variance", 
        fontsize = 20, color = "blue")
ax.text(4,20, "────  Percentage Variance", fontsize = 20)
fig.tight_layout()
plt.savefig("fig0602.eps")# save figure

Generate Random Field Using EOFs¶

In [13]:
# Python code for generating a random space-time field 
# generate EOFs
N = 100 # number of spatial points

# define function for EOF
def eof(x,n):
    return((np.sin(n*x)/np.sqrt(np.pi/2))*np.sqrt(np.pi/N))

x = np.linspace(0,np.pi,N)
print(sum(eof(4,x)**2)) # verify the normalization condition
# 0.99
print(sum((eof(1,x) * eof(2,x)))) # verify orthogonality
# 2.534322578184866e-18

# generate PCs
Mode = 5
Ms = 1000
univar = np.diag(np.repeat(1,Mode))
zeromean = np.repeat(0,Mode)
rs = np.asmatrix(np.random.multivariate_normal(zeromean, 
                                           univar, Ms))
pcm = np.asmatrix(np.zeros([Ms, Mode]))

for m in range(Mode):
    pcm[:,m] = rs[:,m]*np.sqrt(1/Ms) 

print(np.dot(pcm[:,0].T, pcm[:,0])) #check normalized
print(np.dot(pcm[:,0].T, pcm[:,1])) #check orthogonal

# generate an independent random field
lam = [10, 9, 4.1, 4, 2]
sqrlam = np.diag(np.sqrt(lam))
eofm = np.zeros([N, Mode])

for m in range(Mode):
    eofm[:,m] = eof(m,x)
a = np.dot(np.asmatrix(eofm), np.asmatrix(sqrlam))
Yxr = np.dot(a, np.asmatrix(pcm).T)
Yxr.shape
#(100, 1000) #100 points, 1000 samples
0.99
2.534322578184866e-18
[[1.00934799]]
[[-0.0305723]]
Out[13]:
(100, 1000)

Durbin-Watson (DW) test for no serial correlation¶

In [14]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from statsmodels.stats.stattools import durbin_watson
Ms = 1000
#use DataFrame to make the data correct shapes for regression
X = pd.DataFrame(np.linspace(1, Ms, num = Ms))
y = pd.DataFrame(Yxr[10,:])
y = y.transpose()
reg = LinearRegression().fit(X,y)
residuals = y - reg.predict(X) #compute residuals
#perform Durbin-Watson test
durbin_watson(residuals)
#1.96237986 in (1.5, 2.5) implies independence
Out[14]:
array([1.94016451])

Figure 6.3: Ralization of a random field¶

In [15]:
# define color map
myColMap = LinearSegmentedColormap.from_list(name='my_list', 
            colors=['black','blue','lime','yellow',
                     'pink','red','maroon'], N=100)
clev2 = np.linspace(Yxr.min(), Yxr.max(), 501)
r = np.linspace(2,100,99)
x = np.linspace(0,np.pi,N)
# find dimensions
print((Yxr[:,1:100]).shape)
print(r.shape)
print(x.shape)
(100, 99)
(99,)
(100,)
In [16]:
# plot Fig. 6.3
contf = plt.contourf(r, x, Yxr[:,1:100], clev2, 
                     cmap=myColMap);
colbar = plt.colorbar(contf, drawedges=False, 
         ticks = [-0.06,-0.04,-0.02,0,0.02,0.04])
plt.title("A random field realization from given EOFs", 
          pad = 20)
plt.xlabel("r", labelpad = 20)
plt.ylabel("x", labelpad = 20)
plt.text(104, 3.2, "Value", size = 17)
plt.savefig("fig0603.eps") # save figure

Figure 6.4: Scree plot for eigenvalues¶

In [17]:
plt.figure(figsize=(10, 8))
Mode = np.linspace(1,5,5)
lam = [10, 9, 4.1, 4, 2]
samp = np.repeat(100,5)
sd = []
for i in range(5):
    sds = (np.sqrt(2/100)*lam[i])
    sd.append(sds)
sd2 = []
for i in range(5):
    sds2 = (np.sqrt(2/1000)*lam[i])
    sd2.append(sds2)
plus = []
for i in range(5):
    values = lam[i] + sd[i]
    plus.append(values)
minus = []
for i in range(5):
    values = lam[i] - sd[i]
    minus.append(values)  
plus2 = []
for i in range(5):
    values = lam[i] + sd2[i]
    plus2.append(values)   
minus2 = []
for i in range(5):
    values = lam[i] - sd2[i]
    minus2.append(values)    

plt.plot(Mode, lam, color = 'k')
plt.ylim(0,12)
plt.xlabel("Mode", labelpad = 20, fontsize = 25)
plt.ylabel("Variance $\lambda$", labelpad = 20, 
           fontsize = 25)
plt.scatter(Mode, plus, color = "red")
plt.scatter(Mode, minus, color = "red")
plt.scatter(Mode+0.06, plus2, color = "blue")
plt.scatter(Mode+0.06, minus2, color = "blue")
# plot line segments
plt.arrow(1,minus[0], dx = 0, dy = plus[0]-minus[0], 
          color = "red")
plt.arrow(2,minus[1], dx = 0, dy = plus[1]-minus[1], 
          color = "red")
plt.arrow(3,minus[2], dx = 0, dy = plus[2]-minus[2], 
          color = "red")
plt.arrow(4,minus[3], dx = 0, dy = plus[3]-minus[3], 
          color = "red")
plt.arrow(5,minus[4], dx = 0, dy = plus[4]-minus[4], 
          color = "red")
plt.arrow(1.06,minus2[0], dx = 0, dy = plus2[0]-minus2[0], 
          color = "blue")
plt.arrow(2.06,minus2[1], dx = 0, dy = plus2[1]-minus2[1], 
          color = "blue")
plt.arrow(3.06,minus2[2], dx = 0, dy = plus2[2]-minus2[2], 
          color = "blue")
plt.arrow(4.06,minus2[3], dx = 0, dy = plus2[3]-minus2[3], 
          color = "blue")
plt.arrow(5.06,minus2[4], dx = 0, dy = plus2[4]-minus2[4], 
          color = "blue")
plt.text(2.2,11.2, "Red standard error bar: 100 samples", 
         color = "red", fontsize = 19)
plt.text(2.2,10.4, "Blue standard error bar: 1000 samples", 
         color = "blue", fontsize = 19)
plt.savefig("fig0604.eps") # save figure

Figure 6.5a: EOF mode vs exact mode¶

In [18]:
plt.figure(figsize=(10, 8))

import random
random.seed(10)
M=100 #M samples or M independent time steps
N=100 #N spatial locations
Mode=5 # 5 EOF modes to be considered
#Generate PCs
lam = [10.0, 9.0, 4.1, 4.0, 2.0]
np.round(np.array(np.sqrt(2/M))*lam, 2)
#[1] 1.41 1.27 0.58 0.57 0.28
univar = np.diag([1]*Mode) #SD = 1
zeromean = [0]*Mode #mean = 0
rs = np.random.multivariate_normal(zeromean, univar, M)
print(rs.shape)
#(100, 5)  100 samples and 5 modes
pcm = rs*np.sqrt(1/M) #PCs
print(pcm.shape)
#(100, 5)
x = pd.DataFrame(np.linspace(0, np.pi, N))
n = pd.DataFrame(np.arange(1, Mode + 1))
nx = np.dot(x, n.transpose())
eofm = np.sqrt(2)*np.sin(nx)*np.sqrt((1/N))#EOFs
Lam = np.diag(np.sqrt(lam)) #eigenvalue matrix
da0 = np.matmul(eofm, Lam)
da = np.matmul(da0, pcm.transpose())
print(da.shape) #(100, 100) generated space-time data
u,d,v = np.linalg.svd(da) #SVD mode
plt.ylim(-0.2, 0.2)
plt.plot(x, u[:,1], color = 'b') #SVD model
plt.plot(x, eofm[:,0], color = 'r')#Exact mode
plt.ylabel('EOF1')
plt.xlabel('x')
plt.title('EOF mode (Blue curve) vs Exact mode (Red curve)')
(100, 5)
(100, 5)
(100, 100)
Out[18]:
Text(0.5, 1.0, 'EOF mode (Blue curve) vs Exact mode (Red curve)')

Figure 6.6: EOF4 error vs exact mode 3¶

In [19]:
plt.figure(figsize=(10, 8))
plt.ylim(-0.26, 0.3)
plt.plot(x, eofm[:,3], color = 'r',
        label = 'True EOF4')#Exact mode 4
plt.plot(x, u[:,4], color = 'b',
        label = 'Sample EOF4') #SVD mode 4
plt.plot(x, eofm[:,3] - u[:,4], color = 'k',
        label = 'True EOF4 - Sampel EOF4') 
plt.plot(x, eofm[:,2], color = 'orange',
        label = 'True EOF3') #Exact mode 3
plt.ylabel('EOFs [dimensionless]')
plt.xlabel('x')
plt.title('EOF4 error (black) vs exact EOF3 (orange)')
plt.legend(loc="upper left", fontsize = 17, 
           labelcolor='linecolor')
plt.show()

Figure 6.7a: True EOFs and their sampling counterparts¶

In [20]:
from scipy.stats import multivariate_normal as mvn
d = mvn([0,0])
x = d.rvs(size = 1)
dist = d.pdf(x)
M = 100
N = 100
Mode = 6
# generate PCs
lam = [10, 9, 4.1, 4, 2, 1]
univar = np.diag(np.repeat(1,Mode))
zeromean = np.repeat(0,Mode)
rs = np.asmatrix(np.random.multivariate_normal(zeromean, 
                                               univar, M))
print(rs.shape)
t = np.linspace(0,99,100)
a51 = rs[:,1]*np.sqrt(1/M)
pcm = np.asmatrix(np.zeros([M, Mode]))
for m in range(Mode):
    pcm[:,m] = rs[:,m]*np.sqrt(1/M) 
print(pcm.shape)
# generate EOFs
def eof(n,x,y):
    return((np.outer(np.sin(n*x),np.sin(n*y)))*(2/N))
x = y = np.linspace(0,np.pi,100)
print(np.sum((eof(2,x,y))**2))
# plot true EOF 1 2D
fig, ax = plt.subplots(figsize=(8, 8))
cs = ax.contour(x,y, eof(1, x, y))
ax.clabel(cs, fontsize = 15)
plt.text(1.35,1.5, "EOF1", size = 35)
# save figure
plt.savefig("fig0607.png")
plt.show()
(100, 6)
(100, 6)
0.9801
In [21]:
plt.figure(figsize = (8,8))
eofm = (np.zeros([N**2, Mode]))
# potential problem with for-loop
for m in range(Mode):
    eofm[:,m] = (eof(m, x, y)).flatten()    
print(eofm.shape)
np.dot(eofm.T, eofm)

# generate the random data with given EOFs
Lam = np.diag(np.sqrt(lam))
da = multi_dot([eofm, Lam, pcm.T])
print(da.shape)
from scipy import linalg
svdda, s, Vh = linalg.svd(da,full_matrices=False)

# plot sample EOF5 2D: R = 100
k = 4
plt.contour(x, y, (svdda[:,k]).reshape(100,100))
# save figure
plt.savefig("fig0607EOF5.eps")

plt.show() 
(10000, 6)
(10000, 100)
In [22]:
print(svdda.shape)
print(x.shape)
print(y.shape)
(10000, 100)
(100,)
(100,)