Chapter 4: Regression Models and Methods


The Python code was developed based on the R code written by
Samuel Shen, Distinguished Professor, San Diego State University, California, USA

Kaelia Okamura, Joseph Diaz, Liu Yang, and Briana Ramirez contributed to the Python code development.
Kaelia Okamura composed and finalized the code.



In [1]:
import numpy as np
import math
import statistics as stats
import scipy.stats as scistats
import scipy.odr as ordire
import matplotlib.pyplot as plt
import os
import pandas as pd
from pandas import read_table, read_csv
from netCDF4 import Dataset as ds
# These import statements bring in the python packages we'll need. 
In [2]:
#Style Dictionary to standarize plotting scheme between different python scripts 
styledict = {'xtick.labelsize':25,
             'xtick.major.size':9,
             'xtick.major.width':1,
             'ytick.labelsize':25,
             'ytick.major.size':9,
             'ytick.major.width':1,
             'legend.framealpha':0.0,
             'legend.fontsize':15,
             'axes.labelsize':20,
             'axes.titlesize':25,
             'axes.linewidth':2,
             'figure.figsize':(12,8),
             'savefig.format':'jpg'}
plt.rcParams.update(**styledict)
In [3]:
# go to your working directory
os.chdir("/Users/sshen/climstats")
In [4]:
def linregress_CIs(xd, yd, conf = 0.95):
    alpha = 1. - conf   # significance
    n = xd.size   # data sample size
    x = np.linspace(xd.min(), xd.max(),100)
        
    # Predicted values from fitted model:
    a, b, r, p, err = scistats.linregress(xd, yd)
    y = a*x + b
    
    sd = 1./(n - 2.)*np.sum((yd - a*xd - b)**2)
    sd = np.sqrt(sd)
    sxd = np.sum((xd - xd.mean())**2)
    sx  = (x - xd.mean())**2
    
    # quantile of student's t distribution for p=1-alpha/2
    q = scistats.t.ppf(1.-alpha/2, n-2)
    
    # get the upper and lower CI:
    dy = q*sd*np.sqrt( 1./n + sx/sxd )
    dr = q*sd*np.sqrt(1 + 1./n + sx/sxd )
    rl = y - dr
    ru = y + dr
    yl = y - dy
    yu = y + dy
    
    return yl, yu, x, rl, ru

Figure 4.1: Colorado temperature lapse rate¶

In [5]:
x = np.array([
1671.5, 1635.6, 2097.0, 1295.4, 1822.7,2396.9,2763.0,1284.7,
1525.2, 1328.6, 1378.9, 2323.8, 2757.8,1033.3,1105.5,1185.7,
2343.9, 1764.5, 1271.0, 2347.3, 2094.0,2643.2,1837.9,1121.7
])
y = np.array([
22.064, 23.591, 18.464, 23.995, 20.645,17.175,13.582,24.635,
22.178, 24.002, 23.952, 16.613, 13.588,25.645,25.625,25.828,
17.626, 22.433, 24.539, 17.364, 17.327,15.413,22.174,24.549
])
# calculate correlation coefficients
corrMatr = np.corrcoef(x, y)
# R-sqaured
Rsqu = corrMatr[0, 1]**2
# trend line
reg1 = np.array(np.polyfit(x, y, 1))
abline = reg1[1] + x*reg1[0]
fig, ax = plt.subplots(figsize=(12, 12))
ax.plot(x, y, 'ko');
ax.plot(x, abline, 'k-');
ax.set_title("Colorado Elevation vs. July Tmean: \n \
1981 - 2010 Average", 
             size = 25, fontweight = 'bold', pad = 20)
ax.set_xlabel("Elevation [$m$]", size = 25, labelpad = 20)
ax.set_ylabel("Temperature [$\degree$C]", 
              size = 25, labelpad = 20);
ax.tick_params(length=6, width=2, labelsize=20);
ax.set_xticks(np.round(np.linspace(1000, 3000, 5), 2))
ax.set_yticks(np.round(np.linspace(12, 26, 8), 2))
ax.text(1750, 25.5, 
        r"Temperature lapse rate: 7.0 $\degree $C/km", 
        color= 'k', size = 20)
ax.text(2250, 24.5, r"$y = 33.48 - 0.0070~ x$" % Rsqu, 
        color= 'k', size = 20)
ax.text(2250, 23.5, r"$R-squared = %.2f$" % Rsqu, 
        color= 'k', size = 20)
plt.show()

Linear regression as the first order polynomial fitting¶

In [6]:
reg = np.polyfit(x, y, 1)
print(reg)
#[-6.98188456e-03  3.34763004e+01]
[-6.98188456e-03  3.34763004e+01]

Colorado TLR regression analysis¶

In [7]:
reg = np.polyfit(x, y, 1)
print(reg) #slope and intercept
#[-6.98188456e-03  3.34763004e+01]

regfit = np.polyval(reg,x)
regresiduals = y - regfit
print(np.round(regresiduals, 5))
print(np.mean(regresiduals))
#-5.181040781584064e-16 #should be almost zero

xa = np.array(x) - np.mean(x)
np.sum(xa*regresiduals)
print(np.dot(xa, regresiduals)) #orthogonality
#-1.48929757415317e-11 #should be almost zero

np.sum((regresiduals)**2)/(y.size - 2)
#0.6096193452251238 #unbiased MSE
[-6.98188456e-03  3.34763004e+01]
[ 0.25792  1.53427 -0.37129 -0.43697 -0.10542  0.43358 -0.60335  0.12833
 -0.64953 -0.19817  0.10302 -0.6388  -0.63366 -0.61692 -0.13283  0.63012
  0.51454  1.27623 -0.06333  0.27628 -1.52923  0.39122  1.52971 -1.09572]
-5.181040781584064e-16
-1.48929757415317e-11
Out[7]:
0.6096193452251238

Figure 4.2: Geometric derivation of the least squares¶

In [8]:
plt.figure(figsize=(10, 8))# Define figure size
plt.xlim([0, 5.2]); plt.ylim([0, 2.2])
plt.axis('off')
plt.annotate('', ha = 'center', va = 'bottom',
        xytext = (0, 0), xy = (4.12,  0),
        arrowprops = dict(facecolor='black', lw = 4,  
            arrowstyle="->, head_width=0.5, head_length=3")
            )
plt.annotate('', ha = 'center', va = 'bottom',
              xytext = (4, 0), xy = (4, 2.13), 
             arrowprops = dict(facecolor='black', lw = 4,  
            arrowstyle="->, head_width=0.4,head_length=4.5")
            )
plt.annotate('', ha = 'center', va = 'bottom',
              xytext = (0, 0), xy = (4.07, 2.07), 
             arrowprops = dict(facecolor='black', lw = 4,  
            arrowstyle="->, head_width=0.5,head_length=3"))
plt.plot([0, 5], [0,  0], color ='k', linewidth = 8,
        dashes = (3, 1))
plt.plot([3, 4], [0,  2], color ='k', linewidth = 3,
        dashes = (3,  1))
plt.plot([5, 4], [0,  2], color ='k', linewidth = 3,
        dashes = (4, 1))
plt.plot([3.9, 3.9], [0,  0.1], color ='k')
plt.plot([3.9, 4.0], [0.1,  0.1], color ='k')
plt.text(2, 0.1, r'$\hat{b}~\mathbf{x}_a$', fontsize = 30)
plt.text(2, 1.2, r'$\mathbf{y}_a$', fontsize = 30)
plt.text(4.08, 0.8, r'$\mathbf{e}$', fontsize = 30)
plt.text(3.8, 0.6, r'Shortest $\mathbf{e}$', 
         rotation = 90, fontsize = 20)
plt.text(3.3, 1.1, r'Longer $\mathbf{e}$', 
         rotation = 73, fontsize = 20)
plt.text(4.3, 1.1, r'Longer $\mathbf{e}$', 
         rotation = -73, fontsize = 20)
plt.show()

Estimate the regression slope b using two methods¶

In [9]:
#Method 1: Using vector projection
xa = x - np.mean(x)  #Anomaly the x data vector
nxa = np.sqrt(np.sum(xa**2)) #Norm of the anomaly vector
ya = y - np.mean(y)
nya = np.sqrt(sum(ya**2))
print(np.sum(ya*(xa/nxa))/nxa) #Compute b
#[1] -0.006981885  #This is an estimate for b

#Method 2:  Using correlation
from scipy.stats import pearsonr
corxy, _ = pearsonr(xa, ya) #Correlation between xa and ya
print(corxy)
#[1] -0.9813858 #Very high correlation
print(corxy*nya/nxa) #Compute b
#[1] -0.006981885 #This is an estimate for b
-0.0069818845648987665
-0.9813858291431773
-0.006981884564898768

Compute MV, YV, and $R^2$¶

In [10]:
from statistics import variance
reg = np.polyfit(x, y, 1)
yhat = np.polyval(reg,x)
#computing MV 
print('MV = ', variance(yhat))
#MV =  15.227212262175959
#Or another way
n = 24
print('MV = ', 
      np.sum((yhat - np.mean(yhat))**2)/(n-1))
#MV =  15.227212262175959

#computing YV 
print('YV =', np.sum((y - np.mean(y))**2)/(n-1))
# [1] 15.81033  
#Or another way
print('YV =', variance(y))
#YV = 15.81032641847826

#computing R-squared value 
print('R-squared = ', variance(yhat)/variance(y))
#R-squared =  0.9631181456430407

#computing correlation and R-squared
from scipy.stats import pearsonr
corxy, _ = pearsonr(x, y)
print('Correlation r_xy = ', corxy)
#Correlation r_xy =  -0.9813858291431772
print('R-squared = ', corxy**2)
#R-squared =  0.9631181456430413
MV =  15.227212262175959
MV =  15.227212262175959
YV = 15.810326418478262
YV = 15.81032641847826
R-squared =  0.9631181456430407
Correlation r_xy =  -0.9813858291431772
R-squared =  0.9631181456430413

Figure 4.3: Confidence intervals of a regression model¶

In [11]:
reg1 = np.array(np.polyfit(x, y, 1))#regression
abline = reg1[1] + x*reg1[0]
# get confidence intervals
yl,yu,xd,rl,ru = linregress_CIs(x,y)
# plot the figure
fig, ax = plt.subplots(figsize=(13,12))
ax.plot(x,y, 'ko');
ax.plot(x, abline, 'k-');
ax.plot(xd, yu, 'r-')
ax.plot(xd, yl, 'r-')
ax.plot(xd, ru, 'b-')
ax.plot(xd, rl, 'b-')
ax.set_title("Colorado Elevation vs. July Tmean \n \
1981 - 2010 Average",size=25, pad = 20, 
             fontweight = 'bold')
ax.set_xlabel("Elevation [m]", size = 25, labelpad = 20)
ax.set_ylabel(r"Temperature [$\degree$C]", 
              size = 25, labelpad = 20);
ax.tick_params(length=6, width=2, labelsize=20);
ax.set_xticks(np.round(np.linspace(1000, 3000, 5), 2))
ax.set_yticks(np.round(np.linspace(10, 30, 5), 2))
ax.text(1600, 25, r"Temp lapse rate: 7.0$\degree$C/km", 
        color= 'k', size = 20)
ax.text(1750, 27, 
        r"$y = %1.2f %1.4f ~x$"%(reg1[1], reg1[0]), 
        size = 20)
ax.text(1800, 26, r"R-squared $= %.2f$" % Rsqu, 
        color= 'k', size = 20)
ax.text(1050, 15, "Blue lines: CI of July Tmean RV", 
        color= 'b', size = 20)
ax.text(1050, 14, "Red lines: CI of the fitted model", 
        color= 'r', size = 20)
plt.show()

Figure 4.4: Regression residuals: Colorado July Tmean vs. Elevation¶

In [12]:
reg1 = np.array(np.polyfit(x, y, 1))#regression
abline = reg1[1] + x*reg1[0]
# calculate residuals
r = y - abline
fig, ax = plt.subplots(figsize=(12,8))
ax.plot(x, r, 'kd')
ax.set_title("Residuals of the Colorado 1981 - 2010 July \n\
Tmean vs. Elevation",fontweight = 'bold', size=25, pad = 20)
ax.set_xlabel("Elevation [m]", size = 25, labelpad = 20)
ax.set_ylabel(r"Residual Temp [$\degree$C]", 
              size = 25, labelpad = 20);
ax.tick_params(length=6, width=2, labelsize=20);
ax.set_xticks(np.linspace(1000, 3000, 5))
ax.set_yticks(np.linspace(-2,2,5))
plt.show()

Figure 4.5: Q-Q plot of theoretical quantiles vs. sample quantiles¶

In [13]:
fig, ax = plt.subplots(figsize=(12,8))#fig setup
reg1 = np.array(np.polyfit(x, y, 1))#regression
abline = reg1[1] + x*reg1[0]
r = y - abline# calculate residuals
#Q-Q plot as a probability plot: quantiles vs quantiles
pp1 = scistats.probplot(r, dist="norm", plot=ax,)
ax.set_title("QQ-Normal Plot for Colorado TLR Residuals", 
             fontweight = 'bold',size = 25, pad = 20);
ax.set_ylabel("Sample Quantiles", size = 25, labelpad = 20);
ax.set_xlabel("Theoretical Quantiles", 
              size = 25, labelpad = 20);
ax.tick_params(length=6, width=2, labelsize=20);
ax.set_xticks(np.round(np.linspace(-2, 2, 5), 2))
plt.show()
In [14]:
#Black-white version of the Q-Q plot
fig, ax = plt.subplots(figsize=(12,8))#fig setup
reg1 = np.array(np.polyfit(x, y, 1))#regression
abline = reg1[1] + x*reg1[0]
r = y - abline# calculate residuals
#Q-Q plot as a probability plot: quantiles vs quantiles
pp1 = scistats.probplot(r, dist="norm", plot=ax,)
#modify the color, ticks and labels of the probability plot
ax.get_lines()[0].set_marker('d')
ax.get_lines()[0].set_color('k')
ax.get_lines()[1].set_color('k')
ax.get_lines()[1].set_linestyle('--')
ax.set_title("QQ-Normal Plot for the Colorado TLR Residuals", 
             fontweight = 'bold',size = 25, pad = 20);
ax.set_ylabel("Sample Quantiles", size = 25, labelpad = 20);
ax.set_xlabel("Theoretical Quantiles", size = 25, labelpad = 20);
ax.tick_params(length=6, width=2, labelsize=20);
ax.set_xticks(np.round(np.linspace(-2, 2, 5), 2))
plt.show()

Durbin-Watson test (DW-test) for independence¶

In [15]:
from statsmodels.stats.stattools import durbin_watson
orderET = np.arange(1,25)
ElevTemp = np.stack((x,y, orderET), axis = -1)
sorted_ind = np.argsort(ElevTemp[:,0],kind='mergesort')
dat1 = ElevTemp[sorted_ind]
# use the first order polynomial fit for linear regression
reg1 = np.array(np.polyfit(dat1[:,0], dat1[:,1], 1))
abline = reg1[1] + dat1[:,0]*reg1[0]
r = dat1[:,1] - abline# calculate residuals
#perform Durbin-Watson test
durbin_watson(r)
#2.307190038542777
Out[15]:
2.307190038542777

Mann-Kendall test for a signifiant trend¶

In [16]:
import pymannkendall as mk
orderET = np.arange(1,25)
ElevTemp = np.stack((x,y, orderET), axis = -1)
sorted_ind = np.argsort(ElevTemp[:,0],kind='mergesort')
dat1 = ElevTemp[sorted_ind]
# use the first order polynomial fit for linear regression
reg1 = np.array(np.polyfit(dat1[:,0], dat1[:,1], 1))
abline = reg1[1] + dat1[:,0]*reg1[0]
r = dat1[:,1] - abline# calculate residuals
#perform Durbin-Watson test
dat1[:,2] = r
print(mk.original_test(dat1[:,2]))#test for residual trend
#p=0.6374381847429542, z=0.47128365713220055
print(mk.original_test(dat1[:,1]))#test for temp trend
#p=2.260863496417187e-09, z=-5.97786112467686
Mann_Kendall_Test(trend='no trend', h=False, p=0.6374381847429542, z=0.47128365713220055, Tau=0.07246376811594203, s=20.0, var_s=1625.3333333333333, slope=0.011948809394942904, intercept=-0.22178354930674668)
Mann_Kendall_Test(trend='decreasing', h=True, p=2.260863496417187e-09, z=-5.97786112467686, Tau=-0.8768115942028986, s=-242.0, var_s=1625.3333333333333, slope=-0.5298190476190479, intercept=28.26891904761905)

The TLR multivariate linear regression¶

In [17]:
x = np.array([
1671.5, 1635.6, 2097.0, 1295.4, 1822.7,2396.9,2763.0,1284.7,
1525.2, 1328.6, 1378.9, 2323.8, 2757.8,1033.3,1105.5,1185.7,
2343.9, 1764.5, 1271.0, 2347.3, 2094.0,2643.2,1837.9,1121.7
])
y = np.array([
22.064, 23.591, 18.464, 23.995, 20.645,17.175,13.582,24.635,
22.178, 24.002, 23.952, 16.613, 13.588,25.645,25.625,25.828,
17.626, 22.433, 24.539, 17.364, 17.327,15.413,22.174,24.549
])

from sklearn import linear_model
lat=np.array([
39.9919, 38.4600, 39.2203, 38.8236, 39.2425, 37.6742,
39.6261, 38.4775, 40.6147, 40.2600, 39.1653, 38.5258,
37.7717, 38.0494, 38.0936, 38.0636, 37.1742, 38.4858,
8.0392, 38.0858, 40.4883, 37.9492, 37.1786, 40.0583
])
lon=np.array([
-105.2667,-105.2256,-105.2783,-102.3486,-107.9631,-106.3247,
-106.0353,-102.7808,-105.1314,-103.8156,-108.7331,-106.9675,
-107.1097,-102.1236,-102.6306,-103.2153,-105.9392,-107.8792,
-103.6933,-106.1444,-106.8233,-107.8733,-104.4869,-102.2189
])
elev = x; temp = y #The x and y data were entered earlier
dat = np.stack((lat, lon, elev), axis = -1)
print(dat[0:2,:]) #Show the data of the first two stations
#[[  39.9919 -105.2667 1671.5   ]
# [  38.46   -105.2256 1635.6   ]]
#Multivariate linear regression
xdat = dat
ydat = temp
regr = linear_model.LinearRegression()
multi_reg = regr.fit(xdat, ydat)
print('Intercept: \n', regr.intercept_)
#23.73406772023514
print('Coefficients: \n', regr.coef_)
#[-0.01070055 -0.10029741 -0.00721227]
[[  39.9919 -105.2667 1671.5   ]
 [  38.46   -105.2256 1635.6   ]]
Intercept: 
 23.73406772023514
Coefficients: 
 [-0.01070055 -0.10029741 -0.00721227]

Figure 4.6: Regression diagnostics for the global average annual mean data¶

In [18]:
# Change current working directory 
os.chdir("/Users/sshen/climstats/")
# Read the data
dtmean = np.array(read_table(
    "data/aravg.ann.land_ocean.90S.90N.v5.0.0.201909.txt", 
              header = None, delimiter = "\s+"))
xDT = dtmean[0:139, 0]
yDT = dtmean[0:139, 1]
# Make a linear fit, i.e., linear regression
regLin = np.array(np.polyfit(xDT, yDT, 1))
ablinereg = regLin[1] + xDT*regLin[0]
yld,yud,xdd,rld,rud = linregress_CIs(xDT,yDT)

# Plot the figure
fig, ax = plt.subplots(2, 1, figsize=(12,12))
ax[0].plot(xDT, yDT, 'ko-')
ax[0].plot(xDT, ablinereg, 'k-')
ax[0].plot(xdd, yld, 'r-')
ax[0].plot(xdd, yud, 'r-')
ax[0].plot(xdd, rld, 'b-')
ax[0].plot(xdd, rud, 'b-')
ax[1].plot(xDT, yDT - ablinereg, 'kd')
ax[0].set_title(
    "Global Annual Mean Land and Ocean Surface \n\
Temperature Anomolies", fontweight = 'bold', 
                size = 25, pad = 20)
ax[0].set_ylabel("Temperature [$\degree$C]", 
                 size = 25, labelpad = 20)
ax[0].tick_params(length=6, width=2, labelsize=20);
ax[0].set_yticks(np.round(np.linspace(-1, 1, 5), 2))
ax[0].text(1880, 0.4, 
        "Linear temp trend 0.7348 deg C per century", 
           color = 'k', size = 20)
ax[0].text(1877, 0.6, "(a)", size = 20)
ax[0].axes.get_xaxis().set_visible(False)
ax[1].tick_params(length=6, width=2, labelsize=20);
ax[1].set_yticks(np.round(np.linspace(-0.5, 0.5, 5), 2))
ax[1].set_ylabel("Residuals [$\degree$C]", 
                 size = 25, labelpad = 20)
ax[1].text(1877, 0.4, "(b)", size = 20)
ax[1].set_xlabel("Year", size = 25, labelpad = 20)
fig.tight_layout(pad=-1.5)
plt.show()

Kolmogorov-Smirnov (KS) test, Durbin-Watson (DW) test, edof, and t-value¶

In [19]:
#Kolmogorov-Smirnov (KS) test for normality
import statistics 
from scipy.stats import kstest
resi = yDT - ablinereg
testvar = (resi - np.mean(resi))/statistics.stdev(resi)
kstest(testvar, 'norm')#
#KstestResult(statistic=0.0751, pvalue=0.3971)
#The normality assumption is accepted

#perform Durbin-Watson test for independence
from statsmodels.stats.stattools import durbin_watson
durbin_watson(resi)
#0.45234915992769864 not in (1.5, 2.5)
#The independence assumption is rejected

#calculate autocorrelations of temp data for edof
import statsmodels.api as sm
autocorr = sm.tsa.acf(yDT)
rho1 = autocorr[1]
edof = (yDT.size - 2)*(1 - rho1)/(1 + rho1)
edof #[1] 5.183904 effective degrees of freedom

#Compare the crtical t values with different edof
import scipy.stats
scipy.stats.t.ppf(q=.975, df=137)
#critical t value 1.9774312122928936
scipy.stats.t.ppf(q=.975, df=5)
##critical t value 2.5705818366147395
Out[19]:
2.5705818366147395

Figure 4.7: Polynomial fitting¶

In [20]:
os.chdir("/Users/sshen/climstats/")
# Read the data
dtmean = np.array(read_table(
    "data/aravg.ann.land_ocean.90S.90N.v5.0.0.201909.txt", 
              header = None, delimiter = "\s+"))
xDT = dtmean[0:139, 0]
yDT = dtmean[0:139, 1]
# Create trend line
reg3 = np.array(np.polyfit(xDT, yDT, 3))
ablineDT3 = reg3[3] + xDT*reg3[2] + \
           (xDT**2)*reg3[1] + (xDT**3)*reg3[0]
fig, ax = plt.subplots(2, 1, figsize=(12,12))
ax[0].plot(xDT, yDT, 'ko-')
ax[0].plot(xDT, ablineDT3, 'k-', linewidth = 4)

ax[1].plot(xDT, yDT - ablineDT3, 'kd')
ax[0].set_title("Global Annual Mean Land and Ocean \n\
            Surface Temperature Anomolies", 
            fontweight = 'bold', size = 25, pad = 20)
ax[0].tick_params(length=6, width=2, labelsize=20);
ax[0].set_yticks(np.round(np.linspace(-1, 1, 5), 2))
ax[0].set_ylabel("Temperature [$\degree$C]", 
                 size = 25, labelpad = 20)
ax[0].text(1900, 0.3, "The third-order polynomial fit", 
           color = 'k', size = 20)
ax[0].text(1877, 0.6, "(a)", size = 20)
ax[0].axes.get_xaxis().set_visible(False)
ax[1].tick_params(length=6, width=2, labelsize=20);
ax[1].set_yticks(np.round(np.linspace(-0.4, 0.4, 5), 2))
ax[1].set_ylabel("Residuals [$\degree$C]", 
                 size = 25, labelpad = 20)
ax[1].set_xlabel("Year", size = 25, labelpad = 20)
ax[1].text(1877, 0.35, "(b)", size = 20)
fig.tight_layout(pad=-1.5)
plt.show()

Figure 4.8: Ninth order polunomial fit¶

In [21]:
guess = np.polyfit(xDT, yDT, 9)

# Define polynomial funciton
def fit_func(p, t):
    return p[0]*t**9 + p[1]*t**8 + p[2]*t**7 + \
p[3]*t**6 + p[4]*t**5 + p[5]*t**4 + p[6]*t**3 +\
p[7]*t**2 + p[8]*t +  p[9]

# Fit the model
Model = ordire.Model(fit_func)
Data = ordire.RealData(xDT, yDT)
Odr = ordire.ODR(Data, Model, guess, maxit = 10000000)
output = Odr.run()

beta = output.beta

fig, ax = plt.subplots(2, 1, figsize=(12,12))
ax[0].plot(xDT, yDT, 'ko-')
ax[0].plot(xDT, fit_func(beta, xDT), 'k-', linewidth = 4)


ax[1].plot(xDT, yDT - fit_func(beta, xDT), 'kd')
ax[0].set_title("Global Annual Mean Land and Ocean \n \
Surface Temperature Anomolies", 
                fontweight = 'bold', size = 25, pad = 20)

ax[0].tick_params(length=6, width=2, labelsize=20);
ax[0].set_ylabel("Temperature [deg C]", size = 25, labelpad = 20)
ax[0].text(1900, .4, "Ninth-order Polynomial Fit", 
           color = 'k', size = 20)
ax[0].text(1877, 0.6, "(a)", size = 20)
ax[0].axes.get_xaxis().set_visible(False)


ax[1].tick_params(length=6, width=2, labelsize=20);
ax[1].set_ylabel("Residuals [deg C]", size = 25, labelpad = 20)
ax[1].set_xlabel("Year", size = 25, labelpad = 20)
ax[1].text(1877, 0.25, "(b)", size = 20)

fig.tight_layout(pad=-1.5)

plt.show()
/opt/anaconda3/lib/python3.9/site-packages/IPython/core/interactiveshell.py:3457: RankWarning: Polyfit may be poorly conditioned
  exec(code_obj, self.user_global_ns, self.user_ns)