import numpy as np
import pandas as pd
import math as m
import matplotlib
import statistics as stats
import scipy.stats as scistats
import matplotlib.pyplot as plt
import matplotlib.colors as color
import os
import sys
import netCDF4 as nc
import statsmodels.api as sm
import shapely
import statistics
import warnings
warnings.filterwarnings("ignore")
from matplotlib import cm as cm1
from scipy import optimize as opt
from pandas import read_table
from netCDF4 import Dataset as ds
from netCDF4 import num2date
from datetime import datetime, date, timedelta
from matplotlib.dates import date2num, num2date
from matplotlib.colors import LinearSegmentedColormap
from mpl_toolkits.basemap import Basemap, cm, shiftgrid, addcyclic
from matplotlib.patches import Polygon
from scipy.ndimage import gaussian_filter
from sklearn.linear_model import LinearRegression
# Style Dictionary to standarize plotting scheme
# between different python scripts
styledict = {'xtick.labelsize':21,
'xtick.major.size':9,
'xtick.major.width':1,
'ytick.labelsize':21,
'ytick.major.size':9,
'ytick.major.width':1,
'legend.framealpha':0.0,
'legend.fontsize':15,
'axes.labelsize':22,
'axes.titlesize':22,
'axes.linewidth':2,
'figure.figsize':(12,8),
'savefig.format':'jpg'}
plt.rcParams.update(**styledict)
# define skewness function
def skewness(data):
mu = stats.mean(data)
sigmaInv = 1/stats.stdev(data)
skew = ((data - mu)*sigmaInv)**3
return stats.mean(skew)
# define kurstosis function
def kurtosis(data):
mu = stats.mean(data)
sigmaInv = 1/stats.stdev(data)
kurt = ((data - mu)*sigmaInv)**4
return stats.mean(kurt) - 3
# go to your working directory
os.chdir("/Users/sshen/climstats")
# read the data file from the folder named "data"
NOAAtemp = read_table(\
"data/aravg.ann.land_ocean.90S.90N.v4.0.1.201907.txt",
header = None, delimiter = "\s+")
# We need to specfify the name of the file, whether we have a header to
# disregard (in this case, none), and the delimiter for the text file;
# which in this case is a regular expression for any number of consecutive
# spaces. Dimension of Data set 'NOAAtemp' should be 140 rows by 6 columns
# check the data matrix dimension
print("The dimension of our data table is:", NOAAtemp.shape)
# For the next couple of graphs, we'll need the information from the
# first and second columns up to the data from the 139th row
x = np.array(NOAAtemp.loc[0:138, 0])
print(x.shape)
y = np.array(NOAAtemp.loc[0:138, 1])
print(y.shape)
The dimension of our data table is: (140, 6) (139,) (139,)
# plot figure
plt.plot(x, y, 'brown', marker = 'o', linewidth = 3);
plt.title("Global Land-Ocean Average Annual Mean \n \
Surface Temperature Anomalies: 1880 - 2018", pad = 15)
plt.xlabel("Year", labelpad = 20)
plt.ylabel("Temperature Anomaly [$\degree$C]",
labelpad = 20)
# display on screen
plt.show()
# Plot Fig. 1.2: Staircase chart of data
#keyword arguments
kwargs = {'drawstyle' : 'steps'}
plt.plot(x, y, 'black', linewidth = 3, **kwargs);
plt.title("Global Land-Ocean Average Annual Mean \n \
Surface Temperature Anomalies: 1880 - 2018", pad = 15)
plt.xlabel("Year", labelpad = 20)
plt.ylabel("Temperature Anomaly [$\degree$C]",
labelpad = 20)
plt.show()
# Plot Fig. 1.3: Color bar chart of data
# define an array z with y number of ones
z = np.ones(y.size)
z[0] = -99
z[1] = -99
# computer 5-point moving average
for i in range(2, z.size - 2):
z[i] = np.mean(y[i-2:i+3])
z[z.size - 2] = -99
z[z.size - 1] = -99
# define variables based on range
y1 = [y[i] for i in range(y.size) if y[i] >= 0]
x1 = [x[i] for i in range(y.size) if y[i] >= 0]
y2 = [y[i] for i in range(y.size) if y[i] < 0]
x2 = [x[i] for i in range(y.size) if y[i] < 0]
y3 = z[2:x.size-2]
x3 = x[2:x.size-2]
# plot the moving average data
plt.plot(x3, y3, 'black', linewidth = 3)
plt.title("Global Land-Ocean Average Annual Mean \n \
Surface Temperature Anomalies: 1880 - 2018", pad = 20)
# create bar chart
plt.bar(x1, y1, color = 'red');
plt.bar(x2, y2, color = 'blue');
plt.xlabel("Year", labelpad = 20)
plt.ylabel("Temperature Anomaly [$\degree$C]",
labelpad = 16)
plt.show()
# temperature data string from 1880 to 2018
temp2018 = np.array(NOAAtemp.loc[0:138, 1])
print("The first six elements (or head) of the data is:\n",
temp2018[0:6], "\n")
# arithmetic average of the data
print("The Mean is %f.\n" % stats.mean(temp2018))
# standard deviation of the data
print("The Standard Deviation is %f.\n" %
stats.stdev(temp2018))
# variance of the data
print("The Variance is %f.\n" % stats.variance(temp2018))
# skewness of the data
print("The Skewness is %f.\n" % skewness(temp2018))
# kurtosis of the data
print("The Kurtosis is %f.\n" % kurtosis(temp2018))
# median of the data
print("The Median is %f.\n" % stats.median(temp2018))
# percentiles
print("The 5th, 25th, 75th and 95th percentiles are:")
probs = [5, 25, 75, 95]
print([round(np.percentile(temp2018, p),5) for p in probs])
print()
The first six elements (or head) of the data is: [-0.370221 -0.319993 -0.320088 -0.396044 -0.458355 -0.470374] The Mean is -0.185863. The Standard Deviation is 0.324757. The Variance is 0.105467. The Skewness is 0.774270. The Kurtosis is -0.261913. The Median is -0.274434. The 5th, 25th, 75th and 95th percentiles are: [-0.57649, -0.41198, 0.01552, 0.41324]
We use $x = \{x_1, x_2,\cdots, x_n\}$ to denote the time series data. Symbolically the above statistical indices are defined as: \begin{align*} \text{Mean: }\mu(x) &= \frac{1}{n}\sum_{k=1}^n x_k\\ \text{Variance by unbiased estimate: } \sigma^2(x) &= \frac{1}{n-1}\sum_{k=1}^n \left(x_k - \mu(x)\right)^2\\ \text{Standard Deviation: } \sigma(x) &= \sqrt{\sigma^2(x)}\\ \text{Skewness: } \gamma_3(x) &= \frac{1}{n}\sum_{k=1}^n \left(\frac{x_k - \mu(x)}{\sigma(x)}\right)^3\\ \text{Kurtosis: } \gamma_4(x) &= \frac{1}{n}\sum_{k=1}^n \left(\frac{x_k - \mu(x)}{\sigma(x)}\right)^4 - 3\\ \end{align*}
# plot figure
mu, std = scistats.norm.fit(y)
# create an evenly spaced sequence of 100 points in [-1,1]
points = np.linspace(-1, 1, 100)
plt.hist(y, bins = 20, range=(-1,1), color ='white',
edgecolor = 'k', density = True);
plt.plot(points, scistats.norm.pdf(points, mu, std),
color = 'b', linewidth = 3)
plt.title("Histogram of 1880 - 2018 Temperature Anomalies",
pad = 20)
plt.xlabel("Temperature Anomalies [$\degree$C]",
labelpad = 20)
plt.ylabel("Frequency", labelpad = 20)
plt.xticks([-1.0, -0.5, 0.0, 0.5, 1.0])
plt.show()
plt.figure(figsize=(5,8))
# plot boxplot
medianprops = dict(linestyle='-',
linewidth=2.5, color='k')
whiskerprops = dict(linestyle='--',
linewidth=2.0, color='k')
plt.boxplot(y, medianprops = medianprops,
whiskerprops = whiskerprops);
#plt.title("Boxplot of 1880 - 2018\n\
#Temperature Anomalies",
# pad = 20)
plt.ylabel("Temperature Anomalies [$\degree$C]",
labelpad = 23)
y_ticks = np.linspace(-0.5,0.5,3)
plt.yticks(y_ticks, size = 23)
x = np.arange(1)
plt.xticks(x, " ")
plt.show()
NOAAtemp = read_table("data/aravg.ann.land_ocean.90S.90N.v4.0.1.201907.txt",
header = None, delimiter = "\s+")
# We need to specifify the name of the file, whether we have a header to
# disregard (in this case, none), and the delimiter for the text file;
# which in this case is a regular expression for any number of consecutive
# spaces. Dimension of Data set 'NOAAtemp' should be 140 rows by 6 columns
# For the next couple of graphs, we'll need the information from the
# first column up to the data from the 139th row
temp2018 = np.array(NOAAtemp.loc[0:138, 1])
#line = np.linspace(-3, 3, temp2018.size)
tstand = np.sort((temp2018 - np.mean(temp2018))/statistics.stdev(temp2018))
# simulate 139 points by N(0,1)
qn = np.random.normal(size=y.size)
# sort the points
qns = np.sort(qn)
# set up figure
fig = plt.figure(figsize=(12,8))
ax = fig.add_subplot(111)
# plot figure
qq2 = scistats.probplot(qns, dist = "norm", plot = plt)
ax.get_lines()[0].set_color('purple')
ax.get_lines()[1].set_linewidth(3)
ax1 = ax.twinx()
qt = scistats.probplot(tstand, dist = "norm", plot = plt)
ax1.get_lines()[1].set_linewidth(0.01)
ax1.get_lines()[0].set_color('black')
ax.tick_params(length=6, width=2, labelsize=20)
ax.set_title("Q-Q plot for the Standardized Global \n\
Temperature Anomalies vs N(0,1)", pad = 20)
ax1.set_title("")
ax.set_xlabel("Quantile of N(0,1)", labelpad = 20)
ax.set_ylabel("Quantile of Temperature Anomalies", labelpad = 20)
ax1.set_ylabel("")
ax1.tick_params(length=0.01, width=0.01, color = 'white', labelcolor = 'b')
ax1.set_yticks([])
ax.set_xlim(-3,3)
ax.set_ylim(-3,3)
plt.show()