Statistics and Data Visualizations in Climate Science

with R and Python

 

A Cambridge University Press book By

SSP Shen and GR North

 

 

Version 1.0 released in July 2023 and coded by Dr. Samuel Shen, Distinguished Professor
San Diego State University, California, USA
https://shen.sdsu.edu
Email:

 

 

 

 

Chapter 1: Basics of Climate Data Arrays, Statistics, and Visualization

R Plot Fig. 1.1: Simple Line Graph

# go to your working directory
#setwd("/Users/sshen/climstats") 
# read the data file from the folder named "data"
NOAAtemp = read.table(
  "data/aravg.ann.land_ocean.90S.90N.v4.0.1.201907.txt",
  header=FALSE) #Read from the data folder
# check the data matrix dimension
dim(NOAAtemp)
## [1] 140   6
#[1] 140   6 
#140 years from 1880 to 2019
#2019 will be excluded since data only up to July 2019
#col1 is year, col2 is anomalies, col3-6 are data errors
# set the plot margins and the positions of labels
par(mar=c(3.5,3.5,2.5,1), mgp=c(2,0.8,0))
plot(NOAAtemp[1:139,1], NOAAtemp[1:139,2],
     type ="o", col="brown", lwd=3,
     main ="Global Land-Ocean Average Annual Mean 
     Surface Temperature Anomalies: 1880-2018",
     cex.lab=1.2,cex.axis=1.2,
     xlab="Year", 
     ylab=expression(
       paste("Temperature Anomaly [", degree,"C]"))
)

 

R Plot Fig. 1.2: Staircase Chart of Data

plot(NOAAtemp[1:139,1], NOAAtemp[1:139,2],
     type="s", #staircase curve for data
     col="black", lwd=2,
     main="Global Land-Ocean Average Annual Mean 
     Surface Temperature Anomalies: 1880-2018",
     cex.lab=1.2,cex.axis=1.2,
     xlab="year",
     ylab=expression(paste(
       "Temperature Anomaly [", degree,"C]"))
)

 

R Plot Fig. 1.3: Color Bar Chart of Data

x <- NOAAtemp[,1]
y <- NOAAtemp[,2]
z <- rep(-99, length(x))
# compute 5-point moving average
for (i in 3:length(x)-2) z[i] = 
  mean(c(y[i-2],y[i-1],y[i],y[i+1],y[i+2]))
n1 <- which(y>=0); x1 <- x[n1]; y1 <- y[n1]
n2 <- which(y<0); x2 <- x[n2]; y2 <- y[n2]
x3 <- x[2:length(x)-2]
y3 <- z[2:length(x)-2]
plot(x1, y1, type="h", #bars for data
     xlim = c(1880,2016), lwd=3, 
     tck = 0.02,  #tck>0 makes ticks inside the plot
     ylim = c(-0.7,0.7), xlab="Year", col="red",
     ylab = expression(paste(
       "Temperature Anomaly [", degree,"C]")),
     main ="Global Land-Ocean Average Annual Mean 
     Surface Temperature Anomalies: 1880-2018",
     cex.lab = 1.2, cex.axis = 1.2)
lines(x2, y2, type="h",
      lwd = 3, tck = -0.02, col = "blue")
lines(x3, y3, lwd = 2)