#Simulation of the standard error of mean
#setwd("/Users/sshen/climstats")
#setEPS() # save the .eps figure file to the working directory
#postscript("fig0301.eps", height = 5, width = 10)
mu = 0; sig = 10; m=10000; n = 100; k = m*n
x = rnorm(k, mean = mu, sd = sig)
par(mar = c(4, 4.5, 2, 1.5))
par(mfrow=c(1,2))
hist(x, breaks = 201, xlim = c(-40,40),
main = expression('Histogram of x: sd(x) = 10'),
cex.lab = 1.5, cex.axis = 1.5,
cex.main = 1.5)
text(-25, 19000, '(a)', cex =1.3)
xmat = matrix(x, ncol = n)
xbar = rowMeans(xmat)
hist(xbar, breaks = 31, xlim = c(-40,40),
xlab = expression(bar(x)),
main = expression('Histogram of '*bar(x)*
': sd('*bar(x)*') = 1'),
cex.lab = 1.5, cex.axis = 1.5,
cex.main = 1.5)
text(-25, 750, '(b)', cex =1.3)
#dev.off()
m=10000 #10,000 experiments
x = 1:m
n = 30 #sample size n
truemean = 8
da = matrix(rnorm(n*m, mean = truemean, sd = 1), nrow = m)
esmean = rowMeans(da) #sample mean
library(matrixStats)
essd = rowSds(da) #sample SD
upperci = esmean + 1.96*essd/sqrt(n) #interval upper limit
lowerci = esmean - 1.96*essd/sqrt(n) #interval lower limit
l=0
for(k in 1:m){
if(upperci[k] >= truemean & lowerci[k] <= truemean )
l=l+1
} #Determine if the true mean is inside the interval
l/m #Percentage of truth
## [1] 0.9406
#[1] 0.9425 #which is approximately 0.95
#Plot confidence intervals and tail probabilities
#setwd("/Users/sshen/climstats")
#setEPS() # save the .eps figure file to the working directory
#postscript("fig0302.eps", height = 4.5, width = 6.5)
par(mar=c(2.5,3.5,2.0,0.5))
rm(list=ls())
par(mgp=c(1.4,0.5,0))
curve(dnorm(x,0,1), xlim=c(-3,3), lwd=3,
main='Confidence Intervals and Confidence Levels',
xlab="True mean as a normally distributed random variable",
ylab="", xaxt="n", cex.lab=1.2)
title(ylab='Probability density', line=2, cex.lab=1.2)
polygon(c(-1.96, seq(-1.96,1.96,len=100), 1.96),
c(0,dnorm(seq(-1.96,1.96,len=100)),0),col='skyblue')
polygon(c(-1.0,seq(-1.0, 1, length=100), 1),
c(0, dnorm(seq(-1.0, 1, length=100)), 0.0),col='white')
polygon(c(-3.0,seq(-3.0, -1.96, length=100), -1.96),
c(0, dnorm(seq(-3.0, -1.96, length=100)), 0.0),col='red')
polygon(c(1.96,seq(1.96, 3.0, length=100), 3.0),
c(0, dnorm(seq(1.96, 3.0, length=100)), 0.0),col='red')
points(c(-1,1), c(0,0), pch=19, col="blue")
points(0,0, pch=19)
points(c(-1.96,1.96),c(0,0),pch=19, col="red")
text(0,0.02, expression(bar(x)), cex=1.0)
text(-1.50,0.02, "SE", cex=1.0)
text(-0.60,0.02, "SE", cex=1.0)
text(1.50,0.02, "SE", cex=1.0)
text(0.60,0.02, "SE", cex=1.0)
text(0,0.2, "Probability
= 0.68")
arrows(-2.8,0.06,-2.35,0.01, length=0.1)
text(-2.5,0.09, "Probability
=0.025")
#dev.off()
#setwd('/Users/sshen/climstats')
da1 =read.csv("data/Lat52.5_Lon-112.5.csv", header=TRUE)
dim(da1)
## [1] 1642 3
#[1] 1642 3 #1642 months: Jan 1880 - Oct 2016
da1[1:2,]
## Level.Name Date
## 1 NOAA Merged Land Ocean Global Surface Temperature Anomalies 1880-01-01
## 2 NOAA Merged Land Ocean Global Surface Temperature Anomalies 1880-02-01
## Value
## 1 -7.9609
## 2 -4.2510
# Date Value
#1 1880-01-01 -7.9609
#2 1880-02-01 -4.2510
jan = seq(1, 1642, by=12)
Tjan = da1[jan,]
TJ50 = Tjan[1:50, 3]
xbar = mean(TJ50)
sdEdm = sd(TJ50)
EM = 1.96*sdEdm/sqrt(50)
CIupper = xbar + EM
CIlower = xbar - EM
round(c(xbar, sdEdm, EM, CIlower, CIupper), digits =2)
## [1] -2.47 4.95 1.37 -3.84 -1.10
#[1] -2.47 4.95 1.37 -3.84 -1.10
#52.5N, 112.5W, Edmonton, NOAAGlobalTemp Version 3.0
#setwd("/Users/sshen/climstats")
#setEPS() # save the .eps figure file to the working directory
#postscript("fig0303.eps", height = 7, width = 10)
da1 =read.csv("data/Lat52.5_Lon-112.5.csv", header=TRUE)
jan =seq(1, 1642, by=12)
Tjan = da1[jan,]
t=1880:2016
regJan = lm(Tjan[,3] ~ t)
par(mar=c(3.6,4.5,2,1.3), mgp = c(2.3, 0.9, 0))
plot(t, Tjan[,3], type="l",
main = "Edmonton January Surface Air Temperature Anomalies",
xlab = "Time: 1880-2016",
ylab = expression("Temperature Anomaly:"~degree~C),
ylim=c(-20,10), cex.lab=1.4, cex.axis=1.4)
m19972006 = mean(Tjan[118:137, 3]) #1997--2016 Jan mean
m18801929 = mean(Tjan[1:50, 3]) #1880--1929 Jan mean
EM = 1.96*sd(Tjan[1:50, 3])/sqrt(50)
lines(t, rep(0,length(t)), lty = 2)
lines(t[118:137], rep(m19972006, 20),
col = 'blue', lwd = 3, lty = 1)
lines(t[1:50], rep(m18801929, 50),
col = 'darkgreen', lwd = 3, lty = 1)
lines(t[1:50], rep(m18801929 - EM, 50),
col = 'darkgreen', lwd = 1.5, lty = 3)
lines(t[1:50], rep(m18801929 + EM, 50),
col = 'darkgreen', lwd = 1.5, lty = 3)
abline(regJan, col='red', lwd = 2)
text(1920, 9,
expression("Linear trend: 3.78"~degree~C~"per century"),
col="red", cex=1.4)
text(2005, -17,
paste('1997-2016', '\nmean = ',
round({m19972006}, digits = 2)),
col = 'blue', cex = 1.4)
text(1905, -17,
paste('1880-1929', '\nmean = ',
round({m18801929}, digits = 2)),
col = 'darkgreen', cex = 1.4)