setwd('U:/public_html/588/Class24/')
X <- read.csv('TucsonAZ.csv', header = T)
attach(X)
plot(temp, type="l")
n <- dim(X)[1]
t <- 1:n
sint <- sin(2*pi*t/365.25)
cost <- cos(2*pi*t/365.25)
lm1 <- lm(temp ~ sint + cost)
summary(lm1)
b0 <- lm1$coef[1]
b1 <- lm1$coef[2]
b2 <- lm1$coef[3]
lines( t, b0 + b1*sint + b2*cost, col="red", lwd=2)
par(mfrow=c(2,2))
plot(lm1)
## illustrating autocorrelation
resid <- lm1$resid
plot(resid[1:(n-1)], resid[2:n] , xlab="Current", ylab= "Next")
cor(resid[1:(n-1)], resid[2:n]) # lag 1 autocorrelation
acf(resid)
pacf(resid) # the latter one to determine the order of AR model to fit
plot(resid[200:250], type="l")
## how does the autocorrelation affect the Var(Xbar)?
Tnov <- temp[month == 11]
yearnov <- year[month==11]
plot(yearnov,Tnov)
nov.mean <- tapply(Tnov,yearnov, mean)
(sigma <- sd(Tnov)) # variability in day-to-day observations
sd(nov.mean) # variability in monthly means
sigma/sqrt(30) # what it should've been if we had independent observations
# now the "theoretical result" for AR(1) model
r <- 0.75
nn <- 30
rowr <- 0:(nn-1) # powers of r in the first row
Sigmat <- toeplitz( r ^ rowr) # Sigma-matrix in correlation form for AR(1) model
# we'll find var(Xbar) under assumption of AR(1) model
avec <- matrix(1,1,nn)
vXbar <- 1/nn^2* avec %*% Sigmat %*% t(avec)
sqrt(vXbar)*sigma # this is what the variability should've been based on AR(1) model
# what is "Effective sample size"?
1/vXbar # it shrinks quite a bit :(
# we can also do r <- -0.75 and see the effective sample size grow!