setwd('U:/public_html/588/Class16/')
data <- read.table("global-anom.txt")
plot(data)
## fitting a polynomial
year <- data[,1]
temp <- data[,2]
year2 <- year^2
year3 <- year^3
year4 <- year^4
lm1 <- lm(temp ~ year + year2 + year3 + year4)
summary(lm1)
plot(data)
lines(year, lm1$fit)
## what's wrong with the t-tests?
yr <- year - 1940 # "coded" data
yr2 <- yr^2 # coding is needed for the t-statistics to make more sense; it does not change the overall model fits
yr3 <- yr^3
yr4 <- yr^4
yr5 <- yr^5
lm2 <- lm(temp ~ yr + yr2 + yr3)
summary(lm2)
plot(yr, temp)
lines(yr, lm2$fit)
lm3 <- lm(temp ~ yr + yr2 + yr3 + yr4 + yr5)
summary(lm3)
lines(yr, lm3$fit, col="red", lwd=2)
plot(lm3$fit, lm3$res) ## we are still not happy
sin63 <- sin(2*pi*yr/63)
cos63 <- cos(2*pi*yr/63)
lm4 <- lm(temp ~ yr + yr2 + sin63 + cos63) # what if there's a 63-year period?
plot(yr, temp)
lines(yr, lm4$fit, col="blue", lwd=2)
pd.range <- 60:70
sse <- rep(0, length(pd.range))
for (p in pd.range){
sinp <- sin(2*pi*yr/p)
cosp <- cos(2*pi*yr/p)
lmp <- lm(temp ~ yr + yr2 + sinp + cosp)
i <- p - pd.range[1] + 1
sse[i] <- sum(lmp$resid^2)
}
plot(pd.range, sse)
# now analyze "the best" model
p <- 64
sinp <- sin(2*pi*yr/p)
cosp <- cos(2*pi*yr/p)
lmp <- lm(temp ~ yr + yr2 + sinp + cosp)
summary(lmp)
plot(yr, temp)
lines(yr, lmp$fit, col="blue", lwd=2)
# now compare this model with simply quadratic growth
lm.quad <- lm(temp ~ yr + yr2)
anova(lm.quad, lmp) # this is the "extra sum of squares anova" to simultaneously test both sin and cos terms
(Fratio <- 0.68976/2/0.08904^2) # 0.08904 from the 'lmp' model
(Fratio <- 0.68976/2/(0.98299/124))
plot(lmp$res, type="o") # what's unusual about the residuals?
## prediction into the future?