setwd('U:/public_html/588/Class16/') data <- read.table("global-anom.txt") plot(data) ## fitting a polynomial year <- data[,1] temp <- data[,2] year2 <- year^2 year3 <- year^3 year4 <- year^4 lm1 <- lm(temp ~ year + year2 + year3 + year4) summary(lm1) plot(data) lines(year, lm1$fit) ## what's wrong with the t-tests? yr <- year - 1940 # "coded" data yr2 <- yr^2 # coding is needed for the t-statistics to make more sense; it does not change the overall model fits yr3 <- yr^3 yr4 <- yr^4 yr5 <- yr^5 lm2 <- lm(temp ~ yr + yr2 + yr3) summary(lm2) plot(yr, temp) lines(yr, lm2$fit) lm3 <- lm(temp ~ yr + yr2 + yr3 + yr4 + yr5) summary(lm3) lines(yr, lm3$fit, col="red", lwd=2) plot(lm3$fit, lm3$res) ## we are still not happy sin63 <- sin(2*pi*yr/63) cos63 <- cos(2*pi*yr/63) lm4 <- lm(temp ~ yr + yr2 + sin63 + cos63) # what if there's a 63-year period? plot(yr, temp) lines(yr, lm4$fit, col="blue", lwd=2) pd.range <- 60:70 sse <- rep(0, length(pd.range)) for (p in pd.range){ sinp <- sin(2*pi*yr/p) cosp <- cos(2*pi*yr/p) lmp <- lm(temp ~ yr + yr2 + sinp + cosp) i <- p - pd.range[1] + 1 sse[i] <- sum(lmp$resid^2) } plot(pd.range, sse) # now analyze "the best" model p <- 64 sinp <- sin(2*pi*yr/p) cosp <- cos(2*pi*yr/p) lmp <- lm(temp ~ yr + yr2 + sinp + cosp) summary(lmp) plot(yr, temp) lines(yr, lmp$fit, col="blue", lwd=2) # now compare this model with simply quadratic growth lm.quad <- lm(temp ~ yr + yr2) anova(lm.quad, lmp) # this is the "extra sum of squares anova" to simultaneously test both sin and cos terms (Fratio <- 0.68976/2/0.08904^2) # 0.08904 from the 'lmp' model (Fratio <- 0.68976/2/(0.98299/124)) plot(lmp$res, type="o") # what's unusual about the residuals? ## prediction into the future?