data <- read.csv("hubble.csv", header = TRUE) # replace by your own path here Y <- data[,2] xx <- data[,1] n <- length(Y) lm1 <- lm(Y ~ xx) summary(lm1) lm2 <- lm(Y ~ xx - 1) ## a way to eliminate intercept summary(lm2) plot(xx,Y) lines(xx, predict(lm1)) ## now let's do it "by hand" Xmat <- cbind(rep(1,n), xx) # "design matrix" XTX1 <- solve(t(Xmat) %*% Xmat) betahat <- XTX1 %*% t(Xmat) %*% Y Yhat <- Xmat %*% betahat # predicted values shat <- sqrt( sum((Y - Yhat)^2/(n-2)) ) # estimate of the residual st.dev. print( SEmat <- shat^2*XTX1) print(SE <- sqrt(diag(SEmat))) x <- 200 A <- matrix(c(1,x),1,2) # this is an 1x2 matrix print( SE.at.x <- sqrt( A %*% SEmat %*% t(A))) ypred <- A %*% betahat tstar <- qt(0.975, n - 2) lines( c(x,x),c(ypred - tstar*SE.at.x, ypred + tstar*SE.at.x), col="red", lwd=2) # now let's do several lines for (x in seq(-200,1000, by=100)){ A <- matrix(c(1,x),1,2) # this is an 1x2 matrix SE.at.x <- sqrt( A %*% SEmat %*% t(A)) ypred <- A %*% betahat lines( c(x,x),c(ypred - tstar*SE.at.x, ypred + tstar*SE.at.x), col="red", lwd=2) } ## now, prediction intervals for (x in seq(-200,1000, by=100)){ A <- matrix(c(1,x),1,2) # this is an 1x2 matrix SE.at.x <- sqrt( shat^2 + A %*% SEmat %*% t(A)) ypred <- A %*% betahat lines( c(x+10,x+10),c(ypred - tstar*SE.at.x, ypred + tstar*SE.at.x), col="blue", lwd=1.5) } ## examples of built-in prediction and conf.int in R x <- xx y <- Y lm1 <- lm(y ~ x) predict(lm1) new <- data.frame(x = seq(-200, 1000, by = 100)) # these are points for which prediction would be done predict(lm1, new, se.fit = TRUE) pred.w.plim <- predict(lm1, new, interval="prediction") pred.w.clim <- predict(lm1, new, interval="confidence") matplot(new$x, cbind(pred.w.clim, pred.w.plim[,-1]), lty=c(1,2,2,3,3), col=c("black", "red", "red", "blue", "blue"), type="l", xlab = "Speed", lwd = 2, ylab="Distance") points(x,y)