# first, using data on p.115 means <- c(27.4, 32.7, 42.3, 42.9, 39.7, 45.1) sds <- c(6.1, 5.1, 7.8, 6.7, 7, 6.7) ns <- c(49, 57, 71, 56, 56, 60) I <- length(ns) n <- sum(ns) Ybb <- sum(ns*means)/n # grand mean SSE <- sum((ns-1)*sds^2) # Within Groups SS SSB <- sum((means - Ybb)^2*ns) # Between Groups dfB <- I-1 dfE <- sum(ns)- I MSE <- SSE/dfE # this is the pooled estimate s_p^2 sp <- sqrt(MSE) MSB <- SSB/dfB Fstat <- MSB/MSE pval <- 1 - pf(Fstat,dfB, dfE) # upper tail only exp(pf(Fstat,dfB, dfE, lower.tail = FALSE, log.p = TRUE)) # this is a bit more exact # second, using the built-in lm function X <- read.csv("mice.csv", header = TRUE) # replace by your own path here boxplot(X[,1] ~ X[,2]) lm1 <- lm(X[,1] ~ X[,2]) summary(lm1) # "milking" information from lm1 anova(lm1) plot(lm1$resid) plot(lm1$resid ~ X[,2]) # some residual diagnostics hist(lm1$resid) kruskal.test(X[,1] ~ X[,2]) ## example where KW works better: boxplot(Ozone ~ Month, data = airquality) kruskal.test(Ozone ~ Month, data = airquality) lm2 <- lm(Ozone ~ Month, data = airquality) anova(lm2)