##12 X <- read.csv("handi.csv", header = TRUE) attach(X) lm1 <- lm(SCORE ~ HANDICAP) summary(lm1) sp <- 1.633 Xbar <- rep(0,4) Xbar[1] <- mean(SCORE[HANDICAP == 'AMPUTEE']) Xbar[2] <- mean(SCORE[HANDICAP == 'CRUTCHES']) Xbar[3] <- mean(SCORE[HANDICAP == 'WHEELCHAIR']) ni <- 14 Xbar[4] <- mean(SCORE[HANDICAP == 'HEARING']) C <- c(1,1,1,-3)/3 SE <- sqrt(sum(C^2))*sp/sqrt(ni) print(t <- sum(Xbar*C)/SE) print(pval <- 2*pt(-abs(t), 65)) meandiff <- sum(Xbar*C) halfw <- qt(0.975,65)*SE print(CI <- c(meandiff - halfw, meandiff + halfw)) ##13 # since we plan 3 comparisons, we should use a third of the .05 to construct each individual C.I. # print( tstar <- qt(1-0.025/3, 65) ) print( halfw <- tstar*sp*sqrt(1/ni + 1/ni) ) lm2 <- aov(SCORE ~ HANDICAP) TukeyHSD(lm2) # for comparison, we can look at Tukey method # we can also reuse the differences in the means reported in the printout print(halfw.tukey <- (1.35+2.11)/2) # thus, if we are only interested in 3 comparisons, Bonferroni method would be more economical sprintf( ' CRUTCHES-AMPUTEE %5.3f to %5.3f', 1.493 - halfw, 1.493 + halfw ) sprintf( ' WHEELCHAIR-AMPUTEE %5.3f to %5.3f' , 0.914 - halfw, 0.914 + halfw ) sprintf( ' WHEELCHAIR-CRUTCHES %5.3f to %5.3f', -0.579 - halfw, -0.579 + halfw ) detach(X) ###16 ## i : LSD qt(0.975, 30) ## ii: trick question -- will not proceed with pairwise comparisons, as p-value is not below 5%! ## iii: Tukey qtukey(0.95,6,30) /sqrt(2) ## iv: Bonferroni k <- 6*(6-1)/2 qt(1 - 0.025/k, 30) ## v. Scheffe sqrt(5*qf(0.95,5,30)) ##19 X <- read.csv("mice.csv", header = TRUE) attach(X) lm3 <- aov(lifetime ~ diet) TukeyHSD(lm3) # LSD interval summary(lm3) sp <- sqrt(44.6) halfw <- qt(0.975, 343)*sp*sqrt(1/71 + 1/57) meandiff <- 9.61 sprintf(' 95%% confidence interval: %6.2f to %6.2f', meandiff -halfw, meandiff + halfw) detach(X) # part of the output: # N/R50-N/N85 9.6059550 6.2021702 13.0097399 # 1) sample sizes not equal # 2) might be too conservative because it's valid for 15 comparison pairs, we do not have that many # LSD one is only 7.27 to 11.95 ##21 X <- read.csv("bearings.csv", header = TRUE) attach(X) # ftime <- X[,1] # bcompound <- X[,2] boxplot(ftime ~ bcompound) lm4 <- aov(ftime ~ bcompound) summary(lm4) TukeyHSD(lm4) detach(X) Statistical summary: [let's pretend that we are talking to a non-statistician] In this analysis, we compare the durability of 5 different compounds to be potentially used in the manufacture of high-speed turbine bearings, in terms of their failure times (measured in millions of cycles). Ten measurements were obtained for each compound type. See the attached boxplot. Doing a 1-way analysis of variance, we discover strong evidence (p-value = 0.002) that the compound type affects the bearing durability. Further, analyzing the differences between individual compounds, we discovered significant differences between compounds V and II (average mean difference between V and II: 8.656 with 95% C.I. ranging from 2.98 to 14.34) and compounds V and III ( average mean difference between V and III: 6.070 with 95% C.I. ranging from 0.39 to 11.75). Methods used: for pairwise comparisons, we used Tukey-Kramer HSD method. Under the assumption of normality of the observations and equal variance within each group (both assumptions are approximately satisfied), this method provides 95% confidence intervals for the differences in means between each pair of groups. The method guarantees the familywise 5% error rate, that is, the chance of obtaining a false positive conclusion about the difference of any two compounds is 5%. par(mfrow=c(1,2)) qqnorm(lm4$resid) lm5 <- aov(1/ftime ~ bcompound) # someone suggested we might compare failure rates instead qqnorm(lm5$resid) # both QQ plots reveal an outlier for compound IV # this outlier has potentially inflated our sp and led to a less decisive test