# here, we are looking at the 2-sample t-test and compare it to the randomization test of Chap. 1 # X <- read.csv("U:/public_html/588/Class1/CASE0101.csv", header = TRUE) # replace by your own path here V <- X[,1] # all the scores in one "pile" n1 <- 23 n2 <- 24 V1 <- V[1:n1] V2 <- V[(n1+1):(n1+n2)] M <- 10000 # sample size of the Monte Carlo study Dstat <- rep(0, M) # vector of simulation results Tstat <- rep(0, M) for (mc in 1:M){ Y <- sample(V, length(V), replace = FALSE) # this is the permutation of the original data Y1 <- Y[1:n1] Y2 <- Y[(n1+1):(n1+n2)] # two parts of the sample Dstat[mc] <- mean(Y2) - mean(Y1) # calculating the test statistic Tstat[mc] <- Dstat[mc]/sqrt( (var(Y1)*(n1-1) + var(Y2)*(n2-1))/(n1+n2-2))/sqrt(1/n1 + 1/n2) } sp <- sqrt((var(V1)*(n1-1) + var(V2)*(n2-1))/(n1+n2-2)) # pooled est. of st.dev.; # usually we would also have to test equal variances SEdiff <- sp*sqrt(1/n1 + 1/n2) tratio <- (mean(V2) - mean(V1))/SEdiff hist(Tstat, freq=FALSE) # histogram with Y-axis as "density", not frequency count2 <- sum( abs(Tstat) > tratio) pvalue2 <- count2/M pvalueT <- 2*pt(-abs(tratio), df = n1+n2-2) # dt is a function for CDF of T-distribution print( paste( "P-value for the T statistic, simulated", pvalue2)) print(paste("tratio", round(tratio,2))) print( paste( "P-value for the T statistic, T-approximation", round(pvalueT,4))) dtn1n2 <- function(x){ dt(x, n1+n2-2) } curve(dtn1n2, -4,4,add=TRUE, col="red", lwd=2) # built-in R routine t.test(V1, V2) # first one does *not* assume equal variances t.test(V1, V2, var.equal=TRUE)