## One-way ANOVA example on the doughnut data.
## The script (1) loads the raw table, (2) reshapes it to long format with the
## fat type as a factor, (3) builds the design matrix by hand, (4) fits the
## one-way ANOVA with lm(), and (5) shows what goes wrong when the explanatory
## variable is stored as a number instead of a factor.

## Raw data: one column per fat type, one row per replicate.
dough.raw <- read.table(
  file = "http://www2.imm.dtu.dk/courses/02418/week5/lecture/data/doughnut.txt",
  header = TRUE
)

## what the data look like:
dough.raw

## mean by fat type
colMeans(dough.raw)

## comparing means
t.test(dough.raw$Fat1, dough.raw$Fat2)
## there are 4*3/2=6 such comparisons

## reformat data so as to have fat type as an explanatory variable
tmp <- as.matrix(dough.raw)
# alternative local source: read.table(file="./data/dough.unix.txt",skip=1)

## t(tmp) is unrolled column by column, i.e. replicate by replicate, so the
## fat labels must cycle Fat1, Fat2, ... once per replicate (row of tmp).
dough <- data.frame(
  Quantity = as.vector(t(tmp)),
  Fat.type = factor(rep(paste0("Fat", seq_len(ncol(tmp))), times = nrow(tmp)))
)
dough$Fat.type
class(dough$Quantity)
class(dough$Fat.type)
levels(dough$Fat.type)

boxplot(Quantity ~ Fat.type, data = dough, col = "lightgray")

## design matrix: an intercept column followed by one indicator column per
## fat type (built with base R; "- 1" keeps an indicator for every level)
dum <- model.matrix(~ Fat.type - 1, data = dough)
design <- cbind(Intercept = 1, dum)
design
## View() opens a data viewer, so only call it in an interactive session
if (interactive()) {
  View(design)
}

## fitting a oneway ANOVA model in R
lm.res <- lm(Quantity ~ Fat.type, data = dough)
summary(lm.res)
## model globally not significant: not so low p-val, low R2
## Fat2 almost significant
## remember the low sample size for each fat type
## the "between group" variance is not considered large enough
## compared to the "within group" variance

## common mistake: explanatory variable not stored as an R factor
## (plain assignment already gives an independent copy of the data frame)
junk <- dough
## strip the "Fat" prefix and keep the level number as a numeric value
junk$Fat.type <- as.numeric(substring(as.character(junk$Fat.type), first = 4))
is.factor(junk$Fat.type)
class(junk$Fat.type)
class(dough$Fat.type)
is.factor(dough$Fat.type)
levels(dough$Fat.type)

plot(junk$Fat.type, junk$Quantity)

## Fitting a linear model on improperly formatted data:
## Fat.type is numeric here, so lm() fits a single slope (a straight line)
## instead of one effect per fat type.
junk.lm <- lm(Quantity ~ Fat.type, data = junk)
summary(junk.lm)
abline(junk.lm, col = "red")