## Load the doughnut data; the first line of the file holds the column names.
dough.raw <- read.table(
  "http://www2.imm.dtu.dk/courses/02418/week5/lecture/data/doughnut.txt",
  header = TRUE
)

## what the data look like:
dough.raw


## mean by fat type
## colMeans() is the direct idiom for per-column means of a data frame;
## apply() would first coerce the whole data frame to a matrix.
colMeans(dough.raw)

## comparing the means of two fat types
## (t.test() does not assume equal variances by default: Welch test)
t.test(dough.raw$Fat1, dough.raw$Fat2)

## there are 4*3/2=6 such pairwise comparisons

## reformat data so as to have fat type as an explanatory variable
## (long format: one row per measurement)
tmp <- as.matrix(dough.raw)

## t(tmp) is flattened column by column, i.e. row by row of the original
## table, so the fat labels must cycle once per row of the raw data.
## The number of fat types and of replicates is taken from the data
## instead of being hard-coded.
dough <- data.frame(
  Quantity = as.vector(t(tmp)),
  Fat.type = factor(
    rep(paste0("Fat", seq_len(ncol(tmp))), times = nrow(tmp))
  )
)

## quick checks on the reshaped data
dough$Fat.type

class(dough$Quantity)

class(dough$Fat.type)

levels(dough$Fat.type)

## distribution of Quantity within each fat type
boxplot(Quantity ~ Fat.type, data = dough, col = "lightgray")

## design matrix: a column of ones followed by one 0/1 indicator column
## per fat type.  Built with base R's model.matrix(); removing the
## intercept from the formula ("- 1") makes it keep an indicator for
## every level of the factor, so no extra package is needed.
dum <- model.matrix(~ Fat.type - 1, data = dough)
design <- cbind(rep(1, nrow(dough)), dum)
design

## View() opens a data viewer window, so only call it when a user is
## actually sitting in front of the session
if (interactive()) {
  View(design)
}
  
## one-way ANOVA in R: regress Quantity on the Fat.type factor
lm.res <- lm(Quantity ~ Fat.type, data = dough)

summary(lm.res)

## The model is globally not significant: the p-value is not that low
## and R2 is low.
## Fat2 is almost significant.
## Remember the low sample size for each fat type:
## the "between group" variance is not considered large enough
## compared to the "within group" variance.

## common mistake: explanatory variable not stored as an R factor
## Plain assignment already yields an independent copy of the data frame.
## as.vector() is deliberately not used: depending on the R version it
## either returns the data frame unchanged or strips it to a plain list.
junk <- dough

## strip the 3-character "Fat" prefix and store the fat type as a number
junk$Fat.type <- as.numeric(substring(as.character(junk$Fat.type), first = 4))

## the mis-coded copy holds a numeric variable ...
is.factor(junk$Fat.type)
class(junk$Fat.type)

## ... whereas the properly formatted data hold a factor
class(dough$Fat.type)


is.factor(dough$Fat.type)

levels(dough$Fat.type)


## scatter plot of Quantity against the numerically coded fat type
plot(junk$Fat.type, junk$Quantity)

## Fitting a linear model on improperly formatted data:
## Fat.type is numeric in junk, so lm() treats it as a single
## quantitative regressor
junk.lm <- lm(Quantity ~ Fat.type, data = junk)
summary(junk.lm)

## overlay the fitted straight line on the scatter plot
abline(junk.lm, col = "red")