#beer data set
df = read.csv('data.csv', header=FALSE)
names(df) = c("sodium", "brand", "rep")
df$sodium= as.numeric(df$sodium)
df$brand= as.factor(df$brand)

library(ggplot2)
##boxplot of sodium content versus brands
ggplot(df, aes(brand, sodium, fill=brand)) + 
  geom_boxplot() + theme_classic() + xlab("Brand") +
  ylab("Sodium") + 
  theme(axis.text=element_text(size=16),
        axis.title=element_text(size=20)) + 
  guides(fill=F) + geom_hline(yintercept=mean(df$sodium),
                              linetype="dotted")
#One way Anonva 
attach(df)
#what is the expected valued of the sodium content for given brands ?
anova(lm(sodium ~ brand - 1))
round(coef(M1 <-lm(sodium ~ brand-1)),1) 

#Do the sodium content Differ significantly between brands ?
summary(aov(sodium ~ brand))
# what is the expected value of sodium content ?
mi=tapply(sodium,brand,mean)
mi
#what is the variaability of sodium content between brands ?
var(mi)
c(mean(sodium),deviance(M1)/42) 

##Introduce Random effects next
library(lme4)
M0 <- lmer(sodium ~ 1+(1|brand), data=df)
summary(M0)
## estimate of mu = 17.629
## estimate of sigma^2 = 0.716
## estimate of sigma_{\alpha}^2 = 21.274

## the deviation from the average sodium content by brand is
ranef(M0)
round(tapply(sodium,brand,mean)-mean(sodium),3)

##hypothesis testing
alpha=0.01; I=6; n=6*8; mu.hat=mean(sodium)
MSA <- anova(lm(sodium ~ brand))[1, 3]
c(mu.hat-qt(1-alpha/2, I-1)*sqrt(MSA/n),
  mu.hat+qt(1-alpha/2, I-1)*sqrt(MSA/n))
## 99% confidence interval for mu 
## check that this is larger than one way anonva CI
## (15.94813, 19.31020) see below

anova(lm(sodium ~ brand))
t.test(sodium, conf.level=1 - alpha)