rm(list = ls())
# Question 3
regimes  <- data.frame(list(
    Group  =  factor(rep(1:5, 3)) ,
    trt = rep(c("A","B","C"), each = 5),
    weightloss = c(15,24,31,37,33,10,15,28,36,37,8,17,34,34,39)
   ))
## graphical analysis:
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.2.3
ggplot(data = regimes, aes(x= Group, y= weightloss, color = trt )) +
  geom_point()+
  geom_line()+
  ggtitle(label = "weightloss of different regimes")
## geom_path: Each group consists of only one observation. Do you need to
## adjust the group aesthetic?

# trt A seems have a good effect. all are similar though.
# some further test:

#consider group as block -- randomized complete block desgin
aov1 <- aov(weightloss ~ Group+ trt, data = regimes)
summary(aov1)
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## Group        4 1507.7   376.9  32.682 5.27e-05 ***
## trt          2   19.7     9.9   0.855    0.461    
## Residuals    8   92.3    11.5                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## if we know, reduced proportion
weights <- c(250,309,327,356,379)
regimes$weights <-rep(weights,3)
regimes$ratio <- regimes$weightloss/regimes$weights
aov2 <- aov(ratio ~ trt, data =regimes)
summary(aov2)
##             Df   Sum Sq   Mean Sq F value Pr(>F)
## trt          2 0.000266 0.0001330   0.185  0.833
## Residuals   12 0.008614 0.0007178
#conclusions : trt still not significant.

## if you want to calculate all kindso group means
library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
group.mean <- regimes %>%
     dplyr::group_by(Group) %>%
     dplyr::summarize(mean(weightloss))
trt.mean <- regimes %>%
  dplyr::group_by(trt) %>%
  dplyr::summarize(mean(weightloss))

#graphial ANOVA
layout(matrix(1:3,3))
stripchart(group.mean$`mean(weightloss)`-mean(group.mean$`mean(weightloss)`), ylim = c(0,4) ,col ="red",main="group mean")
stripchart(trt.mean$`mean(weightloss)`-mean(trt.mean$`mean(weightloss)`), ylim = c(0,4) ,method = 'jitter', col ="blue", main= "trt mean")
stripchart(aov1$residuals, main="residuals", method= "jitter")

library(ggplot2)
p1 <- ggplot(data.frame(x = group.mean$`mean(weightloss)`-mean(group.mean$`mean(weightloss)`),y =rep(1,5)), aes(x,y)) + geom_point( )
p2 <- qplot(x,y, data= data.frame(x= trt.mean$`mean(weightloss)`-mean(trt.mean$`mean(weightloss)`),y = rep(2,3)), color = I("blue"))
p3 <- ggplot(data.frame(x= aov1$residuals, y = rep(1,15)), aes(x,y))+geom_point(color = "red")
library(gridExtra)
grid.arrange(p1,p2,p3)

### Question 4
process <- data.frame(list(
  runs = 1:32,
  block = factor(rep(1:8, each =4)),
  variants = unlist(strsplit("CBDABDACDABCADCBADBCDCABBDCACDAB", split="")),
  results = c(56,60,69,61,62,70,65,65,
             66,63,52,57,58,60,61,66,
             56,61,53,52,62,57,59,58,
             60,68,61,65,63,68,61,55)
  
))

#part a)
layout(1)
plot(process$run, process$results)
lines(process$run, process$results, col ="red")

plot(process$block,process$results)

plot(process$variants,process$results)

library(ggplot2)
ggplot(process, aes(block,results))+geom_point(aes(color=variants))+
geom_line()

#part b)
aov2 <- aov(results ~ block + variants, data =process)
summary(aov2) 
##             Df Sum Sq Mean Sq F value  Pr(>F)    
## block        7  255.4   36.48   3.646 0.00992 ** 
## variants     3  254.4   84.79   8.474 0.00070 ***
## Residuals   21  210.1   10.01                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#graphial ANOVA

library(dplyr)
block.mean <- process %>%
  dplyr::group_by(block) %>%
  dplyr::summarize(mean(results))
variants.mean <- process %>%
  dplyr::group_by(variants) %>%
  dplyr::summarize(mean(results))


layout(matrix(1:3,3))
stripchart(block.mean$`mean(results)`-mean(block.mean$`mean(results)`) ,col ="red",main="group mean", xlim=c(-7,7))
stripchart(variants.mean$`mean(results)`-mean(variants.mean$`mean(results)`),method = 'jitter', col ="blue", main= "trt mean",xlim=c(-7,7))
stripchart(aov2$residuals, main="residuals", method= "jitter",xlim=c(-7,7))

layout(1)



## part C:
#confidence intervals for block
sigma.hat = sqrt ( sum(aov2$residuals^2)/21 )
t.crit <- -qt(0.025, 21)
ME1 = t.crit * sigma.hat / sqrt(8) 



#variants (process):
data.frame(list('0.025' = variants.mean$`mean(results)`-ME1,
                '0.975' = variants.mean$`mean(results)`+ME1
))
##     X0.025   X0.975
## 1 58.67423 63.32577
## 2 55.92423 60.57577
## 3 56.67423 61.32577
## 4 63.17423 67.82577
#question d)
plot(aov2$residuals, main ="rediduals over time", ylab ="residuals")

qqnorm(aov2$residuals)
qqline(aov2$residuals)

#part (e)
plot(block.mean,ylab = "averages", xlab="time", main="averages over time")
lines(block.mean, col ="red")

## really big variation among those means of fun runs at different times


## Question 5
# don't think about the time/ blocking struture.
aov3 <- aov(results ~ variants, data = process)
summary(aov3)
##             Df Sum Sq Mean Sq F value  Pr(>F)   
## variants     3  254.4   84.79     5.1 0.00609 **
## Residuals   28  465.5   16.63                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
sigma.hat2 = sqrt ( sum(aov3$residuals^2)/28 )
t.crit2 <- -qt(0.025, 28)
ME2 = t.crit2 * sigma.hat2 / sqrt(8) 


#variants (process):
data.frame(list('0.025' = variants.mean$`mean(results)`-ME2,
                '0.975' = variants.mean$`mean(results)`+ME2
))
##     X0.025   X0.975
## 1 58.04708 63.95292
## 2 55.29708 61.20292
## 3 56.04708 61.95292
## 4 62.54708 68.45292
ME1/ME2
## [1] 0.7876165
### Apackage to get Graphical Anova
#install.packages("BHH2")
library(BHH2)

layout(1)

data(heads.data)
heads.data$periods <- factor(heads.data$periods)
heads.data$heads <- factor(heads.data$heads)

heads.aov <- aov(resp~periods+heads,data=heads.data)
anovaPlot(heads.aov)

anovaPlot(heads.aov,labels=TRUE,faclab=TRUE)