#An example from "Design of Experiments: Statistical
#Principles of Research Design and Analysis"
#2nd Edition by Robert O. Kuehl

d=read.delim("http://dnett.github.io/S510/PlantDensity.txt")
d
names(d)=c("x","y")
head(d)

#Plot grain yield against plant density.

plot(d[,1],d[,2],col=4,pch=16,xlab="Plant Density",
     ylab="Grain Yield")

y=d$y

#Center and rescale plant density for the hand computations below.

x=(d$x-mean(d$x))/10
x
n=nrow(d)

#Build nested design matrices: intercept only (x1), linear (x2),
#quadratic (x3), cubic (x4), and the cell-means model with one
#indicator column per distinct density (x5).

x1=matrix(1,nrow=n,ncol=1)
x2=cbind(x1,x)
x3=cbind(x2,x^2)
x4=cbind(x3,x^3)
x5=matrix(model.matrix(~0+factor(x)),nrow=n)
I=diag(rep(1,n))

library(MASS)  #for ginv()

#Orthogonal projection onto the column space of x.

proj=function(x){
  x%*%ginv(t(x)%*%x)%*%t(x)
}

p1=proj(x1)
p2=proj(x2)
p3=proj(x3)
p4=proj(x4)
p5=proj(x5)

#Sequential sums of squares as quadratic forms in differences of
#projection matrices: linear, quadratic, cubic, quartic, pure error,
#and corrected total.

t(y)%*%(p2-p1)%*%y
t(y)%*%(p3-p2)%*%y
t(y)%*%(p4-p3)%*%y
t(y)%*%(p5-p4)%*%y
t(y)%*%(I-p5)%*%y
t(y)%*%(I-p1)%*%y

#The same sequential (Type I) sums of squares appear in the anova
#table for the quartic fit.

o=lm(y~x+I(x^2)+I(x^3)+I(x^4),data=d)
anova(o)

#Apply the linear-trend projection (p2-p1) to the columns of the
#cell-means design matrix and examine one rescaled row of the result.

X=x5
(p2-p1)%*%X
5*((p2-p1)%*%X)[15,]

#Let's add the best fitting simple linear regression
#line to our plot.

o=lm(y~x,data=d)
u=seq(0,60,by=.01)  #overkill here but used later.
lines(u,coef(o)[1]+coef(o)[2]*u,col=2)

#The fit doesn't look very good.
#Let's formally test for lack of fit.

o=lm(y~x+factor(x),data=d)
anova(o)

#It looks like a linear fit is inadequate.
#Let's try a quadratic fit.

o=lm(y~x+I(x^2)+factor(x),data=d)
anova(o)

#It looks like a quadratic fit is adequate.
#Let's estimate the coefficients for the best
#quadratic fit.

b=coef(lm(y~x+I(x^2),data=d))

#Let's add the best fitting quadratic curve
#to our plot.

lines(u,b[1]+b[2]*u+b[3]*u^2,col=3)

#Let's add the treatment group means to our plot.

trt.means=tapply(d$y,d$x,mean)
points(unique(d$x),trt.means,pch="X")

#The quartic fit will pass through the treatment
#means.

b=coef(lm(y~x+I(x^2)+I(x^3)+I(x^4),data=d))
lines(u,b[1]+b[2]*u+b[3]*u^2+b[4]*u^3+b[5]*u^4,col=1)
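
#A hedged sketch, not part of Kuehl's example: the lack-of-fit F test
#for the simple linear regression can also be reproduced directly from
#the projection matrices above (this assumes y, n, x5, p2, p5, and I
#are still in the workspace).

q=ncol(x5)                 #number of distinct plant densities
ss.lof=t(y)%*%(p5-p2)%*%y  #lack-of-fit sum of squares, q-2 df
ss.pe=t(y)%*%(I-p5)%*%y    #pure-error sum of squares, n-q df
f.lof=(ss.lof/(q-2))/(ss.pe/(n-q))
f.lof
1-pf(f.lof,q-2,n-q)        #should match the factor(x) line of
                           #anova(lm(y~x+factor(x),data=d))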