qqgamma <- function(x, shape, ...)
  plot(qgamma(ppoints(x), shape), sort(x), ...)


qqweibull <- function(x, shape, ...)
  plot(qweibull(ppoints(x), shape), sort(x), ...)


library(MASS)
data(GAGurine)
GAGurine$Agemod <- GAGurine$GAG
GAGurine$Agemod[GAGurine$Agemod==Inf] <- 0

GAGurine$Agemod <- cut(GAGurine$Age, 10)

plot(GAGurine$Age, 1/GAGurine$GAG)

gg2 <- lm(GAG~Age, data=GAGurine)
summary(gg2)$adj.r.squared

## Compute simple linear model
gg <- lm(1/GAG~Age, data=GAGurine)
summary(gg)$adj.r.squared
gg1 <- lm(1/GAG~poly(Age,4) ,data=GAGurine)
summary(gg1)$adj.r.squared

## Set threshold for large leverage as p/
## Non-polynomial model
thr <- 2/314
## Polynomial model
thr1 <- 5/314

## Compute studentized residuals
ggstr <- studres(gg)
gg1str <- studres(gg1)

## Compute leverages
gg.hat <- lm.influence(gg)$hat
gg1.hat <- lm.influence(gg1)$hat

## Get the samples with the highest leverage
## leverages are the h_ii of the H matrix used to compute the linear model.
## The higher the leverage the higher the influence the sample has to move the model
## Common thresholds are 2 or 3 times the trace(H) (which is p) normalized by the number of samples
## Try with different threshold multipliers
which(gg.hat>2*thr)
which(gg1.hat>2*thr1)
order(abs(ggstr), decreasing=TRUE)
order(abs(gg1str), decreasing=TRUE)

## Note that some of the sample have both high studentized residuals and high leverages.
## Some of them can be considered outlier. Maybe in some further analysis we should consider removing this samples from the dataset.

## Diagnostic Plots
plot(fitted(gg),ggstr, main="Poly(1)")
abline(h = 0, lty = 2)

plot(fitted(gg1),gg1str, main="Poly(4)")
abline(h = 0, lty = 2)

## Check normality
qqnorm(ggstr)
qqline(ggstr)
shapiro.test(ggstr)

qqnorm(gg1str)
qqline(gg1str)
shapiro.test(gg1str)


## Plot the data, boxplots, regression models and loess fit.
xyplot(1/GAG~Age, data=GAGurine,groups=trunc(Age),pch=19, 
       panel=function(x,y,...){
         panel.xyplot(x,y,...)
         panel.loess(x,y,..., col="red", lwd=2, family="gaussian", degree=3)
         panel.bwplot(trunc(x)+0.5,y, horizontal=FALSE, lwd=3,
                      par.settings=list(
                        box.rectangle=list(col="red"),
                        box.umbrella=list(col="red"),
                        box.dot = list(col="red"),
                        plot.symbol=list(col="red"))
                      )
         panel.abline(gg, lwd=2)
         panel.lines(x,gg1$fitted.values, lwd=3, col="blue")
       }, key=list(text=list(c("lm", "poly(4)", "Loess Fit")),
            lines=list(lty=c(rep(1,3)), col=c("black", "blue", "red")),
            columns=3)
       )

## With the command below we can get the information on the values plotted in the boxplots for each Age level.
## In particular in the "stats" object we have a matrix of 5 x 17 -> see the help of boxplots and slides for further information.
rr <- boxplot(GAG~I(trunc(Age)), data=GAGurine, plot=FALSE)$stats

## Create a vector of ages as integers
xx <- unique(trunc(GAGurine$Age))+1

## Given the poly(4) model we can compute the prediction at each Age level with 95% CI
mypred <- predict(gg1, data.frame(Age=xx), interval="confidence", level=0.95)

## Plot everything
## NB we compute the model using 1/GAG so we have to transform back the prediction of the linear model to produce a plot GAG by Age.
plot(xx,1/mypred[,2], type="none",
     main="GAG level vs Age", ylab="GAG", xlab="Age")
polygon(x=c(xx,rev(xx)),y=c(1/mypred[,2], rev(1/mypred[,3])), col="skyblue")
lines(xx,1/mypred[,1], lty=3)
lines(xx,1/mypred[,2])
lines(xx,1/mypred[,3])