## Lesson 4 ##-------------------------------------------------- library(lattice) ##-------------------------------------------------- ## Exercise 1 ##-------------------------------------------------- bt <- read.table("/media/data/work/Teaching/Unitn_Pugliese/2015/Lesson1/examples1/BodyTemperature.txt", header=TRUE) ## Which are the variable inside the dataset? summary(bt) ## Let's explore the data ## Look at all the possible relationship between variables in the dataset pairs(bt) ## Build a linear regression model mymod <- lm(Temperature ~ HeartRate, data=bt) ## Get some information on the fitted model summary(mymod) ## Compute the R^2 R2 <- 1 - sum(mymod$residuals^2)/sum((bt$Temperature - mean(bt$Temperature))^2) ## Compute the sample correlation coefficient and show the relationship between R and R^2 cor(bt$Temperature, bt$HeartRate) cor(bt$Temperature, bt$HeartRate)^2 ## Compute the confidence interval at 95% for the model parameters confint(mymod, level=0.95) ## Get the prediction for a person whose heart rate is 75 si <- data.frame(HeartRate=75) predict(mymod, si) ## Make some diagnostic plots layout(matrix(1:4, ncol=2, nrow=2)) plot(mymod, which=1:4) dev.off() hist(residuals(mymod),xlab="Residuals", freq=FALSE, col="grey80") lines(density(residuals(mymod))) ## Solution using lattice dev.new() library(lattice) histogram(residuals(mymod), panel=function(...){ panel.histogram(...) panel.densityplot(..., lwd=2) }, type="density", xlab="Residuals") ##-------------------------------------------------- ## Exercise 2 ##-------------------------------------------------- bw <- read.table("/home/michele/work/Teaching/Unitn_Pugliese/Lesson4/birthwt.txt", header=TRUE) str(bw) summary(bw) ## Variable exploration splom(bw) ## Look at the two variables together cor(bw$bwt,bw$lwt) xyplot( bwt~lwt, data=bw) ## Build the model mymod <- lm(bwt~lwt, data=bw) summary(mymod) ## Diagnostic plots layout(matrix(1:4, ncol=2, nrow=2)) plot(mymod) dev.off() ## Get confidence interval confint(mymod, level=0.90) ##-------------------------------------------------- ## Exercise 3 ##-------------------------------------------------- bw <- read.table("/home/michele/work/Teaching/Unitn_Pugliese/Lesson4/bodyweight.txt",header=TRUE) ## Build the linear model mymod <- lm(bodyfat~neck, data=bw) summary(mymod) ## Transform cm to inches bw$neck.in <- bw$neck * 0.39370 ## Build the model again mymod2 <- lm(bodyfat~neck.in, data=bw) ## Test the significance of a model anova(mymod, mymod2) ## Compute the akaike criterion AIC(mymod) AIC(mymod2) mymod2 <- update(mymod, . ~ . + I(neck^2)) mymod3 <- update(mymod2, . ~ . + I(neck^3)) out.poly <- lm(bodyfat ~ poly(neck,9), data=bw) plot(bodyfat~neck, data=bw) lines(x=bw$neck,y=predict(out.poly), add=TRUE) anova(mymod, mymod2, mymod3) ## Polynomial regression aa <- read.table("~/work/Teaching/Unitn_Pugliese/Lesson4/test.txt") names(aa) <- c("x","y") mymod <- lm(y~x, data=aa) out.poly <- lm(y~poly(x,3, raw=TRUE), data=aa) out.poly4 <- lm(y~poly(x,4, raw=TRUE), data=aa) out.poly5 <- lm(y~poly(x,5, raw=TRUE), data=aa) out.poly6 <- lm(y~poly(x,6, raw=TRUE), data=aa) plot(aa$x,aa$y) curve(predict(out.poly, data.frame(x=x)), add=TRUE) curve(predict(mymod,data.frame(x=x)), add=TRUE, col="grey80") curve(predict(out.poly4, data.frame(x=x)), add=TRUE, col="red") curve(predict(out.poly5, data.frame(x=x)), add=TRUE, col="green") curve(predict(out.poly6, data.frame(x=x)), add=TRUE, col="blue") mycol <- rev(topo.colors(50)[seq(20,50,length=7)]) mycol <- colorRampPalette(c(colors()[139],colors()[134]))(7) # xyplot(y~x, data=aa, panel=function(x,y,...){ # for (i in 1:7){ # out.poly <- lm(y~poly(x,i)) # panel.curve(predict(out.poly, data.frame(x=x)), col=mycol[i]) # } # panel.xyplot(x,y,..., pch=19) # }, key=list(lines=list(lty=1, col=mycol[1:7]), # text=list(labels=as.character(1:7)), columns=4), # main="Regression curves: different polynomial degree regression models")