R example: selection
library(MPV)data(p9.10)
full.lm <- lm(y ~ ., data=p9.10)
## DATA description
# help(p9.10)
# The 'p9.10' data frame has 31 observations on the rut depth of
# asphalt pavements prepared under different conditions.
# Usage:
# data(p9.10)
# Format:
# This data frame contains the following columns:
# y change in rut depth/million wheel passes (log scale)
# x1 viscosity (log scale)
# x2 percentage of asphalt in surface course
# x3 percentage of asphalt in base course
# x4 indicator
# x5 percentage of fines in surface course
# x6 percentage of voids in surface course
# This is the library with the leaps function
# install.packages('leaps')
library(leaps)
# Leaps takes a design matrix as argument: throw away the intercept
# column or leaps will complain
X <- model.matrix(full.lm)[,-1]
# Look at R^2
# R^2 -- all subsets
r2.leaps <- leaps(X, p9.10$y, nbest=3, method='r2')
plot(r2.leaps$size, r2.leaps$r2, pch=23, bg='blue', cex=2)
best.model.r2 <- r2.leaps$which[which((r2.leaps$r2 == max(r2.leaps$r2))),]
print(best.model.r2)
# Adjusted R^2 -- all subsets
adjr2.leaps <- leaps(X, p9.10$y, nbest=3, method='adjr2')
plot(adjr2.leaps$size, adjr2.leaps$adjr2, pch=23, bg='blue', cex=2)
best.model.adjr2 <- adjr2.leaps$which[which((adjr2.leaps$adjr2 == max(adjr2.leaps$adjr2))),]
print(best.model.adjr2)
# Cp -- all subsets
Cp.leaps <- leaps(X, p9.10$y, nbest=3, method='Cp')
plot(Cp.leaps$size, Cp.leaps$Cp, pch=23, bg='blue', cex=2)
best.model.Cp <- Cp.leaps$which[which((Cp.leaps$Cp == min(Cp.leaps$Cp))),]
print(best.model.Cp)
# Use stepwise search, both direction, starting at full model
full.step.both <- step(full.lm, direction='both')
print(summary(full.step.both))
# Backward
full.step.backward <- step(full.lm, direction='backward')
print(summary(full.step.backward))
# Forward
# Need an "upper" model
lowest.step.forward <- step(lm(y ~ 1, data=p9.10), list(upper=full.lm), direction='forward')
print(summary(lowest.step.forward))