R example: selection

library(MPV)
data(p9.10)

full.lm <- lm(y ~ ., data=p9.10)

## DATA description
# help(p9.10)

# The 'p9.10' data frame has 31 observations on the rut depth of
# asphalt pavements prepared under different conditions.

# Usage:

# data(p9.10)

# Format:

# This data frame contains the following columns:

# y change in rut depth/million wheel passes (log scale)

# x1 viscosity (log scale)

# x2 percentage of asphalt in surface course

# x3 percentage of asphalt in base course

# x4 indicator

# x5 percentage of fines in surface course

# x6 percentage of voids in surface course


# This is the library with the leaps function

# install.packages('leaps')

library(leaps)

# Leaps takes a design matrix as argument: throw away the intercept
# column or leaps will complain

X <- model.matrix(full.lm)[,-1]

# Look at R^2

# R^2 -- all subsets

r2.leaps <- leaps(X, p9.10$y, nbest=3, method='r2')
plot(r2.leaps$size, r2.leaps$r2, pch=23, bg='blue', cex=2)
best.model.r2 <- r2.leaps$which[which((r2.leaps$r2 == max(r2.leaps$r2))),]
print(best.model.r2)


# Adjusted R^2 -- all subsets

adjr2.leaps <- leaps(X, p9.10$y, nbest=3, method='adjr2')
plot(adjr2.leaps$size, adjr2.leaps$adjr2, pch=23, bg='blue', cex=2)
best.model.adjr2 <- adjr2.leaps$which[which((adjr2.leaps$adjr2 == max(adjr2.leaps$adjr2))),]
print(best.model.adjr2)


# Cp -- all subsets

Cp.leaps <- leaps(X, p9.10$y, nbest=3, method='Cp')
plot(Cp.leaps$size, Cp.leaps$Cp, pch=23, bg='blue', cex=2)
best.model.Cp <- Cp.leaps$which[which((Cp.leaps$Cp == min(Cp.leaps$Cp))),]
print(best.model.Cp)


# Use stepwise search, both direction, starting at full model

full.step.both <- step(full.lm, direction='both')
print(summary(full.step.both))

# Backward

full.step.backward <- step(full.lm, direction='backward')
print(summary(full.step.backward))

# Forward
# Need an "upper" model

lowest.step.forward <- step(lm(y ~ 1, data=p9.10), list(upper=full.lm), direction='forward')
print(summary(lowest.step.forward))