# Packages install.packages(c("ISLR", "leaps", "tidyverse")) library(tidyverse) # data manipulation and visualization library(leaps) # model selection functions install.packages("caret") library(caret) # # DATA # ?swiss data("swiss") sample_n(swiss, 3) # Inspect the data head(swiss) # # Simple Linear Models # m1 = lm(Fertility ~ Agriculture,data=swiss); summary(m1) m2 = lm(Fertility ~ Examination,data=swiss); summary(m2) m3 = lm(Fertility ~ Education,data=swiss); summary(m3) m4 = lm(Fertility ~ Catholic,data=swiss); summary(m4) m5 = lm(Fertility ~ Infant.Mortality,data=swiss); summary(m5) # m6 = lm(Fertility ~ .,data=swiss); summary(m6) m7 = lm(Fertility ~ Agriculture+Education+Catholic+Infant.Mortality,data=swiss); summary(m7) # # Computing best subsets regression # ?regsubsets models = regsubsets(Fertility ~ ., data = swiss, nvmax = 5) summary(models) # # Choosing the optimal model # # method = "exhaustive" res.sum <- summary(models) res.sum names(res.sum) res.sum$bic res.sum$which data.frame( Adj.R2 = which.max(res.sum$adjr2),CP = which.min(res.sum$cp),BIC = which.min(res.sum$bic) ) # # There is no single correct solution to model selection, each of these criteria will lead to slightly different models. # Remember that, "All models are wrong, some models are useful". # # method = "backwards" back = regsubsets(Fertility ~ ., data = swiss, nvmax = 5,method="backward") res.back = summary(back) res.back names(res.back) which.min(res.back$bic) data.frame( Adj.R2 = which.max(res.back$adjr2),CP = which.min(res.back$cp),BIC = which.min(res.back$bic) ) # # method = "forward" forw = regsubsets(Fertility ~ ., data = swiss, nvmax = 5,method="forward") res.forw = summary(back) res.forw names(res.forw) which.min(res.forw$bic) data.frame( Adj.R2 = which.max(res.forw$adjr2),CP = which.min(res.forw$cp),BIC = which.min(res.forw$bic) ) # # Stepwise regression model library(MASS) ?stepAIC full.model <- lm(Fertility ~., data = swiss) step.model <- stepAIC(full.model, direction = "both", trace = FALSE) summary(step.model) # # step.model <- stepAIC(full.model)