R Regression: penalized regression essentials ridge lasso elastic.net
Revision as of 08:50, 2 December 2019 by Onnowpurbo (talk | contribs) (Created page with " # Ref: http://www.sthda.com/english/articles/37-model-selection-essentials-in-r/153-penalized-regression-essentials-ridge-lasso-elastic-net/ library(tidyverse) library(car...")
# Ref: http://www.sthda.com/english/articles/37-model-selection-essentials-in-r/153-penalized-regression-essentials-ridge-lasso-elastic-net/
library(tidyverse) library(caret) library(glmnet)
# Preparing the data
# Load the data
data("Boston", package = "MASS")
# Split the data into training and test set
set.seed(123)
training.samples <- Boston$medv %>%
createDataPartition(p = 0.8, list = FALSE)
train.data <- Boston[training.samples, ]
test.data <- Boston[-training.samples, ]
# Additional data preparation # Predictor variables x <- model.matrix(medv~., train.data)[,-1] # Outcome variable y <- train.data$medv
# R functions glmnet(x, y, alpha = 1, lambda = NULL)
# Computing ridge regression # Find the best lambda using cross-validation set.seed(123) cv <- cv.glmnet(x, y, alpha = 0) # Display the best lambda value cv$lambda.min # Fit the final model on the training data model <- glmnet(x, y, alpha = 0, lambda = cv$lambda.min) # Display regression coefficients coef(model) # Make predictions on the test data x.test <- model.matrix(medv ~., test.data)[,-1] predictions <- model %>% predict(x.test) %>% as.vector() # Model performance metrics data.frame( RMSE = RMSE(predictions, test.data$medv), Rsquare = R2(predictions, test.data$medv) )
# Computing lasso regression # Find the best lambda using cross-validation set.seed(123) cv <- cv.glmnet(x, y, alpha = 1) # Display the best lambda value cv$lambda.min # Fit the final model on the training data model <- glmnet(x, y, alpha = 1, lambda = cv$lambda.min) # Dsiplay regression coefficients coef(model) # Make predictions on the test data x.test <- model.matrix(medv ~., test.data)[,-1] predictions <- model %>% predict(x.test) %>% as.vector() # Model performance metrics data.frame( RMSE = RMSE(predictions, test.data$medv), Rsquare = R2(predictions, test.data$medv) )
# Computing elastic net regession
# Build the model using the training set
set.seed(123)
model <- train(
medv ~., data = train.data, method = "glmnet",
trControl = trainControl("cv", number = 10),
tuneLength = 10
)
# Best tuning parameter
model$bestTune
# Coefficient of the final model. You need
# to specify the best lambda
coef(model$finalModel, model$bestTune$lambda)
# Make predictions on the test data
x.test <- model.matrix(medv ~., test.data)[,-1]
predictions <- model %>% predict(x.test)
# Model performance metrics
data.frame(
RMSE = RMSE(predictions, test.data$medv),
Rsquare = R2(predictions, test.data$medv)
)
# Comparing the different models
# Using caret package
# Setup a grid range of lambda values:
lambda <- 10^seq(-3, 3, length = 100)
# Compute ridge regression:
# Build the model
set.seed(123)
ridge <- train(
medv ~., data = train.data, method = "glmnet",
trControl = trainControl("cv", number = 10),
tuneGrid = expand.grid(alpha = 0, lambda = lambda)
)
# Model coefficients
coef(ridge$finalModel, ridge$bestTune$lambda)
# Make predictions
predictions <- ridge %>% predict(test.data)
# Model prediction performance
data.frame(
RMSE = RMSE(predictions, test.data$medv),
Rsquare = R2(predictions, test.data$medv)
)
# Compute lasso regression:
# Build the model
set.seed(123)
lasso <- train(
medv ~., data = train.data, method = "glmnet",
trControl = trainControl("cv", number = 10),
tuneGrid = expand.grid(alpha = 1, lambda = lambda)
)
# Model coefficients
coef(lasso$finalModel, lasso$bestTune$lambda)
# Make predictions
predictions <- lasso %>% predict(test.data)
# Model prediction performance
data.frame(
RMSE = RMSE(predictions, test.data$medv),
Rsquare = R2(predictions, test.data$medv)
)
# Elastic net regression:
# Build the model
set.seed(123)
elastic <- train(
medv ~., data = train.data, method = "glmnet",
trControl = trainControl("cv", number = 10),
tuneLength = 10
)
# Model coefficients
coef(elastic$finalModel, elastic$bestTune$lambda)
# Make predictions
predictions <- elastic %>% predict(test.data)
# Model prediction performance
data.frame(
RMSE = RMSE(predictions, test.data$medv),
Rsquare = R2(predictions, test.data$medv)
)
# Comparing models performance: models <- list(ridge = ridge, lasso = lasso, elastic = elastic) resamples(models) %>% summary( metric = "RMSE")