R Regression: penalized regression essentials ridge lasso elastic.net
Jump to navigation
Jump to search
# Ref: http://www.sthda.com/english/articles/37-model-selection-essentials-in-r/153-penalized-regression-essentials-ridge-lasso-elastic-net/
library(tidyverse) library(caret) library(glmnet)
# Preparing the data # Load the data data("Boston", package = "MASS") # Split the data into training and test set set.seed(123) training.samples <- Boston$medv %>% createDataPartition(p = 0.8, list = FALSE) train.data <- Boston[training.samples, ] test.data <- Boston[-training.samples, ]
# Additional data preparation # Predictor variables x <- model.matrix(medv~., train.data)[,-1] # Outcome variable y <- train.data$medv
# R functions glmnet(x, y, alpha = 1, lambda = NULL)
# Computing ridge regression # Find the best lambda using cross-validation set.seed(123) cv <- cv.glmnet(x, y, alpha = 0) # Display the best lambda value cv$lambda.min # Fit the final model on the training data model <- glmnet(x, y, alpha = 0, lambda = cv$lambda.min) # Display regression coefficients coef(model) # Make predictions on the test data x.test <- model.matrix(medv ~., test.data)[,-1] predictions <- model %>% predict(x.test) %>% as.vector() # Model performance metrics data.frame( RMSE = RMSE(predictions, test.data$medv), Rsquare = R2(predictions, test.data$medv) )
# Computing lasso regression # Find the best lambda using cross-validation set.seed(123) cv <- cv.glmnet(x, y, alpha = 1) # Display the best lambda value cv$lambda.min # Fit the final model on the training data model <- glmnet(x, y, alpha = 1, lambda = cv$lambda.min) # Dsiplay regression coefficients coef(model) # Make predictions on the test data x.test <- model.matrix(medv ~., test.data)[,-1] predictions <- model %>% predict(x.test) %>% as.vector() # Model performance metrics data.frame( RMSE = RMSE(predictions, test.data$medv), Rsquare = R2(predictions, test.data$medv) )
# Computing elastic net regession # Build the model using the training set set.seed(123) model <- train( medv ~., data = train.data, method = "glmnet", trControl = trainControl("cv", number = 10), tuneLength = 10 ) # Best tuning parameter model$bestTune # Coefficient of the final model. You need # to specify the best lambda coef(model$finalModel, model$bestTune$lambda) # Make predictions on the test data x.test <- model.matrix(medv ~., test.data)[,-1] predictions <- model %>% predict(x.test) # Model performance metrics data.frame( RMSE = RMSE(predictions, test.data$medv), Rsquare = R2(predictions, test.data$medv) )
# Comparing the different models # Using caret package # Setup a grid range of lambda values: lambda <- 10^seq(-3, 3, length = 100) # Compute ridge regression: # Build the model set.seed(123) ridge <- train( medv ~., data = train.data, method = "glmnet", trControl = trainControl("cv", number = 10), tuneGrid = expand.grid(alpha = 0, lambda = lambda) ) # Model coefficients coef(ridge$finalModel, ridge$bestTune$lambda) # Make predictions predictions <- ridge %>% predict(test.data) # Model prediction performance data.frame( RMSE = RMSE(predictions, test.data$medv), Rsquare = R2(predictions, test.data$medv) )
# Compute lasso regression: # Build the model set.seed(123) lasso <- train( medv ~., data = train.data, method = "glmnet", trControl = trainControl("cv", number = 10), tuneGrid = expand.grid(alpha = 1, lambda = lambda) ) # Model coefficients coef(lasso$finalModel, lasso$bestTune$lambda) # Make predictions predictions <- lasso %>% predict(test.data) # Model prediction performance data.frame( RMSE = RMSE(predictions, test.data$medv), Rsquare = R2(predictions, test.data$medv) )
# Elastic net regression: # Build the model set.seed(123) elastic <- train( medv ~., data = train.data, method = "glmnet", trControl = trainControl("cv", number = 10), tuneLength = 10 ) # Model coefficients coef(elastic$finalModel, elastic$bestTune$lambda) # Make predictions predictions <- elastic %>% predict(test.data) # Model prediction performance data.frame( RMSE = RMSE(predictions, test.data$medv), Rsquare = R2(predictions, test.data$medv) )
# Comparing models performance: models <- list(ridge = ridge, lasso = lasso, elastic = elastic) resamples(models) %>% summary( metric = "RMSE")