# Difference between revisions of "R Regression: penalized regression essentials ridge lasso elastic.net"

```# Ref: http://www.sthda.com/english/articles/37-model-selection-essentials-in-r/153-penalized-regression-essentials-ridge-lasso-elastic-net/
```
```library(tidyverse)
library(caret)
library(glmnet)
```
```# Preparing the data
data("Boston", package = "MASS")
# Split the data into training and test set
set.seed(123)
training.samples <- Boston\$medv %>%
createDataPartition(p = 0.8, list = FALSE)
train.data  <- Boston[training.samples, ]
test.data <- Boston[-training.samples, ]
```

```# Additional data preparation
# Predictor variables
x <- model.matrix(medv~., train.data)[,-1]
# Outcome variable
y <- train.data\$medv
```

```# R functions
glmnet(x, y, alpha = 1, lambda = NULL)
```

```# Computing ridge regression
# Find the best lambda using cross-validation
set.seed(123)
cv <- cv.glmnet(x, y, alpha = 0)
# Display the best lambda value
cv\$lambda.min
# Fit the final model on the training data
model <- glmnet(x, y, alpha = 0, lambda = cv\$lambda.min)
# Display regression coefficients
coef(model)
# Make predictions on the test data
x.test <- model.matrix(medv ~., test.data)[,-1]
predictions <- model %>% predict(x.test) %>% as.vector()
# Model performance metrics
data.frame(
RMSE = RMSE(predictions, test.data\$medv),
Rsquare = R2(predictions, test.data\$medv)
)
```

```# Computing lasso regression

# Find the best lambda using cross-validation
set.seed(123)
cv <- cv.glmnet(x, y, alpha = 1)
# Display the best lambda value
cv\$lambda.min
# Fit the final model on the training data
model <- glmnet(x, y, alpha = 1, lambda = cv\$lambda.min)
# Dsiplay regression coefficients
coef(model)
# Make predictions on the test data
x.test <- model.matrix(medv ~., test.data)[,-1]
predictions <- model %>% predict(x.test) %>% as.vector()
# Model performance metrics
data.frame(
RMSE = RMSE(predictions, test.data\$medv),
Rsquare = R2(predictions, test.data\$medv)
)
```

```# Computing elastic net regession

# Build the model using the training set
set.seed(123)
model <- train(
medv ~., data = train.data, method = "glmnet",
trControl = trainControl("cv", number = 10),
tuneLength = 10
)
# Best tuning parameter
model\$bestTune

# Coefficient of the final model. You need
# to specify the best lambda
coef(model\$finalModel, model\$bestTune\$lambda)

# Make predictions on the test data
x.test <- model.matrix(medv ~., test.data)[,-1]
predictions <- model %>% predict(x.test)
# Model performance metrics
data.frame(
RMSE = RMSE(predictions, test.data\$medv),
Rsquare = R2(predictions, test.data\$medv)
)
```

```# Comparing the different models
# Using caret package

# Setup a grid range of lambda values:
lambda <- 10^seq(-3, 3, length = 100)

# Compute ridge regression:
# Build the model
set.seed(123)
ridge <- train(
medv ~., data = train.data, method = "glmnet",
trControl = trainControl("cv", number = 10),
tuneGrid = expand.grid(alpha = 0, lambda = lambda)
)
# Model coefficients
coef(ridge\$finalModel, ridge\$bestTune\$lambda)
# Make predictions
predictions <- ridge %>% predict(test.data)
# Model prediction performance
data.frame(
RMSE = RMSE(predictions, test.data\$medv),
Rsquare = R2(predictions, test.data\$medv)
)
```
```# Compute lasso regression:
# Build the model
set.seed(123)
lasso <- train(
medv ~., data = train.data, method = "glmnet",
trControl = trainControl("cv", number = 10),
tuneGrid = expand.grid(alpha = 1, lambda = lambda)
)
# Model coefficients
coef(lasso\$finalModel, lasso\$bestTune\$lambda)
# Make predictions
predictions <- lasso %>% predict(test.data)
# Model prediction performance
data.frame(
RMSE = RMSE(predictions, test.data\$medv),
Rsquare = R2(predictions, test.data\$medv)
)
```
```# Elastic net regression:

# Build the model
set.seed(123)
elastic <- train(
medv ~., data = train.data, method = "glmnet",
trControl = trainControl("cv", number = 10),
tuneLength = 10
)
# Model coefficients
coef(elastic\$finalModel, elastic\$bestTune\$lambda)
# Make predictions
predictions <- elastic %>% predict(test.data)
# Model prediction performance
data.frame(
RMSE = RMSE(predictions, test.data\$medv),
Rsquare = R2(predictions, test.data\$medv)
)
```
```# Comparing models performance:
models <- list(ridge = ridge, lasso = lasso, elastic = elastic)
resamples(models) %>% summary( metric = "RMSE")
```