R Regression: Multicollinearity Essentials and VIF
Jump to navigation
Jump to search
# Attach the packages used below: tidyverse supplies the %>% pipe,
# caret supplies createDataPartition(), RMSE() and R2().
library(tidyverse)
library(caret)
# Bring the Boston data set (shipped with MASS) into the workspace
data("Boston", package = "MASS")

# Fix the RNG seed so the random partition below is reproducible
set.seed(123)

# Row indices for the training portion: an 80% sample drawn on the
# outcome medv, returned as a matrix rather than a list
training.samples <- createDataPartition(Boston$medv, p = 0.8, list = FALSE)

# Sampled rows form the training set; every remaining row is the test set
train.data <- Boston[training.samples, ]
test.data <- Boston[-training.samples, ]
# Build the model: regress medv on every other column of the training set
model1 <- lm(medv ~ ., data = train.data)

# Make predictions on the held-out test set
predictions <- model1 %>% predict(test.data)

# Model performance: compare predictions against the observed medv values
data.frame(
  RMSE = RMSE(predictions, test.data$medv),
  R2 = R2(predictions, test.data$medv)
)

# Detecting multicollinearity: variance inflation factors of model1's terms
car::vif(model1)

# Dealing with multicollinearity
# Build a model excluding the tax variable
model2 <- lm(medv ~ . - tax, data = train.data)

# Make predictions (this overwrites the predictions from model1)
predictions <- model2 %>% predict(test.data)

# Model performance of the reduced model, same metrics as above
data.frame(
  RMSE = RMSE(predictions, test.data$medv),
  R2 = R2(predictions, test.data$medv)
)