R Regression: multicollinearity essentials and the variance inflation factor (VIF)
Jump to navigation
Jump to search
# Packages ----
# One library() call per line: two calls on a single line without a
# separator do not parse.
library(tidyverse) # provides the %>% pipe
library(caret) # provides createDataPartition(), RMSE() and R2()
# Load the data ----
# Boston housing data set from the MASS package; `medv` is the column the
# partition below is stratified on.
data("Boston", package = "MASS")

# Split the data into training and test set ----
# Fix the RNG seed so the random partition is reproducible.
set.seed(123)
# createDataPartition() draws row indices based on the outcome vector;
# p = 0.8 keeps 80% of the rows for training, and list = FALSE returns the
# indices as a matrix that can be used directly for row subsetting.
training.samples <- Boston$medv %>%
  createDataPartition(p = 0.8, list = FALSE)
# Rows selected by the partition form the training set; the complement
# (negative indexing) forms the test set.
train.data <- Boston[training.samples, ]
test.data <- Boston[-training.samples, ]
# Build the model ----
# Linear regression of medv on every other column of the training data.
model1 <- lm(medv ~ ., data = train.data)

# Make predictions ----
# Predict medv for the held-out test rows.
predictions <- model1 %>%
  predict(test.data)

# Model performance ----
# Test-set RMSE and R-squared (caret helpers), comparing the predictions
# with the observed medv values. As a top-level expression the resulting
# data frame is printed when the script is run interactively.
data.frame(
  RMSE = RMSE(predictions, test.data$medv),
  R2 = R2(predictions, test.data$medv)
)
# Detecting multicollinearity ----
# Variance inflation factor for each predictor in model1. A commonly cited
# rule of thumb treats VIF values above roughly 5-10 as a sign of
# problematic collinearity.
# car is called via its namespace, so it must be installed but need not be
# attached with library().
car::vif(model1)
# Dealing with multicollinearity ----
# Build a model excluding the tax variable: all predictors except tax.
# NOTE(review): presumably tax is dropped because of a high VIF in the
# full model -- confirm against the car::vif() output.
model2 <- lm(medv ~ . - tax, data = train.data)

# Make predictions ----
# Predict medv for the held-out test rows with the reduced model.
predictions <- model2 %>%
  predict(test.data)

# Model performance ----
# Test-set RMSE and R-squared for the reduced model, computed the same way
# as for the full model so the two can be compared.
data.frame(
  RMSE = RMSE(predictions, test.data$medv),
  R2 = R2(predictions, test.data$medv)
)