# Friday, September 18, 2020

# Random Forest plot

library(randomForest)

# Read the dataset ----
# The CSV file is chosen interactively; its first row supplies the column names.
data1 <- read.csv(file = file.choose(), header = TRUE)

# Quick inspection: first rows, column types, per-column summaries
head(data1)
str(data1)
summary(data1)

# Random 70/30 partition into training and validation rows ----

# Fixed seed so the same rows are drawn on every run
set.seed(1000)

# Row numbers of the training portion, drawn without replacement
train <- sample(x = nrow(data1), size = 0.7 * nrow(data1), replace = FALSE)

# Sampled rows form the training set; every remaining row goes to validation
TrainSet <- data1[train, ]
ValidSet <- data1[-train, ]

# Compare the column summaries of the two subsets
summary(TrainSet)
summary(ValidSet)

# Create a Random Forest model with default parameters
#
# SPEEDING_CRASH is modelled from all other columns of TrainSet.
# randomForest() decides between regression and classification from the
# response type (factor => classification, otherwise regression). It has no
# `family` argument: unknown arguments fall into `...` and are silently
# ignored, so none is passed here.
# importance = TRUE asks the forest to assess predictor importance, which
# importance() and varImpPlot() report.
model1 <- randomForest(
  SPEEDING_CRASH ~ .,
  data = TrainSet,
  importance = TRUE
)

# Print the fitted forest's summary
model1

# or

# Fine tuning parameters of Random Forest model
#   ntree = number of trees to grow
#   mtry  = number of predictors tried at each split
# NOTE(review): mtry = 6 assumes TrainSet has at least 6 predictor columns --
# confirm against the dataset.
#
# randomForest() has no `objective` argument: unknown arguments fall into
# `...` and are silently ignored, so none is passed here. The response type
# alone (factor vs. non-factor) selects classification or regression.
model2 <- randomForest(
  SPEEDING_CRASH ~ .,
  data = TrainSet,
  ntree = 500,
  mtry = 6,
  importance = TRUE
)

# Print the fitted forest's summary
model2

# Predicting on train set
#
# type = "response" returns the predicted values. The legacy alias "class" is
# converted to "response" by predict.randomForest, so the result is the same;
# the canonical name is used because it is valid for regression forests too.
predTrain <- predict(model2, newdata = TrainSet, type = "response")

# Checking classification accuracy (disabled; only meaningful when
# SPEEDING_CRASH is a factor). The validation check needs predValid, so the
# predict() call that creates it is listed with it.

#table(predTrain, TrainSet$SPEEDING_CRASH)

#predValid <- predict(model2, newdata = ValidSet, type = "response")

#mean(predValid == ValidSet$SPEEDING_CRASH)

# Variable importance for the tuned forest (model2): table first, then plot

importance(model2)

varImpPlot(model2)

# Same two outputs for the default-parameter forest (model1)

importance(model1)

varImpPlot(model1)

# R for Transportation Engineers