# Friday, September 18, 2020
#
# Random Forest plot

 library(randomForest)

# Load the dataset and explore
#
# file.choose() opens an interactive file picker; the selected CSV is read
# with its first row treated as the column names.
data1 <- read.csv(file = file.choose(), header = TRUE)

# Quick look at the data: leading rows, column structure, per-column summary.
head(data1)
str(data1)
summary(data1)



# Split into Train and Validation sets
#
# Training Set : Validation Set = 70 : 30 (random)

# Fix the RNG seed so the same rows are drawn on every run.
set.seed(1000)

# Row indices for the training set: 70% of the rows (rounded down), drawn
# without replacement.
train <- sample(nrow(data1), floor(0.7 * nrow(data1)), replace = FALSE)

TrainSet <- data1[train, ]

# Validation rows are the complement of the training rows. The complement is
# built explicitly instead of with data1[-train, ]: when `train` is empty
# (very small inputs), -integer(0) selects no rows at all rather than all rows.
ValidSet <- data1[setdiff(seq_len(nrow(data1)), train), ]

summary(TrainSet)

summary(ValidSet)


# Create a Random Forest model with default parameters
#
# Models SPEEDING_CRASH from every other column of TrainSet.
# importance = TRUE asks the forest to record predictor importance, which
# importance() and varImpPlot() report further down.
#
# NOTE: randomForest() has no `family` argument (that is a glm-style option),
# so the previous `family = negative.binomial` was removed: it did not change
# the fit and only suggested a negative-binomial model that is not being
# fitted. Per the randomForest documentation, regression vs. classification
# is chosen from the response type (factor => classification).
model1 <- randomForest(SPEEDING_CRASH ~ ., data = TrainSet, importance = TRUE)

# Print the fitted forest's summary.
model1


# or

# Fine tuning parameters of Random Forest model
#
# Same formula and data as model1, with the number of trees (ntree) and the
# number of predictors tried at each split (mtry) set explicitly.
#
# NOTE: `objective = "reg:negative.binomial"` was removed. It is an
# xgboost-style option, not a randomForest() argument, so it had no effect on
# the fit and only suggested a loss function that is not being used.
model2 <- randomForest(
  SPEEDING_CRASH ~ .,
  data = TrainSet,
  ntree = 500,
  mtry = 6,
  importance = TRUE
)

# Print the fitted forest's summary.
model2


# Predicting on train set
#
# Runs the tuned forest (model2) over the same rows it was trained on.
predTrain <- predict(object = model2, newdata = TrainSet, type = "class")

# Checking classification accuracy
#
# NOTE(review): the two checks below are disabled. The second one refers to
# `predValid`, which this script never creates; a prediction on ValidSet
# would have to be computed before it can be enabled.

#table(predTrain, TrainSet$SPEEDING_CRASH)

#mean(predValid == ValidSet$SPEEDING_CRASH)


# To check important variables
#
# For each fitted forest: show the importance table, then draw the
# corresponding variable-importance plot.

# Tuned model
importance(x = model2)
varImpPlot(x = model2)

#OR

# Default-parameter model
importance(x = model1)
varImpPlot(x = model1)




# 0 comments:
#
# Post a Comment