library(randomForest)
# Import the data ----
# The CSV is picked through an interactive file dialog; its first row is
# read as the column names.
# NOTE(review): since R 4.0 read.csv() keeps text columns as character rather
# than factor -- confirm the column types reported by str() suit the model.
data1 <- read.csv(file = file.choose(), header = TRUE)
head(data1)     # leading rows
str(data1)      # structure: column names and types
summary(data1)  # per-column summary
# Split into training and validation sets ----
# Rows are assigned at random, roughly 70% to training and 30% to validation.
# The seed is fixed so the same split is drawn on every run.
set.seed(1000)
# floor() makes the truncation of a fractional sample size explicit.
train <- sample(nrow(data1), floor(0.7 * nrow(data1)), replace = FALSE)
TrainSet <- data1[train, ]
# Select the complement by positive index: data1[-train, ] would return zero
# rows whenever `train` is empty (very small data), silently dropping
# every row instead of keeping them all for validation.
ValidSet <- data1[setdiff(seq_len(nrow(data1)), train), ]
summary(TrainSet)
summary(ValidSet)
# Random forest with the package's default tuning parameters ----
# SPEEDING_CRASH is modelled on every other column of TrainSet;
# importance = TRUE requests the variable-importance measures inspected later.
# No `family` argument is passed: randomForest() is not a GLM and has no such
# parameter, so a distribution family cannot be specified here.
# NOTE(review): the forest performs regression for a numeric response and
# classification for a factor -- confirm SPEEDING_CRASH has the intended type.
model1 <- randomForest(SPEEDING_CRASH ~ ., data = TrainSet, importance = TRUE)
model1
# Alternative: random forest with explicitly chosen tuning parameters ----
# ntree is the number of trees grown; mtry is the number of predictors tried
# at each split.
# No `objective` argument is passed: that is a boosting-library (xgboost-style)
# option and randomForest() has no such parameter.
model2 <- randomForest(
  SPEEDING_CRASH ~ .,
  data = TrainSet,
  ntree = 500,
  mtry = 6,
  importance = TRUE
)
model2
# Predictions from the tuned model ----
# type = "response" returns the predicted values; randomForest treats the
# legacy spelling "class" as this same type.
predTrain <- predict(model2, TrainSet, type = "response")
# Score the held-out rows as well, so the validation check below has its input.
predValid <- predict(model2, ValidSet, type = "response")
# Classification accuracy checks; only meaningful when SPEEDING_CRASH is a
# factor, so they stay disabled by default.
#table(predTrain, TrainSet$SPEEDING_CRASH)
#mean(predValid == ValidSet$SPEEDING_CRASH)
# Variable importance for the tuned model: importance() returns the importance
# measures and varImpPlot() plots them.
# The calls are kept exactly as written because varImpPlot() derives its
# default plot title from the argument expression ("model2" / "model1").
importance(model2)
varImpPlot(model2)
# Same inspection for the default-parameter model.
importance(model1)
varImpPlot(model1)
# --- Web-page residue (not R code); commented out so the file parses ---
# 0 comments:
# Post a Comment