Sunday, September 27, 2020

Neural Network in R

## Neural network demo: predict a binary outcome (Placed) from two scores.

# Load the required package once, at the top of the script.
# (Using library() — it errors loudly if the package is missing;
# the original also called require() a second time, which is redundant.)
library(neuralnet)

# Create the training data set: two score columns and a 0/1 label.
TAWKIR <- c(20, 10, 30, 20, 80, 30)
AHMED  <- c(90, 20, 40, 50, 50, 80)
Placed <- c(1, 0, 0, 0, 1, 1)

# Combine the feature columns and the label into a single data frame.
df <- data.frame(TAWKIR, AHMED, Placed)

# Fix the RNG seed so the randomly-initialised network weights are
# reproducible across runs of this tutorial script.
set.seed(42)

# Fit a neural network with one hidden layer of 3 units.
# act.fct = "logistic" selects the sigmoid activation;
# linear.output = FALSE applies the activation to the output node,
# which is appropriate for a 0/1 classification target.
nn <- neuralnet(Placed ~ TAWKIR + AHMED,
                data = df,
                hidden = 3,
                act.fct = "logistic",
                linear.output = FALSE)

# Plot the fitted network topology and weights.
plot(nn)




Saturday, September 26, 2020

Friday, September 18, 2020

Random Forest plot

## Random forest demo: model SPEEDING_CRASH from all other columns.

library(randomForest)

# Load the dataset interactively and explore it.
data1 <- read.csv(file.choose(), header = TRUE)

head(data1)
str(data1)
summary(data1)

# Split into Train and Validation sets
# Training Set : Validation Set = 70 : 30 (random)
set.seed(1000)
train <- sample(nrow(data1), 0.7 * nrow(data1), replace = FALSE)
TrainSet <- data1[train, ]
ValidSet <- data1[-train, ]
summary(TrainSet)
summary(ValidSet)

# Create a random forest model with default parameters.
# NOTE: randomForest() has no `family` argument (that belongs to
# glm-style fitters); it picks regression vs classification from the
# type of the response, so the invalid argument has been removed.
model1 <- randomForest(SPEEDING_CRASH ~ ., data = TrainSet,
                       importance = TRUE)
model1

# Fine-tune the main parameters: number of trees (ntree) and number of
# variables tried at each split (mtry).
# NOTE: `objective = "reg:..."` is xgboost syntax, not randomForest,
# so it has been removed as well.
model2 <- randomForest(SPEEDING_CRASH ~ ., data = TrainSet,
                       ntree = 500, mtry = 6, importance = TRUE)
model2

# Predict on the training set.
# type = "response" is valid for both regression and classification
# forests; type = "class" is not an option of predict.randomForest().
predTrain <- predict(model2, TrainSet, type = "response")

# Checking classification accuracy (only meaningful if SPEEDING_CRASH
# is a factor, i.e. the forest is a classifier):
#table(predTrain, TrainSet$SPEEDING_CRASH)

# Predict on the validation set first, then check accuracy:
#predValid <- predict(model2, ValidSet, type = "response")
#mean(predValid == ValidSet$SPEEDING_CRASH)

# Inspect variable importance for the tuned model...
importance(model2)
varImpPlot(model2)

# ...or for the default model.
importance(model1)
varImpPlot(model1)




SHAP plot

## SHAP summary plots for an xgboost model fitted to SPEEDING_CRASH.

# Part 1: load libraries quietly.
suppressPackageStartupMessages({
  library(SHAPforxgboost)
  library(xgboost)
  library(data.table)
  library(ggplot2)
})

# Part 2: load the file and compute SHAP values.
a <- read.csv(file.choose())

# Feature matrix: every column except the first (the response).
X1 <- as.matrix(a[, -1])

# Fit a small xgboost regression model.
# verbose = FALSE (spelled out — T/F are reassignable and unsafe).
mod1 <- xgboost::xgboost(
  data = X1, label = a$SPEEDING_CRASH, gamma = 0, eta = 1,
  lambda = 0, nrounds = 10, verbose = FALSE,
  objective = "reg:squarederror")

# shap.values(model, X_dataset) returns the SHAP data matrix
# and the features ranked by mean(|SHAP|).
shap_values <- shap.values(xgb_model = mod1, X_train = X1)
shap_values$mean_shap_score

# Part 3: prepare long-format SHAP data for plotting.
shap_values_iris <- shap_values$shap_score

# shap.prep() returns the long-format SHAP data from the model...
shap_long_iris <- shap.prep(xgb_model = mod1, X_train = X1)

# ...which is the same as supplying the precomputed shap_contrib.
shap_long_iris <- shap.prep(shap_contrib = shap_values_iris, X_train = X1)

# -------------------------------------------------------------------------

# Summary (beeswarm) plot of the SHAP values.
shap.plot.summary(shap_long_iris)

# `dilute` subsamples rows to speed plotting when there are thousands
# of observations; see ?shap.plot.summary for details.
shap.plot.summary(shap_long_iris, x_bound = 1.5, dilute = 10)

#end...................................



Text plot

## Text plot demo: text(), par() font families, rotation, line spacing.
## Final result will be Figure 3-42.

# A blank plot to set up a coordinate system.
# (The stray "> " console prompt pasted into the original line has been
# removed — with it, the script does not parse.)
plot(0:10, 0:10, type = "n")

# Some regular text as a baseline.
text(2, 10, "Regular text", pos = 4)

# Enlarge text and use a serif family, then add some text.
par(list(cex = 2, family = "serif"))
text(2, 8, "Serif Family", pos = 4)

# Rotate 180 degrees and switch to a sans-serif family.
# Note pos is opposite to the previous call because the text is rotated.
par(list(srt = 180, family = "sans"))
text(2, 6, "Sans Family", pos = 2)

# Rotate 90 degrees and switch to a monospace family.
par(list(srt = 90, family = "mono"))
text(8, 6, "Monospace Family")

# Reset the parameters changed above.
par(list(cex = 1, srt = 0, family = ""))

# Multi-line text with the default inter-line spacing.
text(2, 4, "Multi-line\ntext with\ndefault spacing", pos = 4)

# Double the line height, draw multi-line text again, then reset.
par(lheight = 2)
text(4, 2, "Multi-line\ntext with\ncustom spacing", pos = 4)
par(lheight = 1)
  



Biplot for PCA

## Biplot for a scaled principal component analysis of datasets::USArrests.

# Attach the example data and list its variable names.
data(USArrests)
names(USArrests)

# Scaled PCA on the whole data frame.
pca1 <- prcomp(USArrests, scale = TRUE)

# The two formula-interface calls below fit exactly the same PCA.
pca2 <- prcomp(~ ., data = USArrests, scale = TRUE)
pca3 <- prcomp(~ Murder + Assault + Rape + UrbanPop,
               data = USArrests, scale = TRUE)

# Inspect the fitted object: print it, list its components, summarise.
pca1
names(pca1)
summary(pca1)

## Plots for results...
## Scree-plot of variances (Figure 2-5)
#plot(pca1, type = "lines", main = "PCA for USArrests")

# Bi-plot of the result (Figure 2-6).
biplot(pca1, col = 1, cex = c(0.8, 1.2), expand = 0.9)




Scree plot from PCA

## Scree plot from a scaled PCA of datasets::USArrests.

data(USArrests)   # get the example data
names(USArrests)  # list the variable names

# Scaled PCA using the entire data frame.
pca1 <- prcomp(USArrests, scale = TRUE)

# Both formula-interface calls below reproduce the same PCA.
pca2 <- prcomp(~ ., data = USArrests, scale = TRUE)
pca3 <- prcomp(~ Murder + Assault + Rape + UrbanPop,
               data = USArrests, scale = TRUE)

# Inspect the result object: print, component names, summary.
pca1
names(pca1)
summary(pca1)

## Plots for results...
# Scree-plot of the component variances.
plot(pca1, type = "lines", main = "PCA for USArrests")




Monday, September 14, 2020

Tuesday, September 8, 2020