Monday, August 2, 2021

Ordinal Logistic Regression (OLR) in Neural Network (NN) using R

 library(neuralnet)

library("ISLR")


#load data

data<- read.csv(file.choose())

colnames(data)


smp_size <- floor(0.75 * nrow(data))

train_ind <- sample(seq_len(nrow(data)), size = smp_size)


train <- data[train_ind, ]

test <- data[-train_ind, ]


#scale the predictor columns (2:11)

mean_data <- apply(data[2:11], 2, mean)


sd_data <- apply(data[2:11], 2, sd)


data_scaled <- as.data.frame(scale(data[,2:11], center = mean_data, scale = sd_data))
data_scaled$KABCO <- data$KABCO   #carry the (unscaled) response along; assumes KABCO sits outside columns 2:11


head(data_scaled, n=20)


index = sample(1:nrow(data),round(0.70*nrow(data)))


train_data <- as.data.frame(data_scaled[index,])


test_data <- as.data.frame(data_scaled[-index,])



# Custom activation function
# (this is the logistic sigmoid; the true softplus would be log(1 + exp(x)))
sigmoid <- function(x) 1 / (1 + exp(-x))
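If actual softplus is wanted instead, a one-line definition (shown for reference only; it is not used in the model below):

#true softplus: a smooth approximation of ReLU
softplus <- function(x) log(1 + exp(x))
#it can be passed the same way: act.fct = softplus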

# (KABCO=="2") turns the ordinal response into a binary target for the network
nn <- neuralnet((KABCO=="2") ~ Covid + Speed + Volume + Drug_Alco +
                  Lighting + Road_Surface + Speeding + Work_Zone_Related +
                  Weather_Condition, train_data,
                linear.output = FALSE, hidden = c(9,9), act.fct = sigmoid,
                likelihood = TRUE, threshold = 0.01)

print(nn)

plot(nn)

summary(nn)

predict_olr <- predict(nn, test_data)   #returns a matrix of predicted probabilities

plot(test_data$KABCO, predict_olr, col='black',
     main='Real vs predicted for neural network', pch=18, cex=4)

abline(0, 1, lwd=2)   #45-degree reference line


#Check the data - actual and predicted (on the training set)

final_output <- cbind(train_data$KABCO == "2",
                      as.data.frame(nn$net.result))

colnames(final_output) <- c("Expected Output",
                            "Neural Net Output")

print(final_output)


#arrange plots in a 2 x 2 grid

par(mfrow=c(2,2))


#generalized weight plots for covariates from the fitted model
gwplot(nn, selected.covariate="Speed")

gwplot(nn, selected.covariate="Volume")

gwplot(nn, selected.covariate="Covid")


#mse

predict_net_test <- compute(nn,test_data)


MSE.net <- sum(((test_data$KABCO == "2") - predict_net_test$net.result)^2)/nrow(test_data)

MSE.net
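RMSE is often easier to read than MSE because it is on the scale of the response; a one-line follow-up to the computation above:

#root mean squared error, same scale as the 0/1 target
RMSE.net <- sqrt(MSE.net)
RMSE.net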



#####################################################################

#Confusion matrix

library(caret)

library(e1071)


# training


p<- ifelse(predict(nn, train_data) > 0.5, 1, 0)   #threshold the predicted probabilities


tab<- table(p, train_data$KABCO == "2")


tab


1-sum(diag(tab))/sum(tab)   #misclassification rate






#testing


p1<- ifelse(predict(nn, test_data) > 0.5, 1, 0)


tab1<- table(p1, test_data$KABCO == "2")


tab1


1-sum(diag(tab1))/sum(tab1)   #misclassification rate




#end




#data
#note: sample(data$KABCO) is a random permutation, used here only as a
#stand-in for real predictions; both arguments must be factors

confusionMatrix(data$KABCO, sample(data$KABCO))


newPrior <- c(.05, .8, .15, 0.5, 0.9)


names(newPrior) <- levels(data$KABCO)




cm <-confusionMatrix(data$KABCO, sample(data$KABCO))




#2


# extract the confusion matrix values as data.frame


cm_d <- as.data.frame(cm$table)


# confusion matrix statistics as data.frame


cm_st <-data.frame(cm$overall)


# round the values


cm_st$cm.overall <- round(cm_st$cm.overall,2)




# here we also have the rounded percentage values


cm_p <- as.data.frame(prop.table(cm$table))


cm_d$Perc <- round(cm_p$Freq*100,2)




#3


library(ggplot2)     # to plot


library(gridExtra)   # to put more


library(grid)        # plot together




# plotting the matrix


cm_d_p <-  ggplot(data = cm_d, aes(x = Prediction , y =  Reference, fill = Freq))+
  geom_tile() +
  geom_text(aes(label = paste("",Freq,",",Perc,"%")), color = 'red', size = 8) +
  theme_light() +
  guides(fill=FALSE) 




# plotting the stats


cm_st_p <-  tableGrob(cm_st)




# all together


grid.arrange(cm_d_p, cm_st_p,nrow = 1, ncol = 2, 
             top=textGrob("Confusion Matrix and Statistics",gp=gpar(fontsize=25,font=1)))


#search confusion matrix plot




###########################################################################

#Create factor vectors of actual and predicted classes (both from the test
#set, so the lengths match)

expected_value <- factor(test_data$KABCO == "2", levels = c(FALSE, TRUE))

predicted_value <- factor(as.vector(predict(nn, test_data)) > 0.5, levels = c(FALSE, TRUE))


#Creating confusion matrix

example <- confusionMatrix(data=predicted_value, reference = expected_value)


#Display results 

example


table(expected_value,predicted_value)


#install required packages

install.packages('gmodels')

#import required library 

library(gmodels)


#Computes the crosstable calculations

CrossTable(expected_value,predicted_value)





Sunday, August 1, 2021

Logistic Regression with Confusion Matrix in R

 #Coded by Tawkir Ahmed 

library(ggplot2) #Used for plotting data

library(dplyr) #Used to extract columns in the data

library(rms) #Used to extract p-value from logistic model

library(aod)

library(caret)


# logistic model

rail <- read.csv(choose.files())   #training

test<- read.csv(choose.files())    #testing

labs <- levels(factor(rail$OPS))   #outcome category labels

labs

summary(rail)


# collapse all missing values to NA

rail$OPS <- factor(rail$OPS, levels=c("1", "2", "3", "4", "5"), ordered=TRUE)

# run our regression model
# note: glm with family="binomial" needs a binary outcome; a 5-level
# ordered factor calls for a proportional-odds (ordinal) logistic model
library(MASS)
model <- polr(OPS ~ y1 + y2 + y3 + y4 + y5,
              data = rail, Hess = TRUE)

summary(model)
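Exponentiated proportional-odds coefficients are odds ratios; a quick sketch to pull them with profile confidence intervals (confint() on a polr fit profiles the likelihood, so it can take a moment):

#odds ratios and 95% CIs
exp(coef(model))
exp(confint(model))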

#Confusion matrix

# training

p<- predict(model, rail)   #polr predicts classes by default, so table() works

tab<- table(p, rail$OPS)

tab

1-sum(diag(tab))/sum(tab)



#testing

p1<- predict(model, test)

tab1<- table(p1, test$OPS)

tab1

1-sum(diag(tab1))/sum(tab1)


#end

library(e1071)

#data
#sample(rail$OPS) is a random permutation, used here only as a stand-in for predictions

confusionMatrix(rail$OPS, sample(rail$OPS))

newPrior <- c(.05, .8, .15, 0.5, 0.9)

names(newPrior) <- levels(rail$OPS)


cm <-confusionMatrix(rail$OPS, sample(rail$OPS))


#2

# extract the confusion matrix values as data.frame

cm_d <- as.data.frame(cm$table)

# confusion matrix statistics as data.frame

cm_st <-data.frame(cm$overall)

# round the values

cm_st$cm.overall <- round(cm_st$cm.overall,2)


# here we also have the rounded percentage values

cm_p <- as.data.frame(prop.table(cm$table))

cm_d$Perc <- round(cm_p$Freq*100,2)


#3

library(ggplot2)     # to plot

library(gridExtra)   # to put more

library(grid)        # plot together


# plotting the matrix

cm_d_p <-  ggplot(data = cm_d, aes(x = Prediction , y =  Reference, fill = Freq))+

  geom_tile() +

  geom_text(aes(label = paste("",Freq,",",Perc,"%")), color = 'red', size = 8) +

  theme_light() +

  guides(fill=FALSE) 


# plotting the stats

cm_st_p <-  tableGrob(cm_st)


# all together

grid.arrange(cm_d_p, cm_st_p,nrow = 1, ncol = 2, 

             top=textGrob("Confusion Matrix and Statistics",gp=gpar(fontsize=25,font=1)))

#search confusion matrix plot


###########################################################################




Tuesday, July 27, 2021

Scatter plot in R

# install.packages("ggplot2")

# install.packages("ggExtra")

library(ggplot2)

library(ggExtra)


dat<- read.csv(file.choose())

colnames(dat)


# Save the scatter plot in a variable

p <- ggplot(dat, aes(x = Speed_2017_to_2019, y =  Volume_2017_to_2019)) +

  geom_point(shape=5, color="gray")+

  ylim(0, 100)+

  xlim(0, 100)+

  geom_smooth(method=lm, color="darkred", fill="blue")+

  theme_classic()


# Densigram

ggMarginal(p, type = "densigram",

           xparams = list(fill = 4),

           yparams = list(fill = 3)) 



# Save the scatter plot in a variable

p <- ggplot(dat, aes(x = Speed_2020, y =  Volume_2020)) +

  geom_point()+ 

  ylim(0, 100)+

  xlim(0, 100)+

  geom_smooth(method=lm)+

  theme_classic()

#scale_size_manual(values=c(2,3,4))

#p + scale_y_discrete(limits=c("0", "100", "200", "300", "400"))

#p + scale_x_discrete(limits=c("0", "30", "60", "90", "120"))


# Densigram

ggMarginal(p, type = "densigram",

           xparams = list(fill = 4),

           yparams = list(fill = 3)) 


# Save the scatter plot in a variable

p <- ggplot(dat, aes(x = Crash_2020, y = Speed_2020)) +

  geom_point()+

  geom_smooth(method=lm, color="darkred", fill="blue")


# Densigram

ggMarginal(p, type = "densigram",

           xparams = list(fill = 4),

           yparams = list(fill = 3))
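ggMarginal returns a grid object rather than a plain ggplot, so the most reliable export is an explicit graphics device; a minimal sketch (the file name is illustrative):

#save the marginal plot to disk via a base graphics device
p_marg <- ggMarginal(p, type = "densigram",
                     xparams = list(fill = 4),
                     yparams = list(fill = 3))
png("scatter_densigram.png", width = 800, height = 600)
print(p_marg)
dev.off()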





Histogram in R

 # Load data

p <- read.csv(file.choose())

colnames(p)


hist(p$Speed_during, xlab = "Speed in 2020", ylab = "Frequency", 
     main = "", breaks= 20, xlim= c(0, 120))   #w is not a hist() argument

hist(p$Volume_during, xlab = "Volume in 2020", ylab = "Frequency", main = "",
     breaks= 20, xlim= c(0, 120))

#breaks = number of bins (hist treats it as a suggestion)
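To force exact 20-unit bins (what the stray w=20 argument above was presumably after), pass explicit cut points instead of a count; a sketch assuming all speeds fall in [0, 120]:

#explicit 20-unit bins; hist() errors if values fall outside the breaks
hist(p$Speed_during, breaks = seq(0, 120, by = 20),
     xlab = "Speed in 2020", ylab = "Frequency", main = "")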



Thursday, July 22, 2021

Nonlinear Model in R

 require(foreign)  #import data files

require(ggplot2)  #plot

require(MASS)     #for Modern Applied Statistics with S

library("mosaic") #Statistics and Mathematics Teaching Utilities


# Load data

p <- read.csv(file.choose())


m<- fitModel(SPEEDING_CRASH  ~ 

              exp(LENGTH              +

                    LOG_SL*a                  +

                    LOG_NL *b          +

                    P_TRUCK   *c     +

                    LOG_AVG_AADT*d +

                    LOG_SIGNAL_PER_MILE*e +

                    LOG_STOPPED_PER_MILE*f +

                    LOG_MEDIAN_WIDTH*g +

                    MEDIAN_TYPE*h +

                    P_RESIDENTIAL*i +

                    P_COMMERCIAL*j +

                    P_INDUSTRIAL*k +

                    P_AGRICULTURAL*l +

                    P_INSTITUTIONAL*m +

                    P_GOVERNMENTAL*n +

                    LANDUSE_MIX*o +

                    LOG_POP_DEN*p +

                    PCT_POV *q          +

                    PCT_VEHICLE_0*r +

                    PCT_OLD_YOUNG*s +

                    PCT_MEANS_PEDBIC*t  +

                    LOG_DAILY_TRANSIT*u  +

                    LOG_PAVEMENT_CONDITION *v +

                    ISLDTYPE*w +

                    LOG_ISLDWIDTH*x +

                    OSLDTYPE*y +

                    LOG_OSLDWIDTH*z +

                    SURF_TYPE*a1 +

                    LOG_SURFACE_WIDTH*b1  +

                    LOG_LANE_WIDTH*c1 +

                    LOG_POLES_PER_MILE*d1 +

                    P_SINGLE_SIDEWALK_ONLY*e1  +

                    P_BOTH_SIDEWALK_ONLY*f1 +

                    LOG_SIDEWALK_WIDTH*g1 +

                    P_SINGLE_BIKELANE_ONLY*h1 +

                    P_BOTH_BIKELANE_ONLY*i1 +

                    P_SINGLE_BIKESLOT_ONLY*j1 +

                    P_BOTH_BIKESLOT_ONLY*k1 +

                    P_SINGLE_SHAREDPATH_ONLY*l1  +

                    P_BOTH_SHAREDPATH_ONLY*m1   +

                    LOG_SHAREDPATH_WIDTH*n1 +

                    LOG_SHAREDPATH_DISTFMRD *o1  +

                    LOG_FRQ_AM78 *p1           +

                    NARROW_LANE *q1       +

                    ASPHALT_SURFACE   *r1      +          

                    PRESENCE_OF_MEDIAN_ISL_AT_CROSS *s1 +

                    HIGH_FREQ_TRANSIT*t1 +

                    P_SPEEDING_ADJ*u1 +           

                    LOG_85_PERCENT_SPEED_ADJ*v1+

                    w1), data = p, start=

               list(a=0,b=0,c=0,d=0,e=0,f=0,g=0,h=0,i=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,a1=0,

                    b1=0,c1=0,d1=0,e1=0,f1=0,g1=0,h1=0,i1=0,j1=0,k1=0,l1=0,m1=0,n1=0,o1=0,p1=0,q1=0,r1=0,s1=0,t1=0,u1=0,v1=0,w1=0)) #start every coefficient at 0


print(summary(m))


#CMF

m1<-coef(m)

m1

cmf<- exp(m1*(1-0))   #crash modification factor for a one-unit change in each predictor

cmf


############################## 

modelSummary <- summary(m)  # capture model summary as an object

modelCoeffs <- modelSummary$coefficients  # model coefficients all 

beta.estimate <- modelCoeffs[, "Estimate"]  # get beta estimate from model

beta.estimate 

stde <- modelCoeffs[, "Std. Error"]  # get std.error from model

stde

####standard error

se<- (exp(beta.estimate+stde)-exp(beta.estimate-stde))/2

se

#end

Monday, July 12, 2021

MARS in R

 #MARS

library(earth)

require(MASS)


# load data
a <- read.csv(file.choose())

# fit model
fit <- earth(formula=SPEEDING_CRASH ~
               log(AVG_AADT)+LANDUSE_MIX+PAVEMENT_CONDITION+ISLDWIDTH+
               P_BOTH_SIDEWALK_ONLY+HIGH_FREQ_TRANSIT+SIGNAL_PER_MILE+
               MEDIAN_WIDTH+DAILY_TRANSIT+LANE_WIDTH,
             data = a, degree=1)

fit

plot(fit)

# summarize the fit

summary(fit)

# summarize the importance of input variables

evimp(fit)

plot(evimp(fit))

# make predictions

predictions <- predict(fit, a)

plot(fit)

# summarize accuracy

coefficients(fit)

residuals(fit)

mse <- mean((a$SPEEDING_CRASH - predictions)^2)

print(mse)
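For a quick look at the fitted response curves, the companion plotmo package (by the earth author) draws one panel per predictor with the other predictors held at median values; a sketch assuming the package is installed:

#install.packages("plotmo")
library(plotmo)
plotmo(fit)   #one response curve per predictor in the MARS model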

#standard error
#note: earth does not supply coefficient standard errors (MARS term
#selection invalidates the usual formulas), and standard_error(),
#Std.Error() and se.coef() are not base R or earth functions, so the
#z-test below is left commented out
#se <- sqrt(diag(vcov(fit)))

#2-tailed z test (would require standard errors)
#z<- coef(fit)/se
#p<- (1-pnorm(abs(z),0,1))*2
#p        #p-value


Sunday, April 25, 2021

Multigroup Analysis for SEM

 #R 3.6 is needed for MGA analysis 

#MGA requires the grouping variable to have exactly two levels
#More than two option in a variable Mga is not possible

#install.packages("devtools") 

#library(devtools)

# install "plspm"

#install_github("gastonstat/plspm")

# load plspm

library(plspm)

#require(plspm)

dataset <- read.csv(file.choose())


#make a model

CO=c(0,0,0,0,0)

AT=c(1,0,0,0,0)

SN=c(1,0,0,0,0)

PBC=c(1,0,0,0,0)

PMO=c(0,1,1,1,0)


x=rbind(CO,AT,SN,PBC,PMO)

colnames(x)=rownames(x)

innerplot(x, arr.pos=.6)   #plot the inner (path) model



out=list(6:9, 10:12, 13:17, 18:21, 22:23)


################

#run the model on the total sample:

xx=plspm(dataset, x, out,

         scheme="path",

         boot.val=T, br=1247)
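Before comparing groups it is worth inspecting the pooled model; a quick look, assuming the plspm fit above converged:

summary(xx)      #outer model, inner model, bootstrap results
xx$path_coefs    #matrix of path coefficients
xx$gof           #goodness-of-fit index
plot(xx)         #inner model diagram with coefficients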


#Multigroup Analysis using bootstrap 

plspm.groups(xx, dataset$gender,

             method="bootstrap",

             reps=500)

#Multigroup Analysis using bootstrap (the grouping variable must still come from the dataset)
plspm.groups(xx, as.factor(dataset$gender),
             method="bootstrap",
             reps=500)


#Multigroup Analysis using bootstrap

plspm.groups(xx, dataset$Education.level,

             method="bootstrap",

             reps=500)

#Multigroup Analysis using bootstrap

plspm.groups(xx, dataset$income,

             method="bootstrap",

             reps=500)

#Multigroup Analysis using bootstrap

plspm.groups(xx, dataset$living,

             method="bootstrap",

             reps=500)



Thursday, April 22, 2021

Principal Component Analysis (PCA)

#install.packages("magrittr")  # for piping %>%

#install.packages("ade4")      # PCA computation

#install.packages("factoextra")# PCA visualization

library(psych)

#library(fmsb)

#PCA - principal component analysis

#Covid19 data analysis


mydata<- read.csv(file.choose())

summary(mydata)


#Define variables

#mydata<- cbind(CO1,CO2,CO3,CO4,CO5,CO6,CO7,CO8,PMO1,PMO2,PMO3,

            #AT1,AT2,AT3,AT4,AT5,AT6,AT7,SN1,SN2,SN3,SN4,

            #PBC1,PBC2,PBC3,PBC4)


#Calculating_Cronbach's Alpha

covid_concern<- alpha(data.frame(mydata[c("CO2", "CO3", "CO5", "CO7")]))

attitude<- alpha(data.frame(mydata[c("SN3", "PBC1", "PBC2")]))

social_norm<- alpha(data.frame(mydata[c("CO4", "AT7", "SN1", "SN2", "PMO1")]))

perc_beh_control<- alpha(data.frame(mydata[c("AT2", "AT4", "AT5", "AT6")]))

perc_mor_obligation<-alpha(data.frame(mydata[c("CO8", "AT1")]))


#KMO

covid_concern<- KMO(data.frame(mydata[c("CO2", "CO3", "CO5", "CO7")]))

attitude<- KMO(data.frame(mydata[c("SN3", "PBC1", "PBC2")]))

social_norm<- KMO(data.frame(mydata[c("CO4", "AT7", "SN1", "SN2", "PMO1")]))

perc_beh_control<- KMO(data.frame(mydata[c("AT2", "AT4", "AT5", "AT6")]))

perc_mor_obligation<-KMO(data.frame(mydata[c("CO8", "AT1")]))


#Bartlett's Test of Sphericity

#$pvalue is the sphericity(sig.) value

covid_concern<-cortest.bartlett(cor(data.frame(mydata[, 1:8])), n = nrow(mydata))

attitude<- cortest.bartlett(cor(data.frame(mydata[, 9:15])), n = nrow(mydata))

social_norm<- cortest.bartlett(cor(data.frame(mydata[, 16:19])), n = nrow(mydata))

perc_beh_control<- cortest.bartlett(cor(data.frame(mydata[, 20:23])), n = nrow(mydata))

perc_mor_obligation<-cortest.bartlett(cor(data.frame(mydata[c("CO8", "AT1")])), n = nrow(mydata))


#Descriptive statistics

summary(mydata)

cor(mydata)


#Principal Component Analysis

pca1<-princomp(mydata, scores=TRUE, cor=TRUE)

summary(pca1)


#Loading of principal components

loadings(pca1)

#pca1$loadings


#Scree plot of eigenvalues

plot(pca1)

#barplot(pca1)

screeplot(pca1, type="line", main="Scree Plot")
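The Kaiser criterion (keep components with eigenvalue greater than 1) can be checked directly, since princomp stores the component standard deviations:

#eigenvalues are the squared component standard deviations
eigenvalues <- pca1$sdev^2
eigenvalues[eigenvalues > 1]   #components retained under the Kaiser rule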



#Biplot of score variables

biplot(pca1)


#Scores of the components

pca1$scores[1:26, ]


#Rotation

#varimax(loadings(pca1))

#promax(loadings(pca1))



#Factor analysis - results can differ from other software; see the rotation options below

fa1<- factanal(mydata, factor = 5)

fa1


fa2<- factanal(mydata, factor=5, rotation="varimax",  scores = "regression")

fa2


fa3<- factanal(mydata, factors=5, rotation = "promax", scores = "regression")

fa3


library(ade4)

library(factoextra)

library(magrittr)

#Visualize eigenvalues (scree plot). Show the percentage of variances explained by each principal component.

fviz_eig(pca1)

#Graph of individuals. Individuals with a similar profile are grouped together.

fviz_pca_ind(pca1,

             col.ind = "cos2", # Color by the quality of representation

             gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),

             repel = TRUE)     # Avoid text overlapping 

#Graph of variables. Positive correlated variables point to the same side of the plot. Negative correlated variables point to opposite sides of the graph.

fviz_pca_var(pca1,

             col.var = "contrib", # Color by contributions to the PC

             gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),

             repel = TRUE     # Avoid text overlapping

)

#Biplot of individuals and variables

fviz_pca_biplot(pca1, repel = TRUE,

                col.var = "#2E9FDF", # Variables color

                col.ind = "#696969"  # Individuals color

)


#The ade4 package creates R base plots.

# Scree plot

screeplot(pca1, main = "Screeplot - Eigenvalues", xlab="Component")


# Correlation circle of variables
# (s.corcircle expects an ade4 dudi.pca object, which stores variable
#  coordinates in $co; a princomp object does not have that slot)
pca2 <- dudi.pca(mydata, scannf = FALSE, nf = 5)

s.corcircle(pca2$co)

 





Thursday, April 15, 2021

Descriptive statistics in R

 #Tawkir codehub

library(psych)  #provides describe()
data<-read.csv(file.choose())  #to input data

summary(data) #summary of all variables

describe(data) #mean, median, etc. for numeric data

head(data)  #to preview the first rows


#Rename one column name

colnames(data)[colnames(data)=="lining"]<- "Living"

#Rename all column names

colnames(data)<- c("Age", "Income", "Gender")

#Rename some column names (the replacement vector must be assigned;
#"Concern" and "Obligation" are example names)

colnames(data)[colnames(data) %in% c("CO","PMO")] <- c("Concern","Obligation")

#Replace value or character in a column
data$Gender[data$Gender=="Male"]<- "2"
data$Gender[data$Gender=="Female"]<- "1"

#in general: replace missing values across the whole data frame
data[is.na(data)] <- 0   #error handling

#Percentage and frequency
agef<-table(data$Age)  #frequency table
agep<-prop.table(table(data$Age)) #percentage table
barplot(agep, ylab="%")


table(data$Living)

data$Living[data$Living=="Megacity"]<- "1"

data$Living[data$Living=="City"]<- "2"

data$Living[data$Living=="Town"]<- "2"

data$Living[data$Living=="Village"]<- "2"


table(data$Age)

data$Age[data$Age=="Young"]<-"1"

data$Age[data$Age=="Adult"]<-"2"

data$Age[data$Age=="Old"]<-"3"


table(data$Education)

data$Education[data$Education=="Secondary"]<-"1"

data$Education[data$Education=="Tertiary level (college and university degree)"]<-"2"


table(data$Income)

data$Income[data$Income=="Low"]<-"1"

data$Income[data$Income=="Middle"]<-"2"

data$Income[data$Income=="Higher-middle"]<-"3"

data$Income[data$Income=="Higher"]<-"4"


#convert character to number

data$Living<-as.character(data$Living)

data$Age<-as.character(data$Age)

data$Gender<-as.character(data$Gender)

data$Education<-as.character(data$Education)

data$Income<-as.character(data$Income)


data$Living<-as.numeric(as.character(data$Living))

data$Age<-as.numeric(as.character(data$Age))

data$Gender<-as.numeric(as.character(data$Gender))

data$Education<-as.numeric(as.character(data$Education))

data$Income<-as.numeric(as.character(data$Income))

   

data$Living<-as.factor(data$Living)

data$Age<-as.factor(data$Age)

data$Gender<-as.factor(data$Gender)

data$Education<-as.factor(data$Education)

data$Income<-as.factor(data$Income)


#create new data for trial

female<- ifelse(data$Gender=="Female", 1,0)

male<- ifelse(data$Gender=="Male", 2,0)

young<- ifelse(data$Age=="Young",1,0)



describe(data)

summary(data)

colnames(data)



library(psych)

#Calculating_Cronbach's Alpha

demo<- alpha(data.frame(data[c("Living", "Age", "Gender",

                                       "Education", "Income" )]))

#save the change file for future work

write.csv(data,"E:\\planning''''''''''''''''''''''''\\SEM C19 model sir\\chang variable name main\\CODE\\Covid_main.csv", row.names = FALSE)



Wednesday, April 14, 2021

Reliability test in R

 #Calculating_Cronbach's Alpha
library(psych)                 #provides alpha()
all<- read.csv(file.choose())  #data frame holding the scale items

covid_concern<- alpha(data.frame(all[c("CO2", "CO3", "CO5", "CO7")]))

attitude<- alpha(data.frame(all[c("SN3", "PBC1", "PBC2")]))

social_norm<- alpha(data.frame(all[c("CO4", "AT7", "SN1", "SN2", "PMO1")]))

perc_beh_control<- alpha(data.frame(all[c("AT2", "AT4", "AT5", "AT6")]))

perc_mor_obligation<-alpha(data.frame(all[c("CO8", "AT1")]))

Tuesday, April 13, 2021

Conditional statement and new variable creation in R

 # Load data

p <- read.csv(file.choose())


#create new data for trial


lw9=ifelse(p$LANE_WIDTH<9, 1,0)   #note: widths in [9,10) fall into no bin as coded

lw10=ifelse(p$LANE_WIDTH>=10 & p$LANE_WIDTH<11, 1,0)

lw11=ifelse(p$LANE_WIDTH>=11 & p$LANE_WIDTH<12, 1,0)

lw12=ifelse(p$LANE_WIDTH>=12 & p$LANE_WIDTH<13, 1,0)

lw13=ifelse(p$LANE_WIDTH>=13 & p$LANE_WIDTH<14, 1,0)

lw14=ifelse(p$LANE_WIDTH>=14 & p$LANE_WIDTH<15, 1,0)

lw15=ifelse(p$LANE_WIDTH>=15 & p$LANE_WIDTH<16, 1,0)

lw16=ifelse(p$LANE_WIDTH>=16 & p$LANE_WIDTH<17, 1,0)

lw17=ifelse(p$LANE_WIDTH>=17 & p$LANE_WIDTH<18, 1,0)

lw18=ifelse(p$LANE_WIDTH>=18 & p$LANE_WIDTH<19, 1,0)

lw19=ifelse(p$LANE_WIDTH>=19 & p$LANE_WIDTH<20, 1,0)

lw20=ifelse(p$LANE_WIDTH>=20 & p$LANE_WIDTH<21, 1,0)

lw21=ifelse(p$LANE_WIDTH>=21 & p$LANE_WIDTH<22, 1,0)

lw22=ifelse(p$LANE_WIDTH>=22, 1,0)


#combined data frame

data=cbind(p, lw9, lw10, lw11, lw12, lw13, lw14, lw15, lw16, lw17, lw18, lw19, lw20, lw21, lw22)
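The fourteen ifelse() calls above collapse into one base-R cut(); a sketch over the same LANE_WIDTH column (unlike the manual bins, it also covers the [9,10) gap):

#right-open one-foot bins; -Inf/Inf catch the <9 and >=22 tails
lw_bin <- cut(p$LANE_WIDTH, breaks = c(-Inf, 9:22, Inf), right = FALSE)
table(lw_bin)
#model.matrix() expands the factor into 0/1 dummy columns if needed
dummies <- model.matrix(~ lw_bin - 1)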


#for letter data

#create new data for trial

female<- ifelse(data$Gender=="Female", 1,0)

male<- ifelse(data$Gender=="Male", 1,0)

#describe(newdata$CO2)   #leftover from another dataset; newdata is not defined here

#Data to word: convert coded values back to labels

gender<- ifelse(data$Gender==1, "Female", "Male")


Wednesday, March 10, 2021

Multiple chart in R

 par(mfrow=c(3, 3))

hist(as.numeric(data_cleaned$Cl.thickness))

hist(as.numeric(data_cleaned$Cell.size))

hist(as.numeric(data_cleaned$Cell.shape))

hist(as.numeric(data_cleaned$Marg.adhesion))

hist(as.numeric(data_cleaned$Epith.c.size))

hist(as.numeric(data_cleaned$Bare.nuclei))

hist(as.numeric(data_cleaned$Bl.cromatin))

hist(as.numeric(data_cleaned$Normal.nucleoli))

hist(as.numeric(data_cleaned$Mitoses))


Sunday, February 28, 2021

SEM in R

library(psych)
all=read.csv(choose.files())
#Calculating_Cronbach's Alpha
covid_concern<- alpha(data.frame(all[c("CO2", "CO3", "CO5", "CO7")]))
attitude<- alpha(data.frame(all[c("SN3", "PBC1", "PBC2")]))
social_norm<- alpha(data.frame(all[c("CO4", "AT7", "SN1", "SN2", "PMO1")]))
perc_beh_control<- alpha(data.frame(all[c("AT2", "AT4", "AT5", "AT6")]))
perc_mor_obligation<-alpha(data.frame(all[c("CO8", "AT1")]))

####sem

library(lavaan)
library(semPlot)

#all=read.csv(choose.files())

model1<- '
# Measurement model
CO=~CO2+ CO3 +CO5 +CO7
AT=~SN3+ PBC1+ PBC2
SN=~CO4+ AT7+ SN1+ SN2+ PMO1
PBC=~AT2+AT4+AT5+AT6
PMO=~CO8+AT1

# Structural model (regressions)
AT~CO
SN~CO
PBC~CO

PMO~AT
PMO~SN
PMO~PBC
                  '
fit1<- sem(model1, data=all)
fit1

summary(fit1, rsquare = TRUE,
        fit.measures = TRUE,
        standardized = TRUE)


fitMeasures(fit1)
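A compact report of the usual cutoff indices, instead of the full fitMeasures() dump:

fitMeasures(fit1, c("chisq", "df", "pvalue", "cfi", "tli", "rmsea", "srmr"))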

semPaths(fit1, what="paths", whatLabels = "stand",
         rotation = 2,
         layout = "spring",
         posCol = "black",
         edge.width = 0.5, 
         style = "Lisrel",
         fade = T,
         edge.label.position = 0.55)
###############

# Extract the correlation matrix
all.cor <- cor(all, method = "pearson", use = "pairwise.complete.obs")
all.cor


# Correlogram (load corrplot before calling it)
library(corrplot)
library(RColorBrewer)

corrplot(all.cor, order = "hclust",
         tl.col = "black", tl.srt = 80,
         addCoef.col = "black",
         number.cex = 0.8,
         cl.cex = 1,
         tl.cex = 0.8)

#library(psych)
#corPlot(data, cex =1.2, main="", 
#       cex.lab = 1.2,
#        cex.axis =1.2,
#       cex.main = 1.2,
#      cex.sub = 1.2)

library(psych)
cor.plot(all.cor,numbers=TRUE,colors=TRUE,
         n=51,main=NULL,labels=NULL,
         cex =1,
         cex.lab = 1,
         cex.axis =1, #right side level
         cex.main = 1,
         cex.sub = 1)



Confusion Matrix for Any Model in R



 #Confusion matrix
 #(template: assumes a fitted `model` whose predict() returns class labels,
 # plus data frames `rail` and `test` containing the outcome column OPS)

# training

p<- predict(model, rail)

tab<- table(p, rail$OPS)

tab

1-sum(diag(tab))/sum(tab)



#testing

p1<- predict(model, test)

tab1<- table(p1, test$OPS)

tab1

1-sum(diag(tab1))/sum(tab1)


#end

library(e1071)

#data

confusionMatrix(rail$OPS, sample(rail$OPS))

newPrior <- c(.05, .8, .15, 0.5, 0.9)

names(newPrior) <- levels(rail$OPS)


cm <-confusionMatrix(rail$OPS, sample(rail$OPS))


#2

# extract the confusion matrix values as data.frame

cm_d <- as.data.frame(cm$table)

# confusion matrix statistics as data.frame

cm_st <-data.frame(cm$overall)

# round the values

cm_st$cm.overall <- round(cm_st$cm.overall,2)


# here we also have the rounded percentage values

cm_p <- as.data.frame(prop.table(cm$table))

cm_d$Perc <- round(cm_p$Freq*100,2)


#3

library(ggplot2)     # to plot

library(gridExtra)   # to put more

library(grid)        # plot together


# plotting the matrix

cm_d_p <-  ggplot(data = cm_d, aes(x = Prediction , y =  Reference, fill = Freq))+

  geom_tile() +

  geom_text(aes(label = paste("",Freq,",",Perc,"%")), color = 'red', size = 8) +

  theme_light() +

  guides(fill=FALSE) 


# plotting the stats

cm_st_p <-  tableGrob(cm_st)


# all together

grid.arrange(cm_d_p, cm_st_p,nrow = 1, ncol = 2, 

             top=textGrob("Confusion Matrix and Statistics",gp=gpar(fontsize=25,font=1)))

#search confusion matrix plot


###########################################################################


Tuesday, February 16, 2021

Sankey Plot

#Tawkir_ahmed_code

#Sankey_plot


 

library(networkD3)


## create a dataframe with 10 nodes

nodes = data.frame("name" = c("Node_0", "Node_1", "Node_2", "Node_3", "Node_4", "Node_5",

                              "Node_6", "Node_7", "Node_8", "Node_9"))


## create edges with weights

links = as.data.frame(matrix(c(0, 5, 2, # node 0 -> node 5 with weight 2

                               0, 9, 2, # node 0 -> node 9 with weight 2

                               0, 6, 2, # node 0 -> node 6 with weight 2

                               1, 6, 1, # node 1 -> node 6 with weight 1

                               1, 7, 3, # node 1 -> node 7 with weight 3

                               1, 8, 2, # node 1 -> node 8 with weight 2

                               2, 9, 3, # node 2 -> node 9 with weight 3

                               3, 5, 1, # node 3 -> node 5 with weight 1

                               3, 8, 1, # node 3 -> node 8 with weight 1

                               3, 9, 5, # node 3 -> node 9 with weight 5

                               4, 9, 2,  # node 4 -> node 9 with weight 2

                               4, 6, 2  # node 4 -> node 6 with weight 2

), byrow = TRUE, ncol = 3))


## set column names for links

names(links) = c("source", "target", "value")


## add edge types for coloring purpose

links$group = c("type_0",

                "type_0",

                "type_0",

                "type_1",

                "type_1", 

                "type_1",

                "type_2",

                "type_3",

                "type_3",

                "type_3",

                "type_4",

                "type_4")


## Create custom color list using d3 for each node

node_color <- 'd3.scaleOrdinal() .domain(["Node_0", "Node_1", "Node_2", "Node_3", "Node_4", 

"Node_5", "Node_6", "Node_7", "Node_8", "Node_9", "type_0", "type_1", "type_2", 

"type_3", "type_4"]) .range(["#bf5b17", "#beaed4", "#fdc086" , "#386cb0", "#7fc97f", 

"#bf5b17", "#beaed4", "#fdc086" , "#386cb0", "#7fc97f", "#bf5b17", "#beaed4", "#fdc086" , "#386cb0", "#7fc97f"])'


## Draw Sankey Diagram

p = sankeyNetwork(Links = links, Nodes = nodes,

                  Source = "source", Target = "target",

                  Value = "value", NodeID = "name",

                  fontSize = 16, nodeWidth = 40,

                  colourScale = node_color,

                  LinkGroup="group")

p
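networkD3 renders an HTML widget, so saving goes through htmlwidgets rather than a graphics device; a minimal sketch (the file name is illustrative):

## save the interactive diagram as a standalone HTML file
library(htmlwidgets)
saveWidget(p, file = "sankey.html")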

Wednesday, January 20, 2021

Correlation Matrix

data= read.csv(file.choose())

library(corrplot)

library(RColorBrewer)

library(psych)

#data<-cbind(X,Y)   #optional: first combine two variable sets X and Y

corMat = cor(data, method="spearman")

cor.plot(corMat,numbers=TRUE,colors=TRUE,   #tl.col/tl.srt are corrplot arguments, dropped here

         n=51,main=NULL,labels=NULL,

         cex =1.1,

         cex.lab = 0.8,

         cex.axis =0.8, #right side level

         cex.main = 0.8,

         cex.sub = 0.8)


Alternate way
###
corMat = cor(data, method="spearman")
col <- colorRampPalette(c("#BB4444", "#EE9988", "#FFFFFF", "#77AADD", "#4477AA"))
corrplot(corMat, method="color", col=col(200),  
         type="upper", order="hclust", 
         addCoef.col = "black", # Add coefficient of correlation
         tl.col="black", tl.srt=65, #Text label color and rotation
         # hide correlation coefficient on the principal diagonal
         diag=FALSE 
)
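To blank out statistically insignificant correlations, corrplot ships cor.mtest() for pairwise p-values; a sketch on the same data (note cor.mtest() defaults to Pearson tests):

res <- cor.mtest(data, conf.level = 0.95)   #list with p-value matrix res$p
corrplot(corMat, method="color", col=col(200),
         type="upper", order="hclust",
         p.mat = res$p, sig.level = 0.05, insig = "blank",
         tl.col="black", tl.srt=65, diag=FALSE)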