Monday, August 2, 2021

Ordinal Logistic Regression (OLR) in Neural Network (NN) using R

 library(neuralnet)

library("ISLR")


#load data

data<- read.csv(file.choose())

colnames(data)


smp_size <- floor(0.75 * nrow(data))

train_ind <- sample(seq_len(nrow(data)), size = smp_size)


train <- data[train_ind, ]

test <- data[-train_ind, ]


#scale the predictor columns (2:11)

mean_data <- apply(data[2:11], 2, mean)


sd_data <- apply(data[2:11], 2, sd)


data_scaled <- as.data.frame(scale(data[,2:11], center = mean_data, scale = sd_data))
data_scaled$KABCO <- data$KABCO   #carry the (unscaled) response along; assumes KABCO sits outside columns 2:11


head(data_scaled, n=20)


index = sample(1:nrow(data),round(0.70*nrow(data)))


train_data <- as.data.frame(data_scaled[index,])


test_data <- as.data.frame(data_scaled[-index,])



# Custom activation function
# (this is the logistic sigmoid; the true softplus would be log(1 + exp(x)))
sigmoid <- function(x) 1 / (1 + exp(-x))
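If actual softplus is wanted instead, a one-line definition (shown for reference only; it is not used in the model below):

#true softplus: a smooth approximation of ReLU
softplus <- function(x) log(1 + exp(x))
#it can be passed the same way: act.fct = softplus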

# (KABCO=="2") turns the ordinal response into a binary target for the network
nn <- neuralnet((KABCO=="2") ~ Covid + Speed + Volume + Drug_Alco +
                  Lighting + Road_Surface + Speeding + Work_Zone_Related +
                  Weather_Condition, train_data,
                linear.output = FALSE, hidden = c(9,9), act.fct = sigmoid,
                likelihood = TRUE, threshold = 0.01)

print(nn)

plot(nn)

summary(nn)

predict_olr <- predict(nn, test_data)   #returns a matrix of predicted probabilities

plot(test_data$KABCO, predict_olr, col='black',
     main='Real vs predicted for neural network', pch=18, cex=4)

abline(0, 1, lwd=2)   #45-degree reference line


#Check the data - actual and predicted (on the training set)

final_output <- cbind(train_data$KABCO == "2",
                      as.data.frame(nn$net.result))

colnames(final_output) <- c("Expected Output",
                            "Neural Net Output")

print(final_output)


#arrange plots in a 2 x 2 grid

par(mfrow=c(2,2))


#generalized weight plots for covariates from the fitted model
gwplot(nn, selected.covariate="Speed")

gwplot(nn, selected.covariate="Volume")

gwplot(nn, selected.covariate="Covid")


#mse

predict_net_test <- compute(nn,test_data)


MSE.net <- sum(((test_data$KABCO == "2") - predict_net_test$net.result)^2)/nrow(test_data)

MSE.net
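RMSE is often easier to read than MSE because it is on the scale of the response; a one-line follow-up to the computation above:

#root mean squared error, same scale as the 0/1 target
RMSE.net <- sqrt(MSE.net)
RMSE.net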



#####################################################################

#Confusion matrix

library(caret)

library(e1071)


# training


p<- ifelse(predict(nn, train_data) > 0.5, 1, 0)   #threshold the predicted probabilities


tab<- table(p, train_data$KABCO == "2")


tab


1-sum(diag(tab))/sum(tab)   #misclassification rate






#testing


p1<- ifelse(predict(nn, test_data) > 0.5, 1, 0)


tab1<- table(p1, test_data$KABCO == "2")


tab1


1-sum(diag(tab1))/sum(tab1)   #misclassification rate




#end




#data
#note: sample(data$KABCO) is a random permutation, used here only as a
#stand-in for real predictions; both arguments must be factors

confusionMatrix(data$KABCO, sample(data$KABCO))


newPrior <- c(.05, .8, .15, 0.5, 0.9)


names(newPrior) <- levels(data$KABCO)




cm <-confusionMatrix(data$KABCO, sample(data$KABCO))




#2


# extract the confusion matrix values as data.frame


cm_d <- as.data.frame(cm$table)


# confusion matrix statistics as data.frame


cm_st <-data.frame(cm$overall)


# round the values


cm_st$cm.overall <- round(cm_st$cm.overall,2)




# here we also have the rounded percentage values


cm_p <- as.data.frame(prop.table(cm$table))


cm_d$Perc <- round(cm_p$Freq*100,2)




#3


library(ggplot2)     # to plot


library(gridExtra)   # to put more


library(grid)        # plot together




# plotting the matrix


cm_d_p <-  ggplot(data = cm_d, aes(x = Prediction , y =  Reference, fill = Freq))+
  geom_tile() +
  geom_text(aes(label = paste("",Freq,",",Perc,"%")), color = 'red', size = 8) +
  theme_light() +
  guides(fill=FALSE) 




# plotting the stats


cm_st_p <-  tableGrob(cm_st)




# all together


grid.arrange(cm_d_p, cm_st_p,nrow = 1, ncol = 2, 
             top=textGrob("Confusion Matrix and Statistics",gp=gpar(fontsize=25,font=1)))


#search confusion matrix plot




###########################################################################

#Create factor vectors of actual and predicted classes (both from the test
#set, so the lengths match)

expected_value <- factor(test_data$KABCO == "2", levels = c(FALSE, TRUE))

predicted_value <- factor(as.vector(predict(nn, test_data)) > 0.5, levels = c(FALSE, TRUE))


#Creating confusion matrix

example <- confusionMatrix(data=predicted_value, reference = expected_value)


#Display results 

example


table(expected_value,predicted_value)


#install required packages

install.packages('gmodels')

#import required library 

library(gmodels)


#Computes the crosstable calculations

CrossTable(expected_value,predicted_value)





Sunday, August 1, 2021

Logistic Regression with Confusion Matrix in R

 #Coded by Tawkir Ahmed 

library(ggplot2) #Used for plotting data

library(dplyr) #Used to extract columns in the data

library(rms) #Used to extract p-value from logistic model

library(aod)

library(caret)


# logistic model

rail <- read.csv(choose.files())   #training

test<- read.csv(choose.files())    #testing

labs <- levels(factor(rail$OPS))   #outcome category labels

labs

summary(rail)


# collapse all missing values to NA

rail$OPS <- factor(rail$OPS, levels=c("1", "2", "3", "4", "5"), ordered=TRUE)

# run our regression model
# note: glm with family="binomial" needs a binary outcome; a 5-level
# ordered factor calls for a proportional-odds (ordinal) logistic model
library(MASS)
model <- polr(OPS ~ y1 + y2 + y3 + y4 + y5,
              data = rail, Hess = TRUE)

summary(model)
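Exponentiated proportional-odds coefficients are odds ratios; a quick sketch to pull them with profile confidence intervals (confint() on a polr fit profiles the likelihood, so it can take a moment):

#odds ratios and 95% CIs
exp(coef(model))
exp(confint(model))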

#Confusion matrix

# training

p<- predict(model, rail)   #polr predicts classes by default, so table() works

tab<- table(p, rail$OPS)

tab

1-sum(diag(tab))/sum(tab)



#testing

p1<- predict(model, test)

tab1<- table(p1, test$OPS)

tab1

1-sum(diag(tab1))/sum(tab1)


#end

library(e1071)

#data
#sample(rail$OPS) is a random permutation, used here only as a stand-in for predictions

confusionMatrix(rail$OPS, sample(rail$OPS))

newPrior <- c(.05, .8, .15, 0.5, 0.9)

names(newPrior) <- levels(rail$OPS)


cm <-confusionMatrix(rail$OPS, sample(rail$OPS))


#2

# extract the confusion matrix values as data.frame

cm_d <- as.data.frame(cm$table)

# confusion matrix statistics as data.frame

cm_st <-data.frame(cm$overall)

# round the values

cm_st$cm.overall <- round(cm_st$cm.overall,2)


# here we also have the rounded percentage values

cm_p <- as.data.frame(prop.table(cm$table))

cm_d$Perc <- round(cm_p$Freq*100,2)


#3

library(ggplot2)     # to plot

library(gridExtra)   # to put more

library(grid)        # plot together


# plotting the matrix

cm_d_p <-  ggplot(data = cm_d, aes(x = Prediction , y =  Reference, fill = Freq))+

  geom_tile() +

  geom_text(aes(label = paste("",Freq,",",Perc,"%")), color = 'red', size = 8) +

  theme_light() +

  guides(fill=FALSE) 


# plotting the stats

cm_st_p <-  tableGrob(cm_st)


# all together

grid.arrange(cm_d_p, cm_st_p,nrow = 1, ncol = 2, 

             top=textGrob("Confusion Matrix and Statistics",gp=gpar(fontsize=25,font=1)))

#search confusion matrix plot


###########################################################################




Tuesday, July 27, 2021

Scatter plot in R

# install.packages("ggplot2")

# install.packages("ggExtra")

library(ggplot2)

library(ggExtra)


dat<- read.csv(file.choose())

colnames(dat)


# Save the scatter plot in a variable

p <- ggplot(dat, aes(x = Speed_2017_to_2019, y =  Volume_2017_to_2019)) +

  geom_point(shape=5, color="gray")+

  ylim(0, 100)+

  xlim(0, 100)+

  geom_smooth(method=lm, color="darkred", fill="blue")+

  theme_classic()


# Densigram

ggMarginal(p, type = "densigram",

           xparams = list(fill = 4),

           yparams = list(fill = 3)) 



# Save the scatter plot in a variable

p <- ggplot(dat, aes(x = Speed_2020, y =  Volume_2020)) +

  geom_point()+ 

  ylim(0, 100)+

  xlim(0, 100)+

  geom_smooth(method=lm)+

  theme_classic()

#scale_size_manual(values=c(2,3,4))

#p + scale_y_discrete(limits=c("0", "100", "200", "300", "400"))

#p + scale_x_discrete(limits=c("0", "30", "60", "90", "120"))


# Densigram

ggMarginal(p, type = "densigram",

           xparams = list(fill = 4),

           yparams = list(fill = 3)) 


# Save the scatter plot in a variable

p <- ggplot(dat, aes(x = Crash_2020, y = Speed_2020)) +

  geom_point()+

  geom_smooth(method=lm, color="darkred", fill="blue")


# Densigram

ggMarginal(p, type = "densigram",

           xparams = list(fill = 4),

           yparams = list(fill = 3))
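ggMarginal returns a grid object rather than a plain ggplot, so the most reliable export is an explicit graphics device; a minimal sketch (the file name is illustrative):

#save the marginal plot to disk via a base graphics device
p_marg <- ggMarginal(p, type = "densigram",
                     xparams = list(fill = 4),
                     yparams = list(fill = 3))
png("scatter_densigram.png", width = 800, height = 600)
print(p_marg)
dev.off()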





Histogram in R

 # Load data

p <- read.csv(file.choose())

colnames(p)


hist(p$Speed_during, xlab = "Speed in 2020", ylab = "Frequency", 
     main = "", breaks= 20, xlim= c(0, 120))   #w is not a hist() argument

hist(p$Volume_during, xlab = "Volume in 2020", ylab = "Frequency", main = "",
     breaks= 20, xlim= c(0, 120))

#breaks = number of bins (hist treats it as a suggestion)
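To force exact 20-unit bins (what the stray w=20 argument above was presumably after), pass explicit cut points instead of a count; a sketch assuming all speeds fall in [0, 120]:

#explicit 20-unit bins; hist() errors if values fall outside the breaks
hist(p$Speed_during, breaks = seq(0, 120, by = 20),
     xlab = "Speed in 2020", ylab = "Frequency", main = "")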



Thursday, July 22, 2021

Nonlinear Model in R

 require(foreign)  #import data files

require(ggplot2)  #plot

require(MASS)     #for Modern Applied Statistics with S

library("mosaic") #Statistics and Mathematics Teaching Utilities


# Load data

p <- read.csv(file.choose())


m<- fitModel(SPEEDING_CRASH  ~ 

              exp(LENGTH              +

                    LOG_SL*a                  +

                    LOG_NL *b          +

                    P_TRUCK   *c     +

                    LOG_AVG_AADT*d +

                    LOG_SIGNAL_PER_MILE*e +

                    LOG_STOPPED_PER_MILE*f +

                    LOG_MEDIAN_WIDTH*g +

                    MEDIAN_TYPE*h +

                    P_RESIDENTIAL*i +

                    P_COMMERCIAL*j +

                    P_INDUSTRIAL*k +

                    P_AGRICULTURAL*l +

                    P_INSTITUTIONAL*m +

                    P_GOVERNMENTAL*n +

                    LANDUSE_MIX*o +

                    LOG_POP_DEN*p +

                    PCT_POV *q          +

                    PCT_VEHICLE_0*r +

                    PCT_OLD_YOUNG*s +

                    PCT_MEANS_PEDBIC*t  +

                    LOG_DAILY_TRANSIT*u  +

                    LOG_PAVEMENT_CONDITION *v +

                    ISLDTYPE*w +

                    LOG_ISLDWIDTH*x +

                    OSLDTYPE*y +

                    LOG_OSLDWIDTH*z +

                    SURF_TYPE*a1 +

                    LOG_SURFACE_WIDTH*b1  +

                    LOG_LANE_WIDTH*c1 +

                    LOG_POLES_PER_MILE*d1 +

                    P_SINGLE_SIDEWALK_ONLY*e1  +

                    P_BOTH_SIDEWALK_ONLY*f1 +

                    LOG_SIDEWALK_WIDTH*g1 +

                    P_SINGLE_BIKELANE_ONLY*h1 +

                    P_BOTH_BIKELANE_ONLY*i1 +

                    P_SINGLE_BIKESLOT_ONLY*j1 +

                    P_BOTH_BIKESLOT_ONLY*k1 +

                    P_SINGLE_SHAREDPATH_ONLY*l1  +

                    P_BOTH_SHAREDPATH_ONLY*m1   +

                    LOG_SHAREDPATH_WIDTH*n1 +

                    LOG_SHAREDPATH_DISTFMRD *o1  +

                    LOG_FRQ_AM78 *p1           +

                    NARROW_LANE *q1       +

                    ASPHALT_SURFACE   *r1      +          

                    PRESENCE_OF_MEDIAN_ISL_AT_CROSS *s1 +

                    HIGH_FREQ_TRANSIT*t1 +

                    P_SPEEDING_ADJ*u1 +           

                    LOG_85_PERCENT_SPEED_ADJ*v1+

                    w1), data = p, start=

               list(a=0,b=0,c=0,d=0,e=0,f=0,g=0,h=0,i=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,a1=0,

                    b1=0,c1=0,d1=0,e1=0,f1=0,g1=0,h1=0,i1=0,j1=0,k1=0,l1=0,m1=0,n1=0,o1=0,p1=0,q1=0,r1=0,s1=0,t1=0,u1=0,v1=0,w1=0)) #start every coefficient at 0


print(summary(m))


#CMF

m1<-coef(m)

m1

cmf<- exp(m1*(1-0))   #crash modification factor for a one-unit change in each predictor

cmf


############################## 

modelSummary <- summary(m)  # capture model summary as an object

modelCoeffs <- modelSummary$coefficients  # model coefficients all 

beta.estimate <- modelCoeffs[, "Estimate"]  # get beta estimate from model

beta.estimate 

stde <- modelCoeffs[, "Std. Error"]  # get std.error from model

stde

####standard error

se<- (exp(beta.estimate+stde)-exp(beta.estimate-stde))/2

se

#end

Monday, July 12, 2021

MARS in R

 #MARS

library(earth)

require(MASS)


# load data
a <- read.csv(file.choose())

# fit model
fit <- earth(formula=SPEEDING_CRASH ~
               log(AVG_AADT)+LANDUSE_MIX+PAVEMENT_CONDITION+ISLDWIDTH+
               P_BOTH_SIDEWALK_ONLY+HIGH_FREQ_TRANSIT+SIGNAL_PER_MILE+
               MEDIAN_WIDTH+DAILY_TRANSIT+LANE_WIDTH,
             data = a, degree=1)

fit

plot(fit)

# summarize the fit

summary(fit)

# summarize the importance of input variables

evimp(fit)

plot(evimp(fit))

# make predictions

predictions <- predict(fit, a)

plot(fit)

# summarize accuracy

coefficients(fit)

residuals(fit)

mse <- mean((a$SPEEDING_CRASH - predictions)^2)

print(mse)
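For a quick look at the fitted response curves, the companion plotmo package (by the earth author) draws one panel per predictor with the other predictors held at median values; a sketch assuming the package is installed:

#install.packages("plotmo")
library(plotmo)
plotmo(fit)   #one response curve per predictor in the MARS model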

#standard error
#note: earth does not supply coefficient standard errors (MARS term
#selection invalidates the usual formulas), and standard_error(),
#Std.Error() and se.coef() are not base R or earth functions, so the
#z-test below is left commented out
#se <- sqrt(diag(vcov(fit)))

#2-tailed z test (would require standard errors)
#z<- coef(fit)/se
#p<- (1-pnorm(abs(z),0,1))*2
#p        #p-value


Sunday, April 25, 2021

Multigroup Analysis for SEM

 #R 3.6 is needed for MGA analysis 

#MGA requires the grouping variable to have exactly two levels
#More than two option in a variable Mga is not possible

#install.packages("devtools") 

#library(devtools)

# install "plspm"

#install_github("gastonstat/plspm")

# load plspm

library(plspm)

#require(plspm)

dataset <- read.csv(file.choose())


#make a model

CO=c(0,0,0,0,0)

AT=c(1,0,0,0,0)

SN=c(1,0,0,0,0)

PBC=c(1,0,0,0,0)

PMO=c(0,1,1,1,0)


x=rbind(CO,AT,SN,PBC,PMO)

colnames(x)=rownames(x)

innerplot(x, arr.pos=.6)   #plot the inner (path) model



out=list(6:9, 10:12, 13:17, 18:21, 22:23)


################

#run the model on the total sample:

xx=plspm(dataset, x, out,

         scheme="path",

         boot.val=T, br=1247)
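Before comparing groups it is worth inspecting the pooled model; a quick look, assuming the plspm fit above converged:

summary(xx)      #outer model, inner model, bootstrap results
xx$path_coefs    #matrix of path coefficients
xx$gof           #goodness-of-fit index
plot(xx)         #inner model diagram with coefficients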


#Multigroup Analysis using bootstrap 

plspm.groups(xx, dataset$gender,

             method="bootstrap",

             reps=500)

#Multigroup Analysis using bootstrap (the grouping variable must still come from the dataset)
plspm.groups(xx, as.factor(dataset$gender),
             method="bootstrap",
             reps=500)


#Multigroup Analysis using bootstrap

plspm.groups(xx, dataset$Education.level,

             method="bootstrap",

             reps=500)

#Multigroup Analysis using bootstrap

plspm.groups(xx, dataset$income,

             method="bootstrap",

             reps=500)

#Multigroup Analysis using bootstrap

plspm.groups(xx, dataset$living,

             method="bootstrap",

             reps=500)



Thursday, April 22, 2021

Principal Component Analysis (PCA)

#install.packages("magrittr")  # for piping %>%

#install.packages("ade4")      # PCA computation

#install.packages("factoextra")# PCA visualization

library(psych)

#library(fmsb)

#PCA - principal component analysis

#Covid19 data analysis


mydata<- read.csv(file.choose())

summary(mydata)


#Define variables

#mydata<- cbind(CO1,CO2,CO3,CO4,CO5,CO6,CO7,CO8,PMO1,PMO2,PMO3,

            #AT1,AT2,AT3,AT4,AT5,AT6,AT7,SN1,SN2,SN3,SN4,

            #PBC1,PBC2,PBC3,PBC4)


#Calculating_Cronbach's Alpha

covid_concern<- alpha(data.frame(mydata[c("CO2", "CO3", "CO5", "CO7")]))

attitude<- alpha(data.frame(mydata[c("SN3", "PBC1", "PBC2")]))

social_norm<- alpha(data.frame(mydata[c("CO4", "AT7", "SN1", "SN2", "PMO1")]))

perc_beh_control<- alpha(data.frame(mydata[c("AT2", "AT4", "AT5", "AT6")]))

perc_mor_obligation<-alpha(data.frame(mydata[c("CO8", "AT1")]))


#KMO

covid_concern<- KMO(data.frame(mydata[c("CO2", "CO3", "CO5", "CO7")]))

attitude<- KMO(data.frame(mydata[c("SN3", "PBC1", "PBC2")]))

social_norm<- KMO(data.frame(mydata[c("CO4", "AT7", "SN1", "SN2", "PMO1")]))

perc_beh_control<- KMO(data.frame(mydata[c("AT2", "AT4", "AT5", "AT6")]))

perc_mor_obligation<-KMO(data.frame(mydata[c("CO8", "AT1")]))


#Bartlett's Test of Sphericity

#$pvalue is the sphericity(sig.) value

covid_concern<-cortest.bartlett(cor(data.frame(mydata[, 1:8])), n = nrow(mydata))

attitude<- cortest.bartlett(cor(data.frame(mydata[, 9:15])), n = nrow(mydata))

social_norm<- cortest.bartlett(cor(data.frame(mydata[, 16:19])), n = nrow(mydata))

perc_beh_control<- cortest.bartlett(cor(data.frame(mydata[, 20:23])), n = nrow(mydata))

perc_mor_obligation<-cortest.bartlett(cor(data.frame(mydata[c("CO8", "AT1")])), n = nrow(mydata))


#Descriptive statistics

summary(mydata)

cor(mydata)


#Principal Component Analysis

pca1<-princomp(mydata, scores=TRUE, cor=TRUE)

summary(pca1)


#Loading of principal components

loadings(pca1)

#pca1$loadings


#Scree plot of eigenvalues

plot(pca1)

#barplot(pca1)

screeplot(pca1, type="line", main="Scree Plot")
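The Kaiser criterion (keep components with eigenvalue greater than 1) can be checked directly, since princomp stores the component standard deviations:

#eigenvalues are the squared component standard deviations
eigenvalues <- pca1$sdev^2
eigenvalues[eigenvalues > 1]   #components retained under the Kaiser rule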



#Biplot of score variables

biplot(pca1)


#Scores of the components

pca1$scores[1:26, ]


#Rotation

#varimax(loadings(pca1))

#promax(loadings(pca1))



#Factor analysis - results can differ from other software; see the rotation options below

fa1<- factanal(mydata, factor = 5)

fa1


fa2<- factanal(mydata, factor=5, rotation="varimax",  scores = "regression")

fa2


fa3<- factanal(mydata, factors=5, rotation = "promax", scores = "regression")

fa3


library(ade4)

library(factoextra)

library(magrittr)

#Visualize eigenvalues (scree plot). Show the percentage of variances explained by each principal component.

fviz_eig(pca1)

#Graph of individuals. Individuals with a similar profile are grouped together.

fviz_pca_ind(pca1,

             col.ind = "cos2", # Color by the quality of representation

             gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),

             repel = TRUE)     # Avoid text overlapping 

#Graph of variables. Positive correlated variables point to the same side of the plot. Negative correlated variables point to opposite sides of the graph.

fviz_pca_var(pca1,

             col.var = "contrib", # Color by contributions to the PC

             gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),

             repel = TRUE     # Avoid text overlapping

)

#Biplot of individuals and variables

fviz_pca_biplot(pca1, repel = TRUE,

                col.var = "#2E9FDF", # Variables color

                col.ind = "#696969"  # Individuals color

)


#The ade4 package creates R base plots.

# Scree plot

screeplot(pca1, main = "Screeplot - Eigenvalues", xlab="Component")


# Correlation circle of variables
# (s.corcircle expects an ade4 dudi.pca object, which stores variable
#  coordinates in $co; a princomp object does not have that slot)
pca2 <- dudi.pca(mydata, scannf = FALSE, nf = 5)

s.corcircle(pca2$co)

 





Thursday, April 15, 2021

Descriptive statistics in R

 #Tawkir codehub

library(psych)  #provides describe()
data<-read.csv(file.choose())  #to input data

summary(data) #summary of all variables

describe(data) #mean, median, etc. for numeric data

head(data)  #to preview the first rows


#Rename one column name

colnames(data)[colnames(data)=="lining"]<- "Living"

#Rename all column names

colnames(data)<- c("Age", "Income", "Gender")

#Rename some column names (the replacement vector must be assigned;
#"Concern" and "Obligation" are example names)

colnames(data)[colnames(data) %in% c("CO","PMO")] <- c("Concern","Obligation")

#Replace value or character in a column
data$Gender[data$Gender=="Male"]<- "2"
data$Gender[data$Gender=="Female"]<- "1"

#in general: replace missing values across the whole data frame
data[is.na(data)] <- 0   #error handling

#Percentage and frequency
agef<-table(data$Age)  #frequency table
agep<-prop.table(table(data$Age)) #percentage table
barplot(agep, ylab="%")


table(data$Living)

data$Living[data$Living=="Megacity"]<- "1"

data$Living[data$Living=="City"]<- "2"

data$Living[data$Living=="Town"]<- "2"

data$Living[data$Living=="Village"]<- "2"


table(data$Age)

data$Age[data$Age=="Young"]<-"1"

data$Age[data$Age=="Adult"]<-"2"

data$Age[data$Age=="Old"]<-"3"


table(data$Education)

data$Education[data$Education=="Secondary"]<-"1"

data$Education[data$Education=="Tertiary level (college and university degree)"]<-"2"


table(data$Income)

data$Income[data$Income=="Low"]<-"1"

data$Income[data$Income=="Middle"]<-"2"

data$Income[data$Income=="Higher-middle"]<-"3"

data$Income[data$Income=="Higher"]<-"4"


#convert character to number

data$Living<-as.character(data$Living)

data$Age<-as.character(data$Age)

data$Gender<-as.character(data$Gender)

data$Education<-as.character(data$Education)

data$Income<-as.character(data$Income)


data$Living<-as.numeric(as.character(data$Living))

data$Age<-as.numeric(as.character(data$Age))

data$Gender<-as.numeric(as.character(data$Gender))

data$Education<-as.numeric(as.character(data$Education))

data$Income<-as.numeric(as.character(data$Income))

   

data$Living<-as.factor(data$Living)

data$Age<-as.factor(data$Age)

data$Gender<-as.factor(data$Gender)

data$Education<-as.factor(data$Education)

data$Income<-as.factor(data$Income)


#create new data for trial

female<- ifelse(data$Gender=="Female", 1,0)

male<- ifelse(data$Gender=="Male", 2,0)

young<- ifelse(data$Age=="Young",1,0)



describe(data)

summary(data)

colnames(data)



library(psych)

#Calculating_Cronbach's Alpha

demo<- alpha(data.frame(data[c("Living", "Age", "Gender",

                                       "Education", "Income" )]))

#save the change file for future work

write.csv(data,"E:\\planning''''''''''''''''''''''''\\SEM C19 model sir\\chang variable name main\\CODE\\Covid_main.csv", row.names = FALSE)



Wednesday, April 14, 2021

Reliability test in R

 #Calculating_Cronbach's Alpha
library(psych)                 #provides alpha()
all<- read.csv(file.choose())  #data frame holding the scale items

covid_concern<- alpha(data.frame(all[c("CO2", "CO3", "CO5", "CO7")]))

attitude<- alpha(data.frame(all[c("SN3", "PBC1", "PBC2")]))

social_norm<- alpha(data.frame(all[c("CO4", "AT7", "SN1", "SN2", "PMO1")]))

perc_beh_control<- alpha(data.frame(all[c("AT2", "AT4", "AT5", "AT6")]))

perc_mor_obligation<-alpha(data.frame(all[c("CO8", "AT1")]))

Tuesday, April 13, 2021

Conditional statement and new variable creation in R

 # Load data

p <- read.csv(file.choose())


#create new data for trial


lw9=ifelse(p$LANE_WIDTH<9, 1,0)   #note: widths in [9,10) fall into no bin as coded

lw10=ifelse(p$LANE_WIDTH>=10 & p$LANE_WIDTH<11, 1,0)

lw11=ifelse(p$LANE_WIDTH>=11 & p$LANE_WIDTH<12, 1,0)

lw12=ifelse(p$LANE_WIDTH>=12 & p$LANE_WIDTH<13, 1,0)

lw13=ifelse(p$LANE_WIDTH>=13 & p$LANE_WIDTH<14, 1,0)

lw14=ifelse(p$LANE_WIDTH>=14 & p$LANE_WIDTH<15, 1,0)

lw15=ifelse(p$LANE_WIDTH>=15 & p$LANE_WIDTH<16, 1,0)

lw16=ifelse(p$LANE_WIDTH>=16 & p$LANE_WIDTH<17, 1,0)

lw17=ifelse(p$LANE_WIDTH>=17 & p$LANE_WIDTH<18, 1,0)

lw18=ifelse(p$LANE_WIDTH>=18 & p$LANE_WIDTH<19, 1,0)

lw19=ifelse(p$LANE_WIDTH>=19 & p$LANE_WIDTH<20, 1,0)

lw20=ifelse(p$LANE_WIDTH>=20 & p$LANE_WIDTH<21, 1,0)

lw21=ifelse(p$LANE_WIDTH>=21 & p$LANE_WIDTH<22, 1,0)

lw22=ifelse(p$LANE_WIDTH>=22, 1,0)


#combined data frame

data=cbind(p, lw9, lw10, lw11, lw12, lw13, lw14, lw15, lw16, lw17, lw18, lw19, lw20, lw21, lw22)
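The fourteen ifelse() calls above collapse into one base-R cut(); a sketch over the same LANE_WIDTH column (unlike the manual bins, it also covers the [9,10) gap):

#right-open one-foot bins; -Inf/Inf catch the <9 and >=22 tails
lw_bin <- cut(p$LANE_WIDTH, breaks = c(-Inf, 9:22, Inf), right = FALSE)
table(lw_bin)
#model.matrix() expands the factor into 0/1 dummy columns if needed
dummies <- model.matrix(~ lw_bin - 1)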


#for letter data

#create new data for trial

female<- ifelse(data$Gender=="Female", 1,0)

male<- ifelse(data$Gender=="Male", 1,0)

#describe(newdata$CO2)   #leftover from another dataset; newdata is not defined here

#Data to word: convert coded values back to labels

gender<- ifelse(data$Gender==1, "Female", "Male")


Wednesday, March 10, 2021

Multiple chart in R

 par(mfrow=c(3, 3))

hist(as.numeric(data_cleaned$Cl.thickness))

hist(as.numeric(data_cleaned$Cell.size))

hist(as.numeric(data_cleaned$Cell.shape))

hist(as.numeric(data_cleaned$Marg.adhesion))

hist(as.numeric(data_cleaned$Epith.c.size))

hist(as.numeric(data_cleaned$Bare.nuclei))

hist(as.numeric(data_cleaned$Bl.cromatin))

hist(as.numeric(data_cleaned$Normal.nucleoli))

hist(as.numeric(data_cleaned$Mitoses))


Sunday, February 28, 2021

SEM in R

library(psych)
all=read.csv(choose.files())
#Calculating_Cronbach's Alpha
covid_concern<- alpha(data.frame(all[c("CO2", "CO3", "CO5", "CO7")]))
attitude<- alpha(data.frame(all[c("SN3", "PBC1", "PBC2")]))
social_norm<- alpha(data.frame(all[c("CO4", "AT7", "SN1", "SN2", "PMO1")]))
perc_beh_control<- alpha(data.frame(all[c("AT2", "AT4", "AT5", "AT6")]))
perc_mor_obligation<-alpha(data.frame(all[c("CO8", "AT1")]))

####sem

library(lavaan)
library(semPlot)

#all=read.csv(choose.files())

model1<- '
# Measurement model
CO=~CO2+ CO3 +CO5 +CO7
AT=~SN3+ PBC1+ PBC2
SN=~CO4+ AT7+ SN1+ SN2+ PMO1
PBC=~AT2+AT4+AT5+AT6
PMO=~CO8+AT1

# Structural model (regressions)
AT~CO
SN~CO
PBC~CO

PMO~AT
PMO~SN
PMO~PBC
                  '
fit1<- sem(model1, data=all)
fit1

summary(fit1, rsquare = TRUE,
        fit.measures = TRUE,
        standardized = TRUE)


fitMeasures(fit1)
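A compact report of the usual cutoff indices, instead of the full fitMeasures() dump:

fitMeasures(fit1, c("chisq", "df", "pvalue", "cfi", "tli", "rmsea", "srmr"))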

semPaths(fit1, what="paths", whatLabels = "stand",
         rotation = 2,
         layout = "spring",
         posCol = "black",
         edge.width = 0.5, 
         style = "Lisrel",
         fade = T,
         edge.label.position = 0.55)
###############

# Extract the correlation matrix
all.cor <- cor(all, method = "pearson", use = "pairwise.complete.obs")
all.cor


# Correlogram (load corrplot before calling it)
library(corrplot)
library(RColorBrewer)

corrplot(all.cor, order = "hclust",
         tl.col = "black", tl.srt = 80,
         addCoef.col = "black",
         number.cex = 0.8,
         cl.cex = 1,
         tl.cex = 0.8)

#library(psych)
#corPlot(data, cex =1.2, main="", 
#       cex.lab = 1.2,
#        cex.axis =1.2,
#       cex.main = 1.2,
#      cex.sub = 1.2)

library(psych)
cor.plot(all.cor,numbers=TRUE,colors=TRUE,
         n=51,main=NULL,labels=NULL,
         cex =1,
         cex.lab = 1,
         cex.axis =1, #right side level
         cex.main = 1,
         cex.sub = 1)



Confusion Matrix for Any Model in R



 #Confusion matrix
 #(template: assumes a fitted `model` whose predict() returns class labels,
 # plus data frames `rail` and `test` containing the outcome column OPS)

# training

p<- predict(model, rail)

tab<- table(p, rail$OPS)

tab

1-sum(diag(tab))/sum(tab)



#testing

p1<- predict(model, test)

tab1<- table(p1, test$OPS)

tab1

1-sum(diag(tab1))/sum(tab1)


#end

library(e1071)

#data

confusionMatrix(rail$OPS, sample(rail$OPS))

newPrior <- c(.05, .8, .15, 0.5, 0.9)

names(newPrior) <- levels(rail$OPS)


cm <-confusionMatrix(rail$OPS, sample(rail$OPS))


#2

# extract the confusion matrix values as data.frame

cm_d <- as.data.frame(cm$table)

# confusion matrix statistics as data.frame

cm_st <-data.frame(cm$overall)

# round the values

cm_st$cm.overall <- round(cm_st$cm.overall,2)


# here we also have the rounded percentage values

cm_p <- as.data.frame(prop.table(cm$table))

cm_d$Perc <- round(cm_p$Freq*100,2)


#3

library(ggplot2)     # to plot

library(gridExtra)   # to put more

library(grid)        # plot together


# plotting the matrix

cm_d_p <-  ggplot(data = cm_d, aes(x = Prediction , y =  Reference, fill = Freq))+

  geom_tile() +

  geom_text(aes(label = paste("",Freq,",",Perc,"%")), color = 'red', size = 8) +

  theme_light() +

  guides(fill=FALSE) 


# plotting the stats

cm_st_p <-  tableGrob(cm_st)


# all together

grid.arrange(cm_d_p, cm_st_p,nrow = 1, ncol = 2, 

             top=textGrob("Confusion Matrix and Statistics",gp=gpar(fontsize=25,font=1)))

#search confusion matrix plot


###########################################################################


Tuesday, February 16, 2021

Sankey Plot

#Tawkir_ahmed_code

#Sankey_plot


 

library(networkD3)


## create a dataframe with 10 nodes

nodes = data.frame("name" = c("Node_0", "Node_1", "Node_2", "Node_3", "Node_4", "Node_5",

                              "Node_6", "Node_7", "Node_8", "Node_9"))


## create edges with weights

links = as.data.frame(matrix(c(0, 5, 2, # node 0 -> node 5 with weight 2

                               0, 9, 2, # node 0 -> node 9 with weight 2

                               0, 6, 2, # node 0 -> node 6 with weight 2

                               1, 6, 1, # node 1 -> node 6 with weight 1

                               1, 7, 3, # node 1 -> node 7 with weight 3

                               1, 8, 2, # node 1 -> node 8 with weight 2

                               2, 9, 3, # node 2 -> node 9 with weight 3

                               3, 5, 1, # node 3 -> node 5 with weight 1

                               3, 8, 1, # node 3 -> node 8 with weight 1

                               3, 9, 5, # node 3 -> node 9 with weight 5

                               4, 9, 2,  # node 4 -> node 9 with weight 2

                               4, 6, 2  # node 4 -> node 6 with weight 2

), byrow = TRUE, ncol = 3))


## set column names for links

names(links) = c("source", "target", "value")


## add edge types for coloring purpose

links$group = c("type_0",

                "type_0",

                "type_0",

                "type_1",

                "type_1", 

                "type_1",

                "type_2",

                "type_3",

                "type_3",

                "type_3",

                "type_4",

                "type_4")


## Create custom color list using d3 for each node

node_color <- 'd3.scaleOrdinal() .domain(["Node_0", "Node_1", "Node_2", "Node_3", "Node_4", 

"Node_5", "Node_6", "Node_7", "Node_8", "Node_9", "type_0", "type_1", "type_2", 

"type_3", "type_4"]) .range(["#bf5b17", "#beaed4", "#fdc086" , "#386cb0", "#7fc97f", 

"#bf5b17", "#beaed4", "#fdc086" , "#386cb0", "#7fc97f", "#bf5b17", "#beaed4", "#fdc086" , "#386cb0", "#7fc97f"])'


## Draw Sankey Diagram

p = sankeyNetwork(Links = links, Nodes = nodes,

                  Source = "source", Target = "target",

                  Value = "value", NodeID = "name",

                  fontSize = 16, nodeWidth = 40,

                  colourScale = node_color,

                  LinkGroup="group")

p
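networkD3 renders an HTML widget, so saving goes through htmlwidgets rather than a graphics device; a minimal sketch (the file name is illustrative):

## save the interactive diagram as a standalone HTML file
library(htmlwidgets)
saveWidget(p, file = "sankey.html")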

Wednesday, January 20, 2021

Correlation Matrix

data= read.csv(file.choose())

library(corrplot)

library(RColorBrewer)

library(psych)

#data<-cbind(X,Y)   #optional: first combine two variable sets X and Y

corMat = cor(data, method="spearman")

cor.plot(corMat,numbers=TRUE,colors=TRUE,   #tl.col/tl.srt are corrplot arguments, dropped here

         n=51,main=NULL,labels=NULL,

         cex =1.1,

         cex.lab = 0.8,

         cex.axis =0.8, #right side level

         cex.main = 0.8,

         cex.sub = 0.8)


Alternate way
###
corMat = cor(data, method="spearman")
col <- colorRampPalette(c("#BB4444", "#EE9988", "#FFFFFF", "#77AADD", "#4477AA"))
corrplot(corMat, method="color", col=col(200),  
         type="upper", order="hclust", 
         addCoef.col = "black", # Add coefficient of correlation
         tl.col="black", tl.srt=65, #Text label color and rotation
         # hide correlation coefficient on the principal diagonal
         diag=FALSE 
)
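To blank out statistically insignificant correlations, corrplot ships cor.mtest() for pairwise p-values; a sketch on the same data (note cor.mtest() defaults to Pearson tests):

res <- cor.mtest(data, conf.level = 0.95)   #list with p-value matrix res$p
corrplot(corMat, method="color", col=col(200),
         type="upper", order="hclust",
         p.mat = res$p, sig.level = 0.05, insig = "blank",
         tl.col="black", tl.srt=65, diag=FALSE)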