Friday, December 18, 2020

model summary in R

 # check whether the model is a good fit (residual deviance goodness-of-fit test)

with(model, cbind(res.deviance = deviance, df = df.residual,

                   p = pchisq(deviance, df.residual, lower.tail=FALSE)))



## odds ratios and 95% CI ***********

exp(cbind(OR = coef(model), confint(model)))


# MORE SUMMARIES ###########################################


anova(model)            # Analysis of deviance table (sequential tests for each term)


coef(model)             # Coefficients 

hist(coef(model))


confint(model)          # CI for coefficients

hist(confint(model))

resid(model)            # Residuals case-by-case

hist(residuals(model),main="model COVID 19" ) # Histogram of residuals

plot(residuals(model), main="model COVID 19" )


logLik(model)

BIC(model)

PseudoR2(model)   # pseudo R-squared (requires a package such as DescTools)

predict(model)

hist(predict(model))

# pseudo R-squared from the change in deviance

model$null.deviance

model$deviance

modelChi <- model$null.deviance - model$deviance

pseudo <- modelChi / model$null.deviance

pseudo

# Compute the p-value of the model chi-square (likelihood-ratio test against the null model)

Chidf <- model$df.null - model$df.residual

modelChi <- model$null.deviance - model$deviance

1 - pchisq(modelChi, Chidf)
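As a cross-check on the deviance-based value above, McFadden's pseudo R-squared can be computed from the log-likelihoods of the fitted and intercept-only models. This is a minimal sketch assuming `model` is the glm fitted earlier:

# McFadden's pseudo R-squared: 1 - logLik(full) / logLik(null)
null_model <- update(model, . ~ 1)   # intercept-only model on the same data
1 - as.numeric(logLik(model) / logLik(null_model))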


# RSS (residual sum of squares)

RSS <- c(crossprod(model$residuals))

RSS

#Mean square error

MSE <- RSS / length(model$residuals)

MSE

#Root MSE

RMSE <- sqrt(MSE)

RMSE

#Pearson estimated residual variance

sig2 <- RSS / model$df.residual

sig2

Poisson lognormal regression model in R

 library(PLNmodels)

library(ggplot2)

library(corrplot)



Y <- read.csv(choose.files())

X <- read.csv(choose.files())

dat <- list(Y, X)

names(dat) <- c("output", "input")

d <- prepare_data(dat$output, dat$input)


model <- PLN(Abundance ~ 

             LANE_WIDTH               +

             LENGTH                    +

             LOG_AVG_AADT                 +

             LOG_PAVEMENT_CONDITION         +

             HIGH_FREQ_TRANSIT           +

             eightyfivepercent_SPEED_ADJ   +

             P_SPEEDING_ADJ               +

             SL                         +

             NL, data = d)



print(model)      # model summary: variational lower bound and criteria (BIC, ICL)

coef(model)       # matrix of regression coefficients

sigma(model)      #covariance matrix of the latent variable

vcov(model)       #Variance-Covariance Matrix

fitted(model)

standard_error(model)

plot(fitted(model))

barplot(fitted(model))


# two-tailed z-test on the coefficients

z<- coef(model)/standard_error(model)

p<- (1-pnorm(abs(z),0,1))*2

p        #pvalue
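To read the results in one place, the estimates, standard errors, z statistics, and p-values above can be bound into a single table. A small sketch reusing the objects just created (the layout depends on how many responses the PLN model has):

# combine PLN estimates, standard errors, z statistics and p-values into one table
results <- cbind(Estimate  = as.vector(coef(model)),
                 Std.Error = as.vector(standard_error(model)),
                 z         = as.vector(z),
                 p         = as.vector(p))
round(results, 4)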



Friday, November 13, 2020

Poisson Model

 # Step 1 - understand data

# Load data

p <- read.csv(file.choose())


head(p)


# STEP 2 - EDA (exploratory data analysis)


#summary of variables

summary(p)


#variance

var(p)



# Dependent variable plot


hist(p$SPEEDING_CRASH,

     main = "Histogram of Speeding Crash",

     xlab = "Speeding Crash Number",

     ylab = "Frequency")



#STEP 3- Poisson regression model


model <- glm(formula = SPEEDING_CRASH ~ SL + NL + P_TRUCK + AVG_AADT + SIGNAL_NUM + SIGNAL_PER_MILE +
               STOPPED_NUM + STOPPED_PER_MILE + MEDIAN_WIDTH + MEDIAN_TYPE + P_RESIDENTIAL +
               P_COMMERCIAL + P_INDUSTRIAL + P_AGRICULTURAL + P_INSTITUTIONAL + P_GOVERNMENTAL +
               LANDUSE_MIX + POP_DEN + DAILY_TRANSIT + ISLDTYPE + ISLDWIDTH + OSLDTYPE + OSLDWIDTH +
               SURF_TYPE + SURFACE_WIDTH + LANE_WIDTH + POLES_PER_MILE + P_SINGLE_SIDEWALK_ONLY +
               P_BOTH_SIDEWALK_ONLY + SIDEWALK_WIDTH + P_SINGLE_BIKELANE_ONLY + P_BOTH_BIKELANE_ONLY +
               P_SINGLE_BIKESLOT_ONLY + P_BOTH_BIKESLOT_ONLY + P_SINGLE_SHAREDPATH_ONLY +
               P_BOTH_SHAREDPATH_ONLY + SHAREDPATH_WIDTH + NARROW_LANE + ASPHALT_SURFACE +
               PRESENCE_OF_MEDIAN_ISL_AT_CROSS + HIGH_FREQ_TRANSIT + PCT_POV + PCT_VEHICLE_0 +
               PCT_OLD_YOUNG + PCT_MEANS_PEDBIC + PAVEMENT_CONDITION + SHAREDPATH_DISTFMRD + FRQ_AM78,
             data = p,
             family = poisson)


print(summary(model))
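Before relying on the Poisson standard errors, it helps to check for overdispersion. A rough base-R diagnostic (a sketch, no extra packages assumed) is the Pearson chi-square statistic divided by the residual degrees of freedom, which should be near 1 for a well-specified Poisson model:

# rough overdispersion check: values well above 1 suggest overdispersion
sum(residuals(model, type = "pearson")^2) / df.residual(model)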



# robust std error calculation


library(sandwich)

cov.model <- vcovHC(model, type="HC0")    #HC0:heteroskedasticity-robust standard error

std.err <- sqrt(diag(cov.model))

robust <- cbind(Estimate= coef(model), "Robust Std Err" = std.err,

                "Pr(>|z|)" = 2 * pnorm(abs(coef(model)/std.err), lower.tail=FALSE),

                LL = coef(model) - 1.96 * std.err,

                UL = coef(model) + 1.96 * std.err)  #95% confidence level


robust
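An equivalent, more compact route to the same robust inference is coeftest() from the lmtest package, reusing the HC0 covariance matrix computed above (a sketch, assuming lmtest is installed):

# robust z tests in one call, using the sandwich covariance from above
library(lmtest)
coeftest(model, vcov. = cov.model)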



# check whether the model is a good fit (residual deviance goodness-of-fit test)

with(model, cbind(res.deviance = deviance, df = df.residual,

                   p = pchisq(deviance, df.residual, lower.tail=FALSE)))


# MORE SUMMARIES ###########################################


anova(model)            # Analysis of deviance table (sequential tests for each term)

coef(model)             # Coefficients 

confint(model)          # CI for coefficients

resid(model)            # Residuals case-by-case

hist(residuals(model) ) # Histogram of residuals




# CLEAN UP #################################################


# Clear environment

rm(list = ls()) 


# Clear packages

pacman::p_unload(all)  # Remove all add-on packages (requires the pacman package)

detach("package:datasets", unload = TRUE)  # For base


# Clear plots

dev.off()  # But only if there IS a plot


# Clear console

cat("\014")  # ctrl+L


# Clear mind :)

Probit Model

 require(aod)

require(ggplot2)


mydata <- read.csv("https://stats.idre.ucla.edu/stat/data/binary.csv")


## convert rank to a factor (categorical variable)

mydata$rank <- factor(mydata$rank)


## view first few rows


head(mydata)


summary(mydata)


xtabs(~rank + admit, data = mydata)  # contingency table of admit by rank


myprobit <- glm(admit ~ gre + gpa + rank, family = binomial(link = "probit"), 

                data = mydata)


## model summary

summary(myprobit)


confint(myprobit)


wald.test(b = coef(myprobit), Sigma = vcov(myprobit), Terms = 4:6)


l <- cbind(0, 0, 0, 1, -1, 0)  # one entry per coefficient (6 in total); tests whether the rank2 and rank3 coefficients are equal

wald.test(b = coef(myprobit), Sigma = vcov(myprobit), L = l)
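Both Wald tests above refer to coefficients by position, so it is worth confirming that positions 4 to 6 really are the rank dummies before reading the results (a quick check, nothing new assumed):

# positions 4:6 should be rank2, rank3 and rank4
names(coef(myprobit))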


newdata <- data.frame(gre  = rep(seq(from = 200, to = 800, length.out = 100), 4 * 4),
                      gpa  = rep(c(2.5, 3, 3.5, 4), each = 100 * 4),
                      rank = factor(rep(rep(1:4, each = 100), 4)))


head(newdata)


newdata[, c("p", "se")] <- predict(myprobit, newdata, type = "response", se.fit = TRUE)[-3]


ggplot(newdata, aes(x = gre, y = p, colour = rank)) + geom_line() + facet_wrap(~gpa)


## change in deviance

with(myprobit, null.deviance - deviance)


## change in degrees of freedom

with(myprobit, df.null - df.residual)


## chi square test p-value

with(myprobit, pchisq(null.deviance - deviance, df.null - df.residual, lower.tail = FALSE))


logLik(myprobit)


Logistic Model

 #Coded by Tawkir Ahmed 

library(ggplot2) # used for plotting data

library(dplyr) # Used to extract columns in the data

library(rms) # Used to extract p-value from logistic model

library(aod)


theme_set(theme_gray() ) # the default

# logistic model

covid <- read.csv(choose.files())

labs <- attributes(covid)$labels


summary(covid)


# convert the outcome to a factor; values outside levels 1-7 become NA

covid$x <- factor(covid$x, levels=c("1", "2", "3", "4", "5", "6", "7"))

# run our regression model

before <- glm(x~ x1+ x2+ x3+ x4+

                 x5+ x6, 

               data=covid, family="binomial")

summary(before)


# check whether the model is a good fit (residual deviance goodness-of-fit test)

with(before, cbind(res.deviance = deviance, df = df.residual,

                   p = pchisq(deviance, df.residual, lower.tail=FALSE)))



## odds ratios and 95% CI ***********

exp(cbind(OR = coef(before), confint(before)))


# MORE SUMMARIES ###########################################


anova(before)            # Analysis of deviance table (sequential tests for each term)


coef(before)             # Coefficients 

hist(coef(before))


confint(before)          # CI for coefficients

hist(confint(before))

resid(before)            # Residuals case-by-case

hist(residuals(before),main="Before COVID 19" ) # Histogram of residuals

plot(residuals(before), main="Before COVID 19" )


logLik(before)

BIC(before)

PseudoR2(before)   # pseudo R-squared (requires a package such as DescTools)

predict(before)

hist(predict(before))

# pseudo R-squared from the change in deviance

before$null.deviance

before$deviance

modelChi <- before$null.deviance - before$deviance

pseudo <- modelChi / before$null.deviance

pseudo

# Compute the p-value of the model chi-square (likelihood-ratio test against the null model)

Chidf <- before$df.null - before$df.residual

modelChi <- before$null.deviance - before$deviance

1 - pchisq(modelChi, Chidf)


# RSS (residual sum of squares)

RSS <- c(crossprod(before$residuals))

RSS

#Mean square error

MSE <- RSS / length(before$residuals)

MSE

#Root MSE

RMSE <- sqrt(MSE)

RMSE

#Pearson estimated residual variance

sig2 <- RSS / before$df.residual

sig2


# CLEAN UP #################################################

plot(before)  # produces the four standard diagnostic plots (press Return to step through them)



library(effects)

x1 <- predictorEffect("x6", before)

plot(x1)


x2 <- predictorEffect("x2", before) 

plot(x2, main="Before COVID 19",xlab="X2", ylab="x")  #single plot


plot(allEffects(before)) #all plot


Negative Binomial Model

 library(betareg)

a <- read.csv(choose.files())

# beta regression model

summary(betareg(P_SPEEDING_ADJ ~ log(AVG_AADT), data=a))


#negative binomial model

require(foreign)

require(ggplot2)

require(MASS)

speeding <- a$P_SPEEDING_ADJ

log_aadt <- log(a$AVG_AADT)

SPEEDING_AADT <- speeding * log_aadt   # product of speeding share and log(AADT)


model1= glm.nb(formula=SPEEDING_CRASH ~SPEEDING_AADT  , 

               data = a)


print(summary(model1))
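The glm.nb fit also stores the estimated dispersion parameter theta and its standard error, which show how far the counts are from the Poisson case (a sketch using components returned by glm.nb):

# negative binomial dispersion parameter (small theta = strong overdispersion)
model1$theta
model1$SE.theta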

Sunday, September 27, 2020

Neural Network in R

 library(neuralnet)


# creating training data set

TAWKIR=c(20,10,30,20,80,30)

AHMED=c(90,20,40,50,50,80)

Placed=c(1,0,0,0,1,1)

# Here, you will combine multiple columns or features into a single set of data

df=data.frame(TAWKIR,AHMED,Placed)


# load library

require(neuralnet)


# fit neural network

nn=neuralnet(Placed~TAWKIR+AHMED,data=df, hidden=3,act.fct = "logistic",

             linear.output = FALSE)

# plot neural network

plot(nn)
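Once trained, the network can score new candidates with compute(); the scores below are made-up values used only to illustrate the call (a sketch):

# predicted placement probability for hypothetical new scores (illustrative values only)
new_scores <- data.frame(TAWKIR = c(40, 70), AHMED = c(70, 30))
compute(nn, new_scores)$net.result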





Friday, September 18, 2020

Random Forest plot

 library(randomForest)

# Load the dataset and explore

data1 <- read.csv(file.choose(), header = TRUE)


head(data1)


str(data1)


summary(data1)



# Split into Train and Validation sets

# Training Set : Validation Set = 70 : 30 (random)

set.seed(1000)

train <- sample(nrow(data1), 0.7*nrow(data1), replace = FALSE)

TrainSet <- data1[train,]

ValidSet <- data1[-train,]

summary(TrainSet)

summary(ValidSet)


# Create a Random Forest model with default parameters

model1 <- randomForest(SPEEDING_CRASH ~ ., data = TrainSet, importance = TRUE)   # numeric response gives a regression forest; randomForest() has no family argument

model1


# or

# Fine tuning parameters of Random Forest model

model2 <- randomForest(SPEEDING_CRASH ~ ., data = TrainSet, ntree = 500, mtry = 6, importance = TRUE)

model2


# Predicting on train set

predTrain <- predict(model2, TrainSet)   # numeric predictions from the regression forest

# Checking fit on the training set (the accuracy table below only applies if the response is a factor)

#table(predTrain, TrainSet$SPEEDING_CRASH)  


#mean(predValid == ValidSet$SPEEDING_CRASH)  
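The commented-out accuracy checks need predictions on the validation set first; since SPEEDING_CRASH is a count, a regression-style error summary is more natural here. A sketch under that assumption:

# predict on the held-out validation set and report the root mean squared error
predValid <- predict(model2, ValidSet)
sqrt(mean((predValid - ValidSet$SPEEDING_CRASH)^2))   # validation RMSE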


# To check important variables

importance(model2)        

varImpPlot(model2)

#OR


importance(model1)        

varImpPlot(model1)




SHAP plot

 #Part 1: library input

suppressPackageStartupMessages({

  library(SHAPforxgboost)

  library(xgboost)

  library(data.table)

  library(ggplot2)

})


#part 2:

#file load and shap value calculation

a <- read.csv(file.choose())

X1 = as.matrix(a[,-1])

mod1 = xgboost::xgboost(

  data = X1, label = a$SPEEDING_CRASH, gamma = 0, eta = 1, 

  lambda = 0,nrounds = 10, verbose = F, objective = "reg:squarederror")


# shap.values(model, X_dataset) returns the SHAP

# data matrix and ranked features by mean|SHAP|

shap_values <- shap.values(xgb_model = mod1, X_train = X1)

shap_values$mean_shap_score



#part 3:

shap_values_iris <- shap_values$shap_score


# shap.prep() returns the long-format SHAP data, here computed directly from the model

shap_long_iris <- shap.prep(xgb_model = mod1, X_train = X1)

# equivalent: supply the SHAP contributions computed above

shap_long_iris <- shap.prep(shap_contrib = shap_values_iris, X_train = X1)


# -------------------------------------------------------------------------



shap.plot.summary(shap_long_iris)


# the dilute option speeds up the plot when there are thousands of observations

# please see documentation for details. 


shap.plot.summary(shap_long_iris, x_bound  = 1.5, dilute = 10)
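Beyond the summary plot, SHAPforxgboost can draw a dependence plot for a single predictor; the feature name below is a placeholder for whichever column exists in your data (a sketch):

# SHAP dependence plot for one feature ("LENGTH" is a hypothetical column name)
shap.plot.dependence(data_long = shap_long_iris, x = "LENGTH")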

#end...................................



Text plot

 ## A blank plot to set up a coordinate system

## The finished plot demonstrates text families, rotation, and line spacing
plot(0:10, 0:10, type = "n")
## Some regular text as a baseline
text(2,10, "Regular text", pos = 4)
## Set text larger and use serif family
par(list(cex = 2, family = "serif"))
## Add some text
text(2,8, "Serif Family", pos = 4)
## Alter rotation and set sans serif family
par(list(srt = 180, family = "sans"))
## Add some text note pos is opposite to previous
text(2,6, "Sans Family", pos = 2)
## Alter rotation and set monospace family
par(list(srt = 90, family = "mono"))
## Add some text
text(8,6, "Monospace Family")
## Reset parameters
par(list(cex = 1, srt = 0, family = ""))
## Create multi-line text
text(2,4, "Multi-line\ntext with\ndefault spacing", pos = 4)
## Alter line height
par(lheight = 2)
## More multi-line text
text(4, 2, "Multi-line\ntext with\ncustom spacing", pos = 4)
## Reset line height
par(lheight = 1)
  



Biplot for PCA

 ## Use datasets:USArrests

data(USArrests) # Get datafile

names(USArrests) # View variable names



## Scaled PCA using entire data.frame

pca1 = prcomp(USArrests, scale = TRUE)


## Both following commands produce same PCA as previous

pca2 = prcomp(~., data = USArrests, scale = TRUE)

pca3 = prcomp(~ Murder + Assault + Rape + UrbanPop,

                data = USArrests, scale = TRUE)


pca1 # View result


names(pca1) # View elements in result object


summary(pca1) # Summary


## Plots for results...

## Scree-plot of variances (Figure 2-5)

#plot(pca1, type = "lines", main = "PCA for USArrests")


## Bi-plot of result (Figure 2-6)

biplot(pca1, col = 1, cex = c(0.8, 1.2), expand = 0.9)




Scree plot from PCA

 ## Use datasets:USArrests

data(USArrests) # Get datafile

names(USArrests) # View variable names


## Scaled PCA using entire data.frame

pca1 = prcomp(USArrests, scale = TRUE)


## Both following commands produce same PCA as previous

pca2 = prcomp(~., data = USArrests, scale = TRUE)

pca3 = prcomp(~ Murder + Assault + Rape + UrbanPop,

                data = USArrests, scale = TRUE)


pca1 # View result


names(pca1) # View elements in result object


summary(pca1) # Summary


## Plots for results...

## Scree-plot of variances 

plot(pca1, type = "lines", main = "PCA for USArrests")
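The same information shown by the scree plot can be computed directly from the standard deviations stored in the prcomp object (a short sketch):

# proportion of variance explained by each component, and the running total
pve <- pca1$sdev^2 / sum(pca1$sdev^2)
round(cbind(PVE = pve, Cumulative = cumsum(pve)), 3)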



