Friday, November 13, 2020

Beta Regression Model

November 13, 2020 Md. Tawkir Ahmed No comments

library(betareg)

a <- read.csv(choose.files())

# beta regression model

summary(betareg(P_SPEEDING_ADJ ~ log(AVG_AADT), data=a))

Poisson Model

November 13, 2020 Md. Tawkir Ahmed No comments

# Step 1 - understand data

# Load data

p <- read.csv(file.choose())

head(p)

# STEP 2- EFA(exploratory factor analysis)

#summary of variables

summary(p)

#variance

var(p)

# Dependent variable plot

hist(p$SPEEDING_CRASH,

main = "Histogram of Speeding Crash",

xlab = "Speeding Crash Number",

ylab = "Frequency")

#STEP 3- Poisson regression model

model <-glm(formula = SPEEDING_CRASH ~ SL+NL+P_TRUCK+AVG_AADT+SIGNAL_NUM+SIGNAL_PER_MILE +STOPPED_NUM +STOPPED_PER_MILE

+MEDIAN_WIDTH +MEDIAN_TYPE +P_RESIDENTIAL +P_COMMERCIAL +P_INDUSTRIAL +P_AGRICULTURAL +P_INSTITUTIONAL+

P_GOVERNMENTAL +LANDUSE_MIX+POP_DEN +DAILY_TRANSIT +ISLDTYPE+ISLDWIDTH+OSLDTYPE +OSLDWIDTH +

SURF_TYPE +SURFACE_WIDTH +LANE_WIDTH +POLES_PER_MILE +P_SINGLE_SIDEWALK_ONLY +P_BOTH_SIDEWALK_ONLY +

SIDEWALK_WIDTH + P_SINGLE_BIKELANE_ONLY +P_BOTH_BIKELANE_ONLY +P_SINGLE_BIKESLOT_ONLY +P_BOTH_BIKESLOT_ONLY +

P_SINGLE_SHAREDPATH_ONLY +P_BOTH_SHAREDPATH_ONLY +SHAREDPATH_WIDTH +NARROW_LANE +ASPHALT_SURFACE +

PRESENCE_OF_MEDIAN_ISL_AT_CROSS+HIGH_FREQ_TRANSIT+PCT_POV+PCT_VEHICLE_0+PCT_OLD_YOUNG+PCT_MEANS_PEDBIC+

PAVEMENT_CONDITION +SHAREDPATH_DISTFMRD +FRQ_AM78+MEDIAN_TYPE +ISLDTYPE+OSLDTYPE+SURF_TYPE ,

data = p,

family = poisson)

print(summary(model))

# robust std error calculation

library(sandwich)

cov.model <- vcovHC(model, type="HC0") #HC0:heteroskedasticity-robust standard error

std.err <- sqrt(diag(cov.model))

robust <- cbind(Estimate= coef(model), "Robust Std Err" = std.err,

"Pr(>|z|)" = 2 * pnorm(abs(coef(model)/std.err), lower.tail=FALSE),

LL = coef(model) - 1.96 * std.err,

UL = coef(model) + 1.96 * std.err) #95% confidence level

robust

# check that model is good fit or not

with(model, cbind(res.deviance = deviance, df = df.residual,

p = pchisq(deviance, df.residual, lower.tail=FALSE)))

# MORE SUMMARIES ###########################################

anova(model) # Coefficients w/inferential tests

coef(model) # Coefficients

confint(model) # CI for coefficients

resid(model) # Residuals case-by-case

hist(residuals(model) ) # Histogram of residuals

# CLEAN UP #################################################

# Clear environment

rm(list = ls())

# Clear packages

p_unload(all) # Remove all add-ons

detach("package:datasets", unload = TRUE) # For base

# Clear plots

dev.off() # But only if there IS a plot

# Clear console

cat("\014") # ctrl+L

# Clear mind :)

Probit Model

November 13, 2020 Md. Tawkir Ahmed No comments

require(aod)

require(ggplot2)

mydata <- read.csv("https://stats.idre.ucla.edu/stat/data/binary.csv")

## convert rank to a factor (categorical variable)

mydata$rank <- factor(mydata$rank)

## view first few rows

head(mydata)

summary(mydata)

xtabs(~rank + admit, data = mydata) #

myprobit <- glm(admit ~ gre + gpa + rank, family = binomial(link = "probit"),

data = mydata)

## model summary

summary(myprobit)

confint(myprobit)

wald.test(b = coef(myprobit), Sigma = vcov(myprobit), Terms = 4:6)

l <- cbind(0, 0, 0, 1, -1, 0) #FOR 4:6 IS 6 VALUE TAKEN

wald.test(b = coef(myprobit), Sigma = vcov(myprobit), L = l)

newdata <- data.frame(gre = rep(seq(from = 200, to = 800, length.out = 100),

4 * 4), gpa = rep(c(2.5, 3, 3.5, 4), each = 100 * 4), rank = factor(rep(rep(1:4,

each = 100), 4)))

head(newdata)

newdata[, c("p", "se")] <- predict(myprobit, newdata, type = "response", se.fit = TRUE)[-3]

ggplot(newdata, aes(x = gre, y = p, colour = rank)) + geom_line() + facet_wrap(~gpa)

## change in deviance

with(myprobit, null.deviance - deviance)

## change in degrees of freedom

with(myprobit, df.null - df.residual)

## chi square test p-value

with(myprobit, pchisq(null.deviance - deviance, df.null - df.residual, lower.tail = FALSE))

logLik(myprobit)

Logistic Model

November 13, 2020 Md. Tawkir Ahmed No comments

#Coded by Tawkir Ahmed

library(ggplot2) # USed for plotting data

library(dplyr) # Used to extract columns in the data

library(rms) # Used to extract p-value from logistic model

library(aod)

theme_set(theme_gray() ) # the default

# logistic model

covid <- read.csv(choose.files())

labs <- attributes(covid)$labels

summary(covid)

# collapse all missing values to NA

covid$x <- factor(covid$x, levels=c("1", "2", "3", "4", "5", "6", "7"))

# run our regression model

before <- glm(x~ x1+ x2+ x3+ x4+

x5+ x6,

data=covid, family="binomial")

summary(before)

# check that model is good fit or not

with(before, cbind(res.deviance = deviance, df = df.residual,

p = pchisq(deviance, df.residual, lower.tail=FALSE)))

## odds ratios and 95% CI ***********

exp(cbind(OR = coef(before), confint(before)))

# MORE SUMMARIES ###########################################

anova(before) # Coefficients w/inferential tests

coef(before) # Coefficients

hist(coef(before))

confint(before) # CI for coefficients

hist(confint(before))

resid(before) # Residuals case-by-case

hist(residuals(before),main="Before COVID 19" ) # Histogram of residuals

plot(residuals(before), main="Before COVID 19" )

logLik(before)

BIC(before)

PseudoR2(before)

predict(before)

hist(predict(before))

#peseudo r square

before$null.deviance

before$deviance

modelChi <- before$null.deviance - before$deviance

pseudo <- modelChi / before$null.deviance

pseudo

# Compute the pseudo p-value

Chidf <- before$df.null - before$df.residual

modelChi <- before$null.deviance - before$deviance

1 - pchisq(modelChi, Chidf)

#RSS(residual sum of square)

RSS <- c(crossprod(before$residuals))

RSS

#Mean square error

MSE <- RSS / length(before$residuals)

MSE

#Root MSE

RMSE <- sqrt(MSE)

RMSE

#Pearson estimated residual variance

sig2 <- RSS / before$df.residual

sig2

# CLEAN UP #################################################

plot(before) #plot 4 times to get four graph

library(effects)

x1 <- predictorEffect("x6", before)

plot(x1)

x2 <- predictorEffect("x2", before)

plot(x2, main="Before COVID 19",xlab="X2", ylab="x") #single plot

plot(allEffects(before)) #all plot

Negative Binomial Model

November 13, 2020 Md. Tawkir Ahmed No comments

library(betareg)

a <- read.csv(choose.files())

# beta regression model

summary(betareg(P_SPEEDING_ADJ ~ log(AVG_AADT), data=a))

#negative binomial model

require(foreign)

require(ggplot2)

require(MASS)

b<- a$P_SPEEDING_ADJ

c<- a$AVG_AADT

d<- log(c)

SPEEDING_AADT<- b*d

model1= glm.nb(formula=SPEEDING_CRASH ~SPEEDING_AADT ,

data = a)

print(summary(model1))