Friday, November 13, 2020

Poisson Model

 # Step 1 - understand data

# Load data

p <- read.csv(file.choose())


head(p)


# STEP 2- EFA(exploratory factor analysis)


#summary of variables

summary(p)


#variance

var(p)



# Dependent variable plot


hist(p$SPEEDING_CRASH,

     main = "Histogram of Speeding Crash",

     xlab = "Speeding Crash Number",

     ylab = "Frequency")



#STEP 3- Poisson regression model


model <-glm(formula = SPEEDING_CRASH  ~ SL+NL+P_TRUCK+AVG_AADT+SIGNAL_NUM+SIGNAL_PER_MILE +STOPPED_NUM +STOPPED_PER_MILE 

             +MEDIAN_WIDTH +MEDIAN_TYPE +P_RESIDENTIAL +P_COMMERCIAL +P_INDUSTRIAL +P_AGRICULTURAL +P_INSTITUTIONAL+

               P_GOVERNMENTAL +LANDUSE_MIX+POP_DEN +DAILY_TRANSIT +ISLDTYPE+ISLDWIDTH+OSLDTYPE +OSLDWIDTH +

               SURF_TYPE +SURFACE_WIDTH +LANE_WIDTH +POLES_PER_MILE +P_SINGLE_SIDEWALK_ONLY +P_BOTH_SIDEWALK_ONLY +

               SIDEWALK_WIDTH + P_SINGLE_BIKELANE_ONLY +P_BOTH_BIKELANE_ONLY +P_SINGLE_BIKESLOT_ONLY +P_BOTH_BIKESLOT_ONLY +

               P_SINGLE_SHAREDPATH_ONLY +P_BOTH_SHAREDPATH_ONLY +SHAREDPATH_WIDTH +NARROW_LANE +ASPHALT_SURFACE +

               PRESENCE_OF_MEDIAN_ISL_AT_CROSS+HIGH_FREQ_TRANSIT+PCT_POV+PCT_VEHICLE_0+PCT_OLD_YOUNG+PCT_MEANS_PEDBIC+

               PAVEMENT_CONDITION   +SHAREDPATH_DISTFMRD +FRQ_AM78+MEDIAN_TYPE +ISLDTYPE+OSLDTYPE+SURF_TYPE , 

             data = p, 

             family = poisson)


print(summary(model))



# robust std error calculation


library(sandwich)

cov.model <- vcovHC(model, type="HC0")    #HC0:heteroskedasticity-robust standard error

std.err <- sqrt(diag(cov.model))

robust <- cbind(Estimate= coef(model), "Robust Std Err" = std.err,

                "Pr(>|z|)" = 2 * pnorm(abs(coef(model)/std.err), lower.tail=FALSE),

                LL = coef(model) - 1.96 * std.err,

                UL = coef(model) + 1.96 * std.err)  #95% confidence level


robust



# check that model is good fit or not

with(model, cbind(res.deviance = deviance, df = df.residual,

                   p = pchisq(deviance, df.residual, lower.tail=FALSE)))


# MORE SUMMARIES ###########################################


anova(model)            # Coefficients w/inferential tests

coef(model)             # Coefficients 

confint(model)          # CI for coefficients

resid(model)            # Residuals case-by-case

hist(residuals(model) ) # Histogram of residuals




# CLEAN UP #################################################


# Clear environment

rm(list = ls()) 


# Clear packages

p_unload(all)  # Remove all add-ons

detach("package:datasets", unload = TRUE)  # For base


# Clear plots

dev.off()  # But only if there IS a plot


# Clear console

cat("\014")  # ctrl+L


# Clear mind :)

Probit Model

 require(aod)

require(ggplot2)


mydata <- read.csv("https://stats.idre.ucla.edu/stat/data/binary.csv")


## convert rank to a factor (categorical variable)

mydata$rank <- factor(mydata$rank)


## view first few rows


head(mydata)


summary(mydata)


xtabs(~rank + admit, data = mydata) #


myprobit <- glm(admit ~ gre + gpa + rank, family = binomial(link = "probit"), 

                data = mydata)


## model summary

summary(myprobit)


confint(myprobit)


wald.test(b = coef(myprobit), Sigma = vcov(myprobit), Terms = 4:6)


l <- cbind(0, 0, 0, 1, -1, 0) #FOR 4:6 IS 6 VALUE TAKEN

wald.test(b = coef(myprobit), Sigma = vcov(myprobit), L = l)


newdata <- data.frame(gre = rep(seq(from = 200, to = 800, length.out = 100), 

                                4 * 4), gpa = rep(c(2.5, 3, 3.5, 4), each = 100 * 4), rank = factor(rep(rep(1:4, 

                                                                                                            each = 100), 4)))


head(newdata)


newdata[, c("p", "se")] <- predict(myprobit, newdata, type = "response", se.fit = TRUE)[-3]


ggplot(newdata, aes(x = gre, y = p, colour = rank)) + geom_line() + facet_wrap(~gpa)


## change in deviance

with(myprobit, null.deviance - deviance)


## change in degrees of freedom

with(myprobit, df.null - df.residual)


## chi square test p-value

with(myprobit, pchisq(null.deviance - deviance, df.null - df.residual, lower.tail = FALSE))


logLik(myprobit)


Logistic Model

 #Coded by Tawkir Ahmed 

library(ggplot2) # USed for plotting data

library(dplyr) # Used to extract columns in the data

library(rms) # Used to extract p-value from logistic model

library(aod)


theme_set(theme_gray() ) # the default

# logistic model

covid <- read.csv(choose.files())

labs <- attributes(covid)$labels


summary(covid)


# collapse all missing values to NA

covid$x <- factor(covid$x, levels=c("1", "2", "3", "4", "5", "6", "7"))

# run our regression model

before <- glm(x~ x1+ x2+ x3+ x4+

                 x5+ x6, 

               data=covid, family="binomial")

summary(before)


# check that model is good fit or not

with(before, cbind(res.deviance = deviance, df = df.residual,

                   p = pchisq(deviance, df.residual, lower.tail=FALSE)))



## odds ratios and 95% CI ***********

exp(cbind(OR = coef(before), confint(before)))


# MORE SUMMARIES ###########################################


anova(before)            # Coefficients w/inferential tests


coef(before)             # Coefficients 

hist(coef(before))


confint(before)          # CI for coefficients

hist(confint(before))

resid(before)            # Residuals case-by-case

hist(residuals(before),main="Before COVID 19" ) # Histogram of residuals

plot(residuals(before), main="Before COVID 19" )


logLik(before)

BIC(before)

PseudoR2(before)

predict(before)

hist(predict(before))

#peseudo r square 

before$null.deviance

before$deviance

modelChi <- before$null.deviance - before$deviance

pseudo <- modelChi / before$null.deviance

pseudo

# Compute the pseudo p-value

Chidf <- before$df.null - before$df.residual

modelChi <- before$null.deviance - before$deviance

1 - pchisq(modelChi, Chidf)


#RSS(residual sum of square)

RSS <- c(crossprod(before$residuals))

RSS

#Mean square error

MSE <- RSS / length(before$residuals)

MSE

#Root MSE

RMSE <- sqrt(MSE)

RMSE

#Pearson estimated residual variance

sig2 <- RSS / before$df.residual

sig2


# CLEAN UP #################################################

plot(before)  #plot 4 times to get four graph



library(effects)

x1 <- predictorEffect("x6", before)

plot(x1)


x2 <- predictorEffect("x2", before) 

plot(x2, main="Before COVID 19",xlab="X2", ylab="x")  #single plot


plot(allEffects(before)) #all plot


Negative Binomial Model

 library(betareg)

a <- read.csv(choose.files())

# beta regression model

summary(betareg(P_SPEEDING_ADJ ~ log(AVG_AADT), data=a))


#negative binomial model

require(foreign)

require(ggplot2)

require(MASS)

b<- a$P_SPEEDING_ADJ

c<- a$AVG_AADT

d<- log(c)

SPEEDING_AADT<- b*d


model1= glm.nb(formula=SPEEDING_CRASH ~SPEEDING_AADT  , 

               data = a)


print(summary(model1))