Friday, November 13, 2020

Poisson Model

 # Step 1 - understand data

# Load data

p <- read.csv(file.choose())


head(p)


# STEP 2- EFA(exploratory factor analysis)


#summary of variables

summary(p)


#variance

var(p)



# Dependent variable plot


hist(p$SPEEDING_CRASH,

     main = "Histogram of Speeding Crash",

     xlab = "Speeding Crash Number",

     ylab = "Frequency")



#STEP 3- Poisson regression model


model <-glm(formula = SPEEDING_CRASH  ~ SL+NL+P_TRUCK+AVG_AADT+SIGNAL_NUM+SIGNAL_PER_MILE +STOPPED_NUM +STOPPED_PER_MILE 

             +MEDIAN_WIDTH +MEDIAN_TYPE +P_RESIDENTIAL +P_COMMERCIAL +P_INDUSTRIAL +P_AGRICULTURAL +P_INSTITUTIONAL+

               P_GOVERNMENTAL +LANDUSE_MIX+POP_DEN +DAILY_TRANSIT +ISLDTYPE+ISLDWIDTH+OSLDTYPE +OSLDWIDTH +

               SURF_TYPE +SURFACE_WIDTH +LANE_WIDTH +POLES_PER_MILE +P_SINGLE_SIDEWALK_ONLY +P_BOTH_SIDEWALK_ONLY +

               SIDEWALK_WIDTH + P_SINGLE_BIKELANE_ONLY +P_BOTH_BIKELANE_ONLY +P_SINGLE_BIKESLOT_ONLY +P_BOTH_BIKESLOT_ONLY +

               P_SINGLE_SHAREDPATH_ONLY +P_BOTH_SHAREDPATH_ONLY +SHAREDPATH_WIDTH +NARROW_LANE +ASPHALT_SURFACE +

               PRESENCE_OF_MEDIAN_ISL_AT_CROSS+HIGH_FREQ_TRANSIT+PCT_POV+PCT_VEHICLE_0+PCT_OLD_YOUNG+PCT_MEANS_PEDBIC+

               PAVEMENT_CONDITION   +SHAREDPATH_DISTFMRD +FRQ_AM78+MEDIAN_TYPE +ISLDTYPE+OSLDTYPE+SURF_TYPE , 

             data = p, 

             family = poisson)


print(summary(model))



# robust std error calculation


library(sandwich)

cov.model <- vcovHC(model, type="HC0")    #HC0:heteroskedasticity-robust standard error

std.err <- sqrt(diag(cov.model))

robust <- cbind(Estimate= coef(model), "Robust Std Err" = std.err,

                "Pr(>|z|)" = 2 * pnorm(abs(coef(model)/std.err), lower.tail=FALSE),

                LL = coef(model) - 1.96 * std.err,

                UL = coef(model) + 1.96 * std.err)  #95% confidence level


robust



# check that model is good fit or not

with(model, cbind(res.deviance = deviance, df = df.residual,

                   p = pchisq(deviance, df.residual, lower.tail=FALSE)))


# MORE SUMMARIES ###########################################


anova(model)            # Coefficients w/inferential tests

coef(model)             # Coefficients 

confint(model)          # CI for coefficients

resid(model)            # Residuals case-by-case

hist(residuals(model) ) # Histogram of residuals




# CLEAN UP #################################################


# Clear environment

rm(list = ls()) 


# Clear packages

p_unload(all)  # Remove all add-ons

detach("package:datasets", unload = TRUE)  # For base


# Clear plots

dev.off()  # But only if there IS a plot


# Clear console

cat("\014")  # ctrl+L


# Clear mind :)

0 comments:

Post a Comment