library(betareg)
a <- read.csv(choose.files())
# beta regression model
summary(betareg(P_SPEEDING_ADJ ~ log(AVG_AADT), data=a))
library(betareg)
a <- read.csv(choose.files())
# beta regression model
summary(betareg(P_SPEEDING_ADJ ~ log(AVG_AADT), data=a))
# Step 1 - understand data
# Load data
p <- read.csv(file.choose())
head(p)
# STEP 2- EFA(exploratory factor analysis)
#summary of variables
summary(p)
#variance
var(p)
# Dependent variable plot
hist(p$SPEEDING_CRASH,
main = "Histogram of Speeding Crash",
xlab = "Speeding Crash Number",
ylab = "Frequency")
#STEP 3- Poisson regression model
model <-glm(formula = SPEEDING_CRASH ~ SL+NL+P_TRUCK+AVG_AADT+SIGNAL_NUM+SIGNAL_PER_MILE +STOPPED_NUM +STOPPED_PER_MILE
+MEDIAN_WIDTH +MEDIAN_TYPE +P_RESIDENTIAL +P_COMMERCIAL +P_INDUSTRIAL +P_AGRICULTURAL +P_INSTITUTIONAL+
P_GOVERNMENTAL +LANDUSE_MIX+POP_DEN +DAILY_TRANSIT +ISLDTYPE+ISLDWIDTH+OSLDTYPE +OSLDWIDTH +
SURF_TYPE +SURFACE_WIDTH +LANE_WIDTH +POLES_PER_MILE +P_SINGLE_SIDEWALK_ONLY +P_BOTH_SIDEWALK_ONLY +
SIDEWALK_WIDTH + P_SINGLE_BIKELANE_ONLY +P_BOTH_BIKELANE_ONLY +P_SINGLE_BIKESLOT_ONLY +P_BOTH_BIKESLOT_ONLY +
P_SINGLE_SHAREDPATH_ONLY +P_BOTH_SHAREDPATH_ONLY +SHAREDPATH_WIDTH +NARROW_LANE +ASPHALT_SURFACE +
PRESENCE_OF_MEDIAN_ISL_AT_CROSS+HIGH_FREQ_TRANSIT+PCT_POV+PCT_VEHICLE_0+PCT_OLD_YOUNG+PCT_MEANS_PEDBIC+
PAVEMENT_CONDITION +SHAREDPATH_DISTFMRD +FRQ_AM78+MEDIAN_TYPE +ISLDTYPE+OSLDTYPE+SURF_TYPE ,
data = p,
family = poisson)
print(summary(model))
# robust std error calculation
library(sandwich)
cov.model <- vcovHC(model, type="HC0") #HC0:heteroskedasticity-robust standard error
std.err <- sqrt(diag(cov.model))
robust <- cbind(Estimate= coef(model), "Robust Std Err" = std.err,
"Pr(>|z|)" = 2 * pnorm(abs(coef(model)/std.err), lower.tail=FALSE),
LL = coef(model) - 1.96 * std.err,
UL = coef(model) + 1.96 * std.err) #95% confidence level
robust
# check that model is good fit or not
with(model, cbind(res.deviance = deviance, df = df.residual,
p = pchisq(deviance, df.residual, lower.tail=FALSE)))
# MORE SUMMARIES ###########################################
anova(model) # Coefficients w/inferential tests
coef(model) # Coefficients
confint(model) # CI for coefficients
resid(model) # Residuals case-by-case
hist(residuals(model) ) # Histogram of residuals
# CLEAN UP #################################################
# Clear environment
rm(list = ls())
# Clear packages
p_unload(all) # Remove all add-ons
detach("package:datasets", unload = TRUE) # For base
# Clear plots
dev.off() # But only if there IS a plot
# Clear console
cat("\014") # ctrl+L
# Clear mind :)
require(aod)
require(ggplot2)
mydata <- read.csv("https://stats.idre.ucla.edu/stat/data/binary.csv")
## convert rank to a factor (categorical variable)
mydata$rank <- factor(mydata$rank)
## view first few rows
head(mydata)
summary(mydata)
xtabs(~rank + admit, data = mydata) #
myprobit <- glm(admit ~ gre + gpa + rank, family = binomial(link = "probit"),
data = mydata)
## model summary
summary(myprobit)
confint(myprobit)
wald.test(b = coef(myprobit), Sigma = vcov(myprobit), Terms = 4:6)
l <- cbind(0, 0, 0, 1, -1, 0) #FOR 4:6 IS 6 VALUE TAKEN
wald.test(b = coef(myprobit), Sigma = vcov(myprobit), L = l)
newdata <- data.frame(gre = rep(seq(from = 200, to = 800, length.out = 100),
4 * 4), gpa = rep(c(2.5, 3, 3.5, 4), each = 100 * 4), rank = factor(rep(rep(1:4,
each = 100), 4)))
head(newdata)
newdata[, c("p", "se")] <- predict(myprobit, newdata, type = "response", se.fit = TRUE)[-3]
ggplot(newdata, aes(x = gre, y = p, colour = rank)) + geom_line() + facet_wrap(~gpa)
## change in deviance
with(myprobit, null.deviance - deviance)
## change in degrees of freedom
with(myprobit, df.null - df.residual)
## chi square test p-value
with(myprobit, pchisq(null.deviance - deviance, df.null - df.residual, lower.tail = FALSE))
logLik(myprobit)
#Coded by Tawkir Ahmed
library(ggplot2) # USed for plotting data
library(dplyr) # Used to extract columns in the data
library(rms) # Used to extract p-value from logistic model
library(aod)
theme_set(theme_gray() ) # the default
# logistic model
covid <- read.csv(choose.files())
labs <- attributes(covid)$labels
summary(covid)
# collapse all missing values to NA
covid$x <- factor(covid$x, levels=c("1", "2", "3", "4", "5", "6", "7"))
# run our regression model
before <- glm(x~ x1+ x2+ x3+ x4+
x5+ x6,
data=covid, family="binomial")
summary(before)
# check that model is good fit or not
with(before, cbind(res.deviance = deviance, df = df.residual,
p = pchisq(deviance, df.residual, lower.tail=FALSE)))
## odds ratios and 95% CI ***********
exp(cbind(OR = coef(before), confint(before)))
# MORE SUMMARIES ###########################################
anova(before) # Coefficients w/inferential tests
coef(before) # Coefficients
hist(coef(before))
confint(before) # CI for coefficients
hist(confint(before))
resid(before) # Residuals case-by-case
hist(residuals(before),main="Before COVID 19" ) # Histogram of residuals
plot(residuals(before), main="Before COVID 19" )
logLik(before)
BIC(before)
PseudoR2(before)
predict(before)
hist(predict(before))
#peseudo r square
before$null.deviance
before$deviance
modelChi <- before$null.deviance - before$deviance
pseudo <- modelChi / before$null.deviance
pseudo
# Compute the pseudo p-value
Chidf <- before$df.null - before$df.residual
modelChi <- before$null.deviance - before$deviance
1 - pchisq(modelChi, Chidf)
#RSS(residual sum of square)
RSS <- c(crossprod(before$residuals))
RSS
#Mean square error
MSE <- RSS / length(before$residuals)
MSE
#Root MSE
RMSE <- sqrt(MSE)
RMSE
#Pearson estimated residual variance
sig2 <- RSS / before$df.residual
sig2
# CLEAN UP #################################################
plot(before) #plot 4 times to get four graph
library(effects)
x1 <- predictorEffect("x6", before)
plot(x1)
x2 <- predictorEffect("x2", before)
plot(x2, main="Before COVID 19",xlab="X2", ylab="x") #single plot
plot(allEffects(before)) #all plot
library(betareg)
a <- read.csv(choose.files())
# beta regression model
summary(betareg(P_SPEEDING_ADJ ~ log(AVG_AADT), data=a))
#negative binomial model
require(foreign)
require(ggplot2)
require(MASS)
b<- a$P_SPEEDING_ADJ
c<- a$AVG_AADT
d<- log(c)
SPEEDING_AADT<- b*d
model1= glm.nb(formula=SPEEDING_CRASH ~SPEEDING_AADT ,
data = a)
print(summary(model1))