Tuesday, July 27, 2021

Scatter plot in R

# install.packages("ggplot2")

# install.packages("ggExtra")

library(ggplot2)

library(ggExtra)


dat<- read.csv(file.choose())

colnames(dat)


# Save the scatter plot in a variable

p <- ggplot(dat, aes(x = Speed_2017_to_2019, y =  Volume_2017_to_2019)) +

  geom_point(shape=5, color="gray")+

  ylim(0, 100)+

  xlim(0, 100)+

  geom_smooth(method=lm, color="darkred", fill="blue")+

  theme_classic()


# Densigram

ggMarginal(p, type = "densigram",

           xparams = list(fill = 4),

           yparams = list(fill = 3)) 



# Save the scatter plot in a variable

p <- ggplot(dat, aes(x = Speed_2020, y =  Volume_2020)) +

  geom_point()+ 

  ylim(0, 100)+

  xlim(0, 100)+

  geom_smooth(method=lm)+

  theme_classic()

#scale_size_manual(values=c(2,3,4))

#p + scale_y_discrete(limits=c("0", "100", "200", "300", "400"))

#p + scale_x_discrete(limits=c("0", "30", "60", "90", "120"))


# Densigram

ggMarginal(p, type = "densigram",

           xparams = list(fill = 4),

           yparams = list(fill = 3)) 


# Save the scatter plot in a variable

p <- ggplot(dat, aes(x = Crash_2020, y = Speed_2020)) +

  geom_point()+

  geom_smooth(method=lm, color="darkred", fill="blue")


# Densigram

ggMarginal(p, type = "densigram",

           xparams = list(fill = 4),

           yparams = list(fill = 3))





Histogram in R

 # Load data

p <- read.csv(file.choose())

colnames(p)


hist(p$Speed_during, xlab = "Speed in 2020", ylab = "Frequency", 

     main = "",breaks= 20, xlim= c(0, 120), w=20)

hist(p$Volume_during, xlab = "Volume in 2020", ylab = "Frequency", main = "",

     breaks= 20, xlim= c(0, 120), w=20)

#breaks=beans



Thursday, July 22, 2021

No Parametric Model in R

 require(foreign)  #import data files

require(ggplot2)  #plot

require(MASS)     #for Modern Applied Statistics with S

library("mosaic") #Statistics and Mathematics Teaching Utilities


# Load data

p <- read.csv(file.choose())


m<- fitModel(SPEEDING_CRASH  ~ 

              exp(LENGTH              +

                    LOG_SL*a                  +

                    LOG_NL *b          +

                    P_TRUCK   *c     +

                    LOG_AVG_AADT*d +

                    LOG_SIGNAL_PER_MILE*e +

                    LOG_STOPPED_PER_MILE*f +

                    LOG_MEDIAN_WIDTH*g +

                    MEDIAN_TYPE*h +

                    P_RESIDENTIAL*i +

                    P_COMMERCIAL*j +

                    P_INDUSTRIAL*k +

                    P_AGRICULTURAL*l +

                    P_INSTITUTIONAL*m +

                    P_GOVERNMENTAL*n +

                    LANDUSE_MIX*o +

                    LOG_POP_DEN*p +

                    PCT_POV *q          +

                    PCT_VEHICLE_0*r +

                    PCT_OLD_YOUNG*s +

                    PCT_MEANS_PEDBIC*t  +

                    LOG_DAILY_TRANSIT*u  +

                    LOG_PAVEMENT_CONDITION *v +

                    ISLDTYPE*w +

                    LOG_ISLDWIDTH*x +

                    OSLDTYPE*y +

                    LOG_OSLDWIDTH*z +

                    SURF_TYPE*a1 +

                    LOG_SURFACE_WIDTH*b1  +

                    LOG_LANE_WIDTH*c1 +

                    LOG_POLES_PER_MILE*d1 +

                    P_SINGLE_SIDEWALK_ONLY*e1  +

                    P_BOTH_SIDEWALK_ONLY*f1 +

                    LOG_SIDEWALK_WIDTH*g1 +

                    P_SINGLE_BIKELANE_ONLY*h1 +

                    P_BOTH_BIKELANE_ONLY*i1 +

                    P_SINGLE_BIKESLOT_ONLY*j1 +

                    P_BOTH_BIKESLOT_ONLY*k1 +

                    P_SINGLE_SHAREDPATH_ONLY*l1  +

                    P_BOTH_SHAREDPATH_ONLY*m1   +

                    LOG_SHAREDPATH_WIDTH*n1 +

                    LOG_SHAREDPATH_DISTFMRD *o1  +

                    LOG_FRQ_AM78 *p1           +

                    NARROW_LANE *q1       +

                    ASPHALT_SURFACE   *r1      +          

                    PRESENCE_OF_MEDIAN_ISL_AT_CROSS *s1 +

                    HIGH_FREQ_TRANSIT*t1 +

                    P_SPEEDING_ADJ*u1 +           

                    LOG_85_PERCENT_SPEED_ADJ*v1+

                    w1), data = p, start=

               list(a=0,b=0,c=0,d=0,e=0,f=0,g=0,h=0,i=0,j=0,k=0,l=0,m=0,n=0,o=0,p=0,q=0,r=0,s=0,t=0,u=0,v=0,w=0,x=0,y=0,z=0,a1=0,

                    b1=0,c1=0,d1=0,e1=0,f1=0,g1=0,h1=0,i1=0,j1=0,k1=0,l1=0,m1=0,n1=0,o1=0,p1=0,q1=0,r1=0,s1=0,t1=0,u1=0,v1=0,w1=0)) #we want to start our calculation from 1


print(summary(m))


#CMF

m1<-coef(m)

m1

cmf<- exp(m1*(1-0))

cmf


############################## 

modelSummary <- summary(m)  # capture model summary as an object

modelCoeffs <- modelSummary$coefficients  # model coefficients all 

beta.estimate <- modelCoeffs[, "Estimate"]  # get beta estimate from model

beta.estimate 

stde <- modelCoeffs[, "Std. Error"]  # get std.error from model

stde

####standard error

se<- (exp(beta.estimate+stde)-exp(beta.estimate-stde))/2

se

#end

Monday, July 12, 2021

MARS in R

 #MARS

library(earth)

require(MASS)


# fit model

fit <- earth(formula=SPEEDING_CRASH ~

               log(AVG_AADT)+LANDUSE_MIX+PAVEMENT_CONDITION+ISLDWIDTH+

               P_BOTH_SIDEWALK_ONLY+HIGH_FREQ_TRANSIT+SIGNAL_PER_MILE+

               MEDIAN_WIDTH+DAILY_TRANSIT+PAVEMENT_CONDITION+LANE_WIDTH

             ,  a, degree=1)

fit

plot(fit)

# summarize the fit

summary(fit)

# summarize the importance of input variables

evimp(fit)

plot(evimp(fit))

# make predictions

predictions <- predict(fit, a)

plot(fit)

# summarize accuracy

coefficients(fit)

residuals(fit)

mse <- mean((a$SPEEDING_CRASH - predictions)^2)

print(mse)

#standart error

vcov(fit)

standard_error(fit)

Std.Error(fit)

se <- sqrt(diag(vcov(fit)))

se.coef(fit)

sqrt(diag(cov(fit)))

#2 tailed z test

z<- coef(fit)/standard_error(fit)

p<- (1-pnorm(abs(z),0,1))*2

p        #pvalue