Introduction

TXDoT has available, online, detailed data regarding traffic collisions throughout the state. The data itself must be queried and downloaded manually as CSV files, but that is not too bad. I downloaded the data for Harris county from 2010 to 2018.

Database is documented at https://www.txdot.gov/inside-txdot/division/traffic/data-access.html

Access is from https://cris.txdot.gov/secure/Share

Log on and download one year at a time. The zip files will require the login password to open them.

After unzipping, the main files required for crash data are the files with “crash” in the filename. Also need to add the data from the “charges” files for my analysis.

The “unit” files contain information about the car (make, model, year), which I will ignore for now.

Download files and unzip all of them into a single directory.

I also copied the translation tables for several key fields and built data frames to be used in a later join operation.

TXDoT Crash files

I have downloaded and unzipped the requisite files. Now I want to read in a subset of them - the “Crash” files and the “Charges” files.

#   What are all the CSV files?

filenames <- list.files(path = "~/Dropbox/Rprojects/Collisions/TrafficAccidents/fullunzip", pattern="*csv$")

#   Add back the full path

filenames <- paste("~/Dropbox/Rprojects/Collisions/TrafficAccidents/fullunzip/",filenames, sep="")

#filenames <- filenames[grep("201[01]0101", filenames)] # for testing

#  first read in the crash files

DF_collisions <- filenames[grep("crash", filenames)] %>% 
  map(~read_csv(.x, col_types = cols(
    Crash_Date = col_date(format="%m/%d/%Y"),
    Crash_Time = col_time(format = "%H:%M %p")
  ))) %>% 
  reduce(rbind)

#  now read in the charges files

DF_charges <- filenames[grep("charges", filenames)] %>% 
  map(read_csv) %>% 
  reduce(rbind)
## Parsed with column specification:
## cols(
##   Crash_ID = col_double(),
##   Unit_Nbr = col_double(),
##   Prsn_Nbr = col_double(),
##   Charge_Cat_ID = col_logical(),
##   Charge = col_character(),
##   Citation_Nbr = col_character()
## )
## Parsed with column specification:
## cols(
##   Crash_ID = col_double(),
##   Unit_Nbr = col_double(),
##   Prsn_Nbr = col_double(),
##   Charge_Cat_ID = col_logical(),
##   Charge = col_character(),
##   Citation_Nbr = col_character()
## )
## Parsed with column specification:
## cols(
##   Crash_ID = col_double(),
##   Unit_Nbr = col_double(),
##   Prsn_Nbr = col_double(),
##   Charge_Cat_ID = col_logical(),
##   Charge = col_character(),
##   Citation_Nbr = col_character()
## )
## Parsed with column specification:
## cols(
##   Crash_ID = col_double(),
##   Unit_Nbr = col_double(),
##   Prsn_Nbr = col_double(),
##   Charge_Cat_ID = col_logical(),
##   Charge = col_character(),
##   Citation_Nbr = col_character()
## )
## Parsed with column specification:
## cols(
##   Crash_ID = col_double(),
##   Unit_Nbr = col_double(),
##   Prsn_Nbr = col_double(),
##   Charge_Cat_ID = col_logical(),
##   Charge = col_character(),
##   Citation_Nbr = col_character()
## )
## Parsed with column specification:
## cols(
##   Crash_ID = col_double(),
##   Unit_Nbr = col_double(),
##   Prsn_Nbr = col_double(),
##   Charge_Cat_ID = col_logical(),
##   Charge = col_character(),
##   Citation_Nbr = col_character()
## )
## Parsed with column specification:
## cols(
##   Crash_ID = col_double(),
##   Unit_Nbr = col_double(),
##   Prsn_Nbr = col_double(),
##   Charge_Cat_ID = col_double(),
##   Charge = col_character(),
##   Citation_Nbr = col_character()
## )
## Parsed with column specification:
## cols(
##   Crash_ID = col_double(),
##   Unit_Nbr = col_double(),
##   Prsn_Nbr = col_double(),
##   Charge_Cat_ID = col_logical(),
##   Charge = col_character(),
##   Citation_Nbr = col_character()
## )
## Parsed with column specification:
## cols(
##   Crash_ID = col_double(),
##   Unit_Nbr = col_double(),
##   Prsn_Nbr = col_double(),
##   Charge_Cat_ID = col_logical(),
##   Charge = col_character(),
##   Citation_Nbr = col_character()
## )
## Parsed with column specification:
## cols(
##   Crash_ID = col_double(),
##   Unit_Nbr = col_double(),
##   Prsn_Nbr = col_double(),
##   Charge_Cat_ID = col_logical(),
##   Charge = col_character(),
##   Citation_Nbr = col_character()
## )
## Parsed with column specification:
## cols(
##   Crash_ID = col_double(),
##   Unit_Nbr = col_double(),
##   Prsn_Nbr = col_double(),
##   Charge_Cat_ID = col_logical(),
##   Charge = col_character(),
##   Citation_Nbr = col_character()
## )
## Parsed with column specification:
## cols(
##   Crash_ID = col_double(),
##   Unit_Nbr = col_double(),
##   Prsn_Nbr = col_double(),
##   Charge_Cat_ID = col_logical(),
##   Charge = col_character(),
##   Citation_Nbr = col_character()
## )
## Parsed with column specification:
## cols(
##   Crash_ID = col_double(),
##   Unit_Nbr = col_double(),
##   Prsn_Nbr = col_double(),
##   Charge_Cat_ID = col_logical(),
##   Charge = col_character(),
##   Citation_Nbr = col_character()
## )
## Parsed with column specification:
## cols(
##   Crash_ID = col_double(),
##   Unit_Nbr = col_double(),
##   Prsn_Nbr = col_double(),
##   Charge_Cat_ID = col_logical(),
##   Charge = col_character(),
##   Citation_Nbr = col_character()
## )
## Parsed with column specification:
## cols(
##   Crash_ID = col_double(),
##   Unit_Nbr = col_double(),
##   Prsn_Nbr = col_double(),
##   Charge_Cat_ID = col_logical(),
##   Charge = col_character(),
##   Citation_Nbr = col_character()
## )
#   The Charge_Cat_ID is all but useless, as it is not filled in a
#   majority of the time when Charge is filled in. So we will need
#   to do some work.


# need to rearrange charges so that they show up as a list for each
# crash. Sometimes the Crash_ID gets repeated, probably due to
# multiple citations

DF_charges <- DF_charges %>% 
  group_by(Crash_ID) %>% 
  arrange(Charge) %>% 
  summarise(Charge = paste(Charge, collapse=", "))

Analyze charges file and simplify it so we can use it.

Because the Charge_Cat_ID is rarely filled in, we will have to use the free text charge field, which of course contains many inconsistencies and spelling errors. Much manual tweaking follows, until I got tired and delegated patterns that occurred less than about 50 times to the noise.

# Populate a:z with each charge, and combine later

DF_charges$a[DF_charges$Charge=="NO CHARGES"] <- "None"
DF_charges$a[DF_charges$Charge=="NO CHARGE"] <- "None"
DF_charges$a[DF_charges$Charge=="NONE"] <- "None"
DF_charges$a[DF_charges$Charge=="NONE."] <- "None"
DF_charges$a[DF_charges$Charge=="NONE FILED"] <- "None"
DF_charges$a[DF_charges$Charge=="NONE AT THIS TIME"] <- "None"
DF_charges$a[DF_charges$Charge=="N/A"] <- "None"
DF_charges$a[grepl("CONFLICT[IE][ND]G? STATEMENT", DF_charges$Charge)] <- "None"
DF_charges$a[grepl("NOT? CIT", DF_charges$Charge)] <- "None"
DF_charges$a[grepl("NO CHARGES", DF_charges$Charge)] <- "None"
DF_charges$a[grepl("NOT? CH", DF_charges$Charge)] <- "None"
DF_charges$a[grepl("^ACCIDENT$", DF_charges$Charge)] <- "None"
DF_charges$a[grepl("^CRASH$", DF_charges$Charge)] <- "None"
DF_charges$a[grepl("^ACC$", DF_charges$Charge)] <- "None"
DF_charges$a[is.na(DF_charges$Charge)] <- "None"


#   Function to build mask for updating Simple_Charge
update_charge <- function(match){
  mask <- grepl(match, DF_charges$Charge)
  print(sum(mask))
  mask
}

###################
# Speeding
###################
mask <- update_charge("SPEE*D")
## [1] 180460
DF_charges$b[mask] <- "Speeding"

mask <- update_charge("SPEE ")
## [1] 11
DF_charges$b[mask] <- "Speeding"

mask <- update_charge("SPEE")
## [1] 180485
DF_charges$b[mask] <- "Speeding"

mask <- update_charge("SPD")
## [1] 151
DF_charges$b[mask] <- "Speeding"

mask <- update_charge("SEED")
## [1] 19
DF_charges$b[mask] <- "Speeding"

mask <- update_charge("SPPED")
## [1] 208
DF_charges$b[mask] <- "Speeding"

mask <- update_charge("F\\.?T\\.?C\\.?S")
## [1] 363
DF_charges$b[mask] <- "Speeding"

###################
# Bad license plates 
###################
mask <- update_charge("PLATE")
## [1] 1379
DF_charges$c[mask] <- "License Plate"

mask <- update_charge("MVR")
## [1] 571
DF_charges$c[mask] <- "License Plate"

mask <- update_charge(" TAG")
## [1] 275
DF_charges$c[mask] <- "License Plate"

###################
# No or expired driver's license
###################
mask <- update_charge("LICENSE")
## [1] 94272
DF_charges$d[mask] <- "No License"

mask <- update_charge(" DL")
## [1] 32117
DF_charges$d[mask] <- "No License"

mask <- update_charge(" TDL")
## [1] 2644
DF_charges$d[mask] <- "No License"

mask <- update_charge(" LIC ")
## [1] 211
DF_charges$d[mask] <- "No License"

mask <- update_charge("LICENC")
## [1] 445
DF_charges$d[mask] <- "No License"

mask <- update_charge("LISCEN")
## [1] 51
DF_charges$d[mask] <- "No License"

mask <- update_charge("LINCEN")
## [1] 38
DF_charges$d[mask] <- "No License"

mask <- update_charge("TXDL")
## [1] 459
DF_charges$d[mask] <- "No License"

mask <- update_charge("NO OPER")
## [1] 1551
DF_charges$d[mask] <- "No License"

mask <- update_charge("D\\.L")
## [1] 2091
DF_charges$d[mask] <- "No License"

mask <- update_charge("D\\.?W\\.?L")
## [1] 3496
DF_charges$d[mask] <- "No License"

mask <- update_charge("NDL")
## [1] 39
DF_charges$d[mask] <- "No License"

mask <- update_charge("NO O\\.?P?L")
## [1] 182
DF_charges$d[mask] <- "No License"

mask <- update_charge("N\\.?O\\.?L")
## [1] 8114
DF_charges$d[mask] <- "No License"

mask <- update_charge("CDL")
## [1] 185
DF_charges$d[mask] <- "No License"

mask <- update_charge("^NO L$")
## [1] 1
DF_charges$d[mask] <- "No License"

mask <- update_charge(" D/L")
## [1] 106
DF_charges$d[mask] <- "No License"

mask <- update_charge("^DL ")
## [1] 1043
DF_charges$d[mask] <- "No License"

mask <- update_charge("^N?O?T?/?-?DL$")
## [1] 14
DF_charges$d[mask] <- "No License"

mask <- update_charge("DWLI")
## [1] 2374
DF_charges$d[mask] <- "No License"

mask <- update_charge("ENDOR")
## [1] 287
DF_charges$d[mask] <- "No License"

mask <- update_charge("LISEN")
## [1] 104
DF_charges$d[mask] <- "No License"

mask <- update_charge("UNLI")
## [1] 701
DF_charges$d[mask] <- "No License"

mask <- update_charge("DRIVER'?S? LIC")
## [1] 81664
DF_charges$d[mask] <- "No License"

###################
# No insurance
###################
mask <- update_charge("INSUR")
## [1] 41263
DF_charges$e[mask] <- "No Insurance"

mask <- update_charge("^INS$")
## [1] 3
DF_charges$e[mask] <- "No Insurance"

mask <- update_charge("INSR")
## [1] 32
DF_charges$e[mask] <- "No Insurance"

mask <- update_charge("^NO[ -/]INS")
## [1] 4742
DF_charges$e[mask] <- "No Insurance"

mask <- update_charge("RESP")
## [1] 44282
DF_charges$e[mask] <- "No Insurance"

mask <- update_charge("NO F\\.R\\.")
## [1] 44
DF_charges$e[mask] <- "No Insurance"

mask <- update_charge("FINAN")
## [1] 43036
DF_charges$e[mask] <- "No Insurance"

mask <- update_charge("FIN\\.? RES")
## [1] 1558
DF_charges$e[mask] <- "No Insurance"

mask <- update_charge("MAINTAIN F")
## [1] 13093
DF_charges$e[mask] <- "No Insurance"

mask <- update_charge("NO INS\\.?$")
## [1] 3793
DF_charges$e[mask] <- "No Insurance"

mask <- update_charge("NO INS\\.")
## [1] 714
DF_charges$e[mask] <- "No Insurance"

mask <- update_charge("NO INSU$")
## [1] 44
DF_charges$e[mask] <- "No Insurance"

mask <- update_charge("FT?MFR")
## [1] 2287
DF_charges$e[mask] <- "No Insurance"

mask <- update_charge("F\\.?M")
## [1] 1538
DF_charges$e[mask] <- "No Insurance"

mask <- update_charge("F\\.T\\.?M")
## [1] 103
DF_charges$e[mask] <- "No Insurance"

mask <- update_charge("LIAB")
## [1] 1712
DF_charges$e[mask] <- "No Insurance"

mask <- update_charge("F M F R")
## [1] 23
DF_charges$e[mask] <- "No Insurance"

mask <- update_charge("QUALI")
## [1] 50
DF_charges$e[mask] <- "No Insurance"

mask <- update_charge("NOIN?$")
## [1] 65
DF_charges$e[mask] <- "No Insurance"

mask <- update_charge("NOIN?$")
## [1] 65
DF_charges$e[mask] <- "No Insurance"

mask <- update_charge("PROOF OF FI")
## [1] 3352
DF_charges$e[mask] <- "No Insurance"

mask <- update_charge(" NSURA")
## [1] 22
DF_charges$e[mask] <- "No Insurance"

mask <- update_charge("IN?SUR?ANCE")
## [1] 40535
DF_charges$e[mask] <- "No Insurance"

mask <- update_charge("URANCE")
## [1] 40600
DF_charges$e[mask] <- "No Insurance"

mask <- update_charge("PROOF OF INS")
## [1] 542
DF_charges$e[mask] <- "No Insurance"

###################
# Ran red light
###################
mask <- update_charge(" RED ")
## [1] 34461
DF_charges$f[mask] <- "Ran Red Light"

mask <- update_charge("RED ?LIGHT")
## [1] 33866
DF_charges$f[mask] <- "Ran Red Light"

mask <- update_charge(" RED$")
## [1] 361
DF_charges$f[mask] <- "Ran Red Light"

mask <- update_charge("STOP LIGHT")
## [1] 691
DF_charges$f[mask] <- "Ran Red Light"

mask <- update_charge("CONTROL DEV")
## [1] 2145
DF_charges$f[mask] <- "Ran Red Light"

mask <- update_charge("STOP ?AND|\\&|/ ?GO")
## [1] 7384
DF_charges$f[mask] <- "Ran Red Light"

mask <- update_charge("LANE CONT")
## [1] 171
DF_charges$f[mask] <- "Ran Red Light"

mask <- update_charge("DISREGARDE?D? SIGNAL")
## [1] 145
DF_charges$f[mask] <- "Ran Red Light"

mask <- update_charge("DISREGARDE?D? LIGHT")
## [1] 45
DF_charges$f[mask] <- "Ran Red Light"

mask <- update_charge("DISREGARDE?D? TRAFFIC")
## [1] 2111
DF_charges$f[mask] <- "Ran Red Light"

mask <- update_charge("RAN A? ?LIGHT")
## [1] 29
DF_charges$f[mask] <- "Ran Red Light"

mask <- update_charge("RAN A? ?SIGNAL LIGHT")
## [1] 13
DF_charges$f[mask] <- "Ran Red Light"

mask <- update_charge("RAN RA?D ")
## [1] 12
DF_charges$f[mask] <- "Ran Red Light"

###################
# Unsafe Lane Change
###################
mask <- update_charge("CHANGES? LANE")
## [1] 5602
DF_charges$g[mask] <- "Unsafe Lane Change"

mask <- update_charge("CHANGED LANE")
## [1] 21183
DF_charges$g[mask] <- "Unsafe Lane Change"

mask <- update_charge("LANE CHANGE")
## [1] 7787
DF_charges$g[mask] <- "Unsafe Lane Change"

mask <- update_charge("CHANGING LANE")
## [1] 3803
DF_charges$g[mask] <- "Unsafe Lane Change"

mask <- update_charge("CHANGED? ?LA?NE?")
## [1] 26798
DF_charges$g[mask] <- "Unsafe Lane Change"

mask <- update_charge("LA?NE? ?CHANGE")
## [1] 7816
DF_charges$g[mask] <- "Unsafe Lane Change"

###################
# Single Lane failure
###################
mask <- update_charge("SI[NG][GN]LE")
## [1] 37169
DF_charges$h[mask] <- "Fail Single Lane"

mask <- update_charge("SINGL ")
## [1] 29
DF_charges$h[mask] <- "Fail Single Lane"

mask <- update_charge("MAINTAIN SING ")
## [1] 6
DF_charges$h[mask] <- "Fail Single Lane"

mask <- update_charge("MAINTAIN SI[NG]")
## [1] 9374
DF_charges$h[mask] <- "Fail Single Lane"

mask <- update_charge("MAINTAIN LANE")
## [1] 175
DF_charges$h[mask] <- "Fail Single Lane"

###################
# DWI
###################
mask <- update_charge("D\\.?W\\.?I")
## [1] 11160
DF_charges$i[mask] <- "DWI"

mask <- update_charge("INTOX")
## [1] 9296
DF_charges$i[mask] <- "DWI"

mask <- update_charge("D\\.?U\\.?I")
## [1] 492
DF_charges$i[mask] <- "DWI"

mask <- update_charge("INFLU")
## [1] 339
DF_charges$i[mask] <- "DWI"

###################
# Failure to Yield
###################
mask <- update_charge("YIELD")
## [1] 75813
DF_charges$j[mask] <- "Failure to Yield"

mask <- update_charge("YLD")
## [1] 22340
DF_charges$j[mask] <- "Failure to Yield"

mask <- update_charge("YL ")
## [1] 8
DF_charges$j[mask] <- "Failure to Yield"

mask <- update_charge("YEIL")
## [1] 6240
DF_charges$j[mask] <- "Failure to Yield"

mask <- update_charge("YIEL")
## [1] 76133
DF_charges$j[mask] <- "Failure to Yield"

mask <- update_charge("FYR")
## [1] 881
DF_charges$j[mask] <- "Failure to Yield"

mask <- update_charge("F\\.?T\\.?Y\\.?R")
## [1] 2919
DF_charges$j[mask] <- "Failure to Yield"

mask <- update_charge("^F\\.?T\\.?Y")
## [1] 2911
DF_charges$j[mask] <- "Failure to Yield"

mask <- update_charge("FAIL TO Y")
## [1] 50919
DF_charges$j[mask] <- "Failure to Yield"

mask <- update_charge("FAIL TO STOP FR")
## [1] 58
DF_charges$j[mask] <- "Failure to Yield"

mask <- update_charge("YROW")
## [1] 4113
DF_charges$j[mask] <- "Failure to Yield"


###################
# Failure to Stop and Give Information
###################
mask <- update_charge("F\\.?S\\.?G\\.?I")
## [1] 9135
DF_charges$k[mask] <- "FSGI"

mask <- update_charge("GIVE INFO")
## [1] 5000
DF_charges$k[mask] <- "FSGI"

mask <- update_charge("FGSI")
## [1] 22
DF_charges$k[mask] <- "FSGI"

###################
# Failure to Stop and Render Aid
###################
mask <- update_charge("F\\.?S\\.?R\\.?A")
## [1] 781
DF_charges$l[mask] <- "FSRA"

mask <- update_charge(" AID")
## [1] 1470
DF_charges$l[mask] <- "FSRA"

mask <- update_charge("HIT AND RUN")
## [1] 22
DF_charges$l[mask] <- "FSRA"

mask <- update_charge("LEAVING T?H?E? ?SCENE")
## [1] 46
DF_charges$l[mask] <- "FSRA"

###################
# Registration issue
###################
mask <- update_charge("REGIS")
## [1] 10807
DF_charges$m[mask] <- "Registration Issue"

mask <- update_charge("EXPIRED REG")
## [1] 7439
DF_charges$m[mask] <- "Registration Issue"

mask <- update_charge("EXP?\\.? REG")
## [1] 1216
DF_charges$m[mask] <- "Registration Issue"

mask <- update_charge("NO REG\\.?$")
## [1] 64
DF_charges$m[mask] <- "Registration Issue"

mask <- update_charge("EXP/NO REG")
## [1] 45
DF_charges$m[mask] <- "Registration Issue"

###################
# Unsafe Backing
###################
mask <- update_charge("BACK")
## [1] 13143
DF_charges$n[mask] <- "Unsafe Backing"

###################
# Follow Too Close
###################
mask <- update_charge("TOO? CLOSS?[LE]E?Y?")
## [1] 10628
DF_charges$o[mask] <- "Follow Too Close"

###################
# Stop Sign
###################
mask <- update_charge("STOP SIGN")
## [1] 28301
DF_charges$p[mask] <- "Stop Sign"

mask <- update_charge("RAN STOP")
## [1] 5179
DF_charges$p[mask] <- "Stop Sign"

###################
# Unsafe Turn
###################
mask <- update_charge("TURN")
## [1] 48113
DF_charges$q[mask] <- "Unsafe Turn"

###################
# Inspection
###################
mask <- update_charge("INSP")
## [1] 2963
DF_charges$r[mask] <- "Inspection"

mask <- update_charge("MVI")
## [1] 405
DF_charges$r[mask] <- "Inspection"

mask <- update_charge("EXP\\.?I?R?E?D? INS")
## [1] 2629
DF_charges$r[mask] <- "Inspection"

###################
# No Glasses
###################

mask <- update_charge("GLASSES")
## [1] 10
DF_charges$s[mask] <- "No Glasses"

mask <- update_charge("LENSE")
## [1] 144
DF_charges$s[mask] <- "No Glasses"

mask <- update_charge("CODE A")
## [1] 76
DF_charges$s[mask] <- "No Glasses"

mask <- update_charge("RESTRICTION A")
## [1] 147
DF_charges$s[mask] <- "No Glasses"

###################
# Unsafe Passing
###################
mask <- update_charge("PASS ")
## [1] 2273
DF_charges$t[mask] <- "Unsafe Passing"

mask <- update_charge("PASSING")
## [1] 1381
DF_charges$t[mask] <- "Unsafe Passing"

mask <- update_charge("PASSED")
## [1] 794
DF_charges$t[mask] <- "Unsafe Passing"

mask <- update_charge("DRIVING ON SHOULDER")
## [1] 565
DF_charges$t[mask] <- "Unsafe Passing"

###################
# Other Lane violations
###################
mask <- update_charge("DESIGNATED LANE")
## [1] 999
DF_charges$u[mask] <- "Other Lane Violations"

mask <- update_charge("ONE-HALF")
## [1] 41
DF_charges$u[mask] <- "Other Lane Violations"

mask <- update_charge(" HALF ")
## [1] 74
DF_charges$u[mask] <- "Other Lane Violations"

mask <- update_charge(" 1/2 ")
## [1] 8
DF_charges$u[mask] <- "Other Lane Violations"

mask <- update_charge("SHOULDER")
## [1] 818
DF_charges$u[mask] <- "Other Lane Violations"

mask <- update_charge("CENTER LANE")
## [1] 345
DF_charges$u[mask] <- "Other Lane Violations"

###################
# Child Seat
###################
mask <- update_charge("CHILD")
## [1] 2732
DF_charges$v[mask] <- "Child Seat"

mask <- update_charge("CAR SEAT")
## [1] 152
DF_charges$v[mask] <- "Child Seat"

###################
# Texting etc
###################
mask <- update_charge("TEXT")
## [1] 17
DF_charges$w[mask] <- "Texting/Phone"

mask <- update_charge("PHONE")
## [1] 18
DF_charges$w[mask] <- "Texting/Phone"

mask <- update_charge(" ELEC")
## [1] 27
DF_charges$w[mask] <- "Texting/Phone"

###################
# Unsafe start
###################
mask <- update_charge("START")
## [1] 1695
DF_charges$x[mask] <- "Unsafe Start"

###################
# Unsafe Load
###################
mask <- update_charge("LOAD")
## [1] 911
DF_charges$y[mask] <- "Unsafe Load"

mask <- update_charge("FAIL TO SECURE")
## [1] 224
DF_charges$y[mask] <- "Unsafe Load"

###################
# no seatbelt
###################
mask <- update_charge("BELT")
## [1] 1342
DF_charges$z[mask] <- "No Seatbelt"

mask <- update_charge("RESTRAIN")
## [1] 261
DF_charges$z[mask] <- "No Seatbelt"

###################
# Evading
###################
mask <- update_charge("EVAD")
## [1] 1847
DF_charges$A[mask] <- "Evading"

###################
# Evasive Action
###################
mask <- update_charge("EVASS?IVE")
## [1] 1123
DF_charges$B[mask] <- "Faulty Evasive Action"

###################
# Address Change
###################
mask <- update_charge("ADD")
## [1] 1803
DF_charges$C[mask] <- "Address Change"

mask <- update_charge("ADRESS")
## [1] 23
DF_charges$C[mask] <- "Address Change"

###################
# Wrong Way
###################
mask <- update_charge("WRONG WAY")
## [1] 710
DF_charges$D[mask] <- "Wrong Way"

mask <- update_charge("WRONG SIDE")
## [1] 767
DF_charges$D[mask] <- "Wrong Way"

###################
# Impeding Traffic
###################
mask <- update_charge("IMPED")
## [1] 342
DF_charges$E[mask] <- "Impeding Traffic"

###################
# Inattention
###################
mask <- update_charge("DISTRACT")
## [1] 67
DF_charges$F[mask] <- "Distraction"

mask <- update_charge("INATT")
## [1] 256
DF_charges$F[mask] <- "Distraction"

###################
# Defective
###################
mask <- update_charge("DEFECT")
## [1] 1190
DF_charges$G[mask] <- "Defective Vehicle"

mask <- update_charge("BALD")
## [1] 300
DF_charges$G[mask] <- "Defective Vehicle"

mask <- update_charge("CRACK")
## [1] 99
DF_charges$G[mask] <- "Defective Vehicle"

mask <- update_charge("^DEF")
## [1] 1129
DF_charges$G[mask] <- "Defective Vehicle"

###################
# Unauthorized Use
###################
mask <- update_charge("UUMV")
## [1] 171
DF_charges$H[mask] <- "Unauthorized Use"

mask <- update_charge("UNAUTHORIZED USE")
## [1] 155
DF_charges$H[mask] <- "Unauthorized Use"

###################
# Drug possession
###################
mask <- update_charge("PARAPH")
## [1] 78
DF_charges$I[mask] <- "Drug Possession"

mask <- update_charge("SUBST")
## [1] 263
DF_charges$I[mask] <- "Drug Possession"

mask <- update_charge("MARIJ")
## [1] 199
DF_charges$I[mask] <- "Drug Possession"

mask <- update_charge("^P\\.?O\\.?M")
## [1] 25
DF_charges$I[mask] <- "Drug Possession"

mask <- update_charge("PCS")
## [1] 306
DF_charges$I[mask] <- "Drug Possession"

mask <- update_charge("COCAINE")
## [1] 39
DF_charges$I[mask] <- "Drug Possession"

mask <- update_charge("DANGEROUS DRUG")
## [1] 28
DF_charges$I[mask] <- "Drug Possession"

###################
# Open Container
###################
mask <- update_charge("OPEN CON")
## [1] 299
DF_charges$J[mask] <- "Open Container"

mask <- update_charge("OPEN ALCOH")
## [1] 15
DF_charges$J[mask] <- "Open Container"

mask <- update_charge("POSS.* ALCOHOL")
## [1] 102
DF_charges$J[mask] <- "Open Container"

###################
# Reckless
###################
mask <- update_charge("RECK")
## [1] 319
DF_charges$K[mask] <- "Reckless"

mask <- update_charge("CARELESS")
## [1] 17
DF_charges$K[mask] <- "Reckless"

###################
# Stationary Object
###################
mask <- update_charge("STATIONA")
## [1] 228
DF_charges$L[mask] <- "Stationary Object"

mask <- update_charge("FIXED")
## [1] 210
DF_charges$L[mask] <- "Stationary Object"

###################
# Fail to Control
###################
mask <- update_charge("FAILE?D? TO CONTROL$")
## [1] 63
DF_charges$M[mask] <- "Fail to Control"

mask <- update_charge("FAILURE TO CONTROL$")
## [1] 12
DF_charges$M[mask] <- "Fail to Control"

mask <- update_charge("CONTROL MOTOR")
## [1] 435
DF_charges$M[mask] <- "Fail to Control"

###################
# Fail to Stop properly
###################
mask <- update_charge("STOP A?T? ?DESIG")
## [1] 1810
DF_charges$N[mask] <- "Improper Stop"

mask <- update_charge("STOP A?T? ?PROPER")
## [1] 244
DF_charges$N[mask] <- "Improper Stop"

###################
# Blocking Traffic
###################
mask <- update_charge("BLOCK")
## [1] 216
DF_charges$O[mask] <- "Blocking Traffic"

mask <- update_charge("PARK[EI][DN]G? [OI]N ROADWAY")
## [1] 58
DF_charges$O[mask] <- "Blocking Traffic"

mask <- update_charge("PARK[EI][DN]G? IN A? ?TRAFFIC")
## [1] 23
DF_charges$O[mask] <- "Blocking Traffic"

mask <- update_charge("PARK[EI][DN]G? IN A? ?MOVING")
## [1] 17
DF_charges$O[mask] <- "Blocking Traffic"

mask <- update_charge("STOPP?E?D? IN A? ?MOVING")
## [1] 58
DF_charges$O[mask] <- "Blocking Traffic"

###################
# Misc Criminal charge
###################
mask <- update_charge("CRIMINAL")
## [1] 158
DF_charges$P[mask] <- "Misc Criminal"

mask <- update_charge("ASSAULT")
## [1] 799
DF_charges$P[mask] <- "Misc Criminal"

mask <- update_charge("THEFT")
## [1] 79
DF_charges$P[mask] <- "Misc Criminal"

mask <- update_charge("ROBBERY")
## [1] 90
DF_charges$P[mask] <- "Misc Criminal"

mask <- update_charge("RESISTING ARREST")
## [1] 47
DF_charges$P[mask] <- "Misc Criminal"

mask <- update_charge("MANSLAUGHTER")
## [1] 346
DF_charges$P[mask] <- "Misc Criminal"

mask <- update_charge("MURDER")
## [1] 39
DF_charges$P[mask] <- "Misc Criminal"

mask <- update_charge("WEAPON")
## [1] 168
DF_charges$P[mask] <- "Misc Criminal"

mask <- update_charge("FIREARM")
## [1] 55
DF_charges$P[mask] <- "Misc Criminal"

mask <- update_charge("UCW")
## [1] 31
DF_charges$P[mask] <- "Misc Criminal"

mask <- update_charge("WARRANT")
## [1] 280
DF_charges$P[mask] <- "Misc Criminal"

###################
# Safety Zone
###################
mask <- update_charge("SAFETY ZONE")
## [1] 52
DF_charges$Q[mask] <- "Safety Zone"

###################
# Parked Facing Traffic
###################
mask <- update_charge("FACING")
## [1] 38
DF_charges$R[mask] <- "Park Facing Traf"

###################
# Failure to Report
###################
mask <- update_charge("REPORT ACC")
## [1] 117
DF_charges$S[mask] <- "Failure to Report"

mask <- update_charge("COMPLY")
## [1] 82
DF_charges$S[mask] <- "Failure to Report"

mask <- update_charge("NOTIFY OWNER")
## [1] 28
DF_charges$S[mask] <- "Failure to Report"

mask <- update_charge("DUTY U?P?ON")
## [1] 35
DF_charges$S[mask] <- "Failure to Report"

###################
# Racing
###################
mask <- update_charge("RACING")
## [1] 67
DF_charges$T[mask] <- "Racing"

###################
# No Adult
###################
mask <- update_charge("CODE B")
## [1] 314
DF_charges$U[mask] <- "No Adult"

mask <- update_charge("RESTRICTION B")
## [1] 572
DF_charges$U[mask] <- "No Adult"

mask <- update_charge("LOFS 21")
## [1] 316
DF_charges$U[mask] <- "No Adult"

###################
# Pedestrian error
###################
mask <- update_charge("PEDESTRIAN ENTER")
## [1] 35
DF_charges$V[mask] <- "Pedestrian Error"

mask <- update_charge("PEDESTRIAN CROSSED")
## [1] 17
DF_charges$V[mask] <- "Pedestrian Error"

mask <- update_charge("PEDESTRIAN IN ROAD")
## [1] 6
DF_charges$V[mask] <- "Pedestrian Error"

mask <- update_charge("JAY")
## [1] 36
DF_charges$V[mask] <- "Pedestrian Error"

###################
# No lights
###################
mask <- update_charge("WITHOUT H?E?A?D? ?LIGHTS")
## [1] 45
DF_charges$W[mask] <- "Lights Off"

mask <- update_charge("W/O H?E?A?D? ?LIGHTS")
## [1] 41
DF_charges$W[mask] <- "Lights Off"

mask <- update_charge("NO H?E?A?D? ?LIGHTS")
## [1] 24
DF_charges$W[mask] <- "Lights Off"

###################
# Toll Violation
###################
mask <- update_charge(" TOLL")
## [1] 78
DF_charges$X[mask] <- "Toll Violation"

###################
# Obey Officer
###################
mask <- update_charge("OFFICER")
## [1] 231
DF_charges$Y[mask] <- "Obey Officer"

mask <- update_charge("SCHOOL CROSS")
## [1] 35
DF_charges$Y[mask] <- "Obey Officer"

###################
# Height
###################
mask <- update_charge("HEIGHT")
## [1] 73
DF_charges$Z[mask] <- "Height"

mask <- update_charge("CLEARA")
## [1] 234
DF_charges$Z[mask] <- "Height"

# Collapse a,... columns into Simple_Charge
#   Note: using the development version of tidyr for na.rm=TRUE option

DF_Charges_temp <- 
  DF_charges %>% 
  tidyr::unite("Simple_Charge", a:Z, na.rm = TRUE, remove = FALSE, sep=", ")

#   Look at what is not categorized
ChargesTemp <- DF_Charges_temp[DF_Charges_temp$Simple_Charge=="",] %>% group_by(Charge) %>% tally() %>% arrange(-n)

#   What does it look like?

head(ChargesTemp)
## # A tibble: 6 x 2
##   Charge                                    n
##   <chr>                                 <int>
## 1 NONE, NONE                              104
## 2 N/A, N/A                                 31
## 3 DEADLY CONDUCT                           14
## 4 NA                                       13
## 5 DP-DROVE OFF PAVED PORTION OF ROADWAY    12
## 6 PENDING                                  11

Finalize charges and join to crashes

#   Finally create the list and do not keep the a:Z columns

DF_charges <- 
  DF_charges %>% 
  tidyr::unite("Simple_Charge", a:Z, na.rm = TRUE, remove = FALSE, sep=", ") %>% 
  select(-a:-Z)

# Sample to look for issues

head(sort(unique(DF_charges$Simple_Charge)))
## [1] ""                              "Address Change"               
## [3] "Address Change, Misc Criminal" "Address Change, No Adult"     
## [5] "Address Change, Reckless"      "Address Change, Wrong Way"
tail(sort(unique(DF_charges$Simple_Charge)))
## [1] "Unsafe Turn, Wrong Way, No Adult" "Wrong Way"                       
## [3] "Wrong Way, Lights Off"            "Wrong Way, Obey Officer"         
## [5] "Wrong Way, Open Container"        "Wrong Way, Park Facing Traf"
#   Minor cleanup 

DF_charges$Simple_Charge <- str_replace(DF_charges$Simple_Charge,"^$", "UNK")


# need to rearrange charges so that they show up as a list for each
# crash. Sometimes the Crash_ID gets repeated, probably multiple citations

#DF_charges <- DF_charges %>% 
#  group_by(Crash_ID) %>% 
#  arrange(Simple_Charge) %>% 
#  summarise(Charges = paste(Simple_Charge, collapse=", "))

# combine the two

DF <- left_join(DF_collisions, DF_charges, by="Crash_ID")

df <- DF %>% select(Crash_Fatal_Fl, Crash_Date, Crash_Time, Rpt_City_ID, Rpt_Block_Num, Rpt_Street_Pfx, Rpt_Street_Name, Rpt_Street_Sfx, At_Intrsct_Fl, Latitude, Longitude, Street_Name, Street_Nbr, Tot_Injry_Cnt, Death_Cnt, Crash_Sev_ID, Crash_Speed_Limit, Harm_Evnt_ID, Road_Cls_ID, Intrsct_Relat_ID, Nbr_Of_Lane, Simple_Charge  )

#   Join text from table to codes

df <- left_join(df, City, by="Rpt_City_ID")
df <- left_join(df, Severity, by="Crash_Sev_ID")
df <- left_join(df, Harm, by="Harm_Evnt_ID")
df <- left_join(df, RoadType, by="Road_Cls_ID")
df <- left_join(df, Intersection, by="Intrsct_Relat_ID")

Do some quality control checks

# Look for NA's in the various fields

df %>%
    map_df(function(x) sum(is.na(x))) %>%
    gather(feature, num_nulls) %>%
    print(n = 100)
## # A tibble: 27 x 2
##    feature           num_nulls
##    <chr>                 <int>
##  1 Crash_Fatal_Fl            0
##  2 Crash_Date                0
##  3 Crash_Time                0
##  4 Rpt_City_ID               0
##  5 Rpt_Block_Num         26071
##  6 Rpt_Street_Pfx       645038
##  7 Rpt_Street_Name           0
##  8 Rpt_Street_Sfx       228905
##  9 At_Intrsct_Fl             0
## 10 Latitude             173842
## 11 Longitude            173842
## 12 Street_Name               0
## 13 Street_Nbr           632780
## 14 Tot_Injry_Cnt             0
## 15 Death_Cnt                 0
## 16 Crash_Sev_ID              0
## 17 Crash_Speed_Limit         0
## 18 Harm_Evnt_ID              0
## 19 Road_Cls_ID               0
## 20 Intrsct_Relat_ID          0
## 21 Nbr_Of_Lane          710394
## 22 Simple_Charge        372187
## 23 City                    170
## 24 Severity                  0
## 25 Harmed                    0
## 26 RoadType                  0
## 27 Intersection              0
# Crash fatality field

sort(unique(df$Crash_Fatal_Fl))
## [1] "N" "Y"
# Crash date and time

head(sort(unique(df$Crash_Date)))
## [1] "2010-01-01" "2010-01-02" "2010-01-03" "2010-01-04" "2010-01-05"
## [6] "2010-01-06"
tail(sort(unique(df$Crash_Date)))
## [1] "2018-12-26" "2018-12-27" "2018-12-28" "2018-12-29" "2018-12-30"
## [6] "2018-12-31"
range(df$Crash_Date)
## [1] "2010-01-01" "2018-12-31"
range(df$Crash_Time)
## Time differences in secs
## [1]     0 86340
#   Charges

head(sort(unique(df$Simple_Charge)))
## [1] "Address Change"                "Address Change, Misc Criminal"
## [3] "Address Change, No Adult"      "Address Change, Reckless"     
## [5] "Address Change, Wrong Way"     "Blocking Traffic"
df$Charges <- str_replace_all(df$Simple_Charge, ", None", "")
df$Charges <- str_replace_all(df$Simple_Charge, "None, ", "")
#   Crash time

head(sort(unique(df$Crash_Time)))
## [1]   0  60 120 180 240 300
tail(sort(unique(df$Crash_Time)))
## [1] 86040 86100 86160 86220 86280 86340
# City ID

sort(unique(df$Rpt_City_ID))
##   [1]   22   29   30   33   55   57  107  108  111  121  132  135  136  141
##  [15]  160  164  169  185  189  208  211  212  216  219  223  228  244  253
##  [29]  254  255  260  264  269  278  281  283  295  296  304  311  315  325
##  [43]  326  331  333  336  359  369  379  383  391  405  408  409  413  422
##  [57]  430  434  435  447  453  459  478  523  625  647  672  843  851  855
##  [71]  899  952 1013 1061 1073 1212 1322 1326 1348 1353 1368 1389 1444 1448
##  [85] 1532 1535 1566 1570 1573 1576 1585 1601 1610 1635 1640 1644 1649 1660
##  [99] 1661 1664 1665 1666 1667 1668 1669 1670 1671 1678 1688 1698 1711 1719
## [113] 1735 1743 1795 1802 1806 1926 2071 3313 4461 4728 5006 5659 6756 9999
# Reported block number

head(sort(unique(df$Rpt_Block_Num)))
## [1] "0"     "0-99"  "00"    "000"   "0000"  "00000"
# Non-numeric block numbers

df$Rpt_Block_Num[grepl("\\D+", df$Rpt_Block_Num)]
##   [1] "5123-J"     "12500`"     "9700-10800" "138-H"      "1-99"      
##   [6] "3098``"     "145OO"      "10502-B"    "234O0"      "11400R"    
##  [11] "2723 1/2"   "16,100"     "22OO"       "114OO"      "77OO"      
##  [16] "5000 BLOCK" "91OO"       "12,100"     "19,000"     "NONE"      
##  [21] "1 99"       "15,900"     "22,000"     "20,900"     "0-99"      
##  [26] "2-007"      "W"          "1-99"       "11811 1/2"  "11811 12"  
##  [31] "16,800"     "UNIT BLOCK" "9709-A"     "8000O"      "UNIT"      
##  [36] "UNIT"       "O"          "7500  BLK"  "3800 BLK"   "12OO"      
##  [41] "300`"       "1028 1/2"   "1505.5"     "3100O"      "21OO"      
##  [46] "660O"       "FIELDSTORE" "267OO"      "30O"        "US 290"    
##  [51] "FM 2920"    "231OO"      "521 EB MM"  "1-99"       "1505.5"    
##  [56] "1505.5"     "1e+05"      "3e+05"      "UNIT"       "K-2"       
##  [61] "1025B"      "44OO"       "1140O"      "1200`"      "530O"      
##  [66] "1-99"       "UNIT BLOCK" "O"          "808 1/2"    "600`"      
##  [71] "UNIT BLOCK" "UNIT BLOCK" "3616 1/2"   "1505.5"     "UNIT BLOCK"
##  [76] "UNIT BLOCK" "12,600"     "N"          "14OO"       "9100`"     
##  [81] "UNIT"       "485O"       "UNIT"       "500-BLK"    "20,000"    
##  [86] "100 BLOCK"  "1-99"       "2087-A"     "11200--"    "7410-A"    
##  [91] "1ST"        "727-7"      "7100-BLK"   "12000-BLK"  "4200-BLK"  
##  [96] "500-BLK"    "800-BLK"    "3600-BLK"   "600-BLK"    "4200-BLK"  
## [101] "100-BLK"    "1200-BLK"   "5100-BLK"   "4700-BLK"   "6200-BLK"  
## [106] "3500-BLK"   "1600-BLK"   "5100-BLK"   "2200-BLK"   "1100-BLK"  
## [111] "300-BLK"    "3300-BLK"   "4000-BLK"   "10900-BLK"  "3100-BLK"  
## [116] "4700-BLK"   "4200-BLK"   "700-BLK"    "600-BLK"    "7500-BLK"  
## [121] "5200-BLK"   "2700-BLK"   "8200-BLK"
#   Remove non-numeric stuff

df$Rpt_Block_Num <- str_replace(df$Rpt_Block_Num, " BLOCK","")
df$Rpt_Block_Num <- str_replace_all(df$Rpt_Block_Num, "O","0")
df$Rpt_Block_Num <- str_replace_all(df$Rpt_Block_Num, "^\\d+-","")
df$Rpt_Block_Num <- str_replace_all(df$Rpt_Block_Num, "PP","00")
df$Rpt_Block_Num <- str_replace_all(df$Rpt_Block_Num, " 1/2","")
df$Rpt_Block_Num <- str_replace(df$Rpt_Block_Num, "\\D+","")
df$Rpt_Block_Num <- str_replace(df$Rpt_Block_Num, "0RE","")
df$Rpt_Block_Num <- str_replace(df$Rpt_Block_Num, "0NE","")
df$Rpt_Block_Num <- str_replace(df$Rpt_Block_Num, "#","")

head(sort(unique(df$Rpt_Block_Num)))
## [1] ""      "0"     "00"    "000"   "0000"  "00000"
# Street Prefix

sort(unique(df$Rpt_Street_Pfx))
## [1] "E"   "N"   "NE"  "NW"  "S"   "SE"  "SW"  "UNK" "W"
df$Rpt_Street_Pfx <- str_replace(df$Rpt_Street_Pfx, "UNK", "")
# Street Suffix

sort(unique(df$Rpt_Street_Sfx))
##  [1] "AVE"  "BLVD" "CIR"  "CT"   "CV"   "DR"   "EXPY" "FWY"  "HWY"  "LN"  
## [11] "LOOP" "PARK" "PKWY" "PL"   "RD"   "ST"   "TRL"  "WAY"
# Lat and Long

df$Latitude[grepl("[^0-9\\-\\. ]", df$Latitude)]
## numeric(0)
df$Longitude[grepl("[^-][^0-9\\. ]", df$Longitude)]
## numeric(0)
df$Latitude <- as.numeric(df$Latitude)
df$Longitude <- as.numeric(df$Longitude)
range(df$Latitude, na.rm=TRUE)
## [1] 29.50095 30.16813
range(df$Longitude, na.rm = TRUE)
## [1] -95.95753 -94.91079
# Street Number

head(sort(unique(df$Street_Nbr)))
## [1] 0 1 2 3 4 5
df$Street_Nbr[grepl("\\D+", df$Street_Nbr)]
## numeric(0)
df$Street_Nbr <- str_replace(df$Street_Nbr, "\\D+","")
# total injury count

sort(unique(df$Tot_Injry_Cnt))
##  [1]  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22
## [24] 23 26 29 30 32 43
# Death count

sort(unique(df$Death_Cnt))
## [1] 0 1 2 3 4 5
# Severity

sort(unique(df$Severity))
## [1] "Fatal"                    "Non-Incapacitating"      
## [3] "Not Injured"              "Possible Injury"         
## [5] "Suspected Serious Injury" "Unknown"
df %>% group_by(Severity) %>% 
  summarise(count=n()) %>%
  arrange(count)
## # A tibble: 6 x 2
##   Severity                  count
##   <chr>                     <int>
## 1 Fatal                      3451
## 2 Suspected Serious Injury  16423
## 3 Unknown                   55825
## 4 Non-Incapacitating        70718
## 5 Possible Injury          174219
## 6 Not Injured              669147
# who or what was harmed

sort(unique(df$Harmed))
##  [1] "Animal"                     "Fixed Object"              
##  [3] "Motor Vehicle in Transport" "Not Reported"              
##  [5] "Other Non-Collision"        "Other Object"              
##  [7] "Overturned"                 "Parked Car"                
##  [9] "Pedal Cyclist"              "Pedestrian"                
## [11] "Rail Road"
df %>% group_by(Harmed) %>% 
  summarise(count=n()) %>%
  arrange(count)
## # A tibble: 11 x 2
##    Harmed                      count
##    <chr>                       <int>
##  1 Not Reported                   19
##  2 Rail Road                     545
##  3 Animal                       1327
##  4 Other Non-Collision          1442
##  5 Other Object                 4180
##  6 Pedal Cyclist                5289
##  7 Overturned                   7710
##  8 Pedestrian                  13025
##  9 Parked Car                  75498
## 10 Fixed Object               113414
## 11 Motor Vehicle in Transport 767334
# Roadtype

sort(unique(df$RoadType))
## [1] "City Street"         "County Road"         "Farm To Market"     
## [4] "Interstate"          "Non Trafficway"      "Other Roads"        
## [7] "Toll Bridges"        "Tollway"             "US & State Highways"
# Intersection

sort(unique(df$Intersection))
## [1] "Driveway Access"      "Intersection"         "Intersection Related"
## [4] "Non-Intersection"     "Not Reported"
# Number of lanes

sort(unique(df$Nbr_Of_Lane))
##  [1]  2  3  4  5  6  7  8  9 10 11 12
summary(df$Nbr_Of_Lane)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##     2.0     4.0     6.0     6.4     8.0    12.0  710394
# Reported street name

head(sort(unique(df$Rpt_Street_Name)))
## [1] "0"                "0 SH 99"          "0 SPUR 330"      
## [4] "000 CENTER"       "000 INDEPENDENCE" "0000 CENTER"
# Street name (after cleanup?)

head(sort(unique((df$Street_Name))))
## [1] "000 INDEPENDENCE PKWY" "0000 CENTER ST"        "1 ST ST"              
## [4] "10"                    "10 FWY"                "10 HWY"
# Save the dataset

saveRDS(df, "~/Dropbox/Rprojects/Collisions/Collisions_2010_2018.RDS")