Vaccine Reluctance

Let’s look at Texas counties and test various factors for correlations to the vaccination rate.

We’ll primarily look at the rate of the first vaccination, since there are a variety of reasons why someone might not get the second dose.

Let’s start with the raw rates of vaccination by county.

Vaccine %>% 
  mutate(Pct_one_dose=People_one_dose/Pop_total) %>% 
  ggplot(aes(x=Date, y=Pct_one_dose, color=County)) +
    geom_line(show.legend = FALSE) +
    labs(x="Date", y="First Dose Percentage", title="Texas counties Vaccine Progress")

Hmmm… let’s do a little cleanup. We’ll pull out the offenders first and look at them.

foo <- 
Vaccine %>% 
  mutate(Pct_one_dose=People_one_dose/Pop_total) %>% 
  filter(!is.na(Pct_one_dose)) %>% 
  group_by(County) %>% 
    mutate(maxdose=max(Pct_one_dose)) %>% 
  ungroup() %>% 
  select(County, Date, People_one_dose, Pct_one_dose, maxdose) %>% 
  filter(maxdose>0.7)  

#   Cleanup

#   Set specific errors to na

Vaccine$People_one_dose[(Vaccine$County=="Cochran") & 
                      (Vaccine$Date>"2021-02-05")] <- NA
Vaccine$People_one_dose[(Vaccine$County=="Cottle") & 
                      (Vaccine$Date>"2020-12-27") & 
                      Vaccine$Date<"2021-03-24"] <- NA
Vaccine$People_one_dose[(Vaccine$County=="Edwards") & 
                      (Vaccine$Date>"2021-03-13") & 
                      Vaccine$Date<"2021-04-01"] <- NA

#   Delete data showing decrease and interpolate to infill

Vaccine <- 
  Vaccine %>% 
  group_by(County) %>% 
    mutate(Daily_dose=People_one_dose-lag(People_one_dose)) %>% 
  ungroup()

Vaccine$Daily_dose <- replace_na(Vaccine$Daily_dose, 0)

Vaccine$People_one_dose[Vaccine$Daily_dose<0] <- NA

Vaccine <- Vaccine %>% 
    #mutate(Daily_dose=zoo::na.approx(Daily_dose, na.rm=FALSE)) %>% 
    #mutate(People_one_dose=zoo::na.approx(People_one_dose, na.rm=FALSE))  
    mutate(Daily_dose=imputeTS::na_interpolation(Daily_dose, maxgap=5)) %>%   
    mutate(People_one_dose=imputeTS::na_interpolation(People_one_dose, maxgap=5))  
  
#   Replot 

Vaccine %>% 
  mutate(Pct_one_dose=100*People_one_dose/Pop_total) %>% 
  ggplot(aes(x=Date, y=Pct_one_dose, color=County)) +
    geom_line(show.legend = FALSE) +
    labs(x="Date", y="First Dose Percentage", title="Texas counties Vaccine Progress")

I think that looks better. At least the nonsensical data is gone.

Now let’s look at the distribution.

Vaccine %>% 
  #filter(Date=="2021-05-01") %>% 
  filter(Date==last(Date)) %>% 
  mutate(Pct_one_dose=100*People_one_dose/Pop_total) %>% 
  #arrange(Pct_one_dose)
  ggplot(aes(x=Pct_one_dose)) +
    geom_histogram() +
    labs(title="Percent Vaccinated With One Dose By Texas County",
         x="Percent Vaccinated",
         subtitle=paste("As of", today_date))

Let’s look at the progress to getting two doses. We’ll plot the number who are late getting a second dose by summing first doses up to 3 weeks ago, and subtracting that from the total fully vaxed.

Vaccine %>% 
  mutate(One_dose=if_else((Date<=today()-28), People_one_dose, 0)) %>% 
  group_by(County) %>% 
    summarise(Full=sum(People_fully, na.rm = TRUE), 
              One=sum(One_dose, na.rm = TRUE)) %>% 
  ungroup() %>% 
  mutate(Missed_pct=-100*(Full-One)/One) %>% 
  filter(Missed_pct>0, Missed_pct<100) %>% 
  ggplot(aes(x=Missed_pct)) + 
  geom_histogram() + 
  labs(x="Percent Missing Second Dose",
       y="Number of counties",
       title="Estimated Percent of First Dose Missing Second, by County")

Vaccine %>% 
  mutate(One_dose=if_else((Date<=today()-28), People_one_dose, 0)) %>% 
  group_by(County) %>% 
    summarise(Full=sum(People_fully, na.rm = TRUE), 
              One=sum(One_dose, na.rm = TRUE)) %>% 
  ungroup() %>% 
  mutate(Missed_pct=-100*(Full-One)/One) %>% 
  filter(Missed_pct>15, Missed_pct<100)
## # A tibble: 16 × 4
##    County        Full       One Missed_pct
##    <chr>        <dbl>     <dbl>      <dbl>
##  1 Crane       542347   716324        24.3
##  2 Dallam     1027469  1307588        21.4
##  3 Dimmit     2283264  3181835        28.2
##  4 Foard       174962   209760.       16.6
##  5 Hartley     891479  1054773        15.5
##  6 Jim Hogg   1013028  1220949        17.0
##  7 King         16489    20710.       20.4
##  8 Loving       11597    15175        23.6
##  9 Martin      596776   717627        16.8
## 10 Maverick  14308841 18632908        23.2
## 11 Moore      3110283  3760094        17.3
## 12 Motley      113568   163073        30.4
## 13 Reeves     2755792  4694902.       41.3
## 14 Sherman     342103   419349        18.4
## 15 Val Verde 10030442 12245956        18.1
## 16 Webb      75659991 91180428.       17.0

Let’s take a look at the data by regions

DataArchive <- "/home/ajackson/Dropbox/mirrors/ajackson/SharedData/"
MSA_raw <- readRDS(paste0(DataArchive, "Texas_MSA_Pop_Counties.rds")) 

#  Add vaccine to MSA's 

MSA <- MSA_raw %>% 
  unnest(Counties) %>% 
  rename(County=Counties) %>% 
  left_join(Vaccine, ., by="County") %>% 
  group_by(MSA, Date) %>% 
    summarise(People_one_dose=sum(People_one_dose, na.rm = TRUE),
              People_fully=sum(People_fully, na.rm = TRUE),
              Pop_total=sum(Pop_total, na.rm = TRUE),
              Population=unique(Population)) %>% 
    mutate(Daily_dose=People_fully-lag(People_fully)) %>% 
  ungroup() %>% 
  mutate(Pct_fully=100*People_fully/Pop_total) 

MSA$med <- zoo::rollmedian(MSA$Pct_fully, 9, 
                    fill=c(0, NA, last(MSA$Pct_fully)))  

# Pull out top 5 and bottom 5

MSA_extremes <- 
  MSA %>% 
  group_by(MSA) %>% 
    summarize(med=max(med), MSA=unique(MSA)) %>% 
    arrange(-med) %>% 
  ungroup() 

  
MSA_top <- slice_head(MSA_extremes, n=4)
MSA_bot <- slice_tail(MSA_extremes, n=4)
MSA_topbot <- bind_rows(MSA_top, MSA_bot)

MSA_extremes <- inner_join(MSA, MSA_topbot, by="MSA") %>% 
  rename(med=med.x, ordering=med.y) %>% 
  arrange(-ordering)

#  MSA$Pct_one_dose[MSA$Daily_dose<0] <- NA
# Plot region progress

MSA %>% 
  ggplot(aes(x=Date, y=med, color=MSA)) + 
  geom_line(aes(group=MSA),colour = alpha("grey", 0.7),
            show.legend = FALSE) + 
  geom_line(data=MSA_extremes,
            aes(color=fct_reorder(MSA, -ordering))) + 
  labs(x="Date",
       y="Percent Fully Vaxed",
       title="MSA regions and Vaccination history",
       subtitle=paste("As of", today_date))

Let’s plot new cases vs. Vaccine status by County

Vaccine_last <- Vaccine %>% 
  group_by(County) %>% 
    summarise(Vax=last(100*People_fully/Pop_total), 
              County=unique(County)) %>% 
  ungroup()

Cases_last <- Covid %>% 
  group_by(County) %>% 
    summarise(New_cases=last(avg_active_cases_percap), County=unique(County)) %>% 
  ungroup()

Vax_Cases <- left_join(Cases_last, Vaccine_last, by="County")

Vax_Cases %>% 
  ggplot(aes(x=New_cases, y=Vax)) + 
  geom_point() + 
  geom_smooth(method="lm") + 
  scale_x_continuous(trans='log10') + 
  labs(x="Active Cases per 100,000",
       y="Percent Vaccinated",
       title="Active Cases vs. Percent Vaccinated by County in Texas",
       subtitle=paste("As of", today_date))

Hmmm…. that doesn’t make sense. Slightly more cases where there are more people vaccinated?

Let’s try something different. Let’s look at the positive case rate only in the unvaccinated cohort.

Vaccine_last <- Vaccine %>% 
  group_by(County) %>% 
    summarise(Vax=last(People_fully), 
              Pop=last(Pop_total),
              County=unique(County)) %>% 
  ungroup()

Cases_last <- Covid %>% 
  group_by(County) %>% 
    arrange(Date) %>% 
    summarise(Active=last(active_cases), 
              County=unique(County)) %>% 
  ungroup()

Vax_Cases <- left_join(Cases_last, Vaccine_last, by="County") %>% 
  mutate(New_cases=Active*1.e5/(Pop-Vax), Vax_pct=100*Vax/Pop)  

Vax_Cases %>% 
  ggplot(aes(y=New_cases, x=Vax_pct)) + 
  geom_point() + 
  geom_smooth(method="lm") + 
  scale_y_continuous(trans='log10') + 
  labs(y="Active Cases per 100,000 Un-Vaxxed",
       x="Percent Vaccinated",
       title="Active Cases vs. Percent Vaccinated by County in Texas",
       subtitle=paste("As of", today_date))

Still doesn’t make sense. Maybe the individual county stats are too noisy - small rural counties imply small numbers imply noisy data.

Let’s combine counties into MSA’s to get better behaved statistics.

MSA_case <- MSA_raw %>% 
  unnest(Counties) %>% 
  rename(County=Counties) %>% 
  left_join(Covid, ., by="County") %>% 
  group_by(MSA, Date) %>% 
    summarise(Cases=sum(Cases, na.rm = TRUE),
              New_cases=sum(new_cases, na.rm = TRUE),
              Deaths=sum(Deaths, na.rm = TRUE),
              New_Deaths=sum(new_deaths, na.rm = TRUE),
              Active=sum(active_cases, na.rm = TRUE),
              new_tests=sum(new_tests, na.rm=TRUE)
              ) %>% 
  ungroup() 

MSA_final <- 
MSA_case %>% 
  group_by(MSA) %>% 
    slice(tail(row_number(),21)) %>%
    summarize(avg=mean(New_cases, na.rm=TRUE),
              New_Deaths=mean(New_Deaths, na.rm=TRUE),
              new_tests=mean(new_tests, na.rm=TRUE)) %>% 
  ungroup()

MSA_vax <- 
  MSA %>% 
  group_by(MSA) %>% 
    slice(tail(row_number(),21)) %>%
    summarize(Fully=mean(People_fully, na.rm=TRUE),
              Pop=last(Pop_total),
              Tot_Pop=last(Population)) %>% 
  ungroup()
  

MSA_all <- left_join(MSA_final, MSA_vax, by="MSA") %>% 
  mutate(UnvaxCase=avg*1.e5/(Pop-Fully),
         Vax_pct=100*Fully/Pop,
         Cases_per_100k=1.0e5*avg/Tot_Pop,
         Cases_per_unvax=1.0e5*avg/(Tot_Pop-Fully),
         Percap_tests=new_tests*1.0e5/Tot_Pop,
         Percap_deaths_unvax=New_Deaths*1.0e5/(Tot_Pop-Fully)
         ) 

#           Tests vs vax
p1 <- MSA_all %>% 
  #filter(Cases_per_100k<10) %>% 
  ggplot(aes(y=Percap_tests, x=Vax_pct)) + 
  geom_point() + 
  geom_smooth(method="lm") + 
  labs(y="Tests per 100,000",
       x="Percent Vaccinated",
       title="Avg Covid Tests vs. Percent Vaccinated by Metro Area in Texas",
       subtitle=paste("As of", today_date))

#         Cases vs Vaxed
p2 <- MSA_all %>% 
  #filter(Cases_per_100k<10) %>% 
  ggplot(aes(y=Cases_per_100k, x=Vax_pct)) + 
  geom_point() + 
  geom_smooth(method="lm") + 
  labs(y="Avg Cases per 100,000",
       x="Percent Vaccinated",
       title="Avg Covid Cases vs. Percent Vaccinated by Metro Area in Texas",
       subtitle=paste("As of", today_date))

#         Deaths vs Vaxed
p3 <- MSA_all %>% 
  #filter(Cases_per_100k<10) %>% 
  ggplot(aes(y=Percap_deaths_unvax, x=Vax_pct)) + 
  geom_point() + 
  geom_smooth(method="lm") + 
  labs(y="Deaths per 100,000 Unvaxed",
       x="Percent Vaccinated",
       title="Avg Covid Deaths vs. Percent Vaccinated by Metro Area in Texas",
       subtitle=paste("As of", today_date))

#         Deaths vs Tests
p4 <- MSA_all %>% 
  #filter(Cases_per_100k<10) %>% 
  ggplot(aes(y=Percap_deaths_unvax, x=Percap_tests)) + 
  geom_point() + 
  geom_smooth(method="lm") + 
  labs(y="Deaths per 100,000 Unvaxed",
       x="Tests per 100,000",
       title="Avg Covid Deaths vs. Covid Tests by Metro Area in Texas",
       subtitle=paste("As of", today_date))

print(p1)

print(p2)

print(p3)

print(p4)

#grid.arrange(p1,p2,p3,p4)

Well, still a bit mysterious. Fewer tests in lower vaxed counties - is that test reluctance or fewer people with symptoms?

Deaths also not making sense - though deaths are a lagging indicator, and I’m not sure how well they tag deaths back to the county of residence. Do the deaths counted in the county where they were hospitalized and died?

Now let’s try to predict the final vaccination rates based on fitting a logistic function to the percentages.

#---------------------------------------------------    
#------------------- Fit Logistic function ---------
#---------------------------------------------------    
fit_logistic <- function(indep="med", # independent variable
                         df=MSA,
                         MSA=MSA,
                         r=0.24,
                         output=c("fit", "asym"),
                         projection=10){
  
  #print(paste(":::::::  logistic", MSA))
  
  df$Days <- as.integer(df$Date - ymd("2020-12-28"))

  Asym <- max(df$med)*5
  xmid <- max(df$Days)*2
  scal <- 1/r
  my_formula <- as.formula(paste0(indep, " ~ SSlogis(Days, Asym, xmid, scal)"))
  
  #print("----1----")
  
  ## using a selfStart model
    
    logistic_model <- NULL
    try(logistic_model <- nls(my_formula, 
                              data=df)); # does not stop in the case of error
    
    if(is.null(logistic_model)) {
       case_params <<- list(K=NA, 
                            r=NA, 
                            xmid=NA,
                            xmid_se=NA)
      return()
    }
    
  #print("----2----")
  #print(logistic_model)
  coeffs <- coef(logistic_model)
  xmid_sigma <- 2*summary(logistic_model)$parameters[2,2] # 2 sigma
  #print("----3----")
  #print(coeffs)
  #print(summary(logistic_model))
  
  Cases <- predict(logistic_model, data.frame(Days=df$Days))
  if (output=="fit"){ return(Cases) }
  else {return(coeffs)}
} ###############   end of fit_logistic
  
#   Let's try fitting a Logistic to each region as a way to really smooth
# the curves

eraseme <- 
MSA %>% 
  mutate(Days=as.integer(Date - ymd("2020-12-28"))) %>% 
  select(MSA, Days, med, Date) %>% 
  complete(nesting(MSA), Days=seq(min(Days), max(Days), 1L)) %>% 
  nest(-MSA) %>% 
    mutate(fit=map(data, ~ fit_logistic(df=., MSA=MSA, output="fit"))) %>% 
  unnest(data,fit)

eraseme %>% 
  ggplot(aes(x=Date, color=MSA)) + 
  geom_line(aes(group=MSA, y=fit),colour = alpha("grey", 0.7),
            show.legend = FALSE) + 
  geom_point(aes(y=med)) + 
  labs(x="Date",
       y="Percent Vaxed",
       title="MSA regions and Vaccination history",
       subtitle=paste("As of", today_date))

asymptotes <- 
MSA %>% 
  mutate(Days=as.integer(Date - ymd("2020-12-28"))) %>% 
  select(MSA, Days, med, Date) %>% 
  complete(nesting(MSA), Days=seq(min(Days), max(Days), 1L)) %>% 
  nest(-MSA) %>% 
    mutate(Coeffs = map(data, ~ fit_logistic(df=.,
                                                MSA=MSA, 
                                                output="asym"))) %>% 
  unnest_wider(Coeffs)

asymptotes <- asymptotes %>% 
  select(MSA, Asym, xmid, scal) %>% 
  left_join(., MSA, by="MSA") %>% 
  select(MSA, Asym, xmid, scal, Population, Pop_total) %>% 
  group_by(MSA) %>% 
    filter(row_number()==n()) %>% 
  ungroup() %>% 
  mutate(midDate=xmid + ymd("2020-12-28"))

asymptotes %>% 
  arrange(-Asym) %>% 
  select(MSA, Asym, Pop_total) %>% 
  mutate(Asym=Asym/100) %>% 
  gt() %>% 
  tab_header(title="Texas Regions: Logistic Fit Asymptotes",
             subtitle="Expected final percent vaccinated") %>% 
  fmt_percent(columns=2, decimals=1) %>% 
  fmt_number(columns=3, decimals=0) %>% 
  cols_label(MSA= md("**Metro Stat Area**"),
             Asym= md("**Asymptote**"),
             Pop_total= md("**Eligible (12+) Population**"))
Texas Regions: Logistic Fit Asymptotes
Expected final percent vaccinated
Metro Stat Area Asymptote Eligible (12+) Population
Laredo 90.5% 276,652
Brownsville 75.6% 423,163
McAllen 72.2% 868,707
El Paso 70.3% 844,124
Austin 64.0% 2,227,083
San Antonio 59.8% 2,550,960
Houston-Galv 59.8% 7,066,141
Dallas-Fort Worth 55.9% 7,643,907
Corpus Christi 55.1% 452,534
Waco 49.3% 273,920
College Station-Bryan 48.3% 264,728
Victoria 47.1% 99,742
moderate 46.3% 1,819,676
San Angelo 46.0% 120,736
Lubbock 45.5% 322,257
Tyler 44.8% 232,751
Abilene 43.4% 172,060
small 43.3% 1,140,033
Wichita Falls 43.3% 151,254
Amarillo 43.2% 265,053
Beaumont-Port Arthur 43.1% 406,158
Killeen-Temple 42.0% 460,303
Longview 41.7% 220,104
Sherman 41.5% 136,212
Odessa 41.0% 166,223
Midland 40.4% 182,603
tiny 39.1% 115,552
Texarkana 34.6% 93,245
asymptotes %>% 
  arrange(-Asym) %>% 
  select(MSA, Asym, Pop_total) %>% 
  mutate(Asym=Asym/100) %>%
  mutate(vaxed=Asym*Pop_total) %>% 
  summarize(allvaxed=sum(vaxed), allpop=sum(Pop_total))
## # A tibble: 1 × 2
##    allvaxed   allpop
##       <dbl>    <dbl>
## 1 16429594. 28995881

Rather depressing. Good looking fits, implying that unless something changes, many counties will get no where near herd immunity levels. It looks like a surge of Delta cases waiting to happen.

Let’s look at vaccine vs vote

Votes <- Votes %>% 
  mutate(County=str_to_title(County))

Votes[Votes$County=='Mcculloch',]$County <- "McCulloch"
Votes[Votes$County=='Mclennan',]$County <- "McLennan"
Votes[Votes$County=='Mcmullen',]$County <- "McMullen"

Vax_vote <- left_join(Votes, Vaccine_last, by="County") %>% 
  mutate(Vax_pct=Vax/Pop) 

rects <- data.frame(ymin = -Inf, 
                    ymax = Inf,
                    xmin = c(-Inf,0.45,0.55),  
                    xmax = c(0.45,0.55,Inf),
                    fill = c("red", "magenta", "blue"))
Vax_vote %>% 
  ggplot(aes(y=Vax_pct, x=Blueness)) +
  geom_rect(data = rects, aes(xmin = xmin, 
                              xmax = xmax, 
                              ymin = ymin, 
                              ymax = ymax, 
                              fill = fill), 
            # Control the shading opacity here.
            inherit.aes = FALSE, alpha = 0.15) +
  scale_fill_identity() +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  geom_point() +
  geom_smooth(method="lm") +
  geom_text_repel(data=subset(Vax_vote, Vax_pct < 0.30 | 
                          Vax_pct > 0.60 |
                          Blueness > 0.55),
            aes(Blueness,Vax_pct,label=County), hjust=0,
                nudge_x=.008) +
  labs(x="Fraction of Vote For Biden",
       y="Percent Vaccinated",
       title="Vaccination Rate per Texas County",
       subtitle=paste("As of", today_date),
       #subtitle=stamp("March 1, 1999")(today()-1),
       caption="Data from https://dshs.texas.gov/coronavirus/AdditionalData.aspx")

#  geom_text_repel(aes(label=County))
 
#   Vaccination rates vs demographics

Demographics <- readRDS(paste0(DataDemo, "County_Age_Sex.rds"))

Demo_data <- Demographics %>% 
  mutate(Pct_over_65=0.01*(Total_pct_90plus +
                     Total_pct_85to89 +
                     Total_pct_80to84 +
                     Total_pct_75to79 +
                     Total_pct_70to74 +
                     Total_pct_65to69)) %>% 
  select(CNTY_NM, FIPS, Pct_over_65)

Vax_vote <- Vax_vote %>% 
  left_join(., Demo_data, by=c("County"="CNTY_NM")) 
  
Vax_vote %>% 
  ggplot(aes(y=Vax_pct, x=Pct_over_65)) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  scale_x_continuous(labels = scales::percent_format(accuracy = 1)) +
  geom_point() +
  geom_smooth(method="lm") +
  geom_text_repel(data=subset(Vax_vote, Vax_pct < 0.30 | 
                          Vax_pct > 0.60 |
                          Blueness > 0.55),
            aes(Pct_over_65,Vax_pct,label=County), hjust=0,
                nudge_x=.008) +
  labs(x="Percent of Population over 65",
       y="Percent Vaccinated",
       title="Vaccination Rate per Texas County",
       subtitle=paste("As of", today_date),
       #subtitle=stamp("March 1, 1999")(today()-1),
       caption="Data from https://dshs.texas.gov/coronavirus/AdditionalData.aspx")

Best correlation I’ve gotten yet. And the saddest. Politicians encouraging people to make bad decisions and die for political gain.

Let’s look at the history of case counts vs. politics in 2-month windows.

#   Shrink data to what will be needed and sum in 2 month window

df <- 
Covid %>% 
  select(County, Cases, Deaths, Date, Population) %>% 
  mutate(End_date=lubridate::ceiling_date(Date, unit="month")-1) %>% 
  filter(Date==End_date) %>% # pull out last day of every month
  group_by(County) %>% 
    mutate(Monthly_cases=pmax((Cases-lag(Cases, default=0)),0),
           Monthly_deaths=pmax((Deaths-lag(Deaths, default=0)),0)
           ) %>% 
  ungroup() %>% 
  mutate(bucket = case_when(
    month(Date) < 3  ~ "Jan-Feb",
    month(Date) < 5  ~ "Mar-Apr",
    month(Date) < 7  ~ "May-Jun",
    month(Date) < 9  ~ "Jul-Aug",
    month(Date) < 11 ~ "Sep-Oct",
    month(Date) < 13 ~ "Nov-Dec",
    TRUE             ~ "Unknown"
  )) %>% 
  mutate(bucket=ifelse(year(Date)<2021, 
                       paste(bucket, "2020"), 
                       paste(bucket, "2021"))) %>% 
  group_by(County,bucket) %>% 
    summarize(Cases=sum(Monthly_cases),
              Deaths=sum(Monthly_deaths),
              Population=unique(Population)) %>% 
  ungroup()
## `summarise()` has grouped output by 'County'. You can override using the
## `.groups` argument.
df$bucket <- factor(df$bucket, levels=c("Jan-Feb 2020", "Mar-Apr 2020",
                                        "May-Jun 2020", "Jul-Aug 2020",
                                        "Sep-Oct 2020", "Nov-Dec 2020",
                                        "Jan-Feb 2021", "Mar-Apr 2021",
                                        "May-Jun 2021", "Jul-Aug 2021",
                                        "Sep-Oct 2021", "Nov-Dec 2021",
                                        "Unknown 2021"))
#   Add in votes

df <- left_join(df, Votes, by="County")

#  plots

df %>%
  mutate(Cases_per_cap=1.e5*Cases/Population) %>% 
  ggplot(aes(x=Blueness, y=Cases_per_cap)) + 
  geom_point() + 
  geom_smooth(method="lm") +
  scale_y_continuous(trans='log10') + 
  facet_wrap(vars(bucket), scales="free_y") + 
  labs(x="Fraction In County Voting for Biden",
       y="Cases per 100,000",
       title="Covid Cases by Texas County in Two Month Intervals",
       subtitle=paste("As of", today_date))
## Warning: Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous y-axis
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 46 rows containing non-finite values (stat_smooth).
## Warning: Removed 22 rows containing missing values (geom_point).

df %>%
  mutate(Cases_per_cap=1.e5*Deaths/Population) %>% 
  ggplot(aes(x=Blueness, y=Cases_per_cap)) + 
  geom_point() + 
  geom_smooth(method="lm") +
  scale_y_continuous(trans='log10') + 
  facet_wrap(vars(bucket), scales="free_y") + 
  labs(x="Fraction In County Voting for Biden",
       y="Deaths per 100,000",
       title="Covid Deaths by Texas County in Two Month Intervals",
       subtitle=paste("As of", today_date))
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 521 rows containing non-finite values (stat_smooth).
## Warning: Removed 22 rows containing missing values (geom_point).

#    What is case vs death rates for various counties?  
  
df %>%
  mutate(Cases_per_cap=Deaths/Cases) %>% 
  ggplot(aes(x=Blueness, y=Cases_per_cap)) + 
  geom_point() + 
  geom_smooth(method="lm") +
  scale_y_continuous(trans='log10') + 
  facet_wrap(vars(bucket), scales="free_y") + 
  labs(x="Fraction In County Voting for Biden",
       y="Deaths per Case",
       title="Covid Deaths per Case by Texas County in Two Month Intervals",
       subtitle=paste("As of", today_date))
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 523 rows containing non-finite values (stat_smooth).
## Warning: Removed 44 rows containing missing values (geom_point).

It does look like supporting Trump has amounted to a death sentence for many.

Let’s bring in the hospitalization data and add that into the mix

#   First Trauma Service Areas since that is how the hospital data is set

inpath <- "/home/ajackson/Dropbox/Rprojects/Curated_Data_Files/Texas_Trauma_Service_Areas/"

TSA <- readRDS(paste0(inpath, "Trauma_Service_Areas.rds"))

#   Read in hospitalization data

inpath <- "/home/ajackson/Dropbox/Rprojects/CovidTempData/DailyBackups/"

Hospital <- readRDS(paste0(inpath,"2022-03-21_Hospital.rds"))

Last_date <- last(Hospital$Date)

#   Attach TSA to Vaccine and Case files, then summarize by TSA

Vax_TSA <- Vaccine %>% 
  select(County, Date, People_fully, Pop_teen, Pop_total) %>% 
  left_join(., TSA, by="County") %>% 
  rename(TSA_Area = Area_name) %>% 
  group_by(TSA_Area, Date) %>% 
    summarise(People_fully=sum(People_fully, na.rm=TRUE),
              Pop_teen=sum(Pop_teen, na.rm = TRUE),
              Pop_total=sum(Pop_total, na.rm=TRUE)) %>% 
  left_join(., Hospital, by=c("TSA_Area", "Date")) %>% 
  select(-TSA_ID) %>% 
  filter(Date<=Last_date)
## `summarise()` has grouped output by 'TSA_Area'. You can override using the
## `.groups` argument.
#   Now add in votes

Votes[Votes$County=='Dewitt',]$County <- "DeWitt"

Votes_TSA <- Votes %>% 
  left_join(., TSA, by="County") %>% 
  rename(TSA_Area = Area_name) %>% 
  group_by(TSA_Area) %>% 
    summarize(Biden=sum(Biden, na.rm=TRUE),
              Trump=sum(Trump, na.rm=TRUE)) %>% 
  mutate(Blueness=Biden/(Trump+Biden))

Vax_TSA <- Vax_TSA %>% 
  left_join(., Votes_TSA, by="TSA_Area")

#   Most recent values

Vax_TSA_now <- Vax_TSA %>% 
  mutate(Hospitalized=as.numeric(Hospitalized)) %>% 
  filter(Date==Last_date) %>% 
  mutate(Vax_pct=People_fully/Pop_total) %>%
  mutate(Hosp_percap=1.e5*Hospitalized/Pop_total)


#   Now plot like crazy

#         Vax rate vs. vote
Vax_TSA_now %>% 
  ggplot(aes(y=Vax_pct, x=Blueness)) +
  geom_rect(data = rects, aes(xmin = xmin, 
                              xmax = xmax, 
                              ymin = ymin, 
                              ymax = ymax, 
                              fill = fill), 
            # Control the shading opacity here.
            inherit.aes = FALSE, alpha = 0.15) +
  scale_fill_identity() +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  geom_point() +
  geom_smooth(method="lm") +
  geom_text_repel(data=subset(Vax_TSA_now, Vax_pct < 0.38 | 
                          Vax_pct > 0.65 |
                          Blueness > 0.55),
            aes(Blueness,Vax_pct,label=TSA_Area), hjust=0,
                nudge_x=.005) +
  labs(x="Fraction of Vote For Biden",
       y="Percent Vaccinated",
       title="Vaccination Rate per Texas Trauma Service Area",
       subtitle=paste("As of", today_date))
## `geom_smooth()` using formula 'y ~ x'

      # subtitle=stamp("March 1, 1999")(Last_date))  

#         Hospitalization rate vs. vote

Vax_TSA_now %>% 
  ggplot(aes(y=Hosp_percap, x=Blueness)) +
  geom_rect(data = rects, aes(xmin = xmin, 
                              xmax = xmax, 
                              ymin = ymin, 
                              ymax = ymax, 
                              fill = fill), 
            # Control the shading opacity here.
            inherit.aes = FALSE, alpha = 0.15) +
  scale_fill_identity() +
  #scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  geom_point() +
  geom_smooth(method="lm") +
  geom_text_repel(data=subset(Vax_TSA_now, Hosp_percap > 10),
            aes(Blueness,Hosp_percap,label=TSA_Area), hjust=0,
                nudge_x=.005) +
  labs(x="Fraction of Vote For Biden",
       y="Covid Hospitalizations per 100,000",
       title="Covid Hospitalization Rate per Texas Trauma Service Area",
       subtitle=paste("As of", today_date))
## `geom_smooth()` using formula 'y ~ x'

       #subtitle=stamp("March 1, 1999")(Last_date))  

#         Hospitalization rate vs. Vaccination Rate

Vax_TSA_now %>% 
  ggplot(aes(y=Hosp_percap, x=Vax_pct)) +
  scale_x_continuous(labels = scales::percent_format(accuracy = 1)) +
  geom_point() +
  geom_smooth(method="lm") +
  geom_text_repel(data=subset(Vax_TSA_now, Hosp_percap > 10),
            aes(Vax_pct,Hosp_percap,label=TSA_Area), hjust=0,
                nudge_x=.005) +
  labs(x="Percent Vaccinated",
       y="Covid Hospitalizations per 100,000",
       title="Covid Hospitalization Rate per Texas Trauma Service Area",
       subtitle=paste("As of", today_date))
## `geom_smooth()` using formula 'y ~ x'

       #subtitle=stamp("March 1, 1999")(Last_date))  

#   Plots vs time

Vax_TSA <- Vax_TSA %>% 
  mutate(Hospitalized=as.numeric(Hospitalized)) %>% 
  mutate(Vax_pct=People_fully/Pop_total) %>%
  mutate(Hosp_percap=1.e5*Hospitalized/Pop_total)

# Pull out top 5 and bottom 5

Vax_TSA_extremes <- 
  Vax_TSA %>% 
  group_by(TSA_Area) %>% 
    summarize(Hosp_percap=last(Hosp_percap), TSA_Area=unique(TSA_Area)) %>% 
    arrange(-Hosp_percap) %>% 
  ungroup() 

  
TSA_top <- slice_head(Vax_TSA_extremes, n=4)
TSA_bot <- slice_tail(Vax_TSA_extremes, n=4)
TSA_topbot <- bind_rows(TSA_top, TSA_bot)

TSA_extremes <- inner_join(Vax_TSA, TSA_topbot, by="TSA_Area") %>% 
  rename(Hosp_percap=Hosp_percap.x, ordering=Hosp_percap.y) %>% 
  arrange(-ordering)

# Plot region progress

Vax_TSA %>% 
  ggplot(aes(x=Date, y=Hosp_percap, color=TSA_Area)) + 
  geom_line(aes(group=TSA_Area),colour = alpha("grey", 0.7),
            show.legend = FALSE) + 
  geom_line(data=TSA_extremes,
            aes(color=fct_reorder(TSA_Area, -ordering))) + 
  labs(x="Date",
       y="Covid Hospitalizations per 100,000",
       title="Covid Hospitalization Rate per Texas Trauma Service Area",
       subtitle=paste("As of", today_date))

       #subtitle=stamp("March 1, 1999")(Last_date))  

#   Vaccination pct vs time by TSA


# Pull out top 5 and bottom 5 by vax

Vax_TSA_extremes <- 
  Vax_TSA %>% 
  group_by(TSA_Area) %>% 
    summarize(Vax_pct=last(Vax_pct), TSA_Area=unique(TSA_Area)) %>% 
    arrange(-Vax_pct) %>% 
  ungroup() 

  
TSA_top <- slice_head(Vax_TSA_extremes, n=4)
TSA_bot <- slice_tail(Vax_TSA_extremes, n=4)
TSA_topbot <- bind_rows(TSA_top, TSA_bot)

TSA_extremes <- inner_join(Vax_TSA, TSA_topbot, by="TSA_Area") %>% 
  rename(Vax_pct=Vax_pct.x, ordering=Vax_pct.y) %>% 
  arrange(-ordering)

# Plot region progress

Vax_TSA %>% 
  ggplot(aes(x=Date, y=Vax_pct, color=TSA_Area)) + 
  geom_line(aes(group=TSA_Area),colour = alpha("grey", 0.7),
            show.legend = FALSE) + 
  geom_line(data=TSA_extremes,
            aes(color=fct_reorder(TSA_Area, -ordering))) + 
  geom_hline(yintercept=0.50, color="black", size=0.5, 
             show.legend = FALSE) +
  labs(x="Date",
       y="Vaccination percentage",
       title="Vaccination Rate Rate per Texas Trauma Service Area",
       subtitle=paste("As of", today_date))

       #subtitle=stamp("March 1, 1999")(Last_date))  

Clearer picture in these areas. Fewer vaccinations imply more hospitalizations.

Votes for Trump imply more hospitalizations.

Next let’s try out some trajectory plots.

# Pull out top 5 and bottom 5 by hospitalization

Vax_TSA_extremes_hosp <- 
  Vax_TSA %>% 
  group_by(TSA_Area) %>% 
    summarize(Hosp_percap=last(Hosp_percap), TSA_Area=unique(TSA_Area)) %>% 
    arrange(-Hosp_percap) %>% 
  ungroup() 

  
TSA_top_hosp <- slice_head(Vax_TSA_extremes_hosp, n=4)
TSA_bot_hosp <- slice_tail(Vax_TSA_extremes_hosp, n=4)
TSA_topbot_hosp <- bind_rows(TSA_top_hosp, TSA_bot_hosp)

TSA_extremes_hosp <- inner_join(Vax_TSA, TSA_topbot_hosp, by="TSA_Area") %>% 
  rename(Hosp_percap=Hosp_percap.x, ordering=Hosp_percap.y) %>% 
  arrange(-ordering)

Vax_TSA %>% 
  ggplot(aes(x=Vax_pct, 
             y=Hosp_percap,
             color=TSA_Area)) +
  geom_line(aes(group=TSA_Area),colour = alpha("grey", 0.7),
            show.legend = FALSE) + 
  geom_line(data=TSA_extremes_hosp,
            aes(color=fct_reorder(TSA_Area, -ordering))) + 
  geom_vline(xintercept=0.50, color="black", size=0.5, 
             show.legend = FALSE) +
  labs(x="Percent Vaccinated (12+ Population)",
       y="Covid Hospitalizations per 100,000",
       title="Hospitalization/Vaccination Trajectories per Texas Trauma Service Area",
       subtitle=paste("As of", today_date))

#   Attach TSA to Case file, then summarize by TSA

Case_TSA <- Covid %>% 
  select(County, Date, Deaths, Cases, new_cases, new_deaths, Population) %>% 
  left_join(., TSA, by="County") %>% 
  rename(TSA_Area = Area_name) %>% 
  filter(Date>lubridate::ymd("2021-01-01")) %>% 
  group_by(TSA_Area, Date) %>% 
    summarise(Deaths=sum(Deaths, na.rm=TRUE),
              Cases=sum(Cases, na.rm = TRUE),
              Pop=sum(Population, na.rm = TRUE),
              new_cases=sum(new_cases, na.rm = TRUE),
              new_deaths=sum(new_deaths, na.rm = TRUE)
              ) %>% 
  mutate(cases_percap=Cases/Pop*100000) %>% 
  mutate(deaths_percap=(Deaths-first(Deaths))/Pop*100000) %>% 
  mutate(deaths_percase=Deaths/Cases) %>% 
  left_join(., Vax_TSA, by=c("TSA_Area", "Date")) %>% 
  filter(!is.na(Vax_pct)) %>% 
  mutate(Vax_pct=Vax_pct*100) %>% 
  group_by(TSA_Area) %>% 
     mutate(Vax_pct=zoo::rollmedian(Vax_pct,
                                          5,
                                          fill=c(0, NA, last(Vax_pct)))) %>% 
  ungroup() %>% 
  filter(Date<=Last_date)
## `summarise()` has grouped output by 'TSA_Area'. You can override using the
## `.groups` argument.
# Pull out top 5 and bottom 5 by hospitalization

Vax_TSA_extremes_case <- 
  Case_TSA %>% 
  group_by(TSA_Area) %>% 
    summarize(deaths_percap=last(deaths_percap), TSA_Area=unique(TSA_Area)) %>% 
    arrange(-deaths_percap) %>% 
  ungroup() 

  
TSA_top_case <- slice_head(Vax_TSA_extremes_case, n=4)
TSA_bot_case <- slice_tail(Vax_TSA_extremes_case, n=4)
TSA_topbot_case <- bind_rows(TSA_top_case, TSA_bot_case)

TSA_extremes_case <- inner_join(Case_TSA, TSA_topbot_case, by="TSA_Area") %>% 
  rename(deaths_percap=deaths_percap.x, ordering=deaths_percap.y) %>% 
  arrange(-ordering)

Case_TSA %>% 
  filter(Date>lubridate::ymd("2021-01-01")) %>% 
#  group_by(TSA_Area) %>% 
#     mutate(deaths_percap=deaths_percap-first(deaths_percap)) %>% 
#  ungroup() %>% 
  ggplot(aes(x=Vax_pct, 
             y=deaths_percap,
             color=TSA_Area)) +
  geom_line(aes(group=TSA_Area),colour = alpha("grey", 0.7),
            show.legend = FALSE) + 
  geom_line(data=TSA_extremes_case,
            aes(color=fct_reorder(TSA_Area, -ordering))) + 
 # geom_vline(xintercept=0.50, color="black", size=0.5, 
 #            show.legend = FALSE) +
  labs(x="Vaccination Percent",
       y="Covid Deaths per 100,000",
       title="Vaccination/Death Trajectories per Texas Trauma Service Area Since January",
       subtitle=paste("As of", today_date))

Vacination rates before and after full approval on August 23.

#  Let's look at 4 week tranches before and after approval

Approval = ymd("2021-08-13")

foo <- 
Vax_TSA %>% 
  mutate(Approval_week=(ceiling(as.numeric(Date-Approval)/7))) %>% 
  group_by(TSA_Area, Approval_week) %>% 
    summarize(All_dose=last(People_fully)-first(People_fully),
              Pop=first(Pop_total),
              n=n()) %>% 
  filter(All_dose>0) %>% 
  mutate(Week_date=Approval+Approval_week*n) %>% 
  mutate(All_dose_percap=All_dose/(Pop/100000)) %>% 
  filter(n==7)  
## `summarise()` has grouped output by 'TSA_Area'. You can override using the
## `.groups` argument.
# Pull out top 5 and bottom 5 by last Vax

Vax_TSA_extremes_case <- 
  foo %>% 
  group_by(TSA_Area) %>% 
    summarize(All_dose_percap=last(All_dose_percap),
              TSA_Area=unique(TSA_Area)) %>% 
    arrange(-All_dose_percap) 

  
TSA_top_case <- slice_head(Vax_TSA_extremes_case, n=4)
TSA_bot_case <- slice_tail(Vax_TSA_extremes_case, n=4)
TSA_topbot_case <- bind_rows(TSA_top_case, TSA_bot_case)

TSA_extremes_case <- inner_join(foo, TSA_topbot_case,
                                by="TSA_Area") %>% 
  rename(All_dose_percap=All_dose_percap.x, 
         ordering=All_dose_percap.y) %>% 
  arrange(-ordering)

 
foo %>% 
  ggplot(aes(y=All_dose_percap, x=Week_date)) +
  #geom_line(aes(color=TSA_Area)) +
  geom_line(aes(group=TSA_Area),colour = alpha("grey", 0.7),
            show.legend = FALSE) + 
  geom_line(data=TSA_extremes_case,
            aes(color=fct_reorder(TSA_Area, -ordering))) + 
  scale_colour_discrete(name="TSA Area") +
  geom_vline(xintercept=Approval, color="black", size=0.5, 
             show.legend = FALSE) +
  geom_text(x=Approval, y=0, label="Full FDA Approval") +
  labs(x="Date",
       y="Number of vaccinations given per 100,000",
       title="Weekly Vaccination in Texas by Trauma Service Area",
       subtitle=paste("As of", today_date))