tiger_us_boundaries__v1.1
This notebook compiles TIGER boundaries and makes them available as an API for reproducible access for CCUH notebooks.
1. Setup
1.1 Dependencies
First, let's load dependencies.
library(pacman)
p_load(tigris, sf, arrow, geoarrow, glue, purrr, furrr, tictoc, cli, terra, reactable, dplyr, stringr, leaflet, duckdb, tidyverse, assertr)
1.2 Upstream Block
We only reuse the CCUH state-level crosswalk. Here we copy the import code from the Notion page.
xwalk_state = arrow::read_parquet('//files.drexel.edu/colleges/SOPH/Shared/UHC/Projects/CCUH/ccuh-server/freeze/xwalk_state_v1/xwalk_state_v1.parquet')
1.3 Downstream Block
block_downstream = lst(
  id = 'tiger10_boundaries__v1.1',
  path = '//files.drexel.edu/colleges/SOPH/Shared/UHC/Projects/CCUH/ccuh-server/freeze/tiger10_boundaries__v1.1',
  api = file.path(path, 'tiger_us_boundaries_v1.parquet'),
  lake_albers_shp = file.path(path, 'lake_albers_shp')
)
2. Data
The TIGER10 FTP files are saved in the format tl_2010_{statefp/us}_{level}{yy}. Some are available via TIGRIS, but all are available on the FTP. The list below shows availability and our strategy for each (a short sketch after the list illustrates the naming template):
- TIGRIS Available
  - COUNTY 2000
  - COUNTY 2010
  - COUSUB 2010
  - COUSUB 2019
  - ZCTA 2000
  - ZCTA 2010
  - TRACT 2000
  - TRACT 2010
- Supplement with FTP
  - COUSUB 2000
  - PLACE 2000
  - PLACE 2010
  - CBSA 2010
For both strategies the files are saved as .shp files in the downstream data block location, using the same naming format and file structure as the TIGER10 FTP archive.
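For orientation, here is a minimal sketch of how the naming template expands and where the resulting files land. `expand_tiger_name` is a hypothetical helper for illustration only, and the example paths are illustrative rather than an exhaustive listing:
```{r}
## Hypothetical helper illustrating the tl_2010_{scope}_{level}{yy} template
expand_tiger_name = function(scope, level, yy) glue("tl_2010_{scope}_{level}{yy}")
expand_tiger_name("us", "county", "10")  # "tl_2010_us_county10" (national file)
expand_tiger_name("42", "tract", "00")   # "tl_2010_42_tract00" (state-level file)

## Illustrative layout under block_downstream$path:
## TIGER10/Geographic Boundaries/COUNTY/2010/tl_2010_us_county10/tl_2010_us_county10.shp
## TIGER10/Cartographic Boundaries/PLACE/2020/tl_2010_01_place20_cb/tl_2010_01_place20_cb.shp
```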
block_downstream
$id
[1] "tiger10_boundaries__v1.1"
$path
[1] "//files.drexel.edu/colleges/SOPH/Shared/UHC/Projects/CCUH/ccuh-server/freeze/tiger10_boundaries__v1.1"
$api
[1] "//files.drexel.edu/colleges/SOPH/Shared/UHC/Projects/CCUH/ccuh-server/freeze/tiger10_boundaries__v1.1/tiger_us_boundaries_v1.parquet"
$lake_albers_shp
[1] "//files.drexel.edu/colleges/SOPH/Shared/UHC/Projects/CCUH/ccuh-server/freeze/tiger10_boundaries__v1.1/lake_albers_shp"2.1 TIGRIS
COUNTY 2000 and 2010
Pretty straightforward: just a single US-level file for each of COUNTY 2000 and 2010.
c("2000",'2010') %>% 
  walk(~{
    
    ## Parameters
    YYYY = .x
    yy = str_sub(YYYY, -2L, -1L)
    scope = 'us'
    level = 'county'
    
    ## SHP paths
    base_name_tmp = glue("tl_2010_{scope}_{level}{yy}")
    shp_folder = file.path(  
      block_downstream$path,
      "TIGER10",
      'Geographic Boundaries',
      str_to_upper(level),
      YYYY,
      base_name_tmp)
    shp_out_tmp = file.path( shp_folder,glue("{base_name_tmp}.shp"))
    
    ## Import if not exists
    if (!file.exists(shp_out_tmp)) {
      if (!dir.exists(shp_folder)) dir.create(shp_folder, recursive = T)
      sf_tigris_results_tmp <- tigris::counties(year = YYYY)
      sf_tigris_results_tmp %>% 
        terra::vect() %>%
        terra::writeVector(shp_out_tmp)
    }
    
  })
COUSUB 2010 and 2019
This one is not available for the whole US, so we have to iterate over state-level files.
c("2010",'2019') %>% walk(~{
  ## Parameters
  YYYY = .x
  yy = str_sub(YYYY, -2L, -1L)
  # scope = 'us'
  level = 'cousub'
  
  ## State level Call
  xwalk_state$state_census_geoid %>% 
    walk(~{
      
      scope = .x
      
      ## SHP paths
      base_name_tmp = glue("tl_2010_{scope}_{level}{yy}")
      shp_folder = file.path(  
        block_downstream$path,
        "TIGER10",
        'Geographic Boundaries',
        str_to_upper(level),
        YYYY,
        base_name_tmp)
      shp_out_tmp = file.path( shp_folder,glue("{base_name_tmp}.shp"))
      
      ## Import if not exists
      if (!file.exists(shp_out_tmp)) {
        if (!dir.exists(shp_folder)) dir.create(shp_folder, recursive = T)
        sf_tigris_results_tmp <- tigris::county_subdivisions(state = scope, year = YYYY)
        sf_tigris_results_tmp %>% 
          terra::vect() %>%
          terra::writeVector(shp_out_tmp)
      }
    })
})
ZCTA 2000 and 2010
c("2000",'2010') %>% 
  walk(~{
    
    ## Parameters
    YYYY = .x
    yy = str_sub(YYYY, -2L, -1L)
    scope = 'us'
    level = 'zcta5'
    
    
    ## SHP paths
    base_name_tmp = glue("tl_2010_{scope}_{level}{yy}")
    shp_folder = file.path(  
      block_downstream$path,
      "TIGER10",
      'Geographic Boundaries',
      str_to_upper(level),
      YYYY,
      base_name_tmp)
    shp_out_tmp = file.path( shp_folder,glue("{base_name_tmp}.shp"))
    
    ## Import if not exists
    if (!file.exists(shp_out_tmp)) {
      if (!dir.exists(shp_folder)) dir.create(shp_folder, recursive = T)
      
      sf_tigris_results_tmp <- tigris::zctas(year = YYYY)
      
      sf_tigris_results_tmp %>% 
        terra::vect() %>%
        terra::writeVector(shp_out_tmp)
    }
  })
TRACT 2000 and 2010
Tracts we pull by state for both years due to file-size issues.
## Parameters
template = tibble(
  YYYY = list(c('2000', '2010')),
  scope = list(xwalk_state$state_census_geoid)
) %>% 
  tidyr::unnest(cols = c(YYYY))%>% 
  tidyr::unnest(cols = c(scope)) %>% 
  mutate(
    level = 'tract',
    yy = str_sub(YYYY, -2L, -1L),
    cb = F
  )
## Iterate
template %>% 
  pwalk(function(YYYY, scope, level, yy, cb) {
    
    ## SHP paths
    base_name_tmp = ifelse(
      cb,
      glue("tl_2010_{scope}_{level}{yy}_cb"),
      glue("tl_2010_{scope}_{level}{yy}")
    )
    shp_folder = file.path(  
      block_downstream$path,
      "TIGER10",
      ifelse(cb, 'Cartographic Boundaries', 'Geographic Boundaries'),
      str_to_upper(level),
      YYYY,
      base_name_tmp)
    shp_out_tmp = file.path( shp_folder,glue("{base_name_tmp}.shp"))
    
    ## Import if not exists
    if (!file.exists(shp_out_tmp)) {
      if (!dir.exists(shp_folder)) dir.create(shp_folder, recursive = T)
      
      sf_tigris_results_tmp <- tigris::tracts(state = scope, 
                                              year = YYYY,
                                              cb = cb)
      
      sf_tigris_results_tmp %>% 
        terra::vect() %>%
        terra::writeVector(shp_out_tmp)
    }
  })
PLACE 2020
Place is only available in tigris after 2011.
## Parameters
template = tibble(
  YYYY = list(c('2020')),
  scope = list(xwalk_state$state_census_geoid)
) %>% 
  tidyr::unnest(cols = c(YYYY))%>% 
  tidyr::unnest(cols = c(scope)) %>% 
  mutate(
    level = 'place',
    yy = str_sub(YYYY, -2L, -1L),
    cb = list(c(T, F))
  ) %>% 
  unnest(cb)
## Iterate
template %>% 
  pwalk(function(YYYY, scope, level, yy, cb) {
    
    ## SHP paths
    base_name_tmp = ifelse(
      cb,
      glue("tl_2010_{scope}_{level}{yy}_cb"),
      glue("tl_2010_{scope}_{level}{yy}")
    )
    shp_folder = file.path(  
      block_downstream$path,
      "TIGER10",
      ifelse(cb, 'Cartographic Boundaries', 'Geographic Boundaries'),
      str_to_upper(level),
      YYYY,
      base_name_tmp)
    shp_out_tmp = file.path( shp_folder,glue("{base_name_tmp}.shp"))
    
    ## Import if not exists
    if (!file.exists(shp_out_tmp)) {
      if (!dir.exists(shp_folder)) dir.create(shp_folder, recursive = T)
      
      sf_tigris_results_tmp <- tigris::places(state = scope, 
                                              year = YYYY,
                                              cb = cb)
      
      sf_tigris_results_tmp %>% 
        terra::vect() %>%
        terra::writeVector(shp_out_tmp)
    }
  })
2.2 TIGER FTP (supplement)
There are four level-years for which we need to use the FTP. We downloaded the zipped files and stored them in the same structure as the FTP in our downstream block location (a hedged download sketch follows).
We will now unzip them into the same structure as the FTP.
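For reference, here is a minimal sketch of how one such zip could be pulled. The FTP URL pattern below is an assumption based on the TIGER2010 archive layout and should be verified against the archive before use:
```{r}
## Hypothetical download of one supplemental file (URL pattern assumed, not verified here)
url_tmp = "https://www2.census.gov/geo/tiger/TIGER2010/CBSA/2010/tl_2010_us_cbsa10.zip"
zip_out = file.path(block_downstream$path, "TIGER10", "Geographic Boundaries",
                    "CBSA", "2010", basename(url_tmp))
if (!file.exists(zip_out)) {
  dir.create(dirname(zip_out), recursive = TRUE, showWarnings = FALSE)
  download.file(url_tmp, zip_out, mode = "wb")
}
```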
```{r}
## Get all zipped files
vec_zip_files = list.files(block_downstream$path, full.names = T, recursive = T) %>% 
  str_subset('zip$')
## Unzip
vec_zip_files %>%
  purrr::walk(~{
    
    # file_tmp = vec_zip_files %>% pluck(7)
    file_tmp = .x
    
    ## Get base name 
    baseName = basename(.x) %>% stringr::str_remove("\\.zip")
    path_shp_folder = .x %>% stringr::str_remove("\\.zip")
    cli_alert("start unzip: {baseName}")
    ## Unzip 
    if (dir.exists(path_shp_folder)) {
      cli_alert_info(glue("{baseName} already exists"))
      return()
    }
    unzip(.x, exdir = path_shp_folder)
    cli_alert_success(glue("Unzipped {baseName}"))
  })
```
At this point our shape lake should already be finished. Let's do some EDA and QC before we wrap this up.
2.3 QC
Let's first inventory all our files.
vec__all_shp = list.files(block_downstream$path, full.names = T, recursive = T) %>% 
  str_subset('shp$')
df_inventory = tibble(
  path = vec__all_shp
) %>% 
  rowwise() %>% 
  mutate(
    tiger10_file =  basename(path) %>% 
      # str_remove('\\_cb.shp') %>% 
      str_remove('\\.shp'),
    cb = str_detect( basename(path), '\\_cb.shp'),
    year = paste0('20', str_sub(tiger10_file %>% str_remove('\\_cb'), -2L, -1L)),
    scope = str_extract(tiger10_file, '_\\d{2}_|_us_') %>% str_remove_all('_'),
    geo = str_extract(tiger10_file, 'zcta5|tract|cousub|county|place|cbsa') %>% 
      recode('zcta5' = 'zcta'),
    vintage = paste0(geo, str_sub(year, -2L, -1L), ifelse(cb, '_cb', ''))
  ) %>% 
  ungroup() %>% 
  filter(!str_detect(tiger10_file, '_albers')) %>% 
  select(everything(), path) %>% 
  assert(is_uniq, tiger10_file) %>% 
  assert(not_na, c('geo','vintage')) 
  
head(df_inventory)
| path | tiger10_file | cb | year | scope | geo | vintage | 
|---|---|---|---|---|---|---|
| //files.drexel.edu/colleges/SOPH/Shared/UHC/Projects/CCUH/ccuh-server/freeze/tiger10_boundaries__v1.1/TIGER10/Cartographic Boundaries/PLACE/2020/tl_2010_01_place20_cb/tl_2010_01_place20_cb.shp | tl_2010_01_place20_cb | TRUE | 2020 | 01 | place | place20_cb | 
| //files.drexel.edu/colleges/SOPH/Shared/UHC/Projects/CCUH/ccuh-server/freeze/tiger10_boundaries__v1.1/TIGER10/Cartographic Boundaries/PLACE/2020/tl_2010_02_place20_cb/tl_2010_02_place20_cb.shp | tl_2010_02_place20_cb | TRUE | 2020 | 02 | place | place20_cb | 
| //files.drexel.edu/colleges/SOPH/Shared/UHC/Projects/CCUH/ccuh-server/freeze/tiger10_boundaries__v1.1/TIGER10/Cartographic Boundaries/PLACE/2020/tl_2010_04_place20_cb/tl_2010_04_place20_cb.shp | tl_2010_04_place20_cb | TRUE | 2020 | 04 | place | place20_cb | 
| //files.drexel.edu/colleges/SOPH/Shared/UHC/Projects/CCUH/ccuh-server/freeze/tiger10_boundaries__v1.1/TIGER10/Cartographic Boundaries/PLACE/2020/tl_2010_05_place20_cb/tl_2010_05_place20_cb.shp | tl_2010_05_place20_cb | TRUE | 2020 | 05 | place | place20_cb | 
| //files.drexel.edu/colleges/SOPH/Shared/UHC/Projects/CCUH/ccuh-server/freeze/tiger10_boundaries__v1.1/TIGER10/Cartographic Boundaries/PLACE/2020/tl_2010_06_place20_cb/tl_2010_06_place20_cb.shp | tl_2010_06_place20_cb | TRUE | 2020 | 06 | place | place20_cb | 
| //files.drexel.edu/colleges/SOPH/Shared/UHC/Projects/CCUH/ccuh-server/freeze/tiger10_boundaries__v1.1/TIGER10/Cartographic Boundaries/PLACE/2020/tl_2010_08_place20_cb/tl_2010_08_place20_cb.shp | tl_2010_08_place20_cb | TRUE | 2020 | 08 | place | place20_cb | 
Now let's run a few tests to make sure we have all the files we expect:
- Test Passed: No redundant files: if a US-level file is available, there are no state-level files
- Test Passed: Tract is only available at the state level (due to size)
- Test Passed: All crosswalk states are available in the state-level instances
## Test 1: no redundant files (a US-level file implies no state-level files)
df_invalid = df_inventory %>% 
  group_by(geo, year) %>%
  summarize(
    invalid = case_when(
      'us' %in% scope & n_distinct(scope) > 1 ~ T,
      TRUE ~ F
    )) %>% 
  ungroup() %>% 
  filter(invalid )
if (nrow(df_invalid) > 0) cli_abort("ERROR: Redundant files found")
## Test 2: tract is only available at the state level
df_invalid = df_inventory %>% 
  filter(geo == 'tract') %>% 
  filter(scope == 'us')
if (nrow(df_invalid) > 0) cli_abort("ERROR: Tract is only available at state level")
## Test 3: all crosswalk states present for every state-level geo-year
df_invalid = df_inventory %>% 
  filter(scope !='us') %>% 
  group_by(geo, year) %>%
  summarize(
    valid = all(xwalk_state$state_census_geoid %in% scope),
    states_missing = setdiff(xwalk_state$state_census_geoid, scope) %>% str_c(collapse = ',')
    ) %>% 
  ungroup() %>% 
  filter(!valid)
if (nrow(df_invalid) > 0) cli_abort("ERROR: Not all states are available in state level instances")
3. Inventory
3.1 Partitioned processing
df_inventory %>% 
  count(year, geo, cb) %>% 
  arrange(n, geo, year)
| year | geo | cb | n | 
|---|---|---|---|
| 2010 | cbsa | FALSE | 1 | 
| 2000 | county | FALSE | 1 | 
| 2010 | county | FALSE | 1 | 
| 2000 | zcta | FALSE | 1 | 
| 2010 | zcta | FALSE | 1 | 
| 2010 | cousub | FALSE | 54 | 
| 2019 | cousub | FALSE | 54 | 
| 2020 | place | FALSE | 54 | 
| 2020 | place | TRUE | 54 | 
| 2000 | tract | FALSE | 54 | 
| 2010 | tract | FALSE | 54 | 
| 2000 | cousub | FALSE | 56 | 
| 2000 | place | FALSE | 56 | 
| 2010 | place | FALSE | 56 | 
We can see that the available boundary files from the Census Bureau differ by year and geographic level. Let's combine these into a single dataset for easier access. Because the ZCTA files carry no state identifiers, we first build a ZCTA-to-state crosswalk from the Census relationship file, assigning each ZCTA to the state holding the largest share of its population.
xwalk_zcta_county = read.csv('https://www2.census.gov/geo/docs/maps-data/data/rel/zcta_county_rel_10.txt') %>% 
  as_tibble() %>% 
  mutate(geoid = str_pad(ZCTA5, width = 5,  side = 'left', pad ='0'))  %>% 
  mutate(state_census_geoid = str_pad(STATE, width = 2,  side = 'left', pad ='0')) %>% 
  group_by(geoid) %>% 
  filter(ZPOPPCT == max(ZPOPPCT)) %>% 
  ungroup() %>% 
  select(geoid , state_census_geoid) %>% 
  mutate_all(as.character) %>% 
  left_join(xwalk_state %>% select(state_census_geoid, state_abb))
df_inventory %>% 
  filter(year %in% 2010:2020) %>%  
  group_by(row_number()) %>% 
  group_walk(~{
    
    ## Setup
    ## .x = df_inventory %>% filter(year %in% 2010, geo == 'cousub', scope == '05') %>% slice(1)
    out_path = file.path(
      'cache',
      'hdfs',
      paste0(.x$tiger10_file,'.parquet')
    )
    if (file.exists(out_path)) {
      cli_alert(paste(.x$tiger10_file,'- already cached'))
      return()
    } else {
       cli_alert_info(paste('Start processing', .x$tiger10_file))
    }
    
    ## Standardize 
    if (.x$geo == 'cbsa'){
      sfa = .x$path %>% 
        st_read() %>% 
        {if("GEOID10" %in% names(.)) select(., -any_of("GEOID")) else .} %>%
        rename_with(
          ~case_when(
            . == "GEOID10" ~ "geoid",
            . == "GEOID" ~ "geoid",
            . == "ALAND10" ~ "ALAND",
            . == "AWATER10" ~ "AWATER",
            TRUE ~ .
          )
        ) %>% 
        mutate(state_abb = str_sub(NAME10, -2L, -1L)) %>% 
        left_join(xwalk_state %>% select(state_census_geoid, state_abb) ) %>% 
        select(geoid, state_abb, state_census_geoid, name = NAMELSAD10, ALAND, AWATER) 
    } else if (.x$geo == 'zcta'){
      sfa = .x$path %>% 
        st_read() %>% 
        {if("GEOID10" %in% names(.)) select(., -any_of("GEOID")) else .} %>%
        rename_with(
          ~case_when(
            . == "GEOID10" ~ "geoid",
            . == "GEOID" ~ "geoid",
            . == "ALAND10" ~ "ALAND",
            . == "AWATER10" ~ "AWATER",
            TRUE ~ .
          )
        ) %>% 
        left_join(xwalk_zcta_county ) %>% 
        mutate(name = '') %>% 
        select(geoid, state_abb, state_census_geoid, name, ALAND, AWATER ) 
    } else {
      sfa = .x$path %>% 
        st_read() %>% 
        {if("STATEFP10" %in% names(.)) select(., -any_of("STATEFP")) else .} %>%
        {if("GEOID10" %in% names(.)) select(., -any_of("GEOID")) else .} %>%
        {if("NAMELSAD10" %in% names(.)) select(., -any_of("NAMELSAD")) else .} %>%
        rename_with(
          ~case_when(
            . == "STATEFP10" ~ "state_census_geoid",
            . == "STATEFP" ~ "state_census_geoid", 
            . == "NAMELSAD10" ~ "name",
            . == "NAMELSAD" ~ "name",
            . == "GEOID10" ~ "geoid",
            . == "GEOID" ~ "geoid",
            . == "ALAND10" ~ "ALAND",
            . == "AWATER10" ~ "AWATER",
            TRUE ~ .
          )
        ) %>% 
        left_join(xwalk_state %>% select(state_census_geoid, state_abb))  %>% 
        select(geoid, state_abb, state_census_geoid, name, ALAND, AWATER ) 
    }
    
    
    ## Check CRS
    crs <- st_crs(sfa)
    valid_crs =  grepl("NAD83", crs$input) || crs$epsg == 4269
    if (!valid_crs) cli_abort("Invalid CRS!!")
    
    ## Export as parquet
    sf_final = sfa %>% 
      mutate(
        name = utf8::utf8_encode(name),
        vintage = .x$vintage, 
        geo = .x$geo,
        cb = .x$cb )%>% 
      st_cast("MULTIPOLYGON") %>%  
      select(geoid, name, geo, state_abb, state_census_geoid, vintage, cb, ALAND, AWATER, geometry ) %>% 
      tibble::as_tibble()
    sf_final |> 
      write_parquet(out_path)
    cli_alert_success(paste(.x$tiger10_file,'- exported as parquet'))
  })
Now we can access this partitioned file system with dplyr syntax via Arrow.
library(arrow)
library(geoarrow)
library(sf)
library(leaflet)
## Connect to API
api <- open_dataset("cache/hdfs/", format = "parquet")
## Explore availability by vintage 
api %>% 
  count(vintage,cb) %>% 
  collect()
| vintage | cb | n | 
|---|---|---|
| cousub10 | FALSE | 36685 | 
| cousub19 | FALSE | 36665 | 
| place10 | FALSE | 29793 | 
| place20 | FALSE | 31976 | 
| place20_cb | TRUE | 31976 | 
| tract10 | FALSE | 74091 | 
| cbsa10 | FALSE | 955 | 
| county10 | FALSE | 3221 | 
| zcta10 | FALSE | 33120 | 
## Query for boundaries of interest
sf_tmp = api %>% 
  filter(state_abb == 'CA', vintage  == 'place20_cb', cb == T) %>% 
  st_as_sf() 
## Map
sf_tmp %>% 
  leaflet() %>% 
  addTiles() %>% 
  addPolygons()
3.2 Compile
Let's compile these partitioned parquet files into a single file for deployment; this gives users performance benefits.
if (!file.exists("cache/tiger_us_boundaries_v1/tiger_us_boundaries_v1-0.parquet")){
  ds_arrow <- open_dataset('cache/hdfs')
  
  ds_arrow %>%
    write_dataset('cache/tiger_us_boundaries_v1', 
                  basename_template = "tiger_us_boundaries_v1-{i}.parquet",
                  format = "parquet")
}
Let's test this boundaries API.
## Connect to API
api_geoarrow <- open_dataset("cache/tiger_us_boundaries_v1/tiger_us_boundaries_v1-0.parquet")
## Explore availability by vintage 
api_geoarrow %>% 
  count(vintage,geo) %>% 
  collect()
| vintage | geo | n | 
|---|---|---|
| place10 | place | 29793 | 
| cousub10 | cousub | 36685 | 
| place20_cb | place | 31976 | 
| cousub19 | cousub | 36665 | 
| place20 | place | 31976 | 
| tract10 | tract | 74091 | 
| cbsa10 | cbsa | 955 | 
| county10 | county | 3221 | 
| zcta10 | zcta | 33120 | 
## Query for boundaries of interest
sf_tmp = api_geoarrow %>% 
  filter(state_abb == 'PA', vintage  == 'county10') %>% 
  st_as_sf() 
## Map
sf_tmp %>% 
  leaflet() %>% 
  addTiles() %>% 
  addPolygons()
Okay, looks good.
3.3 Deploy
To deploy, we copy the compiled parquet file to the CCUH server at \\files.drexel.edu\colleges\SOPH\Shared\UHC\Projects\CCUH\ccuh-server\freeze\tiger10_boundaries__v1.1\tiger_us_boundaries_v1.parquet.
if (!file.exists(block_downstream$api)) {
  
  # Copy file to server
  # Copy file to server
  copied = file.copy(
    from = file.path("cache/tiger_us_boundaries_v1/tiger_us_boundaries_v1-0.parquet"),
    to = block_downstream$api,
    overwrite = TRUE
  )
  
  # Verify copy succeeded before reporting success
  if (copied) {
    cli_alert_success("File successfully deployed to server")
  } else {
    cli_abort("File copy to server failed")
  }
  
} else {
  cli_alert("TIGER boundaries already deployed!")
}
4. Access
Data
To use this API you will need to define the following arguments:
- vintage: what vintage you want (e.g. tract10 or place20 or place20_cb)
- state_abb: optional for specific states
```{r}
library(arrow)
library(geoarrow)
library(sf)
library(leaflet)
## Connect to API
api <- "//files.drexel.edu/colleges/SOPH/Shared/UHC/Projects/CCUH/ccuh-server/freeze/tiger10_boundaries__v1.1/tiger_us_boundaries_v1.parquet" %>% 
  open_dataset()
## Query for boundaries of interest
sf_tmp = api %>% 
  filter(state_abb == 'PA', vintage  == 'county10') %>% 
  st_as_sf() 
## Map
sf_tmp %>% 
  leaflet() %>% 
  addTiles() %>% 
  addPolygons()
```
Metadata
tribble(
  ~var_name,           ~var_type,        ~var_def,
  "geoid",             "character",       "Unique geographic identifier",
  "name",              "character",       "Geographic name derived from NAMELSAD10",
  "geo",               "character",       "Geographic level identifier (tract, county, place, cbsa, cousub, zcta)",
  "state_abb",         "character",       "State abbreviation (2-letter)",
  "state_census_geoid", "character",      "State FIPS code derived from STATEFP/STATEFP10 (2-digit)",
  "vintage",           "character",       "Temporal identifier combining geo level and year (e.g., 'tract10' for 2010 Census tracts)", 
  "cb",                "boolean",         "True for Census Bureau simplified cartographic boundaries optimized for small-scale thematic mapping, False for full-detail TIGER/Line geographic boundaries. Cartographic boundaries are generalized representations that exclude water areas and are designed for visualization at smaller scales",
  "ALAND",             "numeric",         "Land area in square meters",
  "AWATER",            "numeric",         "Water area in square meters",
  "geometry",          "MULTIPOLYGON",    "Geographic boundary geometry in NAD83 (EPSG:4269) coordinate system"
) %>% 
  reactable()
5. Downstream
5.1 Albers lake
Here we operationalize an Albers shapefile lake for PRISM processing. Note that in addition to state-level boundaries we include a CONUS boundary, which covers the contiguous US states plus the District of Columbia.
api <- block_downstream$api %>% 
  open_dataset()
template_state = api %>% 
  count(state_abb, vintage) %>% 
  filter(!is.na(state_abb)) %>% 
  collect() %>% 
  mutate(
    year_abbrv = str_extract_all(vintage, "\\d{2}") %>% map_chr(~.x[1]),
    vintage_clean = vintage %>% 
      str_replace_all('_cb', "CB")) %>% 
  select(state_abb_tmp = state_abb, 
         year_abbrv_tmp = year_abbrv,
         vintage_tmp = vintage , 
         vintage_clean_tmp = vintage_clean) 
template_conus = template_state %>% 
  select(-state_abb_tmp) %>% 
  distinct() %>% 
  mutate(state_abb_tmp = 'CONUS')
template = bind_rows(template_conus, template_state)
template %>% 
  head(20) %>% 
  reactable()
Develop our iteration logic
tigris_to_albers_lake = function(
    state_abb_tmp, year_abbrv_tmp, 
    vintage_tmp, vintage_clean_tmp, 
    overwrite = F){
  
 
  
  { # Setup  ----------------------------
    crs_tmp = 'albers'
    state_abb_cleaned_tmp = state_abb_tmp
    if (state_abb_tmp == 'CONUS') {
      state_abb_cleaned_tmp = xwalk_state %>% 
        filter(state_contiguous  == 1) %>% 
        pull(state_abb)
    }
    base_name_tmp = glue("tl_{state_abb_tmp}_{vintage_clean_tmp}")
    out_path = glue("{block_downstream$lake_albers_shp}/{base_name_tmp}_{crs_tmp}.shp")
  
    
     # cli_alert("Start: {vintage_tmp} - {state_abb_tmp} -> {out_path}")
  }
  
  
  { # Project + Export  ---------------
    
    if (!file.exists(out_path) | overwrite) {
      
      ## Import 
      tigris_results_tmp = api %>% 
        filter(vintage == vintage_tmp,
               state_abb %in% state_abb_cleaned_tmp) %>% 
        st_as_sf()
      if (nrow(tigris_results_tmp) < 1) cli_abort("Error in API boundary query!!")
      
      ## Project to albers
      sf_tmp = tigris_results_tmp %>% 
        st_transform("EPSG:5070")
      
      ## Export
      sf_tmp %>% 
        terra::vect() %>%
        terra::writeVector(out_path)
    }
  }
  
}
Let's iterate:
```{r}
pmap(
  template %>% 
       filter(state_abb_tmp == 'CONUS',
              vintage_tmp %in% c('tract10', 'county10')),
  tigris_to_albers_lake)
```
You can access this lake with this code:
```{r}
list.files("//files.drexel.edu/colleges/SOPH/Shared/UHC/Projects/CCUH/ccuh-server/freeze/tiger10_boundaries__v1.1/lake_albers_shp", 
  recursive = T,
  full.names = T,
  pattern = '_albers.shp')
```
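Each entry is a standard shapefile projected to EPSG:5070, so it can be read back with sf or terra. A minimal sketch, assuming a CONUS tract file exists in the lake (the file name below is hypothetical; substitute one returned by `list.files()` above):
```{r}
## Read one Albers boundary back in and confirm the projection (hypothetical file name)
path_tmp = file.path(block_downstream$lake_albers_shp, "tl_CONUS_tract10_albers.shp")
sf_conus_tract = sf::st_read(path_tmp)
sf::st_crs(sf_conus_tract)$epsg  # expect 5070 (CONUS Albers)
```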