tiger_us_boundaries__v1.1

CCUH Tiger Boundaries API
Author

Ran Li (Maintainer)

Published

January 28, 2025

This notebook compiles TIGER boundaries and makes them available as an API for reproducible access across CCUH notebooks.

1. Setup

1.1 Dependencies

First, let's load dependencies.

if (!requireNamespace("pacman", quietly = TRUE)) install.packages("pacman")
library(pacman)
p_load(tigris, sf, arrow, geoarrow, glue, purrr, furrr, tictoc, cli, terra, reactable, dplyr, stringr, leaflet, duckdb, tidyverse, assertr)

1.2 Upstream Block

We only reuse the CCUH state-level crosswalk. Here we copy the import code from the Notion page.

xwalk_state = arrow::read_parquet('//files.drexel.edu/colleges/SOPH/Shared/UHC/Projects/CCUH/ccuh-server/freeze/xwalk_state_v1/xwalk_state_v1.parquet')
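
Downstream code relies on a few columns of this crosswalk, so a quick defensive check is worth running (a minimal sketch using assertr, loaded above):

## Sketch: verify the crosswalk carries the columns this notebook uses
xwalk_state %>% 
  assertr::verify(assertr::has_all_names(
    "state_census_geoid", "state_abb", "state_contiguous"))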

1.3 Downstream Block

block_downstream = lst(
  id = 'tiger10_boundaries__v1.1',
  path = '//files.drexel.edu/colleges/SOPH/Shared/UHC/Projects/CCUH/ccuh-server/freeze/tiger10_boundaries__v1.1',
  api = file.path(path, 'tiger_us_boundaries_v1.parquet'),
  lake_albers_shp = file.path(path, 'lake_albers_shp')
)

2. Data

The TIGER10 FTP files are saved in this format: tl_2010_{statefp/us}_{level}{yy}. Some are available via the tigris package, but all are available on the FTP. The list below shows availability and our strategy for each:

  • TIGRIS Available
    • COUNTY 2000
    • COUNTY 2010
    • COUSUB 2010
    • COUSUB 2019
    • ZCTA 2000
    • ZCTA 2010
    • TRACT 2000
    • TRACT 2010
    • PLACE 2020
  • Supplement with FTP
    • COUSUB 2000
    • PLACE 2000
    • PLACE 2010
    • CBSA 2010

For both strategies the files are saved as .shp files in the downstream data block location, in the same format and file structure as the TIGER10 FTP archive.
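
As a quick illustration of the naming convention (a minimal sketch; the FIPS code 42 is just an example):

## Sketch: how a TIGER10 base name is assembled
scope = "42"     ## a state FIPS code, or 'us' for national files
level = "tract"
yy    = "10"     ## last two digits of the vintage year
glue("tl_2010_{scope}_{level}{yy}")
## tl_2010_42_tract10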

block_downstream
$id
[1] "tiger10_boundaries__v1.1"

$path
[1] "//files.drexel.edu/colleges/SOPH/Shared/UHC/Projects/CCUH/ccuh-server/freeze/tiger10_boundaries__v1.1"

$api
[1] "//files.drexel.edu/colleges/SOPH/Shared/UHC/Projects/CCUH/ccuh-server/freeze/tiger10_boundaries__v1.1/tiger_us_boundaries_v1.parquet"

$lake_albers_shp
[1] "//files.drexel.edu/colleges/SOPH/Shared/UHC/Projects/CCUH/ccuh-server/freeze/tiger10_boundaries__v1.1/lake_albers_shp"

2.1 TIGRIS

COUNTY 2000 and 2010

Pretty straightforward: just a single US-level file each for COUNTY 2000 and 2010.

c("2000",'2010') %>% 
  walk(~{
    
    ## Parameters
    YYYY = .x
    yy = str_sub(YYYY, -2L, -1L)  ## last two digits of year
    scope = 'us'
    level = 'county'
    
    ## SHP paths
    base_name_tmp = glue("tl_2010_{scope}_{level}{yy}")
    shp_folder = file.path(  
      block_downstream$path,
      "TIGER10",
      'Geographic Boundaries',
      str_to_upper(level),
      YYYY,
      base_name_tmp)
    shp_out_tmp = file.path( shp_folder,glue("{base_name_tmp}.shp"))
    
    ## Import if not exists
    if (!file.exists(shp_out_tmp)) {
      if (!dir.exists(shp_folder)) dir.create(shp_folder, recursive = T)
      sf_tigris_results_tmp <- tigris::counties(year = YYYY)
      sf_tigris_results_tmp %>% 
        terra::vect() %>%
        terra::writeVector(shp_out_tmp)
    }
    
  })

COUSUB 2010 and 2019

This level is not available for the whole US, so we have to pull state-level files.

c("2010",'2019') %>% walk(~{
  ## Parameters
  YYYY = .x
  yy = str_sub(YYYY, -2L, -1L)
  # scope = 'us'
  level = 'cousub'
  
  ## State level Call
  xwalk_state$state_census_geoid %>% 
    walk(~{
      
      scope = .x
      
      ## SHP paths
      base_name_tmp = glue("tl_2010_{scope}_{level}{yy}")
      shp_folder = file.path(  
        block_downstream$path,
        "TIGER10",
        'Geographic Boundaries',
        str_to_upper(level),
        YYYY,
        base_name_tmp)
      shp_out_tmp = file.path( shp_folder,glue("{base_name_tmp}.shp"))
      
      ## Import if not exists
      if (!file.exists(shp_out_tmp)) {
        if (!dir.exists(shp_folder)) dir.create(shp_folder, recursive = T)
        sf_tigris_results_tmp <- tigris::county_subdivisions(state = scope, year = YYYY)
        sf_tigris_results_tmp %>% 
          terra::vect() %>%
          terra::writeVector(shp_out_tmp)
      }
    })
})

ZCTA 2000 and 2010

c("2000",'2010') %>% 
  walk(~{
    
    ## Parameters
    YYYY = .x
    yy = str_sub(YYYY, -2L, -1L)
    scope = 'us'
    level = 'zcta5'
    
    ## SHP paths
    base_name_tmp = glue("tl_2010_{scope}_{level}{yy}")
    shp_folder = file.path(  
      block_downstream$path,
      "TIGER10",
      'Geographic Boundaries',
      str_to_upper(level),
      YYYY,
      base_name_tmp)
    shp_out_tmp = file.path( shp_folder,glue("{base_name_tmp}.shp"))
    
    ## Import if not exists
    if (!file.exists(shp_out_tmp)) {
      if (!dir.exists(shp_folder)) dir.create(shp_folder, recursive = T)
      
      sf_tigris_results_tmp <- tigris::zctas(year = YYYY)
      
      sf_tigris_results_tmp %>% 
        terra::vect() %>%
        terra::writeVector(shp_out_tmp)
    }
  })

TRACT 2000 and 2010

Tracts we pull by state for both years due to file size.

## Parameters
template = tibble(
  YYYY = list(c('2000', '2010')),
  scope = list(xwalk_state$state_census_geoid)
) %>% 
  tidyr::unnest(cols = c(YYYY))%>% 
  tidyr::unnest(cols = c(scope)) %>% 
  mutate(
    level = 'tract',
    yy = str_sub(YYYY, -2L, -1L),
    cb = F
  )

## Iterate
template %>% 
  pwalk(function(YYYY, scope, level, yy, cb) {
    
    ## SHP paths
    base_name_tmp = ifelse(
      cb,
      glue("tl_2010_{scope}_{level}{yy}_cb"),
      glue("tl_2010_{scope}_{level}{yy}")
    )
    shp_folder = file.path(  
      block_downstream$path,
      "TIGER10",
      ifelse(cb, 'Cartographic Boundaries', 'Geographic Boundaries'),
      str_to_upper(level),
      YYYY,
      base_name_tmp)
    shp_out_tmp = file.path( shp_folder,glue("{base_name_tmp}.shp"))
    
    ## Import if not exists
    if (!file.exists(shp_out_tmp)) {
      if (!dir.exists(shp_folder)) dir.create(shp_folder, recursive = T)
      
      sf_tigris_results_tmp <- tigris::tracts(state = scope, 
                                              year = YYYY,
                                              cb = cb)
      
      sf_tigris_results_tmp %>% 
        terra::vect() %>%
        terra::writeVector(shp_out_tmp)
    }
  })

PLACE 2020

PLACE is only available in tigris for years after 2011, so we pull the 2020 vintage (both the full TIGER/Line and the cartographic boundary files).

## Parameters
template = tibble(
  YYYY = list(c('2020')),
  scope = list(xwalk_state$state_census_geoid)
) %>% 
  tidyr::unnest(cols = c(YYYY))%>% 
  tidyr::unnest(cols = c(scope)) %>% 
  mutate(
    level = 'place',
    yy = str_sub(YYYY, -2L, -1L),
    cb = list(c(T, F))
  ) %>% 
  unnest(cb)

## Iterate
template %>% 
  pwalk(function(YYYY, scope, level, yy, cb) {
    
    ## SHP paths
    base_name_tmp = ifelse(
      cb,
      glue("tl_2010_{scope}_{level}{yy}_cb"),
      glue("tl_2010_{scope}_{level}{yy}")
    )
    shp_folder = file.path(  
      block_downstream$path,
      "TIGER10",
      ifelse(cb, 'Cartographic Boundaries', 'Geographic Boundaries'),
      str_to_upper(level),
      YYYY,
      base_name_tmp)
    shp_out_tmp = file.path( shp_folder,glue("{base_name_tmp}.shp"))
    
    ## Import if not exists
    if (!file.exists(shp_out_tmp)) {
      if (!dir.exists(shp_folder)) dir.create(shp_folder, recursive = T)
      
      sf_tigris_results_tmp <- tigris::places(state = scope, 
                                              year = YYYY,
                                              cb = cb)
      
      sf_tigris_results_tmp %>% 
        terra::vect() %>%
        terra::writeVector(shp_out_tmp)
    }
  })

2.2 TIGER FTP (supplement)

There are four level-years for which we need to use the FTP. We downloaded the zipped files and stored them in the same structure as the FTP in our downstream block location.
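
For reference, a minimal download sketch (the URL is a hypothetical example based on the public TIGER2010 FTP layout; verify the exact directory before use):

```{r}
## Sketch: fetch one archive from the TIGER2010 FTP (example URL, an assumption)
url_tmp = "https://www2.census.gov/geo/tiger/TIGER2010/CBSA/2010/tl_2010_us_cbsa10.zip"
zip_out = file.path(block_downstream$path, "TIGER10", "Geographic Boundaries",
                    "CBSA", "2010", basename(url_tmp))
if (!file.exists(zip_out)) {
  dir.create(dirname(zip_out), recursive = T, showWarnings = F)
  download.file(url_tmp, destfile = zip_out, mode = "wb")
}
```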

We will now unzip them into the same structure as the FTP.

```{r}

## Get all zipped files
vec_zip_files = list.files(block_downstream$path, full.names = T, recursive = T) %>% 
  str_subset('zip$')

## Unzip each archive into a folder of the same name
vec_zip_files %>%
  purrr::walk(~{
    
    ## Folder takes the archive's base name
    baseName = basename(.x) %>% stringr::str_remove("\\.zip")
    path_shp_folder = .x %>% stringr::str_remove("\\.zip")
    cli_alert("start unzip: {baseName}")

    ## Skip if already unzipped
    if (dir.exists(path_shp_folder)) {
      cli_alert_info(glue("{baseName} already exists"))
      return()
    }
    unzip(.x, exdir = path_shp_folder)
    cli_alert_success(glue("Unzipped {baseName}"))
  })

```

At this point our shape lake should already be finished. Let's do some EDA and QC before we wrap this up.

2.3 QC

Let's first inventory all our files.

vec__all_shp = list.files(block_downstream$path, full.names = T, recursive = T) %>% 
  str_subset('shp$')

df_inventory = tibble(
  path = vec__all_shp
) %>% 
  rowwise() %>% 
  mutate(
    tiger10_file = basename(path) %>% 
      str_remove('\\.shp'),
    cb = str_detect(basename(path), '_cb\\.shp'),
    year = paste0('20', str_sub(tiger10_file %>% str_remove('_cb'), -2L, -1L)),
    scope = str_extract(tiger10_file, '_\\d{2}_|_us_') %>% str_remove_all('_'),
    geo = str_extract(tiger10_file, 'zcta5|tract|cousub|county|place|cbsa') %>% 
      recode('zcta5' = 'zcta'),
    vintage = paste0(geo, str_sub(year, -2L, -1L), ifelse(cb, '_cb', ''))
  ) %>% 
  ungroup() %>% 
  filter(!str_detect(tiger10_file, '_albers')) %>% 
  assert(is_uniq, tiger10_file) %>% 
  assert(not_na, c('geo','vintage')) 
  
head(df_inventory)
path tiger10_file cb year scope geo vintage
//files.drexel.edu/colleges/SOPH/Shared/UHC/Projects/CCUH/ccuh-server/freeze/tiger10_boundaries__v1.1/TIGER10/Cartographic Boundaries/PLACE/2020/tl_2010_01_place20_cb/tl_2010_01_place20_cb.shp tl_2010_01_place20_cb TRUE 2020 01 place place20_cb
//files.drexel.edu/colleges/SOPH/Shared/UHC/Projects/CCUH/ccuh-server/freeze/tiger10_boundaries__v1.1/TIGER10/Cartographic Boundaries/PLACE/2020/tl_2010_02_place20_cb/tl_2010_02_place20_cb.shp tl_2010_02_place20_cb TRUE 2020 02 place place20_cb
//files.drexel.edu/colleges/SOPH/Shared/UHC/Projects/CCUH/ccuh-server/freeze/tiger10_boundaries__v1.1/TIGER10/Cartographic Boundaries/PLACE/2020/tl_2010_04_place20_cb/tl_2010_04_place20_cb.shp tl_2010_04_place20_cb TRUE 2020 04 place place20_cb
//files.drexel.edu/colleges/SOPH/Shared/UHC/Projects/CCUH/ccuh-server/freeze/tiger10_boundaries__v1.1/TIGER10/Cartographic Boundaries/PLACE/2020/tl_2010_05_place20_cb/tl_2010_05_place20_cb.shp tl_2010_05_place20_cb TRUE 2020 05 place place20_cb
//files.drexel.edu/colleges/SOPH/Shared/UHC/Projects/CCUH/ccuh-server/freeze/tiger10_boundaries__v1.1/TIGER10/Cartographic Boundaries/PLACE/2020/tl_2010_06_place20_cb/tl_2010_06_place20_cb.shp tl_2010_06_place20_cb TRUE 2020 06 place place20_cb
//files.drexel.edu/colleges/SOPH/Shared/UHC/Projects/CCUH/ccuh-server/freeze/tiger10_boundaries__v1.1/TIGER10/Cartographic Boundaries/PLACE/2020/tl_2010_08_place20_cb/tl_2010_08_place20_cb.shp tl_2010_08_place20_cb TRUE 2020 08 place place20_cb

Now let's run a few tests to make sure we have all the files we expect:

  • Test Passed: no redundant files (if a US-level file exists, there are no state-level files for that geo-year)
  • Test Passed: tract is only available at the state level (due to size)
  • Test Passed: all expected states are present in the state-level instances
## Test 1: no redundant files
df_invalid = df_inventory %>% 
  group_by(geo, year) %>%
  summarize(
    invalid = case_when(
      'us' %in% scope & n_distinct(scope) > 1 ~ T,
      TRUE ~ F
    )) %>% 
  ungroup() %>% 
  filter(invalid)
if (nrow(df_invalid) > 0) cli_abort("ERROR: Redundant files found")

## Test 2: tract is state level only
df_invalid = df_inventory %>% 
  filter(geo == 'tract') %>% 
  filter(scope == 'us')
if (nrow(df_invalid) > 0) cli_abort("ERROR: Tract is only available at state level")

## Test 3: all expected states present
df_invalid = df_inventory %>% 
  filter(scope != 'us') %>% 
  group_by(geo, year) %>%
  summarize(
    valid = all(xwalk_state$state_census_geoid %in% scope),
    states_missing = setdiff(xwalk_state$state_census_geoid, scope) %>% str_c(collapse = ',')
  ) %>% 
  ungroup() %>% 
  filter(!valid)
if (nrow(df_invalid) > 0) cli_abort("ERROR: Not all states are available in state level instances")

3. Inventory

3.1 Partitioned processing

df_inventory %>% 
  count(year, geo, cb) %>% 
  arrange(n, geo, year)
year geo cb n
2010 cbsa FALSE 1
2000 county FALSE 1
2010 county FALSE 1
2000 zcta FALSE 1
2010 zcta FALSE 1
2010 cousub FALSE 54
2019 cousub FALSE 54
2020 place FALSE 54
2020 place TRUE 54
2000 tract FALSE 54
2010 tract FALSE 54
2000 cousub FALSE 56
2000 place FALSE 56
2010 place FALSE 56

We can see that the available boundary files from the Census Bureau differ by year and geographic level. Let's combine these into a single dataset for easier access. One wrinkle: ZCTA shapefiles carry no state field, so we first build a ZCTA-to-state crosswalk from the Census ZCTA-county relationship file, assigning each ZCTA to the state of the county that holds the largest share of its population.

xwalk_zcta_county = read.csv('https://www2.census.gov/geo/docs/maps-data/data/rel/zcta_county_rel_10.txt') %>% 
  as_tibble() %>% 
  mutate(geoid = str_pad(ZCTA5, width = 5,  side = 'left', pad ='0'))  %>% 
  mutate(state_census_geoid = str_pad(STATE, width = 2,  side = 'left', pad ='0')) %>% 
  group_by(geoid) %>% 
  filter(ZPOPPCT == max(ZPOPPCT)) %>% 
  ungroup() %>% 
  select(geoid , state_census_geoid) %>% 
  mutate_all(as.character) %>% 
  left_join(xwalk_state %>% select(state_census_geoid, state_abb))


df_inventory %>% 
  filter(year %in% 2010:2020) %>%  
  group_by(row_number()) %>% 
  group_walk(~{
    
    ## Setup
    ## .x = df_inventory %>% filter(year %in% 2010, geo == 'cousub', scope == '05') %>% slice(1)
    out_path = file.path(
      'cache',
      'hdfs',
      paste0(.x$tiger10_file,'.parquet')
    )
    if (file.exists(out_path)) {
      cli_alert(paste(.x$tiger10_file,'- already cached'))
      return()
    } else {
       cli_alert_info(paste('Start processing', .x$tiger10_file))
    }
    
    ## Standardize 
    if (.x$geo == 'cbsa'){
      sfa = .x$path %>% 
        st_read() %>% 
        {if("GEOID10" %in% names(.)) select(., -any_of("GEOID")) else .} %>%
        rename_with(
          ~case_when(
            . == "GEOID10" ~ "geoid",
            . == "GEOID" ~ "geoid",
            . == "ALAND10" ~ "ALAND",
            . == "AWATER10" ~ "AWATER",
            TRUE ~ .
          )
        ) %>% 
        mutate(state_abb = str_sub(NAME10, -2L, -1L)) %>% 
        left_join(xwalk_state %>% select(state_census_geoid, state_abb) ) %>% 
        select(geoid, state_abb, state_census_geoid, name = NAMELSAD10, ALAND, AWATER) 
    } else if (.x$geo == 'zcta'){
      sfa = .x$path %>% 
        st_read() %>% 
        {if("GEOID10" %in% names(.)) select(., -any_of("GEOID")) else .} %>%
        rename_with(
          ~case_when(
            . == "GEOID10" ~ "geoid",
            . == "GEOID" ~ "geoid",
            . == "ALAND10" ~ "ALAND",
            . == "AWATER10" ~ "AWATER",
            TRUE ~ .
          )
        ) %>% 
        left_join(xwalk_zcta_county ) %>% 
        mutate(name = '') %>% 
        select(geoid, state_abb, state_census_geoid, name, ALAND, AWATER ) 
    } else {
      sfa = .x$path %>% 
        st_read() %>% 
        {if("STATEFP10" %in% names(.)) select(., -any_of("STATEFP")) else .} %>%
        {if("GEOID10" %in% names(.)) select(., -any_of("GEOID")) else .} %>%
        {if("NAMELSAD10" %in% names(.)) select(., -any_of("NAMELSAD")) else .} %>%
        rename_with(
          ~case_when(
            . == "STATEFP10" ~ "state_census_geoid",
            . == "STATEFP" ~ "state_census_geoid", 
            . == "NAMELSAD10" ~ "name",
            . == "NAMELSAD" ~ "name",
            . == "GEOID10" ~ "geoid",
            . == "GEOID" ~ "geoid",
            . == "ALAND10" ~ "ALAND",
            . == "AWATER10" ~ "AWATER",
            TRUE ~ .
          )
        ) %>% 
        left_join(xwalk_state %>% select(state_census_geoid, state_abb))  %>% 
        select(geoid, state_abb, state_census_geoid, name, ALAND, AWATER ) 
    }
    
    
    ## Check CRS (guard against NA epsg codes)
    crs <- st_crs(sfa)
    valid_crs = grepl("NAD83", crs$input) || isTRUE(crs$epsg == 4269)
    if (!valid_crs) cli_abort("Invalid CRS!!")
    
    ## Export as parquet
    sf_final = sfa %>% 
      mutate(
        name = utf8::utf8_encode(name),
        vintage = .x$vintage, 
        geo = .x$geo,
        cb = .x$cb )%>% 
      st_cast("MULTIPOLYGON") %>%  
      select(geoid, name, geo, state_abb, state_census_geoid, vintage, cb, ALAND, AWATER, geometry ) %>% 
      tibble::as_tibble()
    sf_final |> 
      write_parquet(out_path)
    cli_alert_success(paste(.x$tiger10_file,'- exported as parquet'))
  })

Now we can access this partitioned file system with dplyr syntax via Arrow.

library(arrow)
library(geoarrow)
library(sf)
library(leaflet)

## Connect to API
api <- open_dataset("cache/hdfs/", format = "parquet")

## Explore availability by vintage 
api %>% 
  count(vintage,cb) %>% 
  collect()
vintage cb n
cousub10 FALSE 36685
cousub19 FALSE 36665
place10 FALSE 29793
place20 FALSE 31976
place20_cb TRUE 31976
tract10 FALSE 74091
cbsa10 FALSE 955
county10 FALSE 3221
zcta10 FALSE 33120
## Query for boundaries of interest
sf_tmp = api %>% 
  filter(state_abb == 'CA', vintage  == 'place20_cb', cb == T) %>% 
  st_as_sf() 

## Map
sf_tmp %>% 
  leaflet() %>% 
  addTiles() %>% 
  addPolygons()

3.2 Compile

Let's compile these partitioned parquet files into a single file for deployment; this gives users a performance benefit.

if (!file.exists("cache/tiger_us_boundaries_v1/tiger_us_boundaries_v1-0.parquet")){
  ds_arrow <- open_dataset('cache/hdfs')
  
  ds_arrow %>%
    write_dataset('cache/tiger_us_boundaries_v1', 
                  basename_template = "tiger_us_boundaries_v1-{i}.parquet",
                  format = "parquet")
}

Let's test the boundaries API.

## Connect to API
api_geoarrow <- open_dataset("cache/tiger_us_boundaries_v1/tiger_us_boundaries_v1-0.parquet")

## Explore availability by vintage 
api_geoarrow %>% 
  count(vintage,geo) %>% 
  collect()
vintage geo n
place10 place 29793
cousub10 cousub 36685
place20_cb place 31976
cousub19 cousub 36665
place20 place 31976
tract10 tract 74091
cbsa10 cbsa 955
county10 county 3221
zcta10 zcta 33120
## Query for boundaries of interest
sf_tmp = api_geoarrow %>% 
  filter(state_abb == 'PA', vintage  == 'county10') %>% 
  st_as_sf() 

## Map
sf_tmp %>% 
  leaflet() %>% 
  addTiles() %>% 
  addPolygons()

Okay, looks good.

3.3 Deploy

To deploy, we copy the compiled parquet file to the CCUH server at \\files.drexel.edu\colleges\SOPH\Shared\UHC\Projects\CCUH\ccuh-server\freeze\tiger10_boundaries__v1.1\tiger_us_boundaries_v1.parquet.

if (!file.exists(block_downstream$api)) {
  
  # Copy file to server
  copied = file.copy(
    from = file.path("cache/tiger_us_boundaries_v1/tiger_us_boundaries_v1-0.parquet"),
    to = block_downstream$api,
    overwrite = TRUE
  )
  
  # Verify copy succeeded 
  if (copied) {
    cli_alert_success("File successfully deployed to server")
  } else {
    cli_abort("Deployment failed!")
  }
  
} else {
  cli_alert("TIGER boundaries already deployed!")
}

4. Access

Data

To use this API you will need to define two arguments:

  • vintage: which vintage you want (e.g. tract10, place20, or place20_cb)
  • state_abb: optional, to subset to specific states
```{r}

library(arrow)
library(geoarrow)
library(sf)
library(leaflet)

## Connect to API
api <- "//files.drexel.edu/colleges/SOPH/Shared/UHC/Projects/CCUH/ccuh-server/freeze/tiger10_boundaries__v1.1/tiger_us_boundaries_v1.parquet" %>% 
  open_dataset()


## Query for boundaries of interest
sf_tmp = api %>% 
  filter(state_abb == 'PA', vintage  == 'county10') %>% 
  st_as_sf() 

## Map
sf_tmp %>% 
  leaflet() %>% 
  addTiles() %>% 
  addPolygons()
```
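
If you are unsure which vintages are available, you can enumerate them straight from the API first (a quick sketch):

```{r}
## Sketch: list available vintages before querying
api %>% 
  distinct(vintage) %>% 
  collect()
```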

Metadata

tribble(
  ~var_name,           ~var_type,        ~var_def,
  "geoid",             "character",       "Unique geographic identifier",
  "name",              "character",       "Geographic name derived from NAMELSAD10",
  "geo",               "character",       "Geographic level identifier (tract, county, place, cbsa, cousub, zcta)",
  "state_abb",         "character",       "State abbreviation (2-letter)",
  "state_census_geoid", "character",      "State FIPS code derived from STATEFP/STATEFP10 (2-digit)",
  "vintage",           "character",       "Temporal identifier combining geo level and year (e.g., 'tract10' for 2010 Census tracts)", 
  "cb",                "boolean",         "True for Census Bureau simplified cartographic boundaries optimized for small-scale thematic mapping, False for full-detail TIGER/Line geographic boundaries. Cartographic boundaries are generalized representations that exclude water areas and are designed for visualization at smaller scales",
  "ALAND",             "numeric",         "Land area in square meters",
  "AWATER",            "numeric",         "Water area in square meters",
  "geometry",          "MULTIPOLYGON",    "Geographic boundary geometry in NAD83 (EPSG:4269) coordinate system"
) %>% 
  reactable()
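
As a light check that the deployed file matches this metadata, we can compare the documented column names against the parquet schema (a sketch; names() on an Arrow dataset returns its schema field names):

## Sketch: documented columns missing from the deployed file (expect none)
api_check <- open_dataset(block_downstream$api)
setdiff(
  c("geoid", "name", "geo", "state_abb", "state_census_geoid",
    "vintage", "cb", "ALAND", "AWATER", "geometry"),
  names(api_check)
)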

5. Downstream

5.1 Albers lake

Here we operationalize an Albers .shp lake for PRISM processing. Note that in addition to state-level boundaries we have a CONUS boundary, which includes the contiguous US states plus the District of Columbia. The sketch below shows how CONUS membership is derived from the crosswalk.
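
A minimal sketch of that derivation (assuming the state_contiguous flag in xwalk_state also covers DC, per the note above):

## Sketch: derive CONUS membership from the state crosswalk
conus_abbs = xwalk_state %>% 
  filter(state_contiguous == 1) %>% 
  pull(state_abb)
length(conus_abbs)  ## expect 49: 48 contiguous states + DC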

api <- block_downstream$api %>% 
  open_dataset()

template_state = api %>% 
  count(state_abb, vintage) %>% 
  filter(!is.na(state_abb)) %>% 
  collect() %>% 
  mutate(
    year_abbrv = str_extract_all(vintage, "\\d{2}") %>% map_chr(~.x[1]),
    vintage_clean = vintage %>% 
      str_replace_all('_cb', "CB")) %>% 
  select(state_abb_tmp = state_abb, 
         year_abbrv_tmp = year_abbrv,
         vintage_tmp = vintage , 
         vintage_clean_tmp = vintage_clean) 

template_conus = template_state %>% 
  select(-state_abb_tmp) %>% 
  distinct() %>% 
  mutate(state_abb_tmp = 'CONUS')

template = bind_rows(template_conus, template_state)

template %>% 
  head(20) %>% 
  reactable()

Next we develop our iteration logic.

tigris_to_albers_lake = function(
    state_abb_tmp, year_abbrv_tmp, 
    vintage_tmp, vintage_clean_tmp, 
    overwrite = F){
  
  { # Setup  ----------------------------
    crs_tmp = 'albers'
    state_abb_cleaned_tmp = state_abb_tmp
    if (state_abb_tmp == 'CONUS') {
      state_abb_cleaned_tmp = xwalk_state %>% 
        filter(state_contiguous == 1) %>% 
        pull(state_abb)
    }
    base_name_tmp = glue("tl_{state_abb_tmp}_{vintage_clean_tmp}")
    out_path = glue("{block_downstream$lake_albers_shp}/{base_name_tmp}_{crs_tmp}.shp")
    # cli_alert("Start: {vintage_tmp} - {state_abb_tmp} -> {out_path}")
  }
  
  { # Project + Export  ---------------
    if (!file.exists(out_path) | overwrite) {
      
      ## Import 
      tigris_results_tmp = api %>% 
        filter(vintage == vintage_tmp,
               state_abb %in% state_abb_cleaned_tmp) %>% 
        st_as_sf()
      if (nrow(tigris_results_tmp) < 1) cli_abort("Error in API boundary query!!")
      
      ## Project to Albers (CONUS Albers, EPSG:5070)
      sf_tmp = tigris_results_tmp %>% 
        st_transform("EPSG:5070")
      
      ## Export as shapefile
      sf_tmp %>% 
        terra::vect() %>%
        terra::writeVector(out_path)
    }
  }
}

Let's iterate.

```{r}
pmap(
  template %>% 
       filter(state_abb_tmp == 'CONUS',
              vintage_tmp %in% c('tract10', 'county10')),
  tigris_to_albers_lake)
```

You can access this lake with this code:

```{r}
list.files("//files.drexel.edu/colleges/SOPH/Shared/UHC/Projects/CCUH/ccuh-server/freeze/tiger10_boundaries__v1.1/lake_albers_shp", 
  recursive = T,
  full.names = T,
  pattern = '_albers.shp')
```
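
To load one of these layers into R (a sketch; the exact file name depends on which layers were exported above):

```{r}
## Sketch: read the CONUS tract10 Albers layer exported earlier
path_tmp = list.files(
  block_downstream$lake_albers_shp,
  recursive = T, full.names = T,
  pattern = 'tract10_albers\\.shp$')
sf_conus_tract = sf::st_read(path_tmp[1])
```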