tiger_us_boundaries__v1.1
This notebook compiles TIGER boundaries and makes them available as an API for reproducible access for CCUH notebooks.
1. Setup
1.1 Dependencies
First, let's load dependencies.
library(pacman)
p_load(tigris, sf, arrow, geoarrow, glue, purrr, furrr, tictoc, cli, terra, reactable, dplyr, stringr, leaflet, duckdb, tidyverse, assertr)
1.2 Upstream Block
We only reuse the CCUH state-level crosswalk. Here we copy the import code from the Notion page.
xwalk_state = arrow::read_parquet('//files.drexel.edu/colleges/SOPH/Shared/UHC/Projects/CCUH/ccuh-server/freeze/xwalk_state_v1/xwalk_state_v1.parquet')
1.3 Downstream Block
block_downstream = lst(
  id = 'tiger10_boundaries__v1.1',
  path = '//files.drexel.edu/colleges/SOPH/Shared/UHC/Projects/CCUH/ccuh-server/freeze/tiger10_boundaries__v1.1',
  api = file.path(path, 'tiger_us_boundaries_v1.parquet'),
  lake_albers_shp = file.path(path, 'lake_albers_shp')
)
2. Data
The TIGER10 FTP files are saved in the format tl_2010_{statefp/us}_{level}{yy}. Some are available via TIGRIS, but all are available on the FTP. The list below shows availability and our strategy for each (a short sketch after the list illustrates the naming template):
- TIGRIS Available
  - COUNTY 2000
  - COUNTY 2010
  - COUSUB 2010
  - COUSUB 2019
  - ZCTA 2000
  - ZCTA 2010
  - TRACT 2000
  - TRACT 2010
- Supplement with FTP
  - COUSUB 2000
  - PLACE 2000
  - PLACE 2010
  - CBSA 2010
For both strategies the files are saved as .shp files in the downstream data block location, using the same naming format and file structure as the TIGER10 FTP archive.
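For orientation, here is a minimal sketch of how the naming template expands and where the resulting files land. `expand_tiger_name` is a hypothetical helper for illustration only, and the example paths are illustrative rather than an exhaustive listing:
```{r}
## Hypothetical helper illustrating the tl_2010_{scope}_{level}{yy} template
expand_tiger_name = function(scope, level, yy) glue("tl_2010_{scope}_{level}{yy}")
expand_tiger_name("us", "county", "10")  # "tl_2010_us_county10" (national file)
expand_tiger_name("42", "tract", "00")   # "tl_2010_42_tract00" (state-level file)

## Illustrative layout under block_downstream$path:
## TIGER10/Geographic Boundaries/COUNTY/2010/tl_2010_us_county10/tl_2010_us_county10.shp
## TIGER10/Cartographic Boundaries/PLACE/2020/tl_2010_01_place20_cb/tl_2010_01_place20_cb.shp
```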
block_downstream
$id
[1] "tiger10_boundaries__v1.1"
$path
[1] "//files.drexel.edu/colleges/SOPH/Shared/UHC/Projects/CCUH/ccuh-server/freeze/tiger10_boundaries__v1.1"
$api
[1] "//files.drexel.edu/colleges/SOPH/Shared/UHC/Projects/CCUH/ccuh-server/freeze/tiger10_boundaries__v1.1/tiger_us_boundaries_v1.parquet"
$lake_albers_shp
[1] "//files.drexel.edu/colleges/SOPH/Shared/UHC/Projects/CCUH/ccuh-server/freeze/tiger10_boundaries__v1.1/lake_albers_shp"2.1 TIGRIS
COUNTY 2000 and 2010
Pretty straightforward: just a single US-level file for each of COUNTY 2000 and 2010.
c("2000",'2010') %>% 
  walk(~{
    
    ## Parameters
    YYYY = .x
    yy = str_sub(YYYY, -2L, -1L)
    scope = 'us'
    level = 'county'
    
    ## SHP paths
    base_name_tmp = glue("tl_2010_{scope}_{level}{yy}")
    shp_folder = file.path(  
      block_downstream$path,
      "TIGER10",
      'Geographic Boundaries',
      str_to_upper(level),
      YYYY,
      base_name_tmp)
    shp_out_tmp = file.path( shp_folder,glue("{base_name_tmp}.shp"))
    
    ## Import if not exists
    if (!file.exists(shp_out_tmp)) {
      if (!dir.exists(shp_folder)) dir.create(shp_folder, recursive = T)
      sf_tigris_results_tmp <- tigris::counties(year = YYYY)
      sf_tigris_results_tmp %>% 
        terra::vect() %>%
        terra::writeVector(shp_out_tmp)
    }
    
  })
COUSUB 2010 and 2019
This one is not available for the whole US, so we have to iterate over state-level files.
c("2010",'2019') %>% walk(~{
  ## Parameters
  YYYY = .x
  yy = str_sub(YYYY, -2L, -1L)
  # scope = 'us'
  level = 'cousub'
  
  ## State level Call
  xwalk_state$state_census_geoid %>% 
    walk(~{
      
      scope = .x
      
      ## SHP paths
      base_name_tmp = glue("tl_2010_{scope}_{level}{yy}")
      shp_folder = file.path(  
        block_downstream$path,
        "TIGER10",
        'Geographic Boundaries',
        str_to_upper(level),
        YYYY,
        base_name_tmp)
      shp_out_tmp = file.path( shp_folder,glue("{base_name_tmp}.shp"))
      
      ## Import if not exists
      if (!file.exists(shp_out_tmp)) {
        if (!dir.exists(shp_folder)) dir.create(shp_folder, recursive = T)
        sf_tigris_results_tmp <- tigris::county_subdivisions(state = scope, year = YYYY)
        sf_tigris_results_tmp %>% 
          terra::vect() %>%
          terra::writeVector(shp_out_tmp)
      }
    })
})
ZCTA 2000 and 2010
c("2000",'2010') %>% 
  walk(~{
    
    ## Parameters
    YYYY = .x
    yy = str_sub(YYYY, -2L, -1L)
    scope = 'us'
    level = 'zcta5'
    
    
    ## SHP paths
    base_name_tmp = glue("tl_2010_{scope}_{level}{yy}")
    shp_folder = file.path(  
      block_downstream$path,
      "TIGER10",
      'Geographic Boundaries',
      str_to_upper(level),
      YYYY,
      base_name_tmp)
    shp_out_tmp = file.path( shp_folder,glue("{base_name_tmp}.shp"))
    
    ## Import if not exists
    if (!file.exists(shp_out_tmp)) {
      if (!dir.exists(shp_folder)) dir.create(shp_folder, recursive = T)
      
      sf_tigris_results_tmp <- tigris::zctas(year = YYYY)
      
      sf_tigris_results_tmp %>% 
        terra::vect() %>%
        terra::writeVector(shp_out_tmp)
    }
  })
TRACT 2000 and 2010
Tracts we pull by state for both years due to file-size issues.
## Parameters
template = tibble(
  YYYY = list(c('2000', '2010')),
  scope = list(xwalk_state$state_census_geoid)
) %>% 
  tidyr::unnest(cols = c(YYYY))%>% 
  tidyr::unnest(cols = c(scope)) %>% 
  mutate(
    level = 'tract',
    yy = str_sub(YYYY, -2L, -1L),
    cb = F
  )
## Iterate
template %>% 
  pwalk(function(YYYY, scope, level, yy, cb) {
    
    ## SHP paths
    base_name_tmp = ifelse(
      cb,
      glue("tl_2010_{scope}_{level}{yy}_cb"),
      glue("tl_2010_{scope}_{level}{yy}")
    )
    shp_folder = file.path(  
      block_downstream$path,
      "TIGER10",
      ifelse(cb, 'Cartographic Boundaries', 'Geographic Boundaries'),
      str_to_upper(level),
      YYYY,
      base_name_tmp)
    shp_out_tmp = file.path( shp_folder,glue("{base_name_tmp}.shp"))
    
    ## Import if not exists
    if (!file.exists(shp_out_tmp)) {
      if (!dir.exists(shp_folder)) dir.create(shp_folder, recursive = T)
      
      sf_tigris_results_tmp <- tigris::tracts(state = scope, 
                                              year = YYYY,
                                              cb = cb)
      
      sf_tigris_results_tmp %>% 
        terra::vect() %>%
        terra::writeVector(shp_out_tmp)
    }
  })
PLACE 2020
Place is only available in tigris after 2011.
## Parameters
template = tibble(
  YYYY = list(c('2020')),
  scope = list(xwalk_state$state_census_geoid)
) %>% 
  tidyr::unnest(cols = c(YYYY))%>% 
  tidyr::unnest(cols = c(scope)) %>% 
  mutate(
    level = 'place',
    yy = str_sub(YYYY, -2L, -1L),
    cb = list(c(T, F))
  ) %>% 
  unnest(cb)
## Iterate
template %>% 
  pwalk(function(YYYY, scope, level, yy, cb) {
    
    ## SHP paths
    base_name_tmp = ifelse(
      cb,
      glue("tl_2010_{scope}_{level}{yy}_cb"),
      glue("tl_2010_{scope}_{level}{yy}")
    )
    shp_folder = file.path(  
      block_downstream$path,
      "TIGER10",
      ifelse(cb, 'Cartographic Boundaries', 'Geographic Boundaries'),
      str_to_upper(level),
      YYYY,
      base_name_tmp)
    shp_out_tmp = file.path( shp_folder,glue("{base_name_tmp}.shp"))
    
    ## Import if not exists
    if (!file.exists(shp_out_tmp)) {
      if (!dir.exists(shp_folder)) dir.create(shp_folder, recursive = T)
      
      sf_tigris_results_tmp <- tigris::places(state = scope, 
                                              year = YYYY,
                                              cb = cb)
      
      sf_tigris_results_tmp %>% 
        terra::vect() %>%
        terra::writeVector(shp_out_tmp)
    }
  })
2.2 TIGER FTP (supplement)
There are four level-years for which we need to use the FTP. We downloaded the zipped files and stored them in the same structure as the FTP in our downstream block location (a hedged download sketch follows).
We will now unzip them into the same structure as the FTP.
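For reference, here is a minimal sketch of how one such zip could be pulled. The FTP URL pattern below is an assumption based on the TIGER2010 archive layout and should be verified against the archive before use:
```{r}
## Hypothetical download of one supplemental file (URL pattern assumed, not verified here)
url_tmp = "https://www2.census.gov/geo/tiger/TIGER2010/CBSA/2010/tl_2010_us_cbsa10.zip"
zip_out = file.path(block_downstream$path, "TIGER10", "Geographic Boundaries",
                    "CBSA", "2010", basename(url_tmp))
if (!file.exists(zip_out)) {
  dir.create(dirname(zip_out), recursive = TRUE, showWarnings = FALSE)
  download.file(url_tmp, zip_out, mode = "wb")
}
```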
```{r}
## Get all zipped files
vec_zip_files = list.files(block_downstream$path, full.names = T, recursive = T) %>% 
  str_subset('zip$')
## Unzip
vec_zip_files %>%
  purrr::walk(~{
    
    # file_tmp = vec_zip_files %>% pluck(7)
    file_tmp = .x
    
    ## Get base name 
    baseName = basename(.x) %>% stringr::str_remove("\\.zip")
    path_shp_folder = .x %>% stringr::str_remove("\\.zip")
    cli_alert("start unzip: {baseName}")
    ## Unzip 
    if (dir.exists(path_shp_folder)) {
      cli_alert_info(glue("{baseName} already exists"))
      return()
    }
    unzip(.x, exdir = path_shp_folder)
    cli_alert_success(glue("Unzipped {baseName}"))
  })
```
At this point our shape lake should already be finished. Let's do some EDA and QC before we wrap this up.
2.3 QC
Let's first inventory all our files.
vec__all_shp = list.files(block_downstream$path, full.names = T, recursive = T) %>% 
  str_subset('shp$')
df_inventory = tibble(
  path = vec__all_shp
) %>% 
  rowwise() %>% 
  mutate(
    tiger10_file =  basename(path) %>% 
      # str_remove('\\_cb.shp') %>% 
      str_remove('\\.shp'),
    cb = str_detect( basename(path), '\\_cb.shp'),
    year = paste0('20', str_sub(tiger10_file %>% str_remove('\\_cb'), -2L, -1L)),
    scope = str_extract(tiger10_file, '_\\d{2}_|_us_') %>% str_remove_all('_'),
    geo = str_extract(tiger10_file, 'zcta5|tract|cousub|county|place|cbsa') %>% 
      recode('zcta5' = 'zcta'),
    vintage = paste0(geo, str_sub(year, -2L, -1L), ifelse(cb, '_cb', ''))
  ) %>% 
  ungroup() %>% 
  filter(!str_detect(tiger10_file, '_albers')) %>% 
  select(everything(), path) %>% 
  assert(is_uniq, tiger10_file) %>% 
  assert(not_na, c('geo','vintage')) 
  
head(df_inventory)
| path | tiger10_file | cb | year | scope | geo | vintage | 
|---|---|---|---|---|---|---|
| //files.drexel.edu/colleges/SOPH/Shared/UHC/Projects/CCUH/ccuh-server/freeze/tiger10_boundaries__v1.1/TIGER10/Cartographic Boundaries/PLACE/2020/tl_2010_01_place20_cb/tl_2010_01_place20_cb.shp | tl_2010_01_place20_cb | TRUE | 2020 | 01 | place | place20_cb | 
| //files.drexel.edu/colleges/SOPH/Shared/UHC/Projects/CCUH/ccuh-server/freeze/tiger10_boundaries__v1.1/TIGER10/Cartographic Boundaries/PLACE/2020/tl_2010_02_place20_cb/tl_2010_02_place20_cb.shp | tl_2010_02_place20_cb | TRUE | 2020 | 02 | place | place20_cb | 
| //files.drexel.edu/colleges/SOPH/Shared/UHC/Projects/CCUH/ccuh-server/freeze/tiger10_boundaries__v1.1/TIGER10/Cartographic Boundaries/PLACE/2020/tl_2010_04_place20_cb/tl_2010_04_place20_cb.shp | tl_2010_04_place20_cb | TRUE | 2020 | 04 | place | place20_cb | 
| //files.drexel.edu/colleges/SOPH/Shared/UHC/Projects/CCUH/ccuh-server/freeze/tiger10_boundaries__v1.1/TIGER10/Cartographic Boundaries/PLACE/2020/tl_2010_05_place20_cb/tl_2010_05_place20_cb.shp | tl_2010_05_place20_cb | TRUE | 2020 | 05 | place | place20_cb | 
| //files.drexel.edu/colleges/SOPH/Shared/UHC/Projects/CCUH/ccuh-server/freeze/tiger10_boundaries__v1.1/TIGER10/Cartographic Boundaries/PLACE/2020/tl_2010_06_place20_cb/tl_2010_06_place20_cb.shp | tl_2010_06_place20_cb | TRUE | 2020 | 06 | place | place20_cb | 
| //files.drexel.edu/colleges/SOPH/Shared/UHC/Projects/CCUH/ccuh-server/freeze/tiger10_boundaries__v1.1/TIGER10/Cartographic Boundaries/PLACE/2020/tl_2010_08_place20_cb/tl_2010_08_place20_cb.shp | tl_2010_08_place20_cb | TRUE | 2020 | 08 | place | place20_cb | 
Now let's run a few tests to make sure we have all the files we expect:
- Test Passed: No redundant files: if a US-level file is available, there are no state-level files
- Test Passed: Tract is only available at the state level (due to size)
- Test Passed: All crosswalk states are available in the state-level instances
## Test 1: no redundant files (a US-level file implies no state-level files)
df_invalid = df_inventory %>% 
  group_by(geo, year) %>%
  summarize(
    invalid = case_when(
      'us' %in% scope & n_distinct(scope) > 1 ~ T,
      TRUE ~ F
    )) %>% 
  ungroup() %>% 
  filter(invalid )
if (nrow(df_invalid) > 0) cli_abort("ERROR: Redundant files found")
## Test 2: tract is only available at the state level
df_invalid = df_inventory %>% 
  filter(geo == 'tract') %>% 
  filter(scope == 'us')
if (nrow(df_invalid) > 0) cli_abort("ERROR: Tract is only available at state level")
## Test 3: all crosswalk states present for every state-level geo-year
df_invalid = df_inventory %>% 
  filter(scope !='us') %>% 
  group_by(geo, year) %>%
  summarize(
    valid = all(xwalk_state$state_census_geoid %in% scope),
    states_missing = setdiff(xwalk_state$state_census_geoid, scope) %>% str_c(collapse = ',')
    ) %>% 
  ungroup() %>% 
  filter(!valid)
if (nrow(df_invalid) > 0) cli_abort("ERROR: Not all states are available in state level instances")
3. Inventory
3.1 Partitioned processing
df_inventory %>% 
  count(year, geo, cb) %>% 
  arrange(n, geo, year)
| year | geo | cb | n | 
|---|---|---|---|
| 2010 | cbsa | FALSE | 1 | 
| 2000 | county | FALSE | 1 | 
| 2010 | county | FALSE | 1 | 
| 2000 | zcta | FALSE | 1 | 
| 2010 | zcta | FALSE | 1 | 
| 2010 | cousub | FALSE | 54 | 
| 2019 | cousub | FALSE | 54 | 
| 2020 | place | FALSE | 54 | 
| 2020 | place | TRUE | 54 | 
| 2000 | tract | FALSE | 54 | 
| 2010 | tract | FALSE | 54 | 
| 2000 | cousub | FALSE | 56 | 
| 2000 | place | FALSE | 56 | 
| 2010 | place | FALSE | 56 | 
We can see that the available boundary files from the Census Bureau differ by year and geographic level. Let's combine these into a single dataset for easier access. Because the ZCTA files carry no state identifiers, we first build a ZCTA-to-state crosswalk from the Census relationship file, assigning each ZCTA to the state holding the largest share of its population.
xwalk_zcta_county = read.csv('https://www2.census.gov/geo/docs/maps-data/data/rel/zcta_county_rel_10.txt') %>% 
  as_tibble() %>% 
  mutate(geoid = str_pad(ZCTA5, width = 5,  side = 'left', pad ='0'))  %>% 
  mutate(state_census_geoid = str_pad(STATE, width = 2,  side = 'left', pad ='0')) %>% 
  group_by(geoid) %>% 
  filter(ZPOPPCT == max(ZPOPPCT)) %>% 
  ungroup() %>% 
  select(geoid , state_census_geoid) %>% 
  mutate_all(as.character) %>% 
  left_join(xwalk_state %>% select(state_census_geoid, state_abb))
df_inventory %>% 
  filter(year %in% 2010:2020) %>%  
  group_by(row_number()) %>% 
  group_walk(~{
    
    ## Setup
    ## .x = df_inventory %>% filter(year %in% 2010, geo == 'cousub', scope == '05') %>% slice(1)
    out_path = file.path(
      'cache',
      'hdfs',
      paste0(.x$tiger10_file,'.parquet')
    )
    if (file.exists(out_path)) {
      cli_alert(paste(.x$tiger10_file,'- already cached'))
      return()
    } else {
       cli_alert_info(paste('Start processing', .x$tiger10_file))
    }
    
    ## Standardize 
    if (.x$geo == 'cbsa'){
      sfa = .x$path %>% 
        st_read() %>% 
        {if("GEOID10" %in% names(.)) select(., -any_of("GEOID")) else .} %>%
        rename_with(
          ~case_when(
            . == "GEOID10" ~ "geoid",
            . == "GEOID" ~ "geoid",
            . == "ALAND10" ~ "ALAND",
            . == "AWATER10" ~ "AWATER",
            TRUE ~ .
          )
        ) %>% 
        mutate(state_abb = str_sub(NAME10, -2L, -1L)) %>% 
        left_join(xwalk_state %>% select(state_census_geoid, state_abb) ) %>% 
        select(geoid, state_abb, state_census_geoid, name = NAMELSAD10, ALAND, AWATER) 
    } else if (.x$geo == 'zcta'){
      sfa = .x$path %>% 
        st_read() %>% 
        {if("GEOID10" %in% names(.)) select(., -any_of("GEOID")) else .} %>%
        rename_with(
          ~case_when(
            . == "GEOID10" ~ "geoid",
            . == "GEOID" ~ "geoid",
            . == "ALAND10" ~ "ALAND",
            . == "AWATER10" ~ "AWATER",
            TRUE ~ .
          )
        ) %>% 
        left_join(xwalk_zcta_county ) %>% 
        mutate(name = '') %>% 
        select(geoid, state_abb, state_census_geoid, name, ALAND, AWATER ) 
    } else {
      sfa = .x$path %>% 
        st_read() %>% 
        {if("STATEFP10" %in% names(.)) select(., -any_of("STATEFP")) else .} %>%
        {if("GEOID10" %in% names(.)) select(., -any_of("GEOID")) else .} %>%
        {if("NAMELSAD10" %in% names(.)) select(., -any_of("NAMELSAD")) else .} %>%
        rename_with(
          ~case_when(
            . == "STATEFP10" ~ "state_census_geoid",
            . == "STATEFP" ~ "state_census_geoid", 
            . == "NAMELSAD10" ~ "name",
            . == "NAMELSAD" ~ "name",
            . == "GEOID10" ~ "geoid",
            . == "GEOID" ~ "geoid",
            . == "ALAND10" ~ "ALAND",
            . == "AWATER10" ~ "AWATER",
            TRUE ~ .
          )
        ) %>% 
        left_join(xwalk_state %>% select(state_census_geoid, state_abb))  %>% 
        select(geoid, state_abb, state_census_geoid, name, ALAND, AWATER ) 
    }
    
    
    ## Check CRS
    crs <- st_crs(sfa)
    valid_crs =  grepl("NAD83", crs$input) || crs$epsg == 4269
    if (!valid_crs) cli_abort("Invalid CRS!!")
    
    ## Export as parquet
    sf_final = sfa %>% 
      mutate(
        name = utf8::utf8_encode(name),
        vintage = .x$vintage, 
        geo = .x$geo,
        cb = .x$cb )%>% 
      st_cast("MULTIPOLYGON") %>%  
      select(geoid, name, geo, state_abb, state_census_geoid, vintage, cb, ALAND, AWATER, geometry ) %>% 
      tibble::as_tibble()
    sf_final |> 
      write_parquet(out_path)
    cli_alert_success(paste(.x$tiger10_file,'- exported as parquet'))
  })
Now we can access this partitioned file system with dplyr syntax via Arrow.
library(arrow)
library(geoarrow)
library(sf)
library(leaflet)
## Connect to API
api <- open_dataset("cache/hdfs/", format = "parquet")
## Explore availability by vintage 
api %>% 
  count(vintage,cb) %>% 
  collect()
| vintage | cb | n | 
|---|---|---|
| cousub10 | FALSE | 36685 | 
| cousub19 | FALSE | 36665 | 
| place10 | FALSE | 29793 | 
| place20 | FALSE | 31976 | 
| place20_cb | TRUE | 31976 | 
| tract10 | FALSE | 74091 | 
| cbsa10 | FALSE | 955 | 
| county10 | FALSE | 3221 | 
| zcta10 | FALSE | 33120 | 
## Query for boundaries of interest
sf_tmp = api %>% 
  filter(state_abb == 'CA', vintage  == 'place20_cb', cb == T) %>% 
  st_as_sf() 
## Map
sf_tmp %>% 
  leaflet() %>% 
  addTiles() %>% 
  addPolygons()
3.2 Compile
Let's compile these partitioned parquet files into a single file for deployment; this gives users performance benefits.
if (!file.exists("cache/tiger_us_boundaries_v1/tiger_us_boundaries_v1-0.parquet")){
  ds_arrow <- open_dataset('cache/hdfs')
  
  ds_arrow %>%
    write_dataset('cache/tiger_us_boundaries_v1', 
                  basename_template = "tiger_us_boundaries_v1-{i}.parquet",
                  format = "parquet")
}
Let's test this boundaries API.
## Connect to API
api_geoarrow <- open_dataset("cache/tiger_us_boundaries_v1/tiger_us_boundaries_v1-0.parquet")
## Explore availability by vintage 
api_geoarrow %>% 
  count(vintage,geo) %>% 
  collect()
| vintage | geo | n | 
|---|---|---|
| place10 | place | 29793 | 
| cousub10 | cousub | 36685 | 
| place20_cb | place | 31976 | 
| cousub19 | cousub | 36665 | 
| place20 | place | 31976 | 
| tract10 | tract | 74091 | 
| cbsa10 | cbsa | 955 | 
| county10 | county | 3221 | 
| zcta10 | zcta | 33120 | 
## Query for boundaries of interest
sf_tmp = api_geoarrow %>% 
  filter(state_abb == 'PA', vintage  == 'county10') %>% 
  st_as_sf() 
## Map
sf_tmp %>% 
  leaflet() %>% 
  addTiles() %>% 
  addPolygons()
Okay, looks good.
3.3 Deploy
To deploy, we copy the compiled parquet file to the CCUH server at \\files.drexel.edu\colleges\SOPH\Shared\UHC\Projects\CCUH\ccuh-server\freeze\tiger10_boundaries__v1.1\tiger_us_boundaries_v1.parquet.
if (!file.exists(block_downstream$api)) {
  
  # Copy file to server
  # Copy file to server
  copied = file.copy(
    from = file.path("cache/tiger_us_boundaries_v1/tiger_us_boundaries_v1-0.parquet"),
    to = block_downstream$api,
    overwrite = TRUE
  )
  
  # Verify copy succeeded before reporting success
  if (copied) {
    cli_alert_success("File successfully deployed to server")
  } else {
    cli_abort("File copy to server failed")
  }
  
} else {
  cli_alert("TIGER boundaries already deployed!")
}
4. Access
Data
To use this API you will need to define the following arguments:
- vintage: what vintage you want (e.g. tract10 or place20 or place20_cb)
- state_abb: optional for specific states
```{r}
library(arrow)
library(geoarrow)
library(sf)
library(leaflet)
## Connect to API
api <- "//files.drexel.edu/colleges/SOPH/Shared/UHC/Projects/CCUH/ccuh-server/freeze/tiger10_boundaries__v1.1/tiger_us_boundaries_v1.parquet" %>% 
  open_dataset()
## Query for boundaries of interest
sf_tmp = api %>% 
  filter(state_abb == 'PA', vintage  == 'county10') %>% 
  st_as_sf() 
## Map
sf_tmp %>% 
  leaflet() %>% 
  addTiles() %>% 
  addPolygons()
```
Metadata
tribble(
  ~var_name,           ~var_type,        ~var_def,
  "geoid",             "character",       "Unique geographic identifier",
  "name",              "character",       "Geographic name derived from NAMELSAD10",
  "geo",               "character",       "Geographic level identifier (tract, county, place, cbsa, cousub, zcta)",
  "state_abb",         "character",       "State abbreviation (2-letter)",
  "state_census_geoid", "character",      "State FIPS code derived from STATEFP/STATEFP10 (2-digit)",
  "vintage",           "character",       "Temporal identifier combining geo level and year (e.g., 'tract10' for 2010 Census tracts)", 
  "cb",                "boolean",         "True for Census Bureau simplified cartographic boundaries optimized for small-scale thematic mapping, False for full-detail TIGER/Line geographic boundaries. Cartographic boundaries are generalized representations that exclude water areas and are designed for visualization at smaller scales",
  "ALAND",             "numeric",         "Land area in square meters",
  "AWATER",            "numeric",         "Water area in square meters",
  "geometry",          "MULTIPOLYGON",    "Geographic boundary geometry in NAD83 (EPSG:4269) coordinate system"
) %>% 
  reactable()
5. Downstream
5.1 Albers lake
Here we operationalize an Albers shapefile lake for PRISM processing. Note that in addition to state-level boundaries we include a CONUS boundary, which covers the contiguous US states plus the District of Columbia.
api <- block_downstream$api %>% 
  open_dataset()
template_state = api %>% 
  count(state_abb, vintage) %>% 
  filter(!is.na(state_abb)) %>% 
  collect() %>% 
  mutate(
    year_abbrv = str_extract_all(vintage, "\\d{2}") %>% map_chr(~.x[1]),
    vintage_clean = vintage %>% 
      str_replace_all('_cb', "CB")) %>% 
  select(state_abb_tmp = state_abb, 
         year_abbrv_tmp = year_abbrv,
         vintage_tmp = vintage , 
         vintage_clean_tmp = vintage_clean) 
template_conus = template_state %>% 
  select(-state_abb_tmp) %>% 
  distinct() %>% 
  mutate(state_abb_tmp = 'CONUS')
template = bind_rows(template_conus, template_state)
template %>% 
  head(20) %>% 
  reactable()
Develop our iteration logic
tigris_to_albers_lake = function(
    state_abb_tmp, year_abbrv_tmp, 
    vintage_tmp, vintage_clean_tmp, 
    overwrite = F){
  
 
  
  { # Setup  ----------------------------
    crs_tmp = 'albers'
    state_abb_cleaned_tmp = state_abb_tmp
    if (state_abb_tmp == 'CONUS') {
      state_abb_cleaned_tmp = xwalk_state %>% 
        filter(state_contiguous  == 1) %>% 
        pull(state_abb)
    }
    base_name_tmp = glue("tl_{state_abb_tmp}_{vintage_clean_tmp}")
    out_path = glue("{block_downstream$lake_albers_shp}/{base_name_tmp}_{crs_tmp}.shp")
  
    
     # cli_alert("Start: {vintage_tmp} - {state_abb_tmp} -> {out_path}")
  }
  
  
  { # Project + Export  ---------------
    
    if (!file.exists(out_path) | overwrite) {
      
      ## Import 
      tigris_results_tmp = api %>% 
        filter(vintage == vintage_tmp,
               state_abb %in% state_abb_cleaned_tmp) %>% 
        st_as_sf()
      if (nrow(tigris_results_tmp) < 1) cli_abort("Error in API boundary query!!")
      
      ## Project to albers
      sf_tmp = tigris_results_tmp %>% 
        st_transform("EPSG:5070")
      
      ## Export
      sf_tmp %>% 
        terra::vect() %>%
        terra::writeVector(out_path)
    }
  }
  
}
Let's iterate:
```{r}
pmap(
  template %>% 
       filter(state_abb_tmp == 'CONUS',
              vintage_tmp %in% c('tract10', 'county10')),
  tigris_to_albers_lake)
```
You can access this lake with this code:
```{r}
list.files("//files.drexel.edu/colleges/SOPH/Shared/UHC/Projects/CCUH/ccuh-server/freeze/tiger10_boundaries__v1.1/lake_albers_shp", 
  recursive = T,
  full.names = T,
  pattern = '_albers.shp')
```
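Each entry is a standard shapefile projected to EPSG:5070, so it can be read back with sf or terra. A minimal sketch, assuming a CONUS tract file exists in the lake (the file name below is hypothetical; substitute one returned by `list.files()` above):
```{r}
## Read one Albers boundary back in and confirm the projection (hypothetical file name)
path_tmp = file.path(block_downstream$lake_albers_shp, "tl_CONUS_tract10_albers.shp")
sf_conus_tract = sf::st_read(path_tmp)
sf::st_crs(sf_conus_tract)$epsg  # expect 5070 (CONUS Albers)
```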