NCLD Landcover Dataset v1

Authors

Ran Li (Maintainer)

Joe Simeone (Context Contributor)

Published

January 27, 2025

This is one of a series of notebooks that downloads geospatial data available from several federated data sources via the FedData R package. The data is stored in a cache as geotiff projected as Albers USAL NAD83 which can then be used to generate zonal statistics.

This notebook focuses on the National Land Cover Database (NLCD) Tree Landcover Dataset.

## Dependencies
pacman::p_load(tidyverse, exactextractr, terra, glue, sfarrow, 
               leaflet, furrr, sf, purrr, cli, tictoc, assertr, reactable,
               tigris, FedData, mapview)
source("_global_context.R")

## Local Context 
context = c(global_context)


## Local objects
z_PA <- states(cb = F, year = 2010, progress_bar = F) |> 
  filter(STUSPS10 == 'PA') |>
  st_transform("EPSG:5070")  # USA Contiguous Albers Equal Area Conic

Setup

Parameter space

Test: Get NLCD raster data
df_template = tibble(
  database = "NLCD",
  state = "PA",
  year = c(2001, 2004, 2006, 2008, 2011, 2016, 2019, 2021),
  dataset = list(c('Landcover'))
) |>
  unnest(dataset) |>
  mutate(
    out_filename = glue("{state}_{database}_{dataset}_{year}_albers"),
    out_folder  = file.path(
      context$local_raster, 
      out_filename),
    out_path = file.path(
      out_folder,
      paste0(out_filename, '.tif')),
  ) 


df_template %>% 
  filter(dataset == 'Landcover') %>% 
  reactable()

Looks good to me I think we can iterate over all available parameters.

Function

Lets write a function to download this data using the FedData Pacakge. Note that the FedData funciton will download a raster data based on a inputed boundary, for now we will just do state level pulls so the input would be the state boundaries.

download_ncld_landcover = function(row){
  
  ## Setup
  # row =  df_template %>% slice(1)
  # row =  df_template %>% filter(dataset == 'Landcover', year == 2011)
  cli::cli_alert("Start download for {row$dataset} - {row$state} {row$year}")

  ## Pull Data
  if (!file.exists(row$out_path)) {
    tryCatch(
      {
        
        ## State Boundaries
        z_PA <- states(cb = F, year = 2010, progress_bar = F) |> 
          filter(STUSPS10 == row$state) |>
          # USA Contiguous Albers Equal Area Conic
          st_transform("EPSG:5070")  
        
        ## NCLD Raster dataset
        r_tmp <- get_nlcd(
          template = z_PA,
          label = row$state,
          dataset = str_to_lower(row$dataset),
          year = row$year 
        ) |>
          # Use clamp to keep only valid percentages (0-100) - e.g. remove 255 garbage value
          terra::clamp(lower=0, upper=100, values=TRUE)
        
        ## Write
        if (!dir.exists(row$out_folder)) dir.create(row$out_folder)
        terra::writeRaster(
          r_tmp,
          filename = row$out_path,
          overwrite = TRUE
        )
        
        cli_alert_success("Finished processing and export: {row$out_filename}")
      },
      error = function(e) {
        cli::cli_alert_danger("Results not available for this combination")
        if (dir.exists(row$out_folder)) {
          unlink(row$out_folder, recursive = T)
        }
        return()  # or whatever default value you want to return on failure
      }
    )
    
    
  } else {
    cli_alert("Already exists: {row$out_filename}")
  }
  
}

Data

Pull

```{r}
df_template  %>% 
  filter(dataset == 'Landcover') |>
  group_by(row_number()) |>
  group_walk(~{download_ncld_landcover(.x)})
```

Note that Results were not available from the API at the time of pull for the following combinations:

  • Landcover
    • 2021

All the other years are available and downloaded as albers projected raster data.

Visual Inspection

Plot raster
file_tmp = list.files("cache_sensitive/raster/", recursive = T, full.names = T) %>% 
  keep(~str_detect(.x, "Landcover")) 

r_test = file_tmp %>%
  pluck(1) %>% 
  terra::rast()
# st_crs(r_test)

plot(r_test, main = basename(file_tmp))
plot(z_PA, add = TRUE, col = NA, lwd = 4, border = "black")

Looks good to me!

Landcover class distribution

The data stored in this raster files are categorical.

Count Classes for two years
if (!file.exists("cache/preview-landcover_2001.parquet")){
  r_test = file_tmp %>%  
    purrr::keep(~str_detect(.x,"2001.*\\.tif$")) %>% 
    terra::rast()
  df_2001 = as.data.frame(r_test, xy=TRUE, cells=TRUE, na.rm=TRUE) 
  df_2001 %>% 
    count(Class) %>% 
    arrange(desc(n)) %>% 
    arrow::write_parquet('cache/preview-landcover_2001.parquet')
} else {
  df_2001 = arrow::read_parquet('cache/preview-landcover_2001.parquet') 
}
if (!file.exists("cache/preview-landcover_2019.parquet")){
  r_test = file_tmp %>%  
    purrr::keep(~str_detect(.x,"2019.*\\.tif$")) %>% 
    terra::rast()
  df_2019 = as.data.frame(r_test, xy=TRUE, cells=TRUE, na.rm=TRUE) 
  df_2019 %>% 
    count(Class) %>% 
    arrange(desc(n)) %>% 
    arrow::write_parquet('cache/preview-landcover_2019.parquet')
} else {
  df_2019 = arrow::read_parquet('cache/preview-landcover_2019.parquet') 
}

Let’s do a quick visualizaiton of the disibturtion.

Plot change between 2001 and 2019
# Combine the data for both years
combined_data <- bind_rows(
  df_2001 %>% mutate(year = 2001),
  df_2019 %>% mutate(year = 2019)
)

# Calculate the percentage for each class within each year
plot_data <- combined_data %>%
  group_by(year) %>%
  mutate(total = sum(n),
         percentage = n/total * 100) %>%
  ungroup() %>%
  # Reorder classes by the average percentage across both years
  group_by(Class) %>%
  mutate(avg_pct = mean(percentage)) %>%
  ungroup() %>%
  mutate(Class = fct_reorder(Class, avg_pct))

# Create the visualization
ggplot(plot_data, aes(x = Class, y = percentage, fill = factor(year))) +
  geom_bar(stat = "identity", position = "dodge", width = 0.8) +
  scale_fill_manual(values = c("2001" = "#1f77b4", "2019" = "#ff7f0e"),
                    name = "Year") +
  coord_flip() +
  theme_minimal() +
  labs(
    title = "Land Use Class Distribution: 2001 vs 2019",
    subtitle = "Comparison of land use classes as percentage of total area",
    x = "Land Use Class",
    y = "Percentage of Total Area (%)"
  ) +
  theme(
    axis.text.y = element_text(size = 10),
    axis.title = element_text(size = 12),
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size = 11),
    legend.position = "bottom",
    panel.grid.major.y = element_blank()
  )

Metadata

The columns include:

Code
df_codebook <- tribble(
  ~var,                        ~var_def,                                              ~value_type,
 "cell",                            "Unique cell identifier in raster grid",                 "numeric index",
  "x",                               "X coordinate in NAD83/Conus Albers projection (m)",     "coordinate (meters)",
  "y",                               "Y coordinate in NAD83/Conus Albers projection (m)",     "coordinate (meters)",
  "Class",                           "Land cover classification based on NLCD categories",     "categorical (factor)"
)

Here is another table of the coding

Code
df_coding <- tribble(
  ~class,                            ~class_def,
  # Developed Categories
  "Developed, Open Space",           "Areas with a mixture of constructed materials and vegetation, with impervious surfaces accounting for <20% of total cover. Most commonly includes large-lot single-family housing units, parks, golf courses, and vegetation planted in developed settings.",
  "Developed, Low Intensity",        "Areas with a mixture of constructed materials and vegetation, with impervious surfaces accounting for 20-49% of total cover. Most commonly includes single-family housing units.",
  "Developed, Medium Intensity",     "Areas with a mixture of constructed materials and vegetation, with impervious surfaces accounting for 50-79% of total cover. Most commonly includes single-family housing units.",
  "Developed High Intensity",        "Highly developed areas where people reside or work in high numbers. Impervious surfaces account for 80-100% of total cover. Examples include apartment complexes, row houses, and commercial/industrial areas.",
  
  # Forest Categories
  "Deciduous Forest",                "Areas dominated by trees generally greater than 5 meters tall and greater than 20% of total vegetation cover. More than 75% of the tree species shed foliage simultaneously in response to seasonal change.",
  "Evergreen Forest",                "Areas dominated by trees generally greater than 5 meters tall and greater than 20% of total vegetation cover. More than 75% of the tree species maintain their leaves all year. Canopy is never without green foliage.",
  "Mixed Forest",                    "Areas dominated by trees generally greater than 5 meters tall and greater than 20% of total vegetation cover. Neither deciduous nor evergreen species are greater than 75% of total tree cover.",
  
  # Agricultural Categories
  "Pasture/Hay",                     "Areas of grasses, legumes, or grass-legume mixtures planted for livestock grazing or the production of seed or hay crops, typically on a perennial cycle. Pasture/hay vegetation accounts for greater than 20% of total vegetation.",
  "Cultivated Crops",                "Areas used for the production of annual crops, such as corn, soybeans, vegetables, tobacco, and cotton, and perennial woody crops such as orchards and vineyards. Crop vegetation accounts for greater than 20% of total vegetation.",
  
  # Natural/Semi-Natural Categories
  "Grassland/Herbaceous",            "Areas dominated by gramanoid or herbaceous vegetation, generally greater than 80% of total vegetation. These areas are not subject to intensive management such as tilling but can be utilized for grazing.",
  "Shrub/Scrub",                     "Areas dominated by shrubs less than 5 meters tall with shrub canopy typically greater than 20% of total vegetation. This class includes true shrubs, young trees in an early successional stage, or trees stunted from environmental conditions.",
  "Barren Land (Rock/Sand/Clay)",    "Areas of bedrock, desert pavement, scarps, talus, slides, volcanic material, glacial debris, sand dunes, strip mines, gravel pits, and other accumulations of earthen material. Generally, vegetation accounts for less than 15% of total cover.",
  
  # Water and Wetland Categories
  "Open Water",                      "Areas of open water, generally with less than 25% cover of vegetation or soil. Includes streams, rivers, lakes, and ocean.",
  "Woody Wetlands",                  "Areas where forest or shrubland vegetation accounts for greater than 20% of vegetative cover and the soil or substrate is periodically saturated with or covered with water.",
  "Emergent Herbaceous Wetlands",    "Areas where perennial herbaceous vegetation accounts for greater than 80% of vegetative cover and the soil or substrate is periodically saturated with or covered with water."
)

Access

Data

```{r}

library(terra)

all_shp = list.files('//files.drexel.edu/colleges/SOPH/Shared/UHC/Projects/CCUH/ccuh-server/freeze//fed_data_v1/raster',
                     recursive = T,
                     pattern = "Landcover.*\\.tif$",
                     full.names = T) 

raster_tmp = all_shp[[1]] |> terra::rast()

plot(raster_tmp, main = basename(file_tmp))
```

Metadata

Codebook

The metadata for this Raster object includes:

Code
df_codebook %>% 
  reactable()

Coding

Code
df_coding %>% 
  reactable()