DBT Docs to Notion Integration - Proof of Concept
1 Introduction
This guide outlines the process of integrating DBT (Data Build Tool) documentation with Notion to create a lightweight, user-friendly data catalog. By leveraging the strengths of both tools, we can increase the visibility of our data warehouse with minimal infrastructure overhead.
1.1 Prerequisites
- Basic understanding of DBT
- DBT project initialized with existing SQL or Python models and documentation
- R programming environment
- Notion account with admin privileges
2 Set up Notion Integration
- Navigate to https://www.notion.so/profile/integrations
- Click ‘New Integration’
- Enter information:
- Basic information:
- Integration name (e.g., “dbt-docs-v1”)
- Select type as internal
- Select workspace (e.g., UHC Climate)
- Configurations:
- Check all content and comment capabilities
- User capabilities: check “No user information”
3 Create a Notion page for dbt-docs
- Navigate to Notion’s home
- Create a new page (e.g., “Datawarehouse”)
- Add a connection to the integration:
- Click page settings (… button in the top right)
- Add a connection to the integration created in the previous step
- Search for the integration
- Select and confirm the connection
4 Implement the ETL Pipeline
4.1 Setup
Load required libraries and environment variables:
library(pacman)
pacman::p_load(tidyverse, httr, jsonlite, glue, dotenv, cli)
dotenv::load_dot_env()
NOTION_TOKEN <- Sys.getenv("SECRET_NOTION")
DATABASE_PARENT_ID <- "aa290b0231ed45ca92439e6cb05ba3e4"
DATABASE_NAME <- "DBT Docs - Proof of Concept (Database)"
DATABASE_ID <- "31bb9b7a9ab246f5b7f47227bb5fec89"
NUMERIC_ZERO_VALUE <- -1
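The Notion token is read from a .env file via dotenv::load_dot_env(). A minimal sketch of that file (kept out of version control), assuming the SECRET_NOTION key used above; the placeholder value includes the "Bearer " prefix because make_request later passes it straight into the Authorization header:
SECRET_NOTION=Bearer secret_XXXXXXXXXXXXXXXXXXXXXXXXXXXX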
4.2 Helper Functions
Implement helper functions for API requests and data parsing:
make_request <- function(endpoint,
                         querystring = "",
                         method = "GET",
                         body = NULL,
                         body_format_json = FALSE) {
  Sys.sleep(0.34) # Notion API limit is ~3 requests per second
  url <- glue("https://api.notion.com/v1/{endpoint}{querystring}")
  headers <- c(
    "Authorization" = NOTION_TOKEN, # stored value is assumed to include the "Bearer " prefix the Notion API expects
    "Content-Type" = "application/json",
    "Notion-Version" = "2022-02-22"
  )
  body_json <- body
  if (!body_format_json) body_json <- toJSON(body, auto_unbox = TRUE)
  response <- VERB(
    verb = method,
    url = url,
    body = if (!is.null(body)) body_json,
    add_headers(headers)
  )
  if (http_error(response)) {
    stop(glue("Request returned status code {status_code(response)}
    Response text: {content(response, 'text')}"))
  }
  content(response, "parsed")
}
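As a quick, illustrative smoke test of this wrapper (not part of the pipeline), the database whose ID was stored in the setup chunk can be retrieved and its title printed; this assumes that database already exists, as it does later in this walkthrough:
# Illustrative smoke test: GET /v1/databases/{DATABASE_ID} and print the database title.
db_check <- make_request(endpoint = "databases/", querystring = DATABASE_ID)
cat(glue("API reachable, found database: {db_check$title[[1]]$plain_text}\n"))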
get_paths_or_empty <- function(parent_object, paths_array, zero_value = "") {
  # Return the value at the first path in paths_array that resolves to a non-NULL
  # value inside parent_object; otherwise return zero_value.
  found_path <- detect(paths_array, ~ !is.null(purrr::pluck(parent_object, !!!.x)))
  if (is.null(found_path)) return(zero_value)
  purrr::pluck(parent_object, !!!found_path)
}
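To illustrate the fallback behaviour with a toy nested list (made-up values):
# Toy example: returns the value at the first path that resolves, else the fallback.
node <- list(config = list(meta = list(owner = "data-team")))
get_paths_or_empty(node, list(c("config", "meta", "owner")))         # "data-team"
get_paths_or_empty(node, list(c("config", "meta", "maturity")), "")  # "" (fallback)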
get_owner <- function(data, catalog_nodes, model_name) {
  # Prefer an owner declared in the model's meta config; fall back to the catalog metadata.
  owner <- get_paths_or_empty(data, list(c("config", "meta", "owner")), NULL)
  if (!is.null(owner)) return(owner)
  get_paths_or_empty(catalog_nodes, list(c(model_name, "metadata", "owner")), "")
}
4.3 Parse DBT Docs
Parse manifest.json and catalog.json:
manifest <- jsonlite::read_json("../target/manifest.json")
catalog <- jsonlite::read_json("../target/catalog.json")
manifest_nodes <- manifest$nodes %>% keep(~ .x$description != "")
catalog_nodes <- catalog$nodes %>% keep(~ .x$unique_id %in% names(manifest_nodes))
models <- manifest_nodes %>%
  keep(~ .$resource_type == "model") %>%
  enframe(name = "node_name", value = "data")
4.4 Initialize Notion Database
Check if the database exists and create it if not:
children_query_resp <- make_request(
  endpoint = "blocks/",
  querystring = glue("{DATABASE_PARENT_ID}/children"),
  method = "GET"
)
database_id <- children_query_resp$results %>%
  keep(~ !is.null(.$child_database) && identical(.$child_database$title, DATABASE_NAME)) %>%
  map_chr("id") %>%
  first()
db_exists <- !is.na(database_id)
if (db_exists) {
  cat(glue("database {database_id} already exists, proceeding to update records!\n"))
} else {
  cat("creating database\n")
  # database_json holds the JSON definition of the database schema (not shown in this excerpt)
  database_creation_resp <- make_request(
    endpoint = "databases/",
    method = "POST",
    body = fromJSON(database_json)
  )
  database_id <- database_creation_resp$id
  cat(glue("\ncreated database {database_id}, proceeding to create records!\n"))
}
database 31bb9b7a-9ab2-46f5-b7f4-7227bb5fec89 already exists, proceeding to update records!
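The database_json schema definition referenced in the creation branch above is not shown here. A hypothetical sketch of what it could contain, consistent with the Name and Description properties populated in the next section:
# Hypothetical sketch of database_json (not the author's actual schema): a database under
# DATABASE_PARENT_ID with a "Name" title property and a "Description" rich_text property.
database_json <- glue('
{{
"parent": {{ "type": "page_id", "page_id": "{DATABASE_PARENT_ID}" }},
"title": [{{ "type": "text", "text": {{ "content": "{DATABASE_NAME}" }} }}],
"properties": {{
"Name": {{ "title": {{}} }},
"Description": {{ "rich_text": {{}} }}
}}
}}
')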
4.5 Create/Update Records
Implement functions to create JSON structures for Notion records:
make_parent_json <- function(database_id) {
glue('
"parent": {{
"database_id": "{database_id}"
}}
')
}
make_properties_json <- function(data) {
glue('
"properties": {{
"Name": {{
"title": [
{{
"text": {{
"content": "{data$name}"
}}
}}
]
}},
"Description": {{
"rich_text": [
{{
"text": {{
"content": "{str_remove_all(data$description, "\n")}"
}}
}}
]
}}
}}
')
}
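To make the glue templating concrete, here is the properties fragment produced for a toy model (made-up name and description); the individual fragments are later assembled into a single JSON body for the pages endpoint:
# Illustrative: properties fragment for a made-up model.
toy_data <- list(name = "dim_customers", description = "One row per customer.\nBuilt from staging models.")
cat(make_properties_json(toy_data))
# Prints a "properties" object whose Name title is "dim_customers" and whose
# Description rich_text content has the newline stripped out.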
make_text_block <- function(text) {
text_escaped <- gsub("\n", "\\\\n", text)
glue('
{{
"object": "block",
"type": "paragraph",
"paragraph": {{
"rich_text": [
{{
"type": "text",
"text": {{
"content": "{text_escaped}"
}}
}}
]
}}
}}
')
}
make_h1_block <- function(title) {
glue('
{{
"object": "block",
"type": "heading_1",
"heading_1": {{
"rich_text": [
{{
"type": "text",
"text": {{
"content": "{title}"
}}
}}
]
}}
}}
')
}
make_dbt_code_block <- function(data) {
code_str <- data$raw_code %>%
str_replace_all(fixed("\\"), "\\\\") %>% # escape backslashes so the string stays valid JSON
str_replace_all(fixed("\""), "\\\"") %>% # escape double quotes for JSON
str_replace_all("\r\n|\r|\n", "\\\\n") # normalize Windows- and Unix-style line endings to an escaped \n
glue('
{{
"object": "block",
"type": "code",
"code": {{
"rich_text": [
{{
"type": "text",
"text": {{
"content": "{code_str}"
}}
}}
],
"language": "sql"
}}
}}
')
}
4.5.1 Columns Table
prep_df_columns_from_dbt_docs = function(data){
data$columns %>%
map(~{
tibble(
name = .x$name,
data_type = .x$data_type %||% "", # data_type can be missing in manifest.json
description = .x$description %||% ""
)
}) %>%
bind_rows()
}
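For a toy model (made-up column metadata), this helper returns a small tibble:
# Illustrative: the tibble produced from toy column metadata.
toy_model <- list(columns = list(
  list(name = "id", data_type = "integer", description = "Primary key"),
  list(name = "created_at", data_type = "timestamp", description = "Row creation time")
))
prep_df_columns_from_dbt_docs(toy_model)
# A tibble: 2 x 3 (name, data_type, description)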
make_table_row_cell = function(string){
glue('
[
{{
"type": "text",
"text": {{
"content": "{string}"
}},
"plain_text": "{string}"
}}
]
')
}
make_3_cell_row_notion_api = function(cell1, cell2, cell3){
glue('
{{
"type": "table_row",
"table_row": {{
"cells": [
{make_table_row_cell(cell1)},
{make_table_row_cell(cell2)},
{make_table_row_cell(cell3)}
]
}}
}}
')
}
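As a sanity check of the row builders, calling the row helper with made-up values prints a single table_row fragment ready to be embedded in the table block's children array:
# Illustrative: one row fragment built from made-up values.
cat(make_3_cell_row_notion_api("id", "integer", "Primary key"))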
make_table_children_dbt_notion_api = function(data){
df_columns = prep_df_columns_from_dbt_docs(data)
children_json = df_columns %>%
group_by(row_number()) %>%
group_map(~{
make_3_cell_row_notion_api(.x$name, .x$data_type, .x$description)
}) %>%
paste(collapse = ",")
return(
glue('
[
{make_3_cell_row_notion_api("Name", "Data Type", "Description")},
{children_json}
]
')
)
}
make_dbt_table_block = function(data){
glue('{{
"object": "block",
"type": "table",
"table": {{
"table_width": 3,
"has_column_header": true,
"has_row_header": false,
"children": {make_table_children_dbt_notion_api(data)}
}}
}}')
}
Create a function to check if a record exists:
check_record_exists <- function(database_id, data) {
query_json <- glue('
{{
"filter": {{
"property": "Name",
"title": {{
"equals": "{data$name}"
}}
}}
}}
')
record_query_resp <- make_request(
endpoint = "databases/",
querystring = paste0(database_id, "/query"),
method = "POST",
body = query_json,
body_format_json = TRUE
)
record_exists = length(record_query_resp$results) > 0
return(record_exists)
}
Create a function to create or update a DBT record:
create_dbt_record <- function(DATABASE_ID, data) {
body_json <- glue('
{{
{make_parent_json(DATABASE_ID)},
{make_properties_json(data)},
"children": [
{make_h1_block("Description")},
{make_text_block(data$description)},
{make_h1_block("Columns")},
{make_dbt_table_block(data)},
{make_h1_block("Code")},
{make_dbt_code_block(data)}
]
}}
')
record_exists = check_record_exists(DATABASE_ID, data)
if (record_exists) {
cli_alert("Record exists, do nothing")
} else {
cli_alert("Creating record")
make_request(
endpoint = "pages/",
querystring = "",
method = "POST",
body = body_json,
body_format_json = TRUE
)
}
}
4.6 Update All Records
Iterate through all models and create/update records:
for (i in seq_len(nrow(models))) {
  model_name <- models$node_name[i]
  data <- models$data[[i]]
  cli_alert_info("Processing model: {data$name}")
  create_dbt_record(DATABASE_ID, data)
}
Now we have a minimal, lightweight catalog that builds on our work with DBT.
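Because the script reads manifest.json and catalog.json from the dbt target directory, refreshing the catalog only requires regenerating the docs and rerunning the script. A sketch of that cycle, assuming the R script lives in a subfolder of the dbt project and is called sync_notion.R (both assumptions for illustration):
# Hypothetical refresh cycle (script name and project layout are assumptions).
system("dbt docs generate --project-dir ..") # regenerate ../target/manifest.json and catalog.json
source("sync_notion.R")                      # rerun this pipeline to sync newly documented models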

5 Conclusion
This proof of concept demonstrates a successful integration of DBT documentation with Notion, creating a lightweight, accessible data catalog. Key achievements include:
- Leveraging rich DBT metadata
- Bridging technical data management and user-friendly documentation
- Enhancing data warehouse visibility across the organization
5.1 Next Steps
Future enhancements will focus on:
- Utilizing DBT tags for selective model inclusion
- Leveraging DBT meta fields for custom metadata (e.g., owners)
- Operationalizing columns/variables/measures as separate databases
- This will improve reporting capabilities at the measure level
- Organizing the R function logic into a package-like system, or folding it into a NotionR package
As we refine this integration, we aim to create a more dynamic, customizable data catalog aligned with organizational needs. Regular updates will ensure the catalog remains a valuable resource for all stakeholders.
We encourage others to implement and extend this approach and to share their experiences, helping to make data more accessible and understandable across organizations.