Merge Ichthyo & Bottle to Working DuckLake

Published

2026-03-06

Overview

Goal: Merge parquet outputs from both upstream ingest workflows, perform cross-dataset reconciliation (ship matching, cruise bridge, taxonomy standardization), then load into Working DuckLake and create a frozen release.

Steps:

  1. Load parquet files from both upstream workflows into local temp DuckDB
  2. Fuzzy match ships between datasets
  3. Build cruise bridge (casts → cruise_key)
  4. Standardize taxonomy (species → WoRMS/ITIS/GBIF)
  5. Validate cross-dataset integrity
  6. Write modified parquet outputs + manifest with GCS references
  7. Ingest all tables into Working DuckLake with provenance
  8. Freeze release (strip provenance)
  9. Upload to GCS

Upstream workflows (must complete first):

  • ingest_swfsc.noaa.gov_calcofi-db.qmd → ichthyo tables (10)
  • ingest_calcofi.org_bottle-database.qmd → bottle/cast tables (5)
Code
flowchart LR
    subgraph upstream["Upstream Parquet"]
        s["ichthyo<br/>10 tables"]
        b["bottle<br/>5 tables"]
    end
    subgraph merge["Merge DuckDB"]
        m1["Ship matching"]
        m2["Cruise bridge"]
        m3["Taxonomy"]
    end
    subgraph output["Output"]
        p["Modified parquet<br/>+ manifest"]
        w["Working DuckLake"]
        f["Frozen Release"]
    end
    s --> merge
    b --> merge
    merge --> p --> w --> f

    style upstream fill:#e3f2fd,stroke:#1565c0
    style merge fill:#fff3e0,stroke:#ef6c00
    style output fill:#e8f4e8,stroke:#2e7d32
flowchart LR
    subgraph upstream["Upstream Parquet"]
        s["ichthyo<br/>10 tables"]
        b["bottle<br/>5 tables"]
    end
    subgraph merge["Merge DuckDB"]
        m1["Ship matching"]
        m2["Cruise bridge"]
        m3["Taxonomy"]
    end
    subgraph output["Output"]
        p["Modified parquet<br/>+ manifest"]
        w["Working DuckLake"]
        f["Frozen Release"]
    end
    s --> merge
    b --> merge
    merge --> p --> w --> f

    style upstream fill:#e3f2fd,stroke:#1565c0
    style merge fill:#fff3e0,stroke:#ef6c00
    style output fill:#e8f4e8,stroke:#2e7d32
Figure 1: Integration workflow: upstream parquet → merge → Working DuckLake → Frozen Release

0.1 Setup

Code
devtools::load_all(here::here("../calcofi4db"))
devtools::load_all(here::here("../calcofi4R"))
librarian::shelf(
  CalCOFI / calcofi4db,
  DBI,
  dm,
  dplyr,
  DT,
  fs,
  glue,
  here,
  jsonlite,
  purrr,
  readr,
  stringr,
  tibble,
  tidyr,
  worrms,
  taxize,
  quiet = TRUE # TRUE/FALSE, never T/F: the single-letter aliases are reassignable
)
options(readr.show_col_types = FALSE)
options(DT.options = list(scrollX = TRUE))

# parquet input directories (from upstream workflows)
dir_pq_ichthyo <- here("data/parquet/swfsc.noaa.gov_calcofi-db")
dir_pq_bottle <- here("data/parquet/calcofi.org_bottle-database")

# MarineSensitivity species DB for local taxonomy lookups
spp_db_path <- Sys.getenv(
  "SPP_DB_PATH",
  unset = "/Users/bbest/_big/msens/derived/spp.duckdb"
)

# release version, e.g. "v2026.03" (year + month of the run date)
release_version <- format(Sys.Date(), "v%Y.%m")

# local merge database
db_path <- here("data/wrangling/merge_ichthyo_bottle.duckdb")
overwrite <- TRUE # TRUE rebuilds the merge DB from scratch

if (overwrite && file_exists(db_path)) {
  file_delete(db_path)
}
dir_create(dirname(db_path))
con <- get_duckdb_con(db_path)
load_duckdb_extension(con, "spatial")

# check if database already has tables (from previous run);
# note: with overwrite = TRUE the file was just deleted, so this is FALSE
existing_tables <- DBI::dbListTables(con)
skip_load <- length(existing_tables) > 0
if (skip_load) {
  message(glue("Using existing database with {length(existing_tables)} tables"))
}

0.2 Check Upstream Workflows

Verify that parquet outputs and manifests exist from both upstream workflows.

Code
# Verify the ichthyo manifest exists before reading it.
ichthyo_manifest_path <- file.path(dir_pq_ichthyo, "manifest.json")
if (!file.exists(ichthyo_manifest_path)) {
  stop(
    "Missing ichthyo parquet outputs. Run ingest_swfsc.noaa.gov_calcofi-db.qmd first."
  )
}

ichthyo_manifest <- read_json(ichthyo_manifest_path)
message(glue(
  "Ichthyo parquet: {ichthyo_manifest$total_rows} total rows, ",
  "created {ichthyo_manifest$created_at}"
))

# Verify the bottle manifest exists before reading it.
bottle_manifest_path <- file.path(dir_pq_bottle, "manifest.json")
if (!file.exists(bottle_manifest_path)) {
  stop(
    "Missing bottle parquet outputs. Run ingest_calcofi.org_bottle-database.qmd first."
  )
}

bottle_manifest <- read_json(bottle_manifest_path)
message(glue(
  "Bottle parquet: {bottle_manifest$total_rows} total rows, ",
  "created {bottle_manifest$created_at}"
))

# One summary row per upstream source.
manifest_row <- function(src, m) {
  tibble(
    source = src,
    tables = length(m$tables),
    rows = m$total_rows,
    created = m$created_at
  )
}

bind_rows(
  manifest_row("ichthyo", ichthyo_manifest),
  manifest_row("bottle", bottle_manifest)
) |>
  datatable(caption = "Upstream workflow manifest stats")

0.3 Load SWFSC Tables

Load all ichthyo parquet files into the local merge DuckDB. Tables with geometry columns (grid, site, segment) need WKB→GEOMETRY conversion.

Code
ichthyo_files <- list.files(
  dir_pq_ichthyo,
  pattern = "\\.parquet$",
  full.names = TRUE
)

# tables with geometry columns that need WKB -> GEOMETRY conversion
geom_tables <- c("grid", "site", "segment")

# Count rows of a table already loaded into the merge DB.
count_table_rows <- function(tbl_name) {
  DBI::dbGetQuery(con, glue("SELECT COUNT(*) AS n FROM {tbl_name}"))$n
}

# Load one parquet file into the merge DB as `tbl_name`.
# For tables in geom_tables, any BLOB column whose name contains "geom"
# is converted in place from WKB to a native GEOMETRY column (add tmp
# column, populate via ST_GeomFromWKB, drop original, rename back).
load_parquet_table <- function(pqt_path, tbl_name) {
  DBI::dbExecute(
    con,
    glue(
      "CREATE OR REPLACE TABLE {tbl_name} AS
       SELECT * FROM read_parquet('{pqt_path}')"
    )
  )
  if (tbl_name %in% geom_tables) {
    # detect WKB BLOB columns to convert
    blob_cols <- DBI::dbGetQuery(
      con,
      glue(
        "SELECT column_name FROM information_schema.columns
         WHERE table_name = '{tbl_name}'
           AND data_type = 'BLOB'
           AND column_name LIKE '%geom%'"
      )
    )$column_name
    for (gc in blob_cols) {
      tmp_col <- paste0(gc, "_tmp")
      DBI::dbExecute(
        con,
        glue("ALTER TABLE {tbl_name} ADD COLUMN {tmp_col} GEOMETRY")
      )
      DBI::dbExecute(
        con,
        glue("UPDATE {tbl_name} SET {tmp_col} = ST_GeomFromWKB({gc})")
      )
      DBI::dbExecute(
        con,
        glue("ALTER TABLE {tbl_name} DROP COLUMN {gc}")
      )
      DBI::dbExecute(
        con,
        glue("ALTER TABLE {tbl_name} RENAME COLUMN {tmp_col} TO {gc}")
      )
    }
  }
  invisible(tbl_name)
}

if (skip_load) {
  # tables already present from a previous run: just count rows
  message("Using existing ichthyo tables from database")
  ichthyo_stats <- map_dfr(ichthyo_files, function(pqt_path) {
    tbl_name <- tools::file_path_sans_ext(basename(pqt_path))
    tibble(table = tbl_name, rows = count_table_rows(tbl_name), source = "ichthyo")
  })
} else {
  ichthyo_stats <- map_dfr(ichthyo_files, function(pqt_path) {
    tbl_name <- tools::file_path_sans_ext(basename(pqt_path))
    load_parquet_table(pqt_path, tbl_name)
    tibble(table = tbl_name, rows = count_table_rows(tbl_name), source = "ichthyo")
  })
}

ichthyo_stats |> datatable(caption = "SWFSC ichthyo tables loaded")

0.4 Load Bottle Tables

Load bottle parquet files (grid excluded from bottle export — canonical from swfsc).

Code
# Bottle parquet files; grid.parquet is excluded even if present
# (canonical grid comes from the swfsc workflow).
bottle_files <- list.files(
  dir_pq_bottle,
  pattern = "\\.parquet$",
  full.names = TRUE
)
bottle_files <- bottle_files[!grepl("grid\\.parquet$", bottle_files)]

if (skip_load) {
  message("Using existing bottle tables from database")
}

# One pass over the files: create the table only when not reusing an
# existing database, then report row counts either way.
bottle_stats <- map_dfr(bottle_files, function(pqt_path) {
  tbl_name <- tools::file_path_sans_ext(basename(pqt_path))

  if (!skip_load) {
    DBI::dbExecute(
      con,
      glue(
        "CREATE OR REPLACE TABLE {tbl_name} AS
       SELECT * FROM read_parquet('{pqt_path}')"
      )
    )
  }

  n <- DBI::dbGetQuery(con, glue("SELECT COUNT(*) AS n FROM {tbl_name}"))$n
  tibble(table = tbl_name, rows = n, source = "bottle")
})

datatable(bottle_stats, caption = "Bottle tables loaded")

0.5 Show Loaded Tables

Code
# Combine per-source load stats and report the split.
all_stats <- bind_rows(ichthyo_stats, bottle_stats)

n_ich <- sum(all_stats$source == "ichthyo")
n_bot <- sum(all_stats$source == "bottle")
message(glue(
  "Loaded {nrow(all_stats)} tables: ",
  "{n_ich} ichthyo + ",
  "{n_bot} bottle"
))

all_stats |>
  arrange(source, table) |>
  datatable(caption = "All loaded tables")

0.6 Fuzzy Ship Matching

Reconcile ship codes between bottle casts and swfsc ship reference table. Uses calcofi4db::match_ships().

Code
# Casts whose ship_code has no exact counterpart in the swfsc ship table.
ships_unmatched <- dbGetQuery(
  con,
  "
  SELECT DISTINCT c.ship_code, c.ship_name
  FROM casts c
  LEFT JOIN ship s ON c.ship_code = s.ship_nodc
  WHERE s.ship_key IS NULL"
)

message(glue("{nrow(ships_unmatched)} unmatched ship codes in casts"))

# manual override CSV from a previous reconciliation pass
ship_renames_csv <- here(
  "metadata/calcofi.org/bottle-database/ship_renames.csv"
)

# Fuzzy-match the leftovers against the reference ship table
# (calcofi4db::match_ships; also fetches ICES codes).
ship_matches <- match_ships(
  unmatched_ships = ships_unmatched,
  reference_ships = dbReadTable(con, "ship"),
  ship_renames_csv = ship_renames_csv,
  fetch_ices = TRUE
)

datatable(ship_matches, caption = "Ship matching results")

0.6.1 Match Statistics

Code
# Tally of how each ship was matched (exact / fuzzy / manual / unmatched).
count(ship_matches, match_type) |>
  datatable(caption = "Ship match type counts")

0.7 Build Cruise Bridge

Link bottle casts to swfsc cruise table via ship_key → cruise_key.

0.7.1 Step 1: Add ship_key to casts

Code
# add ship_key column to casts (IF NOT EXISTS makes re-runs idempotent)
dbExecute(con, "ALTER TABLE casts ADD COLUMN IF NOT EXISTS ship_key TEXT")
[1] 0
Code
# exact match: casts.ship_code = ship.ship_nodc
# Correlated subquery assigns ship_key per cast; LIMIT 1 guards against
# duplicate ship_nodc rows in the reference table. Casts with no match
# get NULL (picked up by the fuzzy pass next).
dbExecute(
  con,
  "
  UPDATE casts SET ship_key = (
    SELECT s.ship_key FROM ship s
    WHERE s.ship_nodc = casts.ship_code
    LIMIT 1)"
)
[1] 35644
Code
# Count casts matched by the exact ship_nodc pass above.
n_exact <- dbGetQuery(
  con,
  "SELECT COUNT(*) AS n FROM casts WHERE ship_key IS NOT NULL"
)$n
message(glue("Exact ship_nodc match: {n_exact} casts"))

# apply fuzzy match results for previously unmatched ships
matched_ships <- ship_matches |>
  filter(match_type != "unmatched", !is.na(matched_ship_key))

if (nrow(matched_ships) > 0) {
  for (i in seq_len(nrow(matched_ships))) {
    m <- matched_ships[i, ]
    # Parameterized query instead of glue() string interpolation:
    # avoids SQL quoting breakage/injection if a ship code or key
    # ever contains a single quote.
    dbExecute(
      con,
      "UPDATE casts SET ship_key = ?
       WHERE ship_code = ?
         AND ship_key IS NULL",
      params = list(m$matched_ship_key, m$ship_code)
    )
  }
  # additional casts matched beyond the exact pass
  n_fuzzy <- dbGetQuery(
    con,
    "SELECT COUNT(*) AS n FROM casts WHERE ship_key IS NOT NULL"
  )$n -
    n_exact
  message(glue("Fuzzy/manual match: {n_fuzzy} additional casts"))
}

# summary of ship_key coverage across all casts
ship_key_stats <- dbGetQuery(
  con,
  "
  SELECT
    CASE WHEN ship_key IS NULL THEN 'no_ship_match'
         ELSE 'matched' END AS status,
    COUNT(*) AS n_casts
  FROM casts
  GROUP BY status"
)
ship_key_stats |> datatable(caption = "ship_key assignment stats")

0.7.2 Step 2: Derive cruise_key

Cruise key format: YYMMKK (2-digit year, 2-digit month, 2-letter ship key). The bottle ingest stores the original Cruise_ID as cruise_key_0 (interim), so cruise_key is a new column bridging bottle casts to the SWFSC cruise table.

Code
# add cruise_key column to casts (IF NOT EXISTS makes re-runs idempotent)
dbExecute(con, "ALTER TABLE casts ADD COLUMN IF NOT EXISTS cruise_key TEXT")
[1] 0
Code
# derive YYMMKK from datetime_utc + ship_key:
# 2-digit year (zero-padded), 2-digit month (zero-padded), then ship_key
# appended verbatim (assumes ship_key is the 2-letter code — TODO confirm
# against the swfsc ship table).
dbExecute(
  con,
  "
  UPDATE casts SET cruise_key = CONCAT(
    LPAD(CAST(EXTRACT(YEAR FROM datetime_utc) % 100 AS VARCHAR), 2, '0'),
    LPAD(CAST(EXTRACT(MONTH FROM datetime_utc) AS VARCHAR), 2, '0'),
    ship_key)
  WHERE ship_key IS NOT NULL"
)
[1] 35595
Code
# report how many casts now carry a derived cruise_key
# (can be fewer than ship_key matches when datetime_utc is NULL)
n_cruise <- dbGetQuery(
  con,
  "SELECT COUNT(*) AS n FROM casts WHERE cruise_key IS NOT NULL"
)$n
message(glue("Derived cruise_key for {n_cruise} casts"))

0.7.3 Step 3: Validate against swfsc cruise table

Code
# Classify every cast by how far it got through the bridge:
# ship match -> cruise_key derivation -> cruise table match.
cruise_stats <- dbGetQuery(
  con,
  "
  SELECT
    CASE
      WHEN c.ship_key IS NULL THEN 'no_ship_match'
      WHEN c.cruise_key IS NULL THEN 'no_cruise_key'
      WHEN cr.cruise_key IS NULL THEN 'no_cruise_match'
      ELSE 'matched'
    END AS status,
    COUNT(*) AS n_casts
  FROM casts c
  LEFT JOIN cruise cr ON c.cruise_key = cr.cruise_key
  GROUP BY status
  ORDER BY status"
)

cruise_stats |> datatable(caption = "Cruise bridge match statistics")

0.7.4 Step 4: Report unmatched ship codes

Code
# Ships that never received a ship_key, with cast counts and date range.
# (DISTINCT removed: it is redundant — and confusing — combined with
# GROUP BY, which already yields one row per ship_code/ship_name.)
unmatched_report <- dbGetQuery(
  con,
  "
  SELECT
    c.ship_code, c.ship_name,
    COUNT(*) AS n_casts,
    MIN(c.datetime_utc) AS first_cast,
    MAX(c.datetime_utc) AS last_cast
  FROM casts c
  WHERE c.ship_key IS NULL
  GROUP BY c.ship_code, c.ship_name
  ORDER BY n_casts DESC"
)

if (nrow(unmatched_report) > 0) {
  unmatched_report |>
    datatable(caption = "Unmatched ship codes (no ship_key)")
} else {
  message("All ship codes matched!")
}

0.8 Standardize Taxonomy

Update species table with WoRMS/ITIS/GBIF identifiers using local lookups against spp.duckdb (MarineSensitivity species DB with full WoRMS, ITIS, GBIF tables). Falls back to WoRMS API only for species not found locally. Build taxonomy hierarchy via recursive CTEs.

Code
# check if species already standardized (all taxonomy columns exist with data)
# gbif_id is filled last below, so a populated gbif_id implies the full
# pipeline previously ran to completion.
sp_cols <- DBI::dbGetQuery(
  con,
  "SELECT column_name FROM information_schema.columns
   WHERE table_name = 'species'"
)$column_name
sp_standardized <- all(c("worms_id", "itis_id", "gbif_id") %in% sp_cols) &&
  DBI::dbGetQuery(
    con,
    "SELECT COUNT(*) AS n FROM species WHERE gbif_id IS NOT NULL"
  )$n >
    0

if (sp_standardized && !overwrite) {
  # fast path: reuse the prior standardization
  message("Species already standardized, skipping")
  sp_results <- DBI::dbGetQuery(
    con,
    "SELECT species_id, scientific_name, worms_id, itis_id, gbif_id
     FROM species"
  )
} else {
  # add missing columns
  for (col in c("worms_id", "itis_id", "gbif_id")) {
    if (!col %in% sp_cols) {
      DBI::dbExecute(
        con,
        glue(
          "ALTER TABLE species ADD COLUMN {col} INTEGER"
        )
      )
    }
  }

  # attach MarineSensitivity species DB for local lookups
  DBI::dbExecute(
    con,
    glue(
      "ATTACH '{spp_db_path}' AS spp (READ_ONLY)"
    )
  )

  # worms: validate existing worms_id and resolve synonyms.
  # Accepted ids pass through; synonyms are remapped to their accepted
  # AphiaID (falling back to the original id when no accepted usage).
  # NOTE(review): if a worms_id has no row in spp.worms the subquery
  # returns NULL and clears the id — apparently intentional (drops
  # invalid ids so the name-match pass below can retry); confirm.
  n_worms_before <- DBI::dbGetQuery(
    con,
    "SELECT COUNT(*) AS n FROM species WHERE worms_id IS NOT NULL"
  )$n

  DBI::dbExecute(
    con,
    "
    UPDATE species SET worms_id = (
      SELECT CASE
        WHEN w.taxonomicStatus = 'accepted' THEN w.taxonID
        ELSE COALESCE(w.acceptedNameUsageID, w.taxonID)
      END
      FROM spp.worms w
      WHERE w.taxonID = species.worms_id
      LIMIT 1)
    WHERE worms_id IS NOT NULL"
  )

  n_worms <- DBI::dbGetQuery(
    con,
    "SELECT COUNT(*) AS n FROM species WHERE worms_id IS NOT NULL"
  )$n
  message(glue(
    "WoRMS: {n_worms} species with validated worms_id ",
    "(was {n_worms_before})"
  ))

  # worms: name match for any still missing.
  # ORDER BY prefers an 'accepted' record when several rows share the
  # same scientificName.
  DBI::dbExecute(
    con,
    "
    UPDATE species SET worms_id = (
      SELECT CASE
        WHEN w.taxonomicStatus = 'accepted' THEN w.taxonID
        ELSE COALESCE(w.acceptedNameUsageID, w.taxonID)
      END
      FROM spp.worms w
      WHERE w.scientificName = species.scientific_name
        AND w.taxonRank = 'Species'
      ORDER BY CASE
        WHEN w.taxonomicStatus = 'accepted' THEN 0 ELSE 1 END
      LIMIT 1)
    WHERE worms_id IS NULL"
  )

  n_worms2 <- DBI::dbGetQuery(
    con,
    "SELECT COUNT(*) AS n FROM species WHERE worms_id IS NOT NULL"
  )$n
  message(glue("WoRMS name match: {n_worms2 - n_worms} additional species"))

  # itis: match by scientific_name, preferring 'valid' records
  DBI::dbExecute(
    con,
    "
    UPDATE species SET itis_id = (
      SELECT COALESCE(i.acceptedNameUsageID, i.taxonID)
      FROM spp.itis i
      WHERE i.scientificName = species.scientific_name
      ORDER BY CASE
        WHEN i.taxonomicStatus = 'valid' THEN 0 ELSE 1 END
      LIMIT 1)
    WHERE itis_id IS NULL"
  )

  n_itis <- DBI::dbGetQuery(
    con,
    "SELECT COUNT(*) AS n FROM species WHERE itis_id IS NOT NULL"
  )$n
  message(glue("ITIS: {n_itis} species matched"))

  # gbif: match by canonicalName; acceptedNameUsageID is stored as text
  # in the gbif backbone, hence TRY_CAST + NULLIF('') before falling
  # back to taxonID
  DBI::dbExecute(
    con,
    "
    UPDATE species SET gbif_id = (
      SELECT COALESCE(
        TRY_CAST(NULLIF(g.acceptedNameUsageID, '') AS INTEGER),
        g.taxonID)
      FROM spp.gbif g
      WHERE g.canonicalName = species.scientific_name
      ORDER BY CASE
        WHEN g.taxonomicStatus = 'accepted' THEN 0 ELSE 1 END
      LIMIT 1)
    WHERE gbif_id IS NULL"
  )

  n_gbif <- DBI::dbGetQuery(
    con,
    "SELECT COUNT(*) AS n FROM species WHERE gbif_id IS NOT NULL"
  )$n
  message(glue("GBIF: {n_gbif} species matched"))

  # API fallback for any species still missing worms_id
  missing_worms <- DBI::dbGetQuery(
    con,
    "SELECT species_id, scientific_name
     FROM species WHERE worms_id IS NULL"
  )

  if (nrow(missing_worms) > 0) {
    message(glue(
      "API fallback for {nrow(missing_worms)} species missing worms_id"
    ))
    for (i in seq_len(nrow(missing_worms))) {
      sp_name <- missing_worms$scientific_name[i]
      sp_id <- missing_worms$species_id[i]

      # fuzzy WoRMS API lookup; first record wins, preferring the
      # accepted (valid) AphiaID over the matched one; any API error
      # yields NA and the species is simply left unmatched
      worms_id <- tryCatch(
        {
          records <- worrms::wm_records_name(sp_name, fuzzy = TRUE)
          if (!is.null(records) && nrow(records) > 0) {
            rec <- records[1, ]
            if (!is.na(rec$valid_AphiaID)) {
              as.integer(rec$valid_AphiaID)
            } else {
              as.integer(rec$AphiaID)
            }
          } else {
            NA_integer_
          }
        },
        error = function(e) NA_integer_
      )

      if (!is.na(worms_id)) {
        DBI::dbExecute(
          con,
          glue(
            "UPDATE species SET worms_id = {worms_id}
           WHERE species_id = {sp_id}"
          )
        )
      }
      # rate-limit the WoRMS API
      Sys.sleep(0.5)
    }
    message("API fallback complete")
  } else {
    message("All species matched locally, no API fallback needed")
  }

  sp_results <- DBI::dbGetQuery(
    con,
    "SELECT species_id, scientific_name, worms_id, itis_id, gbif_id
     FROM species"
  )
}

sp_results |>
  datatable(caption = "Species standardization results")
Code
# check if taxon table already exists with data
taxon_exists <- "taxon" %in%
  existing_tables &&
  DBI::dbGetQuery(con, "SELECT COUNT(*) AS n FROM taxon")$n > 0

if (taxon_exists && !overwrite) {
  # fast path: reuse the previously built hierarchy
  message("Taxon table already exists, skipping")
  taxon_rows <- DBI::dbReadTable(con, "taxon")
} else {
  # ensure spp.duckdb is attached (the species-standardization chunk may
  # have attached it already; probing with a SELECT avoids a hard error)
  spp_attached <- tryCatch(
    {
      DBI::dbGetQuery(con, "SELECT 1 FROM spp.worms LIMIT 1")
      TRUE
    },
    error = function(e) FALSE
  )
  if (!spp_attached) {
    DBI::dbExecute(
      con,
      glue(
        "ATTACH '{spp_db_path}' AS spp (READ_ONLY)"
      )
    )
  }

  # build WoRMS hierarchy via recursive CTE: seed with each species'
  # worms_id, then walk parentNameUsageID upward; the self-reference
  # guard (parent != taxonID) prevents infinite loops at the root.
  # NOTE(review): the leaf taxonID is exposed under the column name
  # acceptedNameUsageID — unusual naming; confirm downstream consumers
  # expect the leaf id there.
  worms_taxon <- DBI::dbGetQuery(
    con,
    "
    WITH RECURSIVE hierarchy AS (
      SELECT
        w.taxonID,
        w.parentNameUsageID,
        w.scientificName,
        w.taxonRank,
        w.taxonID AS leaf_taxonID
      FROM spp.worms w
      WHERE w.taxonID IN (
        SELECT DISTINCT worms_id FROM species
        WHERE worms_id IS NOT NULL)

      UNION ALL

      SELECT
        p.taxonID,
        p.parentNameUsageID,
        p.scientificName,
        p.taxonRank,
        h.leaf_taxonID
      FROM spp.worms p
      JOIN hierarchy h ON p.taxonID = h.parentNameUsageID
      WHERE h.parentNameUsageID IS NOT NULL
        AND h.parentNameUsageID != h.taxonID
    )
    SELECT DISTINCT
      'WoRMS'                        AS authority,
      taxonID                        AS taxonID,
      leaf_taxonID                   AS acceptedNameUsageID,
      parentNameUsageID              AS parentNameUsageID,
      scientificName                 AS scientificName,
      taxonRank                      AS taxonRank,
      'accepted'                     AS taxonomicStatus,
      CAST(NULL AS VARCHAR)          AS scientificNameAuthorship
    FROM hierarchy
    WHERE taxonRank IS NOT NULL
  "
  )

  # deduplicate: keep one acceptedNameUsageID per (authority, taxonID)
  worms_taxon <- worms_taxon |>
    distinct(authority, taxonID, .keep_all = TRUE)
  message(glue("WoRMS hierarchy: {nrow(worms_taxon)} rows"))

  # build ITIS hierarchy via recursive CTE (same walk as WoRMS above,
  # seeded from species.itis_id against spp.itis)
  itis_taxon <- DBI::dbGetQuery(
    con,
    "
    WITH RECURSIVE hierarchy AS (
      SELECT
        i.taxonID,
        i.parentNameUsageID,
        i.scientificName,
        i.taxonRank,
        i.taxonID AS leaf_taxonID
      FROM spp.itis i
      WHERE i.taxonID IN (
        SELECT DISTINCT itis_id FROM species
        WHERE itis_id IS NOT NULL)

      UNION ALL

      SELECT
        p.taxonID,
        p.parentNameUsageID,
        p.scientificName,
        p.taxonRank,
        h.leaf_taxonID
      FROM spp.itis p
      JOIN hierarchy h ON p.taxonID = h.parentNameUsageID
      WHERE h.parentNameUsageID IS NOT NULL
        AND h.parentNameUsageID != h.taxonID
    )
    SELECT DISTINCT
      'ITIS'                         AS authority,
      taxonID                        AS taxonID,
      leaf_taxonID                   AS acceptedNameUsageID,
      parentNameUsageID              AS parentNameUsageID,
      scientificName                 AS scientificName,
      taxonRank                      AS taxonRank,
      'accepted'                     AS taxonomicStatus,
      CAST(NULL AS VARCHAR)          AS scientificNameAuthorship
    FROM hierarchy
    WHERE taxonRank IS NOT NULL
  "
  )

  itis_taxon <- itis_taxon |>
    distinct(authority, taxonID, .keep_all = TRUE)
  message(glue("ITIS hierarchy: {nrow(itis_taxon)} rows"))

  # combine both authorities and (re)write the taxon table
  taxon_rows <- bind_rows(worms_taxon, itis_taxon)

  DBI::dbExecute(con, "DROP TABLE IF EXISTS taxon")
  DBI::dbWriteTable(con, "taxon", taxon_rows)
  message(glue("Created taxon table: {nrow(taxon_rows)} rows"))

  # create taxa_rank lookup table (from calcofi4db::build_taxon_table);
  # order of this vector defines rank_order (Kingdom = 1 ... Aggr. = 41)
  taxa_ranks_chr <- c(
    "Kingdom",
    "Subkingdom",
    "Phylum",
    "Subphylum",
    "Infraphylum",
    "Superclass",
    "Class",
    "Subclass",
    "Infraclass",
    "Megacohort",
    "Supercohort",
    "Cohort",
    "Subcohort",
    "Infracohort",
    "Superorder",
    "Order",
    "Suborder",
    "Infraorder",
    "Parvorder",
    "Superfamily",
    "Family",
    "Subfamily",
    "Supertribe",
    "Tribe",
    "Subtribe",
    "Genus",
    "Subgenus",
    "Series",
    "Subseries",
    "Species",
    "Subspecies",
    "Natio",
    "Mutatio",
    "Form",
    "Forma",
    "Subform",
    "Subforma",
    "Variety",
    "Subvariety",
    "Coll. sp.",
    "Aggr."
  )

  d_taxa_rank <- tibble(
    taxonRank = taxa_ranks_chr,
    rank_order = seq_along(taxa_ranks_chr)
  )

  DBI::dbExecute(con, "DROP TABLE IF EXISTS taxa_rank")
  DBI::dbWriteTable(con, "taxa_rank", d_taxa_rank)
  message(glue("Created taxa_rank table: {nrow(d_taxa_rank)} rank levels"))

  # detach spp.duckdb (ignore error if it was never attached)
  tryCatch(
    DBI::dbExecute(con, "DETACH spp"),
    error = function(e) NULL
  )
}
[1] 0
Code
# Summarize the taxon hierarchy (row counts per authority x rank), when present.
if (nrow(taxon_rows) > 0) {
  count(taxon_rows, authority, taxonRank) |>
    arrange(authority, taxonRank) |>
    datatable(caption = "Taxon hierarchy by authority and rank")
}
Code
# Display the taxa_rank ordering lookup table.
datatable(
  dbReadTable(con, "taxa_rank"),
  caption = "Taxa rank ordering"
)

0.8.1 Taxonomy Statistics

Code
# Per-authority coverage in a single round trip (the original issued four
# nearly identical COUNT queries). COUNT(col) counts non-NULL values.
tax_cov <- dbGetQuery(
  con,
  "
  SELECT
    COUNT(*)        AS n_species,
    COUNT(worms_id) AS n_worms,
    COUNT(itis_id)  AS n_itis,
    COUNT(gbif_id)  AS n_gbif
  FROM species"
)
# keep the individual names assigned, matching prior chunks' conventions
n_species <- tax_cov$n_species
n_worms <- tax_cov$n_worms
n_itis <- tax_cov$n_itis
n_gbif <- tax_cov$n_gbif
n_taxon <- dbGetQuery(con, "SELECT COUNT(*) AS n FROM taxon")$n

tibble(
  metric = c(
    "total species",
    "with worms_id",
    "with itis_id",
    "with gbif_id",
    "taxon hierarchy rows"
  ),
  count = c(n_species, n_worms, n_itis, n_gbif, n_taxon)
) |>
  datatable(caption = "Taxonomy standardization summary")

0.9 Validate Cross-Dataset Integrity

Code
# grid_key integrity: casts.grid_key should all be in grid.grid_key.
# NOT EXISTS instead of NOT IN: with NOT IN, a single NULL in
# grid.grid_key makes the predicate never true and silently reports
# zero orphans.
if (
  "grid_key" %in%
    dbListFields(con, "casts") &&
    "grid_key" %in% dbListFields(con, "grid")
) {
  grid_orphans <- dbGetQuery(
    con,
    "
    SELECT COUNT(*) AS n FROM casts c
    WHERE c.grid_key IS NOT NULL
      AND NOT EXISTS (
        SELECT 1 FROM grid g WHERE g.grid_key = c.grid_key)"
  )$n
  message(glue("Grid key orphans in casts: {grid_orphans}"))
}

# ship PK uniqueness
ship_dups <- dbGetQuery(
  con,
  "
  SELECT ship_key, COUNT(*) AS n FROM ship
  GROUP BY ship_key HAVING COUNT(*) > 1"
)
if (nrow(ship_dups) > 0) {
  warning(glue("Duplicate ship_key values: {nrow(ship_dups)}"), call. = FALSE)
}

# cruise PK uniqueness
cruise_dups <- dbGetQuery(
  con,
  "
  SELECT cruise_key, COUNT(*) AS n FROM cruise
  GROUP BY cruise_key HAVING COUNT(*) > 1"
)
if (nrow(cruise_dups) > 0) {
  warning(glue("Duplicate cruise_key values: {nrow(cruise_dups)}"), call. = FALSE)
}

# cruise bridge coverage (COUNT(col) counts non-NULL rows, replacing the
# equivalent but noisier SUM(CASE ...) form)
bridge_stats <- dbGetQuery(
  con,
  "
  SELECT
    COUNT(*) AS total_casts,
    COUNT(ship_key) AS with_ship_key,
    COUNT(cruise_key) AS with_cruise_key
  FROM casts"
)
bridge_stats |> datatable(caption = "Cruise bridge coverage")
Code
message("Cross-dataset validation complete")

0.10 Show Combined Schema

Code
# build dm from connection (exclude internal tables)
tbls <- setdiff(dbListTables(con), c("_sp_update"))

# learn_keys = FALSE: keys are not declared in DuckDB here, so skip discovery
d <- dm::dm_from_con(con, table_names = tbls, learn_keys = FALSE)

# set table colors by group
# NOTE(review): dm_set_colors() uses bare table names; this call errors if
# any of these base tables is absent from tbls — assumed always loaded,
# confirm (taxon/taxa_rank/ship are guarded below, these are not)
d <- d |>
  dm::dm_set_colors(
    lightblue = c(cruise, site, tow, net), # ichthyo chain
    lightyellow = c(ichthyo, species, lookup), # species/lookup
    lightgreen = c(grid, segment), # spatial
    pink = c(
      casts,
      bottle,
      bottle_measurement,
      cast_condition,
      measurement_type
    )
  ) # bottle chain

# add colors for new taxonomy tables if they exist
if ("taxon" %in% tbls) {
  d <- d |> dm::dm_set_colors(lightyellow = taxon)
}
if ("taxa_rank" %in% tbls) {
  d <- d |> dm::dm_set_colors(lightyellow = taxa_rank)
}
if ("ship" %in% tbls) {
  d <- d |> dm::dm_set_colors(lightblue = ship)
}

# render the full schema diagram left-to-right
dm::dm_draw(d, rankdir = "LR", view_type = "all")

0.11 Write Modified Parquet Outputs

Only export tables that differ from upstream (modified or new). Unchanged tables reference upstream GCS URIs in the manifest.

Code
# Tables created or altered by this workflow; all other tables are
# referenced from upstream parquet outputs via the manifest.
modified_tables <- c(
  "casts",     # added ship_key, cruise_key columns
  "species",   # updated worms_id, itis_id, gbif_id
  "taxon",     # new table (taxonomy hierarchy)
  "taxa_rank"  # new table (rank ordering)
)

dir_pq_merged <- here("data/parquet/merge_ichthyo_bottle")

# Export only the modified tables, keeping provenance columns.
merge_stats <- write_parquet_outputs(
  con = con,
  output_dir = dir_pq_merged,
  tables = modified_tables,
  strip_provenance = FALSE
)

merge_stats |>
  mutate(file = basename(path)) |>
  select(!path) |>
  datatable(caption = "Modified parquet export statistics")

0.11.1 Build Combined Manifest

Reference GCS URIs for unchanged upstream tables + local parquet for modified tables.

Code
# re-read upstream manifests so the combined manifest carries their stamps
ichthyo_manifest <- read_json(ichthyo_manifest_path)
bottle_manifest <- read_json(bottle_manifest_path)

# Upstream reference entry: manifest timestamp + every upstream table not
# superseded by a table modified here.
upstream_ref <- function(m, drop) {
  list(
    manifest = m$created_at,
    tables = setdiff(m$tables, drop)
  )
}

# combined manifest: locally modified tables + references to upstream
# parquet for everything else ("grid" also dropped from bottle refs —
# the canonical grid comes from swfsc)
manifest <- list(
  created_at = as.character(Sys.time()),
  release_version = release_version,
  modified_tables = merge_stats |>
    select(table, rows, file_size) |>
    as.list(),
  upstream_refs = list(
    ichthyo = upstream_ref(ichthyo_manifest, modified_tables),
    bottle = upstream_ref(bottle_manifest, c(modified_tables, "grid"))
  )
)

write_json(
  manifest,
  file.path(dir_pq_merged, "manifest.json"),
  auto_unbox = TRUE,
  pretty = TRUE
)

message(glue(
  "Manifest written: {length(modified_tables)} modified tables, ",
  "upstream refs for {length(manifest$upstream_refs$ichthyo$tables)} ",
  "ichthyo + {length(manifest$upstream_refs$bottle$tables)} bottle"
))

0.12 Ingest to Working DuckLake

Load ALL tables into Working DuckLake with provenance. Unchanged tables read directly from upstream parquet. Modified tables read from merge output dir.

Code
con_wdl <- get_working_ducklake()
load_duckdb_extension(con_wdl, "spatial")

# helper to ingest a parquet file with provenance, replacing any existing
# table of the same name in the Working DuckLake
ingest_pqt <- function(pqt_path, tbl_name, source_label) {
  data <- arrow::read_parquet(pqt_path)
  ingest_to_working(
    con = con_wdl,
    data = data,
    table = tbl_name,
    source_file = source_label,
    mode = "replace"
  )
}

# Ingest every table in `tbls` from directory `dir`, labeling provenance
# as "{prefix}/{tbl}.parquet". Missing files raise a warning instead of
# being silently skipped (the original returned NULL with no trace),
# but still contribute no rows to the stats.
ingest_dir <- function(tbls, dir, prefix) {
  map_dfr(tbls, function(tbl) {
    pqt <- file.path(dir, paste0(tbl, ".parquet"))
    if (!file.exists(pqt)) {
      warning(
        glue("Missing parquet for table '{tbl}': {pqt}"),
        call. = FALSE
      )
      return(NULL)
    }
    ingest_pqt(pqt, tbl, glue("{prefix}/{tbl}.parquet"))
  })
}

# ingest ichthyo upstream tables (skip modified ones)
ichthyo_upstream <- setdiff(ichthyo_manifest$tables, modified_tables)
ichthyo_wdl <- ingest_dir(
  ichthyo_upstream,
  dir_pq_ichthyo,
  "parquet/swfsc.noaa.gov_calcofi-db"
)

# ingest bottle upstream tables (skip modified + grid)
bottle_upstream <- setdiff(
  bottle_manifest$tables,
  c(modified_tables, "grid")
)
bottle_wdl <- ingest_dir(
  bottle_upstream,
  dir_pq_bottle,
  "parquet/calcofi.org_bottle-database"
)

# ingest modified tables from merge output
merge_wdl <- ingest_dir(
  modified_tables,
  dir_pq_merged,
  "parquet/merge_ichthyo_bottle"
)

# combine stats
wdl_stats <- bind_rows(
  ichthyo_wdl |> mutate(source = "ichthyo_upstream"),
  bottle_wdl |> mutate(source = "bottle_upstream"),
  merge_wdl |> mutate(source = "merge_modified")
)

wdl_stats |> datatable(caption = "Working DuckLake ingestion stats")

0.13 List Working Tables

Code
# Tables now present in the Working DuckLake, with their provenance.
working_tables <- list_working_tables(con_wdl)
datatable(
  working_tables,
  caption = "Working DuckLake tables with provenance"
)

0.14 Save Working DuckLake

Code
save_working_ducklake(con_wdl)
[1] "gs://calcofi-db/ducklake/working/calcofi.duckdb"
Code
message("Working DuckLake saved to GCS")

0.15 Create Frozen Release

Create a frozen (immutable) release for public access. Strips provenance columns and exports clean parquet files. See Frozen DuckLake pattern.

Code
# Frozen releases live under data/releases/<version>.
dir_frozen <- here(glue("data/releases/{release_version}"))
message(glue("Creating frozen release: {release_version}"))

# freeze everything except internal bookkeeping tables
tables_to_freeze <- setdiff(DBI::dbListTables(con_wdl), "_meta")

# export clean parquet: provenance columns stripped, zstd-compressed
freeze_stats <- write_parquet_outputs(
  con = con_wdl,
  output_dir = file.path(dir_frozen, "parquet"),
  tables = tables_to_freeze,
  strip_provenance = TRUE,
  compression = "zstd"
)

datatable(
  freeze_stats,
  caption = glue("Frozen release {release_version} statistics")
)

0.15.1 Release Notes

Code
# build release notes using paste() to avoid glue() parsing issues with code blocks
tables_list <- paste0(
  "- ",
  freeze_stats$table,
  " (",
  format(freeze_stats$rows, big.mark = ","),
  " rows)"
)

# assemble the complete RELEASE_NOTES.md content as one markdown string.
# paste0() is used instead of glue() so the embedded ```r code blocks (which
# contain literal braces and quotes) need no escaping; release_version is
# spliced in as separate paste0() arguments where interpolation is needed.
release_notes <- paste0(
  # title and date header
  "# CalCOFI Database Release ",
  release_version,
  "\n\n",
  "**Release Date**: ",
  Sys.Date(),
  "\n\n",
  # per-table bullet list built above in tables_list
  "## Tables Included\n\n",
  paste(tables_list, collapse = "\n"),
  "\n\n",
  # aggregate totals across all frozen tables
  "## Total\n\n",
  "- **Tables**: ",
  nrow(freeze_stats),
  "\n",
  "- **Total Rows**: ",
  format(sum(freeze_stats$rows), big.mark = ","),
  "\n",
  "- **Total Size**: ",
  # NOTE(review): assumes freeze_stats$file_size is in bytes — confirm against
  # write_parquet_outputs(); rendered total was 90.7 MB
  round(sum(freeze_stats$file_size) / 1024 / 1024, 1),
  " MB\n\n",
  # provenance: the two upstream ingest workflows
  "## Data Sources\n\n",
  "- `ingest_swfsc.noaa.gov_calcofi-db.qmd` - Ichthyo tables (cruise, ship, site, tow, net, species, ichthyo, grid, segment, lookup)\n",
  "- `ingest_calcofi.org_bottle-database.qmd` - Bottle/cast tables (casts, bottle, bottle_measurement, cast_condition, measurement_type)\n\n",
  # reconciliation work performed by this merge workflow
  "## Cross-Dataset Integration\n\n",
  "- **Ship matching**: Reconciled ship codes between bottle casts and swfsc ship reference\n",
  "- **Cruise bridge**: Derived cruise_key (YYMMKK) for bottle casts via ship_key + datetime\n",
  "- **Taxonomy**: Standardized species with WoRMS AphiaID, ITIS TSN, GBIF backbone key\n",
  "- **Taxon hierarchy**: Built taxon + taxa_rank tables from WoRMS/ITIS classification\n\n",
  # user-facing access examples (literal R code blocks, hence paste0 not glue)
  "## Access\n\n",
  "Parquet files can be queried directly from GCS:\n\n",
  "```r\n",
  "library(duckdb)\n",
  "con <- dbConnect(duckdb())\n",
  "dbExecute(con, 'INSTALL httpfs; LOAD httpfs;')\n",
  "dbGetQuery(con, \"\n",
  "  SELECT * FROM read_parquet(\n",
  "    'https://storage.googleapis.com/calcofi-db/ducklake/releases/",
  release_version,
  "/parquet/ichthyo.parquet')\n",
  "  LIMIT 10\")\n",
  "```\n\n",
  "Or use calcofi4r:\n\n",
  "```r\n",
  "library(calcofi4r)\n",
  "con <- cc_get_db(version = '",
  release_version,
  "')\n",
  "```\n"
)

# write the notes alongside the frozen parquet output, then echo them so the
# rendered document shows the final markdown
notes_path <- file.path(dir_frozen, "RELEASE_NOTES.md")
writeLines(release_notes, notes_path)
message(glue("Release notes written to {notes_path}"))
cat(release_notes)

1 CalCOFI Database Release v2026.03

Release Date: 2026-03-06

1.1 Tables Included

  • bottle ( 895,371 rows)
  • bottle_measurement (11,135,600 rows)
  • cast_condition ( 235,513 rows)
  • casts ( 35,644 rows)
  • cruise ( 691 rows)
  • grid ( 218 rows)
  • ichthyo ( 830,873 rows)
  • lookup ( 26 rows)
  • measurement_type ( 47 rows)
  • net ( 76,512 rows)
  • segment ( 60,413 rows)
  • ship ( 48 rows)
  • site ( 61,104 rows)
  • species ( 1,144 rows)
  • taxa_rank ( 41 rows)
  • taxon ( 3,348 rows)
  • tow ( 75,506 rows)

1.2 Total

  • Tables: 17
  • Total Rows: 13,412,099
  • Total Size: 90.7 MB

1.3 Data Sources

  • ingest_swfsc.noaa.gov_calcofi-db.qmd - Ichthyo tables (cruise, ship, site, tow, net, species, ichthyo, grid, segment, lookup)
  • ingest_calcofi.org_bottle-database.qmd - Bottle/cast tables (casts, bottle, bottle_measurement, cast_condition, measurement_type)

1.4 Cross-Dataset Integration

  • Ship matching: Reconciled ship codes between bottle casts and swfsc ship reference
  • Cruise bridge: Derived cruise_key (YYMMKK) for bottle casts via ship_key + datetime
  • Taxonomy: Standardized species with WoRMS AphiaID, ITIS TSN, GBIF backbone key
  • Taxon hierarchy: Built taxon + taxa_rank tables from WoRMS/ITIS classification

1.5 Access

Parquet files can be queried directly from GCS:

library(duckdb)
con <- dbConnect(duckdb())
dbExecute(con, 'INSTALL httpfs; LOAD httpfs;')
dbGetQuery(con, "
  SELECT * FROM read_parquet(
    'https://storage.googleapis.com/calcofi-db/ducklake/releases/v2026.03/parquet/ichthyo.parquet')
  LIMIT 10")

Or use calcofi4r:

library(calcofi4r)
con <- cc_get_db(version = 'v2026.03')

1.5.1 Upload Frozen Release to GCS

Code
# upload frozen release to GCS (creates catalog.json, updates versions.json and latest.txt)
# set_latest = TRUE points latest.txt at this version so clients resolve it as current
upload_frozen_release(
  release_dir = dir_frozen,
  version = release_version,
  set_latest = TRUE
)

1.6 Cleanup

Code
# close working ducklake connection
close_duckdb(con_wdl)
message("Working DuckLake connection closed")

# close local merge database
close_duckdb(con)
message("Local merge database connection closed")

# summary. NOTE: glue() trims leading/trailing whitespace lines by default
# (.trim = TRUE), so glue("\n=== Summary ===") silently dropped the intended
# blank line before the header; use plain message() for static text and
# reserve glue() for strings that actually interpolate.
message("\n=== Summary ===")
message("Working DuckLake: saved to GCS")
message(glue("Frozen release: {release_version} created at {dir_frozen}"))
message(glue("Tables: {nrow(freeze_stats)}"))
message(glue("Total rows: {format(sum(freeze_stats$rows), big.mark = ',')}"))
Code
# record the full package/session environment for reproducibility of this run
devtools::session_info()
─ Session info ───────────────────────────────────────────────────────────────
 setting  value
 version  R version 4.5.2 (2025-10-31)
 os       macOS Sequoia 15.7.1
 system   aarch64, darwin20
 ui       X11
 language (EN)
 collate  en_US.UTF-8
 ctype    en_US.UTF-8
 tz       Europe/Rome
 date     2026-03-06
 pandoc   3.8.3 @ /opt/homebrew/bin/ (via rmarkdown)
 quarto   1.8.25 @ /Applications/quarto/bin/quarto

─ Packages ───────────────────────────────────────────────────────────────────
 !  package            * version    date (UTC) lib source
    abind                1.4-8      2024-09-12 [1] CRAN (R 4.5.0)
    ape                  5.8-1      2024-12-16 [1] CRAN (R 4.5.0)
    arrow                22.0.0.1   2025-12-23 [1] CRAN (R 4.5.2)
    assertthat           0.2.1      2019-03-21 [1] CRAN (R 4.5.0)
    backports            1.5.0      2024-05-23 [1] CRAN (R 4.5.0)
    base64enc            0.1-6      2026-02-02 [1] CRAN (R 4.5.2)
    bit                  4.6.0      2025-03-06 [1] CRAN (R 4.5.0)
    bit64                4.6.0-1    2025-01-16 [1] CRAN (R 4.5.0)
    blob                 1.3.0      2026-01-14 [1] CRAN (R 4.5.2)
    broom                1.0.10     2025-09-13 [1] CRAN (R 4.5.0)
    bslib                0.10.0     2026-01-26 [1] CRAN (R 4.5.2)
    cachem               1.1.0      2024-05-16 [1] CRAN (R 4.5.0)
 VP calcofi4db         * 2.4.0      2026-02-04 [?] load_all() (on disk 2.1.0)
 P  calcofi4r          * 1.1.3      2026-02-09 [?] Github (calcofi/calcofi4r@304d271)
    chromote             0.5.1      2025-04-24 [1] CRAN (R 4.5.0)
    class                7.3-23     2025-01-01 [1] CRAN (R 4.5.2)
    classInt             0.4-11     2025-01-08 [1] CRAN (R 4.5.0)
    cli                  3.6.5      2025-04-23 [1] CRAN (R 4.5.0)
    codetools            0.2-20     2024-03-31 [1] CRAN (R 4.5.2)
    crayon               1.5.3      2024-06-20 [1] CRAN (R 4.5.0)
    crosstalk            1.2.2      2025-08-26 [1] CRAN (R 4.5.0)
    crul                 1.6.0      2025-07-23 [1] CRAN (R 4.5.0)
    curl                 7.0.0      2025-08-19 [1] CRAN (R 4.5.0)
    data.table           1.18.2.1   2026-01-27 [1] CRAN (R 4.5.2)
    DBI                * 1.3.0      2026-02-25 [1] CRAN (R 4.5.2)
    dbplyr               2.5.1      2025-09-10 [1] CRAN (R 4.5.0)
    desc                 1.4.3      2023-12-10 [1] CRAN (R 4.5.0)
    devtools             2.4.6      2025-10-03 [1] CRAN (R 4.5.0)
    DiagrammeR           1.0.11     2024-02-02 [1] CRAN (R 4.5.0)
    DiagrammeRsvg        0.1        2016-02-04 [1] CRAN (R 4.5.0)
    digest               0.6.39     2025-11-19 [1] CRAN (R 4.5.2)
    dm                 * 1.0.12     2025-07-02 [1] CRAN (R 4.5.0)
    dplyr              * 1.2.0      2026-02-03 [1] CRAN (R 4.5.2)
    DT                 * 0.34.0     2025-09-02 [1] CRAN (R 4.5.0)
    duckdb               1.4.4      2026-01-28 [1] CRAN (R 4.5.2)
    dygraphs             1.1.1.6    2018-07-11 [1] CRAN (R 4.5.0)
    e1071                1.7-17     2025-12-18 [1] CRAN (R 4.5.2)
    ellipsis             0.3.2      2021-04-29 [1] CRAN (R 4.5.0)
    evaluate             1.0.5      2025-08-27 [1] CRAN (R 4.5.0)
    farver               2.1.2      2024-05-13 [1] CRAN (R 4.5.0)
    fastmap              1.2.0      2024-05-15 [1] CRAN (R 4.5.0)
    fs                 * 1.6.6      2025-04-12 [1] CRAN (R 4.5.0)
    fuzzyjoin            0.1.6.1    2025-07-10 [1] CRAN (R 4.5.0)
    gargle               1.6.1      2026-01-29 [1] CRAN (R 4.5.2)
    generics             0.1.4      2025-05-09 [1] CRAN (R 4.5.0)
    geojsonsf            2.0.5      2025-11-26 [1] CRAN (R 4.5.2)
    ggplot2              4.0.2      2026-02-03 [1] CRAN (R 4.5.2)
    glue               * 1.8.0      2024-09-30 [1] CRAN (R 4.5.0)
    googledrive          2.1.2      2025-09-10 [1] CRAN (R 4.5.0)
    gtable               0.3.6      2024-10-25 [1] CRAN (R 4.5.0)
    here               * 1.0.2      2025-09-15 [1] CRAN (R 4.5.0)
    highcharter          0.9.4      2022-01-03 [1] CRAN (R 4.5.0)
    hms                  1.1.4      2025-10-17 [1] CRAN (R 4.5.0)
    htmltools            0.5.9      2025-12-04 [1] CRAN (R 4.5.2)
    htmlwidgets          1.6.4      2023-12-06 [1] CRAN (R 4.5.0)
    httpcode             0.3.0      2020-04-10 [1] CRAN (R 4.5.0)
    httpuv               1.6.16     2025-04-16 [1] CRAN (R 4.5.0)
    httr                 1.4.7      2023-08-15 [1] CRAN (R 4.5.0)
    httr2                1.2.2      2025-12-08 [1] CRAN (R 4.5.2)
    igraph               2.2.1      2025-10-27 [1] CRAN (R 4.5.0)
    isoband              0.3.0      2025-12-07 [1] CRAN (R 4.5.2)
    janitor              2.2.1      2024-12-22 [1] CRAN (R 4.5.0)
    jquerylib            0.1.4      2021-04-26 [1] CRAN (R 4.5.0)
    jsonlite           * 2.0.0      2025-03-27 [1] CRAN (R 4.5.0)
    KernSmooth           2.23-26    2025-01-01 [1] CRAN (R 4.5.2)
    knitr                1.51       2025-12-20 [1] CRAN (R 4.5.2)
    later                1.4.7      2026-02-24 [1] CRAN (R 4.5.2)
    lattice              0.22-7     2025-04-02 [1] CRAN (R 4.5.2)
    lazyeval             0.2.2      2019-03-15 [1] CRAN (R 4.5.0)
    leafem               0.2.5      2025-08-28 [1] CRAN (R 4.5.0)
    leaflet              2.2.3      2025-09-04 [1] CRAN (R 4.5.0)
    librarian            1.8.1      2021-07-12 [1] CRAN (R 4.5.0)
    lifecycle            1.0.5      2026-01-08 [1] CRAN (R 4.5.2)
    lubridate            1.9.5      2026-02-04 [1] CRAN (R 4.5.2)
    magrittr             2.0.4      2025-09-12 [1] CRAN (R 4.5.0)
    mapgl                0.4.4.9000 2026-03-04 [1] Github (walkerke/mapgl@b9421e0)
    mapview              2.11.4     2025-09-08 [1] CRAN (R 4.5.0)
    markdown             2.0        2025-03-23 [1] CRAN (R 4.5.0)
    Matrix               1.7-4      2025-08-28 [1] CRAN (R 4.5.2)
    memoise              2.0.1      2021-11-26 [1] CRAN (R 4.5.0)
    mgcv                 1.9-3      2025-04-04 [1] CRAN (R 4.5.2)
    mime                 0.13       2025-03-17 [1] CRAN (R 4.5.0)
    nlme                 3.1-168    2025-03-31 [1] CRAN (R 4.5.2)
    otel                 0.2.0      2025-08-29 [1] CRAN (R 4.5.0)
    pillar               1.11.1     2025-09-17 [1] CRAN (R 4.5.0)
    pkgbuild             1.4.8      2025-05-26 [1] CRAN (R 4.5.0)
    pkgconfig            2.0.3      2019-09-22 [1] CRAN (R 4.5.0)
    pkgload              1.4.1      2025-09-23 [1] CRAN (R 4.5.0)
    plotly               4.12.0     2026-01-24 [1] CRAN (R 4.5.2)
    png                  0.1-8      2022-11-29 [1] CRAN (R 4.5.0)
    processx             3.8.6      2025-02-21 [1] CRAN (R 4.5.0)
    promises             1.5.0      2025-11-01 [1] CRAN (R 4.5.0)
    proxy                0.4-29     2025-12-29 [1] CRAN (R 4.5.2)
    ps                   1.9.1      2025-04-12 [1] CRAN (R 4.5.0)
    purrr              * 1.2.1      2026-01-09 [1] CRAN (R 4.5.2)
    quantmod             0.4.28     2025-06-19 [1] CRAN (R 4.5.0)
    R6                   2.6.1      2025-02-15 [1] CRAN (R 4.5.0)
    rappdirs             0.3.4      2026-01-17 [1] CRAN (R 4.5.2)
    raster               3.6-32     2025-03-28 [1] CRAN (R 4.5.0)
    RColorBrewer         1.1-3      2022-04-03 [1] CRAN (R 4.5.0)
    Rcpp                 1.1.1      2026-01-10 [1] CRAN (R 4.5.2)
    readr              * 2.1.6      2025-11-14 [1] CRAN (R 4.5.2)
    remotes              2.5.0      2024-03-17 [1] CRAN (R 4.5.0)
    rlang                1.1.7      2026-01-09 [1] CRAN (R 4.5.2)
    rlist                0.4.6.2    2021-09-03 [1] CRAN (R 4.5.0)
    rmarkdown            2.30       2025-09-28 [1] CRAN (R 4.5.0)
    rnaturalearth        1.2.0      2026-01-19 [1] CRAN (R 4.5.2)
    rnaturalearthhires   1.0.0.9000 2025-10-02 [1] Github (ropensci/rnaturalearthhires@e4736f6)
    RPostgres            1.4.8      2025-02-25 [1] CRAN (R 4.5.0)
    rprojroot            2.1.1      2025-08-26 [1] CRAN (R 4.5.0)
    rstudioapi           0.18.0     2026-01-16 [1] CRAN (R 4.5.2)
    rvest                1.0.5      2025-08-29 [1] CRAN (R 4.5.0)
    S7                   0.2.1      2025-11-14 [1] CRAN (R 4.5.2)
    sass                 0.4.10     2025-04-11 [1] CRAN (R 4.5.0)
    satellite            1.0.6      2025-08-21 [1] CRAN (R 4.5.0)
    scales               1.4.0      2025-04-24 [1] CRAN (R 4.5.0)
    selectr              0.4-2      2019-11-20 [1] CRAN (R 4.5.0)
    sessioninfo          1.2.3      2025-02-05 [1] CRAN (R 4.5.0)
    sf                   1.1-0      2026-02-24 [1] CRAN (R 4.5.2)
    shiny                1.13.0     2026-02-20 [1] CRAN (R 4.5.2)
    shinyWidgets         0.9.0      2025-02-21 [1] CRAN (R 4.5.0)
    snakecase            0.11.1     2023-08-27 [1] CRAN (R 4.5.0)
    sp                   2.2-0      2025-02-01 [1] CRAN (R 4.5.0)
    stars                0.7-0      2025-12-14 [1] CRAN (R 4.5.2)
    stringi              1.8.7      2025-03-27 [1] CRAN (R 4.5.0)
    stringr            * 1.6.0      2025-11-04 [1] CRAN (R 4.5.0)
    taxize             * 0.10.0     2025-02-07 [1] CRAN (R 4.5.0)
    terra                1.8-93     2026-01-12 [1] CRAN (R 4.5.2)
    tibble             * 3.3.1      2026-01-11 [1] CRAN (R 4.5.2)
    tidyr              * 1.3.2      2025-12-19 [1] CRAN (R 4.5.2)
    tidyselect           1.2.1      2024-03-11 [1] CRAN (R 4.5.0)
    timechange           0.4.0      2026-01-29 [1] CRAN (R 4.5.2)
    TTR                  0.24.4     2023-11-28 [1] CRAN (R 4.5.0)
    tzdb                 0.5.0      2025-03-15 [1] CRAN (R 4.5.0)
    units                1.0-0      2025-10-09 [1] CRAN (R 4.5.0)
    usethis              3.2.1      2025-09-06 [1] CRAN (R 4.5.0)
    uuid                 1.2-2      2026-01-23 [1] CRAN (R 4.5.2)
    V8                   8.0.1      2025-10-10 [1] CRAN (R 4.5.0)
    vctrs                0.7.1      2026-01-23 [1] CRAN (R 4.5.2)
    viridisLite          0.4.3      2026-02-04 [1] CRAN (R 4.5.2)
    visNetwork           2.1.4      2025-09-04 [1] CRAN (R 4.5.0)
    vroom                1.7.0      2026-01-27 [1] CRAN (R 4.5.2)
    websocket            1.4.4      2025-04-10 [1] CRAN (R 4.5.0)
    withr                3.0.2      2024-10-28 [1] CRAN (R 4.5.0)
    worrms             * 0.4.3      2023-06-20 [1] CRAN (R 4.5.0)
    xfun                 0.56       2026-01-18 [1] CRAN (R 4.5.2)
    xml2                 1.5.2      2026-01-17 [1] CRAN (R 4.5.2)
    xtable               1.8-8      2026-02-22 [1] CRAN (R 4.5.2)
    xts                  0.14.1     2024-10-15 [1] CRAN (R 4.5.0)
    yaml                 2.3.12     2025-12-10 [1] CRAN (R 4.5.2)
    zoo                  1.8-15     2025-12-15 [1] CRAN (R 4.5.2)

 [1] /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library

 * ── Packages attached to the search path.
 V ── Loaded and on-disk version mismatch.
 P ── Loaded and on-disk path mismatch.

──────────────────────────────────────────────────────────────────────────────