Ingest Bottle Database

Published

2026-04-07

1 Overview

Source: Bottle Database | CalCOFI.org

Goal: Ingest bottle database from source CSV files into a local wrangling DuckDB, then export as parquet files for downstream use.

Two source tables:

  • 194903-202105_Cast.csv (61 fields) — station-level: cruise, location, meteorology, integrated measurements
  • 194903-202105_Bottle.csv (62 fields) — sample-level: depth, temperature, salinity, O₂, nutrients, pigments, C14, pH

Coverage: 1949-03 to 2021-05 (72 years)

Join key: cast_id (in both tables; source column Cst_Cnt)

This workflow processes CalCOFI bottle database CSV files and outputs parquet files. The workflow:

  1. Reads CSV files from source directory (with GCS archive sync)
  2. Loads into local wrangling DuckDB with column renames
  3. Applies ship name/code corrections
  4. Consolidates temporal columns into datetime_utc, drops derivable columns, creates casts_derived VIEW
  5. Pivots bottle measurements into long-format bottle_measurement table
  6. Pivots cast environmental conditions into cast_condition table
  7. Loads measurement_type reference table
  8. Validates primary keys and foreign key integrity
  9. Adds spatial geometry and grid assignments
  10. Exports to parquet files (for later integration into Working DuckLake)
Code
# devtools::install_local(here::here("../calcofi4db"), force = TRUE)
devtools::load_all(here::here("../calcofi4db"))
devtools::load_all(here::here("../calcofi4r"))

# load (and if needed install) all packages; use TRUE/FALSE, never T/F,
# since T and F are ordinary reassignable bindings in R
librarian::shelf(
  CalCOFI / calcofi4db,
  CalCOFI / calcofi4r,
  DBI,
  dplyr,
  DT,
  fs,
  glue,
  gargle,
  googledrive,
  here,
  htmltools,
  janitor,
  jsonlite,
  knitr,
  listviewer,
  litedown,
  lubridate,
  mapview,
  purrr,
  readr,
  rlang,
  sf,
  stringr,
  tibble,
  tidyr,
  units,
  webshot2,
  quiet = TRUE
)
options(readr.show_col_types = FALSE)
options(DT.options = list(scrollX = TRUE))

# define paths ----
# dataset identity used for labels, output dirs and provenance
dataset_name <- "CalCOFI Bottle Database"
provider <- "calcofi"
dataset <- "bottle"
dir_label <- glue("{provider}_{dataset}")
dir_data <- "~/My Drive/projects/calcofi/data-public"
subdir <- "CalCOFI_Database_194903-202105_csv_16October2023"
overwrite <- FALSE # TRUE rebuilds the wrangling db and parquet outputs
dir_parquet <- here(glue("data/parquet/{dir_label}"))
db_path <- here(glue("data/wrangling/{dir_label}.duckdb"))
# load data using calcofi4db package
# - reads from local Google Drive mount
# - syncs to GCS archive if files changed (creates new timestamped archive)
# - tracks GCS archive path for provenance
dir_dataset <- glue("{dir_data}/{provider}/{dataset}")
# unified measurement_type reference (needed for pivot mappings)
meas_type_csv <- here("metadata/measurement_type.csv")
Code
# start from a clean slate when overwrite is requested:
# remove any prior wrangling db and parquet outputs
if (overwrite) {
  if (file_exists(db_path)) file_delete(db_path)
  if (dir_exists(dir_parquet)) {
    dir_delete(dir_parquet)
  }
}

# ensure parent dir exists, then open DuckDB and enable spatial extension
dir_create(dirname(db_path))
con <- get_duckdb_con(db_path)
load_duckdb_extension(con, "spatial")
Code
# unified measurement_type reference, used below for the pivot mappings
d_meas_type <- read_csv(meas_type_csv)

# read source CSVs; per the comments above, read_csv_files() also syncs
# changed files to the GCS archive and records the archive path for
# provenance (NOTE(review): behavior per calcofi4db — confirm there)
d <- read_csv_files(
  provider = provider,
  dataset = dataset,
  subdir = subdir,
  dir_data = dir_data,
  sync_archive = TRUE,
  metadata_dir = here("metadata"),
  field_descriptions = list(
    "194903-202105_Cast" = glue(
      "{dir_dataset}/Cast Field Descriptions.csv"
    ),
    "194903-202105_Bottle" = glue(
      "{dir_dataset}/Bottle Field Descriptions - UTF-8.csv"
    )
  )
)

# show source files summary
message(glue("Loaded {nrow(d$source_files)} tables from {d$paths$dir_csv}"))
message(glue("Total rows: {sum(d$source_files$nrow)}"))

2 Check for any mismatched tables and fields

Code
# check data integrity
# type_exceptions = "all": type mismatches (34) are expected since readr
# infers types differently (e.g. quality codes as numeric, Time as hms);
# these are resolved during ingestion via flds_redefine.csv type_new
# NOTE(review): check_data_integrity() comes from calcofi4db — presumably
# it compares CSV-inferred schema vs redefinition metadata; confirm there
integrity <- check_data_integrity(
  d = d,
  dataset_name = dataset_name,
  type_exceptions = "all"
)

2.1 ✅ Data Integrity Check Passed: CalCOFI Bottle Database

2.1.1 Passed with Accepted Exceptions

34 type mismatch(es) were found but accepted as known exceptions (e.g., readr infers types differently from redefinition metadata; resolved during ingestion via flds_redefine.csv type_new column).


3 Show Source Files

Code
show_source_files(d)

4 Show CSV Tables and Fields to Ingest

Code
d$d_csv$tables |>
  datatable(caption = "Tables to ingest.")
Code
d$d_csv$fields |>
  datatable(caption = "Fields to ingest.")

5 Show tables and fields redefined

Code
show_tables_redefine(d)
Code
show_fields_redefine(d)

6 Load Tables into Database

Code
# use ingest_dataset() which handles:
# - transform_data() for applying redefinitions
# - provenance tracking via gcs_path from read_csv_files()
# - ingest_to_working() for each table
# mode: "replace" rebuilds tables on overwrite runs, otherwise appends
tbl_stats <- ingest_dataset(
  con = con,
  d = d,
  mode = if (overwrite) "replace" else "append",
  verbose = TRUE
)

# per-table ingest statistics
tbl_stats |>
  datatable(rownames = FALSE, filter = "top")

7 Apply Ship Corrections

Apply known ship name/code corrections from ship_renames.csv.

Code
# ship name/code corrections: csv_name/csv_code -> csv_name_new/csv_code_new
ship_renames_csv <- here("metadata/ship_renames.csv")
d_ship_renames <- read_csv(ship_renames_csv)

# apply each correction row, counting casts affected
n_updated_total <- 0
for (i in seq_len(nrow(d_ship_renames))) {
  old_name <- d_ship_renames$csv_name[i]
  old_code <- d_ship_renames$csv_code[i]
  new_name <- d_ship_renames$csv_name_new[i]
  new_code <- d_ship_renames$csv_code_new[i]

  # update where both name and code match old values;
  # glue_sql() safely quotes/escapes values for the target DB
  n_updated <- dbExecute(
    con,
    glue_sql(
      "UPDATE casts
     SET ship_name = {new_name}, ship_code = {new_code}
     WHERE ship_name = {old_name} AND ship_code = {old_code}",
      .con = con
    )
  )

  if (n_updated > 0) {
    message(glue(
      "Updated {n_updated} casts: ",
      "'{old_name}' [{old_code}] -> '{new_name}' [{new_code}]"
    ))
  }
  n_updated_total <- n_updated_total + n_updated
}

# glue() trims a leading blank line, so glue("\nTotal...") silently loses
# the separator newline; emit it via message() instead
message("\n", glue("Total casts updated: {n_updated_total}"))

8 Column Naming Rationale

The column renames in flds_redefine.csv follow these db conventions:

  • *_id for integer primary/foreign keys (e.g. Cst_Cnt → cast_id, Btl_Cnt → bottle_id)
  • *_key for varchar natural/composite keys (e.g. Cast_ID → cast_key, Sta_ID → site_key)
  • Note: Cruise_ID → cruise_key_0 (interim _0 suffix avoids collision with SWFSC cruise_key YYYY-MM-NODC; discarded after merge)
  • unit suffixes for measurements (e.g. Depthm → depth_m)

8.1 Ambiguous column pairs

Column pair Why different
Cst_Cnt (→ cast_id) vs Cast_ID (→ cast_key) integer counter (1, 2, 3…) vs varchar composite (19-4903CR-HY-060-0930-05400560)
Sta_ID (→ site_key) vs DbSta_ID (→ db_sta_key) “054.0 056.0” (space-separated) vs “05400560” (compact, derivable)
Depth_ID (→ depth_key) vs Depthm (→ depth_m) varchar identifier vs numeric depth in meters

8.2 R_* = “Reported” (pre-QC) values

The R_* prefix means original instrument values before quality control. Critical: R_Sal (Specific Volume Anomaly, values ~233, units 10⁻⁸ m³/kg) is a completely different parameter/scale than Salnty (Practical Salinity PSS-78, values ~33.44). These are pivoted as distinct measurement types (r_salinity_sva vs salinity).

9 Consolidate Casts Columns

Combine date + time into datetime_utc, drop 16 derivable columns, and create a casts_derived convenience VIEW.

Code
# -- create datetime_utc from date + time (idempotent) ----
# guard: if "date"/"time" are gone, a previous run already consolidated
casts_cols <- dbListFields(con, "casts")

if ("date" %in% casts_cols && "time" %in% casts_cols) {
  # date is DATE type (e.g. 1949-03-01), time is INTERVAL (e.g. 09:30:00)
  # adding DATE + INTERVAL yields TIMESTAMP; 324 casts have NULL time
  dbExecute(con, "ALTER TABLE casts ADD COLUMN IF NOT EXISTS datetime_utc TIMESTAMP")
  dbExecute(
    con,
    "
    UPDATE casts SET datetime_utc =
      CASE
        WHEN \"time\" IS NOT NULL
        THEN CAST(\"date\" AS TIMESTAMP) + \"time\"
        ELSE CAST(\"date\" AS TIMESTAMP)
      END"
  )
  message(glue(
    "Created datetime_utc: {dbGetQuery(con, 'SELECT COUNT(*) FROM casts WHERE datetime_utc IS NOT NULL')[[1]]} non-null values"
  ))

  # -- drop derivable columns ----
  # temporal/coordinate parts re-derivable from datetime_utc, lat_dec,
  # lon_dec, site_key (recreated in the casts_derived VIEW below)
  cols_to_drop <- c(
    "year", "month", "quarter", "julian_day", "julian_date",
    "time_zone", "lat_deg", "lat_min", "lat_hem",
    "lon_deg", "lon_min", "lon_hem",
    "cruise", "cruz_sta", "db_sta_key", "cruz_num",
    "date", "time"
  )

  n_before <- length(casts_cols)
  for (col in cols_to_drop) {
    # swallow per-column errors so already-dropped columns don't abort reruns
    tryCatch(
      dbExecute(con, glue('ALTER TABLE casts DROP COLUMN "{col}"')),
      error = function(e) NULL)
  }
  n_after <- length(dbListFields(con, "casts"))
  message(glue("casts columns: {n_before} -> {n_after} (dropped {n_before - n_after})"))
} else {
  message("casts already consolidated (datetime_utc exists, date/time dropped)")
}

10 Pivot Bottle Measurements

Pivot wide-format measurement columns from bottle into long-format bottle_measurement table. Each measurement type has a value, optional precision, and optional quality code.

Code
erDiagram
    bottle_BEFORE {
        int bottle_id PK
        int cast_id FK
        dbl depth_m
        dbl t_deg_c
        dbl t_prec
        str t_qual
        dbl salnty
        dbl s_prec
        str s_qual
        dbl o2ml_l
        str o_qual
        dbl chlor_a
        str chlqua
        dbl _etc "... 30 measurement columns"
    }
    bottle_AFTER {
        int bottle_id PK
        int cast_id FK
        dbl depth_m
        str depth_qual
    }
    bottle_measurement {
        int bottle_measurement_id PK
        int bottle_id FK
        str measurement_type
        dbl measurement_value
        dbl measurement_prec
        str measurement_qual
    }
    bottle_BEFORE ||--|{ bottle_measurement : "pivot into"
    bottle_AFTER ||--o{ bottle_measurement : "bottle_id"

Pivot wide bottle columns into long-format bottle_measurement table.

Code
# build measurement mapping from unified CSV:
# one row per measurement type, with its source value/precision/quality
# columns in the wide bottle table ("" treated as missing)
bottle_meas_map <- d_meas_type |>
  filter(
    str_detect(`_source_datasets`, "calcofi_bottle"),
    `_source_table` == "bottle"
  ) |>
  select(
    measurement_type,
    value_col = `_source_column`,
    prec_col = `_prec_column`,
    qual_col = `_qual_column`
  ) |>
  mutate(
    prec_col = na_if(prec_col, ""),
    qual_col = na_if(qual_col, "")
  )

# one SELECT per measurement type, later unioned into a single long table;
# pmap matches lambda arguments to bottle_meas_map column names
sql_parts <- purrr::pmap_chr(
  bottle_meas_map,
  \(measurement_type, value_col, prec_col, qual_col) {
    # absent precision/quality columns become literal SQL NULLs
    prec_expr <- if (is.na(prec_col)) "NULL" else prec_col
    qual_expr <- if (is.na(qual_col)) "NULL" else qual_col
    glue(
      "SELECT bottle_id, '{measurement_type}' AS measurement_type,
       CAST({value_col} AS DOUBLE) AS measurement_value,
       CAST({prec_expr} AS DOUBLE) AS measurement_prec,
       CAST({qual_expr} AS VARCHAR) AS measurement_qual
     FROM bottle WHERE {value_col} IS NOT NULL"
    )
  }
)

# UNION ALL the per-type SELECTs and number rows for a surrogate PK
sql_create <- glue(
  "CREATE OR REPLACE TABLE bottle_measurement AS
   SELECT ROW_NUMBER() OVER (ORDER BY bottle_id, measurement_type)
     AS bottle_measurement_id, *
   FROM (
     {paste(sql_parts, collapse = '\nUNION ALL\n')}
   ) sub"
)
dbExecute(con, sql_create)
[1] 11135600
Code
# report row and distinct-type counts for the new long table
n_meas <- dbGetQuery(con, "SELECT COUNT(*) FROM bottle_measurement")[[1]]
n_types <- dbGetQuery(
  con,
  "SELECT COUNT(DISTINCT measurement_type) FROM bottle_measurement"
)[[1]]
message(glue(
  "bottle_measurement: {format(n_meas, big.mark = ',')} rows, {n_types} measurement types"
))

# -- drop measurement columns from bottle base table ----
# every value/prec/qual column that was pivoted into bottle_measurement
cols_to_drop <- unique(c(
  bottle_meas_map$value_col,
  na.omit(bottle_meas_map$prec_col),
  na.omit(bottle_meas_map$qual_col)
))

n_before <- length(dbListFields(con, "bottle"))
for (col in cols_to_drop) {
  # warn (not error) so reruns with already-dropped columns continue
  tryCatch(
    dbExecute(con, glue('ALTER TABLE bottle DROP COLUMN "{col}"')),
    error = function(e) warning(glue("could not drop {col}: {e$message}"))
  )
}

# rename p_qual to depth_qual (pressure/depth quality code stays in base table)
dbExecute(con, 'ALTER TABLE bottle RENAME COLUMN p_qual TO depth_qual')
[1] 0
Code
n_after <- length(dbListFields(con, "bottle"))
message(glue(
  "bottle columns: {n_before} -> {n_after} (dropped {n_before - n_after})"
))

11 Pivot Cast Conditions

Pivot meteorological and environmental observation columns from casts into long-format cast_condition table.

Code
erDiagram
    casts_BEFORE {
        int cast_id PK
        str site_key
        dbl lat_dec
        dbl lon_dec
        ts  datetime_utc
        dbl wave_dir
        dbl wave_ht
        dbl wave_prd
        dbl wind_dir
        dbl wind_spd
        dbl barometer
        dbl dry_t
        dbl wet_t
        dbl wea
        dbl cloud_typ
        dbl cloud_amt
        dbl visibility
        dbl secchi
        dbl forel_u
    }
    casts_AFTER {
        int cast_id PK
        str site_key
        dbl lat_dec
        dbl lon_dec
        ts  datetime_utc
    }
    cast_condition {
        int cast_condition_id PK
        int cast_id FK
        str condition_type
        dbl condition_value
    }
    casts_BEFORE ||--|{ cast_condition : "pivot into"
    casts_AFTER ||--o{ cast_condition : "cast_id"

Pivot wide cast condition columns into long-format cast_condition table.

Code
# build cast condition mapping from unified CSV:
# condition types sourced from wide columns of the casts table
cast_cond_map <- d_meas_type |>
  filter(
    str_detect(`_source_datasets`, "calcofi_bottle"),
    `_source_table` == "casts"
  ) |>
  select(
    condition_type = measurement_type,
    source_col = `_source_column`
  )

# one SELECT per condition type; pmap matches lambda arguments to
# cast_cond_map column names
sql_parts <- purrr::pmap_chr(
  cast_cond_map,
  \(condition_type, source_col) {
    glue(
      "SELECT cast_id, '{condition_type}' AS condition_type,
       CAST({source_col} AS DOUBLE) AS condition_value
     FROM casts WHERE {source_col} IS NOT NULL"
    )
  }
)

# UNION ALL the per-type SELECTs and number rows for a surrogate PK
sql_create <- glue(
  "CREATE OR REPLACE TABLE cast_condition AS
   SELECT ROW_NUMBER() OVER (ORDER BY cast_id, condition_type)
     AS cast_condition_id, *
   FROM (
     {paste(sql_parts, collapse = '\nUNION ALL\n')}
   ) sub"
)
dbExecute(con, sql_create)
[1] 235513
Code
# report row and distinct-type counts for the new long table
n_cond <- dbGetQuery(con, "SELECT COUNT(*) FROM cast_condition")[[1]]
n_types <- dbGetQuery(
  con,
  "SELECT COUNT(DISTINCT condition_type) FROM cast_condition"
)[[1]]
message(glue(
  "cast_condition: {format(n_cond, big.mark = ',')} rows, {n_types} condition types"
))

# -- drop condition columns from casts ----
# every source column that was pivoted into cast_condition
n_before <- length(dbListFields(con, "casts"))
for (col in cast_cond_map$source_col) {
  # warn (not error) so reruns with already-dropped columns continue
  tryCatch(
    dbExecute(con, glue('ALTER TABLE casts DROP COLUMN "{col}"')),
    error = function(e) warning(glue("could not drop {col}: {e$message}"))
  )
}
n_after <- length(dbListFields(con, "casts"))
message(glue(
  "casts columns: {n_before} -> {n_after} (dropped {n_before - n_after})"
))

12 Load Measurement Type Reference

Load the measurement_type lookup table that provides units and descriptions for all measurement/condition types across datasets.

12.1 measurement_type vs lookup

The measurement_type table and the swfsc lookup table serve fundamentally different purposes:

  • measurement_type answers “what was measured?” — provides metadata (units, description, source provenance) for measurement codes used in bottle_measurement, cast_condition, and ichthyo
  • lookup answers “what does this code mean?” — translates categorical values (egg developmental stages 1-11, larva stages, tow types) into human-readable descriptions

Merging would create a sparse table with many NULL columns (e.g., units/_source_column NULL for egg stages; lookup_num/lookup_chr NULL for temperature). Both tables coexist in the Working DuckLake, loaded from their respective upstream workflows.

Code
# load the measurement_type lookup (units/descriptions for all type codes)
dbWriteTable(con, "measurement_type", d_meas_type, overwrite = TRUE)

# validate FK: all bottle_measurement types exist in reference table
orphan_btl_types <- dbGetQuery(
  con,
  "
  SELECT DISTINCT measurement_type FROM bottle_measurement
  WHERE measurement_type NOT IN (SELECT measurement_type FROM measurement_type)"
)
stopifnot(
  "all bottle_measurement.measurement_type must exist in measurement_type" = nrow(
    orphan_btl_types
  ) ==
    0
)

# validate FK: all cast_condition types exist in reference table
orphan_cond_types <- dbGetQuery(
  con,
  "
  SELECT DISTINCT condition_type FROM cast_condition
  WHERE condition_type NOT IN (SELECT measurement_type FROM measurement_type)"
)
stopifnot(
  "all cast_condition.condition_type must exist in measurement_type" = nrow(
    orphan_cond_types
  ) ==
    0
)

message(glue(
  "measurement_type: {nrow(d_meas_type)} types loaded, FK checks passed"
))

# validate: all measurement_types used by this dataset are registered
# (reverse direction of the FK checks above: used types must be flagged
# with calcofi_bottle in the _source_datasets column of the CSV)
bottle_types_used <- dbGetQuery(
  con,
  "SELECT DISTINCT measurement_type FROM bottle_measurement"
)$measurement_type
cast_types_used <- dbGetQuery(
  con,
  "SELECT DISTINCT condition_type FROM cast_condition"
)$condition_type

registered <- d_meas_type |>
  filter(str_detect(`_source_datasets`, "calcofi_bottle")) |>
  pull(measurement_type)

unregistered <- setdiff(c(bottle_types_used, cast_types_used), registered)
stopifnot(
  "all measurement_types used by calcofi_bottle must be registered in _source_datasets" = length(
    unregistered
  ) ==
    0
)

13 Load Dataset Metadata

Code
# register dataset-level metadata as its own reference table
d_dataset <- read_csv(here("metadata/dataset.csv"))
dbWriteTable(con, "dataset", d_dataset, overwrite = TRUE)
message(glue("dataset: {nrow(d_dataset)} datasets registered"))

14 Verify Primary Keys

Code
# -- primary key & foreign key validation ----
# two helpers replace the six near-identical inline SQL checks below

# count key tuples of `cols` appearing more than once in `tbl` (0 = unique)
count_dup_keys <- function(con, tbl, cols) {
  col_sql <- paste(cols, collapse = ", ")
  dbGetQuery(
    con,
    glue(
      "SELECT COUNT(*) FROM (
        SELECT {col_sql}, COUNT(*) AS n
        FROM {tbl}
        GROUP BY {col_sql}
        HAVING COUNT(*) > 1)"
    )
  )[[1]]
}

# count distinct child.{col} values missing from parent.{col} (0 = FK intact)
# note: assumes parent.{col} has no NULLs (it is a validated PK above)
count_fk_orphans <- function(con, child, parent, col) {
  dbGetQuery(
    con,
    glue(
      "SELECT COUNT(DISTINCT {col}) FROM {child}
      WHERE {col} NOT IN (SELECT {col} FROM {parent})"
    )
  )[[1]]
}

# verify cast_id uniqueness in casts
cast_keys <- tbl(con, "casts") |> pull(cast_id)
stopifnot("cast_id must be unique in casts" = sum(duplicated(cast_keys)) == 0)
message(glue("casts.cast_id: {length(cast_keys)} unique values, 0 duplicates"))

# verify bottle_id uniqueness in bottle
btl_keys <- tbl(con, "bottle") |> pull(bottle_id)
stopifnot("bottle_id must be unique in bottle" = sum(duplicated(btl_keys)) == 0)
message(glue(
  "bottle.bottle_id: {length(btl_keys)} unique values, 0 duplicates"
))

# verify FK: bottle.cast_id -> casts.cast_id
stopifnot(
  "all bottle.cast_id must exist in casts.cast_id" =
    count_fk_orphans(con, "bottle", "casts", "cast_id") == 0
)
message(glue(
  "FK integrity: all {length(btl_keys)} bottle.cast_id ",
  "found in casts.cast_id"
))

# verify bottle_measurement_id uniqueness (surrogate PK)
stopifnot(
  "bottle_measurement_id must be unique" =
    count_dup_keys(con, "bottle_measurement", "bottle_measurement_id") == 0
)
message(glue("bottle_measurement.bottle_measurement_id: unique, 0 duplicates"))

# verify composite PK: bottle_measurement (bottle_id, measurement_type)
stopifnot(
  "bottle_measurement (bottle_id, measurement_type) must be unique" =
    count_dup_keys(
      con, "bottle_measurement", c("bottle_id", "measurement_type")
    ) == 0
)
message(glue("bottle_measurement: composite key unique, 0 duplicates"))

# verify FK: bottle_measurement.bottle_id -> bottle.bottle_id
stopifnot(
  "all bottle_measurement.bottle_id must exist in bottle" =
    count_fk_orphans(con, "bottle_measurement", "bottle", "bottle_id") == 0
)
message(glue("bottle_measurement FK: all bottle_ids found in bottle"))

# verify cast_condition_id uniqueness (surrogate PK)
stopifnot(
  "cast_condition_id must be unique" =
    count_dup_keys(con, "cast_condition", "cast_condition_id") == 0
)
message(glue("cast_condition.cast_condition_id: unique, 0 duplicates"))

# verify composite PK: cast_condition (cast_id, condition_type)
stopifnot(
  "cast_condition (cast_id, condition_type) must be unique" =
    count_dup_keys(con, "cast_condition", c("cast_id", "condition_type")) == 0
)
message(glue("cast_condition: composite key unique, 0 duplicates"))

# verify FK: cast_condition.cast_id -> casts.cast_id
stopifnot(
  "all cast_condition.cast_id must exist in casts" =
    count_fk_orphans(con, "cast_condition", "casts", "cast_id") == 0
)
message(glue("cast_condition FK: all cast_ids found in casts"))

15 Schema Documentation

Code
# list base tables in sorted order, excluding the casts_derived VIEW
db_tables <- function(con) {
  all_tbls <- dbListTables(con)
  sort(setdiff(all_tbls, "casts_derived"))
}

tbls <- db_tables(con)
cc_erd(con, tables = tbls)

15.1 Primary Key Strategy

Note on UUID-first approach: The SWFSC ichthyo tables use source UUIDs (minted at sea) as primary identifiers. The bottle/cast tables are different — they use source integer counters (Cst_Cnt, Btl_Cnt) as natural keys, which are stable identifiers from the source system. The pivoted tables (bottle_measurement, cast_condition) have no source identifier and use sequential integer IDs for convenience, with composite natural keys enforced as unique.

Table Primary Key Type
casts cast_id Source integer counter (Cst_Cnt) — stable source key
bottle bottle_id Source integer counter (Btl_Cnt) — stable source key
bottle_measurement bottle_measurement_id Sequential (derived/pivoted table); composite (bottle_id, measurement_type) enforced unique
cast_condition cast_condition_id Sequential (derived/pivoted table); composite (cast_id, condition_type) enforced unique
measurement_type measurement_type Natural key (type code string)
grid grid_key Natural key

15.2 Foreign Key Relationships

casts.cast_id (PK, integer) — station-level
    ↓
bottle.bottle_id (PK, integer) — sample-level
bottle.cast_id (FK) → casts.cast_id
    ↓
bottle_measurement.bottle_measurement_id (PK, integer) — ROW_NUMBER(ORDER BY bottle_id, measurement_type)
bottle_measurement.bottle_id (FK) → bottle.bottle_id
bottle_measurement.measurement_type (FK) → measurement_type.measurement_type

cast_condition.cast_condition_id (PK, integer) — ROW_NUMBER(ORDER BY cast_id, condition_type)
cast_condition.cast_id (FK) → casts.cast_id
cast_condition.condition_type (FK) → measurement_type.measurement_type

casts.grid_key (FK) → grid.grid_key (after spatial join)
casts.ship_code (soft FK) → ship.ship_nodc (validated after integration with swfsc)
Code
# define PK/FK relationships for visualization and relationships.json
# (must mirror the checks in "Verify Primary Keys" above)
bottle_rels <- list(
  # one PK column per table; measurement_type is its own natural key
  primary_keys = list(
    casts = "cast_id",
    bottle = "bottle_id",
    bottle_measurement = "bottle_measurement_id",
    cast_condition = "cast_condition_id",
    measurement_type = "measurement_type"
  ),
  # each FK: child table/column -> referenced table/column
  foreign_keys = list(
    list(
      table = "bottle",
      column = "cast_id",
      ref_table = "casts",
      ref_column = "cast_id"
    ),
    list(
      table = "bottle_measurement",
      column = "bottle_id",
      ref_table = "bottle",
      ref_column = "bottle_id"
    ),
    list(
      table = "bottle_measurement",
      column = "measurement_type",
      ref_table = "measurement_type",
      ref_column = "measurement_type"
    ),
    list(
      table = "cast_condition",
      column = "cast_id",
      ref_table = "casts",
      ref_column = "cast_id"
    ),
    list(
      table = "cast_condition",
      column = "condition_type",
      ref_table = "measurement_type",
      ref_column = "measurement_type"
    )
  )
)

cc_erd(con, tables = db_tables(con), rels = bottle_rels)

16 Add Spatial

16.1 Add casts.geom

Code
# add geometry column using DuckDB spatial
# note: DuckDB spatial doesn't track SRID metadata (unlike PostGIS)
# all geometries assumed WGS84 (EPSG:4326) by convention
add_point_geom(con, "casts")

16.2 Load grid table

Load grid from the ichthyo workflow’s local parquet output (single source of truth — grid is created in ingest_swfsc_ichthyo.qmd).

Code
load_prior_tables(
  con = con,
  tables = "grid",
  parquet_dir = here("data/parquet/swfsc_ichthyo"),
  as_view = TRUE
)
# A tibble: 1 × 3
  table  rows has_geom
  <chr> <dbl> <lgl>   
1 grid    218 TRUE    

16.3 Update casts.grid_key

Code
grid_stats <- assign_grid_key(con, "casts")
grid_stats |> datatable()

16.4 Create casts_derived VIEW

Create convenience VIEW after all casts table modifications are complete (column drops, pivots, spatial join). The VIEW adds back temporal and coordinate columns derivable from the base table.

Code
dbExecute(con, "DROP VIEW IF EXISTS casts_derived")
[1] 0
Code
# casts_derived VIEW: re-derives the temporal/coordinate columns dropped
# from casts earlier (year..julian_date from datetime_utc; deg/min/hem from
# decimal lat/lon; cruise from YYYYMM; db_sta_key by compacting site_key);
# julian_date epoch 1899-12-30 matches the spreadsheet-style serial date
dbExecute(
  con,
  "
  CREATE VIEW casts_derived AS
  SELECT *,
    EXTRACT(YEAR FROM datetime_utc)::SMALLINT    AS year,
    EXTRACT(MONTH FROM datetime_utc)::SMALLINT   AS month,
    EXTRACT(QUARTER FROM datetime_utc)::SMALLINT AS quarter,
    EXTRACT(DOY FROM datetime_utc)::SMALLINT     AS julian_day,
    (datetime_utc::DATE - DATE '1899-12-30')     AS julian_date,
    FLOOR(ABS(lat_dec))::SMALLINT                AS lat_deg,
    (ABS(lat_dec) - FLOOR(ABS(lat_dec))) * 60    AS lat_min,
    CASE WHEN lat_dec >= 0 THEN 'N' ELSE 'S' END AS lat_hem,
    FLOOR(ABS(lon_dec))::SMALLINT                AS lon_deg,
    (ABS(lon_dec) - FLOOR(ABS(lon_dec))) * 60    AS lon_min,
    CASE WHEN lon_dec >= 0 THEN 'E' ELSE 'W' END AS lon_hem,
    STRFTIME(datetime_utc, '%Y%m')                AS cruise,
    REPLACE(REPLACE(site_key, '.', ''), ' ', '')   AS db_sta_key
  FROM casts"
)
[1] 0
Code
message("Created casts_derived VIEW with derived temporal/coordinate columns")

16.5 Map Cast Locations

Code
# slowish, so use cached figure
map_casts_png <- here(glue("figures/{provider}_{dataset}_casts_map.png"))

if (!file_exists(map_casts_png)) {
  # exclude native GEOMETRY column; convert via ST_AsText for sf
  cast_cols <- dbGetQuery(
    con,
    "SELECT column_name FROM information_schema.columns
     WHERE table_name = 'casts' AND data_type != 'GEOMETRY'"
  )$column_name
  cast_sql <- paste(
    "SELECT",
    paste(cast_cols, collapse = ", "),
    ", ST_AsText(geom) AS geom_wkt FROM casts"
  )
  # WKT -> sf points (WGS84 by convention); color casts by year
  casts_sf <- dbGetQuery(con, cast_sql) |>
    st_as_sf(wkt = "geom_wkt", crs = 4326) |>
    select(-geom_wkt) |>
    mutate(year = year(datetime_utc))

  m <- mapView(casts_sf, zcol = "year")
  mapshot2(m, file = map_casts_png)
}

# make the img src relative to the project root; fixed() treats the path
# literally (str_replace defaults to regex, which could misfire on paths
# containing metacharacters such as ".")
htmltools::img(
  src = map_casts_png |> str_replace(fixed(here()), "."),
  width = "600px"
)

17 Report

Code
# cc_erd handles GEOMETRY columns natively (unlike dm_from_con)
cc_erd(
  con,
  tables = db_tables(con),
  rels = bottle_rels,
  colors = list(
    lightyellow = c("bottle_measurement", "cast_condition"),
    lightblue = "measurement_type"
  )
)

Code
# summary statistics for the final report
# (all counts via SQL for consistency; n_bottles previously used dplyr tally)
n_casts <- dbGetQuery(con, "SELECT COUNT(*) FROM casts")[[1]]
n_bottles <- dbGetQuery(con, "SELECT COUNT(*) FROM bottle")[[1]]
n_meas <- dbGetQuery(con, "SELECT COUNT(*) FROM bottle_measurement")[[1]]
n_cond <- dbGetQuery(con, "SELECT COUNT(*) FROM cast_condition")[[1]]

# min/max year of cast timestamps (NULL datetimes ignored by MIN/MAX)
year_range <- dbGetQuery(
  con,
  "
  SELECT
    EXTRACT(YEAR FROM MIN(datetime_utc))::INTEGER AS yr_min,
    EXTRACT(YEAR FROM MAX(datetime_utc))::INTEGER AS yr_max
  FROM casts"
)

# thousands-separator formatter for report messages
fmt <- function(x, ...) format(x, big.mark = ",", ...)
message(glue("Total casts: {fmt(n_casts)}"))
message(glue("Total bottles: {fmt(n_bottles)}"))
message(glue("Total bottle measurements: {fmt(n_meas)}"))
message(glue("Total cast conditions: {fmt(n_cond)}"))
message(glue("Year range: {year_range$yr_min} - {year_range$yr_max}"))

18 Validate Local Database

Validate data quality in the local wrangling database before exporting to parquet. The parquet outputs from this workflow can later be used to update the Working DuckLake.

Code
# validate data quality before parquet export
# NOTE(review): validate_for_release() is from calcofi4db; returns
# $passed, $checks (tibble with status) and $errors — confirm contract there
validation <- validate_for_release(con)

if (validation$passed) {
  message("Validation passed!")
  # surface any non-"pass" checks as warnings even on overall pass
  if (nrow(validation$checks) > 0) {
    validation$checks |>
      filter(status != "pass") |>
      datatable(caption = "Validation Warnings")
  }
} else {
  # report errors but do not abort: failures are reviewed manually
  cat("Validation FAILED:\n")
  cat(paste("-", validation$errors, collapse = "\n"))
}
Validation FAILED:
- Table 'casts' has 298 NULL values in required column 'grid_key'
- Table 'casts_derived' has 298 NULL values in required column 'grid_key'

19 Enforce Column Types

Force integer/smallint types on columns that R’s numeric mapped to DOUBLE during dbWriteTable(). Uses flds_redefine.csv (type_new) as the source of truth for source-table columns, plus explicit overrides for derived-table columns.

Code
# coerce DOUBLE columns back to integer/smallint per flds_redefine.csv
# (R numeric maps to DOUBLE on dbWriteTable; type_new is source of truth)
type_changes <- enforce_column_types(
  con = con,
  d_flds_rd = d$d_flds_rd,
  type_overrides = list(),
  tables = db_tables(con),
  verbose = TRUE
)

# show what changed, if anything
if (nrow(type_changes) > 0) {
  type_changes |>
    datatable(caption = "Column type changes applied")
}
Code
# recreate casts_derived VIEW after type changes
dbExecute(con, "DROP VIEW IF EXISTS casts_derived")
[1] 0
Code
# recreate the view dropped just above; SQL intentionally duplicates the
# "Create casts_derived VIEW" section so the view reflects enforced types
dbExecute(
  con,
  "
  CREATE VIEW casts_derived AS
  SELECT *,
    EXTRACT(YEAR FROM datetime_utc)::SMALLINT    AS year,
    EXTRACT(MONTH FROM datetime_utc)::SMALLINT   AS month,
    EXTRACT(QUARTER FROM datetime_utc)::SMALLINT AS quarter,
    EXTRACT(DOY FROM datetime_utc)::SMALLINT     AS julian_day,
    (datetime_utc::DATE - DATE '1899-12-30')     AS julian_date,
    FLOOR(ABS(lat_dec))::SMALLINT                AS lat_deg,
    (ABS(lat_dec) - FLOOR(ABS(lat_dec))) * 60    AS lat_min,
    CASE WHEN lat_dec >= 0 THEN 'N' ELSE 'S' END AS lat_hem,
    FLOOR(ABS(lon_dec))::SMALLINT                AS lon_deg,
    (ABS(lon_dec) - FLOOR(ABS(lon_dec))) * 60    AS lon_min,
    CASE WHEN lon_dec >= 0 THEN 'E' ELSE 'W' END AS lon_hem,
    STRFTIME(datetime_utc, '%Y%m')                AS cruise,
    REPLACE(REPLACE(site_key, '.', ''), ' ', '')   AS db_sta_key
  FROM casts"
)
[1] 0
Code
message("Recreated casts_derived VIEW after column type enforcement")

20 Data Preview

Preview first and last rows of each table before writing parquet outputs.

Code
# preview head/tail of each exported table before writing parquet
preview_tables(
  con,
  c(
    "casts",
    "bottle",
    "bottle_measurement",
    "cast_condition",
    "measurement_type"
  )
) # grid canonical from swfsc

20.1 casts (35,644 rows)

20.2 bottle (895,371 rows)

20.3 bottle_measurement (11,135,600 rows)

20.4 cast_condition (235,513 rows)

20.5 measurement_type (104 rows)

Code
# casts_derived is a VIEW; exclude GEOMETRY column for R driver compatibility
cd_cols <- dbGetQuery(
  con,
  "SELECT column_name FROM information_schema.columns
   WHERE table_name = 'casts_derived' AND data_type != 'GEOMETRY'"
)$column_name
# select the non-geometry columns explicitly, first 100 rows only
dbGetQuery(
  con,
  glue("SELECT {paste(cd_cols, collapse=', ')} FROM casts_derived LIMIT 100")
) |>
  datatable(
    caption = "casts_derived — first 100 rows (VIEW)",
    rownames = FALSE,
    filter = "top"
  )

21 Write Parquet Outputs

Export tables to parquet files for downstream use.

Code
# collect mismatches for manifest
# (ship/measurement-type/cruise-key discrepancies recorded for provenance)
mismatches <- list(
  ships = collect_ship_mismatches(con, "casts"),
  measurement_types = collect_measurement_type_mismatches(
    con,
    here("metadata/measurement_type.csv")
  ),
  cruise_keys = collect_cruise_key_mismatches(con, "casts")
)

# write parquet files with manifest
# export only tables this ingest creates (dependency _new deltas added later)
# note: grid is excluded — it is canonical from the swfsc_ichthyo workflow
own_tables <- c(
  "casts",
  "bottle",
  "bottle_measurement",
  "cast_condition",
  "measurement_type",
  "dataset"
)

parquet_stats <- write_parquet_outputs(
  con = con,
  output_dir = dir_parquet,
  tables = own_tables,
  strip_provenance = FALSE,
  mismatches = mismatches
)

# per-file export statistics (basename only; full path dropped for display)
parquet_stats |>
  mutate(file = basename(path)) |>
  select(-path) |>
  datatable(caption = "Parquet export statistics")

22 Write Metadata

Build metadata.json sidecar file documenting all tables and columns in parquet outputs. DuckDB COMMENT ON does not propagate to parquet, so this provides the metadata externally.

Code
# build metadata.json sidecar (table/column docs) alongside parquet outputs;
# set_comments = TRUE also applies DuckDB COMMENT ON within the wrangling db
metadata_path <- build_metadata_json(
  con = con,
  d_tbls_rd = d$d_tbls_rd,
  d_flds_rd = d$d_flds_rd,
  metadata_derived_csv = here(
    "metadata/calcofi/bottle/metadata_derived.csv"
  ),
  output_dir = dir_parquet,
  tables = db_tables(con),
  set_comments = TRUE,
  provider = provider,
  dataset = dataset,
  workflow_url = glue(
    "https://calcofi.io/workflows/",
    "ingest_calcofi_bottle.html"
  )
)

# write relationships.json sidecar with PKs/FKs
build_relationships_json(
  rels = bottle_rels,
  output_dir = dir_parquet,
  provider = provider,
  dataset = dataset
)
[1] "/Users/bbest/Github/CalCOFI/workflows/data/parquet/calcofi_bottle/relationships.json"
Code
# summarize the metadata sidecar: column count and long name per table
metadata <- jsonlite::fromJSON(metadata_path)
tbl_names <- names(metadata$tables)
col_keys <- names(metadata$columns) # keyed as "table.column"
tibble(
  table = tbl_names,
  n_cols = map_int(tbl_names, \(t) sum(grepl(glue("^{t}\\."), col_keys))),
  name_long = map_chr(metadata$tables, \(t) t$name_long)
) |>
  datatable(caption = "Table metadata summary")
Code
# interactive tree view of the full metadata sidecar
meta_list <- jsonlite::fromJSON(metadata_path, simplifyVector = FALSE)
listviewer::jsonedit(meta_list, mode = "view")
Code
# interactive tree view of the relationships sidecar
rels_path <- file.path(dir_parquet, "relationships.json")
listviewer::jsonedit(
  jsonlite::fromJSON(rels_path, simplifyVector = FALSE),
  mode = "view"
)

23 Upload to GCS Archive

Upload parquet files, manifest, and metadata sidecar to gs://calcofi-db/ingest/{provider}_{dataset}/.

Code
# mirror local parquet outputs to the GCS archive
# (unchanged files are skipped via checksum comparison)
sync_to_gcs(
  local_dir  = dir_parquet,
  bucket     = "calcofi-db",
  gcs_prefix = glue("ingest/{dir_label}")
)
# A tibble: 10 × 4
   file                       action      size reason        
   <chr>                      <chr>      <dbl> <chr>         
 1 bottle_measurement.parquet skipped 89136977 checksum match
 2 bottle.parquet             skipped 14473563 checksum match
 3 cast_condition.parquet     skipped  1592621 checksum match
 4 casts.parquet              skipped  2001519 checksum match
 5 dataset.parquet            skipped     5665 checksum match
 6 manifest.json              skipped     1130 checksum match
 7 measurement_type.parquet   skipped     5781 checksum match
 8 metadata.json              skipped    14487 checksum match
 9 relationships.json         skipped     1031 checksum match
10 ship_new.parquet           skipped      784 checksum match

24 Cross-Dataset Integration

Link bottle casts to SWFSC ship/cruise reference tables via ship matching and cruise key derivation. This replaces the separate merge_ichthyo_bottle.qmd workflow step.

24.1 Load Reference Tables

Code
# tables named in calcofi.modifies frontmatter are loaded as writable TABLEs;
# all other dependency tables are attached as zero-copy read-only VIEWs
modifies_tables <- c("ship") # from calcofi.modifies in YAML frontmatter
load_prior_tables(
  con = con,
  parquet_dir = here("data/parquet/swfsc_ichthyo"),
  tables = modifies_tables
)
# A tibble: 1 × 3
  table  rows has_geom
  <chr> <dbl> <lgl>   
1 ship     48 FALSE   
Code
# attach the remaining dependency tables (here: cruise) as read-only views
view_tables <- setdiff(c("ship", "cruise"), modifies_tables)
load_prior_tables(
  con = con,
  tables = view_tables,
  parquet_dir = here("data/parquet/swfsc_ichthyo"),
  as_view = TRUE
)
# A tibble: 1 × 3
  table   rows has_geom
  <chr>  <dbl> <lgl>   
1 cruise   691 FALSE   
Code
# snapshot current PK values of each writable table so rows added by this
# workflow can be detected later when exporting _new delta sidecars
# NOTE(review): PK assumed to be the first column by ordinal position —
# confirm this heuristic holds for every table in calcofi.modifies
snapshot_pk <- function(tbl) {
  pk_col <- dbGetQuery(
    con,
    glue(
      "SELECT column_name FROM information_schema.columns
     WHERE table_name = '{tbl}' ORDER BY ordinal_position LIMIT 1"
    )
  )$column_name
  list(
    pk_col = pk_col,
    keys = dbGetQuery(con, glue("SELECT {pk_col} FROM {tbl}"))[[1]]
  )
}
modifies_pks <- setNames(lapply(modifies_tables, snapshot_pk), modifies_tables)

24.2 Ship Matching + Cruise Bridge

Code
# derive ship_key/cruise_key on casts via ship matching (with ICES lookup)
bridge_result <- derive_cruise_key_on_casts(
  con = con,
  fetch_ices = TRUE,
  ship_renames_csv = here("metadata/ship_renames.csv")
)

# ships still unmatched get interim entries (ship_nodc = "?XX?")
ensure_interim_ships(con, bridge_result$ship_matches)
[1] 1
Code
# full per-ship matching results
datatable(bridge_result$ship_matches, caption = "Ship matching results")
Code
# tally how each ship was matched
count(bridge_result$ship_matches, match_type) |>
  datatable(caption = "Ship match type counts")
Code
# cruise bridge match statistics
datatable(bridge_result$cruise_stats, caption = "Cruise bridge match statistics")
Code
# surface any ship codes that failed to match; celebrate otherwise
unmatched <- bridge_result$unmatched_report
if (nrow(unmatched) == 0) {
  message("All ship codes matched!")
} else {
  datatable(unmatched, caption = "Unmatched ship codes (no ship_key)")
}

24.3 Cross-Dataset Validation

Code
# helper: list a table's columns via information_schema
# (avoids GEOMETRY-type issues that dbListFields hits with duckdb spatial)
table_columns <- function(con, tbl) {
  dbGetQuery(
    con,
    glue(
      "SELECT column_name FROM information_schema.columns
       WHERE table_name = '{tbl}'"
    )
  )$column_name
}

casts_cols <- table_columns(con, "casts")
grid_cols <- table_columns(con, "grid")

# grid_key integrity: every non-NULL casts.grid_key must exist in grid.
# NOT EXISTS is used instead of NOT IN: NOT IN silently returns zero rows
# whenever the subquery column contains any NULL, masking real orphans.
if ("grid_key" %in% casts_cols && "grid_key" %in% grid_cols) {
  grid_orphans <- dbGetQuery(
    con,
    "SELECT COUNT(*) AS n FROM casts c
     WHERE c.grid_key IS NOT NULL
       AND NOT EXISTS (
         SELECT 1 FROM grid g WHERE g.grid_key = c.grid_key
       )"
  )$n
  message(glue("Grid key orphans in casts: {grid_orphans}"))
}

# helper: warn on duplicate primary-key values in a reference table;
# returns the duplicate rows invisibly for inspection
check_pk_dups <- function(con, tbl, pk) {
  dups <- dbGetQuery(
    con,
    glue(
      "SELECT {pk}, COUNT(*) AS n FROM {tbl}
       GROUP BY {pk} HAVING COUNT(*) > 1"
    )
  )
  if (nrow(dups) > 0) {
    warning(glue("Duplicate {pk} values: {nrow(dups)}"), call. = FALSE)
  }
  invisible(dups)
}

ship_dups <- check_pk_dups(con, "ship", "ship_key")
cruise_dups <- check_pk_dups(con, "cruise", "cruise_key")

# cruise bridge coverage: how many casts gained ship/cruise keys
bridge_coverage <- dbGetQuery(
  con,
  "SELECT
    COUNT(*) AS total_casts,
    SUM(CASE WHEN ship_key IS NOT NULL THEN 1 ELSE 0 END) AS with_ship_key,
    SUM(CASE WHEN cruise_key IS NOT NULL THEN 1 ELSE 0 END) AS with_cruise_key
  FROM casts"
)
bridge_coverage |> datatable(caption = "Cruise bridge coverage")
Code
message("Cross-dataset validation complete")

24.4 Re-export Casts Parquet

Update casts.parquet with the new ship_key and cruise_key columns.

Code
# re-export casts.parquet so it includes the new ship_key/cruise_key
# columns, excluding internal provenance columns from the public output
casts_pqt <- file.path(dir_parquet, "casts.parquet")
cols <- DBI::dbGetQuery(
  con,
  "SELECT column_name FROM information_schema.columns
   WHERE table_name = 'casts'"
)$column_name
prov_cols <- c("_source_file", "_source_row", "_source_uuid", "_ingested_at")
export_cols <- setdiff(cols, prov_cols)
select_list <- paste(export_cols, collapse = ", ")
DBI::dbExecute(
  con,
  glue(
    "COPY (SELECT {select_list} FROM casts)
   TO '{casts_pqt}' (FORMAT PARQUET, COMPRESSION 'snappy')"
  )
)
[1] 35644
Code
message("Re-exported casts.parquet with ship_key + cruise_key")

# export _new delta sidecars for modified dependency tables (from calcofi.modifies)
# these are NOT in the manifest — picked up by build_release_table_registry()
for (tbl in modifies_tables) {
  pk_col <- modifies_pks[[tbl]]$pk_col
  keys_old <- modifies_pks[[tbl]]$keys
  keys_new <- dbGetQuery(con, glue("SELECT {pk_col} FROM {tbl}"))[[1]]
  additions <- setdiff(keys_new, keys_old)

  if (length(additions) > 0) {
    new_tbl <- paste0(tbl, "_new")
    pq_path <- file.path(dir_parquet, paste0(new_tbl, ".parquet"))
    # build SQL string literals, not shell quotes: shQuote() escapes an
    # embedded apostrophe as '\'' which is invalid SQL — SQL doubles it
    vals <- paste(
      paste0("'", gsub("'", "''", additions, fixed = TRUE), "'"),
      collapse = ", "
    )
    export_parquet(
      con,
      glue("SELECT * FROM {tbl} WHERE {pk_col} IN ({vals})"),
      pq_path
    )
    message(glue("{length(additions)} new {tbl} row(s) -> {new_tbl}.parquet"))
  } else {
    message(glue("No new {tbl} rows — {tbl}_new not exported"))
  }
}

25 Cleanup

Code
# tear down the local wrangling DuckDB
close_duckdb(con)
message("Local wrangling database connection closed")

# parquet outputs in data/parquet/calcofi_bottle/ feed the Working
# DuckLake update, which runs as a separate workflow
message(glue("Parquet outputs written to: {dir_parquet}"))
message(glue("GCS outputs at: gs://calcofi-db/ingest/{dir_label}/"))

26 TODO

Code
devtools::session_info()
─ Session info ───────────────────────────────────────────────────────────────
 setting  value
 version  R version 4.5.2 (2025-10-31)
 os       macOS Sequoia 15.7.1
 system   aarch64, darwin20
 ui       X11
 language (EN)
 collate  en_US.UTF-8
 ctype    en_US.UTF-8
 tz       Europe/Rome
 date     2026-04-07
 pandoc   3.8.3 @ /opt/homebrew/bin/ (via rmarkdown)
 quarto   1.8.25 @ /usr/local/bin/quarto

─ Packages ───────────────────────────────────────────────────────────────────
 !  package            * version    date (UTC) lib source
    abind                1.4-8      2024-09-12 [1] CRAN (R 4.5.0)
    arrow                23.0.1.2   2026-03-25 [1] CRAN (R 4.5.2)
    assertthat           0.2.1      2019-03-21 [1] CRAN (R 4.5.0)
    backports            1.5.1      2026-04-03 [1] CRAN (R 4.5.2)
    base64enc            0.1-6      2026-02-02 [1] CRAN (R 4.5.2)
    bit                  4.6.0      2025-03-06 [1] CRAN (R 4.5.0)
    bit64                4.6.0-1    2025-01-16 [1] CRAN (R 4.5.0)
    blob                 1.3.0      2026-01-14 [1] CRAN (R 4.5.2)
    broom                1.0.12     2026-01-27 [1] CRAN (R 4.5.2)
    bslib                0.10.0     2026-01-26 [1] CRAN (R 4.5.2)
    cachem               1.1.0      2024-05-16 [1] CRAN (R 4.5.0)
 P  calcofi4db         * 2.5.6      2026-04-07 [?] load_all()
 VP calcofi4r          * 1.1.7      2026-04-03 [?] load_all() (on disk 1.1.6)
    chromote             0.5.1      2025-04-24 [1] CRAN (R 4.5.0)
    class                7.3-23     2025-01-01 [1] CRAN (R 4.5.2)
    classInt             0.4-11     2025-01-08 [1] CRAN (R 4.5.0)
    cli                  3.6.5      2025-04-23 [1] CRAN (R 4.5.0)
    codetools            0.2-20     2024-03-31 [1] CRAN (R 4.5.2)
    crayon               1.5.3      2024-06-20 [1] CRAN (R 4.5.0)
    crosstalk            1.2.2      2025-08-26 [1] CRAN (R 4.5.0)
    curl                 7.0.0      2025-08-19 [1] CRAN (R 4.5.0)
    data.table           1.18.2.1   2026-01-27 [1] CRAN (R 4.5.2)
    DBI                * 1.3.0      2026-02-25 [1] CRAN (R 4.5.2)
    dbplyr               2.5.2      2026-02-13 [1] CRAN (R 4.5.2)
    desc                 1.4.3      2023-12-10 [1] CRAN (R 4.5.0)
    devtools             2.5.0      2026-03-14 [1] CRAN (R 4.5.2)
    DiagrammeR           1.0.11     2024-02-02 [1] CRAN (R 4.5.0)
    DiagrammeRsvg        0.1        2016-02-04 [1] CRAN (R 4.5.0)
    digest               0.6.39     2025-11-19 [1] CRAN (R 4.5.2)
    dm                   1.1.1      2026-03-05 [1] CRAN (R 4.5.2)
    dplyr              * 1.2.0      2026-02-03 [1] CRAN (R 4.5.2)
    DT                 * 0.34.0     2025-09-02 [1] CRAN (R 4.5.0)
    duckdb               1.5.1      2026-03-26 [1] CRAN (R 4.5.2)
    dygraphs             1.1.1.6    2018-07-11 [1] CRAN (R 4.5.0)
    e1071                1.7-17     2025-12-18 [1] CRAN (R 4.5.2)
    ellipsis             0.3.2      2021-04-29 [1] CRAN (R 4.5.0)
    evaluate             1.0.5      2025-08-27 [1] CRAN (R 4.5.0)
    farver               2.1.2      2024-05-13 [1] CRAN (R 4.5.0)
    fastmap              1.2.0      2024-05-15 [1] CRAN (R 4.5.0)
    fs                 * 2.0.1      2026-03-24 [1] CRAN (R 4.5.2)
    fuzzyjoin            0.1.8      2026-02-20 [1] CRAN (R 4.5.2)
    gargle             * 1.6.1      2026-01-29 [1] CRAN (R 4.5.2)
    generics             0.1.4      2025-05-09 [1] CRAN (R 4.5.0)
    geojsonsf            2.0.5      2025-11-26 [1] CRAN (R 4.5.2)
    ggplot2              4.0.2      2026-02-03 [1] CRAN (R 4.5.2)
    glue               * 1.8.0      2024-09-30 [1] CRAN (R 4.5.0)
    googledrive        * 2.1.2      2025-09-10 [1] CRAN (R 4.5.0)
    gtable               0.3.6      2024-10-25 [1] CRAN (R 4.5.0)
    here               * 1.0.2      2025-09-15 [1] CRAN (R 4.5.0)
    highcharter          0.9.4      2022-01-03 [1] CRAN (R 4.5.0)
    hms                  1.1.4      2025-10-17 [1] CRAN (R 4.5.0)
    htmltools          * 0.5.9      2025-12-04 [1] CRAN (R 4.5.2)
    htmlwidgets          1.6.4      2023-12-06 [1] CRAN (R 4.5.0)
    httpuv               1.6.17     2026-03-18 [1] CRAN (R 4.5.2)
    httr                 1.4.8      2026-02-13 [1] CRAN (R 4.5.2)
    httr2                1.2.2      2025-12-08 [1] CRAN (R 4.5.2)
    igraph               2.2.2      2026-02-12 [1] CRAN (R 4.5.2)
    isoband              0.3.0      2025-12-07 [1] CRAN (R 4.5.2)
    janitor            * 2.2.1      2024-12-22 [1] CRAN (R 4.5.0)
    jquerylib            0.1.4      2021-04-26 [1] CRAN (R 4.5.0)
    jsonlite           * 2.0.0      2025-03-27 [1] CRAN (R 4.5.0)
    KernSmooth           2.23-26    2025-01-01 [1] CRAN (R 4.5.2)
    knitr              * 1.51       2025-12-20 [1] CRAN (R 4.5.2)
    later                1.4.8      2026-03-05 [1] CRAN (R 4.5.2)
    lattice              0.22-9     2026-02-09 [1] CRAN (R 4.5.2)
    lazyeval             0.2.2      2019-03-15 [1] CRAN (R 4.5.0)
    leafem               0.2.5      2025-08-28 [1] CRAN (R 4.5.0)
    leaflet              2.2.3      2025-09-04 [1] CRAN (R 4.5.0)
    librarian            1.8.1      2021-07-12 [1] CRAN (R 4.5.0)
    lifecycle            1.0.5      2026-01-08 [1] CRAN (R 4.5.2)
    listviewer         * 4.0.0      2023-09-30 [1] CRAN (R 4.5.0)
    litedown           * 0.9        2025-12-18 [1] CRAN (R 4.5.2)
    lubridate          * 1.9.5      2026-02-04 [1] CRAN (R 4.5.2)
    magrittr             2.0.4      2025-09-12 [1] CRAN (R 4.5.0)
    mapgl                0.4.5.9000 2026-03-31 [1] Github (bbest/mapgl@74df3b1)
    mapview            * 2.11.4     2025-09-08 [1] CRAN (R 4.5.0)
    markdown             2.0        2025-03-23 [1] CRAN (R 4.5.0)
    Matrix               1.7-5      2026-03-21 [1] CRAN (R 4.5.2)
    memoise              2.0.1      2021-11-26 [1] CRAN (R 4.5.0)
    mgcv                 1.9-4      2025-11-07 [1] CRAN (R 4.5.0)
    mime                 0.13       2025-03-17 [1] CRAN (R 4.5.0)
    nlme                 3.1-169    2026-03-27 [1] CRAN (R 4.5.2)
    otel                 0.2.0      2025-08-29 [1] CRAN (R 4.5.0)
    pillar               1.11.1     2025-09-17 [1] CRAN (R 4.5.0)
    pkgbuild             1.4.8      2025-05-26 [1] CRAN (R 4.5.0)
    pkgconfig            2.0.3      2019-09-22 [1] CRAN (R 4.5.0)
    pkgload              1.5.1      2026-04-01 [1] CRAN (R 4.5.2)
    plotly               4.12.0     2026-01-24 [1] CRAN (R 4.5.2)
    png                  0.1-9      2026-03-15 [1] CRAN (R 4.5.2)
    processx             3.8.7      2026-04-01 [1] CRAN (R 4.5.2)
    promises             1.5.0      2025-11-01 [1] CRAN (R 4.5.0)
    proxy                0.4-29     2025-12-29 [1] CRAN (R 4.5.2)
    purrr              * 1.2.1      2026-01-09 [1] CRAN (R 4.5.2)
    quantmod             0.4.28     2025-06-19 [1] CRAN (R 4.5.0)
    R6                   2.6.1      2025-02-15 [1] CRAN (R 4.5.0)
    rappdirs             0.3.4      2026-01-17 [1] CRAN (R 4.5.2)
    raster               3.6-32     2025-03-28 [1] CRAN (R 4.5.0)
    RColorBrewer         1.1-3      2022-04-03 [1] CRAN (R 4.5.0)
    Rcpp                 1.1.1      2026-01-10 [1] CRAN (R 4.5.2)
    readr              * 2.2.0      2026-02-19 [1] CRAN (R 4.5.2)
    rlang              * 1.1.7      2026-01-09 [1] CRAN (R 4.5.2)
    rlist                0.4.6.2    2021-09-03 [1] CRAN (R 4.5.0)
    rmarkdown            2.31       2026-03-26 [1] CRAN (R 4.5.2)
    rnaturalearth        1.2.0      2026-01-19 [1] CRAN (R 4.5.2)
    rnaturalearthhires   1.0.0.9000 2025-10-02 [1] Github (ropensci/rnaturalearthhires@e4736f6)
    RPostgres            1.4.10     2026-02-16 [1] CRAN (R 4.5.2)
    rprojroot            2.1.1      2025-08-26 [1] CRAN (R 4.5.0)
    rstudioapi           0.18.0     2026-01-16 [1] CRAN (R 4.5.2)
    rvest                1.0.5      2025-08-29 [1] CRAN (R 4.5.0)
    S7                   0.2.1      2025-11-14 [1] CRAN (R 4.5.2)
    sass                 0.4.10     2025-04-11 [1] CRAN (R 4.5.0)
    satellite            1.0.6      2025-08-21 [1] CRAN (R 4.5.0)
    scales               1.4.0      2025-04-24 [1] CRAN (R 4.5.0)
    selectr              0.5-1      2025-12-17 [1] CRAN (R 4.5.2)
    sessioninfo          1.2.3      2025-02-05 [1] CRAN (R 4.5.0)
    sf                 * 1.1-0      2026-02-24 [1] CRAN (R 4.5.2)
    shiny                1.13.0     2026-02-20 [1] CRAN (R 4.5.2)
    shinyWidgets         0.9.1      2026-03-09 [1] CRAN (R 4.5.2)
    snakecase            0.11.1     2023-08-27 [1] CRAN (R 4.5.0)
    sp                   2.2-1      2026-02-13 [1] CRAN (R 4.5.2)
    stars                0.7-1      2026-02-13 [1] CRAN (R 4.5.2)
    stringi              1.8.7      2025-03-27 [1] CRAN (R 4.5.0)
    stringr            * 1.6.0      2025-11-04 [1] CRAN (R 4.5.0)
    terra                1.9-11     2026-03-26 [1] CRAN (R 4.5.2)
    tibble             * 3.3.1      2026-01-11 [1] CRAN (R 4.5.2)
    tidyr              * 1.3.2      2025-12-19 [1] CRAN (R 4.5.2)
    tidyselect           1.2.1      2024-03-11 [1] CRAN (R 4.5.0)
    timechange           0.4.0      2026-01-29 [1] CRAN (R 4.5.2)
    TTR                  0.24.4     2023-11-28 [1] CRAN (R 4.5.0)
    tzdb                 0.5.0      2025-03-15 [1] CRAN (R 4.5.0)
    units              * 1.0-1      2026-03-11 [1] CRAN (R 4.5.2)
    usethis              3.2.1      2025-09-06 [1] CRAN (R 4.5.0)
    utf8                 1.2.6      2025-06-08 [1] CRAN (R 4.5.0)
    uuid                 1.2-2      2026-01-23 [1] CRAN (R 4.5.2)
    V8                   8.0.1      2025-10-10 [1] CRAN (R 4.5.0)
    vctrs                0.7.2      2026-03-21 [1] CRAN (R 4.5.2)
    viridisLite          0.4.3      2026-02-04 [1] CRAN (R 4.5.2)
    visNetwork           2.1.4      2025-09-04 [1] CRAN (R 4.5.0)
    vroom                1.7.1      2026-03-31 [1] CRAN (R 4.5.2)
    webshot2           * 0.1.2      2025-04-23 [1] CRAN (R 4.5.0)
    websocket            1.4.4      2025-04-10 [1] CRAN (R 4.5.0)
    withr                3.0.2      2024-10-28 [1] CRAN (R 4.5.0)
    xfun                 0.57       2026-03-20 [1] CRAN (R 4.5.2)
    xml2                 1.5.2      2026-01-17 [1] CRAN (R 4.5.2)
    xtable               1.8-8      2026-02-22 [1] CRAN (R 4.5.2)
    xts                  0.14.2     2026-02-28 [1] CRAN (R 4.5.2)
    yaml                 2.3.12     2025-12-10 [1] CRAN (R 4.5.2)
    zoo                  1.8-15     2025-12-15 [1] CRAN (R 4.5.2)

 [1] /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library

 * ── Packages attached to the search path.
 V ── Loaded and on-disk version mismatch.
 P ── Loaded and on-disk path mismatch.

──────────────────────────────────────────────────────────────────────────────