Ingest Bottle Database

Published

2026-02-05

1 Overview

Source: Bottle Database | CalCOFI.org

Goal: Ingest bottle database from source CSV files into a local wrangling DuckDB, then export as parquet files for downstream use.

Two source tables:

  • 194903-202105_Cast.csv (61 fields) — station-level: cruise, location, meteorology, integrated measurements
  • 194903-202105_Bottle.csv (62 fields) — sample-level: depth, temperature, salinity, O₂, nutrients, pigments, C14, pH

Coverage: 1949-03 to 2021-05 (72 years)

Join key: cast_id (in both tables; source column Cst_Cnt)
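
For example, once both tables are ingested below, sample-level bottles join to their station-level cast on this key. A minimal sketch (illustrative, not part of the workflow):

Code
# join sample-level bottles to their parent cast on cast_id
dbGetQuery(con, "
  SELECT c.cast_id, c.sta_key, b.bottle_id, b.depth_m
  FROM casts c
  JOIN bottle b USING (cast_id)
  LIMIT 5")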

This workflow processes the CalCOFI bottle database CSV files and outputs parquet files. It:

  1. Reads CSV files from source directory (with GCS archive sync)
  2. Loads into local wrangling DuckDB with column renames
  3. Applies ship name/code corrections
  4. Consolidates temporal columns into datetime_utc, drops derivable columns, creates casts_derived VIEW
  5. Pivots bottle measurements into long-format bottle_measurement table
  6. Pivots cast environmental conditions into cast_condition table
  7. Loads measurement_type reference table
  8. Validates primary keys and foreign key integrity
  9. Adds spatial geometry and grid assignments
  10. Exports to parquet files (for later integration into Working DuckLake)
Code
# devtools::install_local(here::here("../calcofi4db"), force = T)
devtools::load_all(here::here("../calcofi4db"))
devtools::load_all(here::here("../calcofi4r"))

librarian::shelf(
  CalCOFI/calcofi4db,
  CalCOFI/calcofi4r,
  DBI, dm, dplyr, DT, fs, glue, gargle, googledrive, here, htmltools,
  janitor, jsonlite, knitr, listviewer, litedown, lubridate, mapview,
  purrr, readr, rlang, sf, stringr, tibble, tidyr, units, webshot2,
  quiet = T)
options(readr.show_col_types = F)
options(DT.options = list(scrollX = TRUE))

# define paths
provider     <- "calcofi.org"
dataset      <- "bottle-database"
dataset_name <- "CalCOFI Bottle Database"
dir_data     <- "~/My Drive/projects/calcofi/data-public"
subdir       <- "CalCOFI_Database_194903-202105_csv_16October2023"
overwrite    <- TRUE
dir_parquet  <- here(glue("data/parquet/{provider}_{dataset}"))
db_path      <- here(glue("data/wrangling/{provider}_{dataset}.duckdb"))

if (overwrite){
  if (file_exists(db_path)) file_delete(db_path)
  if (dir_exists(dir_parquet)) dir_delete(dir_parquet)
}

dir_create(dirname(db_path))
con <- get_duckdb_con(db_path)
load_duckdb_extension(con, "spatial")

# load data using calcofi4db package
# - reads from local Google Drive mount
# - syncs to GCS archive if files changed (creates new timestamped archive)
# - tracks GCS archive path for provenance
dir_dataset <- glue("{dir_data}/{provider}/{dataset}")

d <- read_csv_files(
  provider           = provider,
  dataset            = dataset,
  subdir             = subdir,
  dir_data           = dir_data,
  sync_archive       = TRUE,
  metadata_dir       = here("metadata"),
  field_descriptions = list(
    "194903-202105_Cast"   = glue(
      "{dir_dataset}/Cast Field Descriptions.csv"),
    "194903-202105_Bottle" = glue(
      "{dir_dataset}/Bottle Field Descriptions - UTF-8.csv")))

# show source files summary
message(glue("Loaded {nrow(d$source_files)} tables from {d$paths$dir_csv}"))
message(glue("Total rows: {sum(d$source_files$nrow)}"))

2 Check for any mismatched tables and fields

Code
# check data integrity
# type_exceptions = "all": type mismatches (34) are expected since readr
# infers types differently (e.g. quality codes as numeric, Time as hms);
# these are resolved during ingestion via flds_redefine.csv type_new
integrity <- check_data_integrity(
  d               = d,
  dataset_name    = dataset_name,
  type_exceptions = "all")

2.1 ✅ Data Integrity Check Passed: CalCOFI Bottle Database

2.1.1 Passed with Accepted Exceptions

34 type mismatch(es) were found but accepted as known exceptions (e.g., readr infers types differently from redefinition metadata; resolved during ingestion via flds_redefine.csv type_new column).


3 Show Source Files

Code
show_source_files(d)

4 Show CSV Tables and Fields to Ingest

Code
d$d_csv$tables |>
  datatable(caption = "Tables to ingest.")
Code
d$d_csv$fields |>
  datatable(caption = "Fields to ingest.")

5 Show tables and fields redefined

Code
show_tables_redefine(d)
Code
show_fields_redefine(d)

6 Load Tables into Database

Code
# use ingest_dataset() which handles:
# - transform_data() for applying redefinitions
# - provenance tracking via gcs_path from read_csv_files()
# - ingest_to_working() for each table
tbl_stats <- ingest_dataset(
  con     = con,
  d       = d,
  mode    = if (overwrite) "replace" else "append",
  verbose = TRUE)

tbl_stats |>
  datatable(rownames = FALSE, filter = "top")

7 Apply Ship Corrections

Apply known ship name/code corrections from ship_renames.csv.

Code
ship_renames_csv <- here(
  "metadata/calcofi.org/bottle-database/ship_renames.csv")
d_ship_renames <- read_csv(ship_renames_csv)

# apply each correction
n_updated_total <- 0
for (i in seq_len(nrow(d_ship_renames))) {
  old_name <- d_ship_renames$csv_name[i]
  old_code <- d_ship_renames$csv_code[i]
  new_name <- d_ship_renames$csv_name_new[i]
  new_code <- d_ship_renames$csv_code_new[i]

  # update where both name and code match old values
  n_updated <- dbExecute(con, glue_sql(
    "UPDATE casts
     SET ship_name = {new_name}, ship_code = {new_code}
     WHERE ship_name = {old_name} AND ship_code = {old_code}",
    .con = con))

  if (n_updated > 0) {
    message(glue(
      "Updated {n_updated} casts: ",
      "'{old_name}' [{old_code}] -> '{new_name}' [{new_code}]"))
  }
  n_updated_total <- n_updated_total + n_updated
}

message(glue("\nTotal casts updated: {n_updated_total}"))

8 Column Naming Rationale

The column renames in flds_redefine.csv follow these db conventions:

  • *_id for integer primary/foreign keys (e.g. Cst_Cnt → cast_id, Btl_Cnt → bottle_id)
  • *_key for varchar natural/composite keys (e.g. Cast_ID → cast_key, Cruise_ID → cruise_key, Sta_ID → sta_key)
  • unit suffixes for measurements (e.g. Depthm → depth_m)

8.1 Ambiguous column pairs

Column pair                                   | Why different
Cst_Cnt (→ cast_id) vs Cast_ID (→ cast_key)   | integer counter (1, 2, 3…) vs varchar composite (19-4903CR-HY-060-0930-05400560)
Sta_ID (→ sta_key) vs DbSta_ID (→ db_sta_key) | “054.0 056.0” (space-separated) vs “05400560” (compact, derivable)
Depth_ID (→ depth_key) vs Depthm (→ depth_m)  | varchar identifier vs numeric depth in meters

8.2 R_* = “Reported” (pre-QC) values

The R_* prefix means original instrument values before quality control. Critical: R_Sal (Specific Volume Anomaly, values ~233, units 10⁻⁸ m³/kg) is a completely different parameter and scale from Salnty (Practical Salinity PSS-78, values ~33.44). These are pivoted as distinct measurement types (r_salinity_sva vs salinity).
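
To see the scale difference directly, the value ranges of the two types can be compared after the pivot in Section 10. A minimal sketch (illustrative):

Code
# compare value ranges of the two salinity-related measurement types
dbGetQuery(con, "
  SELECT measurement_type,
         MIN(measurement_value) AS v_min,
         MAX(measurement_value) AS v_max
  FROM bottle_measurement
  WHERE measurement_type IN ('salinity', 'r_salinity_sva')
  GROUP BY measurement_type")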

9 Consolidate Casts Columns

Combine date + time into datetime_utc and drop 16 derivable columns (plus date and time themselves). The casts_derived convenience VIEW that restores the derivable columns is created later, in Section 15.4, once all casts modifications are complete.
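
As a quick sanity check of the DATE + INTERVAL semantics used below (illustrative):

Code
# DuckDB: DATE + INTERVAL yields a TIMESTAMP
dbGetQuery(con,
  "SELECT DATE '1949-03-01' + INTERVAL '9 hours 30 minutes' AS ts")
# expected: 1949-03-01 09:30:00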

Code
# -- create datetime_utc from date (DATE) + time (INTERVAL) ----
# date is DATE type (e.g. 1949-03-01), time is INTERVAL (e.g. 09:30:00)
# adding DATE + INTERVAL yields TIMESTAMP; 324 casts have NULL time
dbExecute(con, "ALTER TABLE casts ADD COLUMN datetime_utc TIMESTAMP")
[1] 0
Code
dbExecute(con, "
  UPDATE casts SET datetime_utc =
    CASE
      WHEN \"time\" IS NOT NULL
      THEN CAST(\"date\" AS TIMESTAMP) + \"time\"
      ELSE CAST(\"date\" AS TIMESTAMP)
    END")
[1] 35644
Code
message(glue(
  "Created datetime_utc: {dbGetQuery(con, 'SELECT COUNT(*) FROM casts WHERE datetime_utc IS NOT NULL')[[1]]} non-null values"))

# -- drop derivable columns ----
cols_to_drop <- c(
  "year", "month", "quarter", "julian_day", "julian_date",
  "time_zone",
  "lat_deg", "lat_min", "lat_hem",
  "lon_deg", "lon_min", "lon_hem",
  "cruise", "cruz_sta", "db_sta_key", "cruz_num")

# drop date and time (replaced by datetime_utc)
cols_to_drop <- c(cols_to_drop, "date", "time")

n_before <- length(dbListFields(con, "casts"))

for (col in cols_to_drop) {
  tryCatch(
    dbExecute(con, glue('ALTER TABLE casts DROP COLUMN "{col}"')),
    error = function(e) warning(glue("could not drop {col}: {e$message}")))
}

n_after <- length(dbListFields(con, "casts"))
message(glue("casts columns: {n_before} -> {n_after} (dropped {n_before - n_after})"))

10 Pivot Bottle Measurements

Pivot wide-format measurement columns from bottle into long-format bottle_measurement table. Each measurement type has a value, optional precision, and optional quality code.

Code
erDiagram
    bottle_BEFORE {
        int bottle_id PK
        int cast_id FK
        dbl depth_m
        dbl t_deg_c
        dbl t_prec
        str t_qual
        dbl salnty
        dbl s_prec
        str s_qual
        dbl o2ml_l
        str o_qual
        dbl chlor_a
        str chlqua
        dbl _etc "... 30 measurement columns"
    }
    bottle_AFTER {
        int bottle_id PK
        int cast_id FK
        dbl depth_m
        str depth_qual
    }
    bottle_measurement {
        int bottle_measurement_id PK
        int bottle_id FK
        str measurement_type
        dbl measurement_value
        dbl measurement_prec
        str measurement_qual
    }
    bottle_BEFORE ||--|{ bottle_measurement : "pivot into"
    bottle_AFTER ||--o{ bottle_measurement : "bottle_id"

Pivot wide bottle columns into long-format bottle_measurement table.

Code
# define measurement mapping: type -> value/prec/qual columns
bottle_meas_map <- tribble(
  ~measurement_type,     ~value_col,      ~prec_col,  ~qual_col,
  "temperature",         "t_deg_c",       "t_prec",   "t_qual",
  "salinity",            "salnty",        "s_prec",   "s_qual",
  "oxygen_ml_l",         "o2ml_l",        NA,         "o_qual",
  "sigma_theta",         "s_theta",       NA,         "s_thtaq",
  "oxygen_saturation",   "o2sat",         NA,         "o2satq",
  "oxygen_umol_kg",      "oxy_umol_kg",   NA,         NA,
  "chlorophyll_a",       "chlor_a",       NA,         "chlqua",
  "phaeopigment",        "phaeop",        NA,         "phaqua",
  "phosphate",           "po4u_m",        NA,         "po4q",
  "silicate",            "si_o3u_m",      NA,         "si_o3qu",
  "nitrite",             "no2u_m",        NA,         "no2q",
  "nitrate",             "no3u_m",        NA,         "no3q",
  "ammonia",             "nh3u_m",        NA,         "nh3q",
  "c14_rep1",            "c14as1",        "c14a1p",   "c14a1q",
  "c14_rep2",            "c14as2",        "c14a2p",   "c14a2q",
  "c14_dark",            "dark_as",       "dark_ap",  "darkaq",
  "c14_mean",            "mean_as",       "mean_ap",  "mean_aq",
  "light_pct",           "light_p",       NA,         NA,
  "r_depth",             "r_depth",       NA,         NA,
  "r_temperature",       "r_temp",        NA,         NA,
  "r_salinity_sva",      "r_sal",         NA,         NA,
  "r_dynamic_height",    "r_dynht",       NA,         NA,
  "r_ammonium",          "r_nuts",        NA,         NA,
  "r_oxygen_umol_kg",    "r_oxy_umol_kg", NA,         NA,
  "dic_rep1",            "dic1",          NA,         "dic_quality_comment",
  "dic_rep2",            "dic2",          NA,         "dic_quality_comment",
  "alkalinity_rep1",     "ta1",           NA,         NA,
  "alkalinity_rep2",     "ta2",           NA,         NA,
  "ph_rep1",             "p_h1",          NA,         NA,
  "ph_rep2",             "p_h2",          NA,         NA)

# build UNION ALL SQL from mapping
sql_parts <- purrr::pmap_chr(bottle_meas_map, function(
    measurement_type, value_col, prec_col, qual_col) {
  prec_expr <- if (is.na(prec_col)) "NULL" else prec_col
  qual_expr <- if (is.na(qual_col)) "NULL" else qual_col
  glue(
    "SELECT bottle_id, '{measurement_type}' AS measurement_type,
       CAST({value_col} AS DOUBLE) AS measurement_value,
       CAST({prec_expr} AS DOUBLE) AS measurement_prec,
       CAST({qual_expr} AS VARCHAR) AS measurement_qual
     FROM bottle WHERE {value_col} IS NOT NULL")
})

sql_create <- glue(
  "CREATE TABLE bottle_measurement AS
   SELECT ROW_NUMBER() OVER (ORDER BY bottle_id, measurement_type)
     AS bottle_measurement_id, *
   FROM (
     {paste(sql_parts, collapse = '\nUNION ALL\n')}
   ) sub")
dbExecute(con, sql_create)
[1] 11135600
Code
n_meas <- dbGetQuery(con, "SELECT COUNT(*) FROM bottle_measurement")[[1]]
n_types <- dbGetQuery(con, "SELECT COUNT(DISTINCT measurement_type) FROM bottle_measurement")[[1]]
message(glue("bottle_measurement: {format(n_meas, big.mark = ',')} rows, {n_types} measurement types"))

# -- drop measurement columns from bottle base table ----
cols_to_drop <- unique(c(
  bottle_meas_map$value_col,
  na.omit(bottle_meas_map$prec_col),
  na.omit(bottle_meas_map$qual_col)))

n_before <- length(dbListFields(con, "bottle"))
for (col in cols_to_drop) {
  tryCatch(
    dbExecute(con, glue('ALTER TABLE bottle DROP COLUMN "{col}"')),
    error = function(e) warning(glue("could not drop {col}: {e$message}")))
}

# rename p_qual to depth_qual (pressure/depth quality code stays in base table)
dbExecute(con, 'ALTER TABLE bottle RENAME COLUMN p_qual TO depth_qual')
[1] 0
Code
n_after <- length(dbListFields(con, "bottle"))
message(glue("bottle columns: {n_before} -> {n_after} (dropped {n_before - n_after})"))

11 Pivot Cast Conditions

Pivot meteorological and environmental observation columns from casts into long-format cast_condition table.

Code
erDiagram
    casts_BEFORE {
        int cast_id PK
        str sta_key
        dbl lat_dec
        dbl lon_dec
        ts  datetime_utc
        dbl wave_dir
        dbl wave_ht
        dbl wave_prd
        dbl wind_dir
        dbl wind_spd
        dbl barometer
        dbl dry_t
        dbl wet_t
        dbl wea
        dbl cloud_typ
        dbl cloud_amt
        dbl visibility
        dbl secchi
        dbl forel_u
    }
    casts_AFTER {
        int cast_id PK
        str sta_key
        dbl lat_dec
        dbl lon_dec
        ts  datetime_utc
    }
    cast_condition {
        int cast_condition_id PK
        int cast_id FK
        str condition_type
        dbl condition_value
    }
    casts_BEFORE ||--|{ cast_condition : "pivot into"
    casts_AFTER ||--o{ cast_condition : "cast_id"

Pivot wide cast condition columns into long-format cast_condition table.

Code
cast_cond_map <- tribble(
  ~condition_type,        ~source_col,
  "wave_direction",       "wave_dir",
  "wave_height",          "wave_ht",
  "wave_period",          "wave_prd",
  "wind_direction",       "wind_dir",
  "wind_speed",           "wind_spd",
  "barometric_pressure",  "barometer",
  "dry_air_temp",         "dry_t",
  "wet_air_temp",         "wet_t",
  "weather_code",         "wea",
  "cloud_type",           "cloud_typ",
  "cloud_amount",         "cloud_amt",
  "visibility",           "visibility",
  "secchi_depth",         "secchi",
  "water_color",          "forel_u")

# build UNION ALL SQL
sql_parts <- purrr::pmap_chr(cast_cond_map, function(condition_type, source_col) {
  glue(
    "SELECT cast_id, '{condition_type}' AS condition_type,
       CAST({source_col} AS DOUBLE) AS condition_value
     FROM casts WHERE {source_col} IS NOT NULL")
})

sql_create <- glue(
  "CREATE TABLE cast_condition AS
   SELECT ROW_NUMBER() OVER (ORDER BY cast_id, condition_type)
     AS cast_condition_id, *
   FROM (
     {paste(sql_parts, collapse = '\nUNION ALL\n')}
   ) sub")
dbExecute(con, sql_create)
[1] 235513
Code
n_cond <- dbGetQuery(con, "SELECT COUNT(*) FROM cast_condition")[[1]]
n_types <- dbGetQuery(con, "SELECT COUNT(DISTINCT condition_type) FROM cast_condition")[[1]]
message(glue("cast_condition: {format(n_cond, big.mark = ',')} rows, {n_types} condition types"))

# -- drop condition columns from casts ----
n_before <- length(dbListFields(con, "casts"))
for (col in cast_cond_map$source_col) {
  tryCatch(
    dbExecute(con, glue('ALTER TABLE casts DROP COLUMN "{col}"')),
    error = function(e) warning(glue("could not drop {col}: {e$message}")))
}
n_after <- length(dbListFields(con, "casts"))
message(glue("casts columns: {n_before} -> {n_after} (dropped {n_before - n_after})"))

12 Load Measurement Type Reference

Load the measurement_type lookup table that provides units and descriptions for all measurement/condition types across datasets.

12.1 measurement_type vs lookup

The measurement_type table and the swfsc lookup table serve fundamentally different purposes:

  • measurement_type answers “what was measured?” — provides metadata (units, description, source provenance) for measurement codes used in bottle_measurement, cast_condition, and ichthyo
  • lookup answers “what does this code mean?” — translates categorical values (egg developmental stages 1-11, larva stages, tow types) into human-readable descriptions

Merging would create a sparse table with many NULL columns (e.g., units/_source_column NULL for egg stages; lookup_num/lookup_chr NULL for temperature). Both tables coexist in the Working DuckLake, loaded from their respective upstream workflows.

Code
meas_type_csv <- here(
  "metadata/calcofi.org/bottle-database/measurement_type.csv")
d_meas_type <- read_csv(meas_type_csv)

dbWriteTable(con, "measurement_type", d_meas_type, overwrite = TRUE)

# validate FK: all bottle_measurement types exist in reference table
orphan_btl_types <- dbGetQuery(con, "
  SELECT DISTINCT measurement_type FROM bottle_measurement
  WHERE measurement_type NOT IN (SELECT measurement_type FROM measurement_type)")
stopifnot(
  "all bottle_measurement.measurement_type must exist in measurement_type" =
    nrow(orphan_btl_types) == 0)

# validate FK: all cast_condition types exist in reference table
orphan_cond_types <- dbGetQuery(con, "
  SELECT DISTINCT condition_type FROM cast_condition
  WHERE condition_type NOT IN (SELECT measurement_type FROM measurement_type)")
stopifnot(
  "all cast_condition.condition_type must exist in measurement_type" =
    nrow(orphan_cond_types) == 0)

message(glue("measurement_type: {nrow(d_meas_type)} types loaded, FK checks passed"))

13 Verify Primary Keys

Code
# verify cast_id uniqueness in casts
cast_keys <- tbl(con, "casts") |> pull(cast_id)
n_dup_cast <- sum(duplicated(cast_keys))
stopifnot("cast_id must be unique in casts" = n_dup_cast == 0)
message(glue("casts.cast_id: {length(cast_keys)} unique values, 0 duplicates"))

# verify bottle_id uniqueness in bottle
btl_keys <- tbl(con, "bottle") |> pull(bottle_id)
n_dup_btl <- sum(duplicated(btl_keys))
stopifnot("bottle_id must be unique in bottle" = n_dup_btl == 0)
message(glue("bottle.bottle_id: {length(btl_keys)} unique values, 0 duplicates"))

# verify FK: bottle.cast_id -> casts.cast_id
orphan_bottles <- tbl(con, "bottle") |>
  anti_join(
    tbl(con, "casts"),
    by = "cast_id") |>
  collect()
stopifnot(
  "all bottle.cast_id must exist in casts.cast_id" =
    nrow(orphan_bottles) == 0)
message(glue(
  "FK integrity: all {length(btl_keys)} bottle.cast_id ",
  "found in casts.cast_id"))

# verify bottle_measurement_id uniqueness
n_dup_bm_id <- dbGetQuery(con, "
  SELECT COUNT(*) FROM (
    SELECT bottle_measurement_id, COUNT(*) AS n
    FROM bottle_measurement
    GROUP BY bottle_measurement_id
    HAVING COUNT(*) > 1)")[[1]]
stopifnot("bottle_measurement_id must be unique" = n_dup_bm_id == 0)
message(glue("bottle_measurement.bottle_measurement_id: unique, 0 duplicates"))

# verify composite PK: bottle_measurement (bottle_id, measurement_type)
n_dup_meas <- dbGetQuery(con, "
  SELECT COUNT(*) FROM (
    SELECT bottle_id, measurement_type, COUNT(*) AS n
    FROM bottle_measurement
    GROUP BY bottle_id, measurement_type
    HAVING COUNT(*) > 1)")[[1]]
stopifnot("bottle_measurement (bottle_id, measurement_type) must be unique" = n_dup_meas == 0)
message(glue("bottle_measurement: composite key unique, 0 duplicates"))

# verify FK: bottle_measurement.bottle_id -> bottle.bottle_id
n_orphan_meas <- dbGetQuery(con, "
  SELECT COUNT(DISTINCT bottle_id) FROM bottle_measurement
  WHERE bottle_id NOT IN (SELECT bottle_id FROM bottle)")[[1]]
stopifnot("all bottle_measurement.bottle_id must exist in bottle" = n_orphan_meas == 0)
message(glue("bottle_measurement FK: all bottle_ids found in bottle"))

# verify cast_condition_id uniqueness
n_dup_cc_id <- dbGetQuery(con, "
  SELECT COUNT(*) FROM (
    SELECT cast_condition_id, COUNT(*) AS n
    FROM cast_condition
    GROUP BY cast_condition_id
    HAVING COUNT(*) > 1)")[[1]]
stopifnot("cast_condition_id must be unique" = n_dup_cc_id == 0)
message(glue("cast_condition.cast_condition_id: unique, 0 duplicates"))

# verify composite PK: cast_condition (cast_id, condition_type)
n_dup_cond <- dbGetQuery(con, "
  SELECT COUNT(*) FROM (
    SELECT cast_id, condition_type, COUNT(*) AS n
    FROM cast_condition
    GROUP BY cast_id, condition_type
    HAVING COUNT(*) > 1)")[[1]]
stopifnot("cast_condition (cast_id, condition_type) must be unique" = n_dup_cond == 0)
message(glue("cast_condition: composite key unique, 0 duplicates"))

# verify FK: cast_condition.cast_id -> casts.cast_id
n_orphan_cond <- dbGetQuery(con, "
  SELECT COUNT(DISTINCT cast_id) FROM cast_condition
  WHERE cast_id NOT IN (SELECT cast_id FROM casts)")[[1]]
stopifnot("all cast_condition.cast_id must exist in casts" = n_orphan_cond == 0)
message(glue("cast_condition FK: all cast_ids found in casts"))

14 Schema Documentation

Code
# exclude views (casts_derived) — dm_from_con can't access views
db_tables <- function(con) {
  setdiff(dbListTables(con), "casts_derived") |> sort()
}

tbls <- db_tables(con)

# dm object for visualization (keys learned from data, not DB constraints)
dm_dev <- dm_from_con(con, table_names = tbls, learn_keys = FALSE)
dm_draw(dm_dev, view_type = "all")

14.1 Primary Key Strategy

Table              | Primary Key           | Type
casts              | cast_id               | Natural key (integer counter from source Cst_Cnt)
bottle             | bottle_id             | Natural key (integer counter from source Btl_Cnt)
bottle_measurement | bottle_measurement_id | Integer PK via ROW_NUMBER(ORDER BY bottle_id, measurement_type); composite (bottle_id, measurement_type) enforced unique
cast_condition     | cast_condition_id     | Integer PK via ROW_NUMBER(ORDER BY cast_id, condition_type); composite (cast_id, condition_type) enforced unique
measurement_type   | measurement_type      | Natural key (type code string)
grid               | grid_key              | Natural key

14.2 Foreign Key Relationships

casts.cast_id (PK, integer) — station-level
    ↓
bottle.bottle_id (PK, integer) — sample-level
bottle.cast_id (FK) → casts.cast_id
    ↓
bottle_measurement.bottle_measurement_id (PK, integer) — ROW_NUMBER(ORDER BY bottle_id, measurement_type)
bottle_measurement.bottle_id (FK) → bottle.bottle_id
bottle_measurement.measurement_type (FK) → measurement_type.measurement_type

cast_condition.cast_condition_id (PK, integer) — ROW_NUMBER(ORDER BY cast_id, condition_type)
cast_condition.cast_id (FK) → casts.cast_id
cast_condition.condition_type (FK) → measurement_type.measurement_type

casts.grid_key (FK) → grid.grid_key (after spatial join)
casts.ship_code (soft FK) → ship.ship_nodc (validated after integration with swfsc)
Code
tbls <- db_tables(con)

dm_dev <- dm_from_con(con, table_names = tbls, learn_keys = FALSE)

# define relationships in dm (in-memory for visualization only)
add_bottle_keys <- function(dm){
  dm |>
    dm_add_pk(casts, cast_id) |>
    dm_add_pk(bottle, bottle_id) |>
    dm_add_pk(bottle_measurement, bottle_measurement_id) |>
    dm_add_pk(cast_condition, cast_condition_id) |>
    dm_add_pk(measurement_type, measurement_type) |>
    dm_add_fk(bottle, cast_id, casts) |>
    dm_add_fk(bottle_measurement, bottle_id, bottle) |>
    dm_add_fk(bottle_measurement, measurement_type, measurement_type) |>
    dm_add_fk(cast_condition, cast_id, casts) |>
    dm_add_fk(cast_condition, condition_type, measurement_type, measurement_type)
}

dm_dev_fk <- dm_dev |>
  add_bottle_keys()

dm_draw(dm_dev_fk, view_type = "all")

15 Add Spatial

15.1 Add casts.geom

Code
# add geometry column using DuckDB spatial
# note: DuckDB spatial doesn't track SRID metadata (unlike PostGIS)
# all geometries assumed WGS84 (EPSG:4326) by convention
add_point_geom(con, "casts")

15.2 Load grid table

Load grid from the swfsc workflow’s GCS parquet output (single source of truth — grid is created in ingest_swfsc.noaa.gov_calcofi-db.qmd).

Code
load_gcs_parquet_to_duckdb(
  con        = con,
  gcs_path   = "gs://calcofi-db/ingest/swfsc.noaa.gov_calcofi-db/grid.parquet",
  table_name = "grid")

15.3 Update casts.grid_key

Code
grid_stats <- assign_grid_key(con, "casts")
grid_stats |> datatable()

15.4 Create casts_derived VIEW

Create convenience VIEW after all casts table modifications are complete (column drops, pivots, spatial join). The VIEW adds back temporal and coordinate columns derivable from the base table.

Code
dbExecute(con, "DROP VIEW IF EXISTS casts_derived")
[1] 0
Code
dbExecute(con, "
  CREATE VIEW casts_derived AS
  SELECT *,
    EXTRACT(YEAR FROM datetime_utc)::SMALLINT    AS year,
    EXTRACT(MONTH FROM datetime_utc)::SMALLINT   AS month,
    EXTRACT(QUARTER FROM datetime_utc)::SMALLINT AS quarter,
    EXTRACT(DOY FROM datetime_utc)::SMALLINT     AS julian_day,
    (datetime_utc::DATE - DATE '1899-12-30')     AS julian_date,
    FLOOR(ABS(lat_dec))::SMALLINT                AS lat_deg,
    (ABS(lat_dec) - FLOOR(ABS(lat_dec))) * 60    AS lat_min,
    CASE WHEN lat_dec >= 0 THEN 'N' ELSE 'S' END AS lat_hem,
    FLOOR(ABS(lon_dec))::SMALLINT                AS lon_deg,
    (ABS(lon_dec) - FLOOR(ABS(lon_dec))) * 60    AS lon_min,
    CASE WHEN lon_dec >= 0 THEN 'E' ELSE 'W' END AS lon_hem,
    STRFTIME(datetime_utc, '%Y%m')                AS cruise,
    REPLACE(REPLACE(sta_key, '.', ''), ' ', '')   AS db_sta_key
  FROM casts")
[1] 0
Code
message("Created casts_derived VIEW with derived temporal/coordinate columns")

15.5 Map Cast Locations

Code
# slowish, so use cached figure
map_casts_png <- here(glue("figures/{provider}_{dataset}_casts_map.png"))

if (!file_exists(map_casts_png)) {
  casts_sf <- calcofi4r::cc_read_sf(con, "casts") |>
    mutate(year = year(datetime_utc))

  m <- mapView(casts_sf, zcol = "year")
  mapshot2(m, file = map_casts_png)
}

htmltools::img(
  src   = map_casts_png |> str_replace(here(), "."),
  width = "600px")

16 Report

Code
all_tbls <- db_tables(con)

add_bottle_spatial_keys <- function(dm){
  dm |>
    dm_add_pk(grid, grid_key) |>
    dm_add_fk(casts, grid_key, grid)
}

dm_final <- dm_from_con(con, table_names = all_tbls, learn_keys = FALSE) |>
  add_bottle_keys() |>
  add_bottle_spatial_keys() |>
  dm_set_colors(
    lightgreen  = grid,
    lightyellow = c(bottle_measurement, cast_condition),
    lightblue   = measurement_type)
dm_draw(dm_final, view_type = "all")
Code
# summary statistics
n_casts   <- tbl(con, "casts") |> tally() |> pull(n)
n_bottles <- tbl(con, "bottle") |> tally() |> pull(n)
n_meas    <- dbGetQuery(con, "SELECT COUNT(*) FROM bottle_measurement")[[1]]
n_cond    <- dbGetQuery(con, "SELECT COUNT(*) FROM cast_condition")[[1]]

year_range <- dbGetQuery(con, "
  SELECT
    EXTRACT(YEAR FROM MIN(datetime_utc))::INTEGER AS yr_min,
    EXTRACT(YEAR FROM MAX(datetime_utc))::INTEGER AS yr_max
  FROM casts")

fmt <- function(x, ...) format(x, big.mark = ",", ...)
message(glue("Total casts: {fmt(n_casts)}"))
message(glue("Total bottles: {fmt(n_bottles)}"))
message(glue("Total bottle measurements: {fmt(n_meas)}"))
message(glue("Total cast conditions: {fmt(n_cond)}"))
message(glue("Year range: {year_range$yr_min} - {year_range$yr_max}"))

17 Validate Local Database

Validate data quality in the local wrangling database before exporting to parquet. The parquet outputs from this workflow can later be used to update the Working DuckLake.

Code
# validate data quality
validation <- validate_for_release(con)

if (validation$passed) {
  message("Validation passed!")
  if (nrow(validation$checks) > 0) {
    validation$checks |>
      filter(status != "pass") |>
      datatable(caption = "Validation Warnings")
  }
} else {
  cat("Validation FAILED:\n")
  cat(paste("-", validation$errors, collapse = "\n"))
}
Validation FAILED:
- Table 'casts' has 298 NULL values in required column 'grid_key'
- Table 'casts_derived' has 298 NULL values in required column 'grid_key'
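
A minimal diagnostic sketch for the failing check (illustrative; grid_key is presumably NULL where the spatial join found no matching grid cell):

Code
# inspect casts lacking a grid assignment
dbGetQuery(con, "
  SELECT cast_id, sta_key, lat_dec, lon_dec
  FROM casts
  WHERE grid_key IS NULL
  LIMIT 10")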

18 Enforce Column Types

Force INTEGER/SMALLINT types on columns that R's numeric type caused dbWriteTable() to store as DOUBLE. Uses flds_redefine.csv (type_new) as the source of truth for source-table columns, plus explicit overrides for derived-table columns.

Code
type_changes <- enforce_column_types(
  con            = con,
  d_flds_rd      = d$d_flds_rd,
  type_overrides = list(),
  tables         = db_tables(con),
  verbose        = TRUE)

if (nrow(type_changes) > 0) {
  type_changes |>
    datatable(caption = "Column type changes applied")
}
Code
# recreate casts_derived VIEW after type changes
dbExecute(con, "DROP VIEW IF EXISTS casts_derived")
[1] 0
Code
dbExecute(con, "
  CREATE VIEW casts_derived AS
  SELECT *,
    EXTRACT(YEAR FROM datetime_utc)::SMALLINT    AS year,
    EXTRACT(MONTH FROM datetime_utc)::SMALLINT   AS month,
    EXTRACT(QUARTER FROM datetime_utc)::SMALLINT AS quarter,
    EXTRACT(DOY FROM datetime_utc)::SMALLINT     AS julian_day,
    (datetime_utc::DATE - DATE '1899-12-30')     AS julian_date,
    FLOOR(ABS(lat_dec))::SMALLINT                AS lat_deg,
    (ABS(lat_dec) - FLOOR(ABS(lat_dec))) * 60    AS lat_min,
    CASE WHEN lat_dec >= 0 THEN 'N' ELSE 'S' END AS lat_hem,
    FLOOR(ABS(lon_dec))::SMALLINT                AS lon_deg,
    (ABS(lon_dec) - FLOOR(ABS(lon_dec))) * 60    AS lon_min,
    CASE WHEN lon_dec >= 0 THEN 'E' ELSE 'W' END AS lon_hem,
    STRFTIME(datetime_utc, '%Y%m')                AS cruise,
    REPLACE(REPLACE(sta_key, '.', ''), ' ', '')   AS db_sta_key
  FROM casts")
[1] 0
Code
message("Recreated casts_derived VIEW after column type enforcement")

19 Data Preview

Preview first and last rows of each table before writing parquet outputs.

Code
preview_tables(con, c(
  "casts", "bottle", "bottle_measurement",
  "cast_condition", "measurement_type"))  # grid canonical from swfsc

19.1 casts (35,644 rows)

19.2 bottle (895,371 rows)

19.3 bottle_measurement (11,135,600 rows)

19.4 cast_condition (235,513 rows)

19.5 measurement_type (47 rows)

Code
# casts_derived is a VIEW, preview separately
dbGetQuery(con, "SELECT * FROM casts_derived LIMIT 100") |>
  datatable(
    caption  = "casts_derived — first 100 rows (VIEW)",
    rownames = FALSE,
    filter   = "top")

20 Write Parquet Outputs

Export tables to parquet files for downstream use.

Code
# write parquet files with manifest
parquet_stats <- write_parquet_outputs(
  con              = con,
  output_dir       = dir_parquet,
  tables           = setdiff(db_tables(con), "grid"),  # grid canonical from swfsc
  strip_provenance = F)

parquet_stats |>
  mutate(file = basename(path)) |>
  select(-path) |>
  datatable(caption = "Parquet export statistics")

21 Write Metadata

Build metadata.json sidecar file documenting all tables and columns in parquet outputs. DuckDB COMMENT ON does not propagate to parquet, so this provides the metadata externally.

Code
metadata_path <- build_metadata_json(
  con                  = con,
  d_tbls_rd            = d$d_tbls_rd,
  d_flds_rd            = d$d_flds_rd,
  metadata_derived_csv = here(
    "metadata/calcofi.org/bottle-database/metadata_derived.csv"),
  output_dir           = dir_parquet,
  tables               = db_tables(con),
  set_comments         = TRUE,
  provider             = provider,
  dataset              = dataset,
  workflow_url         = glue(
    "https://calcofi.io/workflows/",
    "ingest_calcofi.org_bottle-database.html"))

# show metadata summary
metadata <- jsonlite::fromJSON(metadata_path)
tibble(
  table   = names(metadata$tables),
  n_cols  = map_int(
    names(metadata$tables),
    ~sum(grepl(glue("^{.x}\\."), names(metadata$columns)))),
  name_long = map_chr(metadata$tables, ~.x$name_long)) |>
  datatable(caption = "Table metadata summary")
Code
listviewer::jsonedit(
  jsonlite::fromJSON(metadata_path, simplifyVector = FALSE),
  mode = "view")

22 Upload to GCS Archive

Upload parquet files, manifest, and metadata sidecar to gs://calcofi-db/ingest/{provider}_{dataset}/.

Code
gcs_ingest_prefix <- glue("ingest/{provider}_{dataset}")
gcs_bucket        <- "calcofi-db"

# list local files to upload (parquet + json sidecars)
local_files <- list.files(dir_parquet, full.names = TRUE)

# upload each file
walk(local_files, function(f) {
  gcs_path <- glue("gs://{gcs_bucket}/{gcs_ingest_prefix}/{basename(f)}")
  put_gcs_file(f, gcs_path)
  message(glue("Uploaded {basename(f)} -> {gcs_path}"))
})

message(glue(
  "\nUploaded {length(local_files)} files to ",
  "gs://{gcs_bucket}/{gcs_ingest_prefix}/"))

23 Cleanup

Code
# close local wrangling database connection
close_duckdb(con)
message("Local wrangling database connection closed")

# note: parquet outputs are in data/parquet/calcofi.org_bottle-database/
# these can be used to update the Working DuckLake in a separate workflow
message(glue("Parquet outputs written to: {dir_parquet}"))
message(glue("GCS outputs at: gs://{gcs_bucket}/{gcs_ingest_prefix}/"))

24 Session Info

Code
devtools::session_info()
─ Session info ───────────────────────────────────────────────────────────────
 setting  value
 version  R version 4.5.2 (2025-10-31)
 os       macOS Sequoia 15.7.1
 system   aarch64, darwin20
 ui       X11
 language (EN)
 collate  en_US.UTF-8
 ctype    en_US.UTF-8
 tz       America/Mexico_City
 date     2026-02-05
 pandoc   3.8.3 @ /opt/homebrew/bin/ (via rmarkdown)
 quarto   1.8.25 @ /usr/local/bin/quarto

─ Packages ───────────────────────────────────────────────────────────────────
 !  package            * version    date (UTC) lib source
    abind                1.4-8      2024-09-12 [1] CRAN (R 4.5.0)
    arrow                22.0.0.1   2025-12-23 [1] CRAN (R 4.5.2)
    assertthat           0.2.1      2019-03-21 [1] CRAN (R 4.5.0)
    backports            1.5.0      2024-05-23 [1] CRAN (R 4.5.0)
    base64enc            0.1-6      2026-02-02 [1] CRAN (R 4.5.2)
    bit                  4.6.0      2025-03-06 [1] CRAN (R 4.5.0)
    bit64                4.6.0-1    2025-01-16 [1] CRAN (R 4.5.0)
    blob                 1.3.0      2026-01-14 [1] CRAN (R 4.5.2)
    broom                1.0.10     2025-09-13 [1] CRAN (R 4.5.0)
    bslib                0.10.0     2026-01-26 [1] CRAN (R 4.5.2)
    cachem               1.1.0      2024-05-16 [1] CRAN (R 4.5.0)
 VP calcofi4db         * 2.2.1      2026-02-04 [?] load_all() (on disk 2.1.0)
 VP calcofi4r          * 1.1.1      2025-10-02 [?] Github (CalCOFI/calcofi4r@dbe213f) (on disk 0.8.1)
    chromote             0.5.1      2025-04-24 [1] CRAN (R 4.5.0)
    class                7.3-23     2025-01-01 [1] CRAN (R 4.5.2)
    classInt             0.4-11     2025-01-08 [1] CRAN (R 4.5.0)
    cli                  3.6.5      2025-04-23 [1] CRAN (R 4.5.0)
    codetools            0.2-20     2024-03-31 [1] CRAN (R 4.5.2)
    crayon               1.5.3      2024-06-20 [1] CRAN (R 4.5.0)
    crosstalk            1.2.2      2025-08-26 [1] CRAN (R 4.5.0)
    curl                 7.0.0      2025-08-19 [1] CRAN (R 4.5.0)
    data.table           1.18.0     2025-12-24 [1] CRAN (R 4.5.2)
    DBI                * 1.2.3      2024-06-02 [1] CRAN (R 4.5.0)
    dbplyr               2.5.1      2025-09-10 [1] CRAN (R 4.5.0)
    desc                 1.4.3      2023-12-10 [1] CRAN (R 4.5.0)
    devtools             2.4.6      2025-10-03 [1] CRAN (R 4.5.0)
    DiagrammeR           1.0.11     2024-02-02 [1] CRAN (R 4.5.0)
    DiagrammeRsvg        0.1        2016-02-04 [1] CRAN (R 4.5.0)
    digest               0.6.39     2025-11-19 [1] CRAN (R 4.5.2)
    dm                 * 1.0.12     2025-07-02 [1] CRAN (R 4.5.0)
    dplyr              * 1.2.0      2026-02-03 [1] CRAN (R 4.5.2)
    DT                 * 0.34.0     2025-09-02 [1] CRAN (R 4.5.0)
    duckdb               1.4.4      2026-01-28 [1] CRAN (R 4.5.2)
    dygraphs             1.1.1.6    2018-07-11 [1] CRAN (R 4.5.0)
    e1071                1.7-17     2025-12-18 [1] CRAN (R 4.5.2)
    ellipsis             0.3.2      2021-04-29 [1] CRAN (R 4.5.0)
    evaluate             1.0.5      2025-08-27 [1] CRAN (R 4.5.0)
    farver               2.1.2      2024-05-13 [1] CRAN (R 4.5.0)
    fastmap              1.2.0      2024-05-15 [1] CRAN (R 4.5.0)
    fs                 * 1.6.6      2025-04-12 [1] CRAN (R 4.5.0)
    fuzzyjoin            0.1.6.1    2025-07-10 [1] CRAN (R 4.5.0)
    gargle             * 1.6.1      2026-01-29 [1] CRAN (R 4.5.2)
    generics             0.1.4      2025-05-09 [1] CRAN (R 4.5.0)
    geojsonsf            2.0.5      2025-11-26 [1] CRAN (R 4.5.2)
    ggplot2              4.0.1      2025-11-14 [1] CRAN (R 4.5.2)
    glue               * 1.8.0      2024-09-30 [1] CRAN (R 4.5.0)
    googledrive        * 2.1.2      2025-09-10 [1] CRAN (R 4.5.0)
    gtable               0.3.6      2024-10-25 [1] CRAN (R 4.5.0)
    here               * 1.0.2      2025-09-15 [1] CRAN (R 4.5.0)
    highcharter          0.9.4      2022-01-03 [1] CRAN (R 4.5.0)
    hms                  1.1.4      2025-10-17 [1] CRAN (R 4.5.0)
    htmltools          * 0.5.9      2025-12-04 [1] CRAN (R 4.5.2)
    htmlwidgets          1.6.4      2023-12-06 [1] CRAN (R 4.5.0)
    httpuv               1.6.16     2025-04-16 [1] CRAN (R 4.5.0)
    httr                 1.4.7      2023-08-15 [1] CRAN (R 4.5.0)
    httr2                1.2.2      2025-12-08 [1] CRAN (R 4.5.2)
    igraph               2.2.1      2025-10-27 [1] CRAN (R 4.5.0)
    isoband              0.3.0      2025-12-07 [1] CRAN (R 4.5.2)
    janitor            * 2.2.1      2024-12-22 [1] CRAN (R 4.5.0)
    jquerylib            0.1.4      2021-04-26 [1] CRAN (R 4.5.0)
    jsonlite           * 2.0.0      2025-03-27 [1] CRAN (R 4.5.0)
    KernSmooth           2.23-26    2025-01-01 [1] CRAN (R 4.5.2)
    knitr              * 1.51       2025-12-20 [1] CRAN (R 4.5.2)
    later                1.4.5      2026-01-08 [1] CRAN (R 4.5.2)
    lattice              0.22-7     2025-04-02 [1] CRAN (R 4.5.2)
    lazyeval             0.2.2      2019-03-15 [1] CRAN (R 4.5.0)
    leafem               0.2.5      2025-08-28 [1] CRAN (R 4.5.0)
    leaflet              2.2.3      2025-09-04 [1] CRAN (R 4.5.0)
    librarian            1.8.1      2021-07-12 [1] CRAN (R 4.5.0)
    lifecycle            1.0.5      2026-01-08 [1] CRAN (R 4.5.2)
    listviewer         * 4.0.0      2023-09-30 [1] CRAN (R 4.5.0)
    litedown           * 0.8        2025-11-02 [1] CRAN (R 4.5.0)
    lubridate          * 1.9.4      2024-12-08 [1] CRAN (R 4.5.0)
    magrittr             2.0.4      2025-09-12 [1] CRAN (R 4.5.0)
    mapgl                0.4.4      2026-01-12 [1] CRAN (R 4.5.2)
    mapview            * 2.11.4     2025-09-08 [1] CRAN (R 4.5.0)
    markdown             2.0        2025-03-23 [1] CRAN (R 4.5.0)
    Matrix               1.7-4      2025-08-28 [1] CRAN (R 4.5.2)
    memoise              2.0.1      2021-11-26 [1] CRAN (R 4.5.0)
    mgcv                 1.9-3      2025-04-04 [1] CRAN (R 4.5.2)
    mime                 0.13       2025-03-17 [1] CRAN (R 4.5.0)
    nlme                 3.1-168    2025-03-31 [1] CRAN (R 4.5.2)
    otel                 0.2.0      2025-08-29 [1] CRAN (R 4.5.0)
    pillar               1.11.1     2025-09-17 [1] CRAN (R 4.5.0)
    pkgbuild             1.4.8      2025-05-26 [1] CRAN (R 4.5.0)
    pkgconfig            2.0.3      2019-09-22 [1] CRAN (R 4.5.0)
    pkgload              1.4.1      2025-09-23 [1] CRAN (R 4.5.0)
    plotly               4.11.0     2025-06-19 [1] CRAN (R 4.5.0)
    png                  0.1-8      2022-11-29 [1] CRAN (R 4.5.0)
    processx             3.8.6      2025-02-21 [1] CRAN (R 4.5.0)
    promises             1.5.0      2025-11-01 [1] CRAN (R 4.5.0)
    proxy                0.4-29     2025-12-29 [1] CRAN (R 4.5.2)
    ps                   1.9.1      2025-04-12 [1] CRAN (R 4.5.0)
    purrr              * 1.2.1      2026-01-09 [1] CRAN (R 4.5.2)
    quantmod             0.4.28     2025-06-19 [1] CRAN (R 4.5.0)
    R6                   2.6.1      2025-02-15 [1] CRAN (R 4.5.0)
    rappdirs             0.3.4      2026-01-17 [1] CRAN (R 4.5.2)
    raster               3.6-32     2025-03-28 [1] CRAN (R 4.5.0)
    RColorBrewer         1.1-3      2022-04-03 [1] CRAN (R 4.5.0)
    Rcpp                 1.1.1      2026-01-10 [1] CRAN (R 4.5.2)
    readr              * 2.1.6      2025-11-14 [1] CRAN (R 4.5.2)
    remotes              2.5.0      2024-03-17 [1] CRAN (R 4.5.0)
    rlang              * 1.1.7      2026-01-09 [1] CRAN (R 4.5.2)
    rlist                0.4.6.2    2021-09-03 [1] CRAN (R 4.5.0)
    rmarkdown            2.30       2025-09-28 [1] CRAN (R 4.5.0)
    rnaturalearth        1.1.0      2025-07-28 [1] CRAN (R 4.5.0)
    rnaturalearthhires   1.0.0.9000 2025-10-02 [1] Github (ropensci/rnaturalearthhires@e4736f6)
    RPostgres            1.4.8      2025-02-25 [1] CRAN (R 4.5.0)
    rprojroot            2.1.1      2025-08-26 [1] CRAN (R 4.5.0)
    rstudioapi           0.18.0     2026-01-16 [1] CRAN (R 4.5.2)
    S7                   0.2.1      2025-11-14 [1] CRAN (R 4.5.2)
    sass                 0.4.10     2025-04-11 [1] CRAN (R 4.5.0)
    satellite            1.0.6      2025-08-21 [1] CRAN (R 4.5.0)
    scales               1.4.0      2025-04-24 [1] CRAN (R 4.5.0)
    sessioninfo          1.2.3      2025-02-05 [1] CRAN (R 4.5.0)
    sf                 * 1.0-24     2026-01-13 [1] CRAN (R 4.5.2)
    shiny                1.11.1     2025-07-03 [1] CRAN (R 4.5.0)
    shinyWidgets         0.9.0      2025-02-21 [1] CRAN (R 4.5.0)
    snakecase            0.11.1     2023-08-27 [1] CRAN (R 4.5.0)
    sp                   2.2-0      2025-02-01 [1] CRAN (R 4.5.0)
    stars                0.6-8      2025-02-01 [1] CRAN (R 4.5.0)
    stringi              1.8.7      2025-03-27 [1] CRAN (R 4.5.0)
    stringr            * 1.6.0      2025-11-04 [1] CRAN (R 4.5.0)
    terra                1.8-93     2026-01-12 [1] CRAN (R 4.5.2)
    tibble             * 3.3.1      2026-01-11 [1] CRAN (R 4.5.2)
    tidyr              * 1.3.2      2025-12-19 [1] CRAN (R 4.5.2)
    tidyselect           1.2.1      2024-03-11 [1] CRAN (R 4.5.0)
    timechange           0.4.0      2026-01-29 [1] CRAN (R 4.5.2)
    TTR                  0.24.4     2023-11-28 [1] CRAN (R 4.5.0)
    tzdb                 0.5.0      2025-03-15 [1] CRAN (R 4.5.0)
    units              * 1.0-0      2025-10-09 [1] CRAN (R 4.5.0)
    usethis              3.2.1      2025-09-06 [1] CRAN (R 4.5.0)
    uuid                 1.2-2      2026-01-23 [1] CRAN (R 4.5.2)
    V8                   8.0.1      2025-10-10 [1] CRAN (R 4.5.0)
    vctrs                0.7.1      2026-01-23 [1] CRAN (R 4.5.2)
    viridisLite          0.4.2      2023-05-02 [1] CRAN (R 4.5.0)
    visNetwork           2.1.4      2025-09-04 [1] CRAN (R 4.5.0)
    vroom                1.7.0      2026-01-27 [1] CRAN (R 4.5.2)
    webshot2           * 0.1.2      2025-04-23 [1] CRAN (R 4.5.0)
    websocket            1.4.4      2025-04-10 [1] CRAN (R 4.5.0)
    withr                3.0.2      2024-10-28 [1] CRAN (R 4.5.0)
    xfun                 0.56       2026-01-18 [1] CRAN (R 4.5.2)
    xtable               1.8-4      2019-04-21 [1] CRAN (R 4.5.0)
    xts                  0.14.1     2024-10-15 [1] CRAN (R 4.5.0)
    yaml                 2.3.12     2025-12-10 [1] CRAN (R 4.5.2)
    zoo                  1.8-15     2025-12-15 [1] CRAN (R 4.5.2)

 [1] /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library

 * ── Packages attached to the search path.
 V ── Loaded and on-disk version mismatch.
 P ── Loaded and on-disk path mismatch.

──────────────────────────────────────────────────────────────────────────────