Package index
-
create_gcs_manifest() - Create a manifest of current GCS files
-
get_calcofi_file() - Get a CalCOFI file from the immutable archive
-
get_gcs_file() - Get a file from Google Cloud Storage
-
get_historical_file() - Get historical file from a specific date
-
get_manifest() - Get manifest for a specific date
-
list_calcofi_files() - List CalCOFI files from manifest
-
list_gcs_files() - List files in a GCS bucket/prefix
-
list_gcs_versions() - List versions of a file in GCS archive
-
put_gcs_file() - Upload a file to Google Cloud Storage
-
sync_to_gcs() - Sync local files to GCS, skipping unchanged files
-
add_parquet_metadata() - Add metadata to Parquet file
-
csv_to_parquet() - Convert CSV file to Parquet format
-
get_parquet_metadata() - Get Parquet file metadata
-
read_parquet_table() - Read a Parquet table
-
upload_parquet() - Upload Parquet file to GCS
-
write_parquet_table() - Write data to Parquet format
-
close_duckdb() - Disconnect from DuckDB and shutdown
-
create_duckdb_from_parquet() - Create DuckDB from Parquet files
-
create_duckdb_views() - Create views from a manifest
-
duckdb_to_parquet() - Export DuckDB table to Parquet
-
get_duckdb_con() - Get a DuckDB connection
-
get_duckdb_tables() - Get table information from DuckDB
-
load_duckdb_extension() - Install and load DuckDB extension
-
save_duckdb_to_gcs() - Save DuckDB to GCS
-
set_duckdb_comments() - Set table and column comments in DuckDB
-
add_provenance_columns() - Add provenance columns to a data frame
-
get_working_ducklake() - Get Working DuckLake connection
-
ingest_dataset() - Ingest Dataset into Working DuckLake
-
ingest_to_working() - Ingest data to Working DuckLake
-
list_working_tables() - List tables with provenance in Working DuckLake
-
query_at_time() - Query Working DuckLake at a point in time
-
save_working_ducklake() - Save Working DuckLake to GCS
-
strip_provenance_columns() - Strip provenance columns from data
-
compare_releases() - Compare two frozen releases
-
freeze_release() - Freeze a release of the DuckLake
-
get_release_metadata() - Get metadata for a frozen release
-
list_frozen_releases() - List available frozen releases
-
upload_frozen_release() - Upload Frozen Release to GCS
-
validate_for_release() - Validate Working DuckLake for release
-
create_redefinition_files() - Create Redefinition Files for Tables and Fields
-
determine_field_types() - Determine Field Types for Database
-
read_csv_files() - Read CSV Files and Their Metadata
-
read_csv_metadata() - Read CSV Files and Extract Metadata
-
detect_csv_changes() - Detect Changes in CSV Files
-
display_csv_changes() - Display CSV Changes in a Formatted Table
-
print_csv_change_stats() - Print CSV Change Statistics
-
transform_data() - Transform Data for Database Ingestion
-
check_data_integrity() - Check Data Integrity for Ingestion
-
check_multiple_datasets() - Check Multiple Datasets for Integrity
-
render_integrity_message() - Render Data Integrity Check Message
-
delete_flagged_rows() - Delete Flagged Rows from Database
-
flag_invalid_rows() - Flag and Export Invalid Rows
-
validate_dataset() - Run All Validations for a Dataset
-
validate_egg_stages() - Validate Egg Stage Values
-
validate_fk_references() - Validate Foreign Key References
-
validate_lookup_values() - Validate Lookup Values Exist
-
ingest_csv_to_db() [deprecated] - Ingest CSV data to PostgreSQL database (DEPRECATED)
-
ingest_dataset_pg() [deprecated] - Ingest a Dataset to PostgreSQL (DEPRECATED)
-
get_schema_versions() - Get Schema Version History
-
init_schema_version_csv() - Initialize Schema Version CSV
-
record_schema_version() - Record Schema Version
-
copy_schema() - Copy Database Schema
-
get_db_con() [deprecated] - Get a database connection to the CalCOFI PostgreSQL database (DEPRECATED)
-
apply_data_corrections() - Apply Data Corrections
-
assign_deterministic_uuids() - Assign deterministic UUIDs from composite key columns
-
assign_sequential_ids() - Assign Sequential IDs with Deterministic Sort Order
-
build_metadata_json() - Build Metadata JSON for Parquet Outputs
-
consolidate_ichthyo_tables() - Consolidate Ichthyoplankton Tables into Tidy Format
-
create_cruise_key() - Create Cruise Key from Ship Key and Date
-
create_lookup_table() - Create Lookup Table from Vocabulary Definitions
-
enforce_column_types() - Enforce Column Types Before Export
-
propagate_natural_key() - Propagate Key from Parent to Child Table
-
replace_uuid_with_id() - Replace UUIDs with Integer Foreign Keys
-
write_parquet_outputs() - Write Tables to Parquet Files
-
integrate_to_working_ducklake() - Integrate Ingest Outputs into Working DuckLake
-
list_ingest_outputs() - List Available Ingest Outputs
-
read_ingest_manifest() - Read Ingest Manifest from GCS
-
read_ingest_parquet() - Read Ingest Parquet Table from GCS
-
write_ingest_outputs() - Write Ingest Workflow Outputs to GCS
-
github_file_link() - Create GitHub File Link
-
preview_tables() - Preview Tables with Head and Tail Rows
-
show_flagged_file() - Show Flagged File Result
-
show_validation_results() - Show Validation Results with GitHub Links
-
cleanup_duplicate_archives() - Remove duplicate archives from GCS
-
compare_local_vs_archive() - Compare local files with GCS archive
-
download_archive() - Download archive to local directory
-
get_archive_manifest() - Get archive manifest (file metadata)
-
get_latest_archive_timestamp() - Get latest archive timestamp from GCS
-
get_local_manifest() - Get local file manifest
-
sync_to_gcs_archive() - Sync local files to GCS archive
-
commit_version_and_permalink() - Commit Version Changes and Get Permalink
-
complete_version_release() - Complete Version Release Workflow
-
get_package_version() - Get Current Package Version
-
suggest_next_version() - Suggest Next Version
-
update_package_version() - Synchronized Version Management for Package and Database
-
show_fields_redefine() - Show fields to redefine
-
show_source_files() - Show source files
-
show_tables_redefine() - Show tables to redefine
-
add_point_geom() - Add Point Geometry Column to a DuckDB Table
-
assign_grid_key() - Assign Grid Key via Spatial Join
-
build_taxon_table() - Build Taxonomic Hierarchy Table from WoRMS
-
fetch_ship_ices() - Fetch Ship Codes from ICES Reference Codes API
-
load_gcs_parquet_to_duckdb() - Load a GCS Parquet File into DuckDB
-
match_ships() - Match Ship Codes Across Datasets Using Multi-Source References
-
standardize_species() - Standardize Species Identifiers Using WoRMS/ITIS/GBIF APIs