Packages & setup
# packages
if (!require("librarian")){
install.packages("librarian")
library(librarian)
}
librarian::shelf(
dplyr, DT, dygraphs, glue,googlesheets4, gstat, here, lubridate, mapview, purrr, readr,
raster, rmapshaper, sf, skimr, stars, stringr, tidyr)
select <- dplyr::select
mapviewOptions(fgb = FALSE)
# paths
dir_data <- switch(
Sys.info()["nodename"],
`ben-mbpro` = "/Users/bbest/My Drive (ben@ecoquants.com)/projects/calcofi/data",
`Cristinas-MacBook-Pro.local` = "/Volumes/GoogleDrive/.shortcut-targets-by-id/13pWB5x59WSBR0mr9jJjkx7rri9hlUsMv/calcofi/data",
`Bens-MacBook-Air.local` = "/Volumes/GoogleDrive/My Drive/projects/calcofi/data")
# TODO: get Erin's Google Drive path and "nodename")
Read dataset keys from gdrive
bottle_key_csv <- file.path(dir_data, "/dataset_keys/bottle_field_descriptions.csv")
cast_key_csv <- file.path(dir_data, "/dataset_keys/cast_field_descriptions.csv")
key_cast <- read_csv(cast_key_csv)
key_bottle <- googlesheets4::read_sheet("https://docs.google.com/spreadsheets/d/18c6eSGRf0bSdraDocjn3-j1rUIxN2WKqxsEnPQCR6rA/edit#gid=2046976359") %>%
na_if("n.a.") %>%
mutate(
dataset = "bottle_cast",
source_url = source_url[1])
key_DIC <- googlesheets4::read_sheet("https://docs.google.com/spreadsheets/d/1SGfGMJUhiYZKIh746p5pGn7cD0cOcTA3g_imAnvyz88/edit#gid=0") %>%
na_if("n.a.") %>%
mutate(
dataset = "DIC_cast",
source_url = source_url[1])
data_key <- bind_rows(key_bottle, key_DIC) %>%
select(dataset, field_name, title, description_abbv, everything())
# convert to var_lookup
var_lookup_key_tbl <- data_key %>%
filter(!is.na(description_abbv))
var_lookup_key <- var_lookup_key_tbl %>%
split(seq(nrow(.))) %>%
lapply(as.list)
names(var_lookup_key) <- var_lookup_key_tbl$field_name
var_lookup_tbl <- googlesheets4::read_sheet("https://docs.google.com/spreadsheets/d/1ghM30pIdcsun7XWzLRKh4EilUplN60YdHayfiilPFwE/edit#gid=0")
var_lookup <- var_lookup_tbl %>%
split(seq(nrow(.))) %>%
lapply(as.list)
names(var_lookup) <- var_lookup_tbl$var
Read Oceano Bottle data from gdrive
# get data file paths from gdrive
bottle_csv <- file.path(dir_data, "/oceanographic-data/bottle-database/CalCOFI_Database_194903-202001_csv_22Sep2021/194903-202001_Bottle.csv")
cast_csv <- file.path(dir_data, "/oceanographic-data/bottle-database/CalCOFI_Database_194903-202001_csv_22Sep2021/194903-202001_Cast.csv")
bottle_cast_rds <- file.path(dir_data, "/oceanographic-data/bottle-database/bottle_cast.rds")
DIC_csv <- file.path(dir_data, "/DIC/CalCOFI_DICs_200901-201507_28June2018.csv")
calcofi_geo <- here("data/calcofi_oceano-bottle-stations_convex-hull.geojson")
calcofi_offshore_geo <- here("data/calcofi_oceano-bottle-stations_convex-hull_offshore.geojson")
calcofi_nearshore_geo <- here("data/calcofi_oceano-bottle-stations_convex-hull_nearshore.geojson")
# check paths
stopifnot(dir.exists(dir_data))
stopifnot(any(file.exists(bottle_csv,cast_csv)))
# read data
d_cast <- read_csv(cast_csv) %>%
separate(
Sta_ID, c("Sta_ID_Line", "Sta_ID_Station"),
sep=" ", remove=F) %>%
mutate(
Sta_ID_Line = as.double(Sta_ID_Line),
Sta_ID_Station = as.double(Sta_ID_Station))
d_bottle <- read_csv(bottle_csv, skip=1, col_names = F, guess_max = 1000000)
#d_bottle_problems() <- problems()
names(d_bottle) <- str_split(
readLines(bottle_csv, n=1), ",")[[1]] %>%
str_replace("\xb5", "µ")
table(is.na(d_bottle$pH1))
##
## FALSE TRUE
## 84 889416
# FALSE TRUE
# 84 889416
table(is.na(d_bottle$pH2))
##
## FALSE TRUE
## 10 889490
# FALSE TRUE
# 10 889490
table(is.na(d_bottle$O2ml_L))
##
## FALSE TRUE
## 719993 169507
# FALSE TRUE
# 719993 169507
table(is.na(d_bottle$O2Sat))
##
## FALSE TRUE
## 685072 204428
# FALSE TRUE
# 685072 204428
table(is.na(d_bottle[,'Oxy_µmol/Kg']))
##
## FALSE TRUE
## 685061 204439
# FALSE TRUE
# 685061 204439
d_DIC <- read_csv(DIC_csv, skip=1, col_names = F, guess_max = 1000000)
names(d_DIC) <- str_split(
readLines(DIC_csv, n=1), ",")[[1]] %>%
str_replace("\xb5", "µ")
# %>%
# separate(
# `Line Sta_ID`, c("Sta_ID_Line", "Sta_ID_Station"),
# sep=" ", remove=F) %>%
# mutate(
# Sta_ID_Line = as.double(Sta_ID_Line),
# Sta_ID_Station = as.double(Sta_ID_Station),
# offshore = ifelse(Sta_ID_Station > 60, T, F))
# Join data
# for now... ideally would join with all the data but this causes some issues with shared variables that have different values
bottle_cast <- d_cast %>%
left_join(
d_bottle %>% select(-Sta_ID),
by = "Cst_Cnt") %>%
mutate(Date = lubridate::as_date(Date, format = "%m/%d/%Y"))
DIC_cast <- d_cast %>%
left_join(
d_DIC %>%
select(-`Line Sta_ID`) %>%
rename(Depthm = `Depth(m)`),
by = c("Cst_Cnt" = "ID")) %>%
mutate(Date = lubridate::as_date(Date, format = "%m/%d/%Y"))
saveRDS(bottle_cast, bottle_cast_rds)
Read whales, seabirds, turtles data from gdrive
bird_mamm_csv <- file.path(dir_data, "/whales-seabirds-turtles/bird-mammal-census/CalCOFI_bird-mammal-census_observations.csv")
transects_csv <- file.path(dir_data, "/whales-seabirds-turtles/bird-mammal-census/CalCOFI_bird-mammal-census_transects.csv")
key_bird_mamm_beh_csv <- file.path(dir_data, "/whales-seabirds-turtles/bird-mammal-census/CalCOFI_bird-mammal-census_behaviorcodes.csv")
key_bird_mamm_sp_csv <- file.path(dir_data, "/whales-seabirds-turtles/bird-mammal-census/CalCOFI_bird-mammal-census_allspecieslist.csv")
key_bird_mamm_beh <- read_csv(key_bird_mamm_beh_csv)
key_bird_mamm_sp <- read_csv(key_bird_mamm_sp_csv)
# bird & mammal counts from CalCOFI cruises
d_bird_mamm <- read_csv(bird_mamm_csv, guess_max = 1000000)
# log of transect datetimes & locations from CalCOFI cruises
d_transects <- read_csv(transects_csv, guess_max = 1000000)
bird_mamm_transects <- d_bird_mamm %>%
left_join(
d_transects,
by = "GIS key") %>%
left_join(
key_bird_mamm_beh %>%
select(Behavior, Behavior_Description = Description),
by = "Behavior") %>%
left_join(
key_bird_mamm_sp %>%
mutate(across(
c("Bird", "LargeBird", "Fish", "Mammal", "Include", "Unidentified"),
as.logical)),
by = "Species")
Exploratory summary of bottle data
# d_cast
skim(d_cast)
# d_bottle
skim(d_bottle)
# d_DIC
skim(d_DIC)
Data summary
| Name |
d_DIC |
| Number of rows |
2084 |
| Number of columns |
23 |
| _______________________ |
|
| Column type frequency: |
|
| character |
3 |
| numeric |
20 |
| ________________________ |
|
| Group variables |
None |
Variable type: character
| Line Sta_ID |
0 |
1.00 |
11 |
11 |
0 |
33 |
0 |
| Depth_ID |
0 |
1.00 |
38 |
38 |
0 |
2084 |
0 |
| DIC Quality Comment |
2029 |
0.03 |
28 |
115 |
0 |
36 |
0 |
Variable type: numeric
| ID |
0 |
1.00 |
1058.07 |
613.45 |
1.00 |
521.75 |
1061.50 |
1585.25 |
2130.00 |
▇▇▇▇▇ |
| Cast_Index |
0 |
1.00 |
33165.93 |
596.82 |
31785.00 |
32780.00 |
33312.00 |
33646.00 |
33973.00 |
▂▂▅▆▇ |
| Bottle_Index |
0 |
1.00 |
833038.33 |
15412.45 |
796974.00 |
823082.75 |
836834.00 |
845385.50 |
853785.00 |
▂▂▅▅▇ |
| Cruise |
0 |
1.00 |
201258.07 |
196.70 |
200808.00 |
201108.00 |
201304.00 |
201407.00 |
201507.00 |
▂▁▆▃▇ |
| Depth(m) |
0 |
1.00 |
186.48 |
326.67 |
1.00 |
30.00 |
100.00 |
231.00 |
3542.00 |
▇▁▁▁▁ |
| DIC1 |
85 |
0.96 |
2153.24 |
113.00 |
1948.85 |
2028.33 |
2170.64 |
2253.81 |
2367.80 |
▇▃▅▇▃ |
| DIC2 |
1860 |
0.11 |
2168.15 |
154.85 |
1969.44 |
2008.98 |
2265.89 |
2315.52 |
2364.42 |
▇▁▁▁▇ |
| TA1 |
0 |
1.00 |
2256.06 |
34.84 |
2181.57 |
2230.32 |
2244.32 |
2278.50 |
2434.90 |
▅▇▃▁▁ |
| TA2 |
1850 |
0.11 |
2278.86 |
58.50 |
2198.15 |
2229.06 |
2247.50 |
2316.45 |
2437.00 |
▇▁▇▁▁ |
| pH1 |
2000 |
0.04 |
7.91 |
0.08 |
7.62 |
7.90 |
7.93 |
7.96 |
8.05 |
▁▁▁▇▂ |
| pH2 |
2074 |
0.00 |
7.95 |
0.02 |
7.92 |
7.93 |
7.95 |
7.96 |
7.99 |
▇▂▃▃▂ |
| Salinity1 |
0 |
1.00 |
33.76 |
0.40 |
32.84 |
33.42 |
33.73 |
34.15 |
34.68 |
▂▇▃▇▁ |
| Salinity2 |
1849 |
0.11 |
33.83 |
0.52 |
32.94 |
33.35 |
33.66 |
34.29 |
34.82 |
▅▆▁▇▁ |
| Temperature_degC |
0 |
1.00 |
11.05 |
3.80 |
1.52 |
8.25 |
10.00 |
14.01 |
22.75 |
▁▇▅▃▁ |
| Bottle Salinity |
0 |
1.00 |
33.77 |
0.40 |
32.84 |
33.42 |
33.74 |
34.15 |
34.68 |
▂▇▃▇▁ |
| Bottle O2(ml_L) |
0 |
1.00 |
3.40 |
2.14 |
0.00 |
1.36 |
3.22 |
5.64 |
7.81 |
▆▅▂▇▁ |
| Bottle O2 (µmol/Kg) |
0 |
1.00 |
148.28 |
93.39 |
0.00 |
59.31 |
140.12 |
245.95 |
340.42 |
▆▅▂▇▁ |
| Sigma-theta |
0 |
1.00 |
25.74 |
0.98 |
22.98 |
24.91 |
25.96 |
26.58 |
27.78 |
▁▅▅▇▂ |
| DIC Bottle_ID1 |
0 |
1.00 |
4269.62 |
3603.41 |
1.00 |
390.75 |
2952.50 |
7503.25 |
10004.00 |
▇▂▂▃▅ |
| DIC Bottle_ID2 |
1852 |
0.11 |
3519.40 |
3441.65 |
2.00 |
331.25 |
2247.00 |
6907.50 |
9959.00 |
▇▂▂▂▃ |
Get CINMS AOI
# get example AOI (Channel Islands NMS)
sanctuaries_geo <- "https://github.com/noaa-onms/onmsR/raw/12a87dfd4b90f2e3009ccb4913315fb2df7afddc/data-raw/sanctuaries.geojson"
cinms_ply <- sf::st_read(sanctuaries_geo) %>%
dplyr::filter(nms == "CINMS")
## Reading layer `sanctuaries' from data source
## `https://github.com/noaa-onms/onmsR/raw/12a87dfd4b90f2e3009ccb4913315fb2df7afddc/data-raw/sanctuaries.geojson'
## using driver `GeoJSON'
## Simple feature collection with 16 features and 2 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -180 ymin: -15.38615 xmax: 180 ymax: 48.50589
## Geodetic CRS: WGS 84
# get AOI geom points as WKT for later use with API
cinms_txt <- sf::st_as_text(cinms_ply$geometry)
#cinms_txt
mapview(cinms_ply)
# get aoi
aoi = cinms_ply
Functions
Get Station IDs as points
# for summary, want to group by Sta_Code because each data point has a diff Sta_ID
get_pts <- function(data) {
data %>%
filter(
!is.na(Lat_Dec),
!is.na(Lon_Dec)) %>%
group_by(
Sta_ID) %>%
summarize(
lon = mean(Lon_Dec),
lat = mean(Lat_Dec),
Sta_ID_Line = mean(Sta_ID_Line),
Sta_ID_Station = mean(Sta_ID_Station)) %>%
st_as_sf(
coords = c("lon", "lat"), crs=4326, remove = F) %>%
mutate(
offshore = ifelse(Sta_ID_Station > 60, T, F))
}
get_pts(bottle_cast) %>% mapview(zcol="offshore")
get_pts(DIC_cast) %>% mapview(zcol="offshore")