Packages & setup

# packages
if (!require("librarian")){
  install.packages("librarian")
  library(librarian)
}
librarian::shelf(
  dplyr, DT, dygraphs, glue,googlesheets4, gstat, here, lubridate, mapview, purrr, readr, 
  raster, rmapshaper, sf, skimr, stars, stringr, tidyr)
select <- dplyr::select
glue   <- glue::glue
mapviewOptions(fgb = FALSE)

TODO: access gdrive data from server

# paths
dir_data <- switch(
  Sys.info()["nodename"],
  `Bens-MacBook-Pro.local` = "/Users/bbest/My Drive/projects/calcofi/data",
  `Cristinas-MacBook-Pro.local` = "/Volumes/GoogleDrive/.shortcut-targets-by-id/13pWB5x59WSBR0mr9jJjkx7rri9hlUsMv/calcofi/data",
  `Bens-MacBook-Air.local` = "/Volumes/GoogleDrive/My Drive/projects/calcofi/data")
  # TODO: get Erin's Google Drive path and "nodename")

Read Bird Mammal Census datasets from gdrive

# bird & mammal counts from CalCOFI cruises
bird_mamm_csv <- file.path(dir_data, "/whales-seabirds-turtles/bird-mammal-census/CalCOFI_bird-mammal-census_observations.csv")
# log of transect datetimes & locations from CalCOFI cruises
transects_csv <- file.path(dir_data, "/whales-seabirds-turtles/bird-mammal-census/CalCOFI_bird-mammal-census_transects.csv")
# behavior codes
codes_bird_mamm_beh_csv <- file.path(dir_data, "/whales-seabirds-turtles/bird-mammal-census/CalCOFI_bird-mammal-census_behaviorcodes.csv")
# species codes
codes_bird_mamm_sp_csv <- file.path(dir_data, "/whales-seabirds-turtles/bird-mammal-census/CalCOFI_bird-mammal-census_allspecieslist.csv")
d_bird_mamm <- read_csv(bird_mamm_csv, guess_max = 1000000)
d_transects <- read_csv(transects_csv, guess_max = 1000000)
codes_bird_mamm_beh <- read_csv(codes_bird_mamm_beh_csv) 
codes_bird_mamm_sp  <- read_csv(codes_bird_mamm_sp_csv) 

Get aoi

# get example AOI (Channel Islands NMS)
sanctuaries_geo <- "https://github.com/noaa-onms/onmsR/raw/12a87dfd4b90f2e3009ccb4913315fb2df7afddc/data-raw/sanctuaries.geojson"

cinms_ply <- sf::st_read(sanctuaries_geo) %>%
  dplyr::filter(nms == "CINMS")
## Reading layer `sanctuaries' from data source 
##   `https://github.com/noaa-onms/onmsR/raw/12a87dfd4b90f2e3009ccb4913315fb2df7afddc/data-raw/sanctuaries.geojson' 
##   using driver `GeoJSON'
## Simple feature collection with 16 features and 2 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -180 ymin: -15.38615 xmax: 180 ymax: 48.50589
## Geodetic CRS:  WGS 84
# get AOI geom points as WKT for later use with API
cinms_txt <- sf::st_as_text(cinms_ply$geometry)

# get aoi
aoi = cinms_ply

Clean & reformat transect data

TODO:

[X] make TRANSECT LINES from Lat/Lon Start and Stop [X] create LINE SEGMENTS b/w individual transect lines. Separate each part of line segment w/ a field to represent EFFORT. [ ] get DATETIME data for transects (currently only have DATE)

Get transects as points (not very useful)

# get transects as points
transect_pts <- d_transects %>% 
  sf::st_as_sf(coords = c("Longitude Mid (º)", "Latitude Mid (º)"), crs = 4326) %>% 
  distinct()

Get transects as lines

Functions : infer ‘off-effort’ segments; create lines from lat/lng pts

# correct for automatic re-formatting of dates
get_date <- function(date) { # for date in date_list
  date %>% as.character() %>% as.Date("%Y-%m-%d")
}

# get lat/lng data for inferred 'off-effort' segments
infer_segments <- function(d) {
  # d <- d_cruise_date
  cruise         <- list()
  transect_start <- list()
  transect_stop  <- list()
  date_start     <- list()
  date_stop      <- list()
  lat_start      <- list()
  lon_start      <- list()
  lat_stop       <- list()
  lon_stop       <- list()

  for (i in 1:nrow(d)) { 
    if (i < nrow(d)) {
      cruise[i]         <- d$Cruise[i]
      transect_start[i] <- d$`Transect number`[i]
      transect_stop[i]  <- d$`Transect number`[i+1]
      date_start[i]     <- as.character(d$Date[i]) 
      date_stop[i]      <- as.character(d$Date[i+1])
      lat_start[i]      <- d$lat_stop[i]
      lon_start[i]      <- d$lon_stop[i]
      lat_stop[i]       <- d$lat_start[i+1]
      lon_stop[i]       <- d$lon_start[i+1]
    }
  }
  
  date_start <- map(date_start, get_date) 
  date_stop  <- map(date_stop, get_date)
  
  tibble(
    cruise         = unlist(cruise),
    transect_start = unlist(transect_start),
    transect_stop  = unlist(transect_stop),
    date_start     = date_start %>% reduce(c),
    date_stop      = date_stop  %>% reduce(c),
    lat_start      = unlist(lat_start),
    lon_start      = unlist(lon_start),
    lat_stop       = unlist(lat_stop),
    lon_stop       = unlist(lon_stop),
    effort         = FALSE) %>%
    mutate(
      date         = NA,
      transect     = (transect_start + transect_stop)/2,
      lat_mid      = (lat_start + lat_stop)/2,
      lon_mid      = (lon_start + lon_stop)/2) %>% 
    select(
      cruise, transect, transect_start, transect_stop, 
      date, date_start, date_stop, 
      lat_start, lon_start, lat_stop, lon_stop, lat_mid,lon_mid, effort)
}

# prep/clean data, run `infer_segments(), 
# and bind inferred and original segments into one dataset
update_lines <- function(d_cruise_date){ 
  # for each df (cruise & date combination) in d_list
  d_cruise_date <- d_cruise_date %>% 
    mutate(
      transect_start = `Transect number`,
      transect_stop  = `Transect number`,
      date_start     = Date,
      date_stop      = Date,
      lat_start      = mean(`Latitude Start (º)`),
      lon_start      = mean(`Longitude Start (º)`),
      lat_stop       = mean(`Latitude Stop (º)`),
      lon_stop       = mean(`Longitude Stop (º)`),
      lat_mid        = mean(`Latitude Mid (º)`),
      lon_mid        = mean(`Longitude Mid (º)`)) %>%
    select(
      Cruise, 
      `Transect number`, transect_start, transect_stop, 
      Date, date_start, date_stop,
      lat_start, lon_start, lat_stop, lon_stop, lat_mid, lon_mid) %>% 
    mutate(effort = TRUE) %>% 
    distinct() 
  
  d_cruise_date %>% 
    infer_segments() %>% 
    ungroup() %>% 
    bind_rows(
      d_cruise_date %>% 
        rename(
          cruise   = Cruise,
          transect = `Transect number`,
          date     = Date))
}

# convert lat/lng data to linestring format
get_linestring <- function(d){ 
  # where d = output of mapping `update_lines()` across d_list
  start    <- tibble(lon = d$lon_start, lat = d$lat_start)
  stop     <- tibble(lon = d$lon_stop,  lat = d$lat_stop)
  lines_sf <- vector("list", nrow(d))
  for (i in seq_along(lines_sf)) {
    lines_sf[[i]] <- st_linestring(as.matrix(rbind(start[i,], stop[i,])))
  }
  st_sfc(lines_sf, crs = "+proj=longlat +datum=WGS84") %>% 
    st_as_sf() %>%
    bind_cols(d) %>% 
    rename(geometry = x)
}

# filter transects data by aoi
filter_transects_by_aoi <- function(transect_lines) {
  tibble(transect_lines) %>% 
    mutate(
      in_aoi = st_intersects(transects_all_geom, aoi) %>% as.logical()) %>% 
  filter(in_aoi) %>%
  st_as_sf()
}

Generate transects df from original d_transects

segments <- d_transects %>%
  drop_na() %>%
  group_by(`Transect number`)

# original transect data split by Cruise & Date combination
d_list <- segments %>% 
  split(segments$Cruise, segments$Date)

# transect data NOT in linestring format
transects_all <- map(d_list, update_lines) %>%
  bind_rows() # bind all df's created from split d_list together into 1 df

# transect data in linestring form
transects_all_geom <- transects_all %>% 
  get_linestring()
# unique combinations of year + cruise for filtering purposes
year_cruise_choices <- transects_all %>% 
  mutate(year = year(date_start)) %>% 
  select(year, cruise) %>% 
  distinct()

Filter transect data by AOI (messy; best to also filter by year, cruise, etc.)

transects_in_aoi <- transects_all_geom %>% 
  filter_transects_by_aoi()

EXAMPLE PLOTS: filtered by different parameters

# transects filtered by AOI and cruise
transects_in_aoi %>% 
  filter(cruise == "CAC2005_07") %>% 
  mapview(zcol = "effort")
# year == 200, cruise number color-coded
transects_all_geom %>% 
  filter(year(date_start) == "2000") %>% 
  mapview(add=T, zcol="cruise")
# specific cruise number, effort color-coded
transects_all_geom %>% 
  filter(cruise == "CAC1987_05") %>% 
  mapview(add=T, zcol="effort")
# transect ==2, effort color-coded
transects_all_geom %>% 
  filter(transect == 2) %>% 
  mapview(add=T, zcol="effort")
# filtered by cruise + year combination, effort color-coded
transects_all_geom %>% 
  filter(
    year(date_start) == year_cruise_choices$year[1],
    cruise == year_cruise_choices$cruise[3]) %>% 
  mapview(add=T, zcol="effort")
transects_all_geom %>% 
  filter(
    year(date_start) == year_cruise_choices$year[1],
    cruise == year_cruise_choices$cruise[3]) %>% 
  mapview(add=T, zcol="effort")
transects_all_geom %>% 
  filter(year(date_start) == "2000") %>% 
  # pull(cruise) %>% unique()
  filter(cruise == "CAC2000_04") %>% 
  mapview(add=T, zcol="effort")

Join census observations with transect data

bird_mamm_census <- d_bird_mamm %>% 
  left_join(
    d_transects, 
    by = "GIS key") %>% 
  left_join(
    codes_bird_mamm_beh %>% 
      select(Behavior, Behavior_Description = Description), 
    by = "Behavior") %>%
  left_join(
    codes_bird_mamm_sp %>% 
      mutate(across(
        c("Bird", "LargeBird", "Fish", "Mammal", "Include", "Unidentified"), 
        as.logical)), 
    by = "Species")
 

# mapview(transects) # NOTE: this is prohibitively large to render to html (> 100 MB html)