# packages
if (!require("librarian")){
install.packages("librarian")
library(librarian)
}
librarian::shelf(
dplyr, DT, dygraphs, glue,googlesheets4, gstat, here, lubridate, mapview, purrr, readr,
raster, rmapshaper, sf, skimr, stars, stringr, tidyr)
select <- dplyr::select
glue <- glue::glue
mapviewOptions(fgb = FALSE)
# paths
dir_data <- switch(
Sys.info()["nodename"],
`Bens-MacBook-Pro.local` = "/Users/bbest/My Drive/projects/calcofi/data",
`Cristinas-MacBook-Pro.local` = "/Volumes/GoogleDrive/.shortcut-targets-by-id/13pWB5x59WSBR0mr9jJjkx7rri9hlUsMv/calcofi/data",
`Bens-MacBook-Air.local` = "/Volumes/GoogleDrive/My Drive/projects/calcofi/data")
# TODO: get Erin's Google Drive path and "nodename")
# bird & mammal counts from CalCOFI cruises
bird_mamm_csv <- file.path(dir_data, "/whales-seabirds-turtles/bird-mammal-census/CalCOFI_bird-mammal-census_observations.csv")
# log of transect datetimes & locations from CalCOFI cruises
transects_csv <- file.path(dir_data, "/whales-seabirds-turtles/bird-mammal-census/CalCOFI_bird-mammal-census_transects.csv")
# behavior codes
codes_bird_mamm_beh_csv <- file.path(dir_data, "/whales-seabirds-turtles/bird-mammal-census/CalCOFI_bird-mammal-census_behaviorcodes.csv")
# species codes
codes_bird_mamm_sp_csv <- file.path(dir_data, "/whales-seabirds-turtles/bird-mammal-census/CalCOFI_bird-mammal-census_allspecieslist.csv")
d_bird_mamm <- read_csv(bird_mamm_csv, guess_max = 1000000)
d_transects <- read_csv(transects_csv, guess_max = 1000000)
codes_bird_mamm_beh <- read_csv(codes_bird_mamm_beh_csv)
codes_bird_mamm_sp <- read_csv(codes_bird_mamm_sp_csv)
# get example AOI (Channel Islands NMS)
sanctuaries_geo <- "https://github.com/noaa-onms/onmsR/raw/12a87dfd4b90f2e3009ccb4913315fb2df7afddc/data-raw/sanctuaries.geojson"
cinms_ply <- sf::st_read(sanctuaries_geo) %>%
dplyr::filter(nms == "CINMS")
## Reading layer `sanctuaries' from data source
## `https://github.com/noaa-onms/onmsR/raw/12a87dfd4b90f2e3009ccb4913315fb2df7afddc/data-raw/sanctuaries.geojson'
## using driver `GeoJSON'
## Simple feature collection with 16 features and 2 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -180 ymin: -15.38615 xmax: 180 ymax: 48.50589
## Geodetic CRS: WGS 84
# get AOI geom points as WKT for later use with API
cinms_txt <- sf::st_as_text(cinms_ply$geometry)
# get aoi
aoi = cinms_ply
[X] make TRANSECT LINES from Lat/Lon Start and Stop [X] create LINE SEGMENTS b/w individual transect lines. Separate each part of line segment w/ a field to represent EFFORT. [ ] get DATETIME data for transects (currently only have DATE)
# get transects as points
transect_pts <- d_transects %>%
sf::st_as_sf(coords = c("Longitude Mid (º)", "Latitude Mid (º)"), crs = 4326) %>%
distinct()
# correct for automatic re-formatting of dates
get_date <- function(date) { # for date in date_list
date %>% as.character() %>% as.Date("%Y-%m-%d")
}
# get lat/lng data for inferred 'off-effort' segments
infer_segments <- function(d) {
# d <- d_cruise_date
cruise <- list()
transect_start <- list()
transect_stop <- list()
date_start <- list()
date_stop <- list()
lat_start <- list()
lon_start <- list()
lat_stop <- list()
lon_stop <- list()
for (i in 1:nrow(d)) {
if (i < nrow(d)) {
cruise[i] <- d$Cruise[i]
transect_start[i] <- d$`Transect number`[i]
transect_stop[i] <- d$`Transect number`[i+1]
date_start[i] <- as.character(d$Date[i])
date_stop[i] <- as.character(d$Date[i+1])
lat_start[i] <- d$lat_stop[i]
lon_start[i] <- d$lon_stop[i]
lat_stop[i] <- d$lat_start[i+1]
lon_stop[i] <- d$lon_start[i+1]
}
}
date_start <- map(date_start, get_date)
date_stop <- map(date_stop, get_date)
tibble(
cruise = unlist(cruise),
transect_start = unlist(transect_start),
transect_stop = unlist(transect_stop),
date_start = date_start %>% reduce(c),
date_stop = date_stop %>% reduce(c),
lat_start = unlist(lat_start),
lon_start = unlist(lon_start),
lat_stop = unlist(lat_stop),
lon_stop = unlist(lon_stop),
effort = FALSE) %>%
mutate(
date = NA,
transect = (transect_start + transect_stop)/2,
lat_mid = (lat_start + lat_stop)/2,
lon_mid = (lon_start + lon_stop)/2) %>%
select(
cruise, transect, transect_start, transect_stop,
date, date_start, date_stop,
lat_start, lon_start, lat_stop, lon_stop, lat_mid,lon_mid, effort)
}
# prep/clean data, run `infer_segments(),
# and bind inferred and original segments into one dataset
update_lines <- function(d_cruise_date){
# for each df (cruise & date combination) in d_list
d_cruise_date <- d_cruise_date %>%
mutate(
transect_start = `Transect number`,
transect_stop = `Transect number`,
date_start = Date,
date_stop = Date,
lat_start = mean(`Latitude Start (º)`),
lon_start = mean(`Longitude Start (º)`),
lat_stop = mean(`Latitude Stop (º)`),
lon_stop = mean(`Longitude Stop (º)`),
lat_mid = mean(`Latitude Mid (º)`),
lon_mid = mean(`Longitude Mid (º)`)) %>%
select(
Cruise,
`Transect number`, transect_start, transect_stop,
Date, date_start, date_stop,
lat_start, lon_start, lat_stop, lon_stop, lat_mid, lon_mid) %>%
mutate(effort = TRUE) %>%
distinct()
d_cruise_date %>%
infer_segments() %>%
ungroup() %>%
bind_rows(
d_cruise_date %>%
rename(
cruise = Cruise,
transect = `Transect number`,
date = Date))
}
# convert lat/lng data to linestring format
get_linestring <- function(d){
# where d = output of mapping `update_lines()` across d_list
start <- tibble(lon = d$lon_start, lat = d$lat_start)
stop <- tibble(lon = d$lon_stop, lat = d$lat_stop)
lines_sf <- vector("list", nrow(d))
for (i in seq_along(lines_sf)) {
lines_sf[[i]] <- st_linestring(as.matrix(rbind(start[i,], stop[i,])))
}
st_sfc(lines_sf, crs = "+proj=longlat +datum=WGS84") %>%
st_as_sf() %>%
bind_cols(d) %>%
rename(geometry = x)
}
# filter transects data by aoi
filter_transects_by_aoi <- function(transect_lines) {
tibble(transect_lines) %>%
mutate(
in_aoi = st_intersects(transects_all_geom, aoi) %>% as.logical()) %>%
filter(in_aoi) %>%
st_as_sf()
}
d_transectssegments <- d_transects %>%
drop_na() %>%
group_by(`Transect number`)
# original transect data split by Cruise & Date combination
d_list <- segments %>%
split(segments$Cruise, segments$Date)
# transect data NOT in linestring format
transects_all <- map(d_list, update_lines) %>%
bind_rows() # bind all df's created from split d_list together into 1 df
# transect data in linestring form
transects_all_geom <- transects_all %>%
get_linestring()
# unique combinations of year + cruise for filtering purposes
year_cruise_choices <- transects_all %>%
mutate(year = year(date_start)) %>%
select(year, cruise) %>%
distinct()
transects_in_aoi <- transects_all_geom %>%
filter_transects_by_aoi()
# transects filtered by AOI and cruise
transects_in_aoi %>%
filter(cruise == "CAC2005_07") %>%
mapview(zcol = "effort")
# year == 200, cruise number color-coded
transects_all_geom %>%
filter(year(date_start) == "2000") %>%
mapview(add=T, zcol="cruise")
# specific cruise number, effort color-coded
transects_all_geom %>%
filter(cruise == "CAC1987_05") %>%
mapview(add=T, zcol="effort")
# transect ==2, effort color-coded
transects_all_geom %>%
filter(transect == 2) %>%
mapview(add=T, zcol="effort")
# filtered by cruise + year combination, effort color-coded
transects_all_geom %>%
filter(
year(date_start) == year_cruise_choices$year[1],
cruise == year_cruise_choices$cruise[3]) %>%
mapview(add=T, zcol="effort")
transects_all_geom %>%
filter(
year(date_start) == year_cruise_choices$year[1],
cruise == year_cruise_choices$cruise[3]) %>%
mapview(add=T, zcol="effort")
transects_all_geom %>%
filter(year(date_start) == "2000") %>%
# pull(cruise) %>% unique()
filter(cruise == "CAC2000_04") %>%
mapview(add=T, zcol="effort")
bird_mamm_census <- d_bird_mamm %>%
left_join(
d_transects,
by = "GIS key") %>%
left_join(
codes_bird_mamm_beh %>%
select(Behavior, Behavior_Description = Description),
by = "Behavior") %>%
left_join(
codes_bird_mamm_sp %>%
mutate(across(
c("Bird", "LargeBird", "Fish", "Mammal", "Include", "Unidentified"),
as.logical)),
by = "Species")
# mapview(transects) # NOTE: this is prohibitively large to render to html (> 100 MB html)