Setup gcloud and ssh tunneled to database per https://github.com/CalCOFI/server#ssh-tunnel-connection-to-postgis-db.
librarian::shelf(
calcofi/calcofi4r,
DT, here, leaflet, mapview, readr, sf, stringr)
source(here("../apps/libs/db.R"))
options(readr.show_col_types = F)
table(calcofi4r::stations$is_cce) # 113
##
## FALSE TRUE
## 2521 113
table(calcofi4r::stations$is_ccelter) # 66
##
## FALSE TRUE
## 2568 66
table(calcofi4r::stations$is_sccoos) # 9
##
## FALSE TRUE
## 2625 9
sta_n <- tbl(con, "ctd_casts") %>%
group_by(sta_id) %>%
summarize(
n = n(),
date_min = min(date, na.rm=T),
date_max = max(date, na.rm=T)) %>%
arrange(desc(n)) %>%
collect() %>%
left_join(
calcofi4r::stations,
by = "sta_id")
n_min_cce <- sta_n %>%
filter(is_cce) %>%
pull(n) %>%
min() # 1
n_min_ccelter <- sta_n %>%
filter(is_ccelter) %>%
pull(n) %>%
min() # 124
sta_nmin <- sta_n %>%
filter(n >= n_min_ccelter) %>%
mutate(
cat = case_when(
is_cce & is_ccelter ~ "ccelter",
is_cce & !is_ccelter ~ "cce",
!is_cce & !is_ccelter ~ "neither",
TRUE ~ "other")) %>%
st_as_sf()
table(sta_nmin$cat)
##
## cce ccelter neither
## 5 66 18
leaflet() %>%
addProviderTiles(providers$Stamen.Toner) %>%
addCircleMarkers(
data = sta_nmin %>%
filter(cat == "ccelter"))
sta_nmin$n <- as.numeric(sta_nmin$n)
mapview(sta_nmin, zcol = "n")
sta_n %>%
filter(is_sccoos & !is_ccelter)
## # A tibble: 9 × 13
## sta_id n date_min date_max sta_i…¹ sta_i…² lon lat is_of…³ is_cce
## <chr> <int> <date> <date> <dbl> <dbl> <dbl> <dbl> <lgl> <lgl>
## 1 090.0 … 56 2004-07-19 2020-01-11 90 27.7 -118. 33.5 FALSE TRUE
## 2 086.8 … 56 2004-11-19 2020-01-11 86.8 32.5 -118. 33.9 FALSE TRUE
## 3 093.4 … 54 2004-07-13 2020-01-05 93.4 26.4 -117. 32.9 FALSE TRUE
## 4 083.3 … 54 2004-07-24 2020-01-16 83.3 39.4 -119. 34.3 FALSE TRUE
## 5 088.5 … 54 2004-07-19 2020-01-11 88.5 30.1 -118. 33.7 FALSE TRUE
## 6 081.7 … 50 2004-07-27 2020-01-16 81.7 43.5 -120. 34.4 FALSE TRUE
## 7 091.7 … 49 2004-07-28 2020-01-05 91.7 26.4 -117. 33.2 FALSE TRUE
## 8 080.0 … 48 2004-07-27 2020-01-16 80 50.5 -120. 34.5 FALSE TRUE
## 9 085.4 … 38 2004-07-28 2020-01-11 85.4 35.8 -119. 34.0 FALSE TRUE
## # … with 3 more variables: is_ccelter <lgl>, is_sccoos <lgl>, geom <POINT [°]>,
## # and abbreviated variable names ¹​sta_id_line, ²​sta_id_station, ³​is_offshore
Source: Station Positions – CalCOFI
75 Station Pattern: Summer and Fall surveys (~16 days at sea) since 1984
104 or 113 Station Pattern: Winter and Spring surveys (~23 days at sea)
sta_ord_csv <- "./data/station-positions/CalCOFIStationOrder.csv"
d_sta_ord <- read_csv(sta_ord_csv)
# ran once
# dbWriteTable(con, "stations_order", d_sta_ord)
datatable(d_sta_ord)
d_ctd_casts_10 <- tbl(con, "ctd_casts") %>%
slice_min(10) %>%
collect()
names(d_ctd_casts_10) %>% str_subset("sta|line")
## [1] "cruz_sta" "dbsta_id" "sta_id" "sta_code" "distance" "rptline"
## [7] "stline" "acline" "rptsta" "ststa" "acsta" "origstaid"
# "stline" "rptline" "acline"
# "ststa" "rptsta" "acsta"
# which station and line to use?
# - st* ?
# - rpt* reported?
# - ac* actual?
# tbl(con, "ctd_casts") %>%
# # filter(stline != rptline) %>%
# filter(stline != acline) %>%
# group_by(stline) %>%
# summarize(n = n()) %>%
# collect()
tbl(con, "stations_order") %>%
anti_join(
tbl(con, "ctd_casts"),
by = c(
# "LINE" = "stline",
# "STA" = "ststa")) %>%
# select(LINE, STA)
# # LINE STA
# # <dbl> <dbl>
# # 1 73.3 100
# # 2 63.3 100
"LINE" = "rptline",
"STA" = "rptsta")) %>%
select(LINE, STA) # 0
## # Source: SQL [0 x 2]
## # Database: postgres [admin@localhost:5432/gis]
## # … with 2 variables: LINE <dbl>, STA <dbl>
# ok, so going with rptline, rptsta
pts_sta <- tbl(con, "stations_order") %>%
left_join(
tbl(con, "ctd_casts"),
by = c(
"LINE" = "rptline",
"STA" = "rptsta")) %>%
rename(
sta_line = LINE,
sta_sta = STA,
lon = `LON (DEC)`,
lat = `LAT (DEC)`,
order = `ORDER OCC`) %>%
group_by(sta_line, sta_sta, lon, lat) %>%
summarize(
n = n(),
date_min = min(date, na.rm=T),
date_max = max(date, na.rm=T),
.groups = "drop") %>%
collect() %>%
st_as_sf(
coords = c("lon", "lat"),
crs = 4326)
# how many ctd_casts missing?
# tbl(con, "ctd_casts") %>% count() %>% pull(n) # 35,376
# sum(pts_sta$n) # - 17,852 = 17,524
pts_sta$n <- as.numeric(pts_sta$n)
mapView(pts_sta["n"])
mapView(pts_sta, cex = "n")
mapView(pts_sta, cex = "n", zcol = "n")