source(here::here("libs/db.R")) # defines variables: con, dir_gdrive
librarian::shelf(
DT, glue, janitor, lubridate, mapview, purrr, readr, readxl,
sf, skimr, stringr, tidyr)
options(readr.show_col_types = F)
mapviewOptions(fgb = FALSE)
# helper functions ----
# convert station ID to lon, lat using the proj library
proj <- "/Users/bbest/homebrew/bin/proj" # installed on Ben's MacBookPro from https://proj.org/apps/proj.html
lonlat_to_stationid <- function(lon, lat){
system(glue("echo {lon} {lat} | {proj} +proj=calcofi +epsg=4326 -f '%05.1f'"), intern=T) %>%
stringr::str_replace("\t", " ")
}
stationid_to_lonlat <- function(stationid){
# using 5th decimal place, a la CCE_Stations.txt
system(glue("echo {stationid} | {proj} +proj=calcofi +epsg=4326 -I -d 5"), intern=T) %>%
stringr::str_replace("\t", " ")
}
# (a_staid <- stations$Sta_ID[1]) # "001.0 168.0"
# (a_lonlat <- stationid_to_lonlat(a_staid)) # 93.3 26.7
# a_lon <- str_split(a_lonlat, " ", simplify=T)[1] %>% as.double()
# a_lat <- str_split(a_lonlat, " ", simplify=T)[2] %>% as.double()
# lonlat_to_stationid(a_lon, a_lat) # "1.00 168.00" -> "001.0 168.0"
# "1.0 168.0"
Time for the chunk packages
to run: 6.697 seconds
Initially copied from CalCOFI/calcofi4r:data-raw/
bottle.R
.
# paths ----
# oceanographic source paths
bottle_csv <- file.path(dir_gdrive, "data/oceanographic-data/bottle-database/CalCOFI_Database_194903-202001_csv_22Sep2021/194903-202001_Bottle.csv")
cast_csv <- file.path(dir_gdrive, "data/oceanographic-data/bottle-database/CalCOFI_Database_194903-202001_csv_22Sep2021/194903-202001_Cast.csv")
# bottle_cast_rds <- file.path(dir_gdrive, "data/oceanographic-data/bottle-database/bottle_cast.rds")
DIC_csv <- file.path(dir_gdrive, "data/DIC/CalCOFI_DICs_200901-201507_28June2018.csv")
# stations source paths
# source: Shonna Dovel <sdovel@ucsd.edu> 2022-03-17
stations_ccelter_xls <- file.path(dir_gdrive, "data/CalCOFI-stations/CalCOFI station LatLong.xlsx")
# sourc: http://cce.lternet.edu/data/gmt-mapper
stations_cce_txt <- file.path(dir_gdrive, "data/CalCOFI-stations/CCE_Stations.txt")
stations_sccoos_txt <- file.path(dir_gdrive, "data/CalCOFI-stations/SCCOOS_Stations.txt")
# study area destination paths
calcofi_geo <- here("data/calcofi_oceano-bottle-stations_convex-hull.geojson")
calcofi_offshore_geo <- here("data/calcofi_oceano-bottle-stations_convex-hull_offshore.geojson")
calcofi_nearshore_geo <- here("data/calcofi_oceano-bottle-stations_convex-hull_nearshore.geojson")
# check paths
stopifnot(all(file.exists(
bottle_csv, cast_csv, DIC_csv,
stations_ccelter_xls, stations_cce_txt, stations_sccoos_txt)))
# read csv sources ----
d_bottle <- read_csv(bottle_csv, skip=1, col_names = F, guess_max = 1000000)
#d_bottle_problems() <- problems()
names(d_bottle) <- str_split(
readLines(bottle_csv, n=1), ",")[[1]] %>%
str_replace("\xb5", "µ")
d_cast <- read_csv(cast_csv)
d_DIC <- read_csv(DIC_csv, skip=1, col_names = F, guess_max = 1000000)
names(d_DIC) <- str_split(
readLines(DIC_csv, n=1), ",")[[1]] %>%
str_replace("\xb5", "µ")
# d_DIC %>% head() %>% View()
d_DIC <- d_DIC %>%
rename("Sta_ID"="Line Sta_ID")
Time for the chunk paths
to run: 62.65 seconds
stations
# get unique station IDs from the casts
stations <- d_cast %>%
select(Sta_ID) %>%
group_by(Sta_ID) %>%
summarize() %>%
mutate(
is_cast = TRUE,
lonlat_proj = map_chr(Sta_ID, stationid_to_lonlat)) %>%
separate(lonlat_proj, c("lon", "lat"), sep=" ", convert = T) %>%
separate(
Sta_ID, c("Sta_ID_line", "Sta_ID_station"), sep=" ", remove=F, convert=T) %>%
mutate(
is_offshore = ifelse(Sta_ID_station > 60, T, F)) %>%
st_as_sf(
coords = c("lon", "lat"), crs=4326, remove = F)
# stations$Sta_ID
# mapview(stations)
# + CCE field ----
stations_cce <- read_tsv(stations_cce_txt, skip = 2) %>%
mutate(
is_cce = TRUE,
Sta_ID = map2_chr(LonDec, LatDec, lonlat_to_stationid))
stopifnot(sum(!stations_cce$Sta_ID %in% stations$Sta_ID) == 0)
stations <- stations %>%
left_join(
stations_cce %>%
select(Sta_ID, is_cce),
by = "Sta_ID")
# + CCE-LTER field ----
stations_ccelter <- read_excel(stations_ccelter_xls) %>%
mutate(
is_ccelter = TRUE,
Sta_ID = glue("{sprintf('%05.1f', Line)} {sprintf('%05.1f', Sta)}"),
lonlat_proj = map_chr(Sta_ID, stationid_to_lonlat)) %>%
separate(
lonlat_proj, c("lon", "lat"), sep=" ", convert = T) %>%
select(Sta_ID, is_ccelter, lon, lon_0 = `Lon Dec`, lat, lat_0 = `Lat Dec`)
# stations_ccelter %>% View()
stopifnot(sum(!stations_ccelter$Sta_ID %in% stations$Sta_ID) == 0)
stations <- stations %>%
left_join(
stations_ccelter %>%
select(Sta_ID, is_ccelter),
by = "Sta_ID")
# + SCCOOS field ----
stations_sccoos <- read_tsv(stations_sccoos_txt) %>%
mutate(
is_sccoos = TRUE,
Sta_ID = map2_chr(LonDec, LatDec, lonlat_to_stationid))
stopifnot(sum(!stations_sccoos$Sta_ID %in% stations$Sta_ID) == 0)
stations <- stations %>%
left_join(
stations_sccoos %>%
select(Sta_ID, is_sccoos),
by = "Sta_ID")
# stations_0 <- stations
stations <- stations %>%
mutate(across(where(is.logical), ~replace_na(.,FALSE)))
stations %>%
select(is_cce, is_ccelter) %>%
st_drop_geometry() %>%
table(useNA = "ifany")
## is_ccelter
## is_cce FALSE TRUE
## FALSE 2521 0
## TRUE 47 66
# is_cce
# is_ccelter FALSE TRUE
# FALSE 2521 47
# TRUE 0 66
# So: 47 extra stations in CCE that are missing in CCE-LTER
stations %>%
select(is_cce, is_sccoos) %>%
st_drop_geometry() %>%
table(useNA = "ifany")
## is_sccoos
## is_cce FALSE TRUE
## FALSE 2521 0
## TRUE 104 9
# is_sccoos
# is_cce FALSE TRUE
# FALSE 2521 0
# TRUE 104 9
# So: only 9 stations in SCCOS, none of which are in CCE
# field order ----
stations <- stations %>%
select(
sta_id = Sta_ID,
sta_id_line = Sta_ID_line, sta_id_station = Sta_ID_station,
lon, lat,
is_offshore,
is_cce, is_ccelter, is_sccoos) %>%
rename(geom = geometry)
# write to database
st_write(stations, con, "stations")
# to get to show as layer in tile.calcofi.io
dbSendQuery(con, "SELECT UpdateGeometrySRID('stations', 'geom', 4326);")
## <PqResult>
## SQL SELECT UpdateGeometrySRID('stations', 'geom', 4326);
## ROWS Fetched: 0 [incomplete]
## Changed: 0
dbSendQuery(con, "CREATE INDEX IF NOT EXISTS stations_geom_idx ON stations USING GIST (geom);")
## <PqResult>
## SQL CREATE INDEX IF NOT EXISTS stations_geom_idx ON stations USING GIST (geom);
## ROWS Fetched: 0 [complete]
## Changed: 0
dbSendQuery(con, "CREATE INDEX IF NOT EXISTS stations_staid_idx ON stations (sta_id)")
## <PqResult>
## SQL CREATE INDEX IF NOT EXISTS stations_staid_idx ON stations (sta_id)
## ROWS Fetched: 0 [complete]
## Changed: 0
# show map of stations
mapview(stations)
# show table summary
stations %>%
st_drop_geometry() %>%
skim()
Name | Piped data |
Number of rows | 2634 |
Number of columns | 9 |
_______________________ | |
Column type frequency: | |
character | 1 |
logical | 4 |
numeric | 4 |
________________________ | |
Group variables | None |
Variable type: character
skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
---|---|---|---|---|---|---|---|
sta_id | 0 | 1 | 11 | 11 | 0 | 2634 | 0 |
Variable type: logical
skim_variable | n_missing | complete_rate | mean | count |
---|---|---|---|---|
is_offshore | 0 | 1 | 0.42 | FAL: 1534, TRU: 1100 |
is_cce | 0 | 1 | 0.04 | FAL: 2521, TRU: 113 |
is_ccelter | 0 | 1 | 0.03 | FAL: 2568, TRU: 66 |
is_sccoos | 0 | 1 | 0.00 | FAL: 2625, TRU: 9 |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
sta_id_line | 0 | 1 | 92.01 | 31.45 | 1.00 | 73.30 | 93.00 | 113.30 | 176.70 | ▁▃▇▅▁ |
sta_id_station | 0 | 1 | 72.87 | 67.35 | 0.00 | 39.00 | 56.00 | 80.00 | 531.00 | ▇▁▁▁▁ |
lon | 0 | 1 | -120.74 | 6.71 | -153.16 | -123.96 | -119.87 | -116.33 | -107.13 | ▁▁▂▇▃ |
lat | 0 | 1 | 31.64 | 5.28 | 17.41 | 28.52 | 31.60 | 34.49 | 47.94 | ▁▅▇▂▁ |
Time for the chunk stations
to run: 57.84 seconds
ctdcast
Note that cast
is a reserved word in SQL for a function to convert data types, so we’re
calling this table ctdcast
instead of
cast
.
Writing queries to the database is also much easier if we don’t have
to wrap field names in quotes. This means that all field names have to
be lower case and not use any spaces, dashes or special characters.
Thankfully janitor::
make_clean_names()
can handle this for us. We’ll just also keep track of the original field
names in a separate table fields_renamed
.
stopifnot(length(setdiff(unique(d_cast$Sta_ID), stations$sta_id)) == 0)
d_cast <- d_cast %>%
mutate(
datetime = map2_chr(Date, Time, function(f_date, f_time){
s_time <- ifelse(
is.na(f_time),
"00:00:00",
str_split(f_time, " ")[[1]][2])
glue("{f_date} {s_time}") }) %>%
as_datetime(
format = "%m/%d/%Y %H:%M:%S",
tz = "America/Los_Angeles"), # TODO: confirm PST and not UTC timezone
Date = as_date(Date, format = "%m/%d/%Y")) %>%
st_as_sf(
coords = c("Lon_Dec", "Lat_Dec"), crs=4326, remove = F) %>%
rename(geom = geometry)
# rename fields
flds_orig <- names(d_cast)
flds <- make_clean_names(flds_orig)
names(d_cast) <- flds
d_flds <- tibble(
table = "ctdcast",
field_orig = flds_orig,
field = flds)
# write spatial points to database
st_write(d_cast, con, "ctdcast")
# to get to show as layer in tile.calcofi.io
dbSendQuery(con, "SELECT UpdateGeometrySRID('ctdcast', 'geom', 4326);")
## <PqResult>
## SQL SELECT UpdateGeometrySRID('ctdcast', 'geom', 4326);
## ROWS Fetched: 0 [incomplete]
## Changed: 0
dbSendQuery(con, "CREATE INDEX IF NOT EXISTS ctdcast_geom_idx ON ctdcast USING GIST (geom);")
## <PqResult>
## SQL CREATE INDEX IF NOT EXISTS ctdcast_geom_idx ON ctdcast USING GIST (geom);
## ROWS Fetched: 0 [complete]
## Changed: 0
dbSendQuery(con, "CREATE INDEX IF NOT EXISTS ctdcast_staid_idx ON ctdcast (sta_id)")
## <PqResult>
## SQL CREATE INDEX IF NOT EXISTS ctdcast_staid_idx ON ctdcast (sta_id)
## ROWS Fetched: 0 [complete]
## Changed: 0
dbSendQuery(con, "CREATE INDEX IF NOT EXISTS ctdcast_cstcnt_idx ON ctdcast (cst_cnt)")
## <PqResult>
## SQL CREATE INDEX IF NOT EXISTS ctdcast_cstcnt_idx ON ctdcast (cst_cnt)
## ROWS Fetched: 0 [complete]
## Changed: 0
dbSendQuery(con, "CREATE INDEX IF NOT EXISTS ctdcast_date_idx ON ctdcast (date)")
## <PqResult>
## SQL CREATE INDEX IF NOT EXISTS ctdcast_date_idx ON ctdcast (date)
## ROWS Fetched: 0 [complete]
## Changed: 0
dbSendQuery(con, "CREATE INDEX IF NOT EXISTS ctdcast_datetime_idx ON ctdcast (datetime)")
## <PqResult>
## SQL CREATE INDEX IF NOT EXISTS ctdcast_datetime_idx ON ctdcast (datetime)
## ROWS Fetched: 0 [complete]
## Changed: 0
dbSendQuery(con, "CREATE INDEX IF NOT EXISTS ctdcast_datetime_idx ON ctdcast (datetime)")
## <PqResult>
## SQL CREATE INDEX IF NOT EXISTS ctdcast_datetime_idx ON ctdcast (datetime)
## ROWS Fetched: 0 [complete]
## Changed: 0
# show table summary
d_cast %>%
st_drop_geometry() %>%
skim()
Name | Piped data |
Number of rows | 35376 |
Number of columns | 62 |
_______________________ | |
Column type frequency: | |
character | 20 |
Date | 1 |
numeric | 40 |
POSIXct | 1 |
________________________ | |
Group variables | None |
Variable type: character
skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
---|---|---|---|---|---|---|---|
cruise_id | 0 | 1.00 | 17 | 17 | 0 | 658 | 0 |
db_sta_id | 0 | 1.00 | 8 | 8 | 0 | 2634 | 0 |
cast_id | 0 | 1.00 | 30 | 31 | 0 | 35376 | 0 |
sta_id | 0 | 1.00 | 11 | 11 | 0 | 2634 | 0 |
sta_code | 0 | 1.00 | 2 | 3 | 0 | 7 | 0 |
time | 324 | 0.99 | 19 | 19 | 0 | 3659 | 0 |
lat_hem | 0 | 1.00 | 1 | 1 | 0 | 1 | 0 |
lon_hem | 0 | 1.00 | 1 | 1 | 0 | 1 | 0 |
rpt_line | 0 | 1.00 | 3 | 5 | 0 | 698 | 0 |
rpt_sta | 0 | 1.00 | 2 | 5 | 0 | 880 | 0 |
ship_name | 0 | 1.00 | 4 | 24 | 0 | 36 | 0 |
ship_code | 0 | 1.00 | 4 | 4 | 0 | 34 | 0 |
data_type | 0 | 1.00 | 2 | 2 | 0 | 5 | 0 |
orig_sta_id | 0 | 1.00 | 1 | 10 | 0 | 4190 | 0 |
data_or | 16129 | 0.54 | 2 | 4 | 0 | 4 | 0 |
cruz_num | 0 | 1.00 | 4 | 8 | 0 | 391 | 0 |
inc_str | 33083 | 0.06 | 19 | 19 | 0 | 120 | 0 |
inc_end | 33083 | 0.06 | 19 | 19 | 0 | 203 | 0 |
pst_lan | 33083 | 0.06 | 19 | 19 | 0 | 80 | 0 |
civil_t | 33084 | 0.06 | 19 | 19 | 0 | 181 | 0 |
Variable type: Date
skim_variable | n_missing | complete_rate | min | max | median | n_unique |
---|---|---|---|---|---|---|
date | 0 | 1 | 1949-02-28 | 2020-01-26 | 1972-02-08 | 6677 |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
cst_cnt | 0 | 1.00 | 1.768850e+04 | 1.021232e+04 | 1.00000e+00 | 8.844750e+03 | 1.768850e+04 | 2.653225e+04 | 3.537600e+04 | ▇▇▇▇▇ |
cruise | 0 | 1.00 | 1.975272e+05 | 2.076550e+03 | 1.94903e+05 | 1.957040e+05 | 1.972020e+05 | 1.990080e+05 | 2.020010e+05 | ▇▅▃▂▂ |
cruz_sta | 0 | 1.00 | 1.975273e+13 | 2.076544e+11 | 1.94903e+13 | 1.957041e+13 | 1.972021e+13 | 1.990081e+13 | 2.020011e+13 | ▇▅▃▂▂ |
quarter | 0 | 1.00 | 2.370000e+00 | 1.090000e+00 | 1.00000e+00 | 1.000000e+00 | 2.000000e+00 | 3.000000e+00 | 4.000000e+00 | ▇▇▁▇▆ |
distance | 14690 | 0.58 | -1.155500e+02 | 1.165800e+02 | -1.92015e+03 | -1.698000e+02 | -8.337000e+01 | -2.928000e+01 | -1.400000e-01 | ▁▁▁▁▇ |
year | 0 | 1.00 | 1.975210e+03 | 2.077000e+01 | 1.94900e+03 | 1.957000e+03 | 1.972000e+03 | 1.990000e+03 | 2.020000e+03 | ▇▅▃▂▂ |
month | 0 | 1.00 | 5.820000e+00 | 3.330000e+00 | 1.00000e+00 | 3.000000e+00 | 6.000000e+00 | 9.000000e+00 | 1.200000e+01 | ▇▆▅▃▆ |
julian_date | 0 | 1.00 | 2.730301e+04 | 7.013290e+03 | 1.79570e+04 | 2.093700e+04 | 2.633750e+04 | 3.308400e+04 | 4.111600e+04 | ▇▃▃▃▃ |
julian_day | 0 | 1.00 | 1.610200e+02 | 1.009800e+02 | 2.00000e+00 | 7.400000e+01 | 1.560000e+02 | 2.460000e+02 | 3.540000e+02 | ▇▇▆▅▆ |
lat_dec | 0 | 1.00 | 3.183000e+01 | 3.620000e+00 | 1.84200e+01 | 2.983000e+01 | 3.242000e+01 | 3.383000e+01 | 4.792000e+01 | ▁▃▇▁▁ |
lat_deg | 0 | 1.00 | 3.134000e+01 | 3.630000e+00 | 1.80000e+01 | 2.900000e+01 | 3.200000e+01 | 3.300000e+01 | 4.700000e+01 | ▁▃▇▁▁ |
lat_min | 0 | 1.00 | 2.978000e+01 | 1.724000e+01 | 0.00000e+00 | 1.476000e+01 | 2.910000e+01 | 4.500000e+01 | 5.990000e+01 | ▇▇▇▆▇ |
lon_dec | 0 | 1.00 | -1.197900e+02 | 3.950000e+00 | -1.64080e+02 | -1.220800e+02 | -1.196600e+02 | -1.174900e+02 | -1.059700e+02 | ▁▁▁▇▃ |
lon_deg | 0 | 1.00 | -1.192800e+02 | 3.970000e+00 | -1.64000e+02 | -1.220000e+02 | -1.190000e+02 | -1.170000e+02 | -1.050000e+02 | ▁▁▁▇▂ |
lon_min | 0 | 1.00 | 3.050000e+01 | 1.730000e+01 | 0.00000e+00 | 1.600000e+01 | 3.030000e+01 | 4.600000e+01 | 5.999000e+01 | ▆▇▇▇▇ |
st_line | 0 | 1.00 | 9.327000e+01 | 2.840000e+01 | 1.00000e+00 | 8.000000e+01 | 9.000000e+01 | 1.030000e+02 | 1.734000e+03 | ▇▁▁▁▁ |
ac_line | 0 | 1.00 | 9.306000e+01 | 2.201000e+01 | 0.00000e+00 | 8.040000e+01 | 9.000000e+01 | 1.035000e+02 | 1.765000e+02 | ▁▂▇▃▁ |
st_station | 0 | 1.00 | 6.210000e+01 | 3.284000e+01 | 5.00000e-01 | 4.000000e+01 | 5.500000e+01 | 8.000000e+01 | 5.310000e+02 | ▇▁▁▁▁ |
ac_sta | 0 | 1.00 | 6.167000e+01 | 3.318000e+01 | -3.06000e+01 | 4.000000e+01 | 5.510000e+01 | 7.990000e+01 | 5.311000e+02 | ▇▂▁▁▁ |
bottom_d | 2281 | 0.94 | 2.243810e+03 | 1.612050e+03 | 7.00000e+00 | 6.470000e+02 | 2.061000e+03 | 3.840000e+03 | 8.230000e+03 | ▇▃▇▁▁ |
secchi | 29791 | 0.16 | 1.982000e+01 | 8.730000e+00 | 1.00000e+00 | 1.300000e+01 | 1.900000e+01 | 2.600000e+01 | 5.800000e+01 | ▃▇▅▁▁ |
forel_u | 33299 | 0.06 | 3.040000e+00 | 1.810000e+00 | 1.00000e+00 | 2.000000e+00 | 3.000000e+00 | 4.000000e+00 | 2.100000e+01 | ▇▁▁▁▁ |
order_occ | 24450 | 0.31 | 5.038000e+01 | 7.126000e+01 | 1.00000e+00 | 1.900000e+01 | 3.900000e+01 | 6.000000e+01 | 3.003000e+03 | ▇▁▁▁▁ |
event_num | 4 | 1.00 | 1.507270e+03 | 1.219840e+03 | 1.00000e+00 | 5.260000e+02 | 1.188000e+03 | 2.212000e+03 | 3.002200e+04 | ▇▁▁▁▁ |
cruz_leg | 32940 | 0.07 | 1.040000e+00 | 4.400000e-01 | 0.00000e+00 | 1.000000e+00 | 1.000000e+00 | 1.000000e+00 | 3.000000e+00 | ▁▇▁▁▁ |
int_chl | 22228 | 0.37 | 5.042000e+01 | 6.066000e+01 | 2.40000e+00 | 2.550000e+01 | 3.300000e+01 | 4.870000e+01 | 1.043200e+03 | ▇▁▁▁▁ |
int_c14 | 33088 | 0.06 | 3.962600e+02 | 4.235200e+02 | 2.95000e+01 | 1.583500e+02 | 2.563500e+02 | 4.673800e+02 | 5.946400e+03 | ▇▁▁▁▁ |
time_zone | 33085 | 0.06 | 8.000000e+00 | 0.000000e+00 | 8.00000e+00 | 8.000000e+00 | 8.000000e+00 | 8.000000e+00 | 8.000000e+00 | ▁▁▇▁▁ |
wave_dir | 22311 | 0.37 | 2.950000e+01 | 8.140000e+00 | 0.00000e+00 | 2.800000e+01 | 3.100000e+01 | 3.300000e+01 | 4.900000e+01 | ▁▁▃▇▁ |
wave_ht | 22772 | 0.36 | 4.320000e+00 | 2.780000e+00 | 0.00000e+00 | 2.000000e+00 | 4.000000e+00 | 6.000000e+00 | 4.000000e+01 | ▇▁▁▁▁ |
wave_prd | 23240 | 0.34 | 6.470000e+00 | 2.290000e+00 | 0.00000e+00 | 5.000000e+00 | 6.000000e+00 | 8.000000e+00 | 5.000000e+01 | ▇▁▁▁▁ |
wind_dir | 1477 | 0.96 | 2.705000e+01 | 9.450000e+00 | 0.00000e+00 | 2.600000e+01 | 3.100000e+01 | 3.300000e+01 | 3.800000e+01 | ▁▁▁▃▇ |
wind_spd | 1679 | 0.95 | 1.114000e+01 | 6.830000e+00 | 0.00000e+00 | 6.000000e+00 | 1.000000e+01 | 1.500000e+01 | 9.000000e+01 | ▇▁▁▁▁ |
barometer | 15342 | 0.57 | 1.016250e+03 | 8.650000e+00 | 1.01400e+02 | 1.013700e+03 | 1.016300e+03 | 1.019100e+03 | 1.112600e+03 | ▁▁▁▁▇ |
dry_t | 15335 | 0.57 | 1.626000e+01 | 2.710000e+00 | 1.70000e+00 | 1.440000e+01 | 1.600000e+01 | 1.790000e+01 | 4.310000e+01 | ▁▇▂▁▁ |
wet_t | 15417 | 0.56 | 1.441000e+01 | 2.630000e+00 | 1.80000e+00 | 1.260000e+01 | 1.420000e+01 | 1.600000e+01 | 5.800000e+01 | ▃▇▁▁▁ |
wea | 8062 | 0.77 | 1.320000e+00 | 1.160000e+00 | 0.00000e+00 | 1.000000e+00 | 1.000000e+00 | 2.000000e+00 | 9.000000e+00 | ▇▃▁▁▁ |
cloud_typ | 25028 | 0.29 | 5.650000e+00 | 2.160000e+00 | 0.00000e+00 | 6.000000e+00 | 6.000000e+00 | 7.000000e+00 | 9.000000e+00 | ▁▁▁▇▂ |
cloud_amt | 22992 | 0.35 | 4.980000e+00 | 3.110000e+00 | 0.00000e+00 | 2.000000e+00 | 6.000000e+00 | 8.000000e+00 | 1.700000e+01 | ▆▃▇▁▁ |
visibility | 25363 | 0.28 | 6.800000e+00 | 1.520000e+00 | 0.00000e+00 | 6.000000e+00 | 7.000000e+00 | 8.000000e+00 | 9.000000e+00 | ▁▁▁▇▅ |
Variable type: POSIXct
skim_variable | n_missing | complete_rate | min | max | median | n_unique |
---|---|---|---|---|---|---|
datetime | 4 | 1 | 1949-02-28 22:42:00 | 2020-01-26 01:51:16 | 1972-02-08 22:25:00 | 34509 |
Time for the chunk ctdcast
to run: 15.76 seconds
ctdcast_bottle
# rename fields
flds_orig <- names(d_bottle)
flds <- make_clean_names(flds_orig)
names(d_bottle) <- flds
d_flds <- bind_rows(
d_flds,
tibble(
table = "ctdcast_bottle",
field_orig = flds_orig,
field = flds))
# write table to database
dbWriteTable(con, "ctdcast_bottle", d_bottle, overwrite=T)
dbSendQuery(con, "CREATE INDEX IF NOT EXISTS ctdcastbottle_staid_idx ON ctdcast_bottle (sta_id)")
## <PqResult>
## SQL CREATE INDEX IF NOT EXISTS ctdcastbottle_staid_idx ON ctdcast_bottle (sta_id)
## ROWS Fetched: 0 [complete]
## Changed: 0
dbSendQuery(con, "CREATE INDEX IF NOT EXISTS ctdcastbottle_cstcnt_idx ON ctdcast_bottle (cst_cnt)")
## <PqResult>
## SQL CREATE INDEX IF NOT EXISTS ctdcastbottle_cstcnt_idx ON ctdcast_bottle (cst_cnt)
## ROWS Fetched: 0 [complete]
## Changed: 0
dbSendQuery(con, "CREATE INDEX IF NOT EXISTS ctdcastbottle_btlcnt_idx ON ctdcast_bottle (btl_cnt)")
## <PqResult>
## SQL CREATE INDEX IF NOT EXISTS ctdcastbottle_btlcnt_idx ON ctdcast_bottle (btl_cnt)
## ROWS Fetched: 0 [complete]
## Changed: 0
# show table summary
skim(d_bottle)
Name | d_bottle |
Number of rows | 889500 |
Number of columns | 62 |
_______________________ | |
Column type frequency: | |
character | 4 |
numeric | 58 |
________________________ | |
Group variables | None |
Variable type: character
skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
---|---|---|---|---|---|---|---|
sta_id | 0 | 1.00 | 11 | 11 | 0 | 2634 | 0 |
depth_id | 0 | 1.00 | 38 | 38 | 0 | 889478 | 0 |
inc_tim | 873953 | 0.02 | 19 | 19 | 0 | 203 | 0 |
dic_quality_comment | 889445 | 0.00 | 28 | 117 | 0 | 37 | 0 |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
cst_cnt | 0 | 1.00 | 17630.51 | 10510.10 | 1.00 | 8483.00 | 17502.00 | 27262.00 | 35376.00 | ▇▇▆▇▇ |
btl_cnt | 0 | 1.00 | 444750.50 | 256776.68 | 1.00 | 222375.75 | 444750.50 | 667125.25 | 889500.00 | ▇▇▇▇▇ |
depthm | 0 | 1.00 | 224.85 | 312.89 | 0.00 | 45.00 | 125.00 | 300.00 | 5351.00 | ▇▁▁▁▁ |
t_deg_c | 10965 | 0.99 | 10.81 | 4.23 | 1.44 | 7.70 | 10.07 | 13.90 | 31.14 | ▃▇▅▁▁ |
salnty | 47355 | 0.95 | 33.84 | 0.46 | 28.37 | 33.49 | 33.86 | 34.19 | 37.03 | ▁▁▃▇▁ |
o2ml_l | 169507 | 0.81 | 3.40 | 2.07 | -0.01 | 1.37 | 3.45 | 5.50 | 11.13 | ▇▅▇▁▁ |
s_theta | 52692 | 0.94 | 25.82 | 1.16 | 20.93 | 24.96 | 25.99 | 26.64 | 250.78 | ▇▁▁▁▁ |
o2sat | 204428 | 0.77 | 57.21 | 37.07 | -0.10 | 21.30 | 54.50 | 97.63 | 214.10 | ▇▅▆▁▁ |
oxy_mmol_kg | 204439 | 0.77 | 149.09 | 90.10 | -0.43 | 61.76 | 151.49 | 240.56 | 485.70 | ▇▅▇▁▁ |
btl_num | 753962 | 0.15 | 10.49 | 6.21 | 0.00 | 5.00 | 10.00 | 16.00 | 25.00 | ▇▇▆▆▂ |
rec_ind | 0 | 1.00 | 4.69 | 1.88 | 3.00 | 3.00 | 3.00 | 7.00 | 7.00 | ▇▁▂▁▆ |
t_prec | 10963 | 0.99 | 2.02 | 0.15 | 1.00 | 2.00 | 2.00 | 2.00 | 9.00 | ▇▁▁▁▁ |
t_qual | 866371 | 0.03 | 7.47 | 1.48 | 6.00 | 6.00 | 6.00 | 9.00 | 9.00 | ▇▁▁▁▇ |
s_prec | 47355 | 0.95 | 2.73 | 0.45 | 2.00 | 2.00 | 3.00 | 3.00 | 3.00 | ▃▁▁▁▇ |
s_qual | 806691 | 0.09 | 7.76 | 1.46 | 6.00 | 6.00 | 9.00 | 9.00 | 9.00 | ▆▁▁▁▇ |
p_qual | 215744 | 0.76 | 9.00 | 0.00 | 6.00 | 9.00 | 9.00 | 9.00 | 9.00 | ▁▁▁▁▇ |
o_qual | 693139 | 0.22 | 8.60 | 1.01 | 6.00 | 9.00 | 9.00 | 9.00 | 9.00 | ▁▁▁▁▇ |
s_thtaq | 815766 | 0.08 | 8.20 | 1.30 | 6.00 | 6.00 | 9.00 | 9.00 | 9.00 | ▃▁▁▁▇ |
o2satq | 660023 | 0.26 | 8.66 | 1.05 | 2.00 | 9.00 | 9.00 | 9.00 | 9.00 | ▁▁▁▁▇ |
chlor_a | 647809 | 0.27 | 0.45 | 1.21 | 0.00 | 0.05 | 0.16 | 0.39 | 66.11 | ▇▁▁▁▁ |
chlqua | 241760 | 0.73 | 9.00 | 0.01 | 8.00 | 9.00 | 9.00 | 9.00 | 9.00 | ▁▁▁▁▇ |
phaeop | 647810 | 0.27 | 0.20 | 0.37 | -3.89 | 0.05 | 0.11 | 0.23 | 65.30 | ▇▁▁▁▁ |
phaqua | 241756 | 0.73 | 9.00 | 0.01 | 8.00 | 9.00 | 9.00 | 9.00 | 9.00 | ▁▁▁▁▇ |
po4u_m | 454050 | 0.49 | 1.56 | 1.04 | 0.00 | 0.46 | 1.57 | 2.47 | 5.21 | ▇▅▆▁▁ |
po4q | 435207 | 0.51 | 9.00 | 0.03 | 4.00 | 9.00 | 9.00 | 9.00 | 9.00 | ▁▁▁▁▇ |
si_o3u_m | 513274 | 0.42 | 26.50 | 27.47 | 0.00 | 3.10 | 18.00 | 41.59 | 196.00 | ▇▂▁▁▁ |
si_o3qu | 376055 | 0.58 | 9.00 | 0.02 | 4.00 | 9.00 | 9.00 | 9.00 | 9.00 | ▁▁▁▁▇ |
no2u_m | 530094 | 0.40 | 0.04 | 0.10 | 0.00 | 0.00 | 0.01 | 0.03 | 8.19 | ▇▁▁▁▁ |
no2q | 344103 | 0.61 | 8.97 | 0.17 | 4.00 | 9.00 | 9.00 | 9.00 | 9.00 | ▁▁▁▁▇ |
no3u_m | 529961 | 0.40 | 17.30 | 14.58 | -0.40 | 0.60 | 18.10 | 30.00 | 95.00 | ▇▆▂▁▁ |
no3q | 352930 | 0.60 | 8.99 | 0.16 | 4.00 | 9.00 | 9.00 | 9.00 | 9.00 | ▁▁▁▁▇ |
nh3u_m | 803150 | 0.10 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.06 | 15.63 | ▇▁▁▁▁ |
nh3q | 61445 | 0.93 | 8.85 | 0.86 | 4.00 | 9.00 | 9.00 | 9.00 | 9.00 | ▁▁▁▁▇ |
c14as1 | 873958 | 0.02 | 9.87 | 24.48 | -0.24 | 0.95 | 2.69 | 8.10 | 584.50 | ▇▁▁▁▁ |
c14a1p | 876740 | 0.01 | 1.27 | 0.44 | 1.00 | 1.00 | 1.00 | 2.00 | 2.00 | ▇▁▁▁▃ |
c14a1q | 17377 | 0.98 | 9.00 | 0.00 | 8.00 | 9.00 | 9.00 | 9.00 | 9.00 | ▁▁▁▁▇ |
c14as2 | 873976 | 0.02 | 9.91 | 25.88 | -0.20 | 0.94 | 2.60 | 8.19 | 948.30 | ▇▁▁▁▁ |
c14a2p | 876758 | 0.01 | 1.27 | 0.44 | 1.00 | 1.00 | 1.00 | 2.00 | 2.00 | ▇▁▁▁▃ |
c14a2q | 17359 | 0.98 | 9.00 | 0.00 | 8.00 | 9.00 | 9.00 | 9.00 | 9.00 | ▁▁▁▁▇ |
dark_as | 865741 | 0.03 | 0.16 | 0.25 | -0.01 | 0.06 | 0.10 | 0.18 | 9.82 | ▇▁▁▁▁ |
dark_ap | 869043 | 0.02 | 1.99 | 0.08 | 1.00 | 2.00 | 2.00 | 2.00 | 2.00 | ▁▁▁▁▇ |
dark_aq | 25542 | 0.97 | 9.00 | 0.00 | 8.00 | 9.00 | 9.00 | 9.00 | 9.00 | ▁▁▁▁▇ |
mean_as | 865740 | 0.03 | 8.54 | 22.14 | -0.22 | 0.99 | 2.50 | 7.07 | 948.30 | ▇▁▁▁▁ |
mean_ap | 869043 | 0.02 | 1.26 | 0.44 | 1.00 | 1.00 | 1.00 | 2.00 | 2.00 | ▇▁▁▁▃ |
mean_aq | 25543 | 0.97 | 9.00 | 0.00 | 8.00 | 9.00 | 9.00 | 9.00 | 9.00 | ▁▁▁▁▇ |
light_p | 869733 | 0.02 | 17.34 | 29.29 | 0.00 | 0.25 | 1.40 | 21.00 | 99.90 | ▇▁▁▁▁ |
r_depth | 1 | 1.00 | 226.41 | 316.25 | 0.00 | 45.00 | 126.00 | 302.00 | 5458.00 | ▇▁▁▁▁ |
r_temp | 46050 | 0.95 | 10.86 | 4.22 | 0.00 | 7.75 | 10.11 | 13.94 | 31.14 | ▂▇▅▁▁ |
r_sal | 52781 | 0.94 | 221.32 | 93.29 | 0.40 | 144.10 | 203.80 | 300.20 | 683.40 | ▃▇▅▁▁ |
r_dynht | 46666 | 0.95 | 0.43 | 0.38 | 0.00 | 0.13 | 0.34 | 0.63 | 3.88 | ▇▂▁▁▁ |
r_nuts | 803130 | 0.10 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.06 | 15.63 | ▇▁▁▁▁ |
r_oxy_mmol_kg | 204439 | 0.77 | 149.09 | 90.10 | -0.43 | 61.76 | 151.49 | 240.56 | 485.70 | ▇▅▇▁▁ |
dic1 | 887501 | 0.00 | 2153.24 | 113.00 | 1948.85 | 2028.33 | 2170.64 | 2253.81 | 2367.80 | ▇▃▅▇▃ |
dic2 | 889276 | 0.00 | 2168.15 | 154.85 | 1969.44 | 2008.98 | 2265.89 | 2315.52 | 2364.42 | ▇▁▁▁▇ |
ta1 | 887416 | 0.00 | 2256.06 | 34.84 | 2181.57 | 2230.32 | 2244.32 | 2278.50 | 2434.90 | ▅▇▃▁▁ |
ta2 | 889266 | 0.00 | 2278.86 | 58.50 | 2198.15 | 2229.06 | 2247.50 | 2316.45 | 2437.00 | ▇▁▇▁▁ |
p_h1 | 889416 | 0.00 | 7.91 | 0.08 | 7.62 | 7.90 | 7.93 | 7.96 | 8.05 | ▁▁▁▇▂ |
p_h2 | 889490 | 0.00 | 7.95 | 0.02 | 7.92 | 7.93 | 7.95 | 7.96 | 7.99 | ▇▂▃▃▂ |
Time for the chunk ctdcast_bottle
to run: 140
seconds
ctdcast_bottle_dic
# ensure bottle.btl_cnt is unique in
stopifnot(d_bottle$btl_cnt %>% duplicated() %>% sum() == 0)
# rename fields
flds_orig <- names(d_DIC)
flds <- make_clean_names(flds_orig)
flds[which(flds_orig == "Cast_Index")] <- "cst_cnt"
flds[which(flds_orig == "Bottle_Index")] <- "btl_cnt"
names(d_DIC) <- flds
d_flds <- bind_rows(
d_flds,
tibble(
table = "ctdcast_bottle_dic",
field_orig = flds_orig,
field = flds))
# ensure uniquely matchable to cast_bottle
x <- anti_join(
d_DIC,
d_bottle,
by="btl_cnt")
stopifnot(nrow(x) == 0)
# write table to database
dbWriteTable(con, "ctdcast_bottle_dic", d_DIC, overwrite=T)
dbSendQuery(con, "CREATE INDEX IF NOT EXISTS ctdcastbottledic_staid_idx ON ctdcast_bottle_dic (sta_id)")
## <PqResult>
## SQL CREATE INDEX IF NOT EXISTS ctdcastbottledic_staid_idx ON ctdcast_bottle_dic (sta_id)
## ROWS Fetched: 0 [complete]
## Changed: 0
dbSendQuery(con, "CREATE INDEX IF NOT EXISTS ctdcastbottledic_cstcnt_idx ON ctdcast_bottle_dic (cst_cnt)")
## <PqResult>
## SQL CREATE INDEX IF NOT EXISTS ctdcastbottledic_cstcnt_idx ON ctdcast_bottle_dic (cst_cnt)
## ROWS Fetched: 0 [complete]
## Changed: 0
dbSendQuery(con, "CREATE INDEX IF NOT EXISTS ctdcastbottledic_btlcnt_idx ON ctdcast_bottle_dic (btl_cnt)")
## <PqResult>
## SQL CREATE INDEX IF NOT EXISTS ctdcastbottledic_btlcnt_idx ON ctdcast_bottle_dic (btl_cnt)
## ROWS Fetched: 0 [complete]
## Changed: 0
# show table summary
skim(d_DIC)
Name | d_DIC |
Number of rows | 2084 |
Number of columns | 23 |
_______________________ | |
Column type frequency: | |
character | 3 |
numeric | 20 |
________________________ | |
Group variables | None |
Variable type: character
skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
---|---|---|---|---|---|---|---|
sta_id | 0 | 1.00 | 11 | 11 | 0 | 33 | 0 |
depth_id | 0 | 1.00 | 38 | 38 | 0 | 2084 | 0 |
dic_quality_comment | 2029 | 0.03 | 28 | 115 | 0 | 36 | 0 |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
id | 0 | 1.00 | 1058.07 | 613.45 | 1.00 | 521.75 | 1061.50 | 1585.25 | 2130.00 | ▇▇▇▇▇ |
cst_cnt | 0 | 1.00 | 33165.93 | 596.82 | 31785.00 | 32780.00 | 33312.00 | 33646.00 | 33973.00 | ▂▂▅▆▇ |
btl_cnt | 0 | 1.00 | 833038.33 | 15412.45 | 796974.00 | 823082.75 | 836834.00 | 845385.50 | 853785.00 | ▂▂▅▅▇ |
cruise | 0 | 1.00 | 201258.07 | 196.70 | 200808.00 | 201108.00 | 201304.00 | 201407.00 | 201507.00 | ▂▁▆▃▇ |
depth_m | 0 | 1.00 | 186.48 | 326.67 | 1.00 | 30.00 | 100.00 | 231.00 | 3542.00 | ▇▁▁▁▁ |
dic1 | 85 | 0.96 | 2153.24 | 113.00 | 1948.85 | 2028.33 | 2170.64 | 2253.81 | 2367.80 | ▇▃▅▇▃ |
dic2 | 1860 | 0.11 | 2168.15 | 154.85 | 1969.44 | 2008.98 | 2265.89 | 2315.52 | 2364.42 | ▇▁▁▁▇ |
ta1 | 0 | 1.00 | 2256.06 | 34.84 | 2181.57 | 2230.32 | 2244.32 | 2278.50 | 2434.90 | ▅▇▃▁▁ |
ta2 | 1850 | 0.11 | 2278.86 | 58.50 | 2198.15 | 2229.06 | 2247.50 | 2316.45 | 2437.00 | ▇▁▇▁▁ |
p_h1 | 2000 | 0.04 | 7.91 | 0.08 | 7.62 | 7.90 | 7.93 | 7.96 | 8.05 | ▁▁▁▇▂ |
p_h2 | 2074 | 0.00 | 7.95 | 0.02 | 7.92 | 7.93 | 7.95 | 7.96 | 7.99 | ▇▂▃▃▂ |
salinity1 | 0 | 1.00 | 33.76 | 0.40 | 32.84 | 33.42 | 33.73 | 34.15 | 34.68 | ▂▇▃▇▁ |
salinity2 | 1849 | 0.11 | 33.83 | 0.52 | 32.94 | 33.35 | 33.66 | 34.29 | 34.82 | ▅▆▁▇▁ |
temperature_deg_c | 0 | 1.00 | 11.05 | 3.80 | 1.52 | 8.25 | 10.00 | 14.01 | 22.75 | ▁▇▅▃▁ |
bottle_salinity | 0 | 1.00 | 33.77 | 0.40 | 32.84 | 33.42 | 33.74 | 34.15 | 34.68 | ▂▇▃▇▁ |
bottle_o2_ml_l | 0 | 1.00 | 3.40 | 2.14 | 0.00 | 1.36 | 3.22 | 5.64 | 7.81 | ▆▅▂▇▁ |
bottle_o2_mmol_kg | 0 | 1.00 | 148.28 | 93.39 | 0.00 | 59.31 | 140.12 | 245.95 | 340.42 | ▆▅▂▇▁ |
sigma_theta | 0 | 1.00 | 25.74 | 0.98 | 22.98 | 24.91 | 25.96 | 26.58 | 27.78 | ▁▅▅▇▂ |
dic_bottle_id1 | 0 | 1.00 | 4269.62 | 3603.41 | 1.00 | 390.75 | 2952.50 | 7503.25 | 10004.00 | ▇▂▂▃▅ |
dic_bottle_id2 | 1852 | 0.11 | 3519.40 | 3441.65 | 2.00 | 331.25 | 2247.00 | 6907.50 | 9959.00 | ▇▂▂▂▃ |
Time for the chunk ctdcast_bottle_dic
to run: 2.989
seconds
fields_renamed
# write table to database
dbWriteTable(con, "fields_renamed", d_flds, overwrite=T)
# show table
datatable(d_flds)
Time for the chunk fields_renamed
to run: 1.857
seconds