Load packages
require(dplyr)
require(zooper)
require(lubridate)
require(readr)
require(tidyr)
require(ggplot2)
require(sf)
require(readxl)
require(stringr)
require(mgcv)
require(purrr)
Load zoop data
zoop_data<-Zoopsynther(Data_type="Community", Sources=c("EMP", "STN", "20mm", "FMWT"), Time_consistency = TRUE)
## [1] "These species have no relatives in their size class common to all datasets and have been removed from one or more size classes: Ostracoda Adult (Meso), Cumacea Adult (Meso), Annelida Adult (Meso), Gammarus Adult (Meso), Orientomysis aspera Adult (Meso), Chironomidae Larva (Meso), Insecta Larva (Meso)"
Read in zoop mass conversions
zoop_mass_conversions<-read_excel("~/ZoopSynth/Data paper/Biomass conversions.xlsx", sheet="Micro and Meso-zooplankton")%>%
mutate(Taxname=case_when(Taxname=="Sinocalanus"~"Sinocalanus doerrii", # Change to help this match to zoop data
TRUE ~ Taxname),
Taxlifestage=paste(Taxname, Lifestage))%>%
select(Taxlifestage, CarbonWeight_ug)
Read in zoop groupings
zoop_groups<-read_csv("Data/zoopcrosswalk2.csv", col_types=cols_only(Taxlifestage="c", IBMR="c"))%>%
distinct()
Load Mysid biomass data
zoop_mysid<-read_excel("Data/1972-2020MysidBPUEMatrix.xlsx", # EMP
sheet="Mysid_BPUE_matrix_1972-2020", na = "NA",
col_types = c(rep("numeric", 4), "date", "text", "text", rep("text", 7), rep("numeric", 8)))%>%
select(Date=SampleDate, Station=StationNZ, BPUE=`Hyperacanthomysis longirostris`)%>% # Only select Hyperacanthomysis longirostris
mutate(Source="EMP")%>%
bind_rows(read_csv("Data/FMWT STN 2007to2019 Mysid BPUE.csv", # FMWT/STN
col_types=cols_only(Station="c", SampleDate="c", Project="c", `Hyperacanthomysis longirostris`="d"))%>%
rename(Date=SampleDate, Source=Project, BPUE=`Hyperacanthomysis longirostris`)%>% # Only select Hyperacanthomysis longirostris
mutate(Date=mdy(Date),
Station=recode(Station, MONT="Mont", HONK="Honk")))%>% #Get station names to match to main dataset
mutate(BPUE_mysid=BPUE*1000, # Convert to ug
Taxlifestage="Hyperacanthomysis longirostris Adult",
SampleID=paste(Source, Station, Date),
SizeClass="Macro")%>%
select(SampleID, Taxlifestage, SizeClass, BPUE_mysid)
Start processing the zoop data
zoop_data_mass<-zoop_data%>%
mutate(Taxlifestage=str_remove(Taxlifestage, fixed("_UnID")))%>%
filter(
!(SizeClass=="Meso" & #eliminating species which are counted in meso and micro and retained better in the micro net from the meso calcs
Taxlifestage%in%c("Asplanchna Adult", "Copepoda Larva","Cyclopoida Juvenile", "Eurytemora Larva", "Harpacticoida Undifferentiated",
"Keratella Adult", "Limnoithona Adult", "Limnoithona Juvenile", "Limnoithona sinenesis Adult", "Limnoithona tetraspina
Adult", "Oithona Adult", "Oithona Juvenile", "Oithona davisae Adult", "Polyarthra Adult","Pseudodiaptomus Larva",
"Rotifera Adult", "Sinocalanus doerrii Larva", "Synchaeta Adult", "Synchaeta bicornis Adult", "Trichocerca Adult")) &
!(SizeClass=="Micro" &Taxlifestage%in%c("Cirripedia Larva", "Cyclopoida Adult", "Oithona similis")) & #removing categories better retained in meso net from micro net matrix
Order!="Amphipoda" & # Remove amphipods
(Order!="Mysida" | Taxlifestage=="Hyperacanthomysis longirostris Adult"))%>% #Only retain Hyperacanthomysis longirostris
mutate(Taxlifestage=recode(Taxlifestage, `Synchaeta bicornis Adult`="Synchaeta Adult", # Change some names to match to biomass conversion dataset
`Pseudodiaptomus Adult`="Pseudodiaptomus forbesi Adult",
`Acanthocyclops vernalis Adult`="Acanthocyclops Adult"))%>%
left_join(zoop_mass_conversions, by="Taxlifestage")%>% # Add biomass conversions
left_join(zoop_mysid, by=c("SampleID", "Taxlifestage", "SizeClass"))%>% # Add mysid biomass
left_join(zoop_groups, by="Taxlifestage")%>% # Add IBMR categories
mutate(BPUE=if_else(Taxlifestage=="Hyperacanthomysis longirostris Adult", BPUE_mysid, CPUE*CarbonWeight_ug))%>% # Create 1 BPUE variable
filter(!is.na(BPUE) & !is.na(Latitude) & !is.na(Longitude) & !is.na(SalSurf))%>% # Removes any data without BPUE, which is currently restricted to Decapod Larvae, and H. longirostris from STN. Also removes 20mm and EMP EZ stations without coordinates
group_by(IBMR)%>%
mutate(flag=if_else(all(c("Micro", "Meso")%in%SizeClass), "Remove", "Keep"))%>% # This and the next 2 lines are meant to ensure that all categories are consistent across the surveys. Since only EMP samples microzoops, only EMP data can be used for categories that include both micro and mesozoops.
ungroup()%>%
filter(!(flag=="Remove" & Source!="EMP"))%>%
select(SampleID, Station, Latitude, Longitude, SalSurf, Date, Year, IBMR, BPUE)%>%
group_by(across(-BPUE))%>%
summarise(BPUE=sum(BPUE), .groups="drop")%>% # Sum each IBMR categories
st_as_sf(coords=c("Longitude", "Latitude"), crs=4326)%>%
st_transform(crs=st_crs(deltamapr::R_EDSM_Subregions_Mahardja))%>% # This and next 4 lines to filter data to Suisun Marsh
st_join(deltamapr::R_EDSM_Subregions_Mahardja%>%
select(SubRegion))%>%
st_drop_geometry()%>%
filter(SubRegion=="Suisun Marsh")%>%
mutate(doy=yday(Date), #Day of year
Year_fac=factor(Year), # Factor year for model random effect
Station_fac=factor(Station), # Factor station for model random effect
across(c(SalSurf, doy), list(s=~(.x-mean(.x))/sd(.x))), # Center and standardize predictors
BPUE_log1p=log(BPUE+1)) # log1p transform BPUE for model
write_csv(zoop_data_mass, "Outputs/zoop_data_mass.csv") # Save data for Rosie
Set up prediction data for model
newdata<-expand_grid(date=mdy(paste(1:12, 15, 2001, sep="/")), # The 15th of each month on a non-leap year
SalSurf=seq(min(round(zoop_data_mass$SalSurf), 1),
round(max(zoop_data_mass$SalSurf), 1), by=0.1))%>% # Salinity sequence nicely rounded to 1 decimal
mutate(Month=month(date),
doy=yday(date), # Day of year
SalSurf_s=(SalSurf-mean(zoop_data_mass$SalSurf))/sd(zoop_data_mass$SalSurf), # center and standardize salinity to match data
doy_s=(doy-mean(zoop_data_mass$doy))/sd(zoop_data_mass$doy), # center and standardize doy to match data
Year_fac="2010", # Set arbitrary year (won't be used)
Station_fac="NZ032")%>% # Set arbitrary station (won't be used)
select(Month, doy, doy_s, Year_fac, Station_fac, SalSurf, SalSurf_s)
model
sal_model<-function(group){
par(mfrow=c(2,2))
cat("<<<<<<<<<<<<<<<<<<<<<<< modeling", group, ">>>>>>>>>>>>>>>>>>>>>>>>>\n\n")
model<-gam(BPUE_log1p ~ te(SalSurf_s, doy_s, k=c(5,5), bs=c("cs", "cc")) +
s(Year_fac, bs="re") + s(Station_fac, bs="re"),
data=filter(zoop_data_mass, IBMR==group & Year>=2000),
method="REML")
cat("-------------gam check-------------\n")
gam.check(model)
cat("\n\n-------------summary-------------\n")
print(summary(model))
sal<-predict(model, type="terms", terms="te(SalSurf_s,doy_s)", newdata=newdata, se.fit=T)%>%
as_tibble()%>%
mutate(across(everything(), as.vector))%>% # Make everything tidy
rename(fit=starts_with("fit"), se=starts_with("se.fit"))%>%
bind_cols(newdata%>% # Add covariate columns before these columns
select(-Year_fac, -Station_fac, -doy_s, -SalSurf_s),
.)
out<-list(model=model, sal=sal)
return(out)
}
Apply model to all groups
sal_models<-map(set_names(unique(zoop_data_mass$IBMR)), sal_model)
## <<<<<<<<<<<<<<<<<<<<<<< modeling acartela >>>>>>>>>>>>>>>>>>>>>>>>>
##
## -------------gam check-------------
##
## Method: REML Optimizer: outer newton
## full convergence after 6 iterations.
## Gradient range [-0.0001530318,0.0001289009]
## (score 2406.445 & scale 1.608608).
## Hessian positive definite, eigenvalue range [1.371107,703.6882].
## Model rank = 49 / 49
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## te(SalSurf_s,doy_s) 19.00 15.43 0.93 <0.0000000000000002 ***
## s(Year_fac) 21.00 19.18 NA NA
## s(Station_fac) 8.00 5.67 NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## -------------summary-------------
##
## Family: gaussian
## Link function: identity
##
## Formula:
## BPUE_log1p ~ te(SalSurf_s, doy_s, k = c(5, 5), bs = c("cs", "cc")) +
## s(Year_fac, bs = "re") + s(Station_fac, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.7895 0.2326 16.29 <0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## te(SalSurf_s,doy_s) 15.435 19 1006.21 <0.0000000000000002 ***
## s(Year_fac) 19.179 20 23.97 <0.0000000000000002 ***
## s(Station_fac) 5.668 7 10.74 <0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.802 Deviance explained = 80.8%
## -REML = 2406.4 Scale est. = 1.6086 n = 1408
## <<<<<<<<<<<<<<<<<<<<<<< modeling daphnia >>>>>>>>>>>>>>>>>>>>>>>>>
##
## -------------gam check-------------
##
## Method: REML Optimizer: outer newton
## full convergence after 8 iterations.
## Gradient range [-0.000003802226,0.000002679368]
## (score 1894.986 & scale 2.037998).
## Hessian positive definite, eigenvalue range [0.8837161,521.671].
## Model rank = 47 / 47
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## te(SalSurf_s,doy_s) 19.0 12.5 0.95 0.01 **
## s(Year_fac) 21.0 16.3 NA NA
## s(Station_fac) 6.0 3.1 NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## -------------summary-------------
##
## Family: gaussian
## Link function: identity
##
## Formula:
## BPUE_log1p ~ te(SalSurf_s, doy_s, k = c(5, 5), bs = c("cs", "cc")) +
## s(Year_fac, bs = "re") + s(Station_fac, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.2868 0.1342 9.589 <0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## te(SalSurf_s,doy_s) 12.524 19 59.064 < 0.0000000000000002 ***
## s(Year_fac) 16.276 20 4.312 < 0.0000000000000002 ***
## s(Station_fac) 3.097 5 2.102 0.00872 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.486 Deviance explained = 50.2%
## -REML = 1895 Scale est. = 2.038 n = 1044
## <<<<<<<<<<<<<<<<<<<<<<< modeling eurytem >>>>>>>>>>>>>>>>>>>>>>>>>
##
## -------------gam check-------------
##
## Method: REML Optimizer: outer newton
## full convergence after 7 iterations.
## Gradient range [-0.000009770776,0.000008031399]
## (score 2233.818 & scale 2.472308).
## Hessian positive definite, eigenvalue range [1.241239,582.1755].
## Model rank = 49 / 49
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## te(SalSurf_s,doy_s) 19.00 14.20 0.76 <0.0000000000000002 ***
## s(Year_fac) 21.00 16.74 NA NA
## s(Station_fac) 8.00 4.56 NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## -------------summary-------------
##
## Family: gaussian
## Link function: identity
##
## Formula:
## BPUE_log1p ~ te(SalSurf_s, doy_s, k = c(5, 5), bs = c("cs", "cc")) +
## s(Year_fac, bs = "re") + s(Station_fac, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.3239 0.2032 21.28 <0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## te(SalSurf_s,doy_s) 14.196 19 193.868 <0.0000000000000002 ***
## s(Year_fac) 16.735 20 4.897 <0.0000000000000002 ***
## s(Station_fac) 4.558 7 9.354 <0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.667 Deviance explained = 67.7%
## -REML = 2233.8 Scale est. = 2.4723 n = 1165
## <<<<<<<<<<<<<<<<<<<<<<< modeling othcalad >>>>>>>>>>>>>>>>>>>>>>>>>
##
## -------------gam check-------------
##
## Method: REML Optimizer: outer newton
## full convergence after 6 iterations.
## Gradient range [-0.0006356981,0.0005147218]
## (score 2440.351 & scale 2.001391).
## Hessian positive definite, eigenvalue range [1.704383,672.178].
## Model rank = 49 / 49
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## te(SalSurf_s,doy_s) 19.00 16.06 0.92 <0.0000000000000002 ***
## s(Year_fac) 21.00 17.61 NA NA
## s(Station_fac) 8.00 5.78 NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## -------------summary-------------
##
## Family: gaussian
## Link function: identity
##
## Formula:
## BPUE_log1p ~ te(SalSurf_s, doy_s, k = c(5, 5), bs = c("cs", "cc")) +
## s(Year_fac, bs = "re") + s(Station_fac, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.7460 0.2445 23.5 <0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## te(SalSurf_s,doy_s) 16.061 19 331.388 <0.0000000000000002 ***
## s(Year_fac) 17.614 20 8.673 <0.0000000000000002 ***
## s(Station_fac) 5.777 7 10.936 <0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.525 Deviance explained = 53.9%
## -REML = 2440.4 Scale est. = 2.0014 n = 1345
## <<<<<<<<<<<<<<<<<<<<<<< modeling othcaljuv >>>>>>>>>>>>>>>>>>>>>>>>>
##
## -------------gam check-------------
##
## Method: REML Optimizer: outer newton
## full convergence after 7 iterations.
## Gradient range [-0.0002635295,0.00007143662]
## (score 2060.153 & scale 0.9402621).
## Hessian positive definite, eigenvalue range [1.938979,718.1505].
## Model rank = 49 / 49
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## te(SalSurf_s,doy_s) 19.00 16.52 0.88 <0.0000000000000002 ***
## s(Year_fac) 21.00 15.81 NA NA
## s(Station_fac) 8.00 6.49 NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## -------------summary-------------
##
## Family: gaussian
## Link function: identity
##
## Formula:
## BPUE_log1p ~ te(SalSurf_s, doy_s, k = c(5, 5), bs = c("cs", "cc")) +
## s(Year_fac, bs = "re") + s(Station_fac, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.7008 0.2153 31.12 <0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## te(SalSurf_s,doy_s) 16.517 19 108.367 <0.0000000000000002 ***
## s(Year_fac) 15.814 20 4.031 <0.0000000000000002 ***
## s(Station_fac) 6.488 7 47.553 <0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.414 Deviance explained = 43%
## -REML = 2060.2 Scale est. = 0.94026 n = 1437
## <<<<<<<<<<<<<<<<<<<<<<< modeling othclad >>>>>>>>>>>>>>>>>>>>>>>>>
##
## -------------gam check-------------
##
## Method: REML Optimizer: outer newton
## full convergence after 8 iterations.
## Gradient range [-0.00000176167,0.0000007777305]
## (score 1943.508 & scale 1.348785).
## Hessian positive definite, eigenvalue range [1.651184,601.6864].
## Model rank = 49 / 49
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## te(SalSurf_s,doy_s) 19.00 12.82 0.95 0.01 **
## s(Year_fac) 21.00 18.29 NA NA
## s(Station_fac) 8.00 5.02 NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## -------------summary-------------
##
## Family: gaussian
## Link function: identity
##
## Formula:
## BPUE_log1p ~ te(SalSurf_s, doy_s, k = c(5, 5), bs = c("cs", "cc")) +
## s(Year_fac, bs = "re") + s(Station_fac, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.589 0.178 8.927 <0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## te(SalSurf_s,doy_s) 12.82 19 330.89 <0.0000000000000002 ***
## s(Year_fac) 18.29 20 11.07 <0.0000000000000002 ***
## s(Station_fac) 5.02 7 10.64 <0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.561 Deviance explained = 57.4%
## -REML = 1943.5 Scale est. = 1.3488 n = 1204
## <<<<<<<<<<<<<<<<<<<<<<< modeling pdiapfor >>>>>>>>>>>>>>>>>>>>>>>>>
##
## -------------gam check-------------
##
## Method: REML Optimizer: outer newton
## full convergence after 8 iterations.
## Gradient range [-0.001381544,0.001112387]
## (score 2560.156 & scale 1.87464).
## Hessian positive definite, eigenvalue range [1.017025,716.6863].
## Model rank = 49 / 49
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## te(SalSurf_s,doy_s) 19.00 15.08 0.88 <0.0000000000000002 ***
## s(Year_fac) 21.00 19.11 NA NA
## s(Station_fac) 8.00 6.13 NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## -------------summary-------------
##
## Family: gaussian
## Link function: identity
##
## Formula:
## BPUE_log1p ~ te(SalSurf_s, doy_s, k = c(5, 5), bs = c("cs", "cc")) +
## s(Year_fac, bs = "re") + s(Station_fac, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.0523 0.2856 17.69 <0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## te(SalSurf_s,doy_s) 15.076 19 1649.53 <0.0000000000000002 ***
## s(Year_fac) 19.110 20 20.94 <0.0000000000000002 ***
## s(Station_fac) 6.127 7 22.48 <0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.701 Deviance explained = 71%
## -REML = 2560.2 Scale est. = 1.8746 n = 1434
## <<<<<<<<<<<<<<<<<<<<<<< modeling allcopnaup >>>>>>>>>>>>>>>>>>>>>>>>>
##
## -------------gam check-------------
##
## Method: REML Optimizer: outer newton
## full convergence after 9 iterations.
## Gradient range [-0.0002665227,0.0001511567]
## (score 1041.11 & scale 3.48814).
## Hessian positive definite, eigenvalue range [0.0002664258,245.3791].
## Model rank = 43 / 43
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## te(SalSurf_s,doy_s) 19.000000 9.993763 0.9 0.005 **
## s(Year_fac) 21.000000 17.626013 NA NA
## s(Station_fac) 2.000000 0.000836 NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## -------------summary-------------
##
## Family: gaussian
## Link function: identity
##
## Formula:
## BPUE_log1p ~ te(SalSurf_s, doy_s, k = c(5, 5), bs = c("cs", "cc")) +
## s(Year_fac, bs = "re") + s(Station_fac, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.8375 0.2524 7.279 0.00000000000146 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## te(SalSurf_s,doy_s) 9.9937631 19 18.626 <0.0000000000000002 ***
## s(Year_fac) 17.6260129 20 7.318 <0.0000000000000002 ***
## s(Station_fac) 0.0008362 1 0.000 0.546
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.454 Deviance explained = 48.4%
## -REML = 1041.1 Scale est. = 3.4881 n = 491
## <<<<<<<<<<<<<<<<<<<<<<< modeling limno >>>>>>>>>>>>>>>>>>>>>>>>>
##
## -------------gam check-------------
##
## Method: REML Optimizer: outer newton
## full convergence after 7 iterations.
## Gradient range [-0.000002803489,0.000002590693]
## (score 921.1552 & scale 2.085479).
## Hessian positive definite, eigenvalue range [0.474912,247.7156].
## Model rank = 43 / 43
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## te(SalSurf_s,doy_s) 19.000 14.242 0.95 0.09 .
## s(Year_fac) 21.000 10.381 NA NA
## s(Station_fac) 2.000 0.976 NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## -------------summary-------------
##
## Family: gaussian
## Link function: identity
##
## Formula:
## BPUE_log1p ~ te(SalSurf_s, doy_s, k = c(5, 5), bs = c("cs", "cc")) +
## s(Year_fac, bs = "re") + s(Station_fac, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.2650 0.4288 14.61 <0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## te(SalSurf_s,doy_s) 14.2416 19 84.408 < 0.0000000000000002 ***
## s(Year_fac) 10.3811 20 1.092 0.00356 **
## s(Station_fac) 0.9758 1 40.575 < 0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.73 Deviance explained = 74.4%
## -REML = 921.16 Scale est. = 2.0855 n = 496
## <<<<<<<<<<<<<<<<<<<<<<< modeling mysid >>>>>>>>>>>>>>>>>>>>>>>>>
##
## -------------gam check-------------
##
## Method: REML Optimizer: outer newton
## full convergence after 6 iterations.
## Gradient range [-0.00000009163881,0.00000006254805]
## (score 1240.961 & scale 3.302209).
## Hessian positive definite, eigenvalue range [1.772933,293.8917].
## Model rank = 48 / 48
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## te(SalSurf_s,doy_s) 19.00 14.46 0.92 0.01 **
## s(Year_fac) 21.00 17.82 NA NA
## s(Station_fac) 7.00 5.37 NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## -------------summary-------------
##
## Family: gaussian
## Link function: identity
##
## Formula:
## BPUE_log1p ~ te(SalSurf_s, doy_s, k = c(5, 5), bs = c("cs", "cc")) +
## s(Year_fac, bs = "re") + s(Station_fac, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.4655 0.6154 8.881 <0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## te(SalSurf_s,doy_s) 14.456 19 46.69 <0.0000000000000002 ***
## s(Year_fac) 17.818 20 7.94 <0.0000000000000002 ***
## s(Station_fac) 5.366 6 15.32 <0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.467 Deviance explained = 50.1%
## -REML = 1241 Scale est. = 3.3022 n = 588
## <<<<<<<<<<<<<<<<<<<<<<< modeling othcyc >>>>>>>>>>>>>>>>>>>>>>>>>
##
## -------------gam check-------------
##
## Method: REML Optimizer: outer newton
## full convergence after 8 iterations.
## Gradient range [-0.0000002737279,0.0000001591475]
## (score 782.4187 & scale 1.203426).
## Hessian positive definite, eigenvalue range [0.4770929,247.7024].
## Model rank = 43 / 43
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## te(SalSurf_s,doy_s) 19.000 13.540 0.92 0.11
## s(Year_fac) 21.000 10.271 NA NA
## s(Station_fac) 2.000 0.978 NA NA
##
##
## -------------summary-------------
##
## Family: gaussian
## Link function: identity
##
## Formula:
## BPUE_log1p ~ te(SalSurf_s, doy_s, k = c(5, 5), bs = c("cs", "cc")) +
## s(Year_fac, bs = "re") + s(Station_fac, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.5247 0.3425 27.81 <0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## te(SalSurf_s,doy_s) 13.5395 19 42.507 < 0.0000000000000002 ***
## s(Year_fac) 10.2705 20 1.035 0.00573 **
## s(Station_fac) 0.9782 1 45.167 < 0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.591 Deviance explained = 61.1%
## -REML = 782.42 Scale est. = 1.2034 n = 496
## <<<<<<<<<<<<<<<<<<<<<<< modeling other >>>>>>>>>>>>>>>>>>>>>>>>>
##
## -------------gam check-------------
##
## Method: REML Optimizer: outer newton
## full convergence after 11 iterations.
## Gradient range [-0.0002193929,0.000137932]
## (score 1034.276 & scale 3.43328).
## Hessian positive definite, eigenvalue range [0.0002193328,245.3345].
## Model rank = 43 / 43
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## te(SalSurf_s,doy_s) 19.000000 11.875902 1.02 0.65
## s(Year_fac) 21.000000 15.819162 NA NA
## s(Station_fac) 2.000000 0.000715 NA NA
##
##
## -------------summary-------------
##
## Family: gaussian
## Link function: identity
##
## Formula:
## BPUE_log1p ~ te(SalSurf_s, doy_s, k = c(5, 5), bs = c("cs", "cc")) +
## s(Year_fac, bs = "re") + s(Station_fac, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.7537 0.1888 30.48 <0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## te(SalSurf_s,doy_s) 11.8759018 19 11.915 <0.0000000000000002 ***
## s(Year_fac) 15.8191618 20 3.721 <0.0000000000000002 ***
## s(Station_fac) 0.0007154 1 0.000 0.533
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.373 Deviance explained = 40.9%
## -REML = 1034.3 Scale est. = 3.4333 n = 491
Extract salinity conversions
sal_conversions<-map_dfr(sal_models, ~.x[["sal"]], .id = "IBMR")
str(sal_conversions)
## tibble [22,608 x 6] (S3: tbl_df/tbl/data.frame)
## $ IBMR : chr [1:22608] "acartela" "acartela" "acartela" "acartela" ...
## $ Month : num [1:22608] 1 1 1 1 1 1 1 1 1 1 ...
## $ doy : num [1:22608] 15 15 15 15 15 15 15 15 15 15 ...
## $ SalSurf: num [1:22608] 0 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 ...
## $ fit : num [1:22608] -3.25 -3.11 -2.97 -2.84 -2.7 ...
## $ se : num [1:22608] 0.428 0.395 0.364 0.335 0.309 ...
plot salinity conversions
ggplot(sal_conversions, aes(x=SalSurf, y=fit, ymin=fit-se, ymax=fit+se, fill=IBMR))+
geom_ribbon(alpha=0.4)+
ylab("fit (log scale)")+
facet_wrap(~Month)+
scale_fill_viridis_d()+
theme_bw()
Load in SMSCG modeled salinity
SMSCG_sal<-read_csv("Data/SMSCG salinity modeling/bdl_summer_fall_2022_psu_filtered.csv", skip = 10)%>%
mutate(Month=month(datetime))%>%
drop_na()%>%
group_by(Month)%>%
summarise(across(contains("_bn_"), ~round(mean(.x), 1)))
## Rows: 30048 Columns: 5
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## dbl (4): bdl_bn_noact, bdl_bn_smscg4, bdl_bn_smscg6, bdl_dry_noact
## dttm (1): datetime
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
Plot SMSCG modeled salinity
ggplot(pivot_longer(SMSCG_sal, cols=contains("_bn_"), names_to="Scenario", values_to="Salinity")%>% # Prepare data for easier plotting
mutate(Scenario=factor(Scenario, levels=c("bdl_bn_noact", "bdl_bn_smscg6", "bdl_bn_smscg4"))),
aes(x=Month, y=Salinity, color=Scenario))+
geom_line()+
scale_color_viridis_d(direction = -1)+
scale_x_continuous(breaks=1:12)+
theme_bw()+
theme(legend.position = "bottom")
salinity conversion function
sal_fix<-function(data, sal_conversions, sal_target, target_column, months){
salinity_conversions<-sal_conversions%>%
filter(Month%in%months)%>% ## Only choose the months to be included
mutate(SalSurf=as.character(SalSurf))%>% # Change to character for safer joining
select(IBMR, Month, SalSurf, fit)
# Are any target salinities outside the range of the model?
target_outside<-setdiff(as.character(sal_target[[target_column]]), salinity_conversions$SalSurf)
if(length(target_outside)>0){
cat("Some target salinities are out of range for the prediction. They are:", sort(target_outside))
}
# select target salinities from the conversions dataset
sal_conversions_target<-salinity_conversions%>%
semi_join(sal_target%>%
mutate(SalSurf=as.character(.data[[target_column]])),
by=c("Month", "SalSurf"))%>%
select(IBMR, Month, fit_target=fit)
if(any(duplicated(paste(sal_conversions_target$Month, sal_conversions_target$IBMR)))){
stop("sal_conversions were duplicated in filtering out the target salinities")
}
#Make sure all months are present for each IBMR
monthcheck<-sal_conversions_target%>%
group_by(IBMR)%>%
summarise(N=n_distinct(Month))
if(any(monthcheck$N!=length(months))){
stop("Some months are missing in the sal_target")
}
# Set up salinity conversion factors for data and target
salinity_conversions_correction<-salinity_conversions%>%
left_join(sal_conversions_target, by=c("IBMR", "Month"))%>% # Join targets to full set of conversions
mutate(Correction=fit_target-fit)%>% # Calculate correction factor
select(IBMR, Month, SalSurf, Correction)
# Pull unique salinities for outrange (to test for any data salinities not present in model)
sals<-unique(salinity_conversions_correction$SalSurf)
# Convert salinity to a character rounded to 1 decimal for safer joining
data<-data%>%
mutate(Sal_merge=as.character(round(SalSurf, 1)))
# Are any salinities from the data not present in the model?
outrange<-setdiff(data$Sal_merge, sals)
if(length(outrange)>0){
cat("Some salinities are out of range for the prediction. They are:", sort(outrange))
}
# Join salinity conversions and calculate adjusted BPUE
data<-data%>%
left_join(salinity_conversions_correction, by=c("Month", "Sal_merge"="SalSurf", "IBMR"))%>%
mutate(BPUE_adj=exp(BPUE_log1p+Correction)-1) # Calculate adjusted BPUE and undo the log1p transformation
return(data)
}
Prepare data for conversion
# From Rosie:
#Calculate water year function
wtr_yr <- function(dates, start_month=12) {
# Convert dates into POSIXlt
dates.posix = as.POSIXlt(dates)
# Year offset
offset = ifelse(dates.posix$mon == start_month - 1, 1, 0)
# Water year
adj.year = dates.posix$year + 1900 + offset
# Return the water year
adj.year
}
# Read in water year types
year_types<-read_csv(url("https://raw.githubusercontent.com/rosehartman/DCGmodeling/main/yeartypes.csv"))%>%
rename(Yearadj = Year)
## Rows: 52 Columns: 4
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): Yr_type, Drought
## dbl (2): Year, Index
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Choose the months to be used
months<-c(6,7,8,9,10)
# Filter data to what we want to apply salinity conversions to
zoop_data_mass_bn<-zoop_data_mass%>%
mutate(Yearadj = wtr_yr(Date),
Month=month(Date)) %>%
filter(Year > 2000)%>%
left_join(year_types) %>%
filter(Yr_type == "Below Normal" & Month %in% months)%>%
select(Date, Month, Year, SalSurf, IBMR, BPUE, BPUE_log1p)
## Joining, by = "Yearadj"
Apply the conversion function
zoop_corrected<-map_dfr(set_names(c("bdl_bn_noact", "bdl_bn_smscg4", "bdl_bn_smscg6")), ~sal_fix(zoop_data_mass_bn, sal_conversions, SMSCG_sal, .x, months), .id="Scenario")%>%
mutate(Scenario=factor(Scenario, levels=c("bdl_bn_noact", "bdl_bn_smscg6", "bdl_bn_smscg4")))
Plot the result
ggplot(zoop_corrected, aes(x=Date, y=BPUE_adj, color=Scenario))+
geom_point(alpha=0.4)+
scale_y_continuous(trans="log1p", breaks = c(0, 10^(1:6)))+
scale_color_viridis_d(direction = -1, end = 0.9)+
facet_wrap(~paste(IBMR, Year), scales="free", ncol=5)+
theme_bw()+
theme(legend.position = "bottom")
Plot BPUE vs BPUE_adj
ggplot(zoop_corrected, aes(x=BPUE, y=BPUE_adj, color=Scenario))+
geom_point(alpha=0.4)+
scale_y_continuous(trans="log1p", breaks = c(0, 10^(1:6)))+
scale_x_continuous(trans="log1p", breaks = c(0, 10^(1:6)))+
scale_color_viridis_d(direction = -1, end = 0.9)+
facet_wrap(~IBMR, scales="free")+
theme_bw()+
theme(legend.position = "bottom")
Summarise zoops by IBMR, scenario, and month
zoop_bn_sum<-zoop_corrected%>%
group_by(IBMR, Month, Scenario)%>%
summarise(BPUE_sum=sum(BPUE_adj), BPUE_sd=sd(BPUE_adj), .groups="drop")
write_csv(zoop_bn_sum, "Outputs/zoop_salinity_corrected_sum.csv")
Plot summarized CPUE +/- SD
ggplot(zoop_bn_sum, aes(x=Month, y=BPUE_sum, ymin=BPUE_sum-BPUE_sd, ymax=BPUE_sum+BPUE_sd, fill=Scenario))+
geom_pointrange(position = position_dodge(width=0.6), shape=21, color="black", size=1)+
scale_fill_viridis_d(direction = -1, end = 0.9)+
facet_wrap(~IBMR, scales="free")+
theme_bw()+
theme(legend.position = "bottom")