require(dplyr)
require(DroughtData)
require(ggplot2)
require(patchwork)
require(knitr)
require(car)
require(multcomp)
require(emmeans)
require(stringr)
require(readr)
save_dir<-file.path("FLOATDrought", "Outputs")

Functions

model_plotter<-function(model, data){
  data<-data%>%
    mutate(Residuals=resid(model),
           Fitted=predict(model))
  
  p_hist<-ggplot(data, aes(x=Residuals))+
    geom_histogram()+
    xlab("Residuals (°C)")+
    theme_bw()
  
  p_res_fit<-ggplot(data, aes(x=Residuals, y=Fitted))+
    geom_point()+
    ylab("Predicted temperature (°C)")+
    xlab("Residuals (°C)")+
    theme_bw()
  
  p_obs_fit<-ggplot(data, aes(x=Temperature, y=Fitted))+
    geom_point()+
    geom_abline(slope=1, intercept=0, color="red")+
    ylab("Predicted temperature (°C)")+
    xlab("Observed temperature (°C)")+
    theme_bw()
  
  out<-(p_hist+plot_layout(ncol=1))+(p_res_fit+p_obs_fit+plot_layout(ncol=2))+plot_layout(nrow=2, widths=c(1, 0.5, 0.5))
  
  return(out)
}

tukey_plotter<-function(model, data, data_type, model_type){
  
  tuk<-emmeans(model, list(data=data_type, model=model_type))
  
  tuk_data<-as_tibble(cld(tuk$data, sort=FALSE, Letters = letters))%>%
    mutate(.group=str_remove_all(.group, fixed(" ")))%>%
    left_join(data%>%
                group_by(across(all_of(data_type)))%>%
                summarise(max_temp=max(Temperature), .groups="drop"),
              by=data_type)
  
  tuk_model<-as_tibble(cld(tuk$model, sort=FALSE, Letters = letters))%>%
    mutate(.group=str_remove_all(.group, fixed(" ")))%>%
    left_join(data%>%
                group_by(across(all_of(model_type)))%>%
                summarise(max_temp=max(Temperature), .groups="drop"),
              by=model_type)
  
  p_data<-ggplot(tuk_data, aes(x=.data[[data_type]], y=emmean, ymin=lower.CL, ymax=upper.CL, label=.group))+
    geom_boxplot(data=data, aes(x=.data[[data_type]], y=Temperature), inherit.aes = FALSE)+
    geom_pointrange(color="red", position=position_nudge(x=0.1))+
    geom_text(aes(y=max_temp+(max(data$Temperature)-min(data$Temperature))/20), size=6)+
    ylab("Temperature (°C)")+
    theme_bw(base_size=16)
  
  p_model<-ggplot(tuk_model, aes(x=.data[[model_type]], y=emmean, ymin=lower.CL, ymax=upper.CL, label=.group))+
    geom_boxplot(data=data, aes(x=.data[[model_type]], y=Temperature), inherit.aes = FALSE)+
    geom_pointrange(color="red", position=position_nudge(x=0.1))+
    geom_text(aes(y=max_temp+(max(data$Temperature)-min(data$Temperature))/20), angle=if_else(model_type=="Year_fac", 90, 0), hjust=if_else(model_type=="Year_fac", "left", NA_character_), vjust=0.25, size=6)+
    ylab("Temperature (°C)")+
    theme_bw(base_size=16)+
    {if(model_type=="Year_fac"){
      list(geom_tile(data=data, 
                     aes(x=Year_fac, y=min(Temperature)-(max(Temperature)-min(Temperature))/20, 
                         fill=Drought, height=(max(Temperature)-min(Temperature))/20), 
                     inherit.aes = FALSE),
           xlab("Year"),
           theme(axis.text.x=element_text(angle=90, vjust=0.5)),
           scale_y_continuous(expand = expansion(mult=c(0,0.1))),
           drt_color_pal_drought())
    }}
  
  out<-p_data/p_model+plot_annotation(tag_levels="A")
  
  if(model_type=="Year_fac"){
    out<-out+plot_layout(heights = c(0.8, 1))
  }
  
  return(out)
}

partial.r2<-function(ANOVA, factor){
  r2<-ANOVA[factor, "Sum Sq"]/(ANOVA[factor, "Sum Sq"]+ ANOVA["Residuals", "Sum Sq"])
  return(r2)
}

Load data

data_regional<-lt_regional%>%
  filter(!is.na(Temperature))%>%
  mutate(Region=factor(Region, levels=c("Suisun Marsh", "Suisun Bay", "Confluence", "SouthCentral", "North")),
         Year_fac=factor(YearAdj),
         Drought=factor(Drought, levels=c("D", "N", "W")))
data_seasonal<-lt_seasonal%>%
  filter(!is.na(Temperature))%>%
  mutate(Season=factor(Season, levels=c("Winter", "Spring", "Summer", "Fall")),
         Year_fac=factor(YearAdj),
         Drought=factor(Drought, levels=c("D", "N", "W")))

Plots

Regional

Plot regional data by year

ggplot(data_regional, aes(x=YearAdj, y=Temperature, fill=Drought))+
  geom_point(shape=21, color="black")+
  facet_wrap(~Region)+
  drt_color_pal_drought()+
  ylab("Temperature (°C)")+
  theme_bw()

Plot regional data by Drought index

ggplot(data_regional, aes(x=Drought, y=Temperature, fill=Drought))+
  geom_boxplot()+
  facet_wrap(~Region)+
  drt_color_pal_drought()+
  ylab("Temperature (°C)")+
  theme_bw()

Seasonal

Plot seasonal data by year

ggplot(data_seasonal, aes(x=YearAdj, y=Temperature, fill=Drought))+
  geom_point(shape=21, color="black")+
  facet_wrap(~Season, scales="free")+
  drt_color_pal_drought()+
  ylab("Temperature (°C)")+
  theme_bw()

Plot seasonal data by Drought index

ggplot(data_seasonal, aes(x=Drought, y=Temperature, fill=Drought))+
  geom_boxplot()+
  facet_wrap(~Season, scales="free")+
  drt_color_pal_drought()+
  ylab("Temperature (°C)")+
  theme_bw()

Analyses

Regional

Drought

m_reg_d<-aov(Temperature ~ Drought + Region, data=data_regional)

Check assumptions:

model_plotter(m_reg_d, data_regional)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Check results

m_reg_d_Anova<-Anova(m_reg_d, type=2)
m_reg_d_Anova
## Anova Table (Type II tests)
## 
## Response: Temperature
##           Sum Sq  Df F value    Pr(>F)    
## Drought   24.295   2  30.777 1.711e-12 ***
## Region    51.956   4  32.908 < 2.2e-16 ***
## Residuals 85.650 217                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

How much variability is explained by the Drought index (partial R2)

partial.r2(m_reg_d_Anova, "Drought")
## [1] 0.220976

post-hoc test

p_m_reg_d<-tukey_plotter(m_reg_d, data_regional, "Region", "Drought")
ggsave(plot=p_m_reg_d, filename=file.path(save_dir, "Temp_region_drought_model.png"), device="png", height=6, width=7, units="in")
p_m_reg_d

Year

m_reg_y<-aov(Temperature ~ Year_fac + Region, data=data_regional)

Check assumptions:

model_plotter(m_reg_y, data_regional)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Check results

m_reg_y_Anova<-Anova(m_reg_y, type=2)
m_reg_y_Anova
## Anova Table (Type II tests)
## 
## Response: Temperature
##           Sum Sq  Df F value    Pr(>F)    
## Year_fac  87.991  46  15.073 < 2.2e-16 ***
## Region    51.371   4 101.197 < 2.2e-16 ***
## Residuals 21.955 173                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

How much variability is explained by the year (partial R2)

partial.r2(m_reg_y_Anova, "Year_fac")
## [1] 0.8003103

post-hoc test

p_m_reg_y<-tukey_plotter(m_reg_y, data_regional, "Region", "Year_fac")
ggsave(plot=p_m_reg_y, filename=file.path(save_dir, "Temp_region_year_model.png"), device="png", height=12, width=15, units="in")
p_m_reg_y

Seasonal

Drought

m_seas_d<-aov(Temperature ~ Drought + Season, data=data_seasonal)

Check assumptions:

model_plotter(m_seas_d, data_seasonal)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Check results

m_seas_d_Anova<-Anova(m_seas_d, type=2)
m_seas_d_Anova
## Anova Table (Type II tests)
## 
## Response: Temperature
##            Sum Sq  Df  F value    Pr(>F)    
## Drought     13.34   2   10.077 7.317e-05 ***
## Season    3107.54   3 1564.917 < 2.2e-16 ***
## Residuals  112.53 170                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

How much variability is explained by the Drought index (partial R2)

partial.r2(m_seas_d_Anova, "Drought")
## [1] 0.1059843

post-hoc test

p_m_seas_d<-tukey_plotter(m_seas_d, data_seasonal, "Season", "Drought")
ggsave(plot=p_m_seas_d, filename=file.path(save_dir, "Temp_season_drought_model.png"), device="png", height=10, width=7, units="in")
p_m_seas_d

Year

m_seas_y<-aov(Temperature ~ Year_fac + Season, data=data_seasonal)

Check assumptions:

model_plotter(m_seas_y, data_seasonal)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Check results

m_seas_y_Anova<-Anova(m_seas_y, type=2)
m_seas_y_Anova
## Anova Table (Type II tests)
## 
## Response: Temperature
##            Sum Sq  Df   F value    Pr(>F)    
## Year_fac    61.75  46    2.6376 1.068e-05 ***
## Season    3096.02   3 2027.9377 < 2.2e-16 ***
## Residuals   64.12 126                        
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

How much variability is explained by the year (partial R2)

partial.r2(m_seas_y_Anova, "Year_fac")
## [1] 0.4905628

post-hoc test

p_m_seas_y<-tukey_plotter(m_seas_y, data_seasonal, "Season", "Year_fac")
ggsave(plot=p_m_seas_y, filename=file.path(save_dir, "Temp_season_year_model.png"), device="png", height=12, width=15, units="in")
p_m_seas_y

Save all Anova outputs

anovas<-bind_rows(
  mutate(as_tibble(m_reg_d_Anova, rownames = "Parameter"), model="Regional_Drought"),
  mutate(as_tibble(m_reg_y_Anova, rownames = "Parameter"), model="Regional_Year"),
  mutate(as_tibble(m_seas_d_Anova, rownames = "Parameter"), model="Seasonal_Drought"),
  mutate(as_tibble(m_seas_y_Anova, rownames = "Parameter"), model="Seasonal_Year")
)%>%
  mutate(`Pr(>F)`=if_else(`Pr(>F)`<0.001, "< 0.001", as.character(round(`Pr(>F)`, 4))))%>%
  write_csv(file.path(save_dir, "Temp_anovas.csv"))

Comparing 2021 to prior years

raw_data<-DroughtData::raw_wq_1975_2021%>%
  filter(!is.na(Temperature))%>%
  left_join(data_regional%>%
              distinct(YearAdj, SVIndex, YearType, Drought),
            by="YearAdj")%>%
  mutate(across(c(Drought, YearType), list(`20_21`=~case_when(YearAdj==2021 ~ "2021", 
                                                              YearAdj==2020 ~ "2020", 
                                                              TRUE ~ as.character(.x)))),
         across(c(YearType, YearType_20_21), ~factor(.x, levels=c("2020", "2021", "Critical", "Dry", "Below Normal", "Above Normal", "Wet"))),
         Region=factor(Region, levels=c("Suisun Marsh", "Suisun Bay", "Confluence", "SouthCentral", "North")),
         Season=factor(Season, levels=c("Winter", "Spring", "Summer", "Fall")))

How does 2021 compare to Drought, Normal, and Wet periods?

p_2021_d<-ggplot(raw_data, aes(x=Drought_20_21, y=Temperature, fill=Drought))+
  geom_boxplot()+
  drt_color_pal_drought()+
  xlab("Drought")+
  ylab("Temperature (°C)")+
  theme_bw()
ggsave(plot=p_2021_d, filename=file.path(save_dir, "Temp_drought_20_21.png"), device="png", height=4, width=5, units="in")
p_2021_d

Does that change regionally or seasonally?

p_2021_d_rs<-ggplot(raw_data, aes(x=Drought_20_21, y=Temperature, fill=Drought))+
  geom_boxplot()+
  drt_color_pal_drought()+
  facet_grid(Season~Region, scales = "free_y")+
  xlab("Drought")+
  ylab("Temperature (°C)")+
  theme_bw(base_size = 16)+
  theme(axis.text.x=element_text(angle = 45, hjust=1))
ggsave(plot=p_2021_d_rs, filename=file.path(save_dir, "Temp_drought_rs_20_21.png"), device="png", height=8, width=10, units="in")
p_2021_d_rs

How does 2021 compare to each water year type?

p_2021_yt<-ggplot(raw_data, aes(x=YearType_20_21, y=Temperature, fill=YearType))+
  geom_boxplot()+
  drt_color_pal_yrtype()+
  xlab("Year type")+
  ylab("Temperature (°C)")+
  theme_bw()+
  theme(axis.text.x=element_text(angle = 45, hjust=1))
ggsave(plot=p_2021_yt, filename=file.path(save_dir, "Temp_yeartype_20_21.png"), device="png", height=4, width=6, units="in")
p_2021_yt

Does that change regionally or seasonally?

p_2021_yt_rs<-ggplot(raw_data, aes(x=YearType_20_21, y=Temperature, fill=YearType))+
  geom_boxplot()+
  drt_color_pal_yrtype()+
  facet_grid(Season~Region, scales = "free_y")+
  xlab("Year type")+
  ylab("Temperature (°C)")+
  theme_bw(base_size = 16)+
  theme(axis.text.x=element_text(angle=45, hjust=1))
ggsave(plot=p_2021_yt_rs, filename=file.path(save_dir, "Temp_yeartype_rs_20_21.png"), device="png", height=8, width=12, units="in")
p_2021_yt_rs