diff --git a/01_Tidying_WQ_data.Rmd b/Loudenback_01_Tidying_WQ_data.Rmd similarity index 76% rename from 01_Tidying_WQ_data.Rmd rename to Loudenback_01_Tidying_WQ_data.Rmd index d91d521..93f6859 100644 --- a/01_Tidying_WQ_data.Rmd +++ b/Loudenback_01_Tidying_WQ_data.Rmd @@ -254,7 +254,68 @@ data frame to `conc_daily` (i.e., a data frame of daily average ion concentrations). ```{r} -#<<<< YOUR CODE HERE >>>># +general_WQ <- tibble(siteid = c("?"), + basin = c("?")) + +WQ_tidying_data <- function(general_WQ){ +parameters <- c("Calcium", "Magnesium", "Sodium", "Potassium", "Sulfate", "Sulfate as SO4", "Sulfur Sulfate", "Total Sulfate", "Chloride", "Alkalinity, bicarbonate", "Bicarbonate") + +conc_wide_USGS <- readWQPdata(siteid = general_WQ$siteid, # pulling our siteids column in from the colorado object, becomes a vector + sampleMedia = "Water", # WQP also has bilogical data and sediment data + startDateLo = "1980-10-01", # must be formatted as YYYY-MM-DD + startDateHi = "2023-11-01", # must be formatted as YYYY-MM-DD + characteristicName = parameters) # our vector of `characteristcName`s + +conc_small_USGS <- conc_wide %>% + select(date = ActivityStartDate, + parameter = CharacteristicName, + units = ResultMeasure.MeasureUnitCode, + siteid = MonitoringLocationIdentifier, + org = OrganizationFormalName, + org_id = OrganizationIdentifier, + time = ActivityStartTime.Time, + value = ResultMeasureValue, + sample_method = SampleCollectionMethod.MethodName, + analytical_method = ResultAnalyticalMethod.MethodName, + particle_size = ResultParticleSizeBasisText, + date_time = ActivityStartDateTime, + media = ActivityMediaName, + sample_depth = ActivityDepthHeightMeasure.MeasureValue, + sample_depth_unit = ActivityDepthHeightMeasure.MeasureUnitCode, + fraction = ResultSampleFractionText, + status = ResultStatusIdentifier) %>% + mutate(units = trimws(units)) + +conc_meta_USGS <- conc_small %>% + left_join(., general_WQ, by = "siteid") %>% + dplyr::mutate(ion = dplyr::case_when(parameter == "Calcium" ~ "Ca", + parameter == "Magnesium" ~ "Mg", + parameter == "Sodium" ~ "Na", + parameter == "Potassium" ~ "K", + parameter %in% c("Sulfate", "Sulfate as SO4", "Sulfur Sulfate", "Total Sulfate") ~ "SO4", + parameter == "Chloride" ~ "Cl", + parameter %in% c("Alkalinity, bicarbonate", "Bicarbonate") ~ "HCO3")) %>% + select(siteid, basin, ion, parameter, date, everything()) + +conc_tidy_USGS <- conc_meta %>% + filter(units == 'mg/l') %>% + mutate(date = ymd(date)) %>% + select(date, + parameter, + ion, + siteid, + basin, + conc = value) + +conc_daily_USGS <- conc_tidy %>% + group_by(date, parameter, siteid, basin) %>% + summarize(conc = mean(conc, na.rm = T)) +} + +``` + +```{r} +WQ_tidying_data(general_WQ = colorado) ``` ## Question 2 @@ -265,6 +326,8 @@ quality data for the site IDs listed below: ```{r} additional_data <- tibble(siteid = c('USGS-09180000', 'USGS-09180500', 'USGS-09380000'), basin = c('dolores', 'colorado4', 'colorado5')) + +WQ_tidying_data(general_WQ = additional_data) ``` This output of running the function should look identical in format to @@ -277,10 +340,11 @@ Combine the data pulled in Question 2 with the original data from combined data as `tidied_full_wq.RDS` in a folder called data. ```{r} +library(dplyr) -#<<<< YOUR CODE HERE >>>># +tidied_full_wq <- bind_rows(conc_daily, additional_data) -#saveRDS(wq, file = 'data/tidied_full_wq.RDS') +saveRDS(tidied_full_wq, file = 'data/tidied_full_wq.RDS') ``` ## Question 4 @@ -297,6 +361,10 @@ you need to remove `USGS-` from each site. Discharge is automatically make the column names a little less annoying. ```{r} +library(dataRetrieval) + +# Save the discharge data as an RDS object +saveRDS(tidied_full_wq, file = 'data/Q.RDS') # Reminder! you can use ?readNWISdv to read about how the function works. sites <- colorado %>% #Bind the two datasets to get all 8 sites @@ -306,8 +374,14 @@ sites <- colorado %>% #Remove the USGS- prefix gsub('USGS-', '', .) -#<<<< YOUR CODE HERE >>>># - +# Download daily discharge data for all eight sites +q_data <- readNWISdv(siteNumbers = sites, + parameterCd = "00060", + startDate = "1980-10-01", + endDate = "2023-11-01") %>% + # Rename columns to be less annoying + renameNWISColumns() #saveRDS(q_data, file = 'data/Q.RDS') +saveRDS(q-data, file = 'data/tidied_full_wq.RDS') ``` diff --git a/02_Modelling_WQ_data.Rmd b/Loudenback_02_Modelling_WQ_data.Rmd similarity index 87% rename from 02_Modelling_WQ_data.Rmd rename to Loudenback_02_Modelling_WQ_data.Rmd index 7502d97..3968f13 100644 --- a/02_Modelling_WQ_data.Rmd +++ b/Loudenback_02_Modelling_WQ_data.Rmd @@ -269,29 +269,55 @@ The above workflow really focuses on trend detection with Sens slope, but here w Use `inner_join` to join our daily discharge data (`Q.RDS`) to our raw water quality data (`tidied_full_wq.RDS`). You want to join by both date and siteid. Remember! the discharge data has site IDs that we had to drop the `USGS-` from, so you will need to add that back in using `paste0`. ```{r} +names(discharge_data) +head(discharge_data) -#<<<< YOUR CODE HERE >>>># +names(water_quality_data) +head(water_quality_data) +``` + +```{r} +library(dplyr) + +discharge_data <- readRDS('~/Desktop/Classes/Fall 2023/ESS 523A/Week-5-Nested-Modelling/data/Q.RDS') +names(discharge_data) <- tolower(names(discharge_data)) +discharge_data <- rename(discharge_data, siteid = site_no) + +water_quality_data <- readRDS('~/Desktop/Classes/Fall 2023/ESS 523A/Week-5-Nested-Modelling/data/tidied_full_wq.RDS') + +discharge_data <- discharge_data %>% + mutate(siteid = paste0('USGS-', siteid)) + +joined_data <- inner_join(water_quality_data, discharge_data, by = c('date', 'siteid')) + +View(joined_data) ``` ### Pick any site and ion combination and plot discharge versus ion concentration ```{r} +selected_data <- joined_data %>% + filter(siteid == "USGS-09034500", ion == "Mg") -#<<<< YOUR CODE HERE >>>># - +ggplot(selected_data, aes(x = flow, y = conc)) + + geom_point() + + labs(title = paste("Discharge vs", "Mg", "Concentration at", "USGS-09034500"), + x = "Discharge", y = paste("Mg", "Concentration")) ``` -#### What do you see in this relationship? +#### What do you see in this relationship? When looking at discharge vs magensium concentration, I see that the concentrations at this specific height are greatest when water flow (discharge) is slower. ## Models everywhere Group your data by basin and water quality parameter and nest the data. ```{r} +grouped_data <- joined_data %>% + group_by(basin, parameter) %>% + nest() -#<<<< YOUR CODE HERE >>>># - +print(grouped_data) ``` ## Apply a linear model to the data @@ -299,9 +325,12 @@ Group your data by basin and water quality parameter and nest the data. You will need to use a `map` command like this: `map(data, ~lm(conc ~ q, data = .x))` ```{r} +library(purrr) -#<<<< YOUR CODE HERE >>>># +grouped_datalm <- grouped_data %>% + mutate(grouped_datalm = map(data, ~lm(conc ~ flow, data = .x))) +print(grouped_datalm) ``` ## Summarize your data using `tidy` @@ -309,19 +338,30 @@ You will need to use a `map` command like this: `map(data, ~lm(conc ~ q, data = You should have a new column called `mods` or something similar, and you need to `tidy` those mods and store this new, tidier data in another column. ```{r} +library(broom) -#<<<< YOUR CODE HERE >>>># - +grouped_datalm <- grouped_datalm %>% + mutate(mods = map(grouped_datalm, tidy)) +print(grouped_datalm) ``` ## Make a visual of your models' summaries that shows both which sites have significant relationships between discharge and concentration and the slope of that relationship. ```{r} - -#<<<< YOUR CODE HERE >>>># - - +tidy_data <- grouped_datalm %>% + unnest(mods) + +significant_data <- filter(tidy_data, p.value < 0.05) + +ggplot(tidy_data, aes(x = estimate, y = p.value, color = basin)) + + geom_point() + + labs( + title = "Relationship Between Discharge and Concentration", + x = "Slope of Relationship", + y = "P-value" + ) + + theme_minimal() ``` ## Bonus diff --git a/compiledcode_WQfunctions.R b/compiledcode_WQfunctions.R new file mode 100644 index 0000000..a667b89 --- /dev/null +++ b/compiledcode_WQfunctions.R @@ -0,0 +1,72 @@ +colorado <- tibble(siteid = c("USGS-09034500", "USGS-09069000", + "USGS-09085000", "USGS-09095500", "USGS-09152500"), + basin = c("colorado1", "eagle", + "roaring", "colorado3", "gunnison")) + +WQ_tidying_data <- function(){ + # Compile all these names into a single vector: + parameters <- c("Calcium", "Magnesium", "Sodium", "Potassium", "Sulfate", "Sulfate as SO4", "Sulfur Sulfate", "Total Sulfate", "Chloride", "Alkalinity, bicarbonate", "Bicarbonate") + + conc_wide <- readWQPdata(siteid = colorado$siteid, # pulling our siteids column in from the colorado object, becomes a vector + sampleMedia = "Water", # WQP also has bilogical data and sediment data + startDateLo = "1980-10-01", # must be formatted as YYYY-MM-DD + startDateHi = "2023-11-01", # must be formatted as YYYY-MM-DD + characteristicName = parameters) # our vector of `characteristcName`s + + View(conc_wide) + + # This code mostly just grabs and renames the most important data columns + conc_small <- conc_wide %>% + select(date = ActivityStartDate, + parameter = CharacteristicName, + units = ResultMeasure.MeasureUnitCode, + siteid = MonitoringLocationIdentifier, + org = OrganizationFormalName, + org_id = OrganizationIdentifier, + time = ActivityStartTime.Time, + value = ResultMeasureValue, + sample_method = SampleCollectionMethod.MethodName, + analytical_method = ResultAnalyticalMethod.MethodName, + particle_size = ResultParticleSizeBasisText, + date_time = ActivityStartDateTime, + media = ActivityMediaName, + sample_depth = ActivityDepthHeightMeasure.MeasureValue, + sample_depth_unit = ActivityDepthHeightMeasure.MeasureUnitCode, + fraction = ResultSampleFractionText, + status = ResultStatusIdentifier) %>% + # Remove trailing white space in labels + mutate(units = trimws(units)) + + conc_meta <- conc_small %>% + left_join(., colorado, by = "siteid") %>% + dplyr::mutate(ion = dplyr::case_when(parameter == "Calcium" ~ "Ca", + parameter == "Magnesium" ~ "Mg", + parameter == "Sodium" ~ "Na", + parameter == "Potassium" ~ "K", + parameter %in% c("Sulfate", "Sulfate as SO4", "Sulfur Sulfate", "Total Sulfate") ~ "SO4", + parameter == "Chloride" ~ "Cl", + parameter %in% c("Alkalinity, bicarbonate", "Bicarbonate") ~ "HCO3")) %>% + select(siteid, basin, ion, parameter, date, everything()) + + View(conc_meta) + + table(conc_meta$units) + + conc_tidy <- conc_meta %>% + filter(units == 'mg/l') %>% + mutate(date = ymd(date)) %>% + select(date, + parameter, + ion, + siteid, + basin, + conc = value) + + + # The amazing group_by function groups all the data so that the summary + # only applies to each subgroup (siteid, date, and parameter combination). + # So in the end you get a daily average concentration for each siteid and parameter type. + conc_daily <- conc_tidy %>% + group_by(date, parameter, siteid, basin) %>% + summarize(conc = mean(conc, na.rm = T)) + } diff --git a/data/Q.RDS b/data/Q.RDS index 2ae04dc..e6de16a 100644 Binary files a/data/Q.RDS and b/data/Q.RDS differ diff --git a/data/tidied_full_wq.RDS b/data/tidied_full_wq.RDS index 75a045d..e6de16a 100644 Binary files a/data/tidied_full_wq.RDS and b/data/tidied_full_wq.RDS differ