Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Loudenback Week 5 Assignments #17

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 79 additions & 5 deletions 01_Tidying_WQ_data.Rmd → Loudenback_01_Tidying_WQ_data.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,68 @@ data frame to `conc_daily` (i.e., a data frame of daily average ion
concentrations).

```{r}
#<<<< YOUR CODE HERE >>>>#
# Template site table for WQ_tidying_data().
# NOTE(review): the "?" values are placeholders — fill in real WQP site ids
# (e.g. "USGS-09034500") and basin labels before calling the function, or
# pass a fully populated tibble such as `colorado` / `additional_data`.
general_WQ <- tibble(siteid = c("?"),
basin = c("?"))

# Pull, tidy, and daily-average major-ion water-quality data from the Water
# Quality Portal (WQP) for the sites listed in `general_WQ`.
#
# Args:
#   general_WQ: tibble/data frame with columns `siteid` (WQP site identifiers
#     such as "USGS-09034500") and `basin` (a human-readable basin label).
# Returns:
#   A tibble of daily mean ion concentrations (mg/l) with columns
#   date, parameter, siteid, basin, conc.
WQ_tidying_data <- function(general_WQ) {

  # WQP characteristic names that correspond to the major ions of interest.
  parameters <- c("Calcium", "Magnesium", "Sodium", "Potassium", "Sulfate",
                  "Sulfate as SO4", "Sulfur Sulfate", "Total Sulfate",
                  "Chloride", "Alkalinity, bicarbonate", "Bicarbonate")

  conc_wide_USGS <- readWQPdata(siteid = general_WQ$siteid,   # siteid column becomes a vector
                                sampleMedia = "Water",        # WQP also has biological and sediment data
                                startDateLo = "1980-10-01",   # must be formatted as YYYY-MM-DD
                                startDateHi = "2023-11-01",   # must be formatted as YYYY-MM-DD
                                characteristicName = parameters)

  # Grab and rename the most important columns.
  # BUG FIX: this pipeline previously read from the un-suffixed objects
  # (`conc_wide`, `conc_small`, `conc_meta`, `conc_tidy`), which are not
  # defined inside this function and would error (or silently pick up stale
  # globals) when the function runs.
  conc_small_USGS <- conc_wide_USGS %>%
    select(date = ActivityStartDate,
           parameter = CharacteristicName,
           units = ResultMeasure.MeasureUnitCode,
           siteid = MonitoringLocationIdentifier,
           org = OrganizationFormalName,
           org_id = OrganizationIdentifier,
           time = ActivityStartTime.Time,
           value = ResultMeasureValue,
           sample_method = SampleCollectionMethod.MethodName,
           analytical_method = ResultAnalyticalMethod.MethodName,
           particle_size = ResultParticleSizeBasisText,
           date_time = ActivityStartDateTime,
           media = ActivityMediaName,
           sample_depth = ActivityDepthHeightMeasure.MeasureValue,
           sample_depth_unit = ActivityDepthHeightMeasure.MeasureUnitCode,
           fraction = ResultSampleFractionText,
           status = ResultStatusIdentifier) %>%
    # Remove stray white space so the mg/l filter below matches reliably.
    mutate(units = trimws(units))

  # Attach basin labels and collapse the WQP parameter names to ion symbols.
  conc_meta_USGS <- conc_small_USGS %>%
    left_join(general_WQ, by = "siteid") %>%
    dplyr::mutate(ion = dplyr::case_when(
      parameter == "Calcium" ~ "Ca",
      parameter == "Magnesium" ~ "Mg",
      parameter == "Sodium" ~ "Na",
      parameter == "Potassium" ~ "K",
      parameter %in% c("Sulfate", "Sulfate as SO4",
                       "Sulfur Sulfate", "Total Sulfate") ~ "SO4",
      parameter == "Chloride" ~ "Cl",
      parameter %in% c("Alkalinity, bicarbonate", "Bicarbonate") ~ "HCO3")) %>%
    select(siteid, basin, ion, parameter, date, everything())

  # Keep only mg/l records and trim down to the analysis columns.
  conc_tidy_USGS <- conc_meta_USGS %>%
    filter(units == 'mg/l') %>%
    mutate(date = ymd(date)) %>%
    select(date,
           parameter,
           ion,
           siteid,
           basin,
           conc = value)

  # Daily mean concentration for each date / parameter / site / basin combo.
  conc_daily_USGS <- conc_tidy_USGS %>%
    group_by(date, parameter, siteid, basin) %>%
    summarize(conc = mean(conc, na.rm = TRUE))

  # BUG FIX: return the result explicitly. The function previously ended with
  # an assignment, whose value is returned invisibly, so top-level calls
  # printed nothing.
  conc_daily_USGS
}

```

```{r}
# Run the tidying workflow on the original Colorado sites (the `colorado`
# tibble is defined earlier in this document).
WQ_tidying_data(general_WQ = colorado)
```

## Question 2
Expand All @@ -265,6 +326,8 @@ quality data for the site IDs listed below:
```{r}
# Site-id / basin lookup table for the three additional sites.
additional_data <- tibble(siteid = c('USGS-09180000', 'USGS-09180500', 'USGS-09380000'),
                          basin = c('dolores', 'colorado4', 'colorado5'))

# BUG FIX: store the tidied result instead of discarding it — Question 3
# needs these daily concentrations to combine with the original data.
# Printing it keeps the chunk output identical to before.
additional_daily <- WQ_tidying_data(general_WQ = additional_data)
additional_daily
```

This output of running the function should look identical in format to
Expand All @@ -277,10 +340,11 @@ Combine the data pulled in Question 2 with the original data from
combined data as `tidied_full_wq.RDS` in a folder called data.

```{r}
library(dplyr)

# BUG FIX: `additional_data` is just the 3-row site-id lookup table (siteid,
# basin) — binding it to the daily concentrations appended rows with NA date
# and NA conc. Combine the *tidied daily data* for the additional sites with
# the original `conc_daily` instead.
tidied_full_wq <- bind_rows(conc_daily,
                            WQ_tidying_data(general_WQ = additional_data))

# Make sure the output folder exists before writing to it.
if (!dir.exists('data')) dir.create('data', recursive = TRUE)
saveRDS(tidied_full_wq, file = 'data/tidied_full_wq.RDS')
```

## Question 4
Expand All @@ -297,6 +361,10 @@ you need to remove `USGS-` from each site. Discharge is
automatically make the column names a little less annoying.

```{r}
library(dataRetrieval)

# BUG FIX: removed `saveRDS(tidied_full_wq, file = 'data/Q.RDS')` from here.
# It ran *before* any discharge data was downloaded and overwrote the
# discharge file with water-quality data. The discharge data (`q_data`) is
# saved to data/Q.RDS after it is downloaded further down in this chunk.
# Reminder! you can use ?readNWISdv to read about how the function works.
sites <- colorado %>%
#Bind the two datasets to get all 8 sites
Expand All @@ -306,8 +374,14 @@ sites <- colorado %>%
#Remove the USGS- prefix
gsub('USGS-', '', .)

#<<<< YOUR CODE HERE >>>>#

# Download daily discharge data for all eight sites.
# Parameter code "00060" is daily discharge (cubic feet per second).
q_data <- readNWISdv(siteNumbers = sites,
                     parameterCd = "00060",
                     startDate = "1980-10-01",
                     endDate = "2023-11-01") %>%
  # Rename the raw NWIS columns to friendlier names (e.g. Flow)
  renameNWISColumns()

# BUG FIX: was `saveRDS(q-data, file = 'data/tidied_full_wq.RDS')`.
# `q-data` is the subtraction of two undefined objects (the object is named
# `q_data`), and writing to tidied_full_wq.RDS would have overwritten the
# water-quality data saved in Question 3. Save the discharge data as Q.RDS.
saveRDS(q_data, file = 'data/Q.RDS')
```
66 changes: 53 additions & 13 deletions 02_Modelling_WQ_data.Rmd → Loudenback_02_Modelling_WQ_data.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -269,59 +269,99 @@ The above workflow really focuses on trend detection with Sens slope, but here w
Use `inner_join` to join our daily discharge data (`Q.RDS`) to our raw water quality data (`tidied_full_wq.RDS`). You want to join by both date and siteid. Remember! the discharge data has site IDs that we had to drop the `USGS-` from, so you will need to add that back in using `paste0`.

```{r}
# Quick structure checks on the two inputs to the join.
# NOTE(review): `discharge_data` and `water_quality_data` are created in the
# *next* chunk — on a fresh session this chunk errors with "object not found".
# Run the loading chunk first, or move these checks below it.
names(discharge_data)
head(discharge_data)

names(water_quality_data)
head(water_quality_data)
```

```{r}
library(dplyr)

# BUG FIX: the RDS files were read from machine-specific absolute paths
# (~/Desktop/Classes/...), which breaks knitting on any other machine. Use
# the project-relative 'data/' paths that the earlier chunks save to.
discharge_data <- readRDS('data/Q.RDS')
names(discharge_data) <- tolower(names(discharge_data))
discharge_data <- rename(discharge_data, siteid = site_no)

water_quality_data <- readRDS('data/tidied_full_wq.RDS')

# The NWIS download stripped the 'USGS-' prefix from the site numbers;
# add it back so the keys match the water-quality site ids.
discharge_data <- discharge_data %>%
  mutate(siteid = paste0('USGS-', siteid))

# Keep only site/date combinations present in BOTH datasets.
joined_data <- inner_join(water_quality_data, discharge_data, by = c('date', 'siteid'))

# BUG FIX: View() is interactive-only and fails (or does nothing) when the
# document is knit non-interactively; head() shows the same sanity check.
head(joined_data)
```

### Pick any site and ion combination and plot discharge versus ion concentration

```{r}
# Example site/ion pair: magnesium at USGS-09034500.
selected_data <- filter(joined_data, siteid == "USGS-09034500", ion == "Mg")

# Scatter of daily discharge against ion concentration for that pair.
ggplot(selected_data, aes(x = flow, y = conc)) +
  geom_point() +
  labs(
    title = paste("Discharge vs", "Mg", "Concentration at", "USGS-09034500"),
    x = "Discharge",
    y = paste("Mg", "Concentration")
  )
```

#### What do you see in this relationship?
#### What do you see in this relationship? When looking at discharge vs magnesium concentration, I see that concentrations at this site are greatest when water flow (discharge) is lowest.

## Models everywhere

Group your data by basin and water quality parameter and nest the data.

```{r}
# One row per basin x parameter combination; all remaining columns are
# tucked into a nested `data` list-column for per-group modelling.
grouped_data <- joined_data %>%
  group_by(basin, parameter) %>%
  nest()

# Show the nested structure.
print(grouped_data)
```

## Apply a linear model to the data

You will need to use a `map` command like this: `map(data, ~lm(conc ~ q, data = .x))`

```{r}
library(purrr)

# Fit one linear model per nested group: ion concentration as a function of
# daily discharge (`flow`). The list of fits is stored in a list-column that
# (for compatibility with the chunks below) shares the tibble's name.
grouped_datalm <- grouped_data %>%
  mutate(grouped_datalm = map(data, function(d) lm(conc ~ flow, data = d)))

# Show the tibble with its new model list-column.
print(grouped_datalm)
```

## Summarize your data using `tidy`

You should have a new column called `mods` or something similar, and you need to `tidy` those mods and store this new, tidier data in another column.

```{r}
library(broom)

# Tidy each fitted model into a coefficient table (term, estimate, p.value…).
# NOTE: inside mutate(), `grouped_datalm` resolves to the *list-column* of lm
# fits created in the previous chunk — it confusingly shares its name with
# the tibble itself — so map(grouped_datalm, tidy) tidies each model.
grouped_datalm <- grouped_datalm %>%
mutate(mods = map(grouped_datalm, tidy))

print(grouped_datalm)
```

## Make a visual of your models' summaries that shows both which sites have significant relationships between discharge and concentration and the slope of that relationship.

```{r}

# Unnest the tidied coefficient tables: one row per group per model term.
tidy_data <- grouped_datalm %>%
  unnest(mods)

# BUG FIX: keep only the slope on discharge ("flow"). The original plotted
# every coefficient, so intercept rows were mixed in with the
# discharge-concentration slopes we actually want to display.
slope_data <- tidy_data %>%
  filter(term == "flow") %>%
  mutate(significant = p.value < 0.05)

# Subset of groups with a statistically significant slope (p < 0.05).
significant_data <- filter(slope_data, significant)

# BUG FIX: `significant_data` was computed but never used, so the plot did
# not show which relationships are significant. Shape now encodes
# significance and the dashed line marks the p = 0.05 threshold.
ggplot(slope_data, aes(x = estimate, y = p.value, color = basin, shape = significant)) +
  geom_point() +
  geom_hline(yintercept = 0.05, linetype = "dashed") +
  labs(
    title = "Relationship Between Discharge and Concentration",
    x = "Slope of Relationship",
    y = "P-value"
  ) +
  theme_minimal()
```

## Bonus
Expand Down
72 changes: 72 additions & 0 deletions compiledcode_WQfunctions.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# Lookup table linking the five original USGS monitoring sites to their
# basin labels; used as the default input to WQ_tidying_data() below.
colorado <- tibble(
  siteid = c("USGS-09034500", "USGS-09069000", "USGS-09085000",
             "USGS-09095500", "USGS-09152500"),
  basin = c("colorado1", "eagle", "roaring", "colorado3", "gunnison")
)

# Pull, tidy, and daily-average major-ion water-quality data from the Water
# Quality Portal (WQP).
#
# Args:
#   site_table: tibble with columns `siteid` (WQP site identifiers) and
#     `basin`. Defaults to the `colorado` table defined above, so existing
#     zero-argument calls keep working (backward compatible).
# Returns:
#   A tibble of daily mean ion concentrations (mg/l) with columns
#   date, parameter, siteid, basin, conc.
WQ_tidying_data <- function(site_table = colorado) {
  # Compile all the WQP characteristic names into a single vector:
  parameters <- c("Calcium", "Magnesium", "Sodium", "Potassium", "Sulfate",
                  "Sulfate as SO4", "Sulfur Sulfate", "Total Sulfate",
                  "Chloride", "Alkalinity, bicarbonate", "Bicarbonate")

  conc_wide <- readWQPdata(siteid = site_table$siteid,   # siteid column becomes a vector
                           sampleMedia = "Water",        # WQP also has biological and sediment data
                           startDateLo = "1980-10-01",   # must be formatted as YYYY-MM-DD
                           startDateHi = "2023-11-01",   # must be formatted as YYYY-MM-DD
                           characteristicName = parameters)

  # BUG FIX: removed the View() calls and the discarded table() call that
  # were inside this function — View() is interactive-only (it errors or
  # does nothing when run via Rscript), and table(conc_meta$units) computed
  # a result that was thrown away. Inspect intermediates interactively
  # outside the function instead.

  # This code mostly just grabs and renames the most important data columns
  conc_small <- conc_wide %>%
    select(date = ActivityStartDate,
           parameter = CharacteristicName,
           units = ResultMeasure.MeasureUnitCode,
           siteid = MonitoringLocationIdentifier,
           org = OrganizationFormalName,
           org_id = OrganizationIdentifier,
           time = ActivityStartTime.Time,
           value = ResultMeasureValue,
           sample_method = SampleCollectionMethod.MethodName,
           analytical_method = ResultAnalyticalMethod.MethodName,
           particle_size = ResultParticleSizeBasisText,
           date_time = ActivityStartDateTime,
           media = ActivityMediaName,
           sample_depth = ActivityDepthHeightMeasure.MeasureValue,
           sample_depth_unit = ActivityDepthHeightMeasure.MeasureUnitCode,
           fraction = ResultSampleFractionText,
           status = ResultStatusIdentifier) %>%
    # Remove trailing white space in labels
    mutate(units = trimws(units))

  # Attach basin labels and collapse the parameter names to ion symbols.
  # GENERALIZATION: join against the `site_table` argument rather than the
  # hard-coded global `colorado`, so the function works for any site list.
  conc_meta <- conc_small %>%
    left_join(site_table, by = "siteid") %>%
    dplyr::mutate(ion = dplyr::case_when(
      parameter == "Calcium" ~ "Ca",
      parameter == "Magnesium" ~ "Mg",
      parameter == "Sodium" ~ "Na",
      parameter == "Potassium" ~ "K",
      parameter %in% c("Sulfate", "Sulfate as SO4",
                       "Sulfur Sulfate", "Total Sulfate") ~ "SO4",
      parameter == "Chloride" ~ "Cl",
      parameter %in% c("Alkalinity, bicarbonate", "Bicarbonate") ~ "HCO3")) %>%
    select(siteid, basin, ion, parameter, date, everything())

  # Keep only mg/l records and trim down to the analysis columns.
  conc_tidy <- conc_meta %>%
    filter(units == 'mg/l') %>%
    mutate(date = ymd(date)) %>%
    select(date,
           parameter,
           ion,
           siteid,
           basin,
           conc = value)

  # The amazing group_by function groups all the data so that the summary
  # only applies to each subgroup (siteid, date, and parameter combination).
  # So in the end you get a daily average concentration for each siteid and parameter type.
  conc_daily <- conc_tidy %>%
    group_by(date, parameter, siteid, basin) %>%
    summarize(conc = mean(conc, na.rm = TRUE))

  # BUG FIX: return the result explicitly; the function previously ended in
  # an assignment, whose value is returned invisibly.
  conc_daily
}
Binary file modified data/Q.RDS
Binary file not shown.
Binary file modified data/tidied_full_wq.RDS
Binary file not shown.