-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
functional format_sdg.py
- Loading branch information
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,126 @@ | ||
import pandas as pd | ||
|
||
def calc_sdg_conc ( | ||
inputFile, | ||
volGas="gasVolume", | ||
volH2O="waterVolume", | ||
baro="barometricPressure", | ||
waterTemp="waterTemp", | ||
headspaceTemp="headspaceTemp", | ||
eqCO2="concentrationCO2Gas", | ||
sourceCO2="concentrationCO2Air", | ||
eqCH4="concentrationCH4Gas", | ||
sourceCH4="concentrationCH4Air", | ||
eqN2O="concentrationN2OGas", | ||
sourceN2O="concentrationN2OAir" | ||
): | ||
|
||
if type(inputFile) is "character": | ||
|
||
inputFile = pd.read.csv(inputFile) | ||
|
||
##### Constants ##### | ||
cGas = 8.3144598 # universal gas constant (J K-1 mol-1) | ||
cKelvin = 273.15 # Conversion factor from Kelvin to Celsius | ||
cPresConv = 0.000001 # Constant to convert mixing ratio from umol/mol (ppmv) to mol/mol. Unit conversions from kPa to Pa, m^3 to L, cancel out. | ||
cT0 = 298.15 | ||
# Henry's law constant T0 | ||
# Henry's law constants and temperature dependence from Sander (2015) DOI: 10.5194/acp-15-4399-2015 | ||
ckHCO2 = 0.00033 # mol m-3 Pa, range: 0.00031 - 0.00045 | ||
ckHCH4 = 0.000014 # mol m-3 Pa, range: 0.0000096 - 0.000092 | ||
ckHN2O = 0.00024 # mol m-3 Pa, range: 0.00018 - 0.00025 | ||
cdHdTCO2 = 2400 # K, range: 2300 - 2600 | ||
cdHdTCH4 = 1900 # K, range: 1400-2400 | ||
cdHdTN2O = 2700 # K, range: 2600 - 3600 | ||
|
||
##### Populate mean global values for reference air where it isn't reported ##### | ||
inputFile[, sourceCO2] = ifelse( is.na(inputFile[, sourceCO2]), # if reported as NA | ||
405, # use global mean https://www.esrl.noaa.gov/gmd/ccgg/trends/global.html | ||
inputFile[, sourceCO2]) | ||
|
||
inputFile[, sourceCH4] = ifelse( is.na(inputFile[, sourceCH4]), # use global average if not measured | ||
1.85, # https://www.esrl.noaa.gov/gmd/ccgg/trends_ch4/ | ||
inputFile[, sourceCH4]) | ||
|
||
inputFile[, sourceN2O] = ifelse( is.na(inputFile[, sourceN2O]), # use global average if not measured | ||
0.330, # https://www.esrl.noaa.gov/gmd/hats/combined/N2O.html | ||
inputFile[, sourceN2O]) | ||
|
||
##### Calculate dissolved gas concentration in original water sample ##### | ||
inputFile['dissolvedCO2'] = NA | ||
inputFile['dissolvedCO2'] < - inputFile[, baro] *cPresConv * | ||
(inputFile[, volGas] * (inputFile[, eqCO2] - inputFile[, sourceCO2]) / ( | ||
cGas * (inputFile[, headspaceTemp] + cKelvin) * inputFile[, volH2O]) + | ||
ckHCO2 * exp( | ||
cdHdTCO2 * (1 / (inputFile[, headspaceTemp] + cKelvin) - 1 / cT0)) * inputFile[, eqCO2]) | ||
|
||
inputFile['dissolvedCH4'] = NA | ||
inputFile['dissolvedCH4'] = inputFile[, baro] *cPresConv * | ||
(inputFile[, volGas] * (inputFile[, eqCH4] - inputFile[, sourceCH4]) / ( | ||
cGas * (inputFile[, headspaceTemp] + cKelvin) * inputFile[, volH2O]) + | ||
ckHCH4 * exp( | ||
cdHdTCH4 * (1 / (inputFile[, headspaceTemp] + cKelvin) - 1 / cT0)) * inputFile[, eqCH4]) | ||
|
||
inputFile['dissolvedN2O'] = NA | ||
inputFile['dissolvedN2O'] = inputFile[, baro] *cPresConv * | ||
(inputFile[, volGas] * (inputFile[, eqN2O] - inputFile[, sourceN2O]) / ( | ||
cGas * (inputFile[, headspaceTemp] + cKelvin) * inputFile[, volH2O]) + | ||
ckHN2O * exp( | ||
cdHdTN2O * (1 / (inputFile[, headspaceTemp] + cKelvin) - 1 / cT0)) * inputFile[, eqN2O]) | ||
|
||
##### Step-by-step Calculation of dissolved gas concentrations for testing ##### | ||
|
||
# Dissolved gas concentration in the original water samples (dissolvedGas) is | ||
# calculated from a mass balance of the measured headspace concentration (eqGas), the | ||
# calculated concentration in the equilibrated headspace water (eqHeadspaceWaterCO2), | ||
# and the volumes of the headspace water and headspace gas, following: | ||
|
||
# dissolvedGas = ((eqGas * volGas) + (eqHeadspaceWaterGas * volH2O) - (sourceGas * volGas)) / volH2O | ||
|
||
# Measured headspace concentration should be expressed as mol L- for the mass | ||
# balance calculation and as partial pressure for the equilibrium calculation. | ||
|
||
# #Temperature corrected Henry's Law Constant | ||
# HCO2 <- ckHCO2 * exp(cdHdTCO2 * ((1/(headspaceTemp+cKelvin)) - (1/cT0))) | ||
# HCH4 <- ckHCH4 * exp(cdHdTCH4 * ((1/(headspaceTemp+cKelvin)) - (1/cT0))) | ||
# HN2O <- ckHN2O * exp(cdHdTN2O * ((1/(headspaceTemp+cKelvin)) - (1/cT0))) | ||
# | ||
# #Mol of gas in equilibrated water (using Henry's law) | ||
# CO2eqWat <- HCO2 * eqCO2 * cPresConv * baro * (volH2O/1000) | ||
# CH4eqWat <- HCH4 * eqCH4 * cPresConv * baro * (volH2O/1000) | ||
# N2OeqWat <- HN2O * eqN2O * cPresConv * baro * (volH2O/1000) | ||
# | ||
# #Mol of gas in equilibrated air (using ideal gas law) | ||
# CO2eqAir <- (eqCO2 * cPresConv * baro * (volGas/1000))/(cGas * (headspaceTemp + cKelvin)) | ||
# CH4eqAir <- (eqCH4 * cPresConv * baro * (volGas/1000))/(cGas * (headspaceTemp + cKelvin)) | ||
# N2OeqAir <- (eqN2O * cPresConv * baro * (volGas/1000))/(cGas * (headspaceTemp + cKelvin)) | ||
# | ||
# #Mol of gas in source gas (using ideal gas law) | ||
# CO2air <- (inputFile[,sourceCO2] * cPresConv * baro * (volGas/1000))/(cGas * (headspaceTemp + cKelvin)) | ||
# CH4air <- (inputFile[,sourceCH4] * cPresConv * baro * (volGas/1000))/(cGas * (headspaceTemp + cKelvin)) | ||
# N2Oair <- (inputFile[,sourceN2O] * cPresConv * baro * (volGas/1000))/(cGas * (headspaceTemp + cKelvin)) | ||
# | ||
# #Total mol of gas is sum of equilibrated water and equilibrated air | ||
# CO2tot <- CO2eqWat + CO2eqAir | ||
# CH4tot <- CH4eqWat + CH4eqAir | ||
# N2Otot <- N2OeqWat + N2OeqAir | ||
# | ||
# #Total mol of gas minus reference air mol gas to get water mol gas before equilibration | ||
# CO2wat <- CO2tot - CO2air | ||
# CH4wat <- CH4tot - CH4air | ||
# N2Owat <- N2Otot - N2Oair | ||
# | ||
# #Concentration is mol of gas divided by volume of water | ||
# inputFile$dissolvedCO2 <- CO2wat/(volH2O/1000) | ||
# inputFile$dissolvedCH4 <- CH4wat/(volH2O/1000) | ||
# inputFile$dissolvedN2O <- N2Owat/(volH2O/1000) | ||
|
||
# Round to significant figures | ||
inputFile$dissolvedCO2 < - signif(inputFile$dissolvedCO2, digits = 3) | ||
inputFile$dissolvedCH4 < - signif(inputFile$dissolvedCH4, digits = 3) | ||
inputFile$dissolvedN2O < - signif(inputFile$dissolvedN2O, digits = 3) | ||
|
||
return (inputFile) | ||
|
||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
from testing import outputDF, externalLabData | ||
|
||
for x in range(len(outputDF['waterSampleID'])): | ||
# print(x) | ||
# FOR DEMO | ||
holder = outputDF.loc[outputDF.index[[x]], 'referenceAirSampleID'] | ||
# FOR DEMO | ||
holder2 = externalLabData.loc[:, 'sampleID'] | ||
# Need to actually access the single cell value (string) in holder using .item() | ||
# FOR DEMO | ||
booleanHolder = holder2 == holder.item() | ||
# FOR DEMO | ||
holder3 = externalLabData.loc[booleanHolder, 'concentrationCO2'] | ||
|
||
# This is all of the above intermediate variables put together into one line, and is the right hand side of the | ||
# assignment into the 'concentrationCO2Air' column of the outputDF data frame (at row 'x') | ||
# FOR DEMO | ||
rhsOfAssignment = externalLabData.loc[externalLabData.loc[:, 'sampleID'] == outputDF.loc[ | ||
outputDF.index[[x]], 'referenceAirSampleID'].item(), 'concentrationCO2'] | ||
|
||
# This needs to be in a try block because the outputDF has sampleIDs that are populated from the field samples, | ||
# and this is looping through those sampleIDs to look for matches with the lab data, but for recently | ||
# collected samples, there won't be lab data for them (but will obviously still be field samples) so this | ||
# look-up will fail. Put it in a try block with nothing in the except block to imitate the silent try | ||
# that this code has in R | ||
try: | ||
outputDF.loc[outputDF.index[[x]], 'concentrationCO2Air'] = externalLabData.loc[ | ||
externalLabData.loc[:, 'sampleID'] == outputDF.loc[ | ||
outputDF.index[[x]], 'referenceAirSampleID'].item(), 'concentrationCO2'].item() | ||
# NEEDS THE OTHER ASSIGNMENT LINE HERE (THERE ARE 2 ASSIGNMENT LINES WITHIN EACH TRY BLOCK IN THE R CODE) | ||
|
||
except Exception: | ||
pass | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,152 @@ | ||
import pandas as pd | ||
import os | ||
import os.path | ||
import re | ||
import numpy as np | ||
|
||
import rpy2 | ||
import rpy2.robjects as robjects | ||
from rpy2.robjects.packages import importr | ||
|
||
#utils = importr('utils') | ||
|
||
#utils.install_packages('neonUtilities', repos='https://cran.rstudio.com/') | ||
neonUtilities = importr('neonUtilities') | ||
|
||
# -----------------------------------------------------------------------------------------------# | ||
|
||
|
||
def def_format_sdg(data_dir = os.getcwd() + '/NEON_dissolved-gases-surfacewater.zip'): | ||
|
||
##### Default values #### | ||
volH2O = 40 #mL | ||
volGas = 20 #mL | ||
|
||
#Check if the data is loaded already using loadByProduct | ||
if 'externalLabData' and 'fieldDataProc' and 'fieldSuperParent' not in locals() or globals(): | ||
print("data is not loaded") # testing code | ||
|
||
# If data is not loaded, stack field and external lab data | ||
if os.path.isdir(re.sub("\\.zip", "", data_dir)): | ||
neonUtilities.stackByTable(dpID="DP1.20097.001", filepath=data_dir) | ||
|
||
externalLabData = pd.read_csv(re.sub("\\.zip", "", data_dir) + "/stackedFiles" + "/sdg_externalLabData.csv") | ||
fieldDataProc = pd.read_csv(re.sub("\\.zip", "", data_dir) + "/stackedFiles" + "/sdg_fieldDataProc.csv") | ||
fieldSuperParent = pd.read_csv(re.sub("\\.zip", "", data_dir) + "/stackedFiles" + "/sdg_fieldSuperParent.csv") | ||
|
||
|
||
print("Data is loaded") #testing code | ||
#Flag and set default field values | ||
|
||
if fieldDataProc['waterVolumeSyringe'].isna() is True: | ||
fieldDataProc['volH2OSource'] = 1 | ||
else: | ||
fieldDataProc['volH2OSource'] = 0 | ||
|
||
if fieldDataProc['gasVolumeSyringe'].isna() is True: | ||
fieldDataProc['volGasSource'] = 1 | ||
else: | ||
fieldDataProc['volGasSource'] = 0 | ||
|
||
if fieldDataProc['waterVolumeSyringe'].isna() is True: | ||
fieldDataProc['waterVolumeSyringe'] = volH2O | ||
|
||
if fieldDataProc['gasVolumeSyringe'].isna() is True: | ||
fieldDataProc['gasVolumeSyringe'] = volGas | ||
|
||
outputDFNames = [ | ||
'waterSampleID', | ||
'referenceAirSampleID', | ||
'equilibratedAirSampleID', | ||
'collectDate', | ||
'processedDate', | ||
'stationID', | ||
'barometricPressure', | ||
'headspaceTemp', | ||
'waterTemp', | ||
'concentrationCO2Air', | ||
'concentrationCO2Gas', | ||
'concentrationCH4Air', | ||
'concentrationCH4Gas', | ||
'concentrationN2OAir', | ||
'concentrationN2OGas', | ||
'waterVolume', | ||
'gasVolume', | ||
#'CO2BelowDetection', | ||
#'CH4BelowDetection', | ||
#'N2OBelowDetection', | ||
'volH2OSource', | ||
'volGasSource' | ||
] | ||
|
||
# Assign the number of rows and columns to the new DataFrame, outputDF | ||
# The number of rows = # of rows there is in fieldDataProc['waterSampleID'] & the number of columns = # of items in the list, outputDFNames | ||
outputDF = pd.DataFrame(index=np.arange(len(fieldDataProc['waterSampleID'])), columns=np.arange(len(outputDFNames))) | ||
# Assigns the items inside outputDFNames to the columns in the outputDF DataFrame | ||
outputDF.columns = outputDFNames | ||
|
||
# Populate the output file with field data | ||
for k in range(len(outputDF.columns)): | ||
#for ind, row in fieldDataProc.iterrows(): | ||
if outputDF.columns[k] in fieldDataProc.columns: | ||
print("Found: " + outputDF.columns[k]) | ||
|
||
outputDF.iloc[:,k] = fieldDataProc.loc[:,fieldDataProc.columns == outputDF.columns[k]] | ||
|
||
outputDF['headspaceTemp'] = fieldDataProc['storageWaterTemp'] | ||
outputDF['barometricPressure'] = fieldDataProc['ptBarometricPressure'] | ||
outputDF['waterVolume'] = fieldDataProc['waterVolumeSyringe'] | ||
outputDF['gasVolume'] = fieldDataProc['gasVolumeSyringe'] | ||
outputDF['stationID'] = fieldDataProc['namedLocation'] | ||
|
||
#outputDF = np.array(outputDF) | ||
#externalLabData = np.array(outputDF) | ||
|
||
#Populate the output file with external lab data | ||
for l in range(len(outputDF['waterSampleID'])): | ||
try: | ||
outputDF.loc[outputDF.index[[l]], 'concentrationCO2Air'] = externalLabData.loc[externalLabData.loc[:, 'sampleID'] == outputDF.loc[outputDF.index[[l]], 'referenceAirSampleID'].item(), 'concentrationCO2'].item() | ||
outputDF.loc[outputDF.index[[l]], 'concentrationCO2Gas'] = externalLabData.loc[externalLabData.loc[:, 'sampleID'] == outputDF.loc[outputDF.index[[l]], 'equilibratedAirSampleID'].item(), 'concentrationCO2'].item() | ||
|
||
except Exception: | ||
pass | ||
try: | ||
outputDF.loc[outputDF.index[[l]], 'concentrationCH4Air'] = externalLabData.loc[externalLabData.loc[:, 'sampleID'] == outputDF.loc[outputDF.index[[l]], 'referenceAirSampleID'].item(), 'concentrationCH4'].item() | ||
outputDF.loc[outputDF.index[[x]], 'concentrationCH4Gas'] = externalLabData.loc[ externalLabData.loc[:, 'sampleID'] == outputDF.loc[outputDF.index[[l]], 'equilibratedAirSampleID'].item(), 'concentrationCH4'].item() | ||
|
||
except Exception: | ||
pass | ||
|
||
try: | ||
outputDF.loc[outputDF.index[[l]], 'concentrationN2OAir'] = externalLabData.loc[externalLabData.loc[:, 'sampleID'] == outputDF.loc[outputDF.index[[l]], 'referenceAirSampleID'].item(), 'concentrationN2O'].item() | ||
outputDF.loc[outputDF.index[[l]], 'concentrationN2OGas'] = externalLabData.loc[externalLabData.loc[:, 'sampleID'] == outputDF.loc[outputDF.index[[l]], 'equilibratedAirSampleID'].item(), 'concentrationN2O'].item() | ||
except Exception: | ||
pass | ||
|
||
#Populate the output file with water temperature data for streams | ||
for m in range(len(outputDF['waterSampleID'])): | ||
try: | ||
outputDF.loc[outputDF.index[[m]], 'waterTemp'] = fieldSuperParent.loc[fieldSuperParent.loc[:, 'parentSampleID'] == outputDF.loc[outputDF.index[[m]], 'waterSampleID'].item(), 'waterTemp'].item() | ||
except Exception: | ||
pass | ||
if pd.isna(outputDF['headspaceTemp'][m]) is True: | ||
try: | ||
outputDF.loc[outputDF.index[[m]], 'headspaceTemp'] = fieldSuperParent.loc[fieldSuperParent.loc[:, 'parentSampleID'] == outputDF.loc[outputDF.index[[m]], 'waterSampleID'].item(), 'waterTemp'].item() | ||
except Exception: | ||
pass | ||
|
||
#Flag values below detection (TBD) | ||
print("**********************************************************************") | ||
print("EXTERNAL LAB DATA") | ||
print(externalLabData) | ||
print("**********************************************************************") | ||
print("FIELD DATA PROC") | ||
print(fieldDataProc) | ||
print("**********************************************************************") | ||
print("FIELD SUPER PARENT") | ||
print(fieldSuperParent) | ||
|
||
return externalLabData, fieldDataProc, fieldSuperParent, outputDF | ||
|
||
|
||
external_Lab_Data, field_Data_Proc, field_Super_Parent, output_DF = def_format_sdg() |