Skip to content

Commit

Permalink
Merge pull request #1 from rodrmd99/rodrmd99-patch-1
Browse files Browse the repository at this point in the history
functional format_sdg.py
  • Loading branch information
rodrmd99 authored Jul 3, 2020
2 parents a0146c1 + f0cad7c commit 6c919cf
Show file tree
Hide file tree
Showing 9 changed files with 346 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .idea/.gitignore

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions .idea/NEON-DissolvedGas.iml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions .idea/inspectionProfiles/profiles_settings.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions .idea/misc.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions .idea/modules.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions .idea/vcs.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

126 changes: 126 additions & 0 deletions NEON_dissolved-gases-surfacewater/def_cal_sdg_conc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
import pandas as pd

def calc_sdg_conc (
inputFile,
volGas="gasVolume",
volH2O="waterVolume",
baro="barometricPressure",
waterTemp="waterTemp",
headspaceTemp="headspaceTemp",
eqCO2="concentrationCO2Gas",
sourceCO2="concentrationCO2Air",
eqCH4="concentrationCH4Gas",
sourceCH4="concentrationCH4Air",
eqN2O="concentrationN2OGas",
sourceN2O="concentrationN2OAir"
):

if type(inputFile) is "character":

inputFile = pd.read.csv(inputFile)

##### Constants #####
cGas = 8.3144598 # universal gas constant (J K-1 mol-1)
cKelvin = 273.15 # Conversion factor from Kelvin to Celsius
cPresConv = 0.000001 # Constant to convert mixing ratio from umol/mol (ppmv) to mol/mol. Unit conversions from kPa to Pa, m^3 to L, cancel out.
cT0 = 298.15
# Henry's law constant T0
# Henry's law constants and temperature dependence from Sander (2015) DOI: 10.5194/acp-15-4399-2015
ckHCO2 = 0.00033 # mol m-3 Pa, range: 0.00031 - 0.00045
ckHCH4 = 0.000014 # mol m-3 Pa, range: 0.0000096 - 0.000092
ckHN2O = 0.00024 # mol m-3 Pa, range: 0.00018 - 0.00025
cdHdTCO2 = 2400 # K, range: 2300 - 2600
cdHdTCH4 = 1900 # K, range: 1400-2400
cdHdTN2O = 2700 # K, range: 2600 - 3600

##### Populate mean global values for reference air where it isn't reported #####
inputFile[, sourceCO2] = ifelse( is.na(inputFile[, sourceCO2]), # if reported as NA
405, # use global mean https://www.esrl.noaa.gov/gmd/ccgg/trends/global.html
inputFile[, sourceCO2])

inputFile[, sourceCH4] = ifelse( is.na(inputFile[, sourceCH4]), # use global average if not measured
1.85, # https://www.esrl.noaa.gov/gmd/ccgg/trends_ch4/
inputFile[, sourceCH4])

inputFile[, sourceN2O] = ifelse( is.na(inputFile[, sourceN2O]), # use global average if not measured
0.330, # https://www.esrl.noaa.gov/gmd/hats/combined/N2O.html
inputFile[, sourceN2O])

##### Calculate dissolved gas concentration in original water sample #####
inputFile['dissolvedCO2'] = NA
inputFile['dissolvedCO2'] < - inputFile[, baro] *cPresConv *
(inputFile[, volGas] * (inputFile[, eqCO2] - inputFile[, sourceCO2]) / (
cGas * (inputFile[, headspaceTemp] + cKelvin) * inputFile[, volH2O]) +
ckHCO2 * exp(
cdHdTCO2 * (1 / (inputFile[, headspaceTemp] + cKelvin) - 1 / cT0)) * inputFile[, eqCO2])

inputFile['dissolvedCH4'] = NA
inputFile['dissolvedCH4'] = inputFile[, baro] *cPresConv *
(inputFile[, volGas] * (inputFile[, eqCH4] - inputFile[, sourceCH4]) / (
cGas * (inputFile[, headspaceTemp] + cKelvin) * inputFile[, volH2O]) +
ckHCH4 * exp(
cdHdTCH4 * (1 / (inputFile[, headspaceTemp] + cKelvin) - 1 / cT0)) * inputFile[, eqCH4])

inputFile['dissolvedN2O'] = NA
inputFile['dissolvedN2O'] = inputFile[, baro] *cPresConv *
(inputFile[, volGas] * (inputFile[, eqN2O] - inputFile[, sourceN2O]) / (
cGas * (inputFile[, headspaceTemp] + cKelvin) * inputFile[, volH2O]) +
ckHN2O * exp(
cdHdTN2O * (1 / (inputFile[, headspaceTemp] + cKelvin) - 1 / cT0)) * inputFile[, eqN2O])

##### Step-by-step Calculation of dissolved gas concentrations for testing #####

# Dissolved gas concentration in the original water samples (dissolvedGas) is
# calculated from a mass balance of the measured headspace concentration (eqGas), the
# calculated concentration in the equilibrated headspace water (eqHeadspaceWaterCO2),
# and the volumes of the headspace water and headspace gas, following:

# dissolvedGas = ((eqGas * volGas) + (eqHeadspaceWaterGas * volH2O) - (sourceGas * volGas)) / volH2O

# Measured headspace concentration should be expressed as mol L- for the mass
# balance calculation and as partial pressure for the equilibrium calculation.

# #Temperature corrected Henry's Law Constant
# HCO2 <- ckHCO2 * exp(cdHdTCO2 * ((1/(headspaceTemp+cKelvin)) - (1/cT0)))
# HCH4 <- ckHCH4 * exp(cdHdTCH4 * ((1/(headspaceTemp+cKelvin)) - (1/cT0)))
# HN2O <- ckHN2O * exp(cdHdTN2O * ((1/(headspaceTemp+cKelvin)) - (1/cT0)))
#
# #Mol of gas in equilibrated water (using Henry's law)
# CO2eqWat <- HCO2 * eqCO2 * cPresConv * baro * (volH2O/1000)
# CH4eqWat <- HCH4 * eqCH4 * cPresConv * baro * (volH2O/1000)
# N2OeqWat <- HN2O * eqN2O * cPresConv * baro * (volH2O/1000)
#
# #Mol of gas in equilibrated air (using ideal gas law)
# CO2eqAir <- (eqCO2 * cPresConv * baro * (volGas/1000))/(cGas * (headspaceTemp + cKelvin))
# CH4eqAir <- (eqCH4 * cPresConv * baro * (volGas/1000))/(cGas * (headspaceTemp + cKelvin))
# N2OeqAir <- (eqN2O * cPresConv * baro * (volGas/1000))/(cGas * (headspaceTemp + cKelvin))
#
# #Mol of gas in source gas (using ideal gas law)
# CO2air <- (inputFile[,sourceCO2] * cPresConv * baro * (volGas/1000))/(cGas * (headspaceTemp + cKelvin))
# CH4air <- (inputFile[,sourceCH4] * cPresConv * baro * (volGas/1000))/(cGas * (headspaceTemp + cKelvin))
# N2Oair <- (inputFile[,sourceN2O] * cPresConv * baro * (volGas/1000))/(cGas * (headspaceTemp + cKelvin))
#
# #Total mol of gas is sum of equilibrated water and equilibrated air
# CO2tot <- CO2eqWat + CO2eqAir
# CH4tot <- CH4eqWat + CH4eqAir
# N2Otot <- N2OeqWat + N2OeqAir
#
# #Total mol of gas minus reference air mol gas to get water mol gas before equilibration
# CO2wat <- CO2tot - CO2air
# CH4wat <- CH4tot - CH4air
# N2Owat <- N2Otot - N2Oair
#
# #Concentration is mol of gas divided by volume of water
# inputFile$dissolvedCO2 <- CO2wat/(volH2O/1000)
# inputFile$dissolvedCH4 <- CH4wat/(volH2O/1000)
# inputFile$dissolvedN2O <- N2Owat/(volH2O/1000)

# Round to significant figures
inputFile$dissolvedCO2 < - signif(inputFile$dissolvedCO2, digits = 3)
inputFile$dissolvedCH4 < - signif(inputFile$dissolvedCH4, digits = 3)
inputFile$dissolvedN2O < - signif(inputFile$dissolvedN2O, digits = 3)

return (inputFile)

}

34 changes: 34 additions & 0 deletions SDG_FORLOOP_WITHTRYCATCH_TEMPLATE.PY
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
from testing import outputDF, externalLabData

for x in range(len(outputDF['waterSampleID'])):
# print(x)
# FOR DEMO
holder = outputDF.loc[outputDF.index[[x]], 'referenceAirSampleID']
# FOR DEMO
holder2 = externalLabData.loc[:, 'sampleID']
# Need to actually access the single cell value (string) in holder using .item()
# FOR DEMO
booleanHolder = holder2 == holder.item()
# FOR DEMO
holder3 = externalLabData.loc[booleanHolder, 'concentrationCO2']

# This is all of the above intermediate variables put together into one line, and is the right hand side of the
# assignment into the 'concentrationCO2Air' column of the outputDF data frame (at row 'x')
# FOR DEMO
rhsOfAssignment = externalLabData.loc[externalLabData.loc[:, 'sampleID'] == outputDF.loc[
outputDF.index[[x]], 'referenceAirSampleID'].item(), 'concentrationCO2']

# This needs to be in a try block because the outputDF has sampleIDs that are populated from the field samples,
# and this is looping through those sampleIDs to look for matches with the lab data, but for recently
# collected samples, there won't be lab data for them (but will obviously still be field samples) so this
# look-up will fail. Put it in a try block with nothing in the except block to imitate the silent try
# that this code has in R
try:
outputDF.loc[outputDF.index[[x]], 'concentrationCO2Air'] = externalLabData.loc[
externalLabData.loc[:, 'sampleID'] == outputDF.loc[
outputDF.index[[x]], 'referenceAirSampleID'].item(), 'concentrationCO2'].item()
# NEEDS THE OTHER ASSIGNMENT LINE HERE (THERE ARE 2 ASSIGNMENT LINES WITHIN EACH TRY BLOCK IN THE R CODE)

except Exception:
pass

152 changes: 152 additions & 0 deletions format_sdg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
import pandas as pd
import os
import os.path
import re
import numpy as np

import rpy2
import rpy2.robjects as robjects
from rpy2.robjects.packages import importr

#utils = importr('utils')

#utils.install_packages('neonUtilities', repos='https://cran.rstudio.com/')
neonUtilities = importr('neonUtilities')

# -----------------------------------------------------------------------------------------------#


def def_format_sdg(data_dir = os.getcwd() + '/NEON_dissolved-gases-surfacewater.zip'):

##### Default values ####
volH2O = 40 #mL
volGas = 20 #mL

#Check if the data is loaded already using loadByProduct
if 'externalLabData' and 'fieldDataProc' and 'fieldSuperParent' not in locals() or globals():
print("data is not loaded") # testing code

# If data is not loaded, stack field and external lab data
if os.path.isdir(re.sub("\\.zip", "", data_dir)):
neonUtilities.stackByTable(dpID="DP1.20097.001", filepath=data_dir)

externalLabData = pd.read_csv(re.sub("\\.zip", "", data_dir) + "/stackedFiles" + "/sdg_externalLabData.csv")
fieldDataProc = pd.read_csv(re.sub("\\.zip", "", data_dir) + "/stackedFiles" + "/sdg_fieldDataProc.csv")
fieldSuperParent = pd.read_csv(re.sub("\\.zip", "", data_dir) + "/stackedFiles" + "/sdg_fieldSuperParent.csv")


print("Data is loaded") #testing code
#Flag and set default field values

if fieldDataProc['waterVolumeSyringe'].isna() is True:
fieldDataProc['volH2OSource'] = 1
else:
fieldDataProc['volH2OSource'] = 0

if fieldDataProc['gasVolumeSyringe'].isna() is True:
fieldDataProc['volGasSource'] = 1
else:
fieldDataProc['volGasSource'] = 0

if fieldDataProc['waterVolumeSyringe'].isna() is True:
fieldDataProc['waterVolumeSyringe'] = volH2O

if fieldDataProc['gasVolumeSyringe'].isna() is True:
fieldDataProc['gasVolumeSyringe'] = volGas

outputDFNames = [
'waterSampleID',
'referenceAirSampleID',
'equilibratedAirSampleID',
'collectDate',
'processedDate',
'stationID',
'barometricPressure',
'headspaceTemp',
'waterTemp',
'concentrationCO2Air',
'concentrationCO2Gas',
'concentrationCH4Air',
'concentrationCH4Gas',
'concentrationN2OAir',
'concentrationN2OGas',
'waterVolume',
'gasVolume',
#'CO2BelowDetection',
#'CH4BelowDetection',
#'N2OBelowDetection',
'volH2OSource',
'volGasSource'
]

# Assign the number of rows and columns to the new DataFrame, outputDF
# The number of rows = # of rows there is in fieldDataProc['waterSampleID'] & the number of columns = # of items in the list, outputDFNames
outputDF = pd.DataFrame(index=np.arange(len(fieldDataProc['waterSampleID'])), columns=np.arange(len(outputDFNames)))
# Assigns the items inside outputDFNames to the columns in the outputDF DataFrame
outputDF.columns = outputDFNames

# Populate the output file with field data
for k in range(len(outputDF.columns)):
#for ind, row in fieldDataProc.iterrows():
if outputDF.columns[k] in fieldDataProc.columns:
print("Found: " + outputDF.columns[k])

outputDF.iloc[:,k] = fieldDataProc.loc[:,fieldDataProc.columns == outputDF.columns[k]]

outputDF['headspaceTemp'] = fieldDataProc['storageWaterTemp']
outputDF['barometricPressure'] = fieldDataProc['ptBarometricPressure']
outputDF['waterVolume'] = fieldDataProc['waterVolumeSyringe']
outputDF['gasVolume'] = fieldDataProc['gasVolumeSyringe']
outputDF['stationID'] = fieldDataProc['namedLocation']

#outputDF = np.array(outputDF)
#externalLabData = np.array(outputDF)

#Populate the output file with external lab data
for l in range(len(outputDF['waterSampleID'])):
try:
outputDF.loc[outputDF.index[[l]], 'concentrationCO2Air'] = externalLabData.loc[externalLabData.loc[:, 'sampleID'] == outputDF.loc[outputDF.index[[l]], 'referenceAirSampleID'].item(), 'concentrationCO2'].item()
outputDF.loc[outputDF.index[[l]], 'concentrationCO2Gas'] = externalLabData.loc[externalLabData.loc[:, 'sampleID'] == outputDF.loc[outputDF.index[[l]], 'equilibratedAirSampleID'].item(), 'concentrationCO2'].item()

except Exception:
pass
try:
outputDF.loc[outputDF.index[[l]], 'concentrationCH4Air'] = externalLabData.loc[externalLabData.loc[:, 'sampleID'] == outputDF.loc[outputDF.index[[l]], 'referenceAirSampleID'].item(), 'concentrationCH4'].item()
outputDF.loc[outputDF.index[[x]], 'concentrationCH4Gas'] = externalLabData.loc[ externalLabData.loc[:, 'sampleID'] == outputDF.loc[outputDF.index[[l]], 'equilibratedAirSampleID'].item(), 'concentrationCH4'].item()

except Exception:
pass

try:
outputDF.loc[outputDF.index[[l]], 'concentrationN2OAir'] = externalLabData.loc[externalLabData.loc[:, 'sampleID'] == outputDF.loc[outputDF.index[[l]], 'referenceAirSampleID'].item(), 'concentrationN2O'].item()
outputDF.loc[outputDF.index[[l]], 'concentrationN2OGas'] = externalLabData.loc[externalLabData.loc[:, 'sampleID'] == outputDF.loc[outputDF.index[[l]], 'equilibratedAirSampleID'].item(), 'concentrationN2O'].item()
except Exception:
pass

#Populate the output file with water temperature data for streams
for m in range(len(outputDF['waterSampleID'])):
try:
outputDF.loc[outputDF.index[[m]], 'waterTemp'] = fieldSuperParent.loc[fieldSuperParent.loc[:, 'parentSampleID'] == outputDF.loc[outputDF.index[[m]], 'waterSampleID'].item(), 'waterTemp'].item()
except Exception:
pass
if pd.isna(outputDF['headspaceTemp'][m]) is True:
try:
outputDF.loc[outputDF.index[[m]], 'headspaceTemp'] = fieldSuperParent.loc[fieldSuperParent.loc[:, 'parentSampleID'] == outputDF.loc[outputDF.index[[m]], 'waterSampleID'].item(), 'waterTemp'].item()
except Exception:
pass

#Flag values below detection (TBD)
print("**********************************************************************")
print("EXTERNAL LAB DATA")
print(externalLabData)
print("**********************************************************************")
print("FIELD DATA PROC")
print(fieldDataProc)
print("**********************************************************************")
print("FIELD SUPER PARENT")
print(fieldSuperParent)

return externalLabData, fieldDataProc, fieldSuperParent, outputDF


external_Lab_Data, field_Data_Proc, field_Super_Parent, output_DF = def_format_sdg()

0 comments on commit 6c919cf

Please sign in to comment.