Fix: Adapt to old silixa xml versions
- Split functions for the old vs new xml readers
- Added some checks for mismatched libraries and datasets in plotting
- Bug fix for modern versions of xarray in merging single into double-ended
klapo committed Mar 14, 2023
1 parent c3a8eac commit 2f7aadd
Showing 4 changed files with 99 additions and 10 deletions.
14 changes: 9 additions & 5 deletions src/pyfocs/data.py
@@ -153,7 +153,7 @@ def double_end_dv_clean(ds):


def merge_single(
dstore_fw, dstore_bw, shift_window=20, fixed_shift=None, plot_result=False
dstore_fw, dstore_bw, shift_window=500, fixed_shift=None, plot_result=False
):
"""
Merge two single-ended channels to a single double-ended configuration.
@@ -172,11 +172,15 @@ def merge_single(
if not fixed_shift:
shift_lims = np.arange(-shift_window, shift_window, 1, dtype=int)

# Estimate the number of indices to shift the two channels to align them.
# I use some overly generous limits for searching
# This parameter should be made an optional argument passed to the function.
# Estimate the number of indices to shift the two channels to align them. The
# time mean is used, but dtscalibration relies on having a matrix of a
# particular dimensionality. Accommodate this by expanding back out the time
# dimension after averaging.
shift1, shift2 = suggest_cable_shift_double_ended(
double.mean(dim="time").compute(),
double[["st", "ast", "rst", "rast"]]
.mean(dim="time")
.compute()
.expand_dims("time", axis=1),
shift_lims,
plot_result=plot_result,
)
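
The comment above on dimensionality is worth a concrete illustration. Below is a minimal, self-contained sketch of the mean-then-expand_dims pattern used in the new call to suggest_cable_shift_double_ended; the synthetic dataset and sizes are invented for the example, and only the variable names (st, ast, rst, rast) come from the diff.

import numpy as np
import xarray as xr

# Synthetic double-ended Stokes/anti-Stokes data with dims (x, time).
laf = np.linspace(0.0, 100.0, 50)
time = np.arange(10)
double = xr.Dataset(
    {
        name: (("x", "time"), np.random.rand(laf.size, time.size))
        for name in ("st", "ast", "rst", "rast")
    },
    coords={"x": laf, "time": time},
)

# Averaging over time drops the "time" dimension entirely...
mean_only = double[["st", "ast", "rst", "rast"]].mean(dim="time").compute()
print(mean_only["st"].dims)  # ('x',)

# ...so expand it back out as a length-1 axis, restoring the 2-D
# (space, time) shape that downstream routines expect.
restored = mean_only.expand_dims("time", axis=1)
print(restored["st"].dims)   # ('x', 'time')
print(restored["st"].shape)  # (50, 1)
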
5 changes: 5 additions & 0 deletions src/pyfocs/dts_plots.py
@@ -119,6 +119,11 @@ def bath_validation(
except KeyError:
del callib[bn]

if not callib:
raise KeyError(
"None of the specified reference sections could be found in the dataset."
)

if plot_var == "bias":
label_text = "Bias (K)"
if not bath_lims:
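
For context, the new guard follows a prune-then-check pattern: reference sections missing from the dataset are dropped from the calibration library, and an error is raised only if nothing usable remains. A minimal stand-alone sketch is below; callib and the bath names are illustrative stand-ins, not pyfocs objects.

callib = {"warm_bath": (10.0, 12.5), "cold_bath": (55.0, 58.0)}
dataset = {"warm_bath": [1.2, 1.3]}  # "cold_bath" is missing from the dataset

# Copy the keys with list() so entries can be deleted while iterating.
for bn in list(callib):
    if bn not in dataset:
        del callib[bn]

if not callib:
    raise KeyError(
        "None of the specified reference sections could be found in the dataset."
    )
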
2 changes: 1 addition & 1 deletion src/pyfocs/dtsarch.py
@@ -263,7 +263,7 @@ def archiver(cfg):
break

# If the archiving mode is active, do not process
# the last archiving interval, as it may be incomplete. Instead
# the last archiving interval, as it may be incomplete. Instead,
# exit and handle this interval with the next scheduled call
# to the archiver. This step just requires no action here.
if mode == "active":
88 changes: 84 additions & 4 deletions src/pyfocs/readDTS.py
@@ -38,9 +38,83 @@ def xml_read(dumbXMLFile):
# general catch here and hope for the best.
# Raising this error allows us to catch corrupted files.
raise CorruptedXMLError
# Remove all of the bullshit
doc = doc["logs"]["log"]
# Strip the unneeded metadata and accommodate the various versions.

if "logs" in doc:
doc = doc["logs"]["log"]
return read_new_silixa_xml(doc)
# The xml file is older.
elif "wellLogs" in doc:
doc = doc["wellLogs"]["wellLog"]
return read_old_silixa_xml(doc)


def read_old_silixa_xml(doc):
# Extract units/metadata info out of xml dictionary
metaData = {
"LAF_beg": float(doc["minIndex"]["#text"]),
"LAF_end": float(doc["maxIndex"]["#text"]),
"dLAF": float(doc["blockInfo"]["stepIncrement"]["#text"]),
"dt_start": pd.to_datetime(
doc["minDateTimeIndex"], infer_datetime_format=True, utc=True
),
"dt_end": pd.to_datetime(
doc["maxDateTimeIndex"], infer_datetime_format=True, utc=True
),
"probe1Temperature": float(doc["customData"]["probe1Temperature"]),
"probe2Temperature": float(doc["customData"]["probe2Temperature"]),
}

# Extract data
data = doc["logData"]["data"]

numEntries = np.size(data)
LAF = np.empty(numEntries)
Ps = np.empty_like(LAF)
Pas = np.empty_like(LAF)
temp = np.empty_like(LAF)

# Check dts type based on the number of columns
if len(data[0]["#text"].split(",")) == 4:
dtsType = "single_ended"
elif len(data[0]["#text"].split(",")) == 6:
dtsType = "double_ended"
else:
raise IOError("Unrecognized xml format... dumping first row \n" + str(data[0]))

# Single ended data
if "single_ended" in dtsType:
for dnum, dlist in enumerate(data):
LAF[dnum], Ps[dnum], Pas[dnum], temp[dnum] = list(
map(float, dlist["#text"].split(","))
)
actualData = pd.DataFrame.from_dict(
{"LAF": LAF, "Ps": Ps, "Pas": Pas, "temp": temp}
).set_index("LAF")

# Double ended data
elif "double_ended" in dtsType:
rPs = np.empty_like(LAF)
rPas = np.empty_like(LAF)

for dnum, dlist in enumerate(data):
(
LAF[dnum],
Ps[dnum],
Pas[dnum],
rPs[dnum],
rPas[dnum],
temp[dnum],
) = list(map(float, dlist["#text"].split(",")))

actualData = pd.DataFrame.from_dict(
{"LAF": LAF, "Ps": Ps, "Pas": Pas, "rPs": rPs, "rPas": rPas, "temp": temp}
).set_index("LAF")

return (actualData, metaData)


def read_new_silixa_xml(doc):
# Extract units/metadata info out of xml dictionary
metaData = {
"LAF_beg": float(doc["startIndex"]["#text"]),
@@ -143,6 +217,8 @@ def archive_read(cfg, write_mode="preserve", prevNumChunk=0):
# List of files to iterate over
dirConTar = [dC for dC in os.listdir() if chan in dC and ".tar.gz" in dC]
dirConTar.sort()
if not dirConTar:
print("No archives were found for {}.".format(chan))

# Untar files
for tFile in dirConTar:
@@ -161,13 +237,17 @@ def archive_read(cfg, write_mode="preserve", prevNumChunk=0):
# Extract the archive
t = tarfile.open(tFile)
t.extractall()
t.close
t.close()

# List of files to iterate over
dirConXML = [dC for dC in os.listdir() if chan in dC and ".xml" in dC]
# No .xml files could be found, so an error needs to be raised.
if not dirConXML:
raise ValueError("No xml files found in {}".format(tFile))

# Sorting by name should be equivalent to sorting by date.
dirConXML.sort()
nTotal = np.size(dirConXML)
ds = None
ds_list = []

# Read each xml file, assign to an xarray Dataset, concatenate
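
The version split above hinges on the root element that xmltodict returns: new Silixa files nest the measurement under logs/log, older files under wellLogs/wellLog. A small stand-alone sketch of that detection is below; the toy XML strings are invented for illustration and carry none of the real instrument fields.

import xmltodict

new_style = "<logs><log><startIndex uom='m'>0.0</startIndex></log></logs>"
old_style = "<wellLogs><wellLog><minIndex uom='m'>0.0</minIndex></wellLog></wellLogs>"

def detect_silixa_version(raw_xml):
    doc = xmltodict.parse(raw_xml)
    # New-style files put everything under logs/log, old-style under wellLogs/wellLog.
    if "logs" in doc:
        return "new", doc["logs"]["log"]
    elif "wellLogs" in doc:
        return "old", doc["wellLogs"]["wellLog"]
    raise IOError("Unrecognized Silixa xml layout.")

print(detect_silixa_version(new_style)[0])  # new
print(detect_silixa_version(old_style)[0])  # old
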
