From c81a6580e6705ce00b8b15df571d3684e1d393c8 Mon Sep 17 00:00:00 2001 From: Greg Lucas Date: Mon, 16 Dec 2019 16:06:38 -0700 Subject: [PATCH] Refactoring the NIMS-specific code into a separate datalogger class. This will no longer fail on EMTF's that didn't use NIMS dataloggers. The datalogger is created when reading the MT XML in right away, and then utilized in the function to transform the recorded fields properly from data units to physical units. --- bezpy/mt/datalogger.py | 237 +++++++++++++++++++++++++++++++++++++++++ bezpy/mt/io.py | 32 ++++-- bezpy/mt/site.py | 220 +++----------------------------------- 3 files changed, 271 insertions(+), 218 deletions(-) create mode 100644 bezpy/mt/datalogger.py diff --git a/bezpy/mt/datalogger.py b/bezpy/mt/datalogger.py new file mode 100644 index 0000000..5cc1b08 --- /dev/null +++ b/bezpy/mt/datalogger.py @@ -0,0 +1,237 @@ +""" +Magnetotelluric datalogger classes. + +""" +import datetime +import numpy as np +import pandas as pd +from scipy.signal import zpk2tf, filtfilt, bilinear_zpk + +# Putting this here, so we don't have to reinstantiate a client +# every time we download waveforms +_IRIS_CLIENT = None + + +class DataLogger: + """DataLogger class to enable transformations from samples to practical units.""" + + def __init__(self): + self.nimsid = None + self.runlist = [] + self.runinfo = None + self.samplingrate = None + self.zpk = None + self.timedelays = None + + def download_iris_waveforms(self, name, start_time, end_time): + """Download waveforms from the IRIS client.""" + # The only reason we are putting a global in here is because + # of how long it takes to create the Client, and with multiple + # downloads from different sites, we really only need one client + # to be stored somewhere in the code + global _IRIS_CLIENT # pylint: disable=global-statement + if _IRIS_CLIENT is None: + # pylint: disable=import-error,import-outside-toplevel + from obspy.clients.fdsn import Client + _IRIS_CLIENT = Client("IRIS") + + # Download the stream + # The channels are + # E: LQN/LQE + # B: LFN/LFE/LFZ + stream = _IRIS_CLIENT.get_waveforms("EM", name, "*", "*", + start_time, end_time) + # Convert the stream to a pandas DataFrame + waveforms = _convert_stream_to_df(stream) + # Channel conversion factors and renaming + # The conversion factors were obtained from Anna Kelbert + # and are standard for the IRIS database + + # Magnetic Field + waveforms[["FE", "FN", "FZ"]] *= 0.01 # nT + # Electric Field (assuming the dipole length as 100m) + waveforms[["QN", "QE"]] *= 2.44141221047903e-05 # mV/km + + # Process the waveforms if there is a runlist object for this DataLogger object + if self.runlist: + waveforms = self._process_waveforms(waveforms) + + # Renaming + waveforms.rename(columns={"FE": "BE", "FN": "BN", "FZ": "BZ", + "QE": "EE", "QN": "EN"}, + inplace=True) + return waveforms + + def _process_waveforms(self, waveforms): + """Process the waveforms with the DataLogger information.""" + # Correcting electric Field with actual length of dipole + for runid in self.runlist: + try: + mask = ((waveforms.index > self.runinfo[runid]['Start']) & + (waveforms.index < self.runinfo[runid]['End'])) + waveforms["QN"].loc[mask] *= (100.0/self.runinfo[runid]['Ex']) # mV/km + waveforms["QE"].loc[mask] *= (100.0/self.runinfo[runid]['Ey']) # mV/km + + # If there's no info about length, then use default length. + except KeyError: + pass + + if self.zpk is None: + return waveforms + + # Filtering waveforms by NIMS system response + for channel in waveforms: # loop for FE, FN, FZ, QN, QE + for filt in self.zpk[channel]: # loop for filter + zval = self.zpk[channel][filt]['z'] + pval = self.zpk[channel][filt]['p'] + kval = self.zpk[channel][filt]['k'] + + # convert analog zpk to digital filter + zval, pval, kval = bilinear_zpk(zval, pval, kval, self.samplingrate) + b, a = zpk2tf(zval, pval, kval) + + waveforms[channel] = filtfilt(b, a, waveforms[channel].interpolate()) + return waveforms + + def add_run_info(self, runinfo, nimsid, samplingrate): + """Add run information.""" + self.runinfo = runinfo + self.nimsid = nimsid + self.samplingrate = samplingrate + + def nim_system_response(self): + """reads NIMS id and sampling rate and set parameters of Butterworth filter.""" + + # get logger and hardware, backbone from sysid and sampling rate (Hz) + logger, hardware, backbone = _get_logger_info(self.nimsid, self.samplingrate) + + # This overrides anything about time delays in John Booker's nimsread. + # This is a product of lengthy correspondence between Gary Egbert and + # Barry Narod, with reference to diagrams on the NIMS firmware, and is + # believed to be correct. + if logger == 'HP200': # 1 hour files, 4 Hz after decimation by nimsread + timedelays = [-0.0055, -0.0145, -0.0235, 0.1525, 0.0275] + elif self.samplingrate == 1: # MT1 data logger + timedelays = [-0.1920, -0.2010, -0.2100, -0.2850, -0.2850] + elif self.samplingrate == 8: # MT1 data logger + timedelays = [0.2455, 0.2365, 0.2275, 0.1525, 0.1525] + else: + raise ValueError('Unknown sampling rate, please check!') + self.timedelays = timedelays + + z1mag = [] + p1mag = [-6.28319+1j*10.8825, -6.28319-1j*10.8825, -12.5664] + k1mag = 1984.31 + + # based on the NIMS hardware, we determine the filter characteristics. + if hardware == 'PC104': + z1ele = [0.0] + p1ele = [-3.333333E-05] + k1ele = 1.0 + else: + z1ele = [0.0] + p1ele = [-1.666670E-04] + k1ele = 1.0 + + z2ele = [] + p2ele = [-3.88301+1j*11.9519, -3.88301-1j*11.9519, -10.1662+1j*7.38651, + -10.1662-1j*7.38651, -12.5664] + k2ele = 313384 + + # z: zero, p: pole, k:gain + self.zpk = dict.fromkeys(['FN', 'FE', 'FZ'], {'F1': {'z': z1mag, 'p': p1mag, 'k': k1mag}}) + + if backbone: # no high pass filters + self.zpk.update(dict.fromkeys(['QN', 'QE'], + {'F1': {'z': z2ele, 'p': p2ele, 'k': k2ele}})) + else: + self.zpk.update(dict.fromkeys(['QN', 'QE'], + {'F1': {'z': z1ele, 'p': p1ele, 'k': k1ele}, + 'F2': {'z': z2ele, 'p': p2ele, 'k': k2ele}})) + + +def _convert_stream_to_df(stream): + """Converts an obspy stream (and traces within the stream) to a dataframe.""" + cols = {} + for trace in stream: + # The first letter is sampling frequency, which is already in + # the stats object. A stream could contain multiple sampling rates + # which would name these different columns, which we don't want. + channel = trace.stats.channel[1:] + index = pd.date_range(start=trace.stats.starttime.datetime, + freq=1./trace.stats.sampling_rate*datetime.timedelta(seconds=1), + periods=trace.stats.npts) + df_trace = pd.DataFrame(index=index, + data={channel: trace.data}) + + if channel in cols: + cols[channel] = pd.concat([cols[channel], df_trace]) + else: + cols[channel] = df_trace + + df = pd.concat(cols.values(), axis=1) + return df + + +def _get_logger_info(sysid, samplingrate): + """Returns logger and hardware, backbone from sysid and sampling rate (Hz)""" + + # first search for an 'H' for the hourly time series, and get rid of it for + # the further parsing + logger = 'MT1' + i = sysid.find('H') + if not i == -1: + logger = 'HP200' + sysid = sysid[0:i] + sysid[i+2:] + + # parse the system ID. If it does not make sense, return an empty + # response structure. + j = sysid.find('-') + if j == -1: + raise ValueError('Invalid system ID in NIMsysRsp') + + nim1 = int(sysid[0:j]) + # we know the following NIMS series + nimlist1 = [2106, 2311, 2405, 2406, 2501, 2502, 2503, 2508, 2509, 2606, 2611, + 2612, 1303, 1305, 1105] + + if np.isnan(nim1): + raise ValueError('NIMS ID ' + sysid + ' does not seem to be valid. Please correct.') + if nim1 not in nimlist1: + raise ValueError('We do not know NIMS series ' + str(nim1) + '. Check the system ID.') + + nim2str = sysid[j+2:] + nim2 = int(nim2str) + + # and assume that the possible numbers are 1-30 + nimlist2 = range(1, 31) + backbone = 0 + + if nim2str[0] == 'B' or nim2str[0] == 'b': + # recognized backbone NIMS ID + backbone = 1 + elif nim2str[0] == 'A' or nim2str[0] == 'a': + # recognized new experimental NIMS + print('NIMS ID ' + sysid + ' is a new experimental system. Look out for clock drift.') + elif np.isnan(nim2): + raise ValueError('NIMS ID ' + sysid + ' does not seem to be valid. Please correct.') + elif nim2 not in nimlist2: + raise ValueError('NIMS ID ' + sysid + ' does not seem to be valid. Please correct.') + + # if 2106-1/10, assume PC104 hardware of v2001 or v2006 + hardware = 'STE' + if (nim1 == 2106) & (nim2 <= 10): + hardware = 'PC104' + + # if 2106-1/10 and the sampling rate is 4 Hz, assume HP200 (hourly files) + if (nim1 == 2106) & (nim2 <= 10) & (samplingrate == 4): + logger = 'HP200' + + # verify HP200 data logger: assuming these can only be 2106-1/10 + if logger == 'HP200': + if (nim1 != 2106) or (nim2 > 10): + raise ValueError('A possible problem with the system ID detected. HP200 data' + + 'logger has been inferred, but the system is not 2106-1/10.\n' + + 'Please make sure ' + sysid + ' does not have an H character.') + + return logger, hardware, backbone diff --git a/bezpy/mt/io.py b/bezpy/mt/io.py index 31bb000..eaa5b73 100644 --- a/bezpy/mt/io.py +++ b/bezpy/mt/io.py @@ -9,7 +9,8 @@ import pandas as pd import pkg_resources -from . import Site1d, Site3d +from .site import Site1d, Site3d +from .datalogger import DataLogger DATA_PATH_1D = pkg_resources.resource_filename('bezpy', 'mt/data_1d') + "/" @@ -97,6 +98,7 @@ def read_xml(fname): site = Site3d(name) # Store the parsed root xml element site.xml = root + site.product_id = get_text(root, "ProductId") loc = xml_site.find("Location") site.latitude = convert_float(get_text(loc, "Latitude")) @@ -129,14 +131,21 @@ def read_xml(fname): # No variance in the data fields site.Z_var = None - # NIMS stations have a RunList attribute in them - run_list = get_text(xml_site, "RunList") - if run_list is not None: - site.runlist = run_list.split() - site.runinfo, site.nimsid, site.samplingrate = read_runinfo(root) - site.nim_sys_rsp() - site.calc_resisitivity() + + site.datalogger = DataLogger() + try: + runinfo, nimsid, samplingrate = read_logger_info(site.xml) + # No info about the nimsid from the logger read, so just return + # without updating any information about it. + if nimsid is None: + return site + site.datalogger.add_run_info(runinfo, nimsid, samplingrate) + # Fill out the NIM System Response + site.datalogger.nim_system_response() + except ValueError: + pass + return site @@ -197,9 +206,11 @@ def get_1d_site(name): raise ValueError("No 1d site profile with the name: " + name) -def read_runinfo(root): +def read_logger_info(root): """Returns the run info, if present.""" runinfo = {} + nimsid = "" + samplingrate = 1.0 # Run through for each runid for field in root.findall("FieldNotes"): @@ -211,8 +222,7 @@ def read_runinfo(root): nimsid = get_text(field.find('Instrument'), 'Id') samplingrate = convert_float(field.find('SamplingRate').text) except KeyError: - nimsid = None - samplingrate = 1.0 + pass # Run through E component for ecomp in field.findall("Dipole"): diff --git a/bezpy/mt/site.py b/bezpy/mt/site.py index 66ac329..89516eb 100644 --- a/bezpy/mt/site.py +++ b/bezpy/mt/site.py @@ -4,12 +4,9 @@ """ __all__ = ["Site", "Site1d", "Site3d"] -import sys -import datetime import numpy as np import pandas as pd import scipy.interpolate -from scipy.signal import zpk2tf, filtfilt, bilinear_zpk import matplotlib.pyplot as plt from .utils import apparent_resistivity @@ -20,10 +17,6 @@ # ---------------------------- MU = 4*np.pi*1e-7 # Magnetic Permeability (H/m) -# Putting this here, so we don't have to reinstantiate a client -# every time we download waveforms -_IRIS_CLIENT = None - class Site: """Magnetotelluric Site base class.""" @@ -46,11 +39,6 @@ def __init__(self, name): self.min_period = None self.max_period = None - self.samplingrate = None - self.nimsid = None - self.zpk = None - self.timedelays = None - def convolve_fft(self, mag_x, mag_y, dt=60): """Convolution in frequency space.""" # pylint: disable=invalid-name @@ -90,122 +78,6 @@ def calcZ(self, freqs): # pylint: disable=invalid-name """Calculates transfer function, Z, from the input frequencies.""" raise NotImplementedError("calcZ not implemented for this object yet.") - def sysidcheck(self): - """Returns logger and hardware, backbone from sysid and sampling rate (Hz)""" - sysid = self.nimsid - samplingrate = self.samplingrate - - # first search for an 'H' for the hourly time series, and get rid of it for - # the further parsing - logger = 'MT1' - i = sysid.find('H') - if not i == -1: - logger = 'HP200' - sysid = sysid[0:i] + sysid[i+2:] - - # parse the system ID. If it does not make sense, return an empty - # response structure. - j = sysid.find('-') - if j == -1: - sys.exit('Invalid system ID in NIMsysRsp') - - nim1 = int(sysid[0:j]) - # we know the following NIMS series - nimlist1 = [2106, 2311, 2405, 2406, 2501, 2502, 2503, 2508, 2509, 2606, 2611, - 2612, 1303, 1305, 1105] - - if np.isnan(nim1): - sys.exit('NIMS ID ' + sysid + ' does not seem to be valid. Please correct.') - elif nim1 not in nimlist1: - sys.exit('We do not know NIMS series ' + str(nim1) + '. Check the system ID.') - - nim2str = sysid[j+2:] - nim2 = int(nim2str) - - # and assume that the possible numbers are 1-30 - nimlist2 = range(1, 31) - backbone = 0 - - if nim2str[0] == 'B' or nim2str[0] == 'b': - # recognized backbone NIMS ID - backbone = 1 - elif nim2str[0] == 'A' or nim2str[0] == 'a': - # recognized new experimental NIMS - print('NIMS ID ' + sysid + ' is a new experimental system. Look out for clock drift.') - elif np.isnan(nim2): - sys.exit('NIMS ID ' + sysid + ' does not seem to be valid. Please correct.') - elif nim2 not in nimlist2: - sys.exit('NIMS ID ' + sysid + ' does not seem to be valid. Please correct.') - - # if 2106-1/10, assume PC104 hardware of v2001 or v2006 - hardware = 'STE' - if (nim1 == 2106) & (nim2 <= 10): - hardware = 'PC104' - - # if 2106-1/10 and the sampling rate is 4 Hz, assume HP200 (hourly files) - if (nim1 == 2106) & (nim2 <= 10) & (samplingrate == 4): - logger = 'HP200' - - # verify HP200 data logger: assuming these can only be 2106-1/10 - if logger == 'HP200': - if (nim1 != 2106) or (nim2 > 10): - print('A possible problem with the system ID detected. HP200 data \ - logger has been inferred, but the system is not 2106-1/10.') - sys.exit('Please make sure ' + sysid + ' does not have an H character.') - - return logger, hardware, backbone - - def nim_sys_rsp(self): - """reads NIMS id and sampling rate and set parameters of Butterworth filter.""" - - # get logger and hardware, backbone from sysid and sampling rate (Hz) - logger, hardware, backbone = self.sysidcheck() - - # This overrides anything about time delays in John Booker's nimsread. - # This is a product of lengthy correspondence between Gary Egbert and - # Barry Narod, with reference to diagrams on the NIMS firmware, and is - # believed to be correct. - if logger == 'HP200': # 1 hour files, 4 Hz after decimation by nimsread - timedelays = [-0.0055, -0.0145, -0.0235, 0.1525, 0.0275] - elif self.samplingrate == 1: # MT1 data logger - timedelays = [-0.1920, -0.2010, -0.2100, -0.2850, -0.2850] - elif self.samplingrate == 8: # MT1 data logger - timedelays = [0.2455, 0.2365, 0.2275, 0.1525, 0.1525] - else: - sys.exit('Unknown sampling rate, please check!') - - z1mag = [] - p1mag = [-6.28319+1j*10.8825, -6.28319-1j*10.8825, -12.5664] - k1mag = 1984.31 - - # based on the NIMS hardware, we determine the filter characteristics. - if hardware == 'PC104': - z1ele = [0.0] - p1ele = [-3.333333E-05] - k1ele = 1.0 - else: - z1ele = [0.0] - p1ele = [-1.666670E-04] - k1ele = 1.0 - - z2ele = [] - p2ele = [-3.88301+1j*11.9519, -3.88301-1j*11.9519, -10.1662+1j*7.38651, - -10.1662-1j*7.38651, -12.5664] - k2ele = 313384 - - # z: zero, p: pole, k:gain - self.zpk = dict.fromkeys(['FN', 'FE', 'FZ'], {'F1': {'z': z1mag, 'p': p1mag, 'k': k1mag}}) - - if backbone: # no high pass filters - self.zpk.update(dict.fromkeys(['QN', 'QE'], - {'F1': {'z': z2ele, 'p': p2ele, 'k': k2ele}})) - else: - self.zpk.update(dict.fromkeys(['QN', 'QE'], - {'F1': {'z': z1ele, 'p': p1ele, 'k': k1ele}, - 'F2': {'z': z2ele, 'p': p2ele, 'k': k2ele}})) - - self.timedelays = timedelays - # --------------------------------- # 3d EarthScope Sites @@ -217,12 +89,13 @@ def __init__(self, name): # Call the base site class and initialize all those variables super(Site3d, self).__init__(name) + self.xml = None + self.product_id = None self.waveforms = None self.start_time = None self.end_time = None - self.runlist = [] - self.runinfo = {} + self.datalogger = None def _repr_html_(self): return f"

Site 3d: {self.name}

" + \ @@ -346,60 +219,16 @@ def calc_resisitivity(self): def download_waveforms(self): """Download the site's collected data 'waveforms' from the IRIS servers.""" - # The only reason we are putting a global in here is because - # of how long it takes to create the Client, and with multiple - # downloads from different sites, we really only need one client - # to be stored somewhere in the code - global _IRIS_CLIENT # pylint: disable=global-statement - if _IRIS_CLIENT is None: - from obspy.clients.fdsn import Client - _IRIS_CLIENT = Client("IRIS") - - # Download the stream - # The channels are - # E: LQN/LQE - # B: LFN/LFE/LFZ - stream = _IRIS_CLIENT.get_waveforms("EM", self.name, "*", "*", - self.start_time, self.end_time) - # Convert the stream to a pandas DataFrame - self.waveforms = convert_stream_to_df(stream) - # Channel conversion factors and renaming - # The conversion factors were obtained from Anna Kelbert - # and are standard for the IRIS database - - # Magnetic Field - self.waveforms[["FE", "FN", "FZ"]] *= 0.01 # nT - # Electric Field (assuming the dipole length as 100m) - self.waveforms[["QN", "QE"]] *= 2.44141221047903e-05 # mV/km - # Correcting electric Field with actual length of dipole - for runid in self.runlist: - try: - mask = ((self.waveforms.index > self.runinfo[runid]['Start']) & - (self.waveforms.index < self.runinfo[runid]['End'])) - self.waveforms["QN"].loc[mask] *= (100.0/self.runinfo[runid]['Ex']) # mV/km - self.waveforms["QE"].loc[mask] *= (100.0/self.runinfo[runid]['Ey']) # mV/km - - # If there's no info of length, then use default length. - except KeyError: - pass - - # Filtering waveforms by NIMS system response - for name in self.waveforms: # loop for FE, FN, FZ, QN, QE - for filt in self.zpk[name]: # loop for filter - zval = self.zpk[name][filt]['z'] - pval = self.zpk[name][filt]['p'] - kval = self.zpk[name][filt]['k'] - - # convert analog zpk to digital filter - zval, pval, kval = bilinear_zpk(zval, pval, kval, self.samplingrate) - b, a = zpk2tf(zval, pval, kval) - - self.waveforms[name] = filtfilt(b, a, self.waveforms[name].interpolate()) - - # Renaming - self.waveforms.rename(columns={"FE": "BE", "FN": "BN", "FZ": "BZ", - "QE": "EE", "QN": "EN"}, - inplace=True) + # If there is no runlist in the datalogger, then it wasn't able to be + # initialized, so raise that this is unavailable. + if not self.datalogger.runlist: + raise ValueError("Cannot download waveforms because the DataLogger has " + + "not been initialized properly. Possibly because this is " + + "not an IRIS MT Station.") + + self.waveforms = self.datalogger.download_iris_waveforms(self.name, + self.start_time, + self.end_time) def load_waveforms(self, directory="./"): """Load the waveform data that has already been downloaded.""" @@ -496,26 +325,3 @@ def plot_apparent_resistivity(self, interp_freqs=None): fig.suptitle(r"1D Region: {name}".format(name=self.name), size=20) return fig - - -def convert_stream_to_df(stream): - """Converts an obspy stream (and traces within the stream) to a dataframe.""" - cols = {} - for trace in stream: - # The first letter is sampling frequency, which is already in - # the stats object. A stream could contain multiple sampling rates - # which would name these different columns, which we don't want. - channel = trace.stats.channel[1:] - index = pd.date_range(start=trace.stats.starttime.datetime, - freq=1./trace.stats.sampling_rate*datetime.timedelta(seconds=1), - periods=trace.stats.npts) - df_trace = pd.DataFrame(index=index, - data={channel: trace.data}) - - if channel in cols: - cols[channel] = pd.concat([cols[channel], df_trace]) - else: - cols[channel] = df_trace - - df = pd.concat(cols.values(), axis=1) - return df