diff --git a/bezpy/mt/datalogger.py b/bezpy/mt/datalogger.py new file mode 100644 index 0000000..5cc1b08 --- /dev/null +++ b/bezpy/mt/datalogger.py @@ -0,0 +1,237 @@ +""" +Magnetotelluric datalogger classes. + +""" +import datetime +import numpy as np +import pandas as pd +from scipy.signal import zpk2tf, filtfilt, bilinear_zpk + +# Putting this here, so we don't have to reinstantiate a client +# every time we download waveforms +_IRIS_CLIENT = None + + +class DataLogger: + """DataLogger class to enable transformations from samples to practical units.""" + + def __init__(self): + self.nimsid = None + self.runlist = [] + self.runinfo = None + self.samplingrate = None + self.zpk = None + self.timedelays = None + + def download_iris_waveforms(self, name, start_time, end_time): + """Download waveforms from the IRIS client.""" + # The only reason we are putting a global in here is because + # of how long it takes to create the Client, and with multiple + # downloads from different sites, we really only need one client + # to be stored somewhere in the code + global _IRIS_CLIENT # pylint: disable=global-statement + if _IRIS_CLIENT is None: + # pylint: disable=import-error,import-outside-toplevel + from obspy.clients.fdsn import Client + _IRIS_CLIENT = Client("IRIS") + + # Download the stream + # The channels are + # E: LQN/LQE + # B: LFN/LFE/LFZ + stream = _IRIS_CLIENT.get_waveforms("EM", name, "*", "*", + start_time, end_time) + # Convert the stream to a pandas DataFrame + waveforms = _convert_stream_to_df(stream) + # Channel conversion factors and renaming + # The conversion factors were obtained from Anna Kelbert + # and are standard for the IRIS database + + # Magnetic Field + waveforms[["FE", "FN", "FZ"]] *= 0.01 # nT + # Electric Field (assuming the dipole length as 100m) + waveforms[["QN", "QE"]] *= 2.44141221047903e-05 # mV/km + + # Process the waveforms if there is a runlist object for this DataLogger object + if self.runlist: + waveforms = self._process_waveforms(waveforms) + + # Renaming + waveforms.rename(columns={"FE": "BE", "FN": "BN", "FZ": "BZ", + "QE": "EE", "QN": "EN"}, + inplace=True) + return waveforms + + def _process_waveforms(self, waveforms): + """Process the waveforms with the DataLogger information.""" + # Correcting electric Field with actual length of dipole + for runid in self.runlist: + try: + mask = ((waveforms.index > self.runinfo[runid]['Start']) & + (waveforms.index < self.runinfo[runid]['End'])) + waveforms["QN"].loc[mask] *= (100.0/self.runinfo[runid]['Ex']) # mV/km + waveforms["QE"].loc[mask] *= (100.0/self.runinfo[runid]['Ey']) # mV/km + + # If there's no info about length, then use default length. + except KeyError: + pass + + if self.zpk is None: + return waveforms + + # Filtering waveforms by NIMS system response + for channel in waveforms: # loop for FE, FN, FZ, QN, QE + for filt in self.zpk[channel]: # loop for filter + zval = self.zpk[channel][filt]['z'] + pval = self.zpk[channel][filt]['p'] + kval = self.zpk[channel][filt]['k'] + + # convert analog zpk to digital filter + zval, pval, kval = bilinear_zpk(zval, pval, kval, self.samplingrate) + b, a = zpk2tf(zval, pval, kval) + + waveforms[channel] = filtfilt(b, a, waveforms[channel].interpolate()) + return waveforms + + def add_run_info(self, runinfo, nimsid, samplingrate): + """Add run information.""" + self.runinfo = runinfo + self.nimsid = nimsid + self.samplingrate = samplingrate + + def nim_system_response(self): + """reads NIMS id and sampling rate and set parameters of Butterworth filter.""" + + # get logger and hardware, backbone from sysid and sampling rate (Hz) + logger, hardware, backbone = _get_logger_info(self.nimsid, self.samplingrate) + + # This overrides anything about time delays in John Booker's nimsread. + # This is a product of lengthy correspondence between Gary Egbert and + # Barry Narod, with reference to diagrams on the NIMS firmware, and is + # believed to be correct. + if logger == 'HP200': # 1 hour files, 4 Hz after decimation by nimsread + timedelays = [-0.0055, -0.0145, -0.0235, 0.1525, 0.0275] + elif self.samplingrate == 1: # MT1 data logger + timedelays = [-0.1920, -0.2010, -0.2100, -0.2850, -0.2850] + elif self.samplingrate == 8: # MT1 data logger + timedelays = [0.2455, 0.2365, 0.2275, 0.1525, 0.1525] + else: + raise ValueError('Unknown sampling rate, please check!') + self.timedelays = timedelays + + z1mag = [] + p1mag = [-6.28319+1j*10.8825, -6.28319-1j*10.8825, -12.5664] + k1mag = 1984.31 + + # based on the NIMS hardware, we determine the filter characteristics. + if hardware == 'PC104': + z1ele = [0.0] + p1ele = [-3.333333E-05] + k1ele = 1.0 + else: + z1ele = [0.0] + p1ele = [-1.666670E-04] + k1ele = 1.0 + + z2ele = [] + p2ele = [-3.88301+1j*11.9519, -3.88301-1j*11.9519, -10.1662+1j*7.38651, + -10.1662-1j*7.38651, -12.5664] + k2ele = 313384 + + # z: zero, p: pole, k:gain + self.zpk = dict.fromkeys(['FN', 'FE', 'FZ'], {'F1': {'z': z1mag, 'p': p1mag, 'k': k1mag}}) + + if backbone: # no high pass filters + self.zpk.update(dict.fromkeys(['QN', 'QE'], + {'F1': {'z': z2ele, 'p': p2ele, 'k': k2ele}})) + else: + self.zpk.update(dict.fromkeys(['QN', 'QE'], + {'F1': {'z': z1ele, 'p': p1ele, 'k': k1ele}, + 'F2': {'z': z2ele, 'p': p2ele, 'k': k2ele}})) + + +def _convert_stream_to_df(stream): + """Converts an obspy stream (and traces within the stream) to a dataframe.""" + cols = {} + for trace in stream: + # The first letter is sampling frequency, which is already in + # the stats object. A stream could contain multiple sampling rates + # which would name these different columns, which we don't want. + channel = trace.stats.channel[1:] + index = pd.date_range(start=trace.stats.starttime.datetime, + freq=1./trace.stats.sampling_rate*datetime.timedelta(seconds=1), + periods=trace.stats.npts) + df_trace = pd.DataFrame(index=index, + data={channel: trace.data}) + + if channel in cols: + cols[channel] = pd.concat([cols[channel], df_trace]) + else: + cols[channel] = df_trace + + df = pd.concat(cols.values(), axis=1) + return df + + +def _get_logger_info(sysid, samplingrate): + """Returns logger and hardware, backbone from sysid and sampling rate (Hz)""" + + # first search for an 'H' for the hourly time series, and get rid of it for + # the further parsing + logger = 'MT1' + i = sysid.find('H') + if not i == -1: + logger = 'HP200' + sysid = sysid[0:i] + sysid[i+2:] + + # parse the system ID. If it does not make sense, return an empty + # response structure. + j = sysid.find('-') + if j == -1: + raise ValueError('Invalid system ID in NIMsysRsp') + + nim1 = int(sysid[0:j]) + # we know the following NIMS series + nimlist1 = [2106, 2311, 2405, 2406, 2501, 2502, 2503, 2508, 2509, 2606, 2611, + 2612, 1303, 1305, 1105] + + if np.isnan(nim1): + raise ValueError('NIMS ID ' + sysid + ' does not seem to be valid. Please correct.') + if nim1 not in nimlist1: + raise ValueError('We do not know NIMS series ' + str(nim1) + '. Check the system ID.') + + nim2str = sysid[j+2:] + nim2 = int(nim2str) + + # and assume that the possible numbers are 1-30 + nimlist2 = range(1, 31) + backbone = 0 + + if nim2str[0] == 'B' or nim2str[0] == 'b': + # recognized backbone NIMS ID + backbone = 1 + elif nim2str[0] == 'A' or nim2str[0] == 'a': + # recognized new experimental NIMS + print('NIMS ID ' + sysid + ' is a new experimental system. Look out for clock drift.') + elif np.isnan(nim2): + raise ValueError('NIMS ID ' + sysid + ' does not seem to be valid. Please correct.') + elif nim2 not in nimlist2: + raise ValueError('NIMS ID ' + sysid + ' does not seem to be valid. Please correct.') + + # if 2106-1/10, assume PC104 hardware of v2001 or v2006 + hardware = 'STE' + if (nim1 == 2106) & (nim2 <= 10): + hardware = 'PC104' + + # if 2106-1/10 and the sampling rate is 4 Hz, assume HP200 (hourly files) + if (nim1 == 2106) & (nim2 <= 10) & (samplingrate == 4): + logger = 'HP200' + + # verify HP200 data logger: assuming these can only be 2106-1/10 + if logger == 'HP200': + if (nim1 != 2106) or (nim2 > 10): + raise ValueError('A possible problem with the system ID detected. HP200 data' + + 'logger has been inferred, but the system is not 2106-1/10.\n' + + 'Please make sure ' + sysid + ' does not have an H character.') + + return logger, hardware, backbone diff --git a/bezpy/mt/io.py b/bezpy/mt/io.py index 31bb000..eaa5b73 100644 --- a/bezpy/mt/io.py +++ b/bezpy/mt/io.py @@ -9,7 +9,8 @@ import pandas as pd import pkg_resources -from . import Site1d, Site3d +from .site import Site1d, Site3d +from .datalogger import DataLogger DATA_PATH_1D = pkg_resources.resource_filename('bezpy', 'mt/data_1d') + "/" @@ -97,6 +98,7 @@ def read_xml(fname): site = Site3d(name) # Store the parsed root xml element site.xml = root + site.product_id = get_text(root, "ProductId") loc = xml_site.find("Location") site.latitude = convert_float(get_text(loc, "Latitude")) @@ -129,14 +131,21 @@ def read_xml(fname): # No variance in the data fields site.Z_var = None - # NIMS stations have a RunList attribute in them - run_list = get_text(xml_site, "RunList") - if run_list is not None: - site.runlist = run_list.split() - site.runinfo, site.nimsid, site.samplingrate = read_runinfo(root) - site.nim_sys_rsp() - site.calc_resisitivity() + + site.datalogger = DataLogger() + try: + runinfo, nimsid, samplingrate = read_logger_info(site.xml) + # No info about the nimsid from the logger read, so just return + # without updating any information about it. + if nimsid is None: + return site + site.datalogger.add_run_info(runinfo, nimsid, samplingrate) + # Fill out the NIM System Response + site.datalogger.nim_system_response() + except ValueError: + pass + return site @@ -197,9 +206,11 @@ def get_1d_site(name): raise ValueError("No 1d site profile with the name: " + name) -def read_runinfo(root): +def read_logger_info(root): """Returns the run info, if present.""" runinfo = {} + nimsid = "" + samplingrate = 1.0 # Run through for each runid for field in root.findall("FieldNotes"): @@ -211,8 +222,7 @@ def read_runinfo(root): nimsid = get_text(field.find('Instrument'), 'Id') samplingrate = convert_float(field.find('SamplingRate').text) except KeyError: - nimsid = None - samplingrate = 1.0 + pass # Run through E component for ecomp in field.findall("Dipole"): diff --git a/bezpy/mt/site.py b/bezpy/mt/site.py index 66ac329..89516eb 100644 --- a/bezpy/mt/site.py +++ b/bezpy/mt/site.py @@ -4,12 +4,9 @@ """ __all__ = ["Site", "Site1d", "Site3d"] -import sys -import datetime import numpy as np import pandas as pd import scipy.interpolate -from scipy.signal import zpk2tf, filtfilt, bilinear_zpk import matplotlib.pyplot as plt from .utils import apparent_resistivity @@ -20,10 +17,6 @@ # ---------------------------- MU = 4*np.pi*1e-7 # Magnetic Permeability (H/m) -# Putting this here, so we don't have to reinstantiate a client -# every time we download waveforms -_IRIS_CLIENT = None - class Site: """Magnetotelluric Site base class.""" @@ -46,11 +39,6 @@ def __init__(self, name): self.min_period = None self.max_period = None - self.samplingrate = None - self.nimsid = None - self.zpk = None - self.timedelays = None - def convolve_fft(self, mag_x, mag_y, dt=60): """Convolution in frequency space.""" # pylint: disable=invalid-name @@ -90,122 +78,6 @@ def calcZ(self, freqs): # pylint: disable=invalid-name """Calculates transfer function, Z, from the input frequencies.""" raise NotImplementedError("calcZ not implemented for this object yet.") - def sysidcheck(self): - """Returns logger and hardware, backbone from sysid and sampling rate (Hz)""" - sysid = self.nimsid - samplingrate = self.samplingrate - - # first search for an 'H' for the hourly time series, and get rid of it for - # the further parsing - logger = 'MT1' - i = sysid.find('H') - if not i == -1: - logger = 'HP200' - sysid = sysid[0:i] + sysid[i+2:] - - # parse the system ID. If it does not make sense, return an empty - # response structure. - j = sysid.find('-') - if j == -1: - sys.exit('Invalid system ID in NIMsysRsp') - - nim1 = int(sysid[0:j]) - # we know the following NIMS series - nimlist1 = [2106, 2311, 2405, 2406, 2501, 2502, 2503, 2508, 2509, 2606, 2611, - 2612, 1303, 1305, 1105] - - if np.isnan(nim1): - sys.exit('NIMS ID ' + sysid + ' does not seem to be valid. Please correct.') - elif nim1 not in nimlist1: - sys.exit('We do not know NIMS series ' + str(nim1) + '. Check the system ID.') - - nim2str = sysid[j+2:] - nim2 = int(nim2str) - - # and assume that the possible numbers are 1-30 - nimlist2 = range(1, 31) - backbone = 0 - - if nim2str[0] == 'B' or nim2str[0] == 'b': - # recognized backbone NIMS ID - backbone = 1 - elif nim2str[0] == 'A' or nim2str[0] == 'a': - # recognized new experimental NIMS - print('NIMS ID ' + sysid + ' is a new experimental system. Look out for clock drift.') - elif np.isnan(nim2): - sys.exit('NIMS ID ' + sysid + ' does not seem to be valid. Please correct.') - elif nim2 not in nimlist2: - sys.exit('NIMS ID ' + sysid + ' does not seem to be valid. Please correct.') - - # if 2106-1/10, assume PC104 hardware of v2001 or v2006 - hardware = 'STE' - if (nim1 == 2106) & (nim2 <= 10): - hardware = 'PC104' - - # if 2106-1/10 and the sampling rate is 4 Hz, assume HP200 (hourly files) - if (nim1 == 2106) & (nim2 <= 10) & (samplingrate == 4): - logger = 'HP200' - - # verify HP200 data logger: assuming these can only be 2106-1/10 - if logger == 'HP200': - if (nim1 != 2106) or (nim2 > 10): - print('A possible problem with the system ID detected. HP200 data \ - logger has been inferred, but the system is not 2106-1/10.') - sys.exit('Please make sure ' + sysid + ' does not have an H character.') - - return logger, hardware, backbone - - def nim_sys_rsp(self): - """reads NIMS id and sampling rate and set parameters of Butterworth filter.""" - - # get logger and hardware, backbone from sysid and sampling rate (Hz) - logger, hardware, backbone = self.sysidcheck() - - # This overrides anything about time delays in John Booker's nimsread. - # This is a product of lengthy correspondence between Gary Egbert and - # Barry Narod, with reference to diagrams on the NIMS firmware, and is - # believed to be correct. - if logger == 'HP200': # 1 hour files, 4 Hz after decimation by nimsread - timedelays = [-0.0055, -0.0145, -0.0235, 0.1525, 0.0275] - elif self.samplingrate == 1: # MT1 data logger - timedelays = [-0.1920, -0.2010, -0.2100, -0.2850, -0.2850] - elif self.samplingrate == 8: # MT1 data logger - timedelays = [0.2455, 0.2365, 0.2275, 0.1525, 0.1525] - else: - sys.exit('Unknown sampling rate, please check!') - - z1mag = [] - p1mag = [-6.28319+1j*10.8825, -6.28319-1j*10.8825, -12.5664] - k1mag = 1984.31 - - # based on the NIMS hardware, we determine the filter characteristics. - if hardware == 'PC104': - z1ele = [0.0] - p1ele = [-3.333333E-05] - k1ele = 1.0 - else: - z1ele = [0.0] - p1ele = [-1.666670E-04] - k1ele = 1.0 - - z2ele = [] - p2ele = [-3.88301+1j*11.9519, -3.88301-1j*11.9519, -10.1662+1j*7.38651, - -10.1662-1j*7.38651, -12.5664] - k2ele = 313384 - - # z: zero, p: pole, k:gain - self.zpk = dict.fromkeys(['FN', 'FE', 'FZ'], {'F1': {'z': z1mag, 'p': p1mag, 'k': k1mag}}) - - if backbone: # no high pass filters - self.zpk.update(dict.fromkeys(['QN', 'QE'], - {'F1': {'z': z2ele, 'p': p2ele, 'k': k2ele}})) - else: - self.zpk.update(dict.fromkeys(['QN', 'QE'], - {'F1': {'z': z1ele, 'p': p1ele, 'k': k1ele}, - 'F2': {'z': z2ele, 'p': p2ele, 'k': k2ele}})) - - self.timedelays = timedelays - # --------------------------------- # 3d EarthScope Sites @@ -217,12 +89,13 @@ def __init__(self, name): # Call the base site class and initialize all those variables super(Site3d, self).__init__(name) + self.xml = None + self.product_id = None self.waveforms = None self.start_time = None self.end_time = None - self.runlist = [] - self.runinfo = {} + self.datalogger = None def _repr_html_(self): return f"
Site 3d: {self.name}
" + \ @@ -346,60 +219,16 @@ def calc_resisitivity(self): def download_waveforms(self): """Download the site's collected data 'waveforms' from the IRIS servers.""" - # The only reason we are putting a global in here is because - # of how long it takes to create the Client, and with multiple - # downloads from different sites, we really only need one client - # to be stored somewhere in the code - global _IRIS_CLIENT # pylint: disable=global-statement - if _IRIS_CLIENT is None: - from obspy.clients.fdsn import Client - _IRIS_CLIENT = Client("IRIS") - - # Download the stream - # The channels are - # E: LQN/LQE - # B: LFN/LFE/LFZ - stream = _IRIS_CLIENT.get_waveforms("EM", self.name, "*", "*", - self.start_time, self.end_time) - # Convert the stream to a pandas DataFrame - self.waveforms = convert_stream_to_df(stream) - # Channel conversion factors and renaming - # The conversion factors were obtained from Anna Kelbert - # and are standard for the IRIS database - - # Magnetic Field - self.waveforms[["FE", "FN", "FZ"]] *= 0.01 # nT - # Electric Field (assuming the dipole length as 100m) - self.waveforms[["QN", "QE"]] *= 2.44141221047903e-05 # mV/km - # Correcting electric Field with actual length of dipole - for runid in self.runlist: - try: - mask = ((self.waveforms.index > self.runinfo[runid]['Start']) & - (self.waveforms.index < self.runinfo[runid]['End'])) - self.waveforms["QN"].loc[mask] *= (100.0/self.runinfo[runid]['Ex']) # mV/km - self.waveforms["QE"].loc[mask] *= (100.0/self.runinfo[runid]['Ey']) # mV/km - - # If there's no info of length, then use default length. - except KeyError: - pass - - # Filtering waveforms by NIMS system response - for name in self.waveforms: # loop for FE, FN, FZ, QN, QE - for filt in self.zpk[name]: # loop for filter - zval = self.zpk[name][filt]['z'] - pval = self.zpk[name][filt]['p'] - kval = self.zpk[name][filt]['k'] - - # convert analog zpk to digital filter - zval, pval, kval = bilinear_zpk(zval, pval, kval, self.samplingrate) - b, a = zpk2tf(zval, pval, kval) - - self.waveforms[name] = filtfilt(b, a, self.waveforms[name].interpolate()) - - # Renaming - self.waveforms.rename(columns={"FE": "BE", "FN": "BN", "FZ": "BZ", - "QE": "EE", "QN": "EN"}, - inplace=True) + # If there is no runlist in the datalogger, then it wasn't able to be + # initialized, so raise that this is unavailable. + if not self.datalogger.runlist: + raise ValueError("Cannot download waveforms because the DataLogger has " + + "not been initialized properly. Possibly because this is " + + "not an IRIS MT Station.") + + self.waveforms = self.datalogger.download_iris_waveforms(self.name, + self.start_time, + self.end_time) def load_waveforms(self, directory="./"): """Load the waveform data that has already been downloaded.""" @@ -496,26 +325,3 @@ def plot_apparent_resistivity(self, interp_freqs=None): fig.suptitle(r"1D Region: {name}".format(name=self.name), size=20) return fig - - -def convert_stream_to_df(stream): - """Converts an obspy stream (and traces within the stream) to a dataframe.""" - cols = {} - for trace in stream: - # The first letter is sampling frequency, which is already in - # the stats object. A stream could contain multiple sampling rates - # which would name these different columns, which we don't want. - channel = trace.stats.channel[1:] - index = pd.date_range(start=trace.stats.starttime.datetime, - freq=1./trace.stats.sampling_rate*datetime.timedelta(seconds=1), - periods=trace.stats.npts) - df_trace = pd.DataFrame(index=index, - data={channel: trace.data}) - - if channel in cols: - cols[channel] = pd.concat([cols[channel], df_trace]) - else: - cols[channel] = df_trace - - df = pd.concat(cols.values(), axis=1) - return df