diff --git a/pyhdx/__init__.py b/pyhdx/__init__.py index c96df3d0..501de776 100644 --- a/pyhdx/__init__.py +++ b/pyhdx/__init__.py @@ -1,9 +1,13 @@ from .models import PeptideMasterTable, PeptideMeasurements, HDXMeasurement, Coverage, HDXMeasurementSet from .fileIO import read_dynamx from .fitting_torch import TorchSingleFitResult, TorchBatchFitResult -from .output import Output, Report from ._version import get_versions +try: + from .output import Output, Report +except ModuleNotFoundError: + pass + __version__ = get_versions()['version'] diff --git a/pyhdx/cli.py b/pyhdx/cli.py index 5c9ffc98..309f62dd 100644 --- a/pyhdx/cli.py +++ b/pyhdx/cli.py @@ -1,7 +1,7 @@ import argparse from ipaddress import ip_address from pyhdx.web import serve -from pyhdx.config import ConfigurationSettings +from pyhdx.config import cfg from pyhdx.local_cluster import verify_cluster, default_cluster @@ -15,8 +15,6 @@ def main(): parser.add_argument('--scheduler_address', help="Run with local cluster :") args = parser.parse_args() - cfg = ConfigurationSettings() - if args.scheduler_address: ip, port = args.scheduler_address.split(':') if not ip_address(ip): diff --git a/pyhdx/config.ini b/pyhdx/config.ini index b71e5f76..e9632af9 100644 --- a/pyhdx/config.ini +++ b/pyhdx/config.ini @@ -1,3 +1,7 @@ [cluster] scheduler_address = 127.0.0.1:52123 n_workers = 10 + +[fitting] +dtype = float64 +device = cpu diff --git a/pyhdx/config.py b/pyhdx/config.py index 502b3db1..0e3cef79 100644 --- a/pyhdx/config.py +++ b/pyhdx/config.py @@ -1,10 +1,15 @@ import configparser from pathlib import Path -from pyhdx import __version__ +from pyhdx._version import get_versions from packaging import version +import torch import warnings +__version__ = get_versions()['version'] +del get_versions + + def read_config(path): """read .ini config file at path, return configparser.ConfigParser object""" config = configparser.ConfigParser() @@ -86,6 +91,21 @@ def write_config(self, path=None): with open(pth, 'w') as config_file: self._config.write(config_file) + @property + def TORCH_DTYPE(self): + dtype = self.get('fitting', 'dtype') + if dtype in ['float64', 'double']: + return torch.float64 + elif dtype in ['float32', 'float']: + return torch.float32 + else: + raise ValueError(f'Unsupported data type: {dtype}') + + @property + def TORCH_DEVICE(self): + device = self.get('fitting', 'device') + return torch.device(device) + def valid_config(): """Checks if the current config file in the user home directory is a valid config @@ -111,4 +131,6 @@ def valid_config(): config_file_path = config_dir / 'config.ini' if not valid_config(): - reset_config() \ No newline at end of file + reset_config() + +cfg = ConfigurationSettings() \ No newline at end of file diff --git a/pyhdx/fitting.py b/pyhdx/fitting.py index 1591e64e..74cde61d 100644 --- a/pyhdx/fitting.py +++ b/pyhdx/fitting.py @@ -10,9 +10,10 @@ from tqdm import trange from pyhdx.fit_models import SingleKineticModel, TwoComponentAssociationModel, TwoComponentDissociationModel -from pyhdx.fitting_torch import DeltaGFit, TorchSingleFitResult, TorchBatchFitResult, TORCH_DTYPE, TORCH_DEVICE -from pyhdx.models import Protein +from pyhdx.fitting_torch import DeltaGFit, TorchSingleFitResult, TorchBatchFitResult from pyhdx.support import temporary_seed +from pyhdx.models import Protein +from pyhdx.config import cfg EmptyResult = namedtuple('EmptyResult', ['chi_squared', 'params']) er = EmptyResult(np.nan, {k: np.nan for k in ['tau1', 'tau2', 'r']}) @@ -451,7 +452,7 @@ def fit_gibbs_global(hdxm, initial_guess, r1=R1, epochs=EPOCHS, patience=PATIENC assert len(initial_guess) == hdxm.Nr, "Invalid length of initial guesses" dtype = torch.float64 - deltaG_par = torch.nn.Parameter(torch.tensor(initial_guess, dtype=TORCH_DTYPE, device=TORCH_DEVICE).unsqueeze(-1)) #reshape (nr, 1) + deltaG_par = torch.nn.Parameter(torch.tensor(initial_guess, dtype=cfg.TORCH_DTYPE, device=cfg.TORCH_DEVICE).unsqueeze(-1)) #reshape (nr, 1) model = DeltaGFit(deltaG_par) criterion = torch.nn.MSELoss(reduction='mean') @@ -580,7 +581,7 @@ def _batch_fit(hdx_set, initial_guess, reg_func, fit_kwargs, optimizer_kwargs): assert initial_guess.shape == (hdx_set.Ns, hdx_set.Nr), "Invalid shape of initial guesses" - deltaG_par = torch.nn.Parameter(torch.tensor(initial_guess, dtype=TORCH_DTYPE, device=TORCH_DEVICE).reshape(hdx_set.Ns, hdx_set.Nr, 1)) + deltaG_par = torch.nn.Parameter(torch.tensor(initial_guess, dtype=cfg.TORCH_DTYPE, device=cfg.TORCH_DEVICE).reshape(hdx_set.Ns, hdx_set.Nr, 1)) model = DeltaGFit(deltaG_par) criterion = torch.nn.MSELoss(reduction='mean') diff --git a/pyhdx/fitting_torch.py b/pyhdx/fitting_torch.py index a471f827..23cfdd73 100644 --- a/pyhdx/fitting_torch.py +++ b/pyhdx/fitting_torch.py @@ -8,9 +8,10 @@ from pyhdx.fileIO import dataframe_to_file from pyhdx.models import Protein +from pyhdx.config import cfg -TORCH_DTYPE = t.double -TORCH_DEVICE = t.device('cpu') +# TORCH_DTYPE = t.double +# TORCH_DEVICE = t.device('cpu') class DeltaGFit(nn.Module): def __init__(self, deltaG): @@ -46,11 +47,12 @@ def estimate_errors(hdxm, deltaG): ------- """ + dtype = t.float64 joined = pd.concat([deltaG, hdxm.coverage['exchanges']], axis=1, keys=['dG', 'ex']) dG = joined.query('ex==True')['dG'] - deltaG = t.tensor(dG.to_numpy(), dtype=TORCH_DTYPE) + deltaG = t.tensor(dG.to_numpy(), dtype=dtype) - tensors = {k: v.cpu() for k, v in hdxm.get_tensors(exchanges=True).items()} + tensors = {k: v.cpu() for k, v in hdxm.get_tensors(exchanges=True, dtype=dtype).items()} def hes_loss(deltaG_input): criterion = t.nn.MSELoss(reduction='sum') diff --git a/pyhdx/local_cluster.py b/pyhdx/local_cluster.py index 00715881..40ffb580 100644 --- a/pyhdx/local_cluster.py +++ b/pyhdx/local_cluster.py @@ -1,10 +1,8 @@ from dask.distributed import LocalCluster, Client import time -from pyhdx.config import ConfigurationSettings +from pyhdx.config import cfg import argparse -cfg = ConfigurationSettings() - def default_client(timeout='2s'): """Return Dask client at scheduler adress as defined by the global config""" scheduler_address = cfg.get('cluster', 'scheduler_address') diff --git a/pyhdx/models.py b/pyhdx/models.py index 6eb09020..4a2d7802 100644 --- a/pyhdx/models.py +++ b/pyhdx/models.py @@ -12,6 +12,7 @@ from pyhdx.alignment import align_dataframes from pyhdx.fileIO import dataframe_to_file from pyhdx.support import reduce_inter, fields_view +from pyhdx.config import cfg def protein_wrapper(func, *args, **kwargs): @@ -748,7 +749,7 @@ def d_exp(self): df.columns.name = 'exposure' return df - def get_tensors(self, exchanges=False): + def get_tensors(self, exchanges=False, dtype=None): """ Returns a dictionary of tensor variables for fitting to Linderstrøm-Lang kinetics. @@ -784,8 +785,8 @@ def get_tensors(self, exchanges=False): else: bools = np.ones(self.Nr, dtype=bool) - dtype = pyhdx.fitting_torch.TORCH_DTYPE - device = pyhdx.fitting_torch.TORCH_DEVICE + dtype = dtype or cfg.TORCH_DTYPE + device = cfg.TORCH_DEVICE tensors = { 'temperature': torch.tensor([self.temperature], dtype=dtype, device=device).unsqueeze(-1), @@ -1130,7 +1131,7 @@ def add_alignment(self, alignment, first_r_numbers=None): self.aligned_indices = df.to_numpy(dtype=int).T - def get_tensors(self): + def get_tensors(self, dtype=None): #todo create correct shapes as per table X for all temperature = np.array([kf.temperature for kf in self.hdxm_list]) @@ -1142,8 +1143,8 @@ def get_tensors(self): k_int = np.zeros((self.Ns, self.Nr)) k_int[self.masks['sr']] = k_int_values - dtype = pyhdx.fitting_torch.TORCH_DTYPE - device = pyhdx.fitting_torch.TORCH_DEVICE + dtype = dtype or cfg.TORCH_DTYPE + device = cfg.TORCH_DEVICE tensors = { 'temperature': torch.tensor(temperature, dtype=dtype, device=device).reshape(self.Ns, 1, 1), diff --git a/pyhdx/web/apps.py b/pyhdx/web/apps.py index 27c79228..d96e46e7 100644 --- a/pyhdx/web/apps.py +++ b/pyhdx/web/apps.py @@ -15,7 +15,7 @@ import logging import panel as pn from pyhdx.web.log import logger -from pyhdx.config import ConfigurationSettings +from pyhdx.config import cfg from pyhdx.local_cluster import default_client from pathlib import Path @@ -27,7 +27,6 @@ current_dir = Path(__file__).parent data_dir = current_dir.parent.parent / 'tests' / 'test_data' global_opts = {'show_grid': True} -cfg = ConfigurationSettings() @logger('pyhdx') def main_app(client='default'): diff --git a/pyhdx/web/serve.py b/pyhdx/web/serve.py index 3206576e..cb8d9906 100644 --- a/pyhdx/web/serve.py +++ b/pyhdx/web/serve.py @@ -4,7 +4,7 @@ import numpy as np import torch -from pyhdx.config import ConfigurationSettings +from pyhdx.config import cfg from pyhdx.local_cluster import verify_cluster import logging @@ -24,7 +24,7 @@ def run_main(): np.random.seed(43) torch.manual_seed(43) - scheduler_address = ConfigurationSettings().get('cluster', 'scheduler_address') + scheduler_address = cfg.get('cluster', 'scheduler_address') if not verify_cluster(scheduler_address): print(f"No valid Dask scheduler found at specified address: '{scheduler_address}'") return diff --git a/tests/test_fitting.py b/tests/test_fitting.py index 4e2dc508..b6b9d2e9 100644 --- a/tests/test_fitting.py +++ b/tests/test_fitting.py @@ -3,6 +3,7 @@ from pyhdx.fileIO import read_dynamx, csv_to_protein, csv_to_dataframe, save_fitresult, load_fitresult from pyhdx.fitting import fit_rates_weighted_average, fit_gibbs_global, fit_gibbs_global_batch, fit_gibbs_global_batch_aligned from pyhdx.models import HDXMeasurementSet +from pyhdx.config import cfg import numpy as np import torch import time @@ -10,6 +11,7 @@ from pathlib import Path import pandas as pd +from pandas.testing import assert_series_equal cwd = Path(__file__).parent input_dir = cwd / 'test_data' / 'input' @@ -52,6 +54,35 @@ def test_initial_guess(self): # todo additional tests: # result = fit_rates_half_time_interpolate() + def test_dtype_cuda(self): + check_deltaG = csv_to_protein(output_dir / 'ecSecB_torch_fit.csv') + initial_rates = csv_to_dataframe(output_dir / 'ecSecB_guess.csv') + + cfg.set('fitting', 'device', 'cuda') + gibbs_guess = self.hdxm_apo.guess_deltaG(initial_rates['rate']).to_numpy() + + if torch.cuda.is_available(): + fr_global = fit_gibbs_global(self.hdxm_apo, gibbs_guess, epochs=1000, r1=2) + out_deltaG = fr_global.output + for field in ['deltaG', 'k_obs', 'covariance']: + assert_series_equal(check_deltaG[field], out_deltaG[field], rtol=0.01, check_dtype=False) + else: + with pytest.raises(AssertionError, match=r".* CUDA .*"): + fr_global = fit_gibbs_global(self.hdxm_apo, gibbs_guess, epochs=1000, r1=2) + + cfg.set('fitting', 'device', 'cpu') + cfg.set('fitting', 'dtype', 'float32') + + fr_global = fit_gibbs_global(self.hdxm_apo, gibbs_guess, epochs=1000, r1=2) + dg = fr_global.model.deltaG + assert dg.dtype == torch.float32 + + out_deltaG = fr_global.output + for field in ['deltaG', 'k_obs']: + assert_series_equal(check_deltaG[field], out_deltaG[field], rtol=0.01, check_dtype=False) + + cfg.set('fitting', 'dtype', 'float64') + def test_global_fit(self): initial_rates = csv_to_dataframe(output_dir / 'ecSecB_guess.csv') @@ -64,33 +95,50 @@ def test_global_fit(self): out_deltaG = fr_global.output check_deltaG = csv_to_protein(output_dir / 'ecSecB_torch_fit.csv') - assert np.allclose(check_deltaG['deltaG'], out_deltaG['deltaG'], equal_nan=True, rtol=0.01) - assert np.allclose(check_deltaG['covariance'], out_deltaG['covariance'], equal_nan=True, rtol=0.01) - assert np.allclose(check_deltaG['k_obs'], out_deltaG['k_obs'], equal_nan=True, rtol=0.01) + for field in ['deltaG', 'covariance', 'k_obs']: + assert_series_equal(check_deltaG[field], out_deltaG[field], rtol=0.01) mse = fr_global.get_mse() assert mse.shape == (self.hdxm_apo.Np, self.hdxm_apo.Nt) @pytest.mark.skip(reason="Longer fit is not checked by default due to long computation times") def test_global_fit_extended(self): + check_deltaG = csv_to_protein(output_dir / 'ecSecB_torch_fit_epochs_20000.csv') initial_rates = csv_to_dataframe(output_dir / 'ecSecB_guess.csv') + gibbs_guess = self.hdxm_apo.guess_deltaG(initial_rates['rate']).to_numpy() t0 = time.time() # Very crude benchmarks - gibbs_guess = self.hdxm_apo.guess_deltaG(initial_rates['rate']).to_numpy() fr_global = fit_gibbs_global(self.hdxm_apo, gibbs_guess, epochs=20000, r1=2) t1 = time.time() assert t1 - t0 < 20 out_deltaG = fr_global.output - check_deltaG = csv_to_protein(output_dir / 'ecSecB_torch_fit_epochs_20000.csv') - - assert np.allclose(check_deltaG['deltaG'], out_deltaG['deltaG'], equal_nan=True, rtol=0.01) - assert np.allclose(check_deltaG['covariance'], out_deltaG['covariance'], equal_nan=True, rtol=0.01) - assert np.allclose(check_deltaG['k_obs'], out_deltaG['k_obs'], equal_nan=True, rtol=0.01) + for field in ['deltaG', 'k_obs', 'covariance']: + assert_series_equal(check_deltaG[field], out_deltaG[field], rtol=0.01, check_dtype=False) mse = fr_global.get_mse() assert mse.shape == (self.hdxm_apo.Np, self.hdxm_apo.Nt) + @pytest.mark.skip(reason="Longer fit is not checked by default due to long computation times") + def test_global_fit_extended_cuda(self): + check_deltaG = csv_to_protein(output_dir / 'ecSecB_torch_fit_epochs_20000.csv') + initial_rates = csv_to_dataframe(output_dir / 'ecSecB_guess.csv') + gibbs_guess = self.hdxm_apo.guess_deltaG(initial_rates['rate']).to_numpy() + + #todo allow contextmanger? + cfg.set('fitting', 'device', 'cuda') + cfg.set('fitting', 'dtype', 'float32') + + fr_global = fit_gibbs_global(self.hdxm_apo, gibbs_guess, epochs=20000, r1=2) + out_deltaG = fr_global.output + + for field in ['deltaG', 'k_obs']: + assert_series_equal(check_deltaG[field], out_deltaG[field], rtol=0.01, check_dtype=False) + + cfg.set('fitting', 'device', 'cpu') + cfg.set('fitting', 'dtype', 'float64') + + def test_batch_fit(self, tmp_path): hdx_set = HDXMeasurementSet([self.hdxm_apo, self.hdxm_dimer]) guess = csv_to_dataframe(output_dir / 'ecSecB_guess.csv')