diff --git a/AUTHORS.rst b/AUTHORS.rst index ef236b7..36e749f 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -2,14 +2,7 @@ Credits ======= -This package is a collection of two implemenations of calculating intrinsic deuterium exchange rates of amide hydrogens based on a protein's primary sequence and the temperature and pH of the experiment. - - -ExPfact -------- -https://github.com/skinnersp/exPfact - - Skinner, S. P., Radou, G., Tuma, R., Houwing-Duistermaat, J. J. & Paci, E. Estimating Constraints for Protection Factors from HDX-MS Data. `Biophysical Journal `_ 116, 1194–1203 (2019). +This package is a collection of one implemenation of calculating intrinsic deuterium exchange rates of amide hydrogens based on a protein's primary sequence and the temperature and pH of the experiment. PSX diff --git a/README.rst b/README.rst index bce0c46..0f5c685 100644 --- a/README.rst +++ b/README.rst @@ -16,7 +16,7 @@ HDXrate -Python package collection for HDX intrinsic exchange rate calculation. This package bundles two existing implementations of this calculation, exPfact and PSX. +Python package collection for HDX intrinsic exchange rate calculation. This package bundles existing implementations of this calculation. The calculations are based on the following papers: @@ -49,13 +49,6 @@ Usage Credits ------- -ExPfact -------- -https://github.com/skinnersp/exPfact - - Skinner, S. P., Radou, G., Tuma, R., Houwing-Duistermaat, J. J. & Paci, E. Estimating Constraints for Protection Factors from HDX-MS Data. `Biophysical Journal `__ 116, 1194–1203 (2019). - - PSX ``` https://github.com/Niels-Bohr-Institute-XNS-StructBiophys/PSX diff --git a/comparisons/Rate differences histograms.png b/comparisons/Rate differences histograms.png deleted file mode 100644 index 9048116..0000000 Binary files a/comparisons/Rate differences histograms.png and /dev/null differ diff --git a/comparisons/Rate differences.png b/comparisons/Rate differences.png deleted file mode 100644 index d2bc76f..0000000 Binary files a/comparisons/Rate differences.png and /dev/null differ diff --git a/comparisons/comparison.py b/comparisons/comparison.py deleted file mode 100644 index 0ca9797..0000000 --- a/comparisons/comparison.py +++ /dev/null @@ -1,37 +0,0 @@ -""" -This files calculates and plots intrinsic rates of a sample protein (ecSecB) and looks at differences between -methods. -""" - -from hdxrate.expfact.api import calc_k_int as expfact_k_int -from hdxrate.psx.api import calc_k_int as psx_k_int -import numpy as np -import matplotlib.pyplot as plt - - -sequence = list('MSEQNNTEMTFQIQRIYTKDISFEAPNAPHVFQKDWQPEVKLDLDTASSQLADDVYEVVLRVTVTASLG') -r_number = np.arange(len(sequence)) + 1 - -temperature = 300 -pH = 8. - -psx_poly = psx_k_int(sequence, temperature, pH, 'poly') -psx_oligo = psx_k_int(sequence, temperature, pH, 'oligo') -expfact = expfact_k_int(sequence, temperature, pH) - -fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(15, 5), sharex=True) -ax1.set_yscale('log') -ax1.plot(r_number, psx_poly, label='psx_poly', marker='o', linestyle='None') -ax1.plot(r_number, psx_oligo, label='psx_oligo', marker='o', linestyle='None') -ax1.plot(r_number, expfact, label='expfact', marker='o', linestyle='None') -#ax1.set_ylim(0.1, 1e5) -ax1.set_xlim(0, 80) -ax1.set_ylabel('Intrinsic Exchange \n rate (1/s)') -ax1.legend() - -ax2.plot(r_number, psx_poly / expfact, color='k', label='psx_poly / expfact') -ax2.plot(r_number, psx_oligo / expfact, color='k', linestyle='--', label='psx_oligo / expfact') -ax2.set_ylabel('Fold difference') -ax2.set_xlabel('Residue number') -ax2.legend() -plt.savefig('Rate differences.png') \ No newline at end of file diff --git a/comparisons/random_sequences.py b/comparisons/random_sequences.py deleted file mode 100644 index 22821c4..0000000 --- a/comparisons/random_sequences.py +++ /dev/null @@ -1,48 +0,0 @@ -""" -This files calculates and plots intrinsic rates of randomly generated protein sequences and looks at differences between -methods. - -""" - -from hdxrate.expfact.api import calc_k_int as expfact_k_int -from hdxrate.psx.api import calc_k_int as psx_k_int -import numpy as np -import matplotlib.pyplot as plt - -np.random.seed(43) -amino_acids = list('GALMFWKQESPVICYHRNDT') -repeats = 100 -temperature = 300 -pHs = [6, 7, 8, 9] -fig, axes = plt.subplots(2, 2) - -for ax, pH in zip(axes.flatten(), pHs): - diffs = [] - for i in range(repeats): - sequence = list(np.random.choice(np.array(amino_acids), 800)) - r_number = np.arange(len(sequence)) + 1 - - psx_poly = psx_k_int(sequence, temperature, pH, 'poly') - expfact = expfact_k_int(sequence, temperature, pH) - - d = psx_poly / expfact - diffs += list(psx_poly / expfact) - - diffs = np.array(diffs) - diffs = diffs[~np.isnan(diffs)] # remove NaN - - ax.set_yscale('log') - ax.hist(diffs, bins=75) - ax.set_title(f'pH {pH}') - ax.set_ylabel('Count') - ax.set_xlabel('Fold difference') - ax.set_xlim(0, None) - #print(pH, np.min(diffs), np.max(diffs)) : - # 6 1.586076428945057 4.323111639721702 - # 7 1.6062294252044966 6.2163497448764105 - # 8 1.606402448281534 31.978492966181015 - # 9 1.6064194484834653 45.168683743072926 - -plt.tight_layout() -#plt.show() -plt.savefig('Rate differences histograms.png') diff --git a/examples.rst b/examples.rst deleted file mode 100644 index cd92fd8..0000000 --- a/examples.rst +++ /dev/null @@ -1,20 +0,0 @@ -Examples --------- - -See `/comparisons` for the code to generate the graphs below. - -The graph below shows intrinsic rates of H/D exchange calculated with exPfact / PSX (PSX: 'poly' and 'oligo'). -The overall trend in exchange rates is similar but there is an constant 1.8-2.5 fold difference between different methods, -with outliers up to ~10-fold. - -.. raw:: html - - - -The histograms show fold-difference between exPfact and PSX (poly) for different pH values (T=300K). Differences range -from 1.6-fold to 40-fold. - - -.. raw:: html - - \ No newline at end of file diff --git a/hdxrate/expfact/__init__.py b/hdxrate/expfact/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/hdxrate/expfact/api.py b/hdxrate/expfact/api.py deleted file mode 100644 index 8a8902c..0000000 --- a/hdxrate/expfact/api.py +++ /dev/null @@ -1,64 +0,0 @@ -""" -This script provides and API to expfact intrinsic exchange rate calculation -Copyright (C) 2020 Jochem Smit - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . - -""" - - -from hdxrate.expfact.kint import calculate_kint_per_residue -import numpy as np - - -def calc_k_int(sequence, temperature, pH): - """ - - Parameters - ---------- - sequence: iterable of strings - Iterable of one-letter code amino acids. Unknown amino acids can be specified with 'X' - temperature: :obj:`float` - Temperature of the labelling buffer in Kelvin - pH: :obj:`float` - pH of labelling buffer. (pH read? pH corrected? pD?) - - Returns - ------- - - k_int: ndarray, float - Numpy array of intrinsic exchange rates (in units of per second) - - """ - - list(sequence).copy() - - k_int_list = [0.] # first residue - for i, (previous, current) in enumerate(zip(sequence[:-1], sequence[1:])): - if previous == 'X' or current == 'X': - k_int_list.append(0.) - elif current == 'P': - k_int_list.append(0.) - else: - k_int = calculate_kint_per_residue(previous, current, i + 2, len(sequence), temperature, pH) - k_int_list.append(k_int) - - return np.array(k_int_list) / 60 - - -if __name__ == '__main__': - chain = list('MSEQNNTEMTFQIQRIYTK') - k_int = calc_k_int(chain, 300, 8.) - - print(k_int) diff --git a/hdxrate/expfact/constants.py b/hdxrate/expfact/constants.py deleted file mode 100644 index 7db5712..0000000 --- a/hdxrate/expfact/constants.py +++ /dev/null @@ -1,103 +0,0 @@ -""" -This script calculates the intrinsic deuteration rates of each residue of a protein into D2O -Copyright (C) 2019 Simon Skinner - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . - -""" - -from math import exp, log10 - -# Here are the parameters measured in Bai et al. (1993) -# The parameters for D and E are based on the work of Mori and al. (1997) -# and measured by and calculated in the functions acid and base -para = { - "A": [0.00, 0.00, 0.00, 0.00], - "C": [-0.54, -0.46, 0.62, 0.55], - "F": [-0.52, -0.43, -0.235859464059171, 0.0631315866300978], - "G": [-0.22, 0.218176047120386, -0.03, 0.17], - "I": [-0.91, -0.59, -0.73, -0.23], - "K": [-0.56, -0.29, -0.04, 0.12], - "L": [-0.57, -0.13, -0.576252727721677, -0.21], - "M": [-0.64, -0.28, -0.00895484265292644, 0.11], - "N": [-0.58, -0.13, 0.49, 0.38], - "P": [9999, -0.194773472023435, 9999, -0.24], - "Q": [-0.47, -0.27, 0.06, 0.20], - "R": [-0.59, -0.32, 0.0767122542818456, 0.22], - "S": [-0.437992277698594, -0.388518934646472, 0.37, 0.299550285605933], - "T": [-0.79, -0.448073125742265, -0.0662579798400606, 0.20], - "V": [-0.739022273362575, -0.30, -0.701934483299758, -0.14], - "W": [-0.40, -0.44, -0.41, -0.11], - "Y": [-0.41, -0.37, -0.27, 0.05], -} - -rho_Nterm_acid = -1.32 -rho_Nterm_base = 1.62 - -lamb_Cterm_acid = 0.96 -lamb_Cterm_base = -1.80 - -pKD = 15.05 -R = 1.987 - -ka = 10 ** (1.62) / 60 -kb = 10 ** (10.18) / 60 -kw = 10 ** (-1.5) / 60 - -Ea = 14000 -Eb = 17000 -Ew = 19000 - -lamb_cterm_acid = 0.96 - -lamb_cterm_base = -1.80 - - -def get_D(pH): - return 10 ** (-pH) - - -def get_OD(pH): - return 10 ** (pH - pKD) - - -def get_l_DTR(temperature): - return (1. / temperature - 1 / 293.) / R - - -def get_pK_his(temperature): - Ea_his = 7500 - return -log10(10 ** (-7.42) * exp(-Ea_his * (1 / (R * temperature) - 1 / (278 * R)))) - - -def get_pK_asp(temperature): - Ea_asp = 1000 - return -log10(10 ** (-4.48) * exp(-Ea_asp * (1 / (R * temperature) - 1 / (278 * R)))) - - -def get_pK_glu(temperature): - Ea_glu = 1083 - return -log10(10 ** (-4.93) * exp(-Ea_glu * (1 / (R * temperature) - 1 / (278 * R)))) - - -def get_Fta(temperature): - return exp(-Ea * get_l_DTR(temperature)) - - -def get_Ftb(temperature): - return exp(-Eb * get_l_DTR(temperature)) - - -def get_Ftw(temperature): - return exp(-Ew * get_l_DTR(temperature)) diff --git a/hdxrate/expfact/kint.py b/hdxrate/expfact/kint.py deleted file mode 100644 index 624a2f6..0000000 --- a/hdxrate/expfact/kint.py +++ /dev/null @@ -1,140 +0,0 @@ -""" -This script calculates the intrinsic deuteration rates of each residue of a protein into D2O -Copyright (C) 2019 Simon Skinner - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . - -""" - -#!/usr/bin/python - -# This script calculates the intrinsic deuteration rates of each residue -# of a protein into D2O -# The argument has to be the sequence of the protein in one letter code -# The temperature (in Kelvin) and the pH can be changed -# This script is based on the work of Bai and al. (1993) and the excel table proposed on the Englander group website. Notice that the parameters for E and D has been updated to take into account the work of Mori and al. (1997) -# kint is in min-1 - - -import numpy as np -from hdxrate.expfact.constants import * - - -def calculate_kint_for_sequence(first_residue, last_residue, seq, temperature, pH): - prolines = [] - kint = np.zeros((last_residue)) - kint.fill(-1) - res1 = "" - jj = 0 - for assignment in range(1, len(seq) + 1): - res = seq[jj] - if not res1 == "": - if assignment - first_residue == 0: - kint[assignment - 1] = -1 - elif seq[jj] == "P": - kint[assignment - 1] = -1 - prolines.append(first_residue + jj) - else: - kint[assignment - 1] = calculate_kint_per_residue(res1, res, assignment, len(seq), temperature, pH) - print("**", assignment, len(seq)) - print("***", kint[assignment - 1]) - jj += 1 - res1 = res - print("Residue\tkint") - for residue, value in zip([x for x in range(1, last_residue + 1)], kint): - print("{}\t{}".format(residue, value)) - - return kint, prolines - - -# This function calculate the kint of a residue -# The first argument is the residue i and the second the residue i-1 - -def calculate_kint_per_residue(residue1, residue2, num, length, temperature, pH): - lamb1 = acid(residue2, temperature, pH, "lamb") - rho1 = acid(residue1, temperature, pH, "rho") - - if num == 2: - rho1 += rho_Nterm_acid - elif (num == length): - lamb1 = lamb1 + lamb_Cterm_acid - - Fa = 10 ** (lamb1 + rho1) - lamb2 = base(residue2, temperature, pH, "lamb") - rho2 = base(residue1, temperature, pH, "rho") - - if num == 2: - rho2 += +rho_Nterm_base - elif (num == length): - lamb2 = lamb2 + lamb_Cterm_base - - Fb = 10 ** (lamb2 + rho2) - - kint = Fa * ka * get_D(pH) * get_Fta(temperature) * 60 + Fb * kb * get_OD(pH) * get_Ftb(temperature) * 60 + Fb * \ - kw * get_Ftw(temperature) * 60 - - return kint - - -def acid(residue, temperature, pH, value): - if residue == "H": - lamb = log10( - 10 ** (-0.8 - pH) / (10 ** (-get_pK_his(temperature)) + 10 ** (-pH)) + 10 ** (-get_pK_his(temperature)) / ( - 10 ** (-get_pK_his(temperature)) + 10 ** (-pH))) - rho = log10( - 10 ** (-0.51 - pH) / (10 ** (-get_pK_his(temperature)) + 10 ** (-pH)) + 10 ** (-get_pK_his(temperature)) / ( - 10 ** (-get_pK_his(temperature)) + 10 ** (-pH))) - elif residue == "D": - lamb = log10(10 ** (-0.9 - pH) / (10 ** (-get_pK_asp(temperature)) + 10 ** (-pH)) + 10 ** ( - 0.9 - get_pK_asp(temperature)) / (10 ** (-get_pK_asp(temperature)) + 10 ** (-pH))) - rho = log10(10 ** (-0.12 - pH) / (10 ** (-get_pK_asp(temperature)) + 10 ** (-pH)) + 10 ** ( - 0.58 - get_pK_asp(temperature)) / (10 ** (-get_pK_asp(temperature)) + 10 ** (-pH))) - elif residue == "E": - lamb = log10(10 ** (-0.6 - pH) / (10 ** (-get_pK_glu(temperature)) + 10 ** (-pH)) + 10 ** ( - -0.9 - get_pK_glu(temperature)) / (10 ** (-get_pK_glu(temperature)) + 10 ** (-pH))) - rho = log10(10 ** (-0.27 - pH) / (10 ** (-get_pK_glu(temperature)) + 10 ** (-pH)) + 10 ** ( - 0.31 - get_pK_glu(temperature)) / (10 ** (-get_pK_glu(temperature)) + 10 ** (-pH))) - else: - lamb = para[residue][0] - rho = para[residue][1] - if value == "lamb": - return lamb - elif value == "rho": - return rho - - -def base(residue, temperature, pH, value): - if residue == "H": - lamb = log10(10 ** (0.8 - pH) / (10 ** (-get_pK_his(temperature)) + 10 ** (-pH)) + 10 ** ( - -0.1 - get_pK_his(temperature)) / (10 ** (-get_pK_his(temperature)) + 10 ** (-pH))) - rho = log10(10 ** (0.83 - pH) / (10 ** (-get_pK_his(temperature)) + 10 ** (-pH)) + 10 ** ( - 0.14 - get_pK_his(temperature)) / (10 ** (-get_pK_his(temperature)) + 10 ** (-pH))) - elif residue == "D": - lamb = log10(10 ** (0.69 - pH) / (10 ** (-get_pK_asp(temperature)) + 10 ** (-pH)) + 10 ** ( - 0.1 - get_pK_asp(temperature)) / (10 ** (-get_pK_asp(temperature)) + 10 ** (-pH))) - rho = log10(10 ** (0.6 - pH) / (10 ** (-get_pK_asp(temperature)) + 10 ** (-pH)) + 10 ** ( - -0.18 - get_pK_asp(temperature)) / (10 ** (-get_pK_asp(temperature)) + 10 ** (-pH))) - elif residue == "E": - lamb = log10(10 ** (0.24 - pH) / (10 ** (-get_pK_glu(temperature)) + 10 ** (-pH)) + 10 ** ( - -0.11 - get_pK_glu(temperature)) / (10 ** (-get_pK_glu(temperature)) + 10 ** (-pH))) - rho = log10(10 ** (-0.39 - pH) / (10 ** (-get_pK_glu(temperature)) + 10 ** (-pH)) + 10 ** ( - -0.15 - get_pK_glu(temperature)) / (10 ** (-get_pK_glu(temperature)) + 10 ** (-pH))) - else: - lamb = para[residue][2] - rho = para[residue][3] - - if value == "lamb": - return lamb - elif value == "rho": - return rho diff --git a/hdxrate/hdxrate.py b/hdxrate/hdxrate.py index 12c22ea..8840a2b 100644 --- a/hdxrate/hdxrate.py +++ b/hdxrate/hdxrate.py @@ -1,5 +1,5 @@ """ -This script provides and API to expfact and psx intrinsic exchange rate calculation +This script provides and API to psx intrinsic exchange rate calculation Copyright (C) 2020 Jochem Smit This program is free software: you can redistribute it and/or modify @@ -18,12 +18,10 @@ """ - -from hdxrate.expfact.api import calc_k_int as expfact_k_int from hdxrate.psx.api import calc_k_int as psx_k_int -def k_int_from_sequence(sequence, temperature, pH, module='expfact', **kwargs): +def k_int_from_sequence(sequence, temperature, pH, module='psx', **kwargs): """ Calculate intrinsic rate of amide hydrogen exchange. @@ -40,7 +38,7 @@ def k_int_from_sequence(sequence, temperature, pH, module='expfact', **kwargs): pH: :obj:`float` pH of labelling buffer. (pH read? pH corrected? pD?) module: :obj:`str` - Which module to use for calculating intrinsic rates. Default is 'expfact', options are 'expfact', 'psx' + Which module to use for calculating intrinsic rates. Default is 'psx'. **kwargs: Additional module-specific kwargs to pass on intrinsic rate calculation. See module .api for details. @@ -53,12 +51,10 @@ def k_int_from_sequence(sequence, temperature, pH, module='expfact', **kwargs): """ sequence = list(sequence) - if module == 'expfact': - func = expfact_k_int - elif module == 'psx': + if module == 'psx': func = psx_k_int else: - raise ValueError(f"Invalid value '{module}' specified for module. Options are 'expfact' or 'psx')") + raise ValueError(f"Invalid value '{module}' specified for module. Options are 'psx'") k_int = func(sequence, temperature, pH, **kwargs) diff --git a/hdxrate/psx/IntrinsicExchange.py b/hdxrate/psx/IntrinsicExchange.py index 19aba95..55a4b27 100644 --- a/hdxrate/psx/IntrinsicExchange.py +++ b/hdxrate/psx/IntrinsicExchange.py @@ -3,7 +3,6 @@ ########### import sys import math -import numpy as np ################################################################ @@ -224,16 +223,3 @@ def CalculateExchangeRatesForASingleChain(Chain, Temperature, pH, ReferenceData) return IntrinsicEnchangeRates - -if __name__ == '__main__': - # chain = list( - # 'MSEQNNTEMTFQIQRIYTKDISFEAPNAPHVFQKDWQPEVKLDLDTASSQLADDVYEVVLRVTVTASLGEETAFLCEVQQGGIFSIAGIEGTQMAHCLGAYCPNILFPYARECITSMVSRGTFPQLNLAPVNFDALFMNYLQQQAGEGTEEHQDA') - - chain = list('MSEQNNTEMTFXXXXQIQRIYTK') - - k_int = CalculateExchangeRatesForASingleChain(chain.copy(), 300, 8., 'poly') - np.array(k_int) * 60 - for k, c in zip(k_int, chain): - print(c, k) - -# np.savetxt(r'C:\Users\jhsmi\pp\PyHDX\pyhdx\expfact\k_int_psx_poly.txt', np.array(k_int) * 60) diff --git a/hdxrate/psx/api.py b/hdxrate/psx/api.py index 4137e42..dfd4272 100644 --- a/hdxrate/psx/api.py +++ b/hdxrate/psx/api.py @@ -13,8 +13,6 @@ def calc_k_int(sequence, temperature, pH, reference_data='poly'): Temperature of the labelling buffer in Kelvin pH: :obj:`float` pH of labelling buffer. (pH read? pH corrected? pD?) - module: :obj:`str` - Which module to use for calculating intrinsic rates. Default is 'expfact', options are 'expfact', 'psx' reference_data: :obj:`str` 'poly' or 'oligo' diff --git a/tests/test_hdxrate.py b/tests/test_hdxrate.py index f019ec9..7063fa7 100644 --- a/tests/test_hdxrate.py +++ b/tests/test_hdxrate.py @@ -1,7 +1,6 @@ """Tests for `hdxrate` package.""" import numpy as np -from hdxrate.expfact.api import calc_k_int as expfact_k_int from hdxrate.psx.api import calc_k_int as psx_k_int from hdxrate import k_int_from_sequence @@ -15,9 +14,6 @@ def setup_class(cls): def test_k_int_calculation(self): - expfact = expfact_k_int(self.sequence, 300, 8.) - k_int = k_int_from_sequence(self.sequence, 300, 8., 'expfact') - assert np.allclose(k_int, expfact) psx = psx_k_int(self.sequence, 300, 8., 'poly') k_int = k_int_from_sequence(self.sequence, 300, 8., 'psx')