Skip to content

Commit

Permalink
match fd/nd control to data using start/end as index
Browse files Browse the repository at this point in the history
in favour of using pd.merge
  • Loading branch information
Jhsmit committed Sep 27, 2021
1 parent 64a63aa commit 6bdc32c
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 24 deletions.
44 changes: 23 additions & 21 deletions pyhdx/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,11 +232,13 @@ def __init__(self, data, drop_first=1, ignore_prolines=True, d_percentage=100.,
assert 0 <= d_percentage <= 100., 'Deuteration percentage must be between 0 and 100'
d_percentage /= 100.

self.data = data.copy()
self.data = data.copy().reset_index(drop=True)
self.data.index.name = 'peptide_index'

if remove_nan:
self.data = self.data.dropna(subset=['uptake'])
if sort:
self.data = self.data.sort_values(['start', 'end', 'sequence', 'exposure', 'state'])
self.data = self.data.sort_values(['start', 'end', 'sequence', 'state', 'exposure'])

for col in ['start', 'end', 'sequence']:
target = '_' + col
Expand Down Expand Up @@ -329,33 +331,33 @@ def set_control(self, control_1, control_0=None):
"""

fd_df = self.get_data(*control_1)[['start', 'end', 'uptake']]
fd_df.rename(columns={'uptake': 'FD_uptake'}, inplace=True)
try:
fd_df = self.get_data(*control_1)[['start', 'end', 'uptake']].set_index(['start', 'end'], verify_integrity=True)
except ValueError as e:
raise ValueError("FD control has duplicate entries") from e

if fd_df.size == 0:
raise ValueError(f'No matching peptides with state {control_1[0]} and exposure {control_1[1]}')

if control_0 is None:
nd_df = self.get_data(*control_1).copy()[['start', 'end', 'uptake']]
nd_df['uptake'] = 0
if nd_df.size == 0:
raise ValueError(f'No matching peptides with state {control_0[0]} and exposure {control_0[1]}')
else:
nd_df = self.get_data(*control_0)[['start', 'end', 'uptake']]


try:
if control_0 is None:
nd_df = self.get_data(*control_1).copy()[['start', 'end', 'uptake']].set_index(['start', 'end'], verify_integrity=True)
nd_df['uptake'] = 0

nd_df.rename(columns={'uptake': 'ND_uptake'}, inplace=True)
else:
nd_df = self.get_data(*control_0)[['start', 'end', 'uptake']].set_index(['start', 'end'], verify_integrity=True)
if nd_df.size == 0:
raise ValueError(f'No matching peptides with state {control_0[0]} and exposure {control_0[1]}')
except ValueError as e:
raise ValueError("ND control has duplicate entries") from e

# this should probably go to the log (but at the moment there isn't one when running without the GUI)
# assert control_1.size > 0, f"No peptides found with state '{control_1[0]}' and exposure '{control_1[1]}'"
# assert control_0.size > 0, f"No peptides found with state '{control_0[0]}' and exposure '{control_0[1]}'"
self.data.set_index(['start', 'end'], append=True, inplace=True)
self.data.reset_index(level=0, inplace=True)

self.data = pd.merge(self.data, fd_df, on=['start', 'end'], how='left')
self.data = pd.merge(self.data, nd_df, on=['start', 'end'], how='left')
self.data['rfu'] = (self.data['uptake'] - nd_df['uptake']) / (fd_df['uptake'] - nd_df['uptake'])
self.data['uptake_corrected'] = self.data['rfu'] * self.data['ex_residues']

self.data['rfu'] = (self.data['uptake'] - self.data['ND_uptake']) / (self.data['FD_uptake'] - self.data['ND_uptake'])
self.data['uptake_corrected'] = (self.data['uptake'] / self.data['FD_uptake'] * self.data['ex_residues'])
self.data = self.data.set_index('peptide_index', append=True).reset_index(level=[0, 1])

def select(self, **kwargs):
"""
Expand Down
7 changes: 4 additions & 3 deletions tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from operator import add
from pathlib import Path
import pandas as pd
from pandas.testing import assert_frame_equal
import tempfile
import pickle
import pytest
Expand Down Expand Up @@ -60,9 +61,9 @@ def test_tensors(self):
def test_rfu(self):
rfu_residues = self.hdxm.rfu_residues
compare = csv_to_dataframe(output_dir / 'ecSecB_rfu_per_exposure.csv')
compare_array = compare.to_numpy()

np.testing.assert_allclose(rfu_residues, compare_array)
compare.columns = compare.columns.astype(float)
compare.columns.name = 'exposure'
assert_frame_equal(rfu_residues, compare)

def test_to_file(self):
with tempfile.TemporaryDirectory() as tempdir:
Expand Down

0 comments on commit 6bdc32c

Please sign in to comment.