Skip to content

Commit

Permalink
More cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
Aaron Meyer committed Jan 19, 2024
1 parent 6780573 commit 51a154e
Show file tree
Hide file tree
Showing 16 changed files with 477 additions and 28,905 deletions.
288 changes: 0 additions & 288 deletions ddmc/data/MS/GrowthFactors/20180817_JG_AM_TMT10plex_R1_psms_raw.csv

This file was deleted.

This file was deleted.

815 changes: 0 additions & 815 deletions ddmc/data/MS/GrowthFactors/CombinedBR3_TR1&2_raw.csv

This file was deleted.

27,226 changes: 0 additions & 27,226 deletions ddmc/data/MS/KRAS_G12C_Haura.csv

This file was deleted.

390 changes: 385 additions & 5 deletions ddmc/figures/common.py

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion ddmc/figures/figureM3.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from ..validations import preprocess_ebdt_mcf7
from .common import subplotLabel, getSetup
from ..pca import plotPCA
from .figure2 import plotDistanceToUpstreamKinase, plotMotifs, ShuffleClusters
from .common import plotDistanceToUpstreamKinase, plotMotifs, ShuffleClusters
from .figureM5 import plot_NetPhoresScoreByKinGroup
from ..clustering import compute_control_pssm
from ..binomial import AAlist
Expand Down
2 changes: 1 addition & 1 deletion ddmc/figures/figureM5.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from ..clustering import DDMC
from .common import subplotLabel, getSetup
from ..logistic_regression import plotClusterCoefficients, plotROC
from ..figures.figure2 import plotDistanceToUpstreamKinase
from .common import plotDistanceToUpstreamKinase
from ..pca import plotPCA
from ..pre_processing import filter_NaNpeptides

Expand Down
2 changes: 1 addition & 1 deletion ddmc/figures/figureM6.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from bioinfokit import visuz
from ..clustering import DDMC
from ..pre_processing import filter_NaNpeptides
from .figure2 import plotDistanceToUpstreamKinase
from .common import plotDistanceToUpstreamKinase
from .figureM4 import find_patients_with_NATandTumor
from .figureM5 import (
plot_clusters_binaryfeatures,
Expand Down
2 changes: 1 addition & 1 deletion ddmc/figures/figureM7.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
calculate_mannW_pvals,
plot_clusters_binaryfeatures,
)
from .figure2 import plotDistanceToUpstreamKinase
from .common import plotDistanceToUpstreamKinase
from ..clustering import DDMC
from ..logistic_regression import plotROC, plotClusterCoefficients
from ..pre_processing import filter_NaNpeptides
Expand Down
2 changes: 1 addition & 1 deletion ddmc/figures/figureMS2.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import numpy as np
import seaborn as sns
from .common import subplotLabel, getSetup
from .figure2 import plotMotifs
from .common import plotMotifs
from ..pre_processing import filter_NaNpeptides
from ..clustering import DDMC

Expand Down
2 changes: 1 addition & 1 deletion ddmc/figures/figureMS6.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from sklearn.preprocessing import StandardScaler
from ..clustering import DDMC
from ..pre_processing import filter_NaNpeptides
from .figure2 import plotDistanceToUpstreamKinase
from .common import plotDistanceToUpstreamKinase
from .figureM4 import find_patients_with_NATandTumor
from .figureM5 import (
plot_clusters_binaryfeatures,
Expand Down
119 changes: 10 additions & 109 deletions ddmc/pre_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,115 +12,6 @@


###-------------------------- Pre-processing MS data --------------------------###
def preprocessing(
    AXLwt_GF=False,
    AXLm_ErlAF154=False,
    AXL_Das_DR=False,
    Vfilter=False,
    FCfilter=False,
    log2T=False,
    rawdata=False,
    corrCut=0.5,
):
    """Load the selected raw MS bio-replicates and return one merged data set.

    Steps, in order:
      1. Read and concatenate every replicate file of the selected data sets.
      2. Pass the concatenated table through ``Log2T``.
      3. Mean-center each row across its float64 columns.
      4. Add the ``Protein`` and ``Gene`` annotations from ``FormatName``.
      5. Map motifs with ``MapMotifs`` (adds ``Position`` to the merge keys).
      6. Optionally filter with ``VFilter``.
      7. Merge rows sharing the same identifying columns with ``MergeDfbyMean``.
      8. Optionally filter with ``FoldChangeFilterBasedOnMaxFC``.
      9. Convert back with ``Linear`` unless ``log2T`` is set.

    Args:
        AXLwt_GF: include the three GrowthFactors replicate files.
        AXLm_ErlAF154: include the three AXL mutant replicate files.
        AXL_Das_DR: include the two DasDR replicate files (their first
            column is dropped on load).
        Vfilter: apply ``VFilter`` with ``corrCut`` and ``stdCut=0.6``.
        FCfilter: apply ``FoldChangeFilterBasedOnMaxFC`` with ``cutoff=0.40``.
        log2T: when True, skip the final ``Linear`` conversion and keep the
            data as produced by the earlier steps.
        rawdata: when True, return right after step 4 (no motif mapping,
            filtering, or merging).
        corrCut: correlation cutoff forwarded to ``VFilter``.

    Returns:
        The processed DataFrame.

    Note:
        At least one of the data set flags must be True; otherwise indexing
        the empty list of loaded files raises ``IndexError``.
    """
    # Files located relative to the module-level ``path``.
    growth_factor_files = (
        "./data/MS/GrowthFactors/20180817_JG_AM_TMT10plex_R1_psms_raw.csv",
        "./data/MS/GrowthFactors/20190214_JG_AM_PC9_AXL_TMT10_AC28_R2_PSMs_raw.csv",
        "./data/MS/GrowthFactors/CombinedBR3_TR1&2_raw.csv",
    )
    axl_mutant_files = (
        "./data/MS/AXL/PC9_mutants_ActivatingAb_BR1_raw_wAcc.csv",
        "./data/MS/AXL/PC9_mutants_ActivatingAb_BR3_raw_wAcc.csv",
        "./data/MS/AXL/PC9_mutants_ActivatingAb_BR4_raw_wAcc.csv",
    )
    # These are opened as given (not joined with ``path``), so they resolve
    # against the current working directory.
    das_dr_files = (
        "ddmc/data/Validations/Experimental/MassSpec/06232021-DasDR_BR1_raw.csv",
        "ddmc/data/Validations/Experimental/MassSpec/06232021-DasDR_BR2_raw.csv",
    )

    filesin = []
    if AXLwt_GF:
        for rel in growth_factor_files:
            filesin.append(pd.read_csv(os.path.join(path, rel)))
    if AXLm_ErlAF154:
        for rel in axl_mutant_files:
            filesin.append(pd.read_csv(os.path.join(path, rel)))
    if AXL_Das_DR:
        for rel in das_dr_files:
            filesin.append(pd.read_csv(rel).iloc[:, 1:])

    # The float64 columns of the first loaded file define the data columns
    # used by every later step.
    first_file_floats = filesin[0].select_dtypes(include=["float64"])
    data_headers = list(first_file_floats.columns)
    # Not used further down; kept because the indexing itself raises when
    # the first file has no float64 columns.
    FCto = data_headers[0]

    combined = pd.concat(filesin)
    X = Log2T(combined)

    # Mean-center rows.
    float_cols = X.select_dtypes(include=["float64"]).columns
    row_means = X[float_cols].mean(axis=1)
    X[float_cols] = X[float_cols].sub(row_means, axis=0)

    fullnames, genes = FormatName(X)
    X["Protein"] = fullnames
    X.insert(3, "Gene", genes)
    # Every object-typed column identifies a peptide for merging purposes.
    merging_indices = list(X.select_dtypes(include=["object"]).columns)

    if rawdata:
        return X

    X = MapMotifs(X, genes)
    merging_indices.insert(3, "Position")

    if Vfilter:
        X = VFilter(X, merging_indices, data_headers, corrCut=corrCut, stdCut=0.6)

    merged = MergeDfbyMean(X.copy(), data_headers, merging_indices).reset_index()
    X = merged[merging_indices + data_headers]

    if FCfilter:
        X = FoldChangeFilterBasedOnMaxFC(X, data_headers, cutoff=0.40)

    if log2T:
        return X
    return Linear(X, data_headers)


def preprocessCPTAC():
"""Replace patient identifiers, fill NaNs, and make it compatible with current code."""
X = pd.read_csv(
Expand Down Expand Up @@ -352,6 +243,16 @@ def TripsMeanAndStd(triplicates, merging_indices, data_headers):
return X.reset_index()


def MeanCenter(X, mc_row, mc_col):
    """Mean-center the float64 columns of X by row and/or by column.

    Args:
        X: DataFrame; only its float64 columns are touched.
        mc_row: if truthy, subtract each row's mean from that row.
        mc_col: if truthy, subtract each column's mean from that column.
            When both flags are set, row centering is applied first and the
            column means are computed on the row-centered values.

    Returns:
        The same DataFrame object ``X``, modified in place.
    """
    float_cols = X.select_dtypes(include=["float64"]).columns

    if mc_row:
        row_means = X[float_cols].mean(axis=1)
        X[float_cols] = X[float_cols].sub(row_means, axis=0)

    if mc_col:
        col_means = X[float_cols].mean(axis=0)
        X[float_cols] = X[float_cols].sub(col_means, axis=1)

    return X


def FilterByRange(X, rangeCut=0.4):
"""Filter rows for those containing more than a range threshold."""
Rg = X.iloc[:, X.columns.get_level_values(1) == "ptp"]
Expand Down
6 changes: 4 additions & 2 deletions ddmc/tests/test_CoClustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,13 @@

import pytest
import numpy as np
import pandas as pd
from ..clustering import DDMC
from ..pre_processing import preprocessing
from ..pre_processing import filter_NaNpeptides


X = preprocessing(AXLwt_GF=True, Vfilter=True, FCfilter=True, log2T=True)
X = pd.read_csv("ddmc/data/MS/CPTAC/CPTAC-preprocessedMotfis.csv").iloc[:, 1:]
X = filter_NaNpeptides(X, tmt=25)
data = X.select_dtypes(include=["float64"]).T
info = X.select_dtypes(include=["object"])

Expand Down
29 changes: 0 additions & 29 deletions ddmc/tests/test_import.py

This file was deleted.

Loading

0 comments on commit 51a154e

Please sign in to comment.