Skip to content

Commit

Permalink
More cleanup
Browse files (browse the repository at this point in the history)
  • Loading branch information
aarmey committed Jan 21, 2024
1 parent fb310e2 commit 42958bb
Show file tree
Hide file tree
Showing 11 changed files with 21 additions and 297 deletions.
25 changes: 0 additions & 25 deletions .github/workflows/autopep8.yml

This file was deleted.

2 changes: 1 addition & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ jobs:
- name: Install dependencies
run: poetry install
- name: Build figures
run: make -j 2 all
run: make -i all
- name: Upload files
uses: actions/upload-artifact@v4
with:
Expand Down
10 changes: 4 additions & 6 deletions ddmc/clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,13 +70,11 @@ def _m_step(self, X: np.ndarray, log_resp: np.ndarray):
"""
if self._missing:
labels = np.argmax(log_resp, axis=1)
centers = self.means_.T # samples x clusters
centers = np.array(self.means_) # samples x clusters
centers_fill = centers[labels, :]

assert len(labels) == X.shape[0]
for ii in range(X.shape[0]): # X is peptides x samples
X[ii, self.missing_d[ii, :]] = centers[
self.missing_d[ii, :], labels[ii]
]
assert centers_fill.shape == X.shape
X[self.missing_d] = centers_fill[self.missing_d]

super()._m_step(X, log_resp) # Do the regular m step

Expand Down
4 changes: 2 additions & 2 deletions ddmc/figures/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ def genFigure():
print(f"Figure {sys.argv[1]} is done after {time.time() - start} seconds.\n")


def getDDMC_CPTAC(n_components: int, SeqWeight: float) -> DDMC:
def getDDMC_CPTAC(n_components: int, SeqWeight: float):
# Import signaling data
X = filter_NaNpeptides(
pd.read_csv("ddmc/data/MS/CPTAC/CPTAC-preprocessedMotfis.csv").iloc[:, 1:],
Expand All @@ -143,7 +143,7 @@ def getDDMC_CPTAC(n_components: int, SeqWeight: float) -> DDMC:
distance_method="Binomial",
random_state=5,
).fit(d)
return model
return model, X


def plotMotifs(pssm, ax: axes.Axes, titles=False, yaxis=False):
Expand Down
6 changes: 3 additions & 3 deletions ddmc/figures/figureM4.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,9 @@ def makeFigure():
ax[0].legend(prop={"size": 5}, loc=0)

# Fit Data, Mix, and Seq Models
dataM = getDDMC_CPTAC(n_components=30, SeqWeight=0.0)
mixM = getDDMC_CPTAC(n_components=30, SeqWeight=250.0)
seqM = getDDMC_CPTAC(n_components=30, SeqWeight=1.0e6)
dataM, _ = getDDMC_CPTAC(n_components=30, SeqWeight=0.0)
mixM, _ = getDDMC_CPTAC(n_components=30, SeqWeight=250.0)
seqM, _ = getDDMC_CPTAC(n_components=30, SeqWeight=1.0e6)
models = [dataM, mixM, seqM]

# Center to peptide distance
Expand Down
8 changes: 1 addition & 7 deletions ddmc/figures/figureM5.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,8 @@ def makeFigure():
# Get list of axis objects
ax, f = getSetup((11, 10), (3, 3), multz={0: 1, 4: 1})

# Import signaling data
X = filter_NaNpeptides(
pd.read_csv("ddmc/data/MS/CPTAC/CPTAC-preprocessedMotfis.csv").iloc[:, 1:],
tmt=2,
)

# Fit DDMC
model = getDDMC_CPTAC(n_components=30, SeqWeight=100.0)
model, X = getDDMC_CPTAC(n_components=30, SeqWeight=100.0)

# Normalize
centers = pd.DataFrame(model.transform()).T
Expand Down
8 changes: 1 addition & 7 deletions ddmc/figures/figureM6.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,8 @@ def makeFigure():
# Get list of axis objects
ax, f = getSetup((11, 7), (2, 3), multz={0: 1})

# Import signaling data
X = filter_NaNpeptides(
pd.read_csv("ddmc/data/MS/CPTAC/CPTAC-preprocessedMotfis.csv").iloc[:, 1:],
tmt=2,
)

# Fit DDMC
model = getDDMC_CPTAC(n_components=30, SeqWeight=100.0)
model, X = getDDMC_CPTAC(n_components=30, SeqWeight=100.0)

# Import Genotype data
mutations = pd.read_csv("ddmc/data/MS/CPTAC/Patient_Mutations.csv")
Expand Down
19 changes: 3 additions & 16 deletions ddmc/figures/figureMS2.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,38 +2,25 @@
This creates Supplemental Figure 2: Cluster motifs
"""

import pandas as pd
import numpy as np
from .common import getSetup
from .common import getSetup, getDDMC_CPTAC
from .common import plotMotifs
from ..pre_processing import filter_NaNpeptides
from ..clustering import DDMC


def makeFigure():
"""Get a list of the axis objects and create a figure"""
# Get list of axis objects
ax, f = getSetup((9, 9), (5, 5))

# Import signaling data
X = filter_NaNpeptides(
pd.read_csv("ddmc/data/MS/CPTAC/CPTAC-preprocessedMotfis.csv").iloc[:, 1:],
tmt=2,
)
d = X.select_dtypes(include=[float]).T
i = X["Sequence"]

# Fit DDMC
model = DDMC(
i, n_components=30, SeqWeight=100, distance_method="Binomial", random_state=5
).fit(d)
model, _ = getDDMC_CPTAC(n_components=30, SeqWeight=100.0)

pssms, cl_num = model.pssms(PsP_background=False)
ylabels = np.arange(0, 21, 5)
xlabels = [20, 21, 22, 23, 24, 25]
for ii, cc in enumerate(cl_num):
cluster = "Cluster " + str(cc)
plotMotifs(pssms[ii], axes=ax[ii], titles=cluster, yaxis=[0, 10])
plotMotifs(pssms[ii], ax=ax[ii], titles=cluster, yaxis=[0, 10])
if ii not in ylabels:
ax[ii].set_ylabel("")
ax[ii].get_yaxis().set_visible(False)
Expand Down
20 changes: 5 additions & 15 deletions ddmc/figures/figureMS3.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,8 @@

import pandas as pd
from sklearn.linear_model import LogisticRegressionCV
from .common import getSetup
from .common import getSetup, getDDMC_CPTAC
from .figureM4 import TransformCenters, HotColdBehavior, find_patients_with_NATandTumor
from ..pre_processing import filter_NaNpeptides
from ..clustering import DDMC
from ..logistic_regression import plotROC


Expand All @@ -16,14 +14,6 @@ def makeFigure():
# Get list of axis objects
ax, f = getSetup((15, 10), (3, 5))

# Signaling
X = filter_NaNpeptides(
pd.read_csv("ddmc/data/MS/CPTAC/CPTAC-preprocessedMotfis.csv").iloc[:, 1:],
tmt=2,
)
d = X.select_dtypes(include=[float]).T
i = X["Sequence"]

# Genotype data
mutations = pd.read_csv("ddmc/data/MS/CPTAC/Patient_Mutations.csv")
mOI = mutations[
Expand All @@ -46,7 +36,7 @@ def makeFigure():
folds = 5
weights = [0, 100, 500, 1000, 1000000]
for ii, w in enumerate(weights):
model = DDMC(i, n_components=30, SeqWeight=w, distance_method="Binomial").fit(d)
model, X = getDDMC_CPTAC(n_components=30, SeqWeight=w)

# Find and scale centers
centers_gen, centers_hcb = TransformCenters(model, X)
Expand All @@ -63,7 +53,7 @@ def makeFigure():
ax[ii],
lr,
centers_gen.values,
y["STK11.mutation.status"],
y["STK11.mutation.status"].values, # type: ignore
cv_folds=folds,
title="STK11m " + "w=" + str(model.SeqWeight) + prio,
)
Expand All @@ -73,7 +63,7 @@ def makeFigure():
ax[ii + 5],
lr,
centers_gen.values,
y["EGFR.mutation.status"],
y["EGFR.mutation.status"].values, # type: ignore
cv_folds=folds,
title="EGFRm " + "w=" + str(model.SeqWeight) + prio,
)
Expand All @@ -84,7 +74,7 @@ def makeFigure():
ax[ii + 10],
lr,
centers_hcb.values,
y_hcb,
y_hcb.values,
cv_folds=folds,
title="Infiltration " + "w=" + str(model.SeqWeight) + prio,
)
Expand Down
2 changes: 1 addition & 1 deletion ddmc/figures/figureMS6.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def makeFigure():
ax, f = getSetup((11, 7), (2, 3), multz={0: 1})

# Fit DDMC
model = getDDMC_CPTAC(n_components=30, SeqWeight=100.0)
model, X = getDDMC_CPTAC(n_components=30, SeqWeight=100.0)

# Import Genotype data
mutations = pd.read_csv("ddmc/data/MS/CPTAC/Patient_Mutations.csv")
Expand Down
Loading

0 comments on commit 42958bb

Please sign in to comment.