Skip to content

Commit

Permalink
Fix various issues
Browse files: browse the repository at this point in the history
  • Loading branch information
aarmey committed Jan 19, 2024
1 parent a9b07ac commit da585f5
Show file tree
Hide file tree
Showing 14 changed files with 100 additions and 37 deletions.
8 changes: 3 additions & 5 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,13 @@ jobs:
build:
runs-on: self-hosted
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
- name: Install dependencies
run: |
make clean
make venv
run: poetry install
- name: Build figures
run: make -j 3 all
- name: Upload files
uses: actions/upload-artifact@v2
uses: actions/upload-artifact@v4
with:
name: files
path: output
9 changes: 4 additions & 5 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,13 @@ jobs:
build:
runs-on: self-hosted
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
- name: Install dependencies
run: make venv
run: poetry install
- name: Test with pytest
run: |
. venv/bin/activate && pytest --cov=ddmc --cov-report=xml --cov-config=.github/workflows/coveragerc
run: make coverage.xml
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v2
uses: codecov/codecov-action@v3
with:
file: ./coverage.xml
flags: unittests
Expand Down
10 changes: 8 additions & 2 deletions ddmc/clustering.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
""" Clustering functions. """

from typing import Literal
import warnings
from copy import deepcopy
import itertools
Expand All @@ -22,7 +23,12 @@ class DDMC(GaussianMixture):
should have a larger effect on the peptide assignment."""

def __init__(
self, info, n_components, SeqWeight, distance_method, random_state=None
self,
info: pd.DataFrame,
n_components: int,
SeqWeight: float,
distance_method: Literal["PAM250", "Binomial"],
random_state=None,
):
super().__init__(
n_components=n_components,
Expand All @@ -40,7 +46,7 @@ def __init__(
seqs = [s.upper() for s in info["Sequence"]]

if distance_method == "PAM250":
self.seqDist = PAM250(seqs)
self.seqDist: PAM250 | Binomial = PAM250(seqs)
elif distance_method == "Binomial":
self.seqDist = Binomial(info["Sequence"], seqs)
else:
Expand Down
8 changes: 3 additions & 5 deletions ddmc/figures/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,7 @@
import svgutils.transform as st
import numpy as np
import pandas as pd
import scipy as sp
import seaborn as sns
import textwrap
import mygene
from matplotlib import gridspec, pyplot as plt
from string import ascii_uppercase
Expand Down Expand Up @@ -468,13 +466,13 @@ def ExportClusterFile(cluster, cptac=False, mcf7=False):
"""Export cluster SVG file for NetPhorest and GO analysis."""
if cptac:
c = pd.read_csv(
"msresist/data/cluster_members/CPTAC_DDMC_35CL_W100_MembersCluster"
"ddmc/data/cluster_members/CPTAC_DDMC_35CL_W100_MembersCluster"
+ str(cluster)
+ ".csv"
)
if mcf7:
c = pd.read_csv(
"msresist/data/cluster_members/msresist/data/cluster_members/CPTAC_MF7_20CL_W5_MembersCluster"
"ddmc/data/cluster_members/msresist/data/cluster_members/CPTAC_MF7_20CL_W5_MembersCluster"
+ str(cluster)
+ ".csv"
)
Expand Down Expand Up @@ -622,7 +620,7 @@ def TransformCenters(model, X):
def HotColdBehavior(centers):
# Import Cold-Hot Tumor data
y = (
pd.read_csv("msresist/data/CPTAC_LUAD/Hot_Cold.csv")
pd.read_csv("ddmc/data/CPTAC_LUAD/Hot_Cold.csv")
.dropna(axis=1)
.sort_values(by="Sample ID")
)
Expand Down
10 changes: 5 additions & 5 deletions ddmc/figures/figureM2.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,16 +128,14 @@ def ErrorAcross(distance_method, weights, n_clusters, n_runs=1, tmt=6):
"""Calculate missingness error across different number of clusters."""
assert len(weights) == len(n_clusters)
X = filter_NaNpeptides(
pd.read_csv("msresist/data/CPTAC_LUAD/CPTAC-preprocessedMotfis.csv").iloc[
:, 1:
],
pd.read_csv("ddmc/data/MS/CPTAC/CPTAC-preprocessedMotfis.csv").iloc[:, 1:],
tmt=tmt,
)
X.index = np.arange(X.shape[0])
md = X.copy()
info = md.select_dtypes(include=["object"])
X = X.select_dtypes(include=["float64"])
StoE = pd.read_csv("msresist/data/CPTAC_LUAD/IDtoExperiment.csv")
StoE = pd.read_csv("ddmc/data/MS/CPTAC/IDtoExperiment.csv")
assert all(StoE.iloc[:, 0] == X.columns), "Sample labels don't match."
X = X.to_numpy()
tmtIDX = StoE["Experiment (TMT10plex)"].to_numpy()
Expand Down Expand Up @@ -166,7 +164,9 @@ def ErrorAcross(distance_method, weights, n_clusters, n_runs=1, tmt=6):
dfs = pd.Series(
[ii, cluster, weights[jj], eDDMC, *baseline_errors], index=df.columns
)
df = df.append(dfs, ignore_index=True)
print(df)
print(dfs)
df = pd.concat([df, dfs], ignore_index=True)

return df

Expand Down
4 changes: 2 additions & 2 deletions ddmc/figures/figureM4.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def makeFigure():
i = X.select_dtypes(include=[object])

# Plot mean AUCs per model
p = pd.read_csv("ddmc/data/Validations/preds_phenotypes_rs_15cl.csv").iloc[:, 1:]
p = pd.read_csv("ddmc/data/Performance/preds_phenotypes_rs_15cl.csv").iloc[:, 1:]
p = p.melt(
id_vars=["Run", "Weight"],
value_vars=d.columns[2:],
Expand Down Expand Up @@ -110,7 +110,7 @@ def calculate_AUCs_phenotypes(ax, X, nRuns=3, n_components=35):
run.append(r)
ws.append(w)
model = DDMC(
i, n_components=ncl, SeqWeight=w, distance_method="Binomial"
i, n_components=n_components, SeqWeight=w, distance_method="Binomial"
).fit(d)

# Find and scale centers
Expand Down
1 change: 0 additions & 1 deletion ddmc/figures/figureM5.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,6 @@ def plot_clusters_binaryfeatures(centers, id_var, ax, pvals=False, loc="best"):
dodge=True,
ax=ax,
linewidth=0.25,
fliersize=2,
)
ax.legend(prop={"size": 8}, loc=loc)

Expand Down
3 changes: 1 addition & 2 deletions ddmc/figures/figureM7.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,12 @@
import textwrap
from sklearn.linear_model import LogisticRegressionCV
from sklearn.preprocessing import StandardScaler
from .common import subplotLabel, getSetup
from .common import subplotLabel, getSetup, plotDistanceToUpstreamKinase
from .figureM5 import (
build_pval_matrix,
calculate_mannW_pvals,
plot_clusters_binaryfeatures,
)
from .commmon import plotDistanceToUpstreamKinase
from ..clustering import DDMC
from ..logistic_regression import plotROC, plotClusterCoefficients
from ..pre_processing import filter_NaNpeptides
Expand Down
1 change: 0 additions & 1 deletion ddmc/figures/figureMS7.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import matplotlib
import numpy as np
import pandas as pd
from scipy.sparse.construct import random
import seaborn as sns
from sklearn.linear_model import LogisticRegressionCV
from sklearn.cluster import KMeans
Expand Down
3 changes: 0 additions & 3 deletions ddmc/gsea.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,7 @@
All functions related to GSEA analysis of clusters
"""

import pickle
import pandas as pd
import mygene
from ddmc.pre_processing import preprocessing, filter_NaNpeptides


path = "/Users/creixell/Desktop/"
Expand Down
6 changes: 1 addition & 5 deletions ddmc/motifs.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,12 @@
import glob
import pandas as pd
import numpy as np
import os
import re
from Bio import SeqIO
from Bio.Seq import Seq
from .binomial import AAlist


path = os.path.dirname(os.path.abspath(__file__))


def MapMotifs(X, names):
"""Generate pY motifs for pre-processing."""
names, seqs, pXpos, Xidx = GeneratingKinaseMotifs(names, FormatSeq(X))
Expand Down Expand Up @@ -153,7 +149,7 @@ def GeneratingKinaseMotifs(names, seqs):
"""Main function to generate motifs using 'findmotif'."""
motif_size = 5
proteome = open(
os.path.join(path, "./data/Sequence_analysis/proteome_uniprot2019.fa"), "r"
"./data/Sequence_analysis/proteome_uniprot2019.fa", "r"
)
ProteomeDict = DictProteomeNameToSeq(proteome, n="gene")
protnames, seqs, Xidx = MatchProtNames(ProteomeDict, names, seqs)
Expand Down
3 changes: 3 additions & 0 deletions makefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,6 @@ coverage.xml:

clean:
rm -rf *.pdf pylint.log output

mypy:
poetry run mypy --install-types --non-interactive --ignore-missing-imports ddmc
70 changes: 69 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ logomaker = "^0.8"
[tool.poetry.group.dev.dependencies]
pytest = "^7.4.4"
pytest-cov = "^4.1.0"
mypy = "^1.8.0"

[build-system]
requires = ["poetry-core"]
Expand Down

0 comments on commit da585f5

Please sign in to comment.