diff --git a/.github/workflows/autopep8.yml b/.github/workflows/autopep8.yml
deleted file mode 100644
index 31b6db25..00000000
--- a/.github/workflows/autopep8.yml
+++ /dev/null
@@ -1,25 +0,0 @@
-name: autopep8
-
-on:
-  schedule:
-    - cron: '0 18 * * *'
-jobs:
-  build:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v2
-      - name: autopep8
-        id: autopep8
-        uses: peter-evans/autopep8@v1
-        with:
-          args: --exit-code -raai --max-line-length 200 .
-      - name: Create Pull Request
-        if: steps.autopep8.outputs.exit-code == 2
-        uses: peter-evans/create-pull-request@v3
-        with:
-          token: ${{ secrets.GITHUB_TOKEN }}
-          commit-message: autopep8 action fixes
-          title: Fixes by autopep8 action
-          body: This is an auto-generated PR with fixes by autopep8.
-          labels: autopep8, automated pr
-          branch: create-pull-request/autopep8
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 1fad3c15..c105f0dd 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -10,7 +10,7 @@ jobs:
     - name: Install dependencies
       run: poetry install
     - name: Build figures
-      run: make -j 2 all
+      run: make -i all
     - name: Upload files
       uses: actions/upload-artifact@v4
       with:
diff --git a/ddmc/clustering.py b/ddmc/clustering.py
index fff8a5e4..3a4d6eab 100644
--- a/ddmc/clustering.py
+++ b/ddmc/clustering.py
@@ -70,13 +70,11 @@ def _m_step(self, X: np.ndarray, log_resp: np.ndarray):
         """
         if self._missing:
             labels = np.argmax(log_resp, axis=1)
-            centers = self.means_.T  # samples x clusters
+            centers = np.array(self.means_)  # samples x clusters
+            centers_fill = centers[labels, :]
 
-            assert len(labels) == X.shape[0]
-            for ii in range(X.shape[0]):  # X is peptides x samples
-                X[ii, self.missing_d[ii, :]] = centers[
-                    self.missing_d[ii, :], labels[ii]
-                ]
+            assert centers_fill.shape == X.shape
+            X[self.missing_d] = centers_fill[self.missing_d]
 
         super()._m_step(X, log_resp)  # Do the regular m step
 
diff --git a/ddmc/figures/common.py b/ddmc/figures/common.py
index 70756654..0ecd8933 100644
--- a/ddmc/figures/common.py
+++ b/ddmc/figures/common.py
@@ -126,7 +126,7 @@ def genFigure():
     print(f"Figure {sys.argv[1]} is done after {time.time() - start} seconds.\n")
 
 
-def getDDMC_CPTAC(n_components: int, SeqWeight: float) -> DDMC:
+def getDDMC_CPTAC(n_components: int, SeqWeight: float):
     # Import signaling data
     X = filter_NaNpeptides(
         pd.read_csv("ddmc/data/MS/CPTAC/CPTAC-preprocessedMotfis.csv").iloc[:, 1:],
@@ -143,7 +143,7 @@ def getDDMC_CPTAC(n_components: int, SeqWeight: float) -> DDMC:
         distance_method="Binomial",
         random_state=5,
     ).fit(d)
-    return model
+    return model, X
 
 
 def plotMotifs(pssm, ax: axes.Axes, titles=False, yaxis=False):
diff --git a/ddmc/figures/figureM4.py b/ddmc/figures/figureM4.py
index 493aa5fd..f16de360 100644
--- a/ddmc/figures/figureM4.py
+++ b/ddmc/figures/figureM4.py
@@ -46,9 +46,9 @@ def makeFigure():
     ax[0].legend(prop={"size": 5}, loc=0)
 
     # Fit Data, Mix, and Seq Models
-    dataM = getDDMC_CPTAC(n_components=30, SeqWeight=0.0)
-    mixM = getDDMC_CPTAC(n_components=30, SeqWeight=250.0)
-    seqM = getDDMC_CPTAC(n_components=30, SeqWeight=1.0e6)
+    dataM, _ = getDDMC_CPTAC(n_components=30, SeqWeight=0.0)
+    mixM, _ = getDDMC_CPTAC(n_components=30, SeqWeight=250.0)
+    seqM, _ = getDDMC_CPTAC(n_components=30, SeqWeight=1.0e6)
     models = [dataM, mixM, seqM]
 
     # Center to peptide distance
diff --git a/ddmc/figures/figureM5.py b/ddmc/figures/figureM5.py
index 8f6fb32e..2b8c637f 100644
--- a/ddmc/figures/figureM5.py
+++ b/ddmc/figures/figureM5.py
@@ -24,14 +24,8 @@ def makeFigure():
     # Get list of axis objects
     ax, f = getSetup((11, 10), (3, 3), multz={0: 1, 4: 1})
 
-    # Import signaling data
-    X = filter_NaNpeptides(
-        pd.read_csv("ddmc/data/MS/CPTAC/CPTAC-preprocessedMotfis.csv").iloc[:, 1:],
-        tmt=2,
-    )
-
     # Fit DDMC
-    model = getDDMC_CPTAC(n_components=30, SeqWeight=100.0)
+    model, X = getDDMC_CPTAC(n_components=30, SeqWeight=100.0)
 
     # Normalize
     centers = pd.DataFrame(model.transform()).T
diff --git a/ddmc/figures/figureM6.py b/ddmc/figures/figureM6.py
index 8a7f4a6c..0862c6d2 100644
--- a/ddmc/figures/figureM6.py
+++ b/ddmc/figures/figureM6.py
@@ -26,14 +26,8 @@ def makeFigure():
     # Get list of axis objects
     ax, f = getSetup((11, 7), (2, 3), multz={0: 1})
 
-    # Import signaling data
-    X = filter_NaNpeptides(
-        pd.read_csv("ddmc/data/MS/CPTAC/CPTAC-preprocessedMotfis.csv").iloc[:, 1:],
-        tmt=2,
-    )
-
     # Fit DDMC
-    model = getDDMC_CPTAC(n_components=30, SeqWeight=100.0)
+    model, X = getDDMC_CPTAC(n_components=30, SeqWeight=100.0)
 
     # Import Genotype data
     mutations = pd.read_csv("ddmc/data/MS/CPTAC/Patient_Mutations.csv")
diff --git a/ddmc/figures/figureMS2.py b/ddmc/figures/figureMS2.py
index 55658823..42e4c261 100644
--- a/ddmc/figures/figureMS2.py
+++ b/ddmc/figures/figureMS2.py
@@ -2,12 +2,9 @@
 This creates Supplemental Figure 2: Cluster motifs
 """
 
-import pandas as pd
 import numpy as np
-from .common import getSetup
+from .common import getSetup, getDDMC_CPTAC
 from .common import plotMotifs
-from ..pre_processing import filter_NaNpeptides
-from ..clustering import DDMC
 
 
 def makeFigure():
@@ -15,25 +12,15 @@ def makeFigure():
     # Get list of axis objects
     ax, f = getSetup((9, 9), (5, 5))
 
-    # Import signaling data
-    X = filter_NaNpeptides(
-        pd.read_csv("ddmc/data/MS/CPTAC/CPTAC-preprocessedMotfis.csv").iloc[:, 1:],
-        tmt=2,
-    )
-    d = X.select_dtypes(include=[float]).T
-    i = X["Sequence"]
-
     # Fit DDMC
-    model = DDMC(
-        i, n_components=30, SeqWeight=100, distance_method="Binomial", random_state=5
-    ).fit(d)
+    model, _ = getDDMC_CPTAC(n_components=30, SeqWeight=100.0)
 
     pssms, cl_num = model.pssms(PsP_background=False)
     ylabels = np.arange(0, 21, 5)
     xlabels = [20, 21, 22, 23, 24, 25]
     for ii, cc in enumerate(cl_num):
         cluster = "Cluster " + str(cc)
-        plotMotifs(pssms[ii], axes=ax[ii], titles=cluster, yaxis=[0, 10])
+        plotMotifs(pssms[ii], ax=ax[ii], titles=cluster, yaxis=[0, 10])
         if ii not in ylabels:
             ax[ii].set_ylabel("")
             ax[ii].get_yaxis().set_visible(False)
diff --git a/ddmc/figures/figureMS3.py b/ddmc/figures/figureMS3.py
index f6cfd00a..2d54f982 100644
--- a/ddmc/figures/figureMS3.py
+++ b/ddmc/figures/figureMS3.py
@@ -4,10 +4,8 @@
 
 import pandas as pd
 from sklearn.linear_model import LogisticRegressionCV
-from .common import getSetup
+from .common import getSetup, getDDMC_CPTAC
 from .figureM4 import TransformCenters, HotColdBehavior, find_patients_with_NATandTumor
-from ..pre_processing import filter_NaNpeptides
-from ..clustering import DDMC
 from ..logistic_regression import plotROC
 
 
@@ -16,14 +14,6 @@ def makeFigure():
     # Get list of axis objects
     ax, f = getSetup((15, 10), (3, 5))
 
-    # Signaling
-    X = filter_NaNpeptides(
-        pd.read_csv("ddmc/data/MS/CPTAC/CPTAC-preprocessedMotfis.csv").iloc[:, 1:],
-        tmt=2,
-    )
-    d = X.select_dtypes(include=[float]).T
-    i = X["Sequence"]
-
     # Genotype data
     mutations = pd.read_csv("ddmc/data/MS/CPTAC/Patient_Mutations.csv")
     mOI = mutations[
@@ -46,7 +36,7 @@ def makeFigure():
     folds = 5
     weights = [0, 100, 500, 1000, 1000000]
     for ii, w in enumerate(weights):
-        model = DDMC(i, n_components=30, SeqWeight=w, distance_method="Binomial").fit(d)
+        model, X = getDDMC_CPTAC(n_components=30, SeqWeight=w)
 
         # Find and scale centers
         centers_gen, centers_hcb = TransformCenters(model, X)
@@ -63,7 +53,7 @@ def makeFigure():
             ax[ii],
             lr,
             centers_gen.values,
-            y["STK11.mutation.status"],
+            y["STK11.mutation.status"].values, # type: ignore
             cv_folds=folds,
             title="STK11m " + "w=" + str(model.SeqWeight) + prio,
         )
@@ -73,7 +63,7 @@ def makeFigure():
             ax[ii + 5],
             lr,
             centers_gen.values,
-            y["EGFR.mutation.status"],
+            y["EGFR.mutation.status"].values, # type: ignore
             cv_folds=folds,
             title="EGFRm " + "w=" + str(model.SeqWeight) + prio,
         )
@@ -84,7 +74,7 @@ def makeFigure():
             ax[ii + 10],
             lr,
             centers_hcb.values,
-            y_hcb,
+            y_hcb.values,
             cv_folds=folds,
             title="Infiltration " + "w=" + str(model.SeqWeight) + prio,
         )
diff --git a/ddmc/figures/figureMS6.py b/ddmc/figures/figureMS6.py
index 749e39d0..23deb716 100644
--- a/ddmc/figures/figureMS6.py
+++ b/ddmc/figures/figureMS6.py
@@ -22,7 +22,7 @@ def makeFigure():
     ax, f = getSetup((11, 7), (2, 3), multz={0: 1})
 
     # Fit DDMC
-    model = getDDMC_CPTAC(n_components=30, SeqWeight=100.0)
+    model, X = getDDMC_CPTAC(n_components=30, SeqWeight=100.0)
 
     # Import Genotype data
     mutations = pd.read_csv("ddmc/data/MS/CPTAC/Patient_Mutations.csv")
diff --git a/ddmc/pre_processing.py b/ddmc/pre_processing.py
index 5517cc0f..8d54e9bb 100644
--- a/ddmc/pre_processing.py
+++ b/ddmc/pre_processing.py
@@ -3,9 +3,7 @@
 import os
 import numpy as np
 import pandas as pd
-from scipy import stats
 from collections import defaultdict
-from .motifs import FormatName, MapMotifs
 
 
 path = os.path.dirname(os.path.abspath(__file__))
@@ -124,125 +122,6 @@ def FoldChangeFilterBasedOnMaxFC(X, data_headers, cutoff=0.5):
 
 ###------------ Filter by variance (stdev or range/pearson's) ------------------###
 
-
-def VFilter(ABC, merging_indices, data_headers, corrCut=0.55, stdCut=0.5):
-    """Filter based on variability across recurrent peptides in MS biological replicates"""
-    NonRecPeptides, CorrCoefPeptides, StdPeptides = MapOverlappingPeptides(ABC)
-
-    NonRecTable = BuildMatrix(NonRecPeptides, ABC, data_headers)
-    NonRecTable = NonRecTable.assign(BioReps=list("1" * NonRecTable.shape[0]))
-    NonRecTable = NonRecTable.assign(r2_Std=list(["N/A"] * NonRecTable.shape[0]))
-
-    CorrCoefPeptides = BuildMatrix(CorrCoefPeptides, ABC, data_headers)
-    DupsTable = CorrCoefFilter(CorrCoefPeptides, corrCut=corrCut)
-    DupsTable = MergeDfbyMean(
-        DupsTable, DupsTable[data_headers], merging_indices + ["r2_Std"]
-    )
-    DupsTable = DupsTable.assign(BioReps=list("2" * DupsTable.shape[0])).reset_index()
-
-    StdPeptides = BuildMatrix(StdPeptides, ABC, data_headers)
-    TripsTable = TripsMeanAndStd(
-        StdPeptides, merging_indices + ["BioReps"], data_headers
-    )
-    TripsTable = FilterByStdev(TripsTable, merging_indices + ["BioReps"], stdCut=stdCut)
-
-    merging_indices.insert(4, "BioReps")
-    merging_indices.insert(5, "r2_Std")
-
-    ABC = pd.concat([NonRecTable, DupsTable, TripsTable]).reset_index()[
-        merging_indices[:2] + list(ABC[data_headers].columns) + merging_indices[2:]
-    ]
-
-    # Including non-overlapping peptides
-    return ABC
-
-
-def MapOverlappingPeptides(ABC):
-    """Find recurrent peptides across biological replicates. Grouping those showing up 2 to later calculate
-    correlation, those showing up >= 3 to take the std. Those showing up 1 can be included or not in the final data set.
-    Final dfs are formed by 'Name', 'Peptide', '#Recurrences'."""
-    dups = pd.pivot_table(
-        ABC, index=["Protein", "Sequence"], aggfunc="size"
-    ).sort_values()
-    dups = pd.DataFrame(dups).reset_index()
-    dups.columns = [ABC.columns[0], ABC.columns[1], "Recs"]
-    NonRecPeptides = dups[dups["Recs"] == 1]
-    RangePeptides = dups[dups["Recs"] == 2]
-    StdPeptides = dups[dups["Recs"] >= 3]
-    return NonRecPeptides, RangePeptides, StdPeptides
-
-
-def BuildMatrix(peptides, ABC, data_headers):
-    """Map identified recurrent peptides to generate complete matrices with values.
-    If recurrent peptides = 2, the correlation coefficient is included in a new column.
-    """
-    ABC = ABC.reset_index().set_index(["Sequence", "Protein"], drop=False)
-    ABC = ABC.sort_index()
-
-    corrcoefs, peptideslist, bioReps = [], [], []
-    for idx, seq in enumerate(peptides.iloc[:, 1]):
-        name = peptides.iloc[idx, 0]
-
-        # Skip blank
-        if name == "(blank)":
-            continue
-
-        pepts = ABC.loc[seq, name]
-        pepts = pepts.iloc[:, 1:]
-        names = pepts.iloc[:, 0]
-
-        if len(pepts) == 1:
-            peptideslist.append(pepts.iloc[0, :])
-        elif len(pepts) == 2 and len(set(names)) == 1:
-            fc = Linear(pepts[data_headers].copy(), data_headers)
-            corrcoef, _ = stats.pearsonr(fc.iloc[0, :], fc.iloc[1, :])
-            for i in range(len(pepts)):
-                corrcoefs.append(np.round(corrcoef, decimals=2))
-                peptideslist.append(pepts.iloc[i, :])
-        elif len(pepts) >= 3 and len(set(names)) == 1:
-            for i in range(len(pepts)):
-                peptideslist.append(pepts.iloc[i, :])
-                bioReps.append(len(pepts))
-        else:
-            print("check this", pepts)
-
-    if corrcoefs:
-        matrix = (
-            pd.DataFrame(peptideslist).reset_index(drop=True).assign(r2_Std=corrcoefs)
-        )
-
-    elif bioReps:
-        matrix = (
-            pd.DataFrame(peptideslist).reset_index(drop=True).assign(BioReps=bioReps)
-        )
-
-    else:
-        matrix = pd.DataFrame(peptideslist).reset_index(drop=True)
-
-    return matrix
-
-
-def CorrCoefFilter(X, corrCut=0.6):
-    """Filter rows for those containing more than a correlation threshold."""
-    Xidx = X.iloc[:, -1].values >= corrCut
-    return X.iloc[Xidx, :]
-
-
-def TripsMeanAndStd(triplicates, merging_indices, data_headers):
-    """Merge all triplicates by mean and standard deviation across conditions. Note this builds a multilevel header
-    meaning we have 2 values for each condition (eg within Erlotinib -> Mean | Std)."""
-    func_tri = {}
-    for i in triplicates[data_headers].columns:
-        func_tri[i] = "mean", "std"
-    X = pd.pivot_table(
-        triplicates,
-        values=triplicates[data_headers].columns,
-        index=merging_indices,
-        aggfunc=func_tri,
-    )
-    return X.reset_index()
-
-
 def MeanCenter(X, mc_row, mc_col):
     """Mean centers each row of values. logT also optionally log2-transforms."""
     data_headers = X.select_dtypes(include=["float64"]).columns
@@ -251,96 +130,3 @@ def MeanCenter(X, mc_row, mc_col):
     if mc_col:
         X[data_headers] = X[data_headers].sub(X[data_headers].mean(axis=0), axis=1)
     return X
-
-
-def FilterByRange(X, rangeCut=0.4):
-    """Filter rows for those containing more than a range threshold."""
-    Rg = X.iloc[:, X.columns.get_level_values(1) == "ptp"]
-    Xidx = np.all(Rg.values <= rangeCut, axis=1)
-    return X.iloc[Xidx, :]
-
-
-def FilterByStdev(X, merging_indices, stdCut=0.4):
-    """Filter rows for those containing more than a standard deviation threshold."""
-    Stds = X.iloc[:, X.columns.get_level_values(1) == "std"]
-    StdMeans = list(np.round(Stds.mean(axis=1), decimals=2))
-    Xidx = np.mean(Stds.values, axis=1) <= stdCut
-    Means = pd.concat(
-        [X[merging_indices], X.iloc[:, X.columns.get_level_values(1) == "mean"]], axis=1
-    )
-    Means = Means.iloc[Xidx, :]
-    Means.columns = Means.columns.droplevel(1)
-    StdMeans = pd.DataFrame(StdMeans).iloc[Xidx, :]
-    return Means.assign(r2_Std=StdMeans)
-
-
-def peptidefinder(X, loc, Protein=False, Gene=False, Sequence=False):
-    """Search for a specific peptide either by name or sequence."""
-    if Protein:
-        found = X[X["Protein"].str.contains(loc)]
-    if Gene:
-        found = X[X["Gene"].str.contains(loc)]
-    if Sequence:
-        found = X[X["Sequence"].str.contains(loc)]
-    return found
-
-
-######----------pre-processing of phenotype data----------######
-
-
-def MergeTR(data):
-    """Convenient to merge by mean all TRs of IncuCyte"""
-    for i in range(1, data.shape[1], 2):
-        data.iloc[:, i] = data.iloc[:, i : i + 2].mean(axis=1)
-
-    return data.drop(
-        data.columns[[i + 1 for i in range(1, data.shape[1], 2)]], axis="columns"
-    )
-
-
-def y_pre(ds, tr, ftp, phenotype, all_lines, itp=False):
-    """Preprocesses cell phenotype data sets for analysis."""
-    z = []
-    for sl in ds:
-        c = sl.loc[:, sl.columns.str.contains(tr)]
-        c.insert(0, "Elapsed", ds[0].iloc[:, 0])
-        c = c[
-            list(c.columns[:3]) + [c.columns[4]] + [c.columns[3]] + list(c.columns[5:])
-        ]
-        if not isinstance(itp, bool):
-            c = (
-                c[c["Elapsed"] == ftp]
-                .iloc[0, 1:]
-                .div(c[c["Elapsed"] == itp].iloc[0, 1:])
-            )
-        else:
-            c = c[c["Elapsed"] == ftp].iloc[0, 1:]
-        z.append(c)
-
-    y = pd.DataFrame(pd.concat(z, axis=0)).reset_index()
-    y.columns = ["Lines", phenotype]
-    y = y.groupby("Lines").mean().T[c.index].T.reset_index()
-
-    y["Lines"] = [s.split(tr)[0] for s in y.iloc[:, 0]]
-    y["Treatment"] = tr
-
-    if "-" in y["Lines"][1]:
-        y["Lines"] = [s.split("-")[0] for s in y.iloc[:, 0]]
-
-    y["Lines"] = all_lines
-    return y[["Lines", "Treatment", phenotype]]
-
-
-def FixColumnLabels(cv):
-    """Fix column labels to use pandas locators."""
-    l = []
-    for label in cv[0].columns:
-        if "-" not in label and label != "Elapsed":
-            l.append(label + "-UT")
-        if "-" in label or label == "Elapsed":
-            l.append(label)
-
-    for d in cv:
-        d.columns = l
-
-    return cv