Skip to content

Commit

Permalink
Merge pull request #64 from broadinstitute/bug-fix-and-perform-flag
Browse files: browse the repository at this point in the history
Bug fixes and adding "perform" flags to all scripts
  • Loading branch information
gwaybio authored Mar 17, 2021
2 parents 06a8101 + 59168ca commit 886a601
Show file tree
Hide file tree
Showing 9 changed files with 59 additions and 2 deletions.
2 changes: 1 addition & 1 deletion 0.preprocess-sites/0.prefilter-features.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import argparse
import numpy as np
import pandas as pd
from scripts.site_processing_utils import prefilter_features
from scripts.site_processing_utils import prefilter_features, load_features

sys.path.append("config")
from utils import parse_command_args, process_configuration
Expand Down
5 changes: 5 additions & 0 deletions 0.preprocess-sites/1.process-spots.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,11 @@
spot_score_cols = spot_config["spot_score_cols"]
foci_cols = spot_config["foci_cols"]
force = spot_config["force_overwrite"]
perform = spot_config["perform"]

# check if this step should be performed
if not perform:
sys.exit("Config file set to perform=False, not performing {}".format(__file__))

# Forced overwrite can be achieved in one of two ways.
# The command line overrides the config file, check here if it is provided
Expand Down
5 changes: 5 additions & 0 deletions 0.preprocess-sites/2.process-cells.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,14 @@
merge_info = cell_config["merge_columns"]
foci_site_col = cell_config["foci_site_col"]
force = cell_config["force_overwrite"]
perform = cell_config["perform"]
metadata_merge_foci_cols = cell_config["metadata_merge_columns"]["foci_cols"]
metadata_merge_cell_cols = cell_config["metadata_merge_columns"]["cell_cols"]

# check if this step should be performed
if not perform:
sys.exit("Config file set to perform=False, not performing {}".format(__file__))

# Forced overwrite can be achieved in one of two ways.
# The command line overrides the config file, check here if it is provided
if not force:
Expand Down
17 changes: 16 additions & 1 deletion 0.preprocess-sites/3.visualize-cell-summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
total_cell_count_file = config["files"]["total_cell_count_file"]

force = config["options"]["preprocess"]["summarize-cells"]["force_overwrite"]
perform = config["options"]["preprocess"]["summarize-cells"]["perform"]

# Perform the pipeline
cell_quality = CellQuality(
Expand All @@ -66,6 +67,11 @@
cell_category_df = pd.DataFrame(cell_category_dict, index=[quality_col])
cell_category_list = list(cell_category_dict.values())

# check if this step should be performed
if not perform:
sys.exit("Config file set to perform=False, not performing {}".format(__file__))


# Forced overwrite can be achieved in one of two ways.
# The command line overrides the config file, check here if it is provided
if not force:
Expand Down Expand Up @@ -251,12 +257,21 @@
# Process overall perturbation counts per batch
pert_count_df = pd.concat(pert_counts_list, axis="rows").reset_index()

# Output a full count of perturbations per site
output_file = pathlib.Path(
output_resultsdir, "complete_perturbation_count_per_site.tsv.gz"
)

if check_if_write(output_file, force, throw_warning=True):
pert_count_df.to_csv(output_file, index=False, sep="\t")

# Summarize counts further in preparation for plotting
pert_count_df = (
pert_count_df.loc[
~pert_count_df.loc[:, gene_cols].isin(control_barcodes).squeeze(),
]
.reset_index(drop=True)
.groupby(gene_cols + barcode_cols + quality_col)["Cell_Count_Per_Guide"]
.groupby(gene_cols + barcode_cols + [quality_col])["Cell_Count_Per_Guide"]
.sum()
.reset_index()
)
Expand Down
5 changes: 5 additions & 0 deletions 0.preprocess-sites/4.image-and-segmentation-qc.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,11 @@
painting_image_names = plate_summary_config["painting_image_names"]
barcoding_prefix = plate_summary_config["barcoding_prefix"]
force = plate_summary_config["force_overwrite"]
perform = plate_summary_config["perform"]

# check if this step should be performed
if not perform:
sys.exit("Config file set to perform=False, not performing {}".format(__file__))

# Forced overwrite can be achieved in one of two ways.
# The command line overrides the config file, check here if it is provided
Expand Down
5 changes: 5 additions & 0 deletions 1.generate-profiles/0.merge-single-cells.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,14 @@
merge_info = sc_config["merge_columns"]
single_file_only = sc_config["output_one_single_cell_file_only"]
force = sc_config["force_overwrite"]
perform = sc_config["perform"]

gene_col = config["options"]["profile"]["aggregate"]["levels"]["gene"]

# check if this step should be performed
if not perform:
sys.exit("Config file set to perform=False, not performing {}".format(__file__))

# Forced overwrite can be achieved in one of two ways.
# The command line overrides the config file, check here if it is provided
if not force:
Expand Down
9 changes: 9 additions & 0 deletions 1.generate-profiles/1.aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,14 @@
experiment_config=experiment_config_file,
)


# Extract config arguments
perform = config["options"]["profile"]["aggregate"]["perform"]

# check if this step should be performed
if not perform:
sys.exit("Config file set to perform=False, not performing {}".format(__file__))

ignore_files = config["options"]["core"]["ignore_files"]
float_format = config["options"]["core"]["float_format"]
compression = config["options"]["core"]["compression"]
Expand All @@ -43,6 +50,8 @@
aggregate_features = aggregate_args["features"]
aggregate_levels = aggregate_args["levels"]

force = aggregate_args["force_overwrite"]

# Input argument flow control
if aggregate_from_single_file:
assert (
Expand Down
7 changes: 7 additions & 0 deletions 1.generate-profiles/2.normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,12 @@
)

# Extract config arguments
perform = config["options"]["profile"]["normalize"]["perform"]

# check if this step should be performed
if not perform:
sys.exit("Config file set to perform=False, not performing {}".format(__file__))

float_format = config["options"]["core"]["float_format"]
compression = config["options"]["core"]["compression"]

Expand All @@ -37,6 +43,7 @@
normalize_by_samples = normalize_args["by_samples"]
normalize_these_features = normalize_args["features"]
normalize_method = normalize_args["method"]
force = normalize_args["force_overwrite"]

for data_level in normalize_levels:
if data_level == "single_cell":
Expand Down
6 changes: 6 additions & 0 deletions 1.generate-profiles/3.feature-select.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,11 @@
)

# Extract config arguments
perform = config["options"]["profile"]["feature_select"]["perform"]
# check if this step should be performed
if not perform:
sys.exit("Config file set to perform=False, not performing {}".format(__file__))

float_format = config["options"]["core"]["float_format"]
compression = config["options"]["core"]["compression"]

Expand All @@ -39,6 +44,7 @@
feature_select_features = feature_select_args["features"]
feature_select_nacutoff = feature_select_args["na_cutoff"]
feature_select_corr_threshold = feature_select_args["corr_threshold"]
force = feature_select_args["force_overwrite"]

for data_level in feature_select_levels:
if data_level == "single_cell":
Expand Down

0 comments on commit 886a601

Please sign in to comment.