Skip to content

Commit

Permalink
use only the simulated mass list in simulated runs
Browse files Browse the repository at this point in the history
  • Loading branch information
leoschwarz committed Jun 18, 2024
1 parent 8e89455 commit 6d47748
Show file tree
Hide file tree
Showing 7 changed files with 22 additions and 21 deletions.
6 changes: 3 additions & 3 deletions src/depiction/calibration/perform_calibration.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,17 +78,17 @@ def calibrate_image(
read_full = read_peaks

logger.info("Extracting all features...")
all_features = self._extract_all_features(read_peaks)
all_features = self._extract_all_features(read_peaks).transpose("i", "c")
self._validate_per_spectra_array(all_features, coordinates_2d=read_peaks.coordinates_2d)
self._write_data_array(all_features, group="features_raw")

logger.info("Preprocessing features...")
all_features = self._calibration.preprocess_image_features(all_features=all_features)
all_features = self._calibration.preprocess_image_features(all_features=all_features).transpose("i", "c")
self._validate_per_spectra_array(all_features, coordinates_2d=read_peaks.coordinates_2d)
self._write_data_array(all_features, group="features_processed")

logger.info("Fitting models...")
model_coefs = self._fit_all_models(all_features=all_features)
model_coefs = self._fit_all_models(all_features=all_features).transpose("i", "c")
self._validate_per_spectra_array(model_coefs, coordinates_2d=read_peaks.coordinates_2d)
self._write_data_array(model_coefs, group="model_coefs")

Expand Down
2 changes: 2 additions & 0 deletions src/depiction/tools/simulate/synthetic_msi_data_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
from depiction.persistence import ImzmlWriteFile, ImzmlWriter


# TODO configure shifts

class SyntheticMSIDataGenerator:
"""Helper that creates synthetic MSI data."""

Expand Down
10 changes: 7 additions & 3 deletions src/depiction_targeted_preproc/example_sim/default.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,11 @@ calibration:
max_distance_unit: ppm
reg_model_type: linear_siegelslopes
reg_model_unit: mz
input_smoothing_activated: yes
input_smoothing_kernel_size: 5
input_smoothing_kernel_std: 1.0
input_smoothing_activated: no
# TODO: investigate the problem with input smoothing (disabled above; previous settings kept commented out below)
#input_smoothing_activated: yes
#input_smoothing_kernel_size: 5
#input_smoothing_kernel_std: 1.0
min_points: 3
n_jobs: 10
requested_artifacts:
Expand All @@ -29,3 +31,5 @@ simulate:
image_height: 100
n_labels: 30
bin_width_ppm: 80
target_mass_min: 850
target_mass_max: 1900
8 changes: 4 additions & 4 deletions src/depiction_targeted_preproc/example_sim/run_sim.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,17 @@
def setup_sim_dir(path: Path) -> None:
path.mkdir(exist_ok=True, parents=True)
shutil.copyfile(Path(__file__).parent / "default.yml", path / "pipeline_params.yml")
path_source_mass_list = Path(__file__).parents[1] / "example" / "data-raw" / "mass_list_vend.csv"
shutil.copyfile(path_source_mass_list, path / "mass_list.raw.csv")
# path_source_mass_list = Path(__file__).parents[1] / "example" / "data-raw" / "mass_list_vend.csv"
# shutil.copyfile(path_source_mass_list, path / "mass_list.raw.csv")


# TODO why does it not work?

def main() -> None:
dir_work = Path(__file__).parent / "data-work"
#dir_output = Path(__file__).parent / "data-output"
# dir_output = Path(__file__).parent / "data-output"
dir_work.mkdir(exist_ok=True, parents=True)
#dir_output.mkdir(exist_ok=True, parents=True)
# dir_output.mkdir(exist_ok=True, parents=True)

sample_name = "dummy01_sim"
setup_sim_dir(dir_work / sample_name)
Expand Down
2 changes: 2 additions & 0 deletions src/depiction_targeted_preproc/pipeline_config/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@ class SimulateParameters(BaseModel):
image_height: int = 100
n_labels: int = 30
bin_width_ppm: float = 100.0
target_mass_min: float = 850.
target_mass_max: float = 1900.


class PipelineArtifact(str, Enum):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,20 +16,19 @@ rule simulate_create_labels:

rule simulate_create_mass_list:
input:
mass_list="{sample}_sim/mass_list.calibration.csv",
config="{sample}_sim/pipeline_params.yml",
output:
mass_list="{sample}_sim/mass_list.simulated.csv"
mass_list="{sample}_sim/mass_list.raw.csv"
shell:
"python -m depiction_targeted_preproc.workflow.simulate.create_mass_list"
" --input-mass-list-path {input.mass_list} --config-path {input.config}"
" --config-path {input.config}"
" --output-mass-list-path {output.mass_list}"


rule simulate_generate_imzml:
input:
image="{sample}_sim/true_labels.hdf5",
mass_list="{sample}_sim/mass_list.simulated.csv",
mass_list="{sample}_sim/mass_list.raw.csv",
config="{sample}_sim/pipeline_params.yml",
output:
imzml=multiext("{sample}_sim/raw",".imzML",".ibd"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,22 +10,16 @@


def simulate_create_mass_list(
input_mass_list_path: Annotated[Path, Option()],
config_path: Annotated[Path, Option()],
output_mass_list_path: Annotated[Path, Option()],
) -> None:
# parse the config
config = PipelineParameters.parse_yaml(config_path)

# parse the input
df_input = pl.read_csv(input_mass_list_path)
mass_min = df_input["mass"].min()
mass_max = df_input["mass"].max()

# compute the masses
lambda_avg = 1.0 + 4.95e-4
n_labels = config.simulate.n_labels
masses = np.linspace(mass_min, mass_max, n_labels)
masses = np.linspace(config.simulate.target_mass_min, config.simulate.target_mass_max, n_labels)
masses -= masses % lambda_avg

# create the output mass list
Expand Down

0 comments on commit 6d47748

Please sign in to comment.