Skip to content

Commit

Permalink
Fix fast eulertigs bug
Browse files Browse the repository at this point in the history
  • Loading branch information
Guilucand committed Jan 13, 2025
1 parent bcb8698 commit 08ccdb6
Show file tree
Hide file tree
Showing 5 changed files with 66 additions and 21 deletions.
6 changes: 5 additions & 1 deletion crates/assembler/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -427,20 +427,24 @@ pub fn run_assembler<
let (reorganized_reads, _final_unitigs_bucket) = if step
<= AssemblerStartingStep::ReorganizeReads
{
if generate_maximal_unitigs_links || compute_tigs_mode.needs_matchtigs_library() {
if generate_maximal_unitigs_links || compute_tigs_mode.needs_temporary_tigs() {
reorganize_reads::<MergingHash, AssemblerColorsManager, StructSeqBinaryWriter<_, _>>(
k,
sequences,
reads_map,
temp_dir.as_path(),
compressed_temp_unitigs_file.as_ref().unwrap(),
circular_temp_unitigs_file.as_ref(),
buckets_count,
)
} else {
reorganize_reads::<MergingHash, AssemblerColorsManager, OutputMode::Backend<_, _>>(
k,
sequences,
reads_map,
temp_dir.as_path(),
&final_unitigs_file,
None,
buckets_count,
)
}
Expand Down
8 changes: 8 additions & 0 deletions crates/assembler/src/pipeline/compute_matchtigs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,7 @@ pub enum MatchtigMode {

/// Query helpers over a tigs-computation mode selection.
///
/// Implemented in this file for `Option<MatchtigMode>`.
pub trait MatchtigHelperTrait {
/// NOTE(review): the visible impl appears to return `true` for
/// `FastSimpliTigs` and `FastEulerTigs` — confirm against the full impl.
fn needs_simplitigs(&self) -> bool;
/// Whether the selected mode needs temporary tigs.
///
/// For `Option<MatchtigMode>` this is `true` for `EulerTigs`, `GreedyTigs`,
/// `PathTigs` and `FastEulerTigs`.
/// NOTE(review): the assembler appears to use this to decide whether
/// unitigs go to temporary files instead of the final output — confirm
/// against the caller.
fn needs_temporary_tigs(&self) -> bool;
/// NOTE(review): presumably whether the mode relies on the matchtigs
/// library; the impl body is only partially visible here — confirm.
fn needs_matchtigs_library(&self) -> bool;
/// Returns a value of the implementing type.
/// NOTE(review): exact semantics are not visible in this view — confirm.
fn get_matchtigs_mode(&self) -> Self;
}
Expand All @@ -326,6 +327,13 @@ impl MatchtigHelperTrait for Option<MatchtigMode> {
*self == Some(MatchtigMode::FastSimpliTigs) || *self == Some(MatchtigMode::FastEulerTigs)
}

/// Returns `true` when the selected mode is one of `EulerTigs`, `GreedyTigs`,
/// `PathTigs` or `FastEulerTigs`; `None` and every other mode yield `false`.
fn needs_temporary_tigs(&self) -> bool {
    matches!(
        self,
        Some(
            MatchtigMode::EulerTigs
                | MatchtigMode::GreedyTigs
                | MatchtigMode::PathTigs
                | MatchtigMode::FastEulerTigs
        )
    )
}

fn needs_matchtigs_library(&self) -> bool {
*self == Some(MatchtigMode::EulerTigs)
|| *self == Some(MatchtigMode::GreedyTigs)
Expand Down
4 changes: 3 additions & 1 deletion crates/assembler/src/pipeline/eulertigs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ impl CircularUnitig {
last.rc ^= self.rc;

let end_offset = if !last.rc && !write_full { 0 } else { k - 1 };
let start_offset = if last.rc && !write_full { k - 1 } else { 0 };

let last_part_entry = unitigs.get(&last.orig_index).unwrap();

Expand All @@ -176,7 +177,8 @@ impl CircularUnitig {
};
}

let last_part_slice = last.start_pos..last.start_pos + last.length + end_offset;
let last_part_slice =
last.start_pos + start_offset..last.start_pos + last.length + end_offset;
let last_part = last_part_entry
.0
.as_reference(unitigs_kmers)
Expand Down
67 changes: 49 additions & 18 deletions crates/assembler/src/pipeline/reorganize_reads.rs
Original file line number Diff line number Diff line change
Expand Up @@ -156,10 +156,12 @@ pub fn reorganize_reads<
CX: ColorsManager,
BK: StructuredSequenceBackend<PartialUnitigsColorStructure<CX>, ()>,
>(
k: usize,
mut reads: Vec<SingleBucket>,
mut mapping_files: Vec<SingleBucket>,
temp_path: &Path,
out_file: &StructuredSequenceWriter<PartialUnitigsColorStructure<CX>, (), BK>,
circular_out_file: Option<&StructuredSequenceWriter<PartialUnitigsColorStructure<CX>, (), BK>>,
buckets_count: usize,
) -> (Vec<SingleBucket>, PathBuf) {
PHASES_TIMES_MONITOR
Expand Down Expand Up @@ -199,10 +201,13 @@ pub fn reorganize_reads<
NoMultiplicity,
>,
>::new(&buckets, buffers.take());

let mut tmp_lonely_unitigs_buffer =
FastaWriterConcurrentBuffer::new(out_file, DEFAULT_OUTPUT_BUFFER_SIZE, true);

let mut tmp_circular_unitigs_buffer = circular_out_file.map(|out_file| {
FastaWriterConcurrentBuffer::new(out_file, DEFAULT_OUTPUT_BUFFER_SIZE, true)
});

let mut mappings = Vec::new();

assert_eq!(read_file.index, mapping_file.index);
Expand Down Expand Up @@ -268,23 +273,49 @@ pub fn reorganize_reads<
);
map_index += 1;
} else {
// No mapping, write unitig to file

tmp_lonely_unitigs_buffer.add_read(
seq,
None,
extra_data.colors,
color_buffer,
(),
&(),
#[cfg(feature = "support_kmer_counters")]
SequenceAbundance {
first: extra_data.counters.first,
sum: extra_data.counters.sum,
last: extra_data.counters.last,
},
);

// Loop to allow skipping code parts with break
'skip_writing: loop {
let first_kmer_node = &seq[0..k - 1];
let last_kmer_node = &seq[seq.len() - k + 1..];
if let Some(circular_unitigs_buffer) = &mut tmp_circular_unitigs_buffer {
// Check if unitig is circular
if first_kmer_node == last_kmer_node {
circular_unitigs_buffer.add_read(
seq,
None,
extra_data.colors,
color_buffer,
(),
&(),
#[cfg(feature = "support_kmer_counters")]
SequenceAbundance {
first: extra_data.counters.first,
sum: extra_data.counters.sum,
last: extra_data.counters.last,
},
);
break 'skip_writing;
}
}

// No mapping, write unitig to file
tmp_lonely_unitigs_buffer.add_read(
seq,
None,
extra_data.colors,
color_buffer,
(),
&(),
#[cfg(feature = "support_kmer_counters")]
SequenceAbundance {
first: extra_data.counters.first,
sum: extra_data.counters.sum,
last: extra_data.counters.last,
},
);

break;
}
// write_fasta_entry::<MH, CX, _>(
// &mut fasta_temp_buffer,
// &mut tmp_lonely_unitigs_buffer,
Expand Down
2 changes: 1 addition & 1 deletion libs-crates/instrumenter-rs
Submodule instrumenter-rs updated 1 files
+1 −0 Cargo.toml

0 comments on commit 08ccdb6

Please sign in to comment.