Python API Documentation¶
+ + +
+ SnipeSketch
+
+
+¶
+
+
+ SnipeSketch is responsible for creating FracMinHash sketches from genomic data. It supports parallel processing, progress monitoring, and different sketching modes, including sample, genome, and amplicon sketching.
+ + + + + + +Source code in src/snipe/api/sketch.py
+ 18 + 19 + 20 + 21 + 22 + 23 + 24 + 25 + 26 + 27 + 28 + 29 + 30 + 31 + 32 + 33 + 34 + 35 + 36 + 37 + 38 + 39 + 40 + 41 + 42 + 43 + 44 + 45 + 46 + 47 + 48 + 49 + 50 + 51 + 52 + 53 + 54 + 55 + 56 + 57 + 58 + 59 + 60 + 61 + 62 + 63 + 64 + 65 + 66 + 67 + 68 + 69 + 70 + 71 + 72 + 73 + 74 + 75 + 76 + 77 + 78 + 79 + 80 + 81 + 82 + 83 + 84 + 85 + 86 + 87 + 88 + 89 + 90 + 91 + 92 + 93 + 94 + 95 + 96 + 97 + 98 + 99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212 +213 +214 +215 +216 +217 +218 +219 +220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 +239 +240 +241 +242 +243 +244 +245 +246 +247 +248 +249 +250 +251 +252 +253 +254 +255 +256 +257 +258 +259 +260 +261 +262 +263 +264 +265 +266 +267 +268 +269 +270 +271 +272 +273 +274 +275 +276 +277 +278 +279 +280 +281 +282 +283 +284 +285 +286 +287 +288 +289 +290 +291 +292 +293 +294 +295 +296 +297 +298 +299 +300 +301 +302 +303 +304 +305 +306 +307 +308 +309 +310 +311 +312 +313 +314 +315 +316 +317 +318 +319 +320 +321 +322 +323 +324 +325 +326 +327 +328 +329 +330 +331 +332 +333 +334 +335 +336 +337 +338 +339 +340 +341 +342 +343 +344 +345 +346 +347 +348 +349 +350 +351 +352 +353 +354 +355 +356 +357 +358 +359 +360 +361 +362 +363 +364 +365 +366 +367 +368 +369 +370 +371 +372 +373 +374 +375 +376 +377 +378 +379 +380 +381 +382 +383 +384 +385 +386 +387 +388 +389 +390 +391 +392 +393 +394 +395 +396 +397 +398 +399 +400 +401 +402 +403 +404 +405 +406 +407 +408 +409 +410 +411 +412 +413 +414 +415 +416 +417 
+418 +419 +420 +421 +422 +423 +424 +425 +426 +427 +428 +429 +430 +431 +432 +433 +434 +435 +436 +437 +438 +439 +440 +441 +442 +443 +444 +445 +446 +447 +448 +449 +450 +451 +452 +453 +454 +455 +456 +457 +458 +459 +460 +461 +462 +463 +464 +465 +466 +467 +468 +469 +470 +471 +472 +473 +474 +475 +476 +477 +478 +479 +480 +481 +482 +483 +484 +485 +486 +487 +488 +489 +490 +491 +492 +493 +494 +495 +496 +497 +498 +499 +500 +501 +502 +503 +504 +505 +506 +507 +508 +509 +510 +511 +512 +513 +514 +515 +516 +517 +518 +519 +520 +521 +522 +523 +524 |
|
+ __init__(enable_logging)
+
+¶
+
+
+ Initialize the SnipeSketch instance.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ enable_logging
+ |
+
+ bool
+ |
+
+
+
+ Flag to enable or disable logging. + |
+ + required + | +
Source code in src/snipe/api/sketch.py
+
+ amplicon_sketching(fasta_file, ksize=51, scale=10000, amplicon_name='amplicon')
+
+¶
+
+
+ Create a FracMinHash sketch for an amplicon.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ fasta_file
+ |
+
+ str
+ |
+
+
+
+ Path to the FASTA file. + |
+ + required + | +
+ ksize
+ |
+
+ int
+ |
+
+
+
+ K-mer size. Defaults to 51. + |
+
+ 51
+ |
+
+ scale
+ |
+
+ int
+ |
+
+
+
+ Scaling factor for MinHash. Defaults to 10_000. + |
+
+ 10000
+ |
+
+ amplicon_name
+ |
+
+ str
+ |
+
+
+
+ Name of the amplicon. Defaults to "amplicon". + |
+
+ 'amplicon'
+ |
+
Returns:
+Type | +Description | +
---|---|
+ SourmashSignature
+ |
+
+
+
+ sourmash.SourmashSignature: The resulting amplicon signature. + |
+
Source code in src/snipe/api/sketch.py
+
+ export_sigs_to_zip(sigs, output_file)
+
+
+ staticmethod
+
+
+¶
+
+
+ Export a list of signatures to a ZIP file.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ sigs
+ |
+
+ List[SourmashSignature]
+ |
+
+
+
+ List of Sourmash signatures. + |
+ + required + | +
+ output_file
+ |
+
+ str
+ |
+
+
+
+ Path to the output ZIP file. + |
+ + required + | +
Raises:
+Type | +Description | +
---|---|
+ ValueError
+ |
+
+
+
+ If the output file does not have a .zip extension. + |
+
+ FileExistsError
+ |
+
+
+
+ If the output file already exists. + |
+
Source code in src/snipe/api/sketch.py
+
+ parallel_genome_sketching(fasta_file, cores=1, ksize=51, scale=10000, assigned_genome_name='full_genome', **kwargs)
+
+¶
+
+
+ Perform parallel genome sketching from a FASTA file.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ fasta_file
+ |
+
+ str
+ |
+
+
+
+ Path to the FASTA file. + |
+ + required + | +
+ cores
+ |
+
+ int
+ |
+
+
+
+ Number of parallel cores. Defaults to 1. + |
+
+ 1
+ |
+
+ ksize
+ |
+
+ int
+ |
+
+
+
+ K-mer size. Defaults to 51. + |
+
+ 51
+ |
+
+ scale
+ |
+
+ int
+ |
+
+
+
+ Scaling factor for MinHash. Defaults to 10_000. + |
+
+ 10000
+ |
+
+ assigned_genome_name
+ |
+
+ str
+ |
+
+
+
+ Name for the genome signature. Defaults to "full_genome". + |
+
+ 'full_genome'
+ |
+
+ **kwargs
+ |
+
+ Any
+ |
+
+
+
+ Additional keyword arguments. + |
+
+ {}
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Tuple[SourmashSignature, Dict[str, SourmashSignature]]
+ |
+
+
+
+ Tuple[sourmash.SourmashSignature, Dict[str, sourmash.SourmashSignature]]: The full genome signature and a dictionary of chromosome signatures. + |
+
Source code in src/snipe/api/sketch.py
+ 377 +378 +379 +380 +381 +382 +383 +384 +385 +386 +387 +388 +389 +390 +391 +392 +393 +394 +395 +396 +397 +398 +399 +400 +401 +402 +403 +404 +405 +406 +407 +408 +409 +410 +411 +412 +413 +414 +415 +416 +417 +418 +419 +420 +421 +422 +423 +424 +425 +426 +427 +428 +429 +430 +431 +432 +433 +434 +435 +436 +437 +438 +439 +440 +441 +442 +443 +444 +445 +446 +447 +448 +449 +450 +451 +452 +453 +454 +455 +456 +457 |
|
+ parse_fasta_header(header)
+
+¶
+
+
+ Parse a FASTA header and categorize the sequence type.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ header
+ |
+
+ str
+ |
+
+
+
+ The FASTA header string. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ Tuple[str, str]
+ |
+
+
+
+ Tuple[str, str]: A tuple containing the sequence type and name. + |
+
Source code in src/snipe/api/sketch.py
+
+ process_sequences(fasta_file, thread_id, total_threads, progress_queue, batch_size=100000, ksize=51, scaled=10000)
+
+¶
+
+
+ Process a subset of sequences to create a FracMinHash sketch.
+Each process creates its own MinHash instance and handles the sequences assigned to it based on the thread ID. Progress is reported via a shared queue.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ fasta_file
+ |
+
+ str
+ |
+
+
+
+ Path to the FASTA file. + |
+ + required + | +
+ thread_id
+ |
+
+ int
+ |
+
+
+
+ Identifier for the current thread. + |
+ + required + | +
+ total_threads
+ |
+
+ int
+ |
+
+
+
+ Total number of threads. + |
+ + required + | +
+ progress_queue
+ |
+
+ Queue
+ |
+
+
+
+ Queue for reporting progress. + |
+ + required + | +
+ batch_size
+ |
+
+ int
+ |
+
+
+
+ Number of sequences per progress update. Defaults to 100_000. + |
+
+ 100000
+ |
+
+ ksize
+ |
+
+ int
+ |
+
+
+
+ K-mer size. Defaults to 51. + |
+
+ 51
+ |
+
+ scaled
+ |
+
+ int
+ |
+
+
+
+ Scaling factor for MinHash. Defaults to 10_000. + |
+
+ 10000
+ |
+
Returns:
+Type | +Description | +
---|---|
+ MinHash
+ |
+
+
+
+ sourmash.MinHash: The resulting FracMinHash sketch. + |
+
Source code in src/snipe/api/sketch.py
+
+ progress_monitor(progress_queue, progress_interval, total_threads, stop_event)
+
+¶
+
+
+ Monitor and display the progress of sequence processing.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ progress_queue
+ |
+
+ Queue
+ |
+
+
+
+ Queue for receiving progress updates. + |
+ + required + | +
+ progress_interval
+ |
+
+ int
+ |
+
+
+
+ Interval for progress updates. + |
+ + required + | +
+ total_threads
+ |
+
+ int
+ |
+
+
+
+ Number of processing threads. + |
+ + required + | +
+ stop_event
+ |
+
+ Event
+ |
+
+
+
+ Event to signal the monitor to stop. + |
+ + required + | +
Source code in src/snipe/api/sketch.py
+
+ sample_sketch(sample_name, filename, num_processes, batch_size, ksize, scale, **kwargs)
+
+¶
+
+
+ Generate a sketch for a sample and return its signature.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ sample_name
+ |
+
+ str
+ |
+
+
+
+ Name of the sample. + |
+ + required + | +
+ filename
+ |
+
+ str
+ |
+
+
+
+ Path to the input FASTA file. + |
+ + required + | +
+ num_processes
+ |
+
+ int
+ |
+
+
+
+ Number of processes to use. + |
+ + required + | +
+ batch_size
+ |
+
+ int
+ |
+
+
+
+ Batch size for processing. + |
+ + required + | +
+ ksize
+ |
+
+ int
+ |
+
+
+
+ K-mer size. + |
+ + required + | +
+ scale
+ |
+
+ int
+ |
+
+
+
+ Scaling factor. + |
+ + required + | +
+ **kwargs
+ |
+
+ Any
+ |
+
+
+
+ Additional keyword arguments. + |
+
+ {}
+ |
+
Returns:
+Type | +Description | +
---|---|
+ SourmashSignature
+ |
+
+
+
+ sourmash.SourmashSignature: The generated signature. + |
+
Raises:
+Type | +Description | +
---|---|
+ RuntimeError
+ |
+
+
+
+ If an error occurs during sketching. + |
+