Skip to content

Commit

Permalink
feat(cram): region parsing func
Browse files Browse the repository at this point in the history
  • Loading branch information
cmdoret committed Feb 9, 2024
1 parent 109f213 commit 0627c74
Showing 1 changed file with 21 additions and 12 deletions.
33 changes: 21 additions & 12 deletions modo/cram.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,27 @@
# is being sliced (e.g., if it is a CRAM file, it will call slice_cram())


def slice(data_name: str, data_path: str, coords: str):
    """Return a slice of the requested region for the requested omics type.

    Dispatches on the data type: "cram" is forwarded to ``slice_cram`` and
    "array" to ``slice_array``, each called with ``(data_path, coords)``.
    For any other data type no branch matches and the function falls
    through, implicitly returning ``None``.

    NOTE(review): ``data_type`` is only read here, never assigned -- the
    assignment below is still a commented-out placeholder. Unless it is
    defined at module level outside this view, calling this function raises
    ``NameError``. ``data_name`` is currently unused; presumably it is meant
    to be the metadata lookup key -- confirm.
    """

    # get the data type (e.g., CRAM, array) of the requested data
    # data_type = # Cyril, please add in the logic to get the data type
    # data_path = # and the data_path from the metadata

    if data_type == "cram":
        return slice_cram(data_path, coords)
    elif data_type == "array":
        # NOTE(review): slice_array is not defined in the visible code.
        return slice_array(data_path, coords) # To be added after we know
        # what this data looks like
def parse_region(region: str) -> tuple[str, int, int]:
    """Parse a UCSC-format region string into (chrom, start, end).

    The whole string must have the form ``chrom:start-end``, where ``chrom``
    is any non-empty text without a colon and ``start`` / ``end`` are plain
    decimal integers (leading zeros allowed). Whitespace surrounding the
    region is ignored.

    Parameters
    ----------
    region
        Region string, e.g. ``'chr1:10-320'``.

    Returns
    -------
    tuple[str, int, int]
        Chromosome name, start and end. Coordinates are converted to int
        exactly as written: no offset is applied and ``start <= end`` is
        not checked.

    Raises
    ------
    ValueError
        If ``region`` is not of the form ``chrom:start-end``.

    Examples
    --------
    >>> parse_region('chr1:10-320')
    ('chr1', 10, 320)
    >>> parse_region('chr-1ba:32-0100')
    ('chr-1ba', 32, 100)
    """
    # fullmatch, not match: an unanchored pattern only validates a prefix,
    # which lets inputs like 'chr1:10-3_2_0' or 'chr1:10-20-30' slip through
    # and either parse to a surprising value or fail with an unrelated error.
    matched = re.fullmatch(r"([^:]+):([0-9]+)-([0-9]+)", region.strip())
    if matched is None:
        raise ValueError(
            f"Invalid region format: {region}. Expected chr:start-end"
        )

    # Take the fields from the capture groups so that validation and
    # extraction cannot disagree about where each field begins and ends.
    chrom, start, end = matched.groups()
    return (chrom, int(start), int(end))


def slice_cram(cram_path: AlignmentFile, coords: str): # -> AlignmentFile:
Expand Down

0 comments on commit 0627c74

Please sign in to comment.