Skip to content

Commit

Permalink
update cstag.to_html to represent repeat substitutions
Browse files Browse the repository at this point in the history
  • Loading branch information
akikuno committed Sep 14, 2022
1 parent 79e5b6a commit 4bad010
Show file tree
Hide file tree
Showing 6 changed files with 313 additions and 13 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setuptools.setup(
name="cstag",
version="0.2.3",
version="0.3.0",
author="Akihiro Kuno",
author_email="[email protected]",
description="Python module to manipulate the minimap2's CS tag",
Expand Down
21 changes: 12 additions & 9 deletions src/cstag/to_html.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,18 @@
import re


def to_html(CSTAG: str, OUTPUT_FILE_NAME: str, DESCRIPTION: str = "") -> None:
def to_html(CSTAG: str, DESCRIPTION: str = "") -> None:
"""Output HTML file showing a sequence with mutations colored
Args:
CSTAG (str): cs tag in the **long** format
OUTPUT_FILE_NAME (str): output file name
DESCRIPTION (str): (optional) header information in the output file
Return:
str: the HTML report as a single string (nothing is written to disk)
Example:
>>> import cstag
>>> CSTAG = "cs:Z:=AC+GGG=T-ACGT*at~gt10cg=GNNN"
>>> OUTPUT = "Report"
>>> DESCRIPTION = "Example"
>>> cstag.to_html(CSTAG, OUTPUT, DESCRIPTION)
>>> cstag_html = cstag.to_html(CSTAG, DESCRIPTION)
https://user-images.githubusercontent.com/15861316/158910398-67f480d2-8742-412a-b528-40e545c46513.png
"""
if not re.search(r"[ACGT]", CSTAG):
Expand Down Expand Up @@ -109,12 +107,18 @@ def to_html(CSTAG: str, OUTPUT_FILE_NAME: str, DESCRIPTION: str = "") -> None:
list_cs = [i + j for i, j in zip(list_cs[0::2], list_cs[1::2])]

html_cs = []
for cs in list_cs:
idx = 0
while idx < len(list_cs):
cs = list_cs[idx]
if cs[0] == "=":
cs = re.sub(r"(N+)", r"<span class='Unknown'>\1</span>", cs)
html_cs.append(cs[1:])
elif cs[0] == "*":
html_cs.append(f"<span class='Sub'>{cs[2].upper()}</span>")
html_cs.append(f"<span class='Sub'>{cs[2].upper()}")
while idx < len(list_cs) - 1 and list_cs[idx+1].startswith("*"):
html_cs.append(f"{list_cs[idx+1][2].upper()}")
idx += 1
html_cs.append("</span>")
elif cs[0] == "+":
html_cs.append(f"<span class='Ins'>{cs[1:].upper()}</span>")
elif cs[0] == "-":
Expand All @@ -124,6 +128,7 @@ def to_html(CSTAG: str, OUTPUT_FILE_NAME: str, DESCRIPTION: str = "") -> None:
splice = "-" * int(cs[3:-2])
right = cs[-2:].upper()
html_cs.append(f"<span class='Splice'>{left + splice + right}</span>")
idx += 1

html_cs = "".join(html_cs)
html_cs = f"<p class='p_seq'>{html_cs}</p>"
Expand All @@ -137,6 +142,4 @@ def to_html(CSTAG: str, OUTPUT_FILE_NAME: str, DESCRIPTION: str = "") -> None:
html_footer,
]
)

with open(OUTPUT_FILE_NAME + ".html", "w", newline="\n") as f:
f.write(report)
return report
87 changes: 87 additions & 0 deletions tests/data/to_html/report_substitution.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
<!DOCTYPE html>
<html>

<head>
<style>
h1 {
font-family: Consolas, 'Courier New', monospace;
color: #333;
padding: 0.1em 0;
border-top: solid 3px #333;
border-bottom: solid 3px #333;
}

.p_seq {
font-family: Consolas, 'Courier New', monospace;
color: #585858;
word-wrap: break-word;
letter-spacing: 0.15em;
}

.p_legend {
font-family: Consolas, 'Courier New', monospace;
color: #585858;
word-wrap: break-word;
}

.Ins {
color: #333;
border: 0.1em solid;
background-color: #ee827c;
font-weight: bold;
# border-radius: 5px;
}

.Del {
color: #333;
border: 0.1em solid;
background-color: #a0d8ef;
font-weight: bold;
# border-radius: 5px;
}

.Sub {
color: #333;
border: 0.1em solid;
background-color: #98d98e;
font-weight: bold;
# border-radius: 5px;
}

.Splice {
color: #333;
border: 0.1em solid;
background-color: #f8e58c;
font-weight: bold;
}

.Unknown {
color: #333;
border: 0.1em solid;
background-color: #c0c6c9;
font-weight: bold;
# border-radius: 5px;
}

</style>
</head>

<body>

<h1>Example</h1>

<p class="p_legend">
Labels:
<span class="Ins">Insertion</span>
<span class="Del">Deletion</span>
<span class="Sub">Substitution</span>
<span class="Splice">Splicing</span>
<span class="Unknown">Unknown</span>
</p>
<hr>

<p class='p_seq'>A<span class='Sub'>TG</span>A</p>

</body>

</html>
87 changes: 87 additions & 0 deletions tests/data/to_html/report_substitution_end.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
<!DOCTYPE html>
<html>

<head>
<style>
h1 {
font-family: Consolas, 'Courier New', monospace;
color: #333;
padding: 0.1em 0;
border-top: solid 3px #333;
border-bottom: solid 3px #333;
}

.p_seq {
font-family: Consolas, 'Courier New', monospace;
color: #585858;
word-wrap: break-word;
letter-spacing: 0.15em;
}

.p_legend {
font-family: Consolas, 'Courier New', monospace;
color: #585858;
word-wrap: break-word;
}

.Ins {
color: #333;
border: 0.1em solid;
background-color: #ee827c;
font-weight: bold;
# border-radius: 5px;
}

.Del {
color: #333;
border: 0.1em solid;
background-color: #a0d8ef;
font-weight: bold;
# border-radius: 5px;
}

.Sub {
color: #333;
border: 0.1em solid;
background-color: #98d98e;
font-weight: bold;
# border-radius: 5px;
}

.Splice {
color: #333;
border: 0.1em solid;
background-color: #f8e58c;
font-weight: bold;
}

.Unknown {
color: #333;
border: 0.1em solid;
background-color: #c0c6c9;
font-weight: bold;
# border-radius: 5px;
}

</style>
</head>

<body>

<h1>Example</h1>

<p class="p_legend">
Labels:
<span class="Ins">Insertion</span>
<span class="Del">Deletion</span>
<span class="Sub">Substitution</span>
<span class="Splice">Splicing</span>
<span class="Unknown">Unknown</span>
</p>
<hr>

<p class='p_seq'>A<span class='Sub'>TG</span></p>

</body>

</html>
87 changes: 87 additions & 0 deletions tests/data/to_html/report_substitution_start.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
<!DOCTYPE html>
<html>

<head>
<style>
h1 {
font-family: Consolas, 'Courier New', monospace;
color: #333;
padding: 0.1em 0;
border-top: solid 3px #333;
border-bottom: solid 3px #333;
}

.p_seq {
font-family: Consolas, 'Courier New', monospace;
color: #585858;
word-wrap: break-word;
letter-spacing: 0.15em;
}

.p_legend {
font-family: Consolas, 'Courier New', monospace;
color: #585858;
word-wrap: break-word;
}

.Ins {
color: #333;
border: 0.1em solid;
background-color: #ee827c;
font-weight: bold;
# border-radius: 5px;
}

.Del {
color: #333;
border: 0.1em solid;
background-color: #a0d8ef;
font-weight: bold;
# border-radius: 5px;
}

.Sub {
color: #333;
border: 0.1em solid;
background-color: #98d98e;
font-weight: bold;
# border-radius: 5px;
}

.Splice {
color: #333;
border: 0.1em solid;
background-color: #f8e58c;
font-weight: bold;
}

.Unknown {
color: #333;
border: 0.1em solid;
background-color: #c0c6c9;
font-weight: bold;
# border-radius: 5px;
}

</style>
</head>

<body>

<h1>Example</h1>

<p class="p_legend">
Labels:
<span class="Ins">Insertion</span>
<span class="Del">Deletion</span>
<span class="Sub">Substitution</span>
<span class="Splice">Splicing</span>
<span class="Unknown">Unknown</span>
</p>
<hr>

<p class='p_seq'><span class='Sub'>TG</span>A</p>

</body>

</html>
42 changes: 39 additions & 3 deletions tests/test_to_html.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,49 @@
import os
from pathlib import Path
import filecmp
import tempfile
from src.cstag import to_html


def test_html():
    """Check the full report for a mixed cs tag against the stored fixture.

    The tag contains match, insertion, deletion, substitution, splice and
    unknown-base (N) segments, so one comparison against ``report.html``
    exercises the whole rendered document.
    """
    cs = "cs:Z:=AC+GGG=T-ACGT*at~gt10cg=GNNN"
    description = "Example"
    cs_html = to_html(cs, description)
    expected = Path("tests", "data", "to_html", "report.html")

    # Use a managed temporary directory instead of reusing the name of an
    # already-discarded NamedTemporaryFile: the output is cleaned up on exit
    # and no other process can claim the path in between.
    with tempfile.TemporaryDirectory() as tmp_dir:
        output = Path(tmp_dir, "report.html")
        # Pin "\n" line endings so the byte-wise comparison does not depend
        # on the platform's default newline translation.
        with open(output, "w", newline="\n") as f:
            f.write(cs_html)
        # shallow=False forces a content comparison rather than trusting
        # os.stat() signatures.
        assert filecmp.cmp(output, expected, shallow=False)

def test_html_repeat_substitution():
    """Consecutive substitutions between two matches share one 'Sub' span."""
    marker = "<p class='p_seq'>"
    rendered = to_html("cs:Z:=A*at*ag=A", "Example")
    fixture = Path("tests", "data", "to_html", "report_substitution.html").read_text()

    # Only the sequence paragraph is compared; the rest of the page is ignored.
    rendered_seq = [line for line in rendered.split("\n") if marker in line][0]
    fixture_seq = [line for line in fixture.split("\n") if marker in line][0]

    # Compare whitespace-separated tokens so surrounding indentation is ignored.
    assert rendered_seq.split() == fixture_seq.split()


def test_html_repeat_substitution_start():
    """Consecutive substitutions at the start of the tag share one 'Sub' span."""
    marker = "<p class='p_seq'>"
    rendered = to_html("cs:Z:*at*ag=A", "Example")
    fixture = Path("tests", "data", "to_html", "report_substitution_start.html").read_text()

    # Only the sequence paragraph is compared; the rest of the page is ignored.
    rendered_seq = [line for line in rendered.split("\n") if marker in line][0]
    fixture_seq = [line for line in fixture.split("\n") if marker in line][0]

    # Compare whitespace-separated tokens so surrounding indentation is ignored.
    assert rendered_seq.split() == fixture_seq.split()


def test_html_repeat_substitution_end():
    """Consecutive substitutions at the end of the tag share one 'Sub' span.

    The leftover three-argument ``to_html(cs, output, description)`` call and
    the trailing file comparison were dropped: both referenced an ``output``
    name that is never defined here, and ``to_html`` no longer takes an
    output file name.
    """
    cs = "cs:Z:=A*at*ag"
    description = "Example"
    cs_html = to_html(cs, description)
    marker = "<p class='p_seq'>"

    # Only the sequence paragraph is compared; the rest of the page is ignored.
    test = [h for h in cs_html.split("\n") if marker in h]
    test = test[0].split()

    answer = Path("tests", "data", "to_html", "report_substitution_end.html").read_text().split("\n")
    answer = [h for h in answer if marker in h]
    answer = answer[0].split()

    assert test == answer

0 comments on commit 4bad010

Please sign in to comment.