-
Notifications
You must be signed in to change notification settings - Fork 24
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add a CI check to validate IP address present in tests, and a helper to clean them #165
base: develop
Are you sure you want to change the base?
Changes from all commits
f63372e
a7a9ae4
3cf8ce4
6e578f8
35c5ec5
120022f
2cb5379
c831197
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
"""Helper script to check and clean IP Addresses in test data.""" | ||
# pylint: disable=too-many-branches | ||
import re | ||
import os | ||
import sys | ||
import argparse | ||
import fileinput | ||
|
||
# Directory (relative to the current working directory) whose files are scanned.
TEST_DATA_PATH = os.path.join("tests", "unit", "data")

# Replacement values written in place of any address that is found.
REPLACE_TEXT_IPV4 = "192.0.2.1"
REPLACE_TEXT_IPV6 = "2001:DB8::1"

# Substrings that happen to match the IP address regexes but are not addresses;
# any line containing one of them is left alone.
PATTERNS_TO_SKIP = ("PRODID:Data::ICal",)

# Reference: https://gist.github.com/dfee/6ed3a4b05cfe7a6faf40a2102408d5d8
# One decimal octet (0-255) and the dotted-quad address built from four of them.
IPV4SEG = r"(?:25[0-5]|(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9])"
IPV4_ADDR_REGEX = rf"(?:(?:{IPV4SEG}\.){{3,3}}{IPV4SEG})"

# One group of 1-4 hexadecimal digits.
IPV6SEG = r"(?:(?:[0-9a-fA-F]){1,4})"

# Every textual IPv6 form, ordered from the most to the least complete notation.
IPV6GROUPS = (
    # 1:2:3:4:5:6:7:8
    rf"(?:{IPV6SEG}:){{7,7}}{IPV6SEG}",
    # 1::   1:2:3:4:5:6:7::
    rf"(?:{IPV6SEG}:){{1,7}}:",
    # 1::8   1:2:3:4:5:6::8
    rf"(?:{IPV6SEG}:){{1,6}}:{IPV6SEG}",
    # 1::7:8   1:2:3:4:5::7:8   1:2:3:4:5::8
    rf"(?:{IPV6SEG}:){{1,5}}(?::{IPV6SEG}){{1,2}}",
    # 1::6:7:8   1:2:3:4::6:7:8   1:2:3:4::8
    rf"(?:{IPV6SEG}:){{1,4}}(?::{IPV6SEG}){{1,3}}",
    # 1::5:6:7:8   1:2:3::5:6:7:8   1:2:3::8
    rf"(?:{IPV6SEG}:){{1,3}}(?::{IPV6SEG}){{1,4}}",
    # 1::4:5:6:7:8   1:2::4:5:6:7:8   1:2::8
    rf"(?:{IPV6SEG}:){{1,2}}(?::{IPV6SEG}){{1,5}}",
    # 1::3:4:5:6:7:8   1::8
    rf"{IPV6SEG}:(?:(?::{IPV6SEG}){{1,6}})",
    # ::2:3:4:5:6:7:8   ::8   ::
    rf":(?:(?::{IPV6SEG}){{1,7}}|:)",
    # fe80::7:8%eth0   fe80::7:8%1 (link-local IPv6 addresses with zone index)
    rf"fe80:(?::{IPV6SEG}){{0,4}}%[0-9a-zA-Z]{{1,}}",
    # ::255.255.255.255   ::ffff:255.255.255.255   ::ffff:0:255.255.255.255
    # (IPv4-mapped IPv6 addresses and IPv4-translated addresses)
    rf"::(?:ffff(?::0{{1,4}}){{0,1}}:){{0,1}}[^\s:]{IPV4_ADDR_REGEX}",
    # 2001:db8:3:4::192.0.2.33   64:ff9b::192.0.2.33 (IPv4-Embedded IPv6 Address)
    rf"(?:{IPV6SEG}:){{1,4}}:[^\s:]{IPV4_ADDR_REGEX}",
)

# The alternatives are joined in reverse order so the regex engine tries the
# later rows of IPV6GROUPS first (greedy match).
IPV6_ADDR_REGEX = "|".join(f"(?:{group})" for group in reversed(IPV6GROUPS))
|
||
|
||
def replace(filename, search_exp, replace_exp):
    """Rewrite ``filename`` in place, substituting every match of ``search_exp``.

    Lines that contain any entry of ``PATTERNS_TO_SKIP`` are copied unchanged.

    The whole file is read and decoded *before* anything is written, so a file
    that cannot be decoded is left untouched. (Streaming through
    ``fileinput.input(..., inplace=True)`` truncates the file and leaves
    ``sys.stdout`` redirected when decoding fails part-way.) The file is only
    written back when at least one line actually changed.

    Args:
        filename: Path of the file to process.
        search_exp: Regular expression searched for in every line.
        replace_exp: Text substituted for each match.

    Returns:
        A report naming the file and listing the original content of each
        changed line, an empty string when nothing changed, or a
        ``"Warning: ..."`` message when the file is not valid UTF-8.
    """
    try:
        # newline="" keeps the file's own line endings, so the only bytes that
        # change on disk are the substituted addresses.
        with open(filename, "r", encoding="utf-8", newline="") as source:
            lines = source.readlines()
    except UnicodeDecodeError as error:
        return f"Warning: Not able to process {filename}: {error}"

    report = ""
    new_lines = []
    for line in lines:
        if any(pattern_to_skip in line for pattern_to_skip in PATTERNS_TO_SKIP):
            newline = line
        else:
            newline = re.sub(search_exp, replace_exp, line)
        new_lines.append(newline)
        if line != newline:
            if not report:
                report = f"IP Address found and replaced in {filename}\n"
            report += f" - {line}"

    # A non-empty report means at least one line differs; otherwise skip the
    # write so unchanged files are not touched at all.
    if report:
        with open(filename, "w", encoding="utf-8", newline="") as target:
            target.writelines(new_lines)
    return report
|
||
|
||
def _check_file(filename):
    """Return a report of the lines in ``filename`` that hold a replaceable IP.

    An empty string is returned when nothing is found or when the file cannot
    be decoded as UTF-8 (in which case a warning is printed).
    """
    with open(filename, "r", encoding="utf-8") as test_file:
        try:
            content = test_file.readlines()
        except UnicodeDecodeError as error:
            # NOTE(review): per the PR discussion, files with decoding errors
            # (the CSV test data) are not supported for now, so they are
            # skipped with a warning instead of aborting the whole check.
            print(f"Warning: Not able to process {filename}: {error}")
            return ""

    report = ""
    for content_line in content:
        if any(pattern_to_skip in content_line for pattern_to_skip in PATTERNS_TO_SKIP):
            continue
        # Substitute instead of searching: a line that only contains the
        # replacement addresses comes out unchanged and is not reported.
        content_new = re.sub(IPV4_ADDR_REGEX, REPLACE_TEXT_IPV4, content_line)
        content_new = re.sub(IPV6_ADDR_REGEX, REPLACE_TEXT_IPV6, content_new)
        if content_line != content_new:
            if not report:
                report = f"IP Address found in {filename}\n"
            report += f" - {content_line}"
    return report


def _clean_file(filename):
    """Replace IPv4 then IPv6 addresses in ``filename`` and return the report.

    ``replace`` signals an undecodable file by returning a ``"Warning: ..."``
    string. That warning is printed here and kept out of the report, so it is
    not mistaken for a cleaned address.
    """
    reports = []
    substitutions = (
        (IPV4_ADDR_REGEX, REPLACE_TEXT_IPV4),
        (IPV6_ADDR_REGEX, REPLACE_TEXT_IPV6),
    )
    for search_exp, replace_exp in substitutions:
        result = replace(filename, search_exp, replace_exp)
        if result.startswith("Warning:"):
            print(result)
            break
        reports.append(result)
    return "".join(reports)


def main():
    """Check, or with ``--clean`` rewrite, the IP addresses under TEST_DATA_PATH.

    Without ``--clean`` every file is scanned, offending lines are printed and
    the process exits with status 1 if any were found. With ``--clean`` the
    addresses are replaced in place by REPLACE_TEXT_IPV4 / REPLACE_TEXT_IPV6.
    """
    parser = argparse.ArgumentParser(description="Clean IP Addresses.")
    parser.add_argument("--clean", action="store_true")
    args = parser.parse_args()
    clean = args.clean

    ip_found = False
    for dirpath, _, files in os.walk(TEST_DATA_PATH):
        for name in files:
            filename = os.path.join(dirpath, name)
            report = _clean_file(filename) if clean else _check_file(filename)
            if report:
                ip_found = True
                print(report)

    if not ip_found:
        print("Only documentation IPs found.")
    elif clean:
        print(
            f"\nIPv4 and IPv6 addresses have been changed to {REPLACE_TEXT_IPV4} and {REPLACE_TEXT_IPV6} respectively.",
            " \nPlease, keep in mind that this could uncover some parsing dependencies on white spaces.",
        )
    else:
        print("\nHINT - you can clean up these IPs with 'invoke clean-anonymize-ips'")
        sys.exit(1)
|
||
|
||
# Script entry point: run the check/clean only when this file is executed directly.
if __name__ == "__main__":
    main()
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,91 +1,91 @@ | ||
MIME-Version: 1.0 | ||
Date: Thu, 26 Aug 2021 18:22:09 +0100 | ||
Subject: Aqua Comms Planned Outage Work ISSUE=11111 PROJ=999 | ||
Content-Type: multipart/alternative; boundary="000000000000015b7605ca799cf2" | ||
--000000000000015b7605ca799cf2 | ||
Content-Type: text/html; charset="UTF-8" | ||
Content-Transfer-Encoding: quoted-printable | ||
<font face=3D"Arial, Verdana, Helvetica, sans-serif" size=3D"2"> | ||
When replying, type your text above this line. | ||
<hr> | ||
<br> | ||
<table border=3D"0" cellpadding=3D"0" cellspacing=3D"0"> | ||
<tbody><tr> | ||
<td valign=3D"TOP" align=3D"LEFT"><font face=3D"Arial, Verdana, Hel= | ||
vetica, sans-serif" size=3D"2">Dear Network to Code,<br><br></font></td> | ||
</tr> | ||
<tr> | ||
<td valign=3D"TOP" align=3D"LEFT"><font face=3D"Arial, Verdana, Hel= | ||
vetica, sans-serif" size=3D"2">This notice is being sent to notify you of t= | ||
he following Maintenance Event.<br><br></font></td> | ||
</tr> | ||
</tbody></table> | ||
<table border=3D"0" cellpadding=3D"0" cellspacing=3D"0"> | ||
<tbody><tr> | ||
<td valign=3D"TOP" align=3D"LEFT"><font face=3D"Arial, Verdana, Hel= | ||
vetica, sans-serif" size=3D"2"><b> Ticket Number: </b></font></td> | ||
<td valign=3D"TOP" align=3D"LEFT" style=3D"padding-left:10px"><font= | ||
face=3D"Arial, Verdana, Helvetica, sans-serif" size=3D"2"> 11111</font></t= | ||
d> | ||
</tr> | ||
<tr> | ||
<td valign=3D"TOP" align=3D"LEFT"><font face=3D"Arial, Verdana, Hel= | ||
vetica, sans-serif" size=3D"2"><b> Scheduled Start Date & Time: </b></f= | ||
ont></td> | ||
<td valign=3D"TOP" align=3D"LEFT" style=3D"padding-left:10px"><font= | ||
face=3D"Arial, Verdana, Helvetica, sans-serif" size=3D"2"> 22:00 12/10/202= | ||
0 GMT</font></td> | ||
</tr> | ||
<tr> | ||
<td valign=3D"TOP" align=3D"LEFT"><font face=3D"Arial, Verdana, Hel= | ||
vetica, sans-serif" size=3D"2"><b> Scheduled End Date & Time: </b></fon= | ||
t></td> | ||
<td valign=3D"TOP" align=3D"LEFT" style=3D"padding-left:10px"><font= | ||
face=3D"Arial, Verdana, Helvetica, sans-serif" size=3D"2"> 08:00 13/10/202= | ||
0 GMT</font></td> | ||
</tr> | ||
=20 | ||
<tr> | ||
<td valign=3D"TOP" align=3D"LEFT"><font face=3D"Arial, Verdana, Hel= | ||
vetica, sans-serif" size=3D"2"><b> Service ID: </b></font></td> | ||
<td valign=3D"TOP" align=3D"LEFT" style=3D"padding-left:10px"><font= | ||
face=3D"Arial, Verdana, Helvetica, sans-serif" size=3D"2">111-AAA-11-11BB-= | ||
33333</font></td> | ||
</tr> | ||
=20 | ||
</tbody></table> | ||
<br> | ||
<table border=3D"0" cellpadding=3D"0" cellspacing=3D"0"> | ||
<tbody><tr> | ||
<td valign=3D"TOP" align=3D"LEFT"><font face=3D"Arial, Verdana, Hel= | ||
vetica, sans-serif" size=3D"2">Please note that all time stamps below will = | ||
appear in US Central time.<br><br></font></td> | ||
</tr> | ||
=20 | ||
=20 | ||
<tr> | ||
<td valign=3D"TOP" align=3D"LEFT"><font face=3D"Arial, Verdana, H= | ||
elvetica, sans-serif" size=3D"2">- Update - There are no new updates.<br></= | ||
font></td> | ||
</tr> | ||
=20 | ||
=20 | ||
<tr> | ||
<td valign=3D"TOP" align=3D"LEFT"><font face=3D"Arial, Verdana, H= | ||
elvetica, sans-serif" size=3D"2"><br></font></td> | ||
</tr> | ||
<tr> | ||
<td valign=3D"TOP" align=3D"LEFT"><font face=3D"Arial, Verdana, Hel= | ||
vetica, sans-serif" size=3D"2">Sincerely,</font></td> | ||
</tr> | ||
<tr> | ||
<td valign=3D"TOP" align=3D"LEFT"><font face=3D"Arial, Verdana, Hel= | ||
vetica, sans-serif" size=3D"2">Aqua Comms NOC</font></td> | ||
</tr> | ||
</tbody></table> | ||
</font> | ||
--000000000000015b7605ca799cf2-- | ||
MIME-Version: 1.0 | ||
glennmatthews marked this conversation as resolved.
Show resolved
Hide resolved
|
||
Date: Thu, 26 Aug 2021 18:22:09 +0100 | ||
Subject: Aqua Comms Planned Outage Work ISSUE=11111 PROJ=999 | ||
Content-Type: multipart/alternative; boundary="000000000000015b7605ca799cf2" | ||
|
||
--000000000000015b7605ca799cf2 | ||
Content-Type: text/html; charset="UTF-8" | ||
Content-Transfer-Encoding: quoted-printable | ||
|
||
<font face=3D"Arial, Verdana, Helvetica, sans-serif" size=3D"2"> | ||
When replying, type your text above this line. | ||
<hr> | ||
<br> | ||
<table border=3D"0" cellpadding=3D"0" cellspacing=3D"0"> | ||
<tbody><tr> | ||
<td valign=3D"TOP" align=3D"LEFT"><font face=3D"Arial, Verdana, Hel= | ||
vetica, sans-serif" size=3D"2">Dear Network to Code,<br><br></font></td> | ||
</tr> | ||
<tr> | ||
<td valign=3D"TOP" align=3D"LEFT"><font face=3D"Arial, Verdana, Hel= | ||
vetica, sans-serif" size=3D"2">This notice is being sent to notify you of t= | ||
he following Maintenance Event.<br><br></font></td> | ||
</tr> | ||
|
||
</tbody></table> | ||
<table border=3D"0" cellpadding=3D"0" cellspacing=3D"0"> | ||
<tbody><tr> | ||
<td valign=3D"TOP" align=3D"LEFT"><font face=3D"Arial, Verdana, Hel= | ||
vetica, sans-serif" size=3D"2"><b> Ticket Number: </b></font></td> | ||
<td valign=3D"TOP" align=3D"LEFT" style=3D"padding-left:10px"><font= | ||
face=3D"Arial, Verdana, Helvetica, sans-serif" size=3D"2"> 11111</font></t= | ||
d> | ||
</tr> | ||
<tr> | ||
<td valign=3D"TOP" align=3D"LEFT"><font face=3D"Arial, Verdana, Hel= | ||
vetica, sans-serif" size=3D"2"><b> Scheduled Start Date & Time: </b></f= | ||
ont></td> | ||
<td valign=3D"TOP" align=3D"LEFT" style=3D"padding-left:10px"><font= | ||
face=3D"Arial, Verdana, Helvetica, sans-serif" size=3D"2"> 22:00 12/10/202= | ||
0 GMT</font></td> | ||
</tr> | ||
<tr> | ||
<td valign=3D"TOP" align=3D"LEFT"><font face=3D"Arial, Verdana, Hel= | ||
vetica, sans-serif" size=3D"2"><b> Scheduled End Date & Time: </b></fon= | ||
t></td> | ||
<td valign=3D"TOP" align=3D"LEFT" style=3D"padding-left:10px"><font= | ||
face=3D"Arial, Verdana, Helvetica, sans-serif" size=3D"2"> 08:00 13/10/202= | ||
0 GMT</font></td> | ||
</tr> | ||
=20 | ||
<tr> | ||
<td valign=3D"TOP" align=3D"LEFT"><font face=3D"Arial, Verdana, Hel= | ||
vetica, sans-serif" size=3D"2"><b> Service ID: </b></font></td> | ||
<td valign=3D"TOP" align=3D"LEFT" style=3D"padding-left:10px"><font= | ||
face=3D"Arial, Verdana, Helvetica, sans-serif" size=3D"2">111-AAA-11-11BB-= | ||
33333</font></td> | ||
</tr> | ||
=20 | ||
</tbody></table> | ||
<br> | ||
<table border=3D"0" cellpadding=3D"0" cellspacing=3D"0"> | ||
<tbody><tr> | ||
<td valign=3D"TOP" align=3D"LEFT"><font face=3D"Arial, Verdana, Hel= | ||
vetica, sans-serif" size=3D"2">Please note that all time stamps below will = | ||
appear in US Central time.<br><br></font></td> | ||
</tr> | ||
=20 | ||
=20 | ||
<tr> | ||
<td valign=3D"TOP" align=3D"LEFT"><font face=3D"Arial, Verdana, H= | ||
elvetica, sans-serif" size=3D"2">- Update - There are no new updates.<br></= | ||
font></td> | ||
</tr> | ||
=20 | ||
=20 | ||
<tr> | ||
<td valign=3D"TOP" align=3D"LEFT"><font face=3D"Arial, Verdana, H= | ||
elvetica, sans-serif" size=3D"2"><br></font></td> | ||
</tr> | ||
<tr> | ||
<td valign=3D"TOP" align=3D"LEFT"><font face=3D"Arial, Verdana, Hel= | ||
vetica, sans-serif" size=3D"2">Sincerely,</font></td> | ||
</tr> | ||
<tr> | ||
<td valign=3D"TOP" align=3D"LEFT"><font face=3D"Arial, Verdana, Hel= | ||
vetica, sans-serif" size=3D"2">Aqua Comms NOC</font></td> | ||
</tr> | ||
</tbody></table> | ||
</font> | ||
|
||
--000000000000015b7605ca799cf2-- |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
a bit surprising to me that
PATTERNS_TO_SKIP
is a list of exact substring matches, rather than a list of regex patterns. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It could be changed to regex if at some point it is needed. For the current need, the simple substring matching was good enough.