Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

AWS parser update #244

Open
wants to merge 8 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 90 additions & 22 deletions circuit_maintenance_parser/parsers/aws.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""AquaComms parser."""
"""AWS parser."""
import hashlib
import logging
import quopri
Expand All @@ -24,9 +24,29 @@ def parse_subject(self, subject):
Example: AWS Direct Connect Planned Maintenance Notification [AWS Account: 00000001]
"""
data = {}
search = re.search(r"\[AWS Account ?I?D?: ([0-9]+)\]", subject)
if search:
data["account"] = search.group(1)
# Common Subject strings for matching:
subject_map = [{"account": r"\[AWS Account ?I?D?: ([0-9]+)\]"}]

subject_list = []
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is exactly the same code you have in the other function; extract it into a staticmethod so it can be reused.

for each_subject in subject_map:
for key, value in each_subject.items():
subject_list.append(value)

regex_keys = re.compile("|".join(subject_list), re.IGNORECASE)

# in case of a multi-line subject
# match the subject map
for line in subject.splitlines():
line_matched = re.search(regex_keys, line)
if not line_matched:
continue
for group_match in line_matched.groups():
if not group_match:
continue
for search_string in subject_map:
for key, value in search_string.items():
if re.search(key, line, re.IGNORECASE):
data[key] = group_match
return [data]


Expand Down Expand Up @@ -60,31 +80,79 @@ def parse_text(self, text):
This maintenance is scheduled to avoid disrupting redundant connections at =
the same time.
"""
text_map = [
{"account": "^Account ?I?D?: ([0-9]+)"},
{
"start": "^Start Time: ([A-Z][a-z]{2}, [0-9]{1,2} [A-Z][a-z]{2,9} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{2,3})"
},
{
"end": "^End Time: ([A-Z][a-z]{2}, [0-9]{1,2} [A-Z][a-z]{2,9} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{2,3})"
},
{
"start_and_end": "(?<=from )([A-Z][a-z]{2}, [0-9]{1,2} [A-Z][a-z]{2,9} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{2,3}) to ([A-Z][a-z]{2}, [0-9]{1,2} [A-Z][a-z]{2,9} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{2,3})"
},
]

each_textmap = []
for search_string in text_map:
for key, value in search_string.items():
each_textmap.append(value)

regex_keys = re.compile("|".join(each_textmap), re.IGNORECASE)

data = {"circuits": []}
impact = Impact.OUTAGE
maintenace_id = ""
maintenance_id = ""
status = Status.CONFIRMED

for line in text.splitlines():
if "planned maintenance" in line.lower():
data["summary"] = line
search = re.search(
r"([A-Z][a-z]{2}, [0-9]{1,2} [A-Z][a-z]{2,9} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{2,3}) to ([A-Z][a-z]{2}, [0-9]{1,2} [A-Z][a-z]{2,9} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{2,3})",
line,
)
if search:
data["start"] = self.dt2ts(parser.parse(search.group(1)))
data["end"] = self.dt2ts(parser.parse(search.group(2)))
maintenace_id += str(data["start"])
maintenace_id += str(data["end"])
if "may become unavailable" in line.lower():
impact = Impact.OUTAGE
elif "has been cancelled" in line.lower():
status = Status.CANCELLED
elif re.match(r"[a-z]{5}-[a-z0-9]{8}", line):
maintenace_id += line
data["circuits"].append(CircuitImpact(circuit_id=line, impact=impact))
# match against the regex strings
line_matched = re.search(regex_keys, line)
# if we have a string that's not in our text_map
# there may still be some strings with data to capture.
# otherwise, continue on.
if not line_matched:
if re.match(r"[a-z]{5}-[a-z0-9]{8}", line):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just curious: why haven't you added this regex to the others?

maintenance_id += line
data["circuits"].append(CircuitImpact(circuit_id=line, impact=impact))
continue

# for lines that do match our regex strings.
# grab the data and map the values to keys.
for group_match in line_matched.groups():
if not group_match:
continue
for search_string in text_map:
for key, value in search_string.items():
if re.search(value, line_matched.string, re.IGNORECASE):
# Due to having a single line on some emails
# This causes multiple match groups
# However this needs to be split across keys.
# This could probably be cleaned up.
if key == "start_and_end" and "start" not in data:
data["start"] = group_match
elif key == "start_and_end":
data["end"] = group_match
else:
data[key] = group_match

# Let's determine impact and status
if "may become unavailable" in line.lower():
impact = Impact.OUTAGE
elif "has been cancelled" in line.lower():
status = Status.CANCELLED

# Let's get our times in order.
if all((key in data for key in ["start", "end"])):
data["start"] = self.dt2ts(parser.parse(data["start"]))
data["end"] = self.dt2ts(parser.parse(data["end"]))
maintenance_id += str(data["start"])
maintenance_id += str(data["end"])

# No maintenance ID found in emails, so a hash value is being generated using the start,
# end and IDs of all circuits in the notification.
data["maintenance_id"] = hashlib.md5(maintenace_id.encode("utf-8")).hexdigest() # nosec
data["maintenance_id"] = hashlib.md5(maintenance_id.encode("utf-8")).hexdigest() # nosec
data["status"] = status
return [data]
1 change: 1 addition & 0 deletions circuit_maintenance_parser/provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,7 @@ class AWS(GenericProvider):
"""AWS provider custom class."""

_processors: List[GenericProcessor] = [
CombinedProcessor(data_parsers=[EmailDateParser, TextParserAWS1]),
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Have you tried inverting them? That way we first test the most complete parsing, including the Subject, and only if it fails do we try the second approach.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep! I think we had even hinted in that direction, to ensure that the more complete parser would take precedence in its processing. Thanks for the reminder! I've swapped those!

CombinedProcessor(data_parsers=[EmailDateParser, TextParserAWS1, SubjectParserAWS1]),
]
_default_organizer = "[email protected]"
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/data/aws/aws1_result.json
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
}
],
"end": 1621519200,
"maintenance_id": "15faf02fcf2e999792668df97828bc76",
"maintenance_id": "1cae19787c16a01d90fd5937a94c6737",
"organizer": "[email protected]",
"provider": "aws",
"sequence": 1,
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/data/aws/aws1_text_parser_result.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
}
],
"end": 1621519200,
"maintenance_id": "15faf02fcf2e999792668df97828bc76",
"maintenance_id": "1cae19787c16a01d90fd5937a94c6737",
"start": 1621497600,
"status": "CONFIRMED",
"summary": "Planned maintenance has been scheduled on an AWS Direct Connect router in A Block, New York, NY from Thu, 20 May 2021 08:00:00 GMT to Thu, 20 May 2021 14:00:00 GMT for 6 hours. During this maintenance window, your AWS Direct Connect services listed below may become unavailable."
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/data/aws/aws2_result.json
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
}
],
"end": 1631584920,
"maintenance_id": "47876b7d5a5198643a1a9cb7f954487a",
"maintenance_id": "303e7bb374f620bfcc9ad3644647fce1",
"organizer": "[email protected]",
"provider": "aws",
"sequence": 1,
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/data/aws/aws2_text_parser_result.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
}
],
"end": 1631584920,
"maintenance_id": "47876b7d5a5198643a1a9cb7f954487a",
"maintenance_id": "303e7bb374f620bfcc9ad3644647fce1",
"start": 1631559720,
"status": "CANCELLED",
"summary": "We would like to inform you that the planned maintenance that was scheduled for AWS Direct Connect endpoint in Equinix SG2, Singapore, SGP from Mon, 13 Sep 2021 19:02:00 GMT to Tue, 14 Sep 2021 02:02:00 GMT has been cancelled. Please find below your AWS Direct Connect services that would have been affected by this planned maintenance."
Expand Down
119 changes: 119 additions & 0 deletions tests/unit/data/aws/aws3.eml
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
Delivered-To: [email protected]
Received: by 2002:a05:7300:a498:b0:db:5402:54d2 with SMTP id ci24csp3189966dyb;
Wed, 27 Sep 2023 23:45:14 -0700 (PDT)
X-Received: by 2002:a05:6830:1b64:b0:6c4:ded2:44d0 with SMTP id d4-20020a0568301b6400b006c4ded244d0mr393870ote.27.1695883513842;
Wed, 27 Sep 2023 23:45:13 -0700 (PDT)
ARC-Seal: i=3; a=rsa-sha256; t=1695883513; cv=pass;
d=google.com; s=arc-20160816;
b=J2AUxyAHbkdx5YmP5xAbEeJM3elEoa14Iwv5t4wjz1RTKLla7KfqHxEZMD1LYoenxs
ZLpE/YDGT+ZSphagfW1mo+veHY27kppDSD00YOjDWdWqOLNUvH6KDvGlkMIEIKATDfI/
5lWcAOTP2h5x7kha2YFpModQRq/fL2727THiXX+BTTWi6r1kF0IAO8lcivXAuM8jaZtM
DYgJAzQW/hcpexKfXz5idvB7cM4TA6+EtsbntwuTILFG6QuY6l09nQOLSnaiDC9WpxXk
Nnveuzzzkx4XklLFCn6AaangILYMa/Ac9kfcOT6MBwOHngH27Pj06PjxF5x7Otk/VLtD
NTqA==
ARC-Message-Signature: i=3; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=arc-20160816;
h=list-unsubscribe:list-archive:list-help:list-post:list-id
:mailing-list:precedence:feedback-id:content-transfer-encoding
:mime-version:subject:message-id:to:from:date:sender:dkim-signature;
bh=YXHwVUeIH05Eff5uu/QBh14WkQXPczhkl0K2x/xaxHI=;
fh=/BZp/CoWS56RYHJk/Nq7+rYFbsXKZMYT25P0V/4q4R8=;
b=El2IxnqZD03wQKR3T3OgYD9VZmQlP/4/0F/G9rkTXGnpqdk6LMM7wtFngfyND14s2/
j49qP6rvxFdU2YNPbm6K0v6UplLq6sq57eE8kKVJ9qIu4hebNR3r75ohqPRwU7rzvoGB
mPjuE2UevG9YfswuQ8/gDeLbBFAP9gyj6BMwYnC9uQRwdy1fYLMizPxPP/WxV+RLbY6C
s7UzQudntDwdE+hFpKHx8mUMrx+4AN/PU4eYyuGjXSrT8qb5LWCC6xFmIIN5LluDMGfO
VNXZBnDMf6paqOUk6SdRDDGR60D5dTR+KMXvc5ilbgmErZDLGMDqlm93ku65R3nnFot2
OmzA==
ARC-Authentication-Results: i=3; mx.google.com;
dkim=pass [email protected] header.s=testdomain header.b=GLHcVmBa;
arc=pass (i=2 spf=pass spfdomain=us-west-2.amazonses.com dkim=pass dkdomain=sns.amazonaws.com dkim=pass dkdomain=amazonses.com dmarc=pass fromdomain=amazonaws.com);
spf=pass (google.com: domain of [email protected] designates 7.7.7.7 as permitted sender) smtp.mailfrom=rd-notices+bncBCSJVSHO64CBB6OB2SUAMGQE3DQKCSQ@testdomain.com;
dmarc=fail (p=QUARANTINE sp=NONE dis=NONE arc=pass) header.from=amazonaws.com
Return-Path: <[email protected]>
Received: from mail-sor-f69.google.com (mail-sor-f69.google.com. [7.7.7.7)
by mx.google.com with SMTPS id z195-20020a4a49cc000000b0057b8079d2f9sor1288663ooa.3.2023.09.27.23.45.13
for <[email protected]>
(Google Transport Security);
Wed, 27 Sep 2023 23:45:13 -0700 (PDT)
Received-SPF: pass (google.com: domain of [email protected] designates 7.7.7.7 as permitted sender) client-ip=7.7.7.7;
Sender: [email protected]
X-Gm-Message-State: AOJu0YwA1ncYCL1JhSr58XiTdslkwS2bbAyUG8XhiJs3xZZJ3Ccy9WF5
b8y79QbLjF9OquocCHSQC0PxicdI
X-Google-Smtp-Source: AGHT+IHRVxhXNJLs7Sr7hKiGQj5axz7trO3ifhk17zVerbtpqBwzCR3N9tJiSMksqUUrB6MOLmrSLg==
X-Received: by 2002:a4a:d138:0:b0:57e:1618:e700 with SMTP id n24-20020a4ad138000000b0057e1618e700mr147723oor.7.1695883513473;
Wed, 27 Sep 2023 23:45:13 -0700 (PDT)
X-BeenThere: [email protected]
Received: by 2002:a4a:554d:0:b0:573:f543:8c29 with SMTP id e74-20020a4a554d000000b00573f5438c29ls1795866oob.1.-pod-prod-01-us;
Wed, 27 Sep 2023 23:45:12 -0700 (PDT)
X-Received: by 2002:a54:4002:0:b0:3a7:8725:f37c with SMTP id x2-20020a544002000000b003a78725f37cmr391884oie.10.1695883512779;
Wed, 27 Sep 2023 23:45:12 -0700 (PDT)
Received: from a59-201.smtp-out.us-west-2.amazonses.com (a59-201.smtp-out.us-west-2.amazonses.com. [7.7.7.7])
by mx.google.com with ESMTPS id f20-20020a637554000000b00578b785d46csi18216323pgn.193.2023.09.27.23.45.12
for <[email protected]>
(version=TLS1_2 cipher=ECDHE-ECDSA-AES128-GCM-SHA256 bits=128/128);
Wed, 27 Sep 2023 23:45:12 -0700 (PDT)
Date: Thu, 28 Sep 2023 06:45:12 +0000
From: DXMaintNotify-RealDirect <[email protected]>
To: [email protected]
Message-ID: <0101018ada88c9ab-7bb959a5-dfa6-4e9b-9fa1-787fe83442c6-000000@us-west-2.amazonses.com>
Subject: [rd-notices] AWS_DIRECTCONNECT_MAINTENANCE_SCHEDULED
MIME-Version: 1.0
Content-Type: text/plain; charset="UTF-8"
Content-Transfer-Encoding: quoted-printable
x-amz-sns-message-id: c08baa17-4211-5fca-a32c-f79861293c18
x-amz-sns-subscription-arn: arn:aws:sns:us-west-2:860000000000:DXMaintNotify:9e02f42f-b026-4bd7-bb9b-5d1eb2b2e141
Feedback-ID: 1.us-west-2.c55J8LO2Yl1R0Ht+ysI6VjzUH6Cvo3dHPF80AUVC/G8=:AmazonSES
X-SES-Outgoing: 2023.09.28-54.240.59.201
X-Original-Sender: [email protected]
Precedence: list
Mailing-list: list [email protected]; contact [email protected]
List-ID: <rd-notices.testdomain.com>
X-Spam-Checked-In-Group: [email protected]
X-Google-Group-Id: 536184160288
List-Post: <https://groups.google.com/a/testdomain.com/group/rd-notices/post>, <mailto:[email protected]>
List-Help: <https://support.google.com/a/testdomain.com/bin/topic.py?topic=25838>,
<mailto:[email protected]>
List-Archive: <https://groups.google.com/a/testdomain.com/group/rd-notices/>
List-Unsubscribe: <mailto:[email protected]>,
<https://groups.google.com/a/testdomain.com/group/rd-notices/subscribe>

Planned maintenance has been scheduled on an AWS Direct Connect endpoint in=
Westin Building Exchange, Seattle, WA. During this maintenance window, you=
r AWS Direct Connect services associated with this event may become unavail=
able.\n\nThis maintenance is scheduled to avoid disrupting redundant connec=
tions at the same time.\n\nIf you encounter any problems with your connecti=
on after the end of this maintenance window, please contact AWS Support(1).=
\n\n(1) https://aws.amazon.com/support. For more details, please see https:=
//phd.aws.amazon.com/phd/home?region=3Dus-west-2#/dashboard/open-issues

Region: us-west-2
Account Id: 0000000000001

Affected Resources:
xxxxx-ffffffff
yyyyy-uuuuuuuu
mmmmm-iiiiiiii
rrrrr-pppppppp
fffff-qqqqqqqq

Start Time: Thu, 12 Oct 2023 07:00:00 GMT
End Time: Thu, 12 Oct 2023 13:00:00 GMT

--
If you wish to stop receiving notifications from this topic, please click o=
r visit the link below to unsubscribe:
https://sns.us-west-2.amazonaws.com/unsubscribe.html?SubscriptionArn=3Darn:=
aws:sns:us-west-2:860000000000:DXMaintNotify:9e02f42f-b026-4bd7-bb9b-5d1eb2=
b2e141&[email protected]

Please do not reply directly to this email. If you have any questions or co=
mments regarding this email, please contact us at https://aws.amazon.com/su=
pport

--=20
You received this message because you are subscribed to the Google Groups "=
Real Direct Notices" group.
To unsubscribe from this group and stop receiving emails from it, send an e=
mail to [email protected].
To view this discussion on the web visit https://groups.google.com/a/Realga=
mes.com/d/msgid/rd-notices/0101018ada88c9ab-7bb959a5-dfa6-4e9b-9fa1-787fe83=
442c6-000000%40us-west-2.amazonses.com.
37 changes: 37 additions & 0 deletions tests/unit/data/aws/aws3_result.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
[
{
"account": "0000000000001",
"circuits": [
{
"circuit_id": "xxxxx-ffffffff",
"impact": "OUTAGE"
},
{
"circuit_id": "yyyyy-uuuuuuuu",
"impact": "OUTAGE"
},
{
"circuit_id": "mmmmm-iiiiiiii",
"impact": "OUTAGE"
},
{
"circuit_id": "rrrrr-pppppppp",
"impact": "OUTAGE"
},
{
"circuit_id": "fffff-qqqqqqqq",
"impact": "OUTAGE"
}
],
"end": 1697115600,
"maintenance_id": "b15bf3344836f5ad8ab6a6e16cf328f8",
"organizer": "[email protected]",
"provider": "aws",
"sequence": 1,
"stamp": 1695883512,
"start": 1697094000,
"status": "CONFIRMED",
"summary": "Planned maintenance has been scheduled on an AWS Direct Connect endpoint in Westin Building Exchange, Seattle, WA. During this maintenance window, your AWS Direct Connect services associated with this event may become unavailable.\\n\\nThis maintenance is scheduled to avoid disrupting redundant connections at the same time.\\n\\nIf you encounter any problems with your connection after the end of this maintenance window, please contact AWS Support(1).\\n\\n(1) https://aws.amazon.com/support. For more details, please see https://phd.aws.amazon.com/phd/home?region=us-west-2#/dashboard/open-issues",
"uid": "0"
}
]
32 changes: 32 additions & 0 deletions tests/unit/data/aws/aws3_text_parser_result.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
[
{
"account": "0000000000001",
"circuits": [
{
"circuit_id": "xxxxx-ffffffff",
"impact": "OUTAGE"
},
{
"circuit_id": "yyyyy-uuuuuuuu",
"impact": "OUTAGE"
},
{
"circuit_id": "mmmmm-iiiiiiii",
"impact": "OUTAGE"
},
{
"circuit_id": "rrrrr-pppppppp",
"impact": "OUTAGE"
},
{
"circuit_id": "fffff-qqqqqqqq",
"impact": "OUTAGE"
}
],
"end": 1697115600,
"maintenance_id": "b15bf3344836f5ad8ab6a6e16cf328f8",
"start": 1697094000,
"status": "CONFIRMED",
"summary": "Planned maintenance has been scheduled on an AWS Direct Connect endpoint in Westin Building Exchange, Seattle, WA. During this maintenance window, your AWS Direct Connect services associated with this event may become unavailable.\\n\\nThis maintenance is scheduled to avoid disrupting redundant connections at the same time.\\n\\nIf you encounter any problems with your connection after the end of this maintenance window, please contact AWS Support(1).\\n\\n(1) https://aws.amazon.com/support. For more details, please see https://phd.aws.amazon.com/phd/home?region=us-west-2#/dashboard/open-issues"
}
]
9 changes: 9 additions & 0 deletions tests/unit/test_e2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,15 @@
Path(dir_path, "data", "aws", "aws2_result.json"),
],
),
(
AWS,
[
("email", Path(dir_path, "data", "aws", "aws3.eml")),
],
[
Path(dir_path, "data", "aws", "aws3_result.json"),
],
),
# BSO
(
BSO,
Expand Down
Loading