-
Notifications
You must be signed in to change notification settings - Fork 24
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
AWS parser update #244
base: develop
Are you sure you want to change the base?
AWS parser update #244
Changes from 7 commits
0dbe49a
7d7d21c
bc93e85
38eeedc
02547c0
5ef19d2
f67ae20
d1e36da
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
"""AquaComms parser.""" | ||
"""AWS parser.""" | ||
import hashlib | ||
import logging | ||
import quopri | ||
|
@@ -24,9 +24,29 @@ def parse_subject(self, subject): | |
Example: AWS Direct Connect Planned Maintenance Notification [AWS Account: 00000001] | ||
""" | ||
data = {} | ||
search = re.search(r"\[AWS Account ?I?D?: ([0-9]+)\]", subject) | ||
if search: | ||
data["account"] = search.group(1) | ||
# Common Subject strings for matching: | ||
subject_map = [{"account": r"\[AWS Account ?I?D?: ([0-9]+)\]"}] | ||
|
||
subject_list = [] | ||
for each_subject in subject_map: | ||
for key, value in each_subject.items(): | ||
subject_list.append(value) | ||
|
||
regex_keys = re.compile("|".join(subject_list), re.IGNORECASE) | ||
|
||
# in case of a multi-line subject | ||
# match the subject map | ||
for line in subject.splitlines(): | ||
line_matched = re.search(regex_keys, line) | ||
if not line_matched: | ||
continue | ||
for group_match in line_matched.groups(): | ||
if not group_match: | ||
continue | ||
for search_string in subject_map: | ||
for key, value in search_string.items(): | ||
if re.search(key, line, re.IGNORECASE): | ||
data[key] = group_match | ||
return [data] | ||
|
||
|
||
|
@@ -60,31 +80,79 @@ def parse_text(self, text): | |
This maintenance is scheduled to avoid disrupting redundant connections at = | ||
the same time. | ||
""" | ||
text_map = [ | ||
{"account": "^Account ?I?D?: ([0-9]+)"}, | ||
{ | ||
"start": "^Start Time: ([A-Z][a-z]{2}, [0-9]{1,2} [A-Z][a-z]{2,9} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{2,3})" | ||
}, | ||
{ | ||
"end": "^End Time: ([A-Z][a-z]{2}, [0-9]{1,2} [A-Z][a-z]{2,9} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{2,3})" | ||
}, | ||
{ | ||
"start_and_end": "(?<=from )([A-Z][a-z]{2}, [0-9]{1,2} [A-Z][a-z]{2,9} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{2,3}) to ([A-Z][a-z]{2}, [0-9]{1,2} [A-Z][a-z]{2,9} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{2,3})" | ||
}, | ||
] | ||
|
||
each_textmap = [] | ||
for search_string in text_map: | ||
for key, value in search_string.items(): | ||
each_textmap.append(value) | ||
|
||
regex_keys = re.compile("|".join(each_textmap), re.IGNORECASE) | ||
|
||
data = {"circuits": []} | ||
impact = Impact.OUTAGE | ||
maintenace_id = "" | ||
maintenance_id = "" | ||
status = Status.CONFIRMED | ||
|
||
for line in text.splitlines(): | ||
if "planned maintenance" in line.lower(): | ||
data["summary"] = line | ||
search = re.search( | ||
r"([A-Z][a-z]{2}, [0-9]{1,2} [A-Z][a-z]{2,9} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{2,3}) to ([A-Z][a-z]{2}, [0-9]{1,2} [A-Z][a-z]{2,9} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{2,3})", | ||
line, | ||
) | ||
if search: | ||
data["start"] = self.dt2ts(parser.parse(search.group(1))) | ||
data["end"] = self.dt2ts(parser.parse(search.group(2))) | ||
maintenace_id += str(data["start"]) | ||
maintenace_id += str(data["end"]) | ||
if "may become unavailable" in line.lower(): | ||
impact = Impact.OUTAGE | ||
elif "has been cancelled" in line.lower(): | ||
status = Status.CANCELLED | ||
elif re.match(r"[a-z]{5}-[a-z0-9]{8}", line): | ||
maintenace_id += line | ||
data["circuits"].append(CircuitImpact(circuit_id=line, impact=impact)) | ||
# match against the regex strings | ||
line_matched = re.search(regex_keys, line) | ||
# if we have a string that's not in our text_map | ||
# there may still be some strings with data to capture. | ||
# otherwise, continue on. | ||
if not line_matched: | ||
if re.match(r"[a-z]{5}-[a-z0-9]{8}", line): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Just curious: why haven't you added this regex to the others? |
||
maintenance_id += line | ||
data["circuits"].append(CircuitImpact(circuit_id=line, impact=impact)) | ||
continue | ||
|
||
# for lines that do match our regex strings. | ||
# grab the data and map the values to keys. | ||
for group_match in line_matched.groups(): | ||
if not group_match: | ||
continue | ||
for search_string in text_map: | ||
for key, value in search_string.items(): | ||
if re.search(value, line_matched.string, re.IGNORECASE): | ||
# Due to having a single line on some emails | ||
# This causes multiple match groups | ||
# However this needs to be split across keys. | ||
# This could probably be cleaned up. | ||
if key == "start_and_end" and "start" not in data: | ||
data["start"] = group_match | ||
elif key == "start_and_end": | ||
data["end"] = group_match | ||
else: | ||
data[key] = group_match | ||
|
||
# Let's determine impact and status | ||
if "may become unavailable" in line.lower(): | ||
impact = Impact.OUTAGE | ||
elif "has been cancelled" in line.lower(): | ||
status = Status.CANCELLED | ||
|
||
# Let's get our times in order. | ||
if all((key in data for key in ["start", "end"])): | ||
data["start"] = self.dt2ts(parser.parse(data["start"])) | ||
data["end"] = self.dt2ts(parser.parse(data["end"])) | ||
maintenance_id += str(data["start"]) | ||
maintenance_id += str(data["end"]) | ||
|
||
# No maintenance ID found in emails, so a hash value is being generated using the start, | ||
# end and IDs of all circuits in the notification. | ||
data["maintenance_id"] = hashlib.md5(maintenace_id.encode("utf-8")).hexdigest() # nosec | ||
data["maintenance_id"] = hashlib.md5(maintenance_id.encode("utf-8")).hexdigest() # nosec | ||
data["status"] = status | ||
return [data] |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -186,6 +186,7 @@ class AWS(GenericProvider): | |
"""AWS provider custom class.""" | ||
|
||
_processors: List[GenericProcessor] = [ | ||
CombinedProcessor(data_parsers=[EmailDateParser, TextParserAWS1]), | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Have you tried inverting them? That way we first try the most complete parsing, including the Subject, and only if that fails do we try the second approach. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yep! I think we had even hinted in that direction, to ensure that the more complete parser would take precedence in its processing. Thanks for the reminder! I've swapped those! |
||
CombinedProcessor(data_parsers=[EmailDateParser, TextParserAWS1, SubjectParserAWS1]), | ||
] | ||
_default_organizer = "[email protected]" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -28,7 +28,7 @@ | |
} | ||
], | ||
"end": 1621519200, | ||
"maintenance_id": "15faf02fcf2e999792668df97828bc76", | ||
"maintenance_id": "1cae19787c16a01d90fd5937a94c6737", | ||
"organizer": "[email protected]", | ||
"provider": "aws", | ||
"sequence": 1, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -28,7 +28,7 @@ | |
} | ||
], | ||
"end": 1631584920, | ||
"maintenance_id": "47876b7d5a5198643a1a9cb7f954487a", | ||
"maintenance_id": "303e7bb374f620bfcc9ad3644647fce1", | ||
"organizer": "[email protected]", | ||
"provider": "aws", | ||
"sequence": 1, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,119 @@ | ||
Delivered-To: [email protected] | ||
Received: by 2002:a05:7300:a498:b0:db:5402:54d2 with SMTP id ci24csp3189966dyb; | ||
Wed, 27 Sep 2023 23:45:14 -0700 (PDT) | ||
X-Received: by 2002:a05:6830:1b64:b0:6c4:ded2:44d0 with SMTP id d4-20020a0568301b6400b006c4ded244d0mr393870ote.27.1695883513842; | ||
Wed, 27 Sep 2023 23:45:13 -0700 (PDT) | ||
ARC-Seal: i=3; a=rsa-sha256; t=1695883513; cv=pass; | ||
d=google.com; s=arc-20160816; | ||
b=J2AUxyAHbkdx5YmP5xAbEeJM3elEoa14Iwv5t4wjz1RTKLla7KfqHxEZMD1LYoenxs | ||
ZLpE/YDGT+ZSphagfW1mo+veHY27kppDSD00YOjDWdWqOLNUvH6KDvGlkMIEIKATDfI/ | ||
5lWcAOTP2h5x7kha2YFpModQRq/fL2727THiXX+BTTWi6r1kF0IAO8lcivXAuM8jaZtM | ||
DYgJAzQW/hcpexKfXz5idvB7cM4TA6+EtsbntwuTILFG6QuY6l09nQOLSnaiDC9WpxXk | ||
Nnveuzzzkx4XklLFCn6AaangILYMa/Ac9kfcOT6MBwOHngH27Pj06PjxF5x7Otk/VLtD | ||
NTqA== | ||
ARC-Message-Signature: i=3; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=arc-20160816; | ||
h=list-unsubscribe:list-archive:list-help:list-post:list-id | ||
:mailing-list:precedence:feedback-id:content-transfer-encoding | ||
:mime-version:subject:message-id:to:from:date:sender:dkim-signature; | ||
bh=YXHwVUeIH05Eff5uu/QBh14WkQXPczhkl0K2x/xaxHI=; | ||
fh=/BZp/CoWS56RYHJk/Nq7+rYFbsXKZMYT25P0V/4q4R8=; | ||
b=El2IxnqZD03wQKR3T3OgYD9VZmQlP/4/0F/G9rkTXGnpqdk6LMM7wtFngfyND14s2/ | ||
j49qP6rvxFdU2YNPbm6K0v6UplLq6sq57eE8kKVJ9qIu4hebNR3r75ohqPRwU7rzvoGB | ||
mPjuE2UevG9YfswuQ8/gDeLbBFAP9gyj6BMwYnC9uQRwdy1fYLMizPxPP/WxV+RLbY6C | ||
s7UzQudntDwdE+hFpKHx8mUMrx+4AN/PU4eYyuGjXSrT8qb5LWCC6xFmIIN5LluDMGfO | ||
VNXZBnDMf6paqOUk6SdRDDGR60D5dTR+KMXvc5ilbgmErZDLGMDqlm93ku65R3nnFot2 | ||
OmzA== | ||
ARC-Authentication-Results: i=3; mx.google.com; | ||
dkim=pass [email protected] header.s=testdomain header.b=GLHcVmBa; | ||
arc=pass (i=2 spf=pass spfdomain=us-west-2.amazonses.com dkim=pass dkdomain=sns.amazonaws.com dkim=pass dkdomain=amazonses.com dmarc=pass fromdomain=amazonaws.com); | ||
spf=pass (google.com: domain of [email protected] designates 7.7.7.7 as permitted sender) smtp.mailfrom=rd-notices+bncBCSJVSHO64CBB6OB2SUAMGQE3DQKCSQ@testdomain.com; | ||
dmarc=fail (p=QUARANTINE sp=NONE dis=NONE arc=pass) header.from=amazonaws.com | ||
Return-Path: <[email protected]> | ||
Received: from mail-sor-f69.google.com (mail-sor-f69.google.com. [7.7.7.7) | ||
by mx.google.com with SMTPS id z195-20020a4a49cc000000b0057b8079d2f9sor1288663ooa.3.2023.09.27.23.45.13 | ||
for <[email protected]> | ||
(Google Transport Security); | ||
Wed, 27 Sep 2023 23:45:13 -0700 (PDT) | ||
Received-SPF: pass (google.com: domain of [email protected] designates 7.7.7.7 as permitted sender) client-ip=7.7.7.7; | ||
Sender: [email protected] | ||
X-Gm-Message-State: AOJu0YwA1ncYCL1JhSr58XiTdslkwS2bbAyUG8XhiJs3xZZJ3Ccy9WF5 | ||
b8y79QbLjF9OquocCHSQC0PxicdI | ||
X-Google-Smtp-Source: AGHT+IHRVxhXNJLs7Sr7hKiGQj5axz7trO3ifhk17zVerbtpqBwzCR3N9tJiSMksqUUrB6MOLmrSLg== | ||
X-Received: by 2002:a4a:d138:0:b0:57e:1618:e700 with SMTP id n24-20020a4ad138000000b0057e1618e700mr147723oor.7.1695883513473; | ||
Wed, 27 Sep 2023 23:45:13 -0700 (PDT) | ||
X-BeenThere: [email protected] | ||
Received: by 2002:a4a:554d:0:b0:573:f543:8c29 with SMTP id e74-20020a4a554d000000b00573f5438c29ls1795866oob.1.-pod-prod-01-us; | ||
Wed, 27 Sep 2023 23:45:12 -0700 (PDT) | ||
X-Received: by 2002:a54:4002:0:b0:3a7:8725:f37c with SMTP id x2-20020a544002000000b003a78725f37cmr391884oie.10.1695883512779; | ||
Wed, 27 Sep 2023 23:45:12 -0700 (PDT) | ||
Received: from a59-201.smtp-out.us-west-2.amazonses.com (a59-201.smtp-out.us-west-2.amazonses.com. [7.7.7.7]) | ||
by mx.google.com with ESMTPS id f20-20020a637554000000b00578b785d46csi18216323pgn.193.2023.09.27.23.45.12 | ||
for <[email protected]> | ||
(version=TLS1_2 cipher=ECDHE-ECDSA-AES128-GCM-SHA256 bits=128/128); | ||
Wed, 27 Sep 2023 23:45:12 -0700 (PDT) | ||
Date: Thu, 28 Sep 2023 06:45:12 +0000 | ||
From: DXMaintNotify-RealDirect <[email protected]> | ||
To: [email protected] | ||
Message-ID: <0101018ada88c9ab-7bb959a5-dfa6-4e9b-9fa1-787fe83442c6-000000@us-west-2.amazonses.com> | ||
Subject: [rd-notices] AWS_DIRECTCONNECT_MAINTENANCE_SCHEDULED | ||
MIME-Version: 1.0 | ||
Content-Type: text/plain; charset="UTF-8" | ||
Content-Transfer-Encoding: quoted-printable | ||
x-amz-sns-message-id: c08baa17-4211-5fca-a32c-f79861293c18 | ||
x-amz-sns-subscription-arn: arn:aws:sns:us-west-2:860000000000:DXMaintNotify:9e02f42f-b026-4bd7-bb9b-5d1eb2b2e141 | ||
Feedback-ID: 1.us-west-2.c55J8LO2Yl1R0Ht+ysI6VjzUH6Cvo3dHPF80AUVC/G8=:AmazonSES | ||
X-SES-Outgoing: 2023.09.28-54.240.59.201 | ||
X-Original-Sender: [email protected] | ||
Precedence: list | ||
Mailing-list: list [email protected]; contact [email protected] | ||
List-ID: <rd-notices.testdomain.com> | ||
X-Spam-Checked-In-Group: [email protected] | ||
X-Google-Group-Id: 536184160288 | ||
List-Post: <https://groups.google.com/a/testdomain.com/group/rd-notices/post>, <mailto:[email protected]> | ||
List-Help: <https://support.google.com/a/testdomain.com/bin/topic.py?topic=25838>, | ||
<mailto:[email protected]> | ||
List-Archive: <https://groups.google.com/a/testdomain.com/group/rd-notices/> | ||
List-Unsubscribe: <mailto:[email protected]>, | ||
<https://groups.google.com/a/testdomain.com/group/rd-notices/subscribe> | ||
|
||
Planned maintenance has been scheduled on an AWS Direct Connect endpoint in= | ||
Westin Building Exchange, Seattle, WA. During this maintenance window, you= | ||
r AWS Direct Connect services associated with this event may become unavail= | ||
able.\n\nThis maintenance is scheduled to avoid disrupting redundant connec= | ||
tions at the same time.\n\nIf you encounter any problems with your connecti= | ||
on after the end of this maintenance window, please contact AWS Support(1).= | ||
\n\n(1) https://aws.amazon.com/support. For more details, please see https:= | ||
//phd.aws.amazon.com/phd/home?region=3Dus-west-2#/dashboard/open-issues | ||
|
||
Region: us-west-2 | ||
Account Id: 0000000000001 | ||
|
||
Affected Resources: | ||
xxxxx-ffffffff | ||
yyyyy-uuuuuuuu | ||
mmmmm-iiiiiiii | ||
rrrrr-pppppppp | ||
fffff-qqqqqqqq | ||
|
||
Start Time: Thu, 12 Oct 2023 07:00:00 GMT | ||
End Time: Thu, 12 Oct 2023 13:00:00 GMT | ||
|
||
-- | ||
If you wish to stop receiving notifications from this topic, please click o= | ||
r visit the link below to unsubscribe: | ||
https://sns.us-west-2.amazonaws.com/unsubscribe.html?SubscriptionArn=3Darn:= | ||
aws:sns:us-west-2:860000000000:DXMaintNotify:9e02f42f-b026-4bd7-bb9b-5d1eb2= | ||
b2e141&[email protected] | ||
|
||
Please do not reply directly to this email. If you have any questions or co= | ||
mments regarding this email, please contact us at https://aws.amazon.com/su= | ||
pport | ||
|
||
--=20 | ||
You received this message because you are subscribed to the Google Groups "= | ||
Real Direct Notices" group. | ||
To unsubscribe from this group and stop receiving emails from it, send an e= | ||
mail to [email protected]. | ||
To view this discussion on the web visit https://groups.google.com/a/Realga= | ||
mes.com/d/msgid/rd-notices/0101018ada88c9ab-7bb959a5-dfa6-4e9b-9fa1-787fe83= | ||
442c6-000000%40us-west-2.amazonses.com. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
[ | ||
{ | ||
"account": "0000000000001", | ||
"circuits": [ | ||
{ | ||
"circuit_id": "xxxxx-ffffffff", | ||
"impact": "OUTAGE" | ||
}, | ||
{ | ||
"circuit_id": "yyyyy-uuuuuuuu", | ||
"impact": "OUTAGE" | ||
}, | ||
{ | ||
"circuit_id": "mmmmm-iiiiiiii", | ||
"impact": "OUTAGE" | ||
}, | ||
{ | ||
"circuit_id": "rrrrr-pppppppp", | ||
"impact": "OUTAGE" | ||
}, | ||
{ | ||
"circuit_id": "fffff-qqqqqqqq", | ||
"impact": "OUTAGE" | ||
} | ||
], | ||
"end": 1697115600, | ||
"maintenance_id": "b15bf3344836f5ad8ab6a6e16cf328f8", | ||
"organizer": "[email protected]", | ||
"provider": "aws", | ||
"sequence": 1, | ||
"stamp": 1695883512, | ||
"start": 1697094000, | ||
"status": "CONFIRMED", | ||
"summary": "Planned maintenance has been scheduled on an AWS Direct Connect endpoint in Westin Building Exchange, Seattle, WA. During this maintenance window, your AWS Direct Connect services associated with this event may become unavailable.\\n\\nThis maintenance is scheduled to avoid disrupting redundant connections at the same time.\\n\\nIf you encounter any problems with your connection after the end of this maintenance window, please contact AWS Support(1).\\n\\n(1) https://aws.amazon.com/support. For more details, please see https://phd.aws.amazon.com/phd/home?region=us-west-2#/dashboard/open-issues", | ||
"uid": "0" | ||
} | ||
] |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
[ | ||
{ | ||
"account": "0000000000001", | ||
"circuits": [ | ||
{ | ||
"circuit_id": "xxxxx-ffffffff", | ||
"impact": "OUTAGE" | ||
}, | ||
{ | ||
"circuit_id": "yyyyy-uuuuuuuu", | ||
"impact": "OUTAGE" | ||
}, | ||
{ | ||
"circuit_id": "mmmmm-iiiiiiii", | ||
"impact": "OUTAGE" | ||
}, | ||
{ | ||
"circuit_id": "rrrrr-pppppppp", | ||
"impact": "OUTAGE" | ||
}, | ||
{ | ||
"circuit_id": "fffff-qqqqqqqq", | ||
"impact": "OUTAGE" | ||
} | ||
], | ||
"end": 1697115600, | ||
"maintenance_id": "b15bf3344836f5ad8ab6a6e16cf328f8", | ||
"start": 1697094000, | ||
"status": "CONFIRMED", | ||
"summary": "Planned maintenance has been scheduled on an AWS Direct Connect endpoint in Westin Building Exchange, Seattle, WA. During this maintenance window, your AWS Direct Connect services associated with this event may become unavailable.\\n\\nThis maintenance is scheduled to avoid disrupting redundant connections at the same time.\\n\\nIf you encounter any problems with your connection after the end of this maintenance window, please contact AWS Support(1).\\n\\n(1) https://aws.amazon.com/support. For more details, please see https://phd.aws.amazon.com/phd/home?region=us-west-2#/dashboard/open-issues" | ||
} | ||
] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is exactly the same code you have in the other function; extract it to a staticmethod so it can be reused.