Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tighten up minutes identification, add test #36

Merged
merged 1 commit into from
Jan 16, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 25 additions & 18 deletions lametro/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -525,12 +525,12 @@ def find_approved_minutes(self, event):
"/matters/",
"MatterId",
(
f"{associated_with_meeting_body} and " +
f"{meeting_date_in_title} and " +
"(" +
f"({matter_type_minutes}) or " +
f"({minutes_in_title} and {matter_type_informational})" +
")"
f"{associated_with_meeting_body} and "
+ f"{meeting_date_in_title} and "
+ "("
+ f"({matter_type_minutes}) or "
+ f"({minutes_in_title} and {matter_type_informational})"
+ ")"
),
)

Expand All @@ -540,10 +540,12 @@ def find_approved_minutes(self, event):
# Sometimes, the search returns more than one board report.
# Go through each matter yielded from this generator to account for that.
for matter in result:
if (matter['MatterRestrictViewViaWeb'] or
matter['MatterStatusName'] == 'Draft' or
matter['MatterBodyName'] == 'TO BE REMOVED'):
# Ignore this matter if there are signs that it shouldn't be processed.
if (
matter["MatterRestrictViewViaWeb"]
or matter["MatterStatusName"] == "Draft"
or matter["MatterBodyName"] == "TO BE REMOVED"
):
# Ignore this matter if there are signs that it shouldn't be processed.
continue

attachment_url = self.BASE_URL + "/matters/{}/attachments".format(
Expand All @@ -554,7 +556,9 @@ def find_approved_minutes(self, event):

try:
if len(attachments) == 0:
raise MissingAttachmentsException(matter["MatterId"], attachment_url)
raise MissingAttachmentsException(
matter["MatterId"], attachment_url
)
except MissingAttachmentsException as e:
capture_exception(e)
continue
Expand All @@ -577,7 +581,7 @@ def find_approved_minutes(self, event):
except PDFSyntaxError as e:
capture_message(
f"PDFPlumber encountered an error opening a file: {e}",
"warning"
"warning",
)
continue
cover_page = pdf.pages[0]
Expand All @@ -591,16 +595,19 @@ def find_approved_minutes(self, event):
with io.BytesIO() as in_mem_image:
pdf_image.save(in_mem_image)
in_mem_image.seek(0)
cover_page_text = pytesseract.image_to_string(Image.open(in_mem_image))

if "MINUTES" in cover_page_text.upper():
cover_page_text = pytesseract.image_to_string(
Image.open(in_mem_image)
)

if all(
substr in cover_page_text.lower()
for substr in (name.lower(), "minutes")
):
Comment on lines +602 to +605
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

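The fix! Instead of accepting any attachment whose cover page contains "MINUTES", the check now requires the cover page text to contain both the meeting name (`name`) and the word "minutes", compared case-insensitively.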

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah I dig this solution!

yield attach
n_minutes += 1

if n_minutes == 0:
self.warning(
f"Couldn't find minutes for the {name} meeting of {date}."
)
self.warning(f"Couldn't find minutes for the {name} meeting of {date}.")


class LAMetroAPIEvent(dict):
Expand Down
26 changes: 26 additions & 0 deletions tests/fixtures/event.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{
"EventId": 2907,
"EventGuid": "01EE5D79-B958-46F9-B8EB-384B745A57EC",
"EventLastModifiedUtc": "2024-09-21T00:49:38.617",
Comment on lines +1 to +4
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Always a fan of more data to test with

"EventRowVersion": "AAAAAAJZKHU=",
"EventBodyId": 138,
"EventBodyName": "Board of Directors - Regular Board Meeting",
"EventDate": "2024-09-26T00:00:00",
"EventTime": "10:00 AM",
"EventVideoStatus": "Public",
"EventAgendaStatusId": 10,
"EventAgendaStatusName": "Final",
"EventMinutesStatusId": 9,
"EventMinutesStatusName": "Draft",
"EventLocation": "One Gateway Plaza, Los Angeles, CA 90012, \r\n3rd Floor, Metro Board Room",
"EventAgendaFile": "https://metro.legistar1.com/metro/meetings/2024/9/2907_A_Board_of_Directors_-_Regular_Board_Meeting_24-09-26_Agenda.pdf",
"EventMinutesFile": null,
"EventAgendaLastPublishedUTC": "2024-09-21T00:49:38.513",
"EventMinutesLastPublishedUTC": null,
"EventComment": "Watch online: https://boardagendas.metro.net\r\nListen by phone: Dial 202-735-3323 and enter Access Code:\r\n5647249# (English) or 7292892# (Español) \r\n\r\nTo give written or live public comment, please see the top of page 4",
"EventVideoPath": null,
"EventMedia": "3231",
"EventInSiteURL": "https://metro.legistar.com/MeetingDetail.aspx?LEGID=2907&GID=557&G=A5FAA737-A54D-4A6C-B1E8-FF70F765FA94",
"EventItems":
[]
}
58 changes: 58 additions & 0 deletions tests/fixtures/matter_candidates.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
[
{
"MatterId": 10881,
"MatterGuid": "5E324116-B670-49F2-AB53-02E768DBA6DD",
"MatterLastModifiedUtc": "2024-12-13T01:07:23.667",
"MatterRowVersion": "AAAAAAJpCV0=",
"MatterFile": "2024-1026",
"MatterName": null,
"MatterTitle": "APPROVE Minutes of the Regular Board Meeting held September 26, 2024 and the Special Board Meeting/Ad Hoc 2028 Olympic & Paralympic Games Committee held October 23, 2024.",
"MatterTypeId": 62,
"MatterTypeName": "Minutes",
"MatterStatusId": 72,
"MatterStatusName": "Passed",
"MatterBodyId": 138,
"MatterBodyName": "Board of Directors - Regular Board Meeting",
"MatterIntroDate": "2024-10-22T00:00:00",
"MatterAgendaDate": "2024-10-31T00:00:00",
"MatterPassedDate": "2024-10-31T00:00:00",
"MatterEnactmentDate": null,
"MatterEnactmentNumber": null,
"MatterRequester": null,
"MatterNotes": null,
"MatterVersion": "1",
"MatterCost": null,
"MatterText1": null,
"MatterText2": null,
"MatterText3": null,
"MatterText4": null,
"MatterText5": null,
"MatterDate1": null,
"MatterDate2": null,
"MatterEXText1": null,
"MatterEXText2": null,
"MatterEXText3": null,
"MatterEXText4": null,
"MatterEXText5": "Classification success: 2024-12-13T01:17:23 (UTC)",
"MatterEXText6": null,
"MatterEXText7": null,
"MatterEXText8": null,
"MatterEXText9": null,
"MatterEXText10": "",
"MatterEXText11": null,
"MatterEXDate1": null,
"MatterEXDate2": null,
"MatterEXDate3": null,
"MatterEXDate4": null,
"MatterEXDate5": null,
"MatterEXDate6": null,
"MatterEXDate7": null,
"MatterEXDate8": null,
"MatterEXDate9": null,
"MatterEXDate10": "2024-12-13T01:17:23",
"MatterAgiloftId": 0,
"MatterReference": null,
"MatterRestrictViewViaWeb": false,
"MatterReports": []
}
]
82 changes: 82 additions & 0 deletions tests/fixtures/minutes_candidates.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
[
{
"MatterAttachmentId": 28753,
"MatterAttachmentGuid": "6583B3CE-A71C-4446-BAA3-9193D82D5428",
"MatterAttachmentLastModifiedUtc": "2024-12-06T18:48:49.63",
"MatterAttachmentRowVersion": "AAAAAAJn7iQ=",
"MatterAttachmentName": "Regular Board Meeting MINUTES - September 26, 2024",
"MatterAttachmentHyperlink": "https://metro.legistar1.com/metro/attachments/73425e96-9569-465d-b4c2-14ef9398e6ee.pdf",
"MatterAttachmentFileName": "73425e96-9569-465d-b4c2-14ef9398e6ee.pdf",
"MatterAttachmentMatterVersion": "0",
"MatterAttachmentIsHyperlink": false,
"MatterAttachmentBinary": null,
"MatterAttachmentIsSupportingDocument": false,
"MatterAttachmentShowOnInternetPage": true,
"MatterAttachmentIsMinuteOrder": false,
"MatterAttachmentIsBoardLetter": false,
"MatterAttachmentAgiloftId": 0,
"MatterAttachmentDescription": null,
"MatterAttachmentPrintWithReports": true,
"MatterAttachmentSort": 3
},
{
"MatterAttachmentId": 28754,
"MatterAttachmentGuid": "8FD724C9-ACAF-4631-B0A3-68A64ABAA7BA",
"MatterAttachmentLastModifiedUtc": "2024-12-06T16:43:12.877",
"MatterAttachmentRowVersion": "AAAAAAJn3jI=",
"MatterAttachmentName": "September 2024 RBM Public Comments",
"MatterAttachmentHyperlink": "https://metro.legistar1.com/metro/attachments/cf3d93b7-3286-4487-85bc-938dbbb14a4d.pdf",
"MatterAttachmentFileName": "cf3d93b7-3286-4487-85bc-938dbbb14a4d.pdf",
"MatterAttachmentMatterVersion": "0",
"MatterAttachmentIsHyperlink": false,
"MatterAttachmentBinary": null,
"MatterAttachmentIsSupportingDocument": false,
"MatterAttachmentShowOnInternetPage": true,
"MatterAttachmentIsMinuteOrder": false,
"MatterAttachmentIsBoardLetter": false,
"MatterAttachmentAgiloftId": 0,
"MatterAttachmentDescription": null,
"MatterAttachmentPrintWithReports": true,
"MatterAttachmentSort": 4
},
{
"MatterAttachmentId": 28800,
"MatterAttachmentGuid": "D0647F1B-2680-4CD6-8564-D0D826CA0642",
"MatterAttachmentLastModifiedUtc": "2024-12-06T16:35:54.977",
"MatterAttachmentRowVersion": "AAAAAAJn3ac=",
"MatterAttachmentName": "Special Board Meeting/Ad Hoc 2028 MINUTES - October 23, 2024",
"MatterAttachmentHyperlink": "https://metro.legistar1.com/metro/attachments/b9206789-2b9a-4742-9b57-bee1d3d52581.pdf",
"MatterAttachmentFileName": "b9206789-2b9a-4742-9b57-bee1d3d52581.pdf",
"MatterAttachmentMatterVersion": "0",
"MatterAttachmentIsHyperlink": false,
"MatterAttachmentBinary": null,
"MatterAttachmentIsSupportingDocument": false,
"MatterAttachmentShowOnInternetPage": true,
"MatterAttachmentIsMinuteOrder": false,
"MatterAttachmentIsBoardLetter": false,
"MatterAttachmentAgiloftId": 0,
"MatterAttachmentDescription": null,
"MatterAttachmentPrintWithReports": true,
"MatterAttachmentSort": 5
},
{
"MatterAttachmentId": 28801,
"MatterAttachmentGuid": "2525A3F4-2A93-4EE7-B41B-AE8AD7D8B895",
"MatterAttachmentLastModifiedUtc": "2024-10-25T17:49:17.703",
"MatterAttachmentRowVersion": "AAAAAAJgEQY=",
"MatterAttachmentName": "October 2024 SBM/Ad Hoc Public Comments",
"MatterAttachmentHyperlink": "https://metro.legistar1.com/metro/attachments/3ec341d3-49c3-4e13-af4d-26f834855032.pdf",
"MatterAttachmentFileName": "3ec341d3-49c3-4e13-af4d-26f834855032.pdf",
"MatterAttachmentMatterVersion": "0",
"MatterAttachmentIsHyperlink": false,
"MatterAttachmentBinary": null,
"MatterAttachmentIsSupportingDocument": false,
"MatterAttachmentShowOnInternetPage": true,
"MatterAttachmentIsMinuteOrder": false,
"MatterAttachmentIsBoardLetter": false,
"MatterAttachmentAgiloftId": 0,
"MatterAttachmentDescription": null,
"MatterAttachmentPrintWithReports": true,
"MatterAttachmentSort": 6
}
]
Binary file added tests/fixtures/right_minutes_file.pdf
Binary file not shown.
Binary file added tests/fixtures/wrong_minutes_file.pdf
Binary file not shown.
Loading
Loading