Skip to content

Commit

Permalink
Merge pull request #36 from Metro-Records/hcg/minutes-fix
Browse files: browse the repository at this point in the history
Tighten up minutes identification, add test
  • Loading branch information
hancush authored Jan 16, 2025
2 parents fa42520 + d57fc2a commit df2d467
Show file tree
Hide file tree
Showing 7 changed files with 316 additions and 65 deletions.
43 changes: 25 additions & 18 deletions lametro/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -525,12 +525,12 @@ def find_approved_minutes(self, event):
"/matters/",
"MatterId",
(
f"{associated_with_meeting_body} and " +
f"{meeting_date_in_title} and " +
"(" +
f"({matter_type_minutes}) or " +
f"({minutes_in_title} and {matter_type_informational})" +
")"
f"{associated_with_meeting_body} and "
+ f"{meeting_date_in_title} and "
+ "("
+ f"({matter_type_minutes}) or "
+ f"({minutes_in_title} and {matter_type_informational})"
+ ")"
),
)

Expand All @@ -540,10 +540,12 @@ def find_approved_minutes(self, event):
# Sometimes, the search returns more than one board report.
# Go through each matter yielded from this generator to account for that.
for matter in result:
if (matter['MatterRestrictViewViaWeb'] or
matter['MatterStatusName'] == 'Draft' or
matter['MatterBodyName'] == 'TO BE REMOVED'):
# Ignore this matter if there are signs that it shouldn't be processed.
if (
matter["MatterRestrictViewViaWeb"]
or matter["MatterStatusName"] == "Draft"
or matter["MatterBodyName"] == "TO BE REMOVED"
):
# Ignore this matter if there are signs that it shouldn't be processed.
continue

attachment_url = self.BASE_URL + "/matters/{}/attachments".format(
Expand All @@ -554,7 +556,9 @@ def find_approved_minutes(self, event):

try:
if len(attachments) == 0:
raise MissingAttachmentsException(matter["MatterId"], attachment_url)
raise MissingAttachmentsException(
matter["MatterId"], attachment_url
)
except MissingAttachmentsException as e:
capture_exception(e)
continue
Expand All @@ -577,7 +581,7 @@ def find_approved_minutes(self, event):
except PDFSyntaxError as e:
capture_message(
f"PDFPlumber encountered an error opening a file: {e}",
"warning"
"warning",
)
continue
cover_page = pdf.pages[0]
Expand All @@ -591,16 +595,19 @@ def find_approved_minutes(self, event):
with io.BytesIO() as in_mem_image:
pdf_image.save(in_mem_image)
in_mem_image.seek(0)
cover_page_text = pytesseract.image_to_string(Image.open(in_mem_image))

if "MINUTES" in cover_page_text.upper():
cover_page_text = pytesseract.image_to_string(
Image.open(in_mem_image)
)

if all(
substr in cover_page_text.lower()
for substr in (name.lower(), "minutes")
):
yield attach
n_minutes += 1

if n_minutes == 0:
self.warning(
f"Couldn't find minutes for the {name} meeting of {date}."
)
self.warning(f"Couldn't find minutes for the {name} meeting of {date}.")


class LAMetroAPIEvent(dict):
Expand Down
26 changes: 26 additions & 0 deletions tests/fixtures/event.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{
"EventId": 2907,
"EventGuid": "01EE5D79-B958-46F9-B8EB-384B745A57EC",
"EventLastModifiedUtc": "2024-09-21T00:49:38.617",
"EventRowVersion": "AAAAAAJZKHU=",
"EventBodyId": 138,
"EventBodyName": "Board of Directors - Regular Board Meeting",
"EventDate": "2024-09-26T00:00:00",
"EventTime": "10:00 AM",
"EventVideoStatus": "Public",
"EventAgendaStatusId": 10,
"EventAgendaStatusName": "Final",
"EventMinutesStatusId": 9,
"EventMinutesStatusName": "Draft",
"EventLocation": "One Gateway Plaza, Los Angeles, CA 90012, \r\n3rd Floor, Metro Board Room",
"EventAgendaFile": "https://metro.legistar1.com/metro/meetings/2024/9/2907_A_Board_of_Directors_-_Regular_Board_Meeting_24-09-26_Agenda.pdf",
"EventMinutesFile": null,
"EventAgendaLastPublishedUTC": "2024-09-21T00:49:38.513",
"EventMinutesLastPublishedUTC": null,
"EventComment": "Watch online: https://boardagendas.metro.net\r\nListen by phone: Dial 202-735-3323 and enter Access Code:\r\n5647249# (English) or 7292892# (Español) \r\n\r\nTo give written or live public comment, please see the top of page 4",
"EventVideoPath": null,
"EventMedia": "3231",
"EventInSiteURL": "https://metro.legistar.com/MeetingDetail.aspx?LEGID=2907&GID=557&G=A5FAA737-A54D-4A6C-B1E8-FF70F765FA94",
"EventItems":
[]
}
58 changes: 58 additions & 0 deletions tests/fixtures/matter_candidates.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
[
{
"MatterId": 10881,
"MatterGuid": "5E324116-B670-49F2-AB53-02E768DBA6DD",
"MatterLastModifiedUtc": "2024-12-13T01:07:23.667",
"MatterRowVersion": "AAAAAAJpCV0=",
"MatterFile": "2024-1026",
"MatterName": null,
"MatterTitle": "APPROVE Minutes of the Regular Board Meeting held September 26, 2024 and the Special Board Meeting/Ad Hoc 2028 Olympic & Paralympic Games Committee held October 23, 2024.",
"MatterTypeId": 62,
"MatterTypeName": "Minutes",
"MatterStatusId": 72,
"MatterStatusName": "Passed",
"MatterBodyId": 138,
"MatterBodyName": "Board of Directors - Regular Board Meeting",
"MatterIntroDate": "2024-10-22T00:00:00",
"MatterAgendaDate": "2024-10-31T00:00:00",
"MatterPassedDate": "2024-10-31T00:00:00",
"MatterEnactmentDate": null,
"MatterEnactmentNumber": null,
"MatterRequester": null,
"MatterNotes": null,
"MatterVersion": "1",
"MatterCost": null,
"MatterText1": null,
"MatterText2": null,
"MatterText3": null,
"MatterText4": null,
"MatterText5": null,
"MatterDate1": null,
"MatterDate2": null,
"MatterEXText1": null,
"MatterEXText2": null,
"MatterEXText3": null,
"MatterEXText4": null,
"MatterEXText5": "Classification success: 2024-12-13T01:17:23 (UTC)",
"MatterEXText6": null,
"MatterEXText7": null,
"MatterEXText8": null,
"MatterEXText9": null,
"MatterEXText10": "",
"MatterEXText11": null,
"MatterEXDate1": null,
"MatterEXDate2": null,
"MatterEXDate3": null,
"MatterEXDate4": null,
"MatterEXDate5": null,
"MatterEXDate6": null,
"MatterEXDate7": null,
"MatterEXDate8": null,
"MatterEXDate9": null,
"MatterEXDate10": "2024-12-13T01:17:23",
"MatterAgiloftId": 0,
"MatterReference": null,
"MatterRestrictViewViaWeb": false,
"MatterReports": []
}
]
82 changes: 82 additions & 0 deletions tests/fixtures/minutes_candidates.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
[
{
"MatterAttachmentId": 28753,
"MatterAttachmentGuid": "6583B3CE-A71C-4446-BAA3-9193D82D5428",
"MatterAttachmentLastModifiedUtc": "2024-12-06T18:48:49.63",
"MatterAttachmentRowVersion": "AAAAAAJn7iQ=",
"MatterAttachmentName": "Regular Board Meeting MINUTES - September 26, 2024",
"MatterAttachmentHyperlink": "https://metro.legistar1.com/metro/attachments/73425e96-9569-465d-b4c2-14ef9398e6ee.pdf",
"MatterAttachmentFileName": "73425e96-9569-465d-b4c2-14ef9398e6ee.pdf",
"MatterAttachmentMatterVersion": "0",
"MatterAttachmentIsHyperlink": false,
"MatterAttachmentBinary": null,
"MatterAttachmentIsSupportingDocument": false,
"MatterAttachmentShowOnInternetPage": true,
"MatterAttachmentIsMinuteOrder": false,
"MatterAttachmentIsBoardLetter": false,
"MatterAttachmentAgiloftId": 0,
"MatterAttachmentDescription": null,
"MatterAttachmentPrintWithReports": true,
"MatterAttachmentSort": 3
},
{
"MatterAttachmentId": 28754,
"MatterAttachmentGuid": "8FD724C9-ACAF-4631-B0A3-68A64ABAA7BA",
"MatterAttachmentLastModifiedUtc": "2024-12-06T16:43:12.877",
"MatterAttachmentRowVersion": "AAAAAAJn3jI=",
"MatterAttachmentName": "September 2024 RBM Public Comments",
"MatterAttachmentHyperlink": "https://metro.legistar1.com/metro/attachments/cf3d93b7-3286-4487-85bc-938dbbb14a4d.pdf",
"MatterAttachmentFileName": "cf3d93b7-3286-4487-85bc-938dbbb14a4d.pdf",
"MatterAttachmentMatterVersion": "0",
"MatterAttachmentIsHyperlink": false,
"MatterAttachmentBinary": null,
"MatterAttachmentIsSupportingDocument": false,
"MatterAttachmentShowOnInternetPage": true,
"MatterAttachmentIsMinuteOrder": false,
"MatterAttachmentIsBoardLetter": false,
"MatterAttachmentAgiloftId": 0,
"MatterAttachmentDescription": null,
"MatterAttachmentPrintWithReports": true,
"MatterAttachmentSort": 4
},
{
"MatterAttachmentId": 28800,
"MatterAttachmentGuid": "D0647F1B-2680-4CD6-8564-D0D826CA0642",
"MatterAttachmentLastModifiedUtc": "2024-12-06T16:35:54.977",
"MatterAttachmentRowVersion": "AAAAAAJn3ac=",
"MatterAttachmentName": "Special Board Meeting/Ad Hoc 2028 MINUTES - October 23, 2024",
"MatterAttachmentHyperlink": "https://metro.legistar1.com/metro/attachments/b9206789-2b9a-4742-9b57-bee1d3d52581.pdf",
"MatterAttachmentFileName": "b9206789-2b9a-4742-9b57-bee1d3d52581.pdf",
"MatterAttachmentMatterVersion": "0",
"MatterAttachmentIsHyperlink": false,
"MatterAttachmentBinary": null,
"MatterAttachmentIsSupportingDocument": false,
"MatterAttachmentShowOnInternetPage": true,
"MatterAttachmentIsMinuteOrder": false,
"MatterAttachmentIsBoardLetter": false,
"MatterAttachmentAgiloftId": 0,
"MatterAttachmentDescription": null,
"MatterAttachmentPrintWithReports": true,
"MatterAttachmentSort": 5
},
{
"MatterAttachmentId": 28801,
"MatterAttachmentGuid": "2525A3F4-2A93-4EE7-B41B-AE8AD7D8B895",
"MatterAttachmentLastModifiedUtc": "2024-10-25T17:49:17.703",
"MatterAttachmentRowVersion": "AAAAAAJgEQY=",
"MatterAttachmentName": "October 2024 SBM/Ad Hoc Public Comments",
"MatterAttachmentHyperlink": "https://metro.legistar1.com/metro/attachments/3ec341d3-49c3-4e13-af4d-26f834855032.pdf",
"MatterAttachmentFileName": "3ec341d3-49c3-4e13-af4d-26f834855032.pdf",
"MatterAttachmentMatterVersion": "0",
"MatterAttachmentIsHyperlink": false,
"MatterAttachmentBinary": null,
"MatterAttachmentIsSupportingDocument": false,
"MatterAttachmentShowOnInternetPage": true,
"MatterAttachmentIsMinuteOrder": false,
"MatterAttachmentIsBoardLetter": false,
"MatterAttachmentAgiloftId": 0,
"MatterAttachmentDescription": null,
"MatterAttachmentPrintWithReports": true,
"MatterAttachmentSort": 6
}
]
Binary file added tests/fixtures/right_minutes_file.pdf
Binary file not shown.
Binary file added tests/fixtures/wrong_minutes_file.pdf
Binary file not shown.
Loading

0 comments on commit df2d467

Please sign in to comment.