-
Notifications
You must be signed in to change notification settings - Fork 0
/
vernon_pdf_parser.py
28 lines (20 loc) · 940 Bytes
/
vernon_pdf_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import pdfplumber
def vernon_ny_parser(pdf_file, search_param):
    """
    Copy every property block that contains ``search_param`` to a text file.

    The text of each page is split on a row-of-asterisks separator. Every
    resulting block (except the first two and the last one on each page) that
    contains ``search_param`` is written unmodified to
    ``Vernon_{search_param}.txt`` in the current working directory. The output
    file is created, or truncated if it already exists, and written as UTF-8.

    pdf_file: Specify PDF path
    search_param: Specify text to search for in property block
    """
    # Separator line found between property blocks in the extracted text.
    separator = "**********************************************************************************************"
    # Formatted once, exactly as the original did per block, so a non-string
    # search_param is still matched by its string form.
    needle = f"{search_param}"

    with pdfplumber.open(pdf_file) as pdf:
        # Explicit encoding: extracted PDF text may contain characters that
        # the platform default encoding cannot represent.
        with open(f"Vernon_{search_param}.txt", 'w', encoding="utf-8") as txt_file:
            # NOTE(review): the final page is left out, matching the original
            # loop bound (page count - 1 with a strict '<'). Presumably the
            # last page holds no property blocks -- confirm this is intended.
            # Slicing also copes with a PDF that has no pages at all.
            for page in pdf.pages[:-1]:
                text = page.extract_text()
                if not text:
                    # Nothing extractable on this page (None or empty text).
                    continue
                property_block = text.split(separator)
                # NOTE(review): the first two chunks and the last chunk of
                # each page are skipped, as in the original; presumably page
                # header/footer material -- confirm against the PDF layout.
                for prop in property_block[2:-1]:
                    if needle in prop:
                        txt_file.write(prop)
# Script entry: scan the 2021 final roll for blocks mentioning "4 UNITS";
# matches are written to "Vernon_4 UNITS.txt".
vernon_ny_parser(
    pdf_file="2021_Final_Roll_Website_Updated.pdf",
    search_param="4 UNITS",
)