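"""Scrape the status history of a process from UFRJ's SAP site
(https://www.sap.ufrj.br) and format it as plain text."""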
import re

import requests
from bs4 import BeautifulSoup

default_parser = 'html.parser'
paren_regex = re.compile(r"\(.*\)")

#
# Code
#
def find_tag_containing_text(soup, tag, text):
    # Return the first `tag` element whose text contains `text`, or None.
    for item in soup.find_all(tag):
        if text in item.getText():
            return item
    return None

def process_to_string(process):
    result = ''
    for step in process:
        result += process_step_to_string(step) + '\n\n'
    return result

def process_step_to_string(process_step):
    return """{}
Status: {}
Last page: {}
Date: {}
Days: {}""".format(process_step['location'],
                   process_step['status'],
                   process_step['last_page'],
                   process_step['date'],
                   process_step['days_at_location'])

def parse_table(table):
    result = []
    rows = [[cell.getText() for cell in row.find_all('td')]
            for row in table.find_all('tr')]
    # NOTE(erick): Discarding header
    rows = rows[1:]

    for row in rows:
        if len(row) <= 5:
            continue  # Skip rows that do not have all six expected columns.

        location, status = parse_first_column(row[1])
        result.append({
            'location'         : location.strip(),
            'status'           : status.strip(),
            'date'             : row[0].strip(),
            'last_page'        : row[4].strip(),
            'days_at_location' : row[5].strip()
        })
    return result

def parse_first_column(column):
    # The first column combines location and status separated by a dash;
    # parenthesized codes are stripped from both halves.
    parts = column.split('-')
    location = paren_regex.sub('', parts[0])
    status = paren_regex.sub('', parts[1])
    return location, status
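
# Hypothetical illustration of parse_first_column (the exact cell format on
# sap.ufrj.br is an assumption, not confirmed):
#   parse_first_column('REITORIA (00.01) - DESPACHADO (E)')
#   -> ('REITORIA  ', ' DESPACHADO ')   # parse_table() strips the whitespace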

def get_process_status(process_id):
    sap_url = 'https://www.sap.ufrj.br/consultar.asp?{}'.format(process_id)

    # TODO(erick): Do not use 'verify=False'. Pass a CA bundle.
    response = requests.get(sap_url, verify=False)
    if response.status_code != 200:
        raise Exception('Could not retrieve process page')

    process_page = response.content
    soup = BeautifulSoup(process_page, default_parser)

    process_table_html = find_tag_containing_text(soup, 'table', 'Despachado em')
    if not process_table_html:
        raise Exception('Process table not found')

    process_status = parse_table(process_table_html)
    return process_status
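

# A minimal command-line entry point, added as a usage sketch: the original
# script defines no main block, and the expected process-id format (the raw
# query string appended to consultar.asp) is an assumption here.
if __name__ == '__main__':
    import sys

    if len(sys.argv) != 2:
        print('usage: python sapot.py <process-id>')
        sys.exit(1)

    print(process_to_string(get_process_status(sys.argv[1])))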