forked from builker-col/bogota-apartments
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun.py
executable file
·33 lines (25 loc) · 1.1 KB
/
run.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# Author: Erik Garcia (@erik172)
# Version: Unreleased
from datetime import datetime
import logging
import os
import subprocess
# Path of the pipeline log file, relative to the current working directory.
filename = 'logs/data_pipeline.log'

# logging.basicConfig opens the log file right away and raises
# FileNotFoundError when the parent directory is missing, so make sure the
# directory exists before configuring the root logger.
os.makedirs(os.path.dirname(filename), exist_ok=True)

logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(levelname)s - %(message)s',
    filename=filename,
)
def _run_step(command):
    """Run one pipeline command, logging and re-raising any failure.

    Args:
        command: Argument list handed to ``subprocess.run`` (no shell).

    Raises:
        subprocess.CalledProcessError: The command exited with a non-zero
            status.
        OSError: The command could not be started (e.g. executable missing).
    """
    try:
        # check=True turns a non-zero exit status into an exception instead
        # of letting the failure pass unnoticed.
        subprocess.run(command, check=True)
    except subprocess.CalledProcessError as err:
        logging.error(
            'Command "%s" failed with exit code %s',
            ' '.join(command),
            err.returncode,
        )
        raise
    except OSError:
        logging.exception('Command "%s" could not be started', ' '.join(command))
        raise


def run_data_pipeline():
    """Run the full data pipeline: scraping, ETL processing and saving.

    The steps are executed sequentially as child processes: the ``habi`` and
    ``metrocuadrado`` Scrapy spiders, then the ETL scripts ``01`` to ``03``,
    and finally the ``04_data_save.py`` script. Progress is written to the
    root logger.

    The pipeline stops at the first step that fails, so later stages never
    run on the output of a broken earlier stage.

    Raises:
        subprocess.CalledProcessError: A step exited with a non-zero status.
        OSError: A step's executable could not be launched.
    """
    logging.info('Start data pipeline at %s', datetime.now())

    # Web scraping
    logging.info('Start web scraping HABI')
    _run_step(['scrapy', 'crawl', 'habi'])

    logging.info('Start web scraping METROCUADRADO')
    _run_step(['scrapy', 'crawl', 'metrocuadrado'])

    logging.info('End web scraping')

    # Data processing: the numbered ETL scripts run in order.
    logging.info('Start data processing')
    _run_step(['python3.11', 'ETL/01_initial_transformations.py'])
    _run_step(['python3.11', 'ETL/02_data_correction.py'])
    _run_step(['python3.11', 'ETL/03_data_enrichment.py'])
    logging.info('End data processing')

    # Data saving
    logging.info('Start data saving')
    _run_step(['python3.11', 'ETL/04_data_save.py'])
    logging.info('End data saving')

    logging.info('End data pipeline at %s', datetime.now())
# Script entry point: run the pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    run_data_pipeline()