Skip to content

Commit

Permalink
Fix halting error on empty or nonexistent input JSON data.
Browse files: browse the repository at this point in the history
  • Loading branch information
gcornut committed Jul 6, 2018
1 parent a904bf1 commit 3079480
Showing 1 changed file with 7 additions and 4 deletions.
11 changes: 7 additions & 4 deletions etl/transform/elasticsearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ def index_on_disk(tmp_index_dir, data_list, pool, logger):
batches = split_every(1000, data_list)
index_list = pool.imap_unordered(index, batches)

global_index = reduce(DataIdIndex.merge, index_list)
global_index = reduce(DataIdIndex.merge, index_list, DataIdIndex({}))
return global_index


Expand Down Expand Up @@ -274,9 +274,12 @@ def collect_entities(parsed_template):
filtered_files = list(filter(lambda x: x[0] in source_entities, all_files))
for entity_name, file_path in filtered_files:
with open(file_path, 'r') as file:
data = json.loads(file.readline())
links = get_entity_links(data, 'DbId', 'PUI')
required_entities.update(set(map(first, links)))
line = file.readline()
if line:
data = json.loads(line)
links = get_entity_links(data, 'DbId', 'PUI')
entity_names = set(map(first, links))
required_entities.update(entity_names)

return required_entities

Expand Down

0 comments on commit 3079480

Please sign in to comment.