-
Notifications
You must be signed in to change notification settings - Fork 12
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Reads from stdin and writes to stdout, batch execution of workflows #69
Changes from 9 commits
ff2fccc
144f037
d3e624c
67d1b15
d0cd15a
a72e01a
aab4a38
69d8b8c
2d3dfe9
91c8f3d
98cdef8
8559dc3
7fe89c6
f798249
48879db
206998f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,7 +2,7 @@ | |
import networkx as nx | ||
import json | ||
|
||
from .node import Node | ||
from .node import Node, ReadCsvNode | ||
from .node_factory import node_factory | ||
|
||
|
||
|
@@ -382,8 +382,29 @@ def to_session_dict(self): | |
except nx.NetworkXError as e: | ||
raise WorkflowException('to_session_dict', str(e)) | ||
|
||
def execute_read_csv(self, node_id, csv_location): | ||
# TODO: some duplicated code here from execute common method. Need to refactor. | ||
"""Execute read_csv from a file specified to standard input. | ||
Current use case: CLI. | ||
""" | ||
preceding_data = list() | ||
flow_vars = list() | ||
node_to_execute = self.get_node(node_id) | ||
if node_to_execute is None: | ||
raise WorkflowException('execute', 'The workflow does not contain node %s' % node_id) | ||
# Pass in data to current Node to use in execution | ||
output = node_to_execute.execute_for_read(preceding_data.append(None), flow_vars.append(None), csv_location) | ||
|
||
# Save new execution data to disk | ||
node_to_execute.data = Workflow.store_node_data(self, node_id, output) | ||
|
||
if node_to_execute.data is None: | ||
raise WorkflowException('execute', 'There was a problem saving node output.') | ||
|
||
return node_to_execute | ||
|
||
@staticmethod | ||
def execute_workflow(workflow_location): | ||
def execute_workflow(workflow_location, stdin_files): | ||
"""Execute entire workflow at a certain location. | ||
Current use case: CLI. | ||
""" | ||
|
@@ -400,7 +421,13 @@ def execute_workflow(workflow_location): | |
#execute each node in the order returned by execution order method | ||
#TODO exception handling: stop and provide details on which node failed to execute | ||
for node in execution_order: | ||
workflow_instance.execute(node) | ||
if type(workflow_instance.get_node(node)) is ReadCsvNode and len(stdin_files) > 0: | ||
csv_location = stdin_files[0] | ||
workflow_instance.execute_read_csv(node, csv_location) | ||
# delete file at index 0 | ||
del stdin_files[0] | ||
else: | ||
workflow_instance.execute(node) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I ran into this bug the other day where the graph didn't update after execution. The Changing both if/else to include There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I see, I've modified the code to do exactly what you suggested and it seems to be working fine. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I've added a modification that includes the dataframe output in stdout. Didn't need to duplicate the exact functionality as ReadCsv; I was able to print output = node_to_execute.execute(preceding_data, execution_options). I'm still not happy with the repeating of execute code in workflow, so will be refactoring this in the next PR. |
||
|
||
|
||
class WorkflowUtils: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is it possible to alter the node configuration (probably through `node.option_values`) to point to the stdin-copied file rather than create a new method? I think this would resolve the need for duplicated execute calls in the workflow object as you mentioned in the comments there.