PyWorkflowApp · reelmatt · May 1, 2020 · Apr 18, 2020 · Apr 18, 2020 · Apr 18, 2020
diff --git a/CLI/cli.py b/CLI/cli.py
@@ -1,6 +1,11 @@
+import sys
+
 import click
+import os
+import uuid
 
 from pyworkflow import Workflow
+from pyworkflow import NodeException
 
 
 class Config(object):
@@ -13,13 +18,40 @@ def __init__(self):
 @click.option('--file-directory', type=click.Path())
 @pass_config
 def cli(config, file_directory):
-    if file_directory is None:
-        file_directory = '.'
     config.file_directory = file_directory
 
+    # Files materialised from piped stdin; stays empty when stdin is a terminal.
+    stdin_files = []
+    # Look the stream up once and reuse it for both the tty check and the read.
+    stdin_stream = click.get_text_stream('stdin')
+
+    if not stdin_stream.isatty():
+        # TODO should be done for each separate file coming from stdin, currently working for one file, but easy to build up.
+        # Write standard in to a new, uniquely named file in the local filesystem.
+        file_name = str(uuid.uuid4())
+
+        # TODO small issue here, might be better to upload this file to the workflow directory instead of cwd
+        new_file_path = os.path.join(os.getcwd(), file_name)
+
+        # Pin UTF-8: the file is later parsed with pd.read_csv, which decodes as
+        # UTF-8 by default, while open() would otherwise use the locale encoding.
+        with open(new_file_path, 'w', encoding='utf-8') as f:
+            f.write(stdin_stream.read())
+
+        stdin_files.append(file_name)
+
+    config.stdin_files = stdin_files
+
 
 @cli.command()
 @pass_config
 def execute(config):
-    click.echo('Loading workflow file form %s' % config.file_directory)
-    Workflow.execute_workflow(config.file_directory)
+    # sys.exit(<str>) prints the message to stderr and exits with status 1,
+    # so shell callers and pipelines can detect that the run failed.
+    if config.file_directory is None:
+        sys.exit('Please specify a workflow to run')
+    try:
+        click.echo('Loading workflow file from %s' % config.file_directory)
+        Workflow.execute_workflow(config.file_directory, config.stdin_files)
+    except NodeException as ne:
+        sys.exit("Issues during node execution\n%s" % ne)
diff --git a/pyworkflow/pyworkflow/node.py b/pyworkflow/pyworkflow/node.py
@@ -109,6 +109,16 @@ def execute(self, predecessor_data, flow_vars):
         except Exception as e:
             raise NodeException('read csv', str(e))
 
+    def execute_for_read(self, predecessor_data, flow_vars, file_to_read):
+        """Read `file_to_read` as CSV using this node's sep/header options; return JSON."""
+        try:
+            # Resolve the reader settings first, then parse in a single call.
+            csv_kwargs = {k: self.options[k].get_value() for k in ("sep", "header")}
+            return pd.read_csv(file_to_read, **csv_kwargs).to_json()
+        except Exception as e:
+            # Any failure is reported under the same 'read csv' label as execute().
+            raise NodeException('read csv', str(e))
+
     def __str__(self):
         return "ReadCsvNode"
 

diff --git a/pyworkflow/pyworkflow/workflow.py b/pyworkflow/pyworkflow/workflow.py
@@ -2,7 +2,7 @@
 import networkx as nx
 import json
 
-from .node import Node
+from .node import Node, ReadCsvNode
 from .node_factory import node_factory
 
 
@@ -382,8 +382,29 @@ def to_session_dict(self):
         except nx.NetworkXError as e:
             raise WorkflowException('to_session_dict', str(e))
 
+    def execute_read_csv(self, node_id, csv_location):
+        """Execute a read-CSV node against a file supplied via standard input.
+
+        Current use case: CLI. Returns the executed node; raises
+        WorkflowException if the node is missing or its output is not saved.
+        """
+        # TODO: some duplicated code here from execute common method. Need to refactor.
+        node_to_execute = self.get_node(node_id)
+        if node_to_execute is None:
+            raise WorkflowException('execute', 'The workflow does not contain node %s' % node_id)
+
+        # list.append() returns None, so pass the placeholder lists themselves.
+        preceding_data, flow_vars = [None], [None]
+        output = node_to_execute.execute_for_read(preceding_data, flow_vars, csv_location)
+
+        # Save new execution data to disk
+        node_to_execute.data = Workflow.store_node_data(self, node_id, output)
+        if node_to_execute.data is None:
+            raise WorkflowException('execute', 'There was a problem saving node output.')
+        return node_to_execute
+
     @staticmethod
-    def execute_workflow(workflow_location):
+    def execute_workflow(workflow_location, stdin_files):
         """Execute entire workflow at a certain location.
            Current use case: CLI.
         """
@@ -400,7 +421,13 @@ def execute_workflow(workflow_location):
         #execute each node in the order returned by execution order method
         #TODO exception handling: stop and provide details on which node failed to execute
         for node in execution_order:
-            workflow_instance.execute(node)
+            if type(workflow_instance.get_node(node)) is ReadCsvNode and len(stdin_files) > 0:
+                csv_location = stdin_files[0]
+                workflow_instance.execute_read_csv(node, csv_location)
+                # delete file at index 0
+                del stdin_files[0]
+            else:
+                workflow_instance.execute(node)
 
 
 class WorkflowUtils: