Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

streaming ndjson input and streaming flattened ndjson output (#53) #69

Open
wants to merge 3 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
122 changes: 73 additions & 49 deletions jello/cli.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
"""jello - query JSON at the command line with python syntax"""

import contextlib
import json
import os
import sys
import signal
Expand All @@ -8,7 +10,8 @@
import traceback
from textwrap import TextWrapper
import jello
from jello.lib import opts, load_json, read_file, pyquery, Schema, Json
import jello.lib as lib
from jello.lib import opts, load_json, read_file, pyquery, format_response


def ctrlc(signum, frame):
Expand All @@ -21,7 +24,7 @@ def get_stdin():
if sys.stdin.isatty():
return None
else:
return sys.stdin.read()
return sys.stdin


def print_help():
Expand All @@ -35,6 +38,7 @@ def print_help():
-C force color output even when using pipes (overrides -m)
-e empty data (don't process data from STDIN or file)
-f load input data from JSON file or JSON Lines files
-F flatten output list/iterator to newline-delimited json
-i initialize environment with .jelloconf.py
located at ~ (linux) or %appdata% (Windows)
-l output as lines suitable for assignment to a bash array
Expand All @@ -44,6 +48,8 @@ def print_help():
-r raw string output (no quotes)
-R raw string input (don't auto convert input to dict/list)
-s print the JSON schema in grep-able format
-S stream input newline-delimited json from STDIN or file
from -f. "_" is an iterator providing the entries
-t print type annotations in schema view
-v version info
-h help
Expand Down Expand Up @@ -125,6 +131,27 @@ def print_exception(e=None, data='', query='', response='', ex_type='Runtime'):
sys.exit(1)


def print_json_streaming_error(e, query='', ex_type='Json Load'):
cause = e.__cause__
data = None
if isinstance(cause, json.JSONDecodeError):
data = cause.doc
print_exception(
cause,
data=data,
query=query,
ex_type=ex_type
)


class nullclosing:
"""simpler version of contextlib.nullcontext, which was introduced in 3.7
(currently support 3.6)"""

def close(self):
pass


def main(data=None, query='_'):
# break on ctrl-c keyboard interrupt
signal.signal(signal.SIGINT, ctrlc)
Expand All @@ -139,12 +166,14 @@ def main(data=None, query='_'):
if sys.platform.startswith('win32'):
os.system('')

stdin = None
if data is None:
data = get_stdin()
stdin = get_stdin()

options = []
long_options = {}
arg_section = '' # can be query_file or data_files
data_files = []

for arg in sys.argv[1:]:
if arg == '-q':
Expand All @@ -163,10 +192,8 @@ def main(data=None, query='_'):
arg_section = ''

elif arg_section == 'data_files':
try:
data += '\n' + read_file(arg)
except Exception as e:
print_error(f'jello: Issue reading data file: {e}')
data_files.append(arg)
arg_section = ''

elif arg.startswith('-') and not arg.startswith('--'):
options.extend(arg[1:])
Expand All @@ -189,11 +216,13 @@ def main(data=None, query='_'):
opts.lines = opts.lines or 'l' in options
opts.empty = opts.empty or 'e' in options
opts.force_color = opts.force_color or 'C' in options
opts.flatten = opts.flatten or 'F' in options
opts.mono = opts.mono or ('m' in options or bool(os.getenv('NO_COLOR')))
opts.nulls = opts.nulls or 'n' in options
opts.raw = opts.raw or 'r' in options
opts.raw_input = opts.raw_input or 'R' in options
opts.schema = opts.schema or 's' in options
opts.stream_input = opts.stream_input or 'S' in options
opts.types = opts.types or 't' in options
opts.version_info = opts.version_info or 'v' in options
opts.helpme = opts.helpme or 'h' in options
Expand All @@ -211,57 +240,52 @@ def main(data=None, query='_'):
'''))
sys.exit()

if data is None and not opts.empty:
if not opts.empty and data is None and not stdin and not data_files:
print_error('jello: Missing JSON or JSON Lines data via STDIN or file via -f option.\n')

# read all the file sources
input_context_manager = contextlib.closing(nullclosing())
if opts.empty:
data = '{}'

# load the data as a raw string or JSON
if opts.raw_input:
data = str(data).rstrip('\r\n')

elif opts.stream_input:
data = lib.StreamingJsonInput(data, stdin, data_files)
input_context_manager = data
else:
# load the JSON or JSON Lines into a dict or list of dicts
data = lib.read_data_nonstreaming(data, stdin, data_files)
if opts.raw_input:
data = str(data).rstrip('\r\n')
else:
try:
data = load_json(data)
except json.JSONDecodeError as e:
print_exception(e, ex_type='JSON Load')

# closes input data resources
with input_context_manager as _:
# Read .jelloconf.py (if it exists) and run the query
response = ''
try:
data = load_json(data)
response = pyquery(data, query)
except lib.StreamingJsonError as e:
# when streaming input errors are not raised until the data is
# pulled by the user query
print_json_streaming_error(e)
except Exception as e:
print_exception(e, ex_type='JSON Load')
print_exception(e, data, query, ex_type='Query')

# Read .jelloconf.py (if it exists) and run the query
response = ''
try:
response = pyquery(data, query)
except Exception as e:
print_exception(e, data, query, ex_type='Query')
# reset opts.mono after pyquery since initialization in pyquery can change values
if opts.force_color:
opts.mono = False

# reset opts.mono after pyquery since initialization in pyquery can change values
if opts.force_color:
opts.mono = False

# Create and print schema or JSON/JSON-Lines/Lines
output = ''
try:
if opts.schema:
schema = Schema()
output = schema.create_schema(response)

if not opts.mono and (sys.stdout.isatty() or opts.force_color):
schema.set_colors()
output = schema.color_output(output)

else:
json_out = Json()
output = json_out.create_json(response)

if (not opts.mono and not opts.raw) and (sys.stdout.isatty() or opts.force_color):
json_out.set_colors()
output = json_out.color_output(output)

print(output)

except Exception as e:
print_exception(e, data, query, response, ex_type='Output')
# Create and print schema or JSON/JSON-Lines/Lines
try:
format_response(response)
except lib.StreamingJsonError as e:
# when streaming output using -F we don't parse input and process it
# until we pull from the output iterator here
print_json_streaming_error(e)
except Exception as e:
print_exception(e, data, query, response, ex_type='Output')


if __name__ == '__main__':
Expand Down
Loading
Loading