-
Notifications
You must be signed in to change notification settings - Fork 17
/
run_pipeline.sh
executable file
·25 lines (24 loc) · 1.07 KB
/
run_pipeline.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
#!/bin/sh
# Usage:
# run_pipeline <raw input file>
# Run the Stanford parser over raw sentences file (one per line)
# Following by an invokation of PropS over the json output
# Note:
# * This assumes that CORENLP_HOME points to the corenlp home directory (containing all jars)
# * We use the Stanford dependency format (not Universal Dependencies)
# * We use the makeCopulaHead flag
# * We assume the input file is split by sentences.
# This is indicated in stanford_parser.props
# (See https://github.com/stanfordnlp/CoreNLP/issues/415)
set -e
# Run Stanford parser
java -cp "$CORENLP_HOME/*" -Xmx2g edu.stanford.nlp.pipeline.StanfordCoreNLP \
-annotators tokenize,ssplit,pos,parse \
-file $1 \
-props stanford_parser.props \
-outputFormat json \
-parse.originalDependencies -parse.flags " -makeCopulaHead"
# Run PropS on output
# (Stanford outputs to ${1}.json)
# TODO: Stanford doesn't seem to allow printing to standard output, so direct piping wasn't possible
cat ${1}.json | python props/applications/parse_props.py -t --oie --corenlp-json-input