Skip to content

Commit

Permalink
pipeline script
Browse files (browse the repository at this point in the history)
  • Loading branch information
bhearsum committed May 12, 2023
1 parent 308bdef commit 48a83a2
Showing 1 changed file with 7 additions and 2 deletions.
9 changes: 7 additions & 2 deletions pipeline/clean/merge-corpus.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,13 @@ tmp="${output_prefix}/merge"
# Make sure the scratch directory ${tmp} ("${output_prefix}/merge") exists
# before anything is written into it.
mkdir -p "${tmp}"

echo "### Merging"
# NOTE(review): this listing is a diff hunk whose +/- markers were lost; the
# commit summary reports 7 additions and 2 deletions, so the next two `cat`
# lines are presumably the pre-change version that the if/else below
# replaces -- confirm against the actual file before running this as-is.
cat "${input_prefixes[@]/%/.${SRC}.${ARTIFACT_EXT}}" >"${tmp}/corpus.${SRC}.dup.${ARTIFACT_EXT}"
cat "${input_prefixes[@]/%/.${TRG}.${ARTIFACT_EXT}}" >"${tmp}/corpus.${TRG}.dup.${ARTIFACT_EXT}"
# Only the FIRST element of input_prefixes is inspected: if it already ends in
# ".${ARTIFACT_EXT}", every element is used verbatim as a file name.
if [[ "${input_prefixes[0]}" == *.${ARTIFACT_EXT} ]]; then
# NOTE(review): both commands below concatenate the exact same list of files,
# so the ${SRC} and ${TRG} ".dup" outputs end up with identical content.
# Presumably each side should receive only its own language's files -- confirm.
cat "${input_prefixes[@]}" >"${tmp}/corpus.${SRC}.dup.${ARTIFACT_EXT}"
cat "${input_prefixes[@]}" >"${tmp}/corpus.${TRG}.dup.${ARTIFACT_EXT}"
else
# Otherwise the elements are treated as prefixes: the `/%/` expansion (empty
# pattern anchored at the end) appends ".<lang>.${ARTIFACT_EXT}" to every
# element, and the resulting files are concatenated per language.
cat "${input_prefixes[@]/%/.${SRC}.${ARTIFACT_EXT}}" >"${tmp}/corpus.${SRC}.dup.${ARTIFACT_EXT}"
cat "${input_prefixes[@]/%/.${TRG}.${ARTIFACT_EXT}}" >"${tmp}/corpus.${TRG}.dup.${ARTIFACT_EXT}"
fi

echo "### Deduplication"
paste <(${COMPRESSION_CMD} -dc "${tmp}/corpus.${SRC}.dup.${ARTIFACT_EXT}") <(${COMPRESSION_CMD} -dc "${tmp}/corpus.${TRG}.dup.${ARTIFACT_EXT}") |
Expand Down

0 comments on commit 48a83a2

Please sign in to comment.