diff --git a/pipeline/clean/merge-corpus.sh b/pipeline/clean/merge-corpus.sh
index a7514a3e2..236c1d2d4 100755
--- a/pipeline/clean/merge-corpus.sh
+++ b/pipeline/clean/merge-corpus.sh
@@ -22,8 +22,34 @@ tmp="${output_prefix}/merge"
 mkdir -p "${tmp}"
 
 echo "### Merging"
-cat "${input_prefixes[@]/%/.${SRC}.${ARTIFACT_EXT}}" >"${tmp}/corpus.${SRC}.dup.${ARTIFACT_EXT}"
-cat "${input_prefixes[@]/%/.${TRG}.${ARTIFACT_EXT}}" >"${tmp}/corpus.${TRG}.dup.${ARTIFACT_EXT}"
+if [[ "${input_prefixes[0]}" == *".${ARTIFACT_EXT}" ]]; then
+  # Inputs are complete file paths rather than prefixes. Route each file to
+  # its language side using the same ".<lang>.<ext>" suffix the prefix branch
+  # below appends; concatenating every input into both sides would make the
+  # source and target corpora identical.
+  src_inputs=()
+  trg_inputs=()
+  for merge_input in "${input_prefixes[@]}"; do
+    case "${merge_input}" in
+      *".${SRC}.${ARTIFACT_EXT}") src_inputs+=("${merge_input}") ;;
+      *".${TRG}.${ARTIFACT_EXT}") trg_inputs+=("${merge_input}") ;;
+      *)
+        echo "ERROR: cannot tell the language of input file: ${merge_input}" >&2
+        exit 1
+        ;;
+    esac
+  done
+  # Guard against an empty side: a bare `cat` would block reading stdin.
+  if [[ ${#src_inputs[@]} -eq 0 || ${#trg_inputs[@]} -eq 0 ]]; then
+    echo "ERROR: expected at least one ${SRC} and one ${TRG} input file" >&2
+    exit 1
+  fi
+  cat "${src_inputs[@]}" >"${tmp}/corpus.${SRC}.dup.${ARTIFACT_EXT}"
+  cat "${trg_inputs[@]}" >"${tmp}/corpus.${TRG}.dup.${ARTIFACT_EXT}"
+else
+  cat "${input_prefixes[@]/%/.${SRC}.${ARTIFACT_EXT}}" >"${tmp}/corpus.${SRC}.dup.${ARTIFACT_EXT}"
+  cat "${input_prefixes[@]/%/.${TRG}.${ARTIFACT_EXT}}" >"${tmp}/corpus.${TRG}.dup.${ARTIFACT_EXT}"
+fi
 
 echo "### Deduplication"
 paste <(${COMPRESSION_CMD} -dc "${tmp}/corpus.${SRC}.dup.${ARTIFACT_EXT}") <(${COMPRESSION_CMD} -dc "${tmp}/corpus.${TRG}.dup.${ARTIFACT_EXT}") |