forked from NMAAHC/nmaahcmm
-
Notifications
You must be signed in to change notification settings - Fork 0
/
restructureSIP
executable file
·187 lines (174 loc) · 9.16 KB
/
restructureSIP
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
#!/bin/bash
# restructureSIP restructures a SIP into an AIP by:
# creating an output AIP directory named after an operator-selected media id and located in an operator-selected DELIVERDIR directory
# creating "objects" and "metadata" subcategories within that output AIP directory
# rsyncing files from the SIP into the AIP
# removing or leaving in place the SIP files upon successful completion
# Locate the directory holding this script so the shared function library can be sourced from beside it.
# Both command substitutions are quoted so that an install path containing spaces is not word-split before dirname sees it.
SCRIPTDIR="$(dirname "$(which "${0}")")"
. "${SCRIPTDIR}/nmaahcmmfunctions" || { echo "Missing '${SCRIPTDIR}/nmaahcmmfunctions'. Exiting." ; exit 1 ;};
_initialize_make # safe script termination process defined in nmaahcmmfunctions
DEPENDENCIES=(rsync) # list dependencies required by script
_check_dependencies "${DEPENDENCIES[@]}" # defined in nmaahcmmfunctions
# count of failed rsync transfers; starts at zero and is consulted by the (currently disabled) source clean-up step near the end of the script
RSYNC_ERROR_COUNT=0
# per-run log file name: <LOGDIR>/<script name>_<date-time>.log
# NOTE(review): LOGDIR is not set anywhere in this file - presumably it comes from nmaahcmmfunctions or the environment; confirm
LOG="${LOGDIR%/}/$(basename "${0}")_$(date +%F-%H%M%S).log"
## USAGE
_usage(){
    # Print the help text for this script to stdout, then exit the script.
    # Globals:   reads ${0} to show the script name
    # Arguments: none
    # Outputs:   the help text on stdout
    # Returns:   never returns; exits with the status of the final output command (0 when printing succeeds)
    local SCRIPTNAME
    SCRIPTNAME="$(basename "${0}")"
    # one argument per output line; the " " entries reproduce the single-space spacer lines of the help text
    printf '%s\n' \
        "" \
        "${SCRIPTNAME}" \
        "This script will restructure a SIP into an AIP based on the type of package being submitted. The script will" \
        " -create an output AIP directory named after an operator-selected media id and located in an operator-selected DELIVERDIR directory" \
        " -create 'objects' and 'metadata' subcategories within that output AIP directory" \
        " -rsync files from the SIP into the AIP" \
        " -remove SIP files or leave them in place upon successful completion, depending upon operator selection" \
        " " \
        "If no options are selected, the script will prompt the operator for a selection." \
        "See the nmaahcmm README for more information on package definitions." \
        "Usage: ${SCRIPTNAME} [ -x | -f | -v | -d | -u ] [ -r ] [ -m MEDIAID ] [ -o /path/to/output/directory ] /path/to/SIP" \
        " " \
        " -x Package type: Digitized Film (DPX package)" \
        " -f Package type: Digitized Film (MOV, MKV master files)" \
        " -v Package type: Digitized Analog Video (vrecord package)" \
        " -d Package type: Transferred DV (MOV, MKV master files)" \
        " -u Package type: Other/Unknown" \
        " -m MEDIAID (type media id for final package, e.g. SC0001_20190101_SMITH_VHS_01)" \
        " -o /path/to/output/directory (directory to deliver the resulting package to)" \
        " -r Remove source files after successful ingest" \
        " -h display this help" \
        ""
    exit
}
# getopts loop
# The leading ':' in the option string puts getopts in silent mode: an unknown option sets OPT to '?' and an option that is
# missing its argument sets OPT to ':'; in both cases OPTARG holds the offending option letter.
OPTIND=1
while getopts ":m:o:xfvdurh" OPT ; do
    case "${OPT}" in
        x) PACKAGETYPE="Digitized Film (DPX package)" ;;
        f) PACKAGETYPE="Digitized Film (MOV, MKV master files)" ;;
        v) PACKAGETYPE="Digitized Analog Video (vrecord package)" ;;
        d) PACKAGETYPE="Transferred DV (MOV, MKV master files)" ;;
        u) PACKAGETYPE="Other/Unknown" ;;
        m) MEDIAID="${OPTARG}" ;;
        # _check_deliverdir is not defined in this file - presumably it validates DELIVERDIR and comes from nmaahcmmfunctions; confirm
        o) DELIVERDIR="${OPTARG}" && _check_deliverdir ;;
        r) CLEANUPDECISION="Remove source files after successful ingest" ;;
        h) _usage ;; # if the operator runs "[scriptname] -h" then the _usage text above will display in the terminal
        # _usage ends with its own exit, so it is run in a subshell here: the help text is still printed, and this shell can
        # then exit non-zero to signal that the command line was rejected
        :) echo "Option -${OPTARG} requires an argument" >&2 ; ( _usage ) ; exit 1 ;; # -m or -o was given without a value
        *) echo "Invalid option -${OPTARG}" >&2 ; ( _usage ) ; exit 1 ;; # any option other than the ones listed above
    esac
done
# drop the parsed options so that ${1} is the path to the SIP
shift $(( OPTIND - 1 ))
# check that all inputs were entered
# Anything not supplied as a command-line option is requested from the operator here.
if [[ -z "${PACKAGETYPE}" ]] ; then
    _report -b "Select a package type:"
    echo
    PS3="Selection: "
    # The menu labels must be identical to the PACKAGETYPE strings set by the command-line options and compared later in the
    # script; a label that differs matches none of the restructuring branches, and no files are copied.
    select PACKAGETYPE in "Digitized Film (DPX package)" "Digitized Film (MOV, MKV master files)" "Digitized Analog Video (vrecord package)" "Transferred DV (MOV, MKV master files)" "Other/Unknown"
    do
        # select sets PACKAGETYPE to an empty string when the reply is not one of the listed numbers; ask again in that case
        [[ -n "${PACKAGETYPE}" ]] && break
        _report -r "Invalid selection '${REPLY}'. Enter the number of one of the package types listed."
    done
    # select stops without a choice when it reads end-of-file
    if [[ -z "${PACKAGETYPE}" ]] ; then
        _report -rst "ERROR: No package type was selected. Exiting..."
        exit 1
    fi
fi
# a media id is not requested for DPX packages
if [[ -z "${MEDIAID}" ]] && [[ "${PACKAGETYPE}" != "Digitized Film (DPX package)" ]] ; then
    _report -b "Enter a unique package name or 'q' to quit: "
    # -r keeps backslashes literal so that the validation below sees exactly what was typed
    read -e -r MEDIAID
    [[ "${MEDIAID}" = "q" ]] && exit 0
    # validate the id; both failures exit non-zero
    if [[ -z "${MEDIAID}" ]] ; then
        _report -rst "ERROR: You must enter a valid MEDIA ID"
        exit 1
    fi
    if [[ ! "${MEDIAID}" =~ ^[A-Za-z0-9_-]+$ ]] ; then
        _report -rst "ERROR: The MEDIA ID must only contain letters, numbers, hyphen and underscore"
        exit 1
    fi
fi
if [[ -z "${DELIVERDIR}" ]] ; then
    _report -b "Drag in the destination directory or type 'q' to quit: "
    # NOTE(review): read is left without -r here on purpose - presumably a dragged-in path arrives with backslash-escaped
    # spaces, which plain read unescapes; confirm before changing
    read -e DELIVERDIR
    [[ "${DELIVERDIR}" = "q" ]] && exit 0
    if [[ ! -d "${DELIVERDIR}" ]] ; then
        _report -rst "ERROR: Destination ${DELIVERDIR} is not a directory. Exiting..."
        _log -a "Process terminated by script (input was not a directory)."
        exit 1
    fi
fi
# default when -r was not given
if [[ -z "${CLEANUPDECISION}" ]] ; then
    CLEANUPDECISION="Leave source files where they are"
fi
# log script beginning
_log -b
# record variables in log
# The labels and their values are held in two parallel arrays and written out pair by pair, in the order listed.
RESTRUCTURESIP_LOG_LABELS=("PACKAGE TYPE" "MEDIA ID" "DELIVERDIR" "CLEANUPDECISION")
RESTRUCTURESIP_LOG_VALUES=("${PACKAGETYPE}" "${MEDIAID}" "${DELIVERDIR}" "${CLEANUPDECISION}")
for RESTRUCTURESIP_LOG_INDEX in 0 1 2 3 ; do
    _writelog "${RESTRUCTURESIP_LOG_LABELS[${RESTRUCTURESIP_LOG_INDEX}]}" "${RESTRUCTURESIP_LOG_VALUES[${RESTRUCTURESIP_LOG_INDEX}]}"
done
# the arrays are only scaffolding for the loop above
unset RESTRUCTURESIP_LOG_LABELS RESTRUCTURESIP_LOG_VALUES RESTRUCTURESIP_LOG_INDEX
## SCRIPT ACTIONS
# the SIP is the first argument left after option parsing
INPUT="${1}"
# a trailing slash on DELIVERDIR is dropped so the package path does not contain '//'
OUTPUT_PACKAGE="${DELIVERDIR%/}/${MEDIAID}"
# stop before creating anything if the SIP path is missing or wrong; otherwise every find below would fail and an empty
# package would be left behind in DELIVERDIR
if [[ -z "${INPUT}" ]] ; then
    _report -rst "ERROR: No SIP was provided. Run '$(basename "${0}") -h' for usage. Exiting..."
    _log -a "Process terminated by script (no input was provided)."
    exit 1
fi
if [[ ! -e "${INPUT}" ]] ; then
    _report -rst "ERROR: Input ${INPUT} does not exist. Exiting..."
    _log -a "Process terminated by script (input does not exist)."
    exit 1
fi
# if package type is unknown, check whether it looks like a DPX package
if [[ "${PACKAGETYPE}" == "Other/Unknown" ]] ; then
    if [[ -n "$(find "${INPUT}" -iname "*.dpx")" ]] ; then
        _report -rs "This SIP looks like a DPX package!"
        PACKAGETYPE="Digitized Film (DPX package)"
    fi
fi
# if SIP is a DPX, direct the operator to use restructureDPX instead
if [[ "${PACKAGETYPE}" == "Digitized Film (DPX package)" ]] ; then
    _report -rs "If you are trying to restructure a DPX package, please run the 'restructureDPX' microservice instead!"
    _report -b "You can also run the 'ingestfile' microservice, which invokes 'restructureDPX' as well."
    # cowsay is not in DEPENDENCIES, so only call it when it is installed
    if command -v cowsay >/dev/null 2>&1 ; then
        cowsay "Thanks!"
    fi
    _log -a "Process terminated by script (operator redirected to appropriate microservice)."
    exit 0
else
    _report -gs "Making objects and metadata directories..."
    # nothing can be delivered if the package directories cannot be created
    if ! mkdir -p "${OUTPUT_PACKAGE}" "${OUTPUT_PACKAGE}/objects" "${OUTPUT_PACKAGE}/metadata" ; then
        _report -rst "ERROR: Could not create the output package directories in ${OUTPUT_PACKAGE}. Exiting..."
        _log -a "Process terminated by script (could not create output package directories)."
        exit 1
    fi
    _report -gs "Moving SIP to output package..."
fi
# perform restructuring actions on other SIPs

# _copy_sip_files SUBDIR FIND_EXPRESSION...
# rsyncs every regular file under ${INPUT} that matches FIND_EXPRESSION into ${OUTPUT_PACKAGE}/SUBDIR/.
# Globals:   INPUT and OUTPUT_PACKAGE (read), RSYNC_ERROR_COUNT (one is added for each rsync that fails)
# Arguments: SUBDIR          - "objects" or "metadata"
#            FIND_EXPRESSION - find tests; wrap any -o alternatives in \( \) so that -type f applies to all of them
# File names travel NUL-delimited from find, so names containing spaces, backslashes or newlines are handled intact.
_copy_sip_files(){
    local SUBDIR="${1}"
    local SIPFILE
    shift
    while IFS= read -r -d '' SIPFILE ; do
        # rsync gets /dev/null as stdin so it cannot consume the file list this loop is reading
        if ! rsync -avh --progress "${SIPFILE}" "${OUTPUT_PACKAGE}/${SUBDIR}/" </dev/null ; then
            RSYNC_ERROR_COUNT=$(( RSYNC_ERROR_COUNT + 1 ))
            _report -rst "ERROR: rsync failed for ${SIPFILE}"
        fi
    done < <(find "${INPUT}" -type f "${@}" -print0)
}

if [[ "${PACKAGETYPE}" == "Digitized Film (MOV, MKV master files)" ]] || [[ "${PACKAGETYPE}" == "Transferred DV (MOV, MKV master files)" ]] ; then
    # Digitized Film and Transferred DV both expect MOV/MKV master files as objects, and everything else as metadata.
    # Look for master files BEFORE copying anything: if there are none, the package is handed to the "Other/Unknown" handling
    # untouched. Copying first would put every file into metadata/ and then copy the audiovisual ones into objects/ as well.
    # The parentheses make -type f apply to both name patterns, so a directory named *.mkv does not count as a master file.
    if [[ -z "$(find "${INPUT}" -type f \( -iname "*.mov" -o -iname "*.mkv" \))" ]] ; then
        _report -r "There are no MKV or MOV files in this package! Changing package type to 'Other/Unknown' and proceeding..."
        PACKAGETYPE="Other/Unknown"
    else
        _copy_sip_files objects \( -iname "*.mov" -o -iname "*.mkv" \)
        _copy_sip_files metadata -not \( -iname "*.mov" -o -iname "*.mkv" \)
    fi
elif [[ "${PACKAGETYPE}" == "Digitized Analog Video (vrecord package)" ]] ; then
    # vrecord packages: MKV files are the objects, everything else is metadata
    _copy_sip_files objects -iname "*.mkv"
    _copy_sip_files metadata -not -iname "*.mkv"
fi
# _rsync_sip_file FILE SUBDIR
# rsyncs one file into ${OUTPUT_PACKAGE}/SUBDIR/; on failure adds one to RSYNC_ERROR_COUNT and reports the file.
_rsync_sip_file(){
    # stdin is /dev/null so rsync cannot consume input meant for a calling loop
    if ! rsync -avh --progress "${1}" "${OUTPUT_PACKAGE}/${2}/" </dev/null ; then
        RSYNC_ERROR_COUNT=$(( RSYNC_ERROR_COUNT + 1 ))
        _report -rst "ERROR: rsync failed for ${1}"
    fi
}

# _sort_sip_by_streams
# Sorts every regular file under ${INPUT} into the output package according to what mediainfo reports for it:
# a file with a video count or an audio count goes to objects/, anything else goes to metadata/.
# This approach is intended to catch all audiovisual files regardless of type or extension.
# Globals:   INPUT and OUTPUT_PACKAGE (read), RSYNC_ERROR_COUNT (incremented on each failed rsync)
# Outputs:   one "Found ... file" line per file on stdout
# Exits 1 when mediainfo is not available: without it every probe prints nothing and every file would be filed as metadata.
_sort_sip_by_streams(){
    local SIPFILE
    local FILEINDEX
    local -a OBJECT_FILES
    local -a METADATA_FILES
    OBJECT_FILES=()
    METADATA_FILES=()
    if ! command -v mediainfo >/dev/null 2>&1 ; then
        _report -rst "ERROR: mediainfo is required to identify audiovisual files but was not found. Exiting..."
        exit 1
    fi
    # Classify first, copy afterwards. The names are read NUL-delimited with IFS= and -r, so backslashes and leading or
    # trailing whitespace in a file name are kept exactly as they are on disk.
    while IFS= read -r -d '' SIPFILE ; do
        if [[ -n "$(mediainfo --Inform="General;%VideoCount%" "${SIPFILE}" </dev/null)" ]] ; then
            echo "Found video file ${SIPFILE}"
            OBJECT_FILES+=("${SIPFILE}")
        elif [[ -n "$(mediainfo --Inform="General;%AudioCount%" "${SIPFILE}" </dev/null)" ]] ; then
            echo "Found audio file ${SIPFILE}"
            OBJECT_FILES+=("${SIPFILE}")
        else
            echo "Found metadata file ${SIPFILE}"
            METADATA_FILES+=("${SIPFILE}")
        fi
    done < <(find "${INPUT}" -type f -print0)
    # warn when no video or audio file was found; the metadata files are still copied
    if (( ${#OBJECT_FILES[@]} == 0 )) ; then
        _report -r "There are no object files in this package!"
    fi
    # index loops are used so that an empty list is simply skipped
    for (( FILEINDEX = 0 ; FILEINDEX < ${#OBJECT_FILES[@]} ; FILEINDEX++ )) ; do
        _rsync_sip_file "${OBJECT_FILES[${FILEINDEX}]}" objects
    done
    for (( FILEINDEX = 0 ; FILEINDEX < ${#METADATA_FILES[@]} ; FILEINDEX++ )) ; do
        _rsync_sip_file "${METADATA_FILES[${FILEINDEX}]}" metadata
    done
}

if [[ "${PACKAGETYPE}" == "Other/Unknown" ]] ; then
    _sort_sip_by_streams
fi
# clean up original files if requested - hardcoded OFF during testing
# NOTE(review): when this is re-enabled, 'rm -f' will not remove INPUT if it is a directory - confirm what INPUT is expected to be
#if [[ "${CLEANUPDECISION}" == "Remove source files after successful ingest" ]] ; then
# if [[ "${RSYNC_ERROR_COUNT}" -gt "0" ]] ; then
# echo "There were ${RSYNC_ERROR_COUNT} errors during the rsync process!"
# _report -rst "Cancelling requested deletion of source file, not looking safe."
# else
# _report -gs "Removing the source file from ${INPUT} as requested."
# _run rm -f "${INPUT}"
# fi
#fi
# The removal step above is disabled, so the -r option is accepted and logged but removes nothing.
# Tell the operator, rather than ending with a plain "complete" message that suggests the request was carried out.
if [[ "${CLEANUPDECISION}" == "Remove source files after successful ingest" ]] ; then
    _report -r "Removal of source files was requested, but it is currently disabled in this script. The source files have been left in place."
fi
# log script ending
_report -g "restructureSIP process complete."
echo
_log -e
_writelog "END TIME" "$(date +%FT%T)"