-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathutil_runtpcds.sh
executable file
·126 lines (99 loc) · 3.58 KB
/
util_runtpcds.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
#!/bin/bash
# Print the current date and time, evaluated in the time zone named by the
# global TIMEZONE, followed by a single empty line.
timedate() {
  TZ="${TIMEZONE}" date
  printf '\n'
}
# Print the command-line usage and terminate the script with exit status 1.
#
# The help text is a diagnostic, so it is written to stderr rather than stdout.
# The usage line names `bash` because this script relies on bash-only syntax
# (the `function` keyword, [[ ]], (( ))) and does not run under a plain POSIX sh.
#
# Arguments: none
# Returns:   never returns; exits with status 1
function usageExit() {
  {
    echo "Usage: bash util_runtpcds.sh SCALE FORMAT"
    echo "SCALE must be greater than 1"
    echo "FORMAT must be either 'orc' | 'parquet'"
  } >&2
  exit 1
}
# Prepare global settings and the working directory for one benchmark run.
#
# Globals read:    TIMEZONE, FORMAT, SCALE
# Globals written: ID, QUERY_BASE_NAME, QUERY_FILE_EXT, SETTINGS_PATH,
#                  REPORT_NAME, DATABASE, CLOCK_FILE, CURR_DIR
# Side effects (all relative to the current directory):
#   - deletes the old clock file, old report CSVs, old llapio/llap_mintimes
#     summary CSVs and the old log_query/ directory
#   - writes a fresh report CSV containing only the header row
#   - creates an empty log_query/ directory
#   - marks the helper scripts and PAT/ as executable
# Outputs: progress messages on stdout
function setupRun() {
  # Identifier of this run: a timestamp in the configured time zone.
  ID=$(TZ="${TIMEZONE}" date +"%m.%d.%Y-%H.%M.%S")

  # --- QUERY FILE NAME ---
  QUERY_BASE_NAME="sample-queries-tpcds/query"
  QUERY_FILE_EXT=".sql"

  # --- SETTINGS ---
  SETTINGS_PATH="settings.sql"

  # --- REPORT NAME ---
  REPORT_NAME="time_elapsed_tpcds"

  # --- DATABASE ---
  DATABASE="tpcds_bin_partitioned_${FORMAT}_${SCALE}"

  # --- CLOCK ---
  CLOCK_FILE="aaa_clocktime.txt"
  if [[ -f "${CLOCK_FILE}" ]]; then
    rm -- "${CLOCK_FILE}"
    echo "Old clock removed"
  fi
  # Nothing is written here; the clock file is first written by runBenchmark.
  echo "Created new clock"

  # Generate the time report.
  # -f: on a first run no old report exists, the unmatched glob is handed to
  # rm literally, and that must not be reported as an error.
  rm -f -- "${REPORT_NAME}"*".csv"
  echo "Old report removed"
  echo "query #, secs elapsed, status" > "${REPORT_NAME}.csv"
  echo "New report generated"

  # Remove old summaries (same -f reasoning as for the report above).
  rm -f -- "llapio_summary"*".csv" "llap_mintimes_summary"*".csv"
  echo "Old llapio_summary*.csv llap_mintimes_summary*.csv removed"

  # Clear and recreate the log directory.
  if [[ -d log_query/ ]]; then
    rm -r log_query/
    echo "Old logs removed"
  fi
  mkdir log_query/
  echo "Log folder generated"

  # Make the helper scripts executable.
  chmod +x util_internalGetPAT.sh
  chmod +x util_internalRunQuery.sh
  chmod -R +x PAT/

  # Absolute path of the working directory, with a trailing slash so that
  # relative paths can be appended directly.
  CURR_DIR="$(pwd)/"
}
# Run the TPC-DS queries START..END (1..99), REPEAT (1) time(s) each, by
# invoking ./util_internalRunQuery.sh once per query and repetition.
#
# Globals read:    FORMAT, SCALE, CLOCK_FILE, QUERY_BASE_NAME, QUERY_FILE_EXT,
#                  DATABASE, CURR_DIR, SETTINGS_PATH, REPORT_NAME
# Globals written: START, END, REPEAT, QUERY_NUM, j, query_path, LOG_PATH
# Side effects:    overwrites CLOCK_FILE with a header plus start timestamp,
#                  then appends "Finished" plus an end timestamp.
runBenchmark() {
  # Start the clock file: header line followed by the start time.
  {
    echo "Run queries for TPC-DS ${FORMAT} at scale ${SCALE}"
    timedate
  } > "${CLOCK_FILE}"

  # Range of queries and number of repetitions per query.
  START=1
  END=99
  REPEAT=1

  QUERY_NUM=${START}
  while (( QUERY_NUM <= END )); do
    j=0
    while (( j < REPEAT )); do
      query_path="${QUERY_BASE_NAME}${QUERY_NUM}${QUERY_FILE_EXT}"
      LOG_PATH="log_query/logquery${QUERY_NUM}.${j}.txt"

      # Arguments: database, settings file, query file, log file,
      # query number, report CSV (paths made absolute via CURR_DIR).
      ./util_internalRunQuery.sh \
        "${DATABASE}" \
        "${CURR_DIR}${SETTINGS_PATH}" \
        "${CURR_DIR}${query_path}" \
        "${CURR_DIR}${LOG_PATH}" \
        "${QUERY_NUM}" \
        "${CURR_DIR}${REPORT_NAME}.csv"

      # Disabled alternative that wraps the same call in util_internalGetPAT.sh.
      # NOTE(review): it refers to "$i", which is never assigned in this function.
      # ./util_internalGetPAT.sh /$CURR_DIR/util_internalRunQuery.sh "$DATABASE" "$CURR_DIR$SETTINGS_PATH" "$CURR_DIR$query_path" "$CURR_DIR$LOG_PATH" "$QUERY_NUM" "$CURR_DIR$REPORT_NAME.csv" tpcdsPAT"$ID"/query"$i"/

      j=$(( j + 1 ))
    done
    QUERY_NUM=$(( QUERY_NUM + 1 ))
  done

  # Close the clock file: marker line followed by the end time.
  {
    echo "Finished"
    timedate
  } >> "${CLOCK_FILE}"
}
# Package the results of the run and upload the archive to HDFS.
#
# Steps, in order: create the world-writable HDFS target directory, run
# parselog.py for this run ID, rename the report CSV to carry the run ID,
# zip the query logs, zip logs + report + summary CSVs into the final
# archive, delete the intermediate log zip, and copy the archive to HDFS.
#
# Globals read:    SCALE, ID, REPORT_NAME
# Globals written: FINAL_REPORT_LOCATION
generateZipReport() {
  local stamped_report="${REPORT_NAME}${ID}.csv"
  local bundle="tpcds-${SCALE}GB-${ID}.zip"
  local -a bundle_members

  # Final report location on HDFS.
  FINAL_REPORT_LOCATION="/TPCDS_RESULTS/${SCALE}/"
  hdfs dfs -mkdir -p "${FINAL_REPORT_LOCATION}"
  hdfs dfs -chmod 777 "${FINAL_REPORT_LOCATION}"

  python3 parselog.py --test_type "tpcds" --time_id "${ID}"

  mv "${REPORT_NAME}.csv" "${stamped_report}"

  # -j stores the log files without their directory component.
  zip -j log_query.zip log_query/*

  # The summary globs are expanded here, after parselog.py has run.
  bundle_members=(
    log_query.zip
    "${stamped_report}"
    "llapio_summary_tpcds"*".csv"
    "llap_mintimes_summary_tpcds"*".csv"
  )
  zip -r "${bundle}" "${bundle_members[@]}"
  # Disabled variant that also bundles the PAT results:
  # zip -r "tpcds-${SCALE}GB-${ID}.zip" log_query.zip PAT/PAT-collecting-data/results/tpcdsPAT"$ID"/* "${REPORT_NAME}${ID}.csv" "llapio_summary"*".csv" "llap_mintimes_summary"*".csv"

  rm log_query.zip

  hdfs dfs -copyFromLocal "${bundle}" "${FINAL_REPORT_LOCATION}/${bundle}"
}
# --- SCRIPT START ---
# Positional arguments:
#   $1  SCALE   integer greater than 1
#   $2  FORMAT  'orc' or 'parquet'
SCALE=$1
FORMAT=$2
TIMEZONE="America/Los_Angeles"

# A missing SCALE gets the plain usage text.
if [[ -z "$SCALE" ]]; then
  usageExit
fi

# Check for digits only BEFORE any numeric comparison: inside [[ ]] the
# operands of -eq are evaluated as arithmetic expressions, so comparing a raw,
# unvalidated argument can raise syntax errors (e.g. "1.5") or evaluate
# arbitrary expressions embedded in the argument.
if ! [[ "$SCALE" =~ ^[0-9]+$ ]]; then
  echo "'$SCALE' is not a number!"
  usageExit
fi

# Enforce the documented rule "SCALE must be greater than 1" (rejects 0 and 1).
# The 10# prefix forces base 10 so that leading zeros such as "08" are not
# parsed as (invalid) octal.
if (( 10#$SCALE <= 1 )); then
  usageExit
fi

if [[ "$FORMAT" != "orc" && "$FORMAT" != "parquet" ]]; then
  usageExit
fi

setupRun
runBenchmark
generateZipReport