diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..457f44d --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "python.analysis.typeCheckingMode": "basic" +} \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index c9dc375..33e8346 100644 --- a/Dockerfile +++ b/Dockerfile @@ -26,17 +26,17 @@ RUN pip3 install -r /tmp/requirements.txt # Create the directory structure for the metrics RUN mkdir -p /app/metrics/rawmetrics/filtered_by_status_code && \ - mkdir -p /app/metrics/goaccess-metrics && \ + mkdir -p /app/metrics/logs-metrics && \ mkdir -p /app/metrics/prometheus-metrics && \ mkdir -p /app/ui && \ mkdir -p /var/log/ingresslogs # Copy required files -COPY extract_goaccess_metrics.sh /app/ -COPY goaccess_metric_parser.py /app/ +COPY extract_logs_metrics.sh /app/ +COPY logs_metric_parser.py /app/ COPY extract_prometheus_metrics.sh /app/ -COPY metrics_prom.py /app/ +COPY prometheus_metric_parser.py /app/ COPY create_index.py /app/ diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..f49a4e1 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/create_index.py b/create_index.py index 8ac8fce..0ddf0ca 100644 --- a/create_index.py +++ b/create_index.py @@ -81,8 +81,8 @@ def generate_html(out_file, dir_path, out_url=''): icon = get_icon(file_name) if "dashboard" in file_name: file_name = "GoAccess Dashboard" - if "goaccess-metrics" in file_name: - file_name = "GoAccess Metrics" + if "logs-metrics" in file_name: + file_name = "Logs Metrics" if "prometheus-metrics" in file_name: file_name = "Prometheus Metrics" file_entry = html_file_entry_template.format(url=file_url, id=i, icon=icon, filename=file_name) diff --git a/extract_goaccess_metrics.sh b/extract_goaccess_metrics.sh deleted file mode 100755 index e4ef203..0000000 --- a/extract_goaccess_metrics.sh +++ /dev/null @@ -1,95 +0,0 @@ -# Container paths -OUTPUTS_PATH="/app/metrics/rawmetrics" -FILTERED_PATH="$OUTPUTS_PATH/filtered_by_status_code" -OUTPUT_FILE="output_OSCAR_goaccess" -# Path to the readonly volume with the cluster's ingress logs -CLUSTER_LOGS_DIR="/var/log/clusterlogs" -LOCAL_LOGS_DIR="/var/log/ingresslogs" -OSCAR_LOGS_DIR="$LOCAL_LOGS_DIR/oscar" - -HISTORY_LOGS="$OSCAR_LOGS_DIR/oscar.log" -LATEST_LOGS="$OSCAR_LOGS_DIR/latest_oscar.log" -mkdir -p $OSCAR_LOGS_DIR - -# Log format for goaccess -LOG_FORMAT='%^ %^ %^ %^ [%^] %d - %t | %s | %Ts | %h | %m %~ %U | %u' - -addLog(){ - ingress_logfile=$1 - cat $ingress_logfile | grep GIN-EXECUTIONS-LOGGER | grep -a '/job\|/run' | tee -a $HISTORY_LOGS >/dev/null -} - -metrics(){ - LOG_FILE=$1 - filename=`basename "$LOG_FILE"` - geo_err=$( { goaccess "${LOG_FILE}" --log-format="${LOG_FORMAT}" -o "${OUTPUTS_PATH}/${filename}_full.json" --json-pretty-print; } 2>&1 ) - if [[ $filename == "latest"* ]]; then - python3 goaccess_metric_parser.py -f "${OUTPUTS_PATH}/${filename}_full.json" -g 0 - else - python3 goaccess_metric_parser.py -f "${OUTPUTS_PATH}/${filename}_full.json" -g 0 -u - fi - - status_codes=('200' '204' '404' '500') - init="t" - - out="${FILTERED_PATH}/${filename}" - - for code in "${status_codes[@]}"; do - code_logs=$(cat $LOG_FILE| grep -e 'HTTP/[0-9].[0-9]" '${code}' ') - if [ ! -z "$code_logs" ]; then - app_err=$( { cat $LOG_FILE | grep -e 'HTTP/[0-9].[0-9]" '${code}' ' | goaccess - -o "${out}_f${code}.json" --json-pretty-print --log-format="${LOG_FORMAT}"; } 2>&1 ) - if [ ! -f "${out}_f${code}.json" ]; then - echo "[*] Warning: Couldn't process file $LOG_FILE for status code '$code'" - else - if [ $init == 't' ]; then - python3 goaccess_metric_parser.py -f "${out}_f${code}.json" -p $code - init="f" - else - python3 goaccess_metric_parser.py -f "${out}_f${code}.json" -p $code -u - fi - fi - fi - done -} - -for log_path in "$CLUSTER_LOGS_DIR"/*; -do - if [[ $log_path == *"oscar_oscar"* ]]; then - cp -r $log_path $LOCAL_LOGS_DIR - # remove total path - relative_log_path=$(echo $log_path | sed 's/\/var\/log\/clusterlogs\///') - # upload a backup of the logs to s3 - aws s3 cp --recursive $log_path s3://metrics.oscar.grycap.net/"${CLUSTER_ID}"/ingresslogs/"${log_relative_path}" - for logfile in "$LOCAL_LOGS_DIR/$log_relative_path/oscar/"*; - do - if [[ $logfile == *".gz" ]]; then - # unzip all log files - gzip -d $logfile - fi - done - break - fi -done - -# /var/log/ingresslogs/oscar_oscar-7499cd/oscar -for logfile in "$LOCAL_LOGS_DIR/$relative_log_path/oscar/"*; -do - if [[ $logfile == *".log"* ]]; then - if [[ $logfile == *".log" ]]; then - cat $logfile | grep GIN-EXECUTIONS-LOGGER | grep -a '/job\|/run' | tee -a $LATEST_LOGS >/dev/null - metrics $LATEST_LOGS - else - addLog $logfile - fi - fi -done - -# Generate the html file -if [ ! -f "${HISTORY_LOGS}" ] || [ ! -s "${HISTORY_LOGS}" ]; then - goaccess "${LATEST_LOGS}" --log-format="${LOG_FORMAT}" -o "/app/metrics/dashboard.html" -else - metrics $HISTORY_LOGS - - cat $LATEST_LOGS | tee -a $HISTORY_LOGS >/dev/null - goaccess "${HISTORY_LOGS}" --log-format="${LOG_FORMAT}" -o "/app/metrics/dashboard.html" -fi diff --git a/extract_logs_metrics.sh b/extract_logs_metrics.sh new file mode 100755 index 0000000..134fa11 --- /dev/null +++ b/extract_logs_metrics.sh @@ -0,0 +1,96 @@ +# Container paths +OUTPUTS_PATH="/app/metrics/rawmetrics" +FILTERED_PATH="$OUTPUTS_PATH/filtered_by_status_code" + +# Path to the readonly volume with the cluster's ingress logs +CLUSTER_LOGS_DIR="/var/log/clusterlogs" +LOCAL_LOGS_DIR="/var/log/ingresslogs" +OSCAR_LOGS_DIR="$LOCAL_LOGS_DIR/oscar" + +HISTORY_LOGS_INFERENCE="$OSCAR_LOGS_DIR/inference/oscar.log" +HISTORY_LOGS_CREATE="$OSCAR_LOGS_DIR/create/oscar.log" + +mkdir -p $OSCAR_LOGS_DIR/inference +mkdir -p $OSCAR_LOGS_DIR/create + +# Log format for goaccess +LOG_FORMAT='%^ %^ %^ %^ [%^] %d - %t | %s | %Ts | %h | %m %~ %U | %u' + +addLog(){ + ingress_logfile=$1 + cat $ingress_logfile | grep GIN-EXECUTIONS-LOGGER | grep -a '/job\|/run' | tee -a $HISTORY_LOGS_INFERENCE >/dev/null + cat $ingress_logfile | grep CREATE-HANDLER | grep '/system/services' | tee -a $HISTORY_LOGS_CREATE >/dev/null +} + +metrics(){ + LOG_FILE=$1 + filename=`basename "$LOG_FILE"` + geo_err=$( { goaccess "${LOG_FILE}" --log-format="${LOG_FORMAT}" -o "${OUTPUTS_PATH}/${filename}_full.json" --json-pretty-print; } 2>&1 ) + python3 logs_metric_parser.py -f "${OUTPUTS_PATH}/${filename}_full.json" -g 0 + + status_codes=('200' '201' '204' '404' '500') + init="t" + + out="${FILTERED_PATH}/${filename}" + + for code in "${status_codes[@]}"; do + code_logs=$(cat $LOG_FILE| grep -E '\|[ ]*'${code}'[ ]*\|') + if [ ! -z "$code_logs" ]; then + app_err=$( { cat $LOG_FILE | grep -E '\|[ ]*'${code}'[ ]*\|' | goaccess - -o "${out}_f${code}.json" --json-pretty-print --log-format="${LOG_FORMAT}"; } 2>&1 ) + if [ ! -f "${out}_f${code}.json" ]; then + echo "[*] Warning: Couldn't process file $LOG_FILE for status code '$code'" + else + if [ $init == 't' ]; then + python3 logs_metric_parser.py -f "${out}_f${code}.json" -a $code + init="f" + else + python3 logs_metric_parser.py -f "${out}_f${code}.json" -a $code -u + fi + fi + fi + done +} + + +# Read current logs and upload backup to s3 +for log_path in "$CLUSTER_LOGS_DIR"/*; +do + if [[ $log_path == *"oscar_oscar"* ]]; then + cp -r $log_path $LOCAL_LOGS_DIR + # Remove total path + relative_log_path=$(echo $log_path | sed 's/\/var\/log\/clusterlogs\///') + # Upload current logs to s3 + aws s3 cp --recursive $log_path s3://metrics.oscar.grycap.net/"${CLUSTER_ID}"/ingresslogs/"${relative_log_path}" + break + fi +done + +# Download from s3 all past logs from previous OSCAR pods +aws s3 cp s3://metrics.oscar.grycap.net/"${CLUSTER_ID}"/ingresslogs $LOCAL_LOGS_DIR --recursive --exclude "oscar/*" --exclude "${relative_log_path}" + +# /var/log/ingresslogs/oscar_oscar-7499cd/oscar +for logfile in "$LOCAL_LOGS_DIR/oscar_oscar"*"/oscar/"*; +do + if [[ $logfile == *".gz" ]]; then + # unzip all log files + gzip -d $logfile + logfile=$(echo $logfile | sed 's/\.gz//') + fi + + if [[ $logfile == *".log"* || $logfile == *".log" ]]; then + echo ">> Adding logfile '$logfile'" + addLog $logfile + fi +done + +awk '!seen[$0]++' $HISTORY_LOGS_INFERENCE > tmp_inference.log +mv tmp_inference.log $HISTORY_LOGS_INFERENCE + +awk '!seen[$0]++' $HISTORY_LOGS_CREATE > tmp_create.log +mv tmp_create.log $HISTORY_LOGS_CREATE + +metrics $HISTORY_LOGS_INFERENCE +python3 logs_metric_parser.py -f $HISTORY_LOGS_INFERENCE -i 0 +python3 logs_metric_parser.py -f $HISTORY_LOGS_CREATE -c 0 + +rm tmp_*.log diff --git a/extract_prometheus_metrics.sh b/extract_prometheus_metrics.sh index 0c62ec8..98c0635 100755 --- a/extract_prometheus_metrics.sh +++ b/extract_prometheus_metrics.sh @@ -1,3 +1,3 @@ #!/bin/bash cluster_auth='{"cluster_id":"'"${CLUSTER_ID}"'","endpoint":"'"${ENDPOINT}"'","user":"'"${USER}"'","password":"'"${PASSW}"'","ssl":"True"}' -python3 metrics_prom.py $PROMETHEUS_ENDPOINT $VO $cluster_auth \ No newline at end of file +python3 prometheus_metric_parser.py $PROMETHEUS_ENDPOINT $VO $cluster_auth \ No newline at end of file diff --git a/goaccess_metric_parser.py b/goaccess_metric_parser.py deleted file mode 100644 index edc0c96..0000000 --- a/goaccess_metric_parser.py +++ /dev/null @@ -1,119 +0,0 @@ - -from posixpath import split -import argparse -import csv -import json -import time -import os - -CREATE_PATH = "/system/services" -RUN_PATH = "/run" -JOB_PATH = "/job" - -TIMESTAMP = str(int(time.time())) - -OUTPUT_PATH = "/app/metrics/goaccess-metrics" - -parser = argparse.ArgumentParser(description="Command-line to retreive Prometheus metrics from OSCAR", formatter_class=argparse.ArgumentDefaultsHelpFormatter) -parser.add_argument("-f", "--file_path", type=str, help="Logfile path/name") -parser.add_argument("-g", "--general", action="store_true", help="Complete logfile") -parser.add_argument("-u", "--use_existing", action="store_true", required=False, help="Use existing output file") -parser.add_argument("-p", "--partial", action="store_true", help="Filtered by status code logfile") -parser.add_argument("status_code", type=int, help="Complete logfile") - - -args = parser.parse_args() - -with open(args.file_path, 'r') as rawfile: - metrics = json.loads(rawfile.read()) - try: - START_DATE = metrics["general"]["start_date"] - END_DATE = metrics["general"]["end_date"] - except: - START_DATE = metrics["general"]["date_time"] - END_DATE = metrics["general"]["date_time"] - - -""" - > Countries reached - > Output format: {continent, country, total_visits, unique_visits, start_date, end_date} -""" -def parse_geolocation_info(write_type): - with open(f'{OUTPUT_PATH}/geolocation_metrics.csv', write_type, newline='') as gfile: - writer = csv.writer(gfile) - fields = ["continent", "country", "total_visits", "unique_visits", "start_metric_date", "end_metric_date"] - writer.writerow(fields) - - geolocation = metrics["geolocation"]["data"] - - for d in geolocation: - continent = d["data"] - for item in d["items"]: - writer.writerow([continent, item["data"], item["hits"]["count"] ,item["visitors"]["count"], START_DATE, END_DATE]) - - gfile.close() - -""" - > Number of AI applications (created services -> POST requests to /system/services) - > Output format: {num_created, start_date, end_date} - - > Processed inference executions (POST requests to /run or /job) - > Output format: {service, executions, type, successfull, failed, start_date, end_date} -""" - -def parse_requests_info(status_code, write_type): - - inference = dict() - requests = metrics["requests"]["data"] - create_count = 0 - exec_count = 0 - - for r in requests: - if r["method"] == "POST": - path = r["data"] - if path == CREATE_PATH: - create_count+=1 - elif RUN_PATH in path or JOB_PATH in path: - sum_requests = r["hits"]["count"] - split_path = split(path) - service = split_path[1] - if service in inference.keys(): - inference[service].append({"exec_type": split_path[0], "status_code": status_code, "count": sum_requests}) - else: - inference[service] = [{"exec_type": split_path[0], "status_code": status_code, "count": sum_requests}] - exec_count+=sum_requests - - if create_count != 0: - with open(f'{OUTPUT_PATH}/created_apps_metrics.csv', write_type, newline='') as cfile: - writer = csv.writer(cfile) - if write_type == "w": writer.writerow(["application_count", "status_code", "start_metric_date", "end_metric_date"]) - writer.writerow([create_count, status_code, START_DATE, END_DATE]) - - cfile.close() - - if exec_count != 0: - with open(f'{OUTPUT_PATH}/total_inference_metrics.csv', write_type, newline='') as efile: - writer = csv.writer(efile) - if write_type == "w": writer.writerow(["inference_count", "status_code", "start_metric_date", "end_metric_date"]) - writer.writerow([exec_count, status_code, START_DATE, END_DATE]) - - efile.close() - - with open(f'{OUTPUT_PATH}/services_inference_metrics.csv', write_type, newline='') as sfile: - writer = csv.writer(sfile) - if write_type == "w": writer.writerow(["service_name", "exec_type", "status_code", "inference_count" , "start_metric_date", "end_metric_date"]) - for k in inference.keys(): - for item in inference[k]: - writer.writerow([k, item["exec_type"], item["status_code"], item["count"], START_DATE, END_DATE]) - - sfile.close() - - -wr="w" -if args.use_existing: - wr="a" - -if args.general: - parse_geolocation_info(wr) -if args.partial: - parse_requests_info(args.status_code, wr) \ No newline at end of file diff --git a/logs_metric_parser.py b/logs_metric_parser.py new file mode 100644 index 0000000..add7302 --- /dev/null +++ b/logs_metric_parser.py @@ -0,0 +1,156 @@ + +from posixpath import split +import argparse +import csv +import json +import time +import os +import re + +CREATE_PATH = "/system/services" +RUN_PATH = "/run" +JOB_PATH = "/job" + +TIMESTAMP = str(int(time.time())) + +OUTPUT_PATH = "/app/metrics/logs-metrics" + +parser = argparse.ArgumentParser(description="Command-line to retreive Prometheus metrics from OSCAR", formatter_class=argparse.ArgumentDefaultsHelpFormatter) +parser.add_argument("-f", "--file_path", type=str, help="Logfile path/name") +parser.add_argument("-g", "--geolocation", action="store_true", help="Using as input complete generated GoAccess json") +parser.add_argument("-u", "--use_existing", action="store_true", required=False, help="Use existing output file") +parser.add_argument("-i", "--inference", action="store_true", help="Using as input inference log format file") +parser.add_argument("-c", "--create", action="store_true", help="Using as input created services log format file") +parser.add_argument("-a", "--goaccess", action="store_true", help="Using as input generated GoAccess json filtered") +parser.add_argument("status_code", type=int, help="Complete logfile") + + +args = parser.parse_args() + +if not args.create and not args.inference: + with open(args.file_path, 'r') as rawfile: + metrics = json.loads(rawfile.read()) + try: + START_DATE = metrics["general"]["start_date"] + END_DATE = metrics["general"]["end_date"] + except: + START_DATE = metrics["general"]["date_time"] + END_DATE = metrics["general"]["date_time"] + +""" + > Countries reached + > Output format: {continent, country, total_visits, unique_visits, start_date, end_date} +""" +def parse_geolocation_info(write_type): + with open(f'{OUTPUT_PATH}/geolocation_metrics.csv', write_type, newline='') as gfile: + writer = csv.writer(gfile) + fields = ["continent", "country", "total_visits", "unique_visits", "start_metric_date", "end_metric_date"] + writer.writerow(fields) + + geolocation = metrics["geolocation"]["data"] + + for d in geolocation: + continent = d["data"] + for item in d["items"]: + writer.writerow([continent, item["data"], item["hits"]["count"] ,item["visitors"]["count"], START_DATE, END_DATE]) + + gfile.close() + +""" + > Processed inference executions (POST requests to /run or /job) + > Output format: {service_name, status_code, total_visits, unique_visits, start_date, end_date} +""" + +def parse_inference_goaccess(status_code, write_type): + requests = metrics["requests"]["data"] + + with open(f'{OUTPUT_PATH}/total_inference_metrics.csv', write_type, newline='') as efile: + writer = csv.writer(efile) + if write_type == "w": writer.writerow(["service_name", "status_code", "total_visits", "unique_visits", "start_metric_date", "end_metric_date"]) + for r in requests: + path = r["data"] + if RUN_PATH in path or JOB_PATH in path: + total_visits = r["hits"]["count"] + unique_visits = r["visitors"]["count"] + split_path = split(path) + service_name = split_path[1] + + writer.writerow([service_name, status_code, total_visits, unique_visits, START_DATE, END_DATE]) + +""" + > Info about all inference executions + > Output format: {service_name, exec_type, status_code, owner_uid, request_date, request_time} +""" +def parse_inference_log(write_type): + with open(f'{OUTPUT_PATH}/services_inference_metrics.csv', write_type, newline='') as sfile: + writer = csv.writer(sfile) + if write_type == "w": writer.writerow(["service_name", "exec_type", "status_code", "owner_uid", "request_datetime"]) + with open(args.file_path, 'r') as rawfile: + for log in rawfile: + pattern = re.compile( + r'\[GIN-EXECUTIONS-LOGGER\]\s' # Literal text "[GIN-EXECUTIONS-LOGGER]" + r'(?P\d{4}/\d{2}/\d{2})\s-\s(?P