From 8e394f9914a16b72adbad4515ab8edd55d1dd9b3 Mon Sep 17 00:00:00 2001 From: Gabriel Chamon Araujo Date: Tue, 14 Feb 2023 10:30:51 -0300 Subject: [PATCH 01/49] Update Dockerfile --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 4d86948d..2f2cca95 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM amazonlinux:2 +FROM public.ecr.aws/lambda/python:3.7 # Set up working directories RUN mkdir -p /opt/app From 9c16448fdf8db273cc08cfeab6db657defbf236d Mon Sep 17 00:00:00 2001 From: Gabriel Chamon Araujo Date: Tue, 14 Feb 2023 10:36:20 -0300 Subject: [PATCH 02/49] fix dockerfile missing so from https://github.com/bluesentry/bucket-antivirus-function/issues/202#issuecomment-1194397587 --- Dockerfile | 66 +++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 51 insertions(+), 15 deletions(-) diff --git a/Dockerfile b/Dockerfile index 2f2cca95..fbe47c73 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM public.ecr.aws/lambda/python:3.7 +FROM amazonlinux:2 # Set up working directories RUN mkdir -p /opt/app @@ -12,8 +12,8 @@ COPY requirements.txt /opt/app/requirements.txt # Install packages RUN yum update -y -RUN yum install -y cpio python3-pip yum-utils zip unzip less -RUN yum install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm +RUN amazon-linux-extras install epel -y +RUN yum install -y cpio yum-utils tar.x86_64 gzip zip python3-pip # This had --no-cache-dir, tracing through multiple tickets led to a problem in wheel RUN pip3 install -r requirements.txt @@ -21,24 +21,60 @@ RUN rm -rf /root/.cache/pip # Download libraries we need to run in lambda WORKDIR /tmp -RUN yumdownloader -x \*i686 --archlist=x86_64 clamav clamav-lib clamav-update json-c pcre2 libprelude gnutls libtasn1 lib64nettle nettle -RUN rpm2cpio clamav-0*.rpm | cpio -idmv -RUN rpm2cpio clamav-lib*.rpm | cpio -idmv -RUN rpm2cpio clamav-update*.rpm | cpio -idmv -RUN rpm2cpio json-c*.rpm | cpio -idmv -RUN rpm2cpio pcre*.rpm | cpio -idmv -RUN rpm2cpio gnutls* | cpio -idmv -RUN rpm2cpio nettle* | cpio -idmv -RUN rpm2cpio lib* | cpio -idmv -RUN rpm2cpio *.rpm | cpio -idmv -RUN rpm2cpio libtasn1* | cpio -idmv +RUN yumdownloader -x \*i686 --archlist=x86_64 clamav +RUN rpm2cpio clamav-0*.rpm | cpio -vimd + +RUN yumdownloader -x \*i686 --archlist=x86_64 clamav-lib +RUN rpm2cpio clamav-lib*.rpm | cpio -vimd + +RUN yumdownloader -x \*i686 --archlist=x86_64 clamav-update +RUN rpm2cpio clamav-update*.rpm | cpio -vimd + +RUN yumdownloader -x \*i686 --archlist=x86_64 json-c +RUN rpm2cpio json-c*.rpm | cpio -vimd + +RUN yumdownloader -x \*i686 --archlist=x86_64 pcre2 +RUN rpm2cpio pcre*.rpm | cpio -vimd + +RUN yumdownloader -x \*i686 --archlist=x86_64 libtool-ltdl +RUN rpm2cpio libtool-ltdl*.rpm | cpio -vimd + +RUN yumdownloader -x \*i686 --archlist=x86_64 libxml2 +RUN rpm2cpio libxml2*.rpm | cpio -vimd + +RUN yumdownloader -x \*i686 --archlist=x86_64 bzip2-libs +RUN rpm2cpio bzip2-libs*.rpm | cpio -vimd + +RUN yumdownloader -x \*i686 --archlist=x86_64 xz-libs +RUN rpm2cpio xz-libs*.rpm | cpio -vimd + +RUN yumdownloader -x \*i686 --archlist=x86_64 libprelude +RUN rpm2cpio libprelude*.rpm | cpio -vimd + +RUN yumdownloader -x \*i686 --archlist=x86_64 gnutls +RUN rpm2cpio gnutls*.rpm | cpio -vimd + +RUN yumdownloader -x \*i686 --archlist=x86_64 nettle +RUN rpm2cpio nettle*.rpm | cpio -vimd + # Copy over the binaries and libraries -RUN cp /tmp/usr/bin/clamscan /tmp/usr/bin/freshclam /tmp/usr/lib64/* /opt/app/bin/ +RUN cp /tmp/usr/bin/clamscan /tmp/usr/bin/freshclam /tmp/usr/lib64/* /usr/lib64/libpcre.so.1 /opt/app/bin/ # Fix the freshclam.conf settings RUN echo "DatabaseMirror database.clamav.net" > /opt/app/bin/freshclam.conf RUN echo "CompressLocalDatabase yes" >> /opt/app/bin/freshclam.conf +RUN echo "ScriptedUpdates no" >> /opt/app/bin/freshclam.conf +RUN echo "DatabaseDirectory /var/lib/clamav" >> /opt/app/bin/freshclam.conf + +RUN yum install shadow-utils.x86_64 -y + +RUN groupadd clamav +RUN useradd -g clamav -s /bin/false -c "Clam Antivirus" clamav +RUN useradd -g clamav -s /bin/false -c "Clam Antivirus" clamupdate + +ENV LD_LIBRARY_PATH=/opt/app/bin +RUN ldconfig # Create the zip file WORKDIR /opt/app From d45cbb7d11721f306a115ae4aeb4cc07b6894008 Mon Sep 17 00:00:00 2001 From: Gabriel Chamon Araujo Date: Fri, 24 Feb 2023 12:04:14 -0300 Subject: [PATCH 03/49] implement fangfrisch usage - add extra config variables to control fangfrisch usage - refactor use of str_to_bool to avoid repetition - running aws sync as subprocess - running fangfrisch as subprocess --- README.md | 53 +++++++++++++++++++++++++----------------------- common.py | 47 +++++++++++++++++++++++++----------------- fangfrisch.conf | 19 +++++++++++++++++ requirements.txt | 1 + scan.py | 19 ++++++----------- update.py | 16 +++++++++++++++ 6 files changed, 98 insertions(+), 57 deletions(-) create mode 100644 fangfrisch.conf diff --git a/README.md b/README.md index 6d911919..a8fc5f04 100644 --- a/README.md +++ b/README.md @@ -81,31 +81,34 @@ can cause a continuous loop of scanning if improperly configured. Runtime configuration is accomplished using environment variables. See the table below for reference. -| Variable | Description | Default | Required | -| --- | --- | --- | --- | -| AV_DEFINITION_S3_BUCKET | Bucket containing antivirus definition files | | Yes | -| AV_DEFINITION_S3_PREFIX | Prefix for antivirus definition files | clamav_defs | No | -| AV_DEFINITION_PATH | Path containing files at runtime | /tmp/clamav_defs | No | -| AV_SCAN_START_SNS_ARN | SNS topic ARN to publish notification about start of scan | | No | -| AV_SCAN_START_METADATA | The tag/metadata indicating the start of the scan | av-scan-start | No | -| AV_SIGNATURE_METADATA | The tag/metadata name representing file's AV type | av-signature | No | -| AV_STATUS_CLEAN | The value assigned to clean items inside of tags/metadata | CLEAN | No | -| AV_STATUS_INFECTED | The value assigned to clean items inside of tags/metadata | INFECTED | No | -| AV_STATUS_METADATA | The tag/metadata name representing file's AV status | av-status | No | -| AV_STATUS_SNS_ARN | SNS topic ARN to publish scan results (optional) | | No | -| AV_STATUS_SNS_PUBLISH_CLEAN | Publish AV_STATUS_CLEAN results to AV_STATUS_SNS_ARN | True | No | -| AV_STATUS_SNS_PUBLISH_INFECTED | Publish AV_STATUS_INFECTED results to AV_STATUS_SNS_ARN | True | No | -| AV_TIMESTAMP_METADATA | The tag/metadata name representing file's scan time | av-timestamp | No | -| CLAMAVLIB_PATH | Path to ClamAV library files | ./bin | No | -| CLAMSCAN_PATH | Path to ClamAV clamscan binary | ./bin/clamscan | No | -| FRESHCLAM_PATH | Path to ClamAV freshclam binary | ./bin/freshclam | No | -| DATADOG_API_KEY | API Key for pushing metrics to DataDog (optional) | | No | -| AV_PROCESS_ORIGINAL_VERSION_ONLY | Controls that only original version of an S3 key is processed (if bucket versioning is enabled) | False | No | -| AV_DELETE_INFECTED_FILES | Controls whether infected files should be automatically deleted | False | No | -| EVENT_SOURCE | The source of antivirus scan event "S3" or "SNS" (optional) | S3 | No | -| S3_ENDPOINT | The Endpoint to use when interacting wth S3 | None | No | -| SNS_ENDPOINT | The Endpoint to use when interacting wth SNS | None | No | -| LAMBDA_ENDPOINT | The Endpoint to use when interacting wth Lambda | None | No | +| Variable | Description | Default | Required | +|----------------------------------|-------------------------------------------------------------------------------------------------|------------------------|----------| +| AV_DEFINITION_S3_BUCKET | Bucket containing antivirus definition files | | Yes | +| AV_DEFINITION_S3_PREFIX | Prefix for antivirus definition files | clamav_defs | No | +| AV_DEFINITION_S3_EXTRA_PREFIX | Prefix for extra antivirus definition files (downloaded with fangfrisch) | clamav_extra_defs | No | +| AV_DEFINITION_PATH | Path containing virus definition files at runtime | /tmp/clamav_defs | No | +| AV_DEFINITION_EXTRA_PATH | Path containing extra virus definition files at runtime | /tmp/clamav_extra_defs | No | +| AV_SCAN_START_SNS_ARN | SNS topic ARN to publish notification about start of scan | | No | +| AV_SCAN_START_METADATA | The tag/metadata indicating the start of the scan | av-scan-start | No | +| AV_SIGNATURE_METADATA | The tag/metadata name representing file's AV type | av-signature | No | +| AV_STATUS_CLEAN | The value assigned to clean items inside of tags/metadata | CLEAN | No | +| AV_STATUS_INFECTED | The value assigned to clean items inside of tags/metadata | INFECTED | No | +| AV_STATUS_METADATA | The tag/metadata name representing file's AV status | av-status | No | +| AV_STATUS_SNS_ARN | SNS topic ARN to publish scan results (optional) | | No | +| AV_STATUS_SNS_PUBLISH_CLEAN | Publish AV_STATUS_CLEAN results to AV_STATUS_SNS_ARN | True | No | +| AV_STATUS_SNS_PUBLISH_INFECTED | Publish AV_STATUS_INFECTED results to AV_STATUS_SNS_ARN | True | No | +| AV_TIMESTAMP_METADATA | The tag/metadata name representing file's scan time | av-timestamp | No | +| AV_USE_FANGFRISCH | Uses fangfrisch for extra download definitions | False | No | +| CLAMAVLIB_PATH | Path to ClamAV library files | ./bin | No | +| CLAMSCAN_PATH | Path to ClamAV clamscan binary | ./bin/clamscan | No | +| FRESHCLAM_PATH | Path to ClamAV freshclam binary | ./bin/freshclam | No | +| DATADOG_API_KEY | API Key for pushing metrics to DataDog (optional) | | No | +| AV_PROCESS_ORIGINAL_VERSION_ONLY | Controls that only original version of an S3 key is processed (if bucket versioning is enabled) | False | No | +| AV_DELETE_INFECTED_FILES | Controls whether infected files should be automatically deleted | False | No | +| EVENT_SOURCE | The source of antivirus scan event "S3" or "SNS" (optional) | S3 | No | +| S3_ENDPOINT | The Endpoint to use when interacting wth S3 | None | No | +| SNS_ENDPOINT | The Endpoint to use when interacting wth SNS | None | No | +| LAMBDA_ENDPOINT | The Endpoint to use when interacting wth Lambda | None | No | ## S3 Bucket Policy Examples diff --git a/common.py b/common.py index bb953fca..39fbe6a6 100644 --- a/common.py +++ b/common.py @@ -17,10 +17,32 @@ import datetime import os import os.path +from distutils.util import strtobool + + +def create_dir(path): + if not os.path.exists(path): + try: + print("Attempting to create directory %s.\n" % path) + os.makedirs(path) + except OSError as exc: + if exc.errno != errno.EEXIST: + raise + + +def get_timestamp(): + return datetime.datetime.utcnow().strftime("%Y/%m/%d %H:%M:%S UTC") + + +def str_to_bool(s): + return bool(strtobool(str(s))) + AV_DEFINITION_S3_BUCKET = os.getenv("AV_DEFINITION_S3_BUCKET") AV_DEFINITION_S3_PREFIX = os.getenv("AV_DEFINITION_S3_PREFIX", "clamav_defs") +AV_DEFINITION_S3_EXTRA_PREFIX = os.getenv("AV_DEFINITION_S3_PREFIX", "clamav_extra_defs") AV_DEFINITION_PATH = os.getenv("AV_DEFINITION_PATH", "/tmp/clamav_defs") +AV_DEFINITION_EXTRA_PATH = os.getenv("AV_DEFINITION_PATH", "/tmp/clamav_extra_defs") AV_SCAN_START_SNS_ARN = os.getenv("AV_SCAN_START_SNS_ARN") AV_SCAN_START_METADATA = os.getenv("AV_SCAN_START_METADATA", "av-scan-start") AV_SIGNATURE_METADATA = os.getenv("AV_SIGNATURE_METADATA", "av-signature") @@ -30,33 +52,20 @@ AV_STATUS_INFECTED = os.getenv("AV_STATUS_INFECTED", "INFECTED") AV_STATUS_METADATA = os.getenv("AV_STATUS_METADATA", "av-status") AV_STATUS_SNS_ARN = os.getenv("AV_STATUS_SNS_ARN") -AV_STATUS_SNS_PUBLISH_CLEAN = os.getenv("AV_STATUS_SNS_PUBLISH_CLEAN", "True") -AV_STATUS_SNS_PUBLISH_INFECTED = os.getenv("AV_STATUS_SNS_PUBLISH_INFECTED", "True") +AV_STATUS_SNS_PUBLISH_CLEAN = str_to_bool(os.getenv("AV_STATUS_SNS_PUBLISH_CLEAN", "True")) +AV_STATUS_SNS_PUBLISH_INFECTED = str_to_bool(os.getenv("AV_STATUS_SNS_PUBLISH_INFECTED", "True")) AV_TIMESTAMP_METADATA = os.getenv("AV_TIMESTAMP_METADATA", "av-timestamp") +AV_USE_FANGFRISCH = str_to_bool(os.getenv("AV_TIMESTAMP_METADATA", "False")) CLAMAVLIB_PATH = os.getenv("CLAMAVLIB_PATH", "./bin") CLAMSCAN_PATH = os.getenv("CLAMSCAN_PATH", "./bin/clamscan") FRESHCLAM_PATH = os.getenv("FRESHCLAM_PATH", "./bin/freshclam") -AV_PROCESS_ORIGINAL_VERSION_ONLY = os.getenv( +AV_PROCESS_ORIGINAL_VERSION_ONLY = str_to_bool(os.getenv( "AV_PROCESS_ORIGINAL_VERSION_ONLY", "False" -) -AV_DELETE_INFECTED_FILES = os.getenv("AV_DELETE_INFECTED_FILES", "False") +)) +AV_DELETE_INFECTED_FILES = str_to_bool(os.getenv("AV_DELETE_INFECTED_FILES", "False")) AV_DEFINITION_FILE_PREFIXES = ["main", "daily", "bytecode"] AV_DEFINITION_FILE_SUFFIXES = ["cld", "cvd"] SNS_ENDPOINT = os.getenv("SNS_ENDPOINT", None) S3_ENDPOINT = os.getenv("S3_ENDPOINT", None) LAMBDA_ENDPOINT = os.getenv("LAMBDA_ENDPOINT", None) - - -def create_dir(path): - if not os.path.exists(path): - try: - print("Attempting to create directory %s.\n" % path) - os.makedirs(path) - except OSError as exc: - if exc.errno != errno.EEXIST: - raise - - -def get_timestamp(): - return datetime.datetime.utcnow().strftime("%Y/%m/%d %H:%M:%S UTC") diff --git a/fangfrisch.conf b/fangfrisch.conf new file mode 100644 index 00000000..6d8b2871 --- /dev/null +++ b/fangfrisch.conf @@ -0,0 +1,19 @@ +[DEFAULT] +db_url = sqlite:////tmp/clamav_extra_defs/db.sqlite +local_directory = /tmp/clamav_extra_defs +on_update_timeout = 120 +log_level = INFO +log_method = console + +[malwarepatrol] +enabled = no + +[sanesecurity] +enabled = yes + +[securiteinfo] +enabled = no + +[urlhaus] +enabled = yes +max_size = 2MB diff --git a/requirements.txt b/requirements.txt index 01c63d97..3badb1cc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,3 +7,4 @@ requests==2.21 simplejson==3.16 urllib3==1.24.2 pytz==2019.3 +fangfrisch==1.6.0 diff --git a/scan.py b/scan.py index 48545a06..0eff952e 100644 --- a/scan.py +++ b/scan.py @@ -17,7 +17,6 @@ import json import os from urllib.parse import unquote_plus -from distutils.util import strtobool import boto3 @@ -171,12 +170,10 @@ def sns_scan_results( sns_client, s3_object, sns_arn, scan_result, scan_signature, timestamp ): # Don't publish if scan_result is CLEAN and CLEAN results should not be published - if scan_result == AV_STATUS_CLEAN and not str_to_bool(AV_STATUS_SNS_PUBLISH_CLEAN): + if scan_result == AV_STATUS_CLEAN and not AV_STATUS_SNS_PUBLISH_CLEAN: return # Don't publish if scan_result is INFECTED and INFECTED results should not be published - if scan_result == AV_STATUS_INFECTED and not str_to_bool( - AV_STATUS_SNS_PUBLISH_INFECTED - ): + if scan_result == AV_STATUS_INFECTED and not AV_STATUS_SNS_PUBLISH_INFECTED: return message = { "bucket": s3_object.bucket_name, @@ -210,10 +207,10 @@ def lambda_handler(event, context): EVENT_SOURCE = os.getenv("EVENT_SOURCE", "S3") start_time = get_timestamp() - print("Script starting at %s\n" % (start_time)) + print("Script starting at %s\n" % start_time) s3_object = event_object(event, event_source=EVENT_SOURCE) - if str_to_bool(AV_PROCESS_ORIGINAL_VERSION_ONLY): + if AV_PROCESS_ORIGINAL_VERSION_ONLY: verify_s3_object_version(s3, s3_object) # Publish the start time of the scan @@ -234,7 +231,7 @@ def lambda_handler(event, context): local_path = download["local_path"] print("Downloading definition file %s from s3://%s" % (local_path, s3_path)) s3.Bucket(AV_DEFINITION_S3_BUCKET).download_file(s3_path, local_path) - print("Downloading definition file %s complete!" % (local_path)) + print("Downloading definition file %s complete!" % local_path) scan_result, scan_signature = clamav.scan_file(file_path) print( "Scan of s3://%s resulted in %s\n" @@ -266,11 +263,7 @@ def lambda_handler(event, context): os.remove(file_path) except OSError: pass - if str_to_bool(AV_DELETE_INFECTED_FILES) and scan_result == AV_STATUS_INFECTED: + if AV_DELETE_INFECTED_FILES and scan_result == AV_STATUS_INFECTED: delete_s3_object(s3_object) stop_scan_time = get_timestamp() print("Script finished at %s\n" % stop_scan_time) - - -def str_to_bool(s): - return bool(strtobool(str(s))) diff --git a/update.py b/update.py index 80aa46d1..b3e7656c 100644 --- a/update.py +++ b/update.py @@ -14,13 +14,17 @@ # limitations under the License. import os +import subprocess import boto3 import clamav from common import AV_DEFINITION_PATH +from common import AV_DEFINITION_EXTRA_PATH from common import AV_DEFINITION_S3_BUCKET from common import AV_DEFINITION_S3_PREFIX +from common import AV_DEFINITION_S3_EXTRA_PREFIX +from common import AV_USE_FANGFRISCH from common import CLAMAVLIB_PATH from common import S3_ENDPOINT from common import get_timestamp @@ -42,6 +46,18 @@ def lambda_handler(event, context): s3.Bucket(AV_DEFINITION_S3_BUCKET).download_file(s3_path, local_path) print("Downloading definition file %s complete!" % (local_path)) + if AV_USE_FANGFRISCH: + bucket_extra_defs_path = os.path.join("s3://", AV_DEFINITION_S3_BUCKET, AV_DEFINITION_S3_EXTRA_PREFIX) + sync_command = f"aws s3 sync {bucket_extra_defs_path} {AV_DEFINITION_EXTRA_PATH}" + subprocess.run(sync_command, shell=True) + + fangfrisch_base_command = "fangfrisch --conf fangfrisch.conf" + subprocess.run(f"{fangfrisch_base_command} initdb", shell=True) + subprocess.run(f"{fangfrisch_base_command} refresh", shell=True) + + sync_after_command = f"aws s3 sync {AV_DEFINITION_EXTRA_PATH} {bucket_extra_defs_path}" + subprocess.run(sync_after_command, shell=True) + clamav.update_defs_from_freshclam(AV_DEFINITION_PATH, CLAMAVLIB_PATH) # If main.cvd gets updated (very rare), we will need to force freshclam # to download the compressed version to keep file sizes down. From 3a898f297e42ec4040fd30cffe1880cfb0413107 Mon Sep 17 00:00:00 2001 From: Gabriel Chamon Araujo Date: Fri, 24 Feb 2023 12:21:31 -0300 Subject: [PATCH 04/49] update dockerfile and update to reference correct fangfrisch bin --- Dockerfile | 3 ++- common.py | 6 +++--- requirements.txt | 2 +- update.py | 4 +++- 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/Dockerfile b/Dockerfile index fbe47c73..31fc0951 100644 --- a/Dockerfile +++ b/Dockerfile @@ -78,7 +78,8 @@ RUN ldconfig # Create the zip file WORKDIR /opt/app -RUN zip -r9 --exclude="*test*" /opt/app/build/lambda.zip *.py bin +RUN cp /usr/local/bin/fangfrisch bin \ + && zip -r9 --exclude="*test*" /opt/app/build/lambda.zip *.py *.conf bin WORKDIR /usr/local/lib/python3.7/site-packages RUN zip -r9 /opt/app/build/lambda.zip * diff --git a/common.py b/common.py index 39fbe6a6..e81b8cea 100644 --- a/common.py +++ b/common.py @@ -40,9 +40,9 @@ def str_to_bool(s): AV_DEFINITION_S3_BUCKET = os.getenv("AV_DEFINITION_S3_BUCKET") AV_DEFINITION_S3_PREFIX = os.getenv("AV_DEFINITION_S3_PREFIX", "clamav_defs") -AV_DEFINITION_S3_EXTRA_PREFIX = os.getenv("AV_DEFINITION_S3_PREFIX", "clamav_extra_defs") +AV_DEFINITION_S3_EXTRA_PREFIX = os.getenv("AV_DEFINITION_S3_EXTRA_PREFIX", "clamav_extra_defs") AV_DEFINITION_PATH = os.getenv("AV_DEFINITION_PATH", "/tmp/clamav_defs") -AV_DEFINITION_EXTRA_PATH = os.getenv("AV_DEFINITION_PATH", "/tmp/clamav_extra_defs") +AV_DEFINITION_EXTRA_PATH = os.getenv("AV_DEFINITION_EXTRA_PATH", "/tmp/clamav_extra_defs") AV_SCAN_START_SNS_ARN = os.getenv("AV_SCAN_START_SNS_ARN") AV_SCAN_START_METADATA = os.getenv("AV_SCAN_START_METADATA", "av-scan-start") AV_SIGNATURE_METADATA = os.getenv("AV_SIGNATURE_METADATA", "av-signature") @@ -55,7 +55,7 @@ def str_to_bool(s): AV_STATUS_SNS_PUBLISH_CLEAN = str_to_bool(os.getenv("AV_STATUS_SNS_PUBLISH_CLEAN", "True")) AV_STATUS_SNS_PUBLISH_INFECTED = str_to_bool(os.getenv("AV_STATUS_SNS_PUBLISH_INFECTED", "True")) AV_TIMESTAMP_METADATA = os.getenv("AV_TIMESTAMP_METADATA", "av-timestamp") -AV_USE_FANGFRISCH = str_to_bool(os.getenv("AV_TIMESTAMP_METADATA", "False")) +AV_USE_FANGFRISCH = str_to_bool(os.getenv("AV_USE_FANGFRISCH", "False")) CLAMAVLIB_PATH = os.getenv("CLAMAVLIB_PATH", "./bin") CLAMSCAN_PATH = os.getenv("CLAMSCAN_PATH", "./bin/clamscan") FRESHCLAM_PATH = os.getenv("FRESHCLAM_PATH", "./bin/freshclam") diff --git a/requirements.txt b/requirements.txt index 3badb1cc..1754a9d8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,4 +7,4 @@ requests==2.21 simplejson==3.16 urllib3==1.24.2 pytz==2019.3 -fangfrisch==1.6.0 +fangfrisch==1.6.1 diff --git a/update.py b/update.py index b3e7656c..b6ec32bc 100644 --- a/update.py +++ b/update.py @@ -51,12 +51,14 @@ def lambda_handler(event, context): sync_command = f"aws s3 sync {bucket_extra_defs_path} {AV_DEFINITION_EXTRA_PATH}" subprocess.run(sync_command, shell=True) - fangfrisch_base_command = "fangfrisch --conf fangfrisch.conf" + fangfrisch_base_command = "bin/fangfrisch --conf fangfrisch.conf" subprocess.run(f"{fangfrisch_base_command} initdb", shell=True) subprocess.run(f"{fangfrisch_base_command} refresh", shell=True) sync_after_command = f"aws s3 sync {AV_DEFINITION_EXTRA_PATH} {bucket_extra_defs_path}" subprocess.run(sync_after_command, shell=True) + else: + print("Skip downloading extra virus definitions with Fangfrisch") clamav.update_defs_from_freshclam(AV_DEFINITION_PATH, CLAMAVLIB_PATH) # If main.cvd gets updated (very rare), we will need to force freshclam From fce2f29c56fe381e0374758cdfcacb60a803aa59 Mon Sep 17 00:00:00 2001 From: Gabriel Chamon Araujo Date: Fri, 24 Feb 2023 15:02:32 -0300 Subject: [PATCH 05/49] installing awscliv2 and add correct pythonpath for fangfrisch --- Dockerfile | 40 ++++++++++++++++++++++++---------------- update.py | 6 ++++-- 2 files changed, 28 insertions(+), 18 deletions(-) diff --git a/Dockerfile b/Dockerfile index 31fc0951..6983fe2a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,14 +1,16 @@ FROM amazonlinux:2 +ENV TASK_FOLDER=/var/task + # Set up working directories -RUN mkdir -p /opt/app -RUN mkdir -p /opt/app/build -RUN mkdir -p /opt/app/bin/ +RUN mkdir -p $TASK_FOLDER +RUN mkdir -p $TASK_FOLDER/build +RUN mkdir -p $TASK_FOLDER/bin/ # Copy in the lambda source -WORKDIR /opt/app -COPY ./*.py /opt/app/ -COPY requirements.txt /opt/app/requirements.txt +WORKDIR $TASK_FOLDER +COPY ./*.py $TASK_FOLDER/ +COPY requirements.txt $TASK_FOLDER/requirements.txt # Install packages RUN yum update -y @@ -59,13 +61,13 @@ RUN rpm2cpio nettle*.rpm | cpio -vimd # Copy over the binaries and libraries -RUN cp /tmp/usr/bin/clamscan /tmp/usr/bin/freshclam /tmp/usr/lib64/* /usr/lib64/libpcre.so.1 /opt/app/bin/ +RUN cp /tmp/usr/bin/clamscan /tmp/usr/bin/freshclam /tmp/usr/lib64/* /usr/lib64/libpcre.so.1 $TASK_FOLDER/bin/ # Fix the freshclam.conf settings -RUN echo "DatabaseMirror database.clamav.net" > /opt/app/bin/freshclam.conf -RUN echo "CompressLocalDatabase yes" >> /opt/app/bin/freshclam.conf -RUN echo "ScriptedUpdates no" >> /opt/app/bin/freshclam.conf -RUN echo "DatabaseDirectory /var/lib/clamav" >> /opt/app/bin/freshclam.conf +RUN echo "DatabaseMirror database.clamav.net" > $TASK_FOLDER/bin/freshclam.conf +RUN echo "CompressLocalDatabase yes" >> $TASK_FOLDER/bin/freshclam.conf +RUN echo "ScriptedUpdates no" >> $TASK_FOLDER/bin/freshclam.conf +RUN echo "DatabaseDirectory /var/lib/clamav" >> $TASK_FOLDER/bin/freshclam.conf RUN yum install shadow-utils.x86_64 -y @@ -73,15 +75,21 @@ RUN groupadd clamav RUN useradd -g clamav -s /bin/false -c "Clam Antivirus" clamav RUN useradd -g clamav -s /bin/false -c "Clam Antivirus" clamupdate -ENV LD_LIBRARY_PATH=/opt/app/bin +# install AWSCLI +RUN yum install -y unzip \ + && curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" \ + && unzip awscliv2.zip \ + && ./aws/install --bin-dir $TASK_FOLDER/bin --install-dir $TASK_FOLDER/aws-cli + +ENV LD_LIBRARY_PATH=$TASK_FOLDER/bin RUN ldconfig # Create the zip file -WORKDIR /opt/app +WORKDIR $TASK_FOLDER RUN cp /usr/local/bin/fangfrisch bin \ - && zip -r9 --exclude="*test*" /opt/app/build/lambda.zip *.py *.conf bin + && zip -r9 --exclude="*test*" $TASK_FOLDER/build/lambda.zip *.py *.conf bin aws-cli WORKDIR /usr/local/lib/python3.7/site-packages -RUN zip -r9 /opt/app/build/lambda.zip * +RUN zip -r9 $TASK_FOLDER/build/lambda.zip * -WORKDIR /opt/app +WORKDIR $TASK_FOLDER diff --git a/update.py b/update.py index b6ec32bc..8727d034 100644 --- a/update.py +++ b/update.py @@ -52,8 +52,10 @@ def lambda_handler(event, context): subprocess.run(sync_command, shell=True) fangfrisch_base_command = "bin/fangfrisch --conf fangfrisch.conf" - subprocess.run(f"{fangfrisch_base_command} initdb", shell=True) - subprocess.run(f"{fangfrisch_base_command} refresh", shell=True) + fangfrisch_env = os.environ.copy() + fangfrisch_env["PYTHONPATH"] = fangfrisch_env["LAMBDA_TASK_ROOT"] + subprocess.run(f"{fangfrisch_base_command} initdb", shell=True, env=fangfrisch_env) + subprocess.run(f"{fangfrisch_base_command} refresh", shell=True, env=fangfrisch_env) sync_after_command = f"aws s3 sync {AV_DEFINITION_EXTRA_PATH} {bucket_extra_defs_path}" subprocess.run(sync_after_command, shell=True) From abbeee131b1b28ba5808e26534d1016c6e4deefc Mon Sep 17 00:00:00 2001 From: Gabriel Chamon Araujo Date: Fri, 24 Feb 2023 15:13:55 -0300 Subject: [PATCH 06/49] revert change to /opt/app --- Dockerfile | 43 ++++++++++++++++++++----------------------- 1 file changed, 20 insertions(+), 23 deletions(-) diff --git a/Dockerfile b/Dockerfile index 6983fe2a..1ca1f169 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,16 +1,14 @@ FROM amazonlinux:2 -ENV TASK_FOLDER=/var/task - # Set up working directories -RUN mkdir -p $TASK_FOLDER -RUN mkdir -p $TASK_FOLDER/build -RUN mkdir -p $TASK_FOLDER/bin/ +RUN mkdir -p /opt/app +RUN mkdir -p /opt/app/build +RUN mkdir -p /opt/app/bin/ # Copy in the lambda source -WORKDIR $TASK_FOLDER -COPY ./*.py $TASK_FOLDER/ -COPY requirements.txt $TASK_FOLDER/requirements.txt +WORKDIR /opt/app +COPY ./*.py /opt/app/ +COPY requirements.txt /opt/app/requirements.txt # Install packages RUN yum update -y @@ -18,7 +16,8 @@ RUN amazon-linux-extras install epel -y RUN yum install -y cpio yum-utils tar.x86_64 gzip zip python3-pip # This had --no-cache-dir, tracing through multiple tickets led to a problem in wheel -RUN pip3 install -r requirements.txt +RUN pip3 install -r requirements.txt \ + && cp /usr/local/bin/fangfrisch /opt/app/bin RUN rm -rf /root/.cache/pip # Download libraries we need to run in lambda @@ -61,13 +60,13 @@ RUN rpm2cpio nettle*.rpm | cpio -vimd # Copy over the binaries and libraries -RUN cp /tmp/usr/bin/clamscan /tmp/usr/bin/freshclam /tmp/usr/lib64/* /usr/lib64/libpcre.so.1 $TASK_FOLDER/bin/ +RUN cp /tmp/usr/bin/clamscan /tmp/usr/bin/freshclam /tmp/usr/lib64/* /usr/lib64/libpcre.so.1 /opt/app/bin/ # Fix the freshclam.conf settings -RUN echo "DatabaseMirror database.clamav.net" > $TASK_FOLDER/bin/freshclam.conf -RUN echo "CompressLocalDatabase yes" >> $TASK_FOLDER/bin/freshclam.conf -RUN echo "ScriptedUpdates no" >> $TASK_FOLDER/bin/freshclam.conf -RUN echo "DatabaseDirectory /var/lib/clamav" >> $TASK_FOLDER/bin/freshclam.conf +RUN echo "DatabaseMirror database.clamav.net" > /opt/app/bin/freshclam.conf +RUN echo "CompressLocalDatabase yes" >> /opt/app/bin/freshclam.conf +RUN echo "ScriptedUpdates no" >> /opt/app/bin/freshclam.conf +RUN echo "DatabaseDirectory /var/lib/clamav" >> /opt/app/bin/freshclam.conf RUN yum install shadow-utils.x86_64 -y @@ -79,17 +78,15 @@ RUN useradd -g clamav -s /bin/false -c "Clam Antivirus" clamupdate RUN yum install -y unzip \ && curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" \ && unzip awscliv2.zip \ - && ./aws/install --bin-dir $TASK_FOLDER/bin --install-dir $TASK_FOLDER/aws-cli + && ./aws/install --bin-dir /var/task/bin --install-dir /var/task/aws-cli -ENV LD_LIBRARY_PATH=$TASK_FOLDER/bin +ENV LD_LIBRARY_PATH=/opt/app/bin RUN ldconfig # Create the zip file -WORKDIR $TASK_FOLDER -RUN cp /usr/local/bin/fangfrisch bin \ - && zip -r9 --exclude="*test*" $TASK_FOLDER/build/lambda.zip *.py *.conf bin aws-cli - -WORKDIR /usr/local/lib/python3.7/site-packages -RUN zip -r9 $TASK_FOLDER/build/lambda.zip * +RUN cd /opt/app \ + && zip -r9 --exclude="*test*" /opt/app/build/lambda.zip *.py *.conf bin aws-cli \ + && cd /usr/local/lib/python3.7/site-packages \ + && zip -r9 /opt/app/build/lambda.zip * -WORKDIR $TASK_FOLDER +WORKDIR /opt/app From efc71b6dbac31f9a9f38c8ee6238c8d8c68446f4 Mon Sep 17 00:00:00 2001 From: Gabriel Chamon Araujo Date: Fri, 24 Feb 2023 15:24:00 -0300 Subject: [PATCH 07/49] add aws-cli to final lambda --- Dockerfile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 1ca1f169..265e9cdc 100644 --- a/Dockerfile +++ b/Dockerfile @@ -87,6 +87,8 @@ RUN ldconfig RUN cd /opt/app \ && zip -r9 --exclude="*test*" /opt/app/build/lambda.zip *.py *.conf bin aws-cli \ && cd /usr/local/lib/python3.7/site-packages \ - && zip -r9 /opt/app/build/lambda.zip * + && zip -r9 /opt/app/build/lambda.zip * \ + && cd /var/task \ + && zip -r9 /opt/app/build/lambda.zip aws-cli bin WORKDIR /opt/app From e21edc65a1da0c406be89f4f8af349b1b0ce99dd Mon Sep 17 00:00:00 2001 From: Gabriel Chamon Araujo Date: Fri, 24 Feb 2023 15:32:29 -0300 Subject: [PATCH 08/49] add lib64 python packages --- Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Dockerfile b/Dockerfile index 265e9cdc..6af3fadf 100644 --- a/Dockerfile +++ b/Dockerfile @@ -88,6 +88,8 @@ RUN cd /opt/app \ && zip -r9 --exclude="*test*" /opt/app/build/lambda.zip *.py *.conf bin aws-cli \ && cd /usr/local/lib/python3.7/site-packages \ && zip -r9 /opt/app/build/lambda.zip * \ + && cd /usr/local/lib64/python3.7/site-packages/ \ + && zip -r9 /opt/app/build/lambda.zip * \ && cd /var/task \ && zip -r9 /opt/app/build/lambda.zip aws-cli bin From ea1bc0c8ea57dc6f1d2e6928ebfeabffb6d3f235 Mon Sep 17 00:00:00 2001 From: Gabriel Chamon Araujo Date: Fri, 24 Feb 2023 15:34:36 -0300 Subject: [PATCH 09/49] centralizing pip installation with --target argument --- Dockerfile | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/Dockerfile b/Dockerfile index 6af3fadf..8e2c6d3b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,7 +3,8 @@ FROM amazonlinux:2 # Set up working directories RUN mkdir -p /opt/app RUN mkdir -p /opt/app/build -RUN mkdir -p /opt/app/bin/ +RUN mkdir -p /opt/app/bin +RUN mkdir -p /opt/app/python_deps # Copy in the lambda source WORKDIR /opt/app @@ -16,7 +17,7 @@ RUN amazon-linux-extras install epel -y RUN yum install -y cpio yum-utils tar.x86_64 gzip zip python3-pip # This had --no-cache-dir, tracing through multiple tickets led to a problem in wheel -RUN pip3 install -r requirements.txt \ +RUN pip3 install --requirement requirements.txt --target /opt/app/python_deps \ && cp /usr/local/bin/fangfrisch /opt/app/bin RUN rm -rf /root/.cache/pip @@ -86,9 +87,7 @@ RUN ldconfig # Create the zip file RUN cd /opt/app \ && zip -r9 --exclude="*test*" /opt/app/build/lambda.zip *.py *.conf bin aws-cli \ - && cd /usr/local/lib/python3.7/site-packages \ - && zip -r9 /opt/app/build/lambda.zip * \ - && cd /usr/local/lib64/python3.7/site-packages/ \ + && cd /opt/app/python_deps \ && zip -r9 /opt/app/build/lambda.zip * \ && cd /var/task \ && zip -r9 /opt/app/build/lambda.zip aws-cli bin From ed91b06b01e61fa5024db45b3904b5b2cfd05f42 Mon Sep 17 00:00:00 2001 From: Gabriel Chamon Araujo Date: Fri, 24 Feb 2023 15:39:34 -0300 Subject: [PATCH 10/49] removing cp statement made unecessary with --target argument for pip --- Dockerfile | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 8e2c6d3b..cb59ab2b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -17,8 +17,7 @@ RUN amazon-linux-extras install epel -y RUN yum install -y cpio yum-utils tar.x86_64 gzip zip python3-pip # This had --no-cache-dir, tracing through multiple tickets led to a problem in wheel -RUN pip3 install --requirement requirements.txt --target /opt/app/python_deps \ - && cp /usr/local/bin/fangfrisch /opt/app/bin +RUN pip3 install --requirement requirements.txt --target /opt/app/python_deps RUN rm -rf /root/.cache/pip # Download libraries we need to run in lambda @@ -86,7 +85,7 @@ RUN ldconfig # Create the zip file RUN cd /opt/app \ - && zip -r9 --exclude="*test*" /opt/app/build/lambda.zip *.py *.conf bin aws-cli \ + && zip -r9 --exclude="*test*" /opt/app/build/lambda.zip *.py *.conf bin \ && cd /opt/app/python_deps \ && zip -r9 /opt/app/build/lambda.zip * \ && cd /var/task \ From 5df78a7a865e65b0bcac738c20285ddb3a6c515a Mon Sep 17 00:00:00 2001 From: Gabriel Chamon Araujo Date: Fri, 24 Feb 2023 15:45:28 -0300 Subject: [PATCH 11/49] downgrade awscli to v1 uncompressed lambda zip was hitting size limit, maybe downgrading will shrink size --- Dockerfile | 10 +--------- requirements.txt | 1 + 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/Dockerfile b/Dockerfile index cb59ab2b..a0565351 100644 --- a/Dockerfile +++ b/Dockerfile @@ -74,12 +74,6 @@ RUN groupadd clamav RUN useradd -g clamav -s /bin/false -c "Clam Antivirus" clamav RUN useradd -g clamav -s /bin/false -c "Clam Antivirus" clamupdate -# install AWSCLI -RUN yum install -y unzip \ - && curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" \ - && unzip awscliv2.zip \ - && ./aws/install --bin-dir /var/task/bin --install-dir /var/task/aws-cli - ENV LD_LIBRARY_PATH=/opt/app/bin RUN ldconfig @@ -87,8 +81,6 @@ RUN ldconfig RUN cd /opt/app \ && zip -r9 --exclude="*test*" /opt/app/build/lambda.zip *.py *.conf bin \ && cd /opt/app/python_deps \ - && zip -r9 /opt/app/build/lambda.zip * \ - && cd /var/task \ - && zip -r9 /opt/app/build/lambda.zip aws-cli bin + && zip -r9 /opt/app/build/lambda.zip * WORKDIR /opt/app diff --git a/requirements.txt b/requirements.txt index 1754a9d8..ca337bbf 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,3 +8,4 @@ simplejson==3.16 urllib3==1.24.2 pytz==2019.3 fangfrisch==1.6.1 +awscli==1.27.78 From ca235e484941f23330bb574093bd8c346b427c90 Mon Sep 17 00:00:00 2001 From: Gabriel Chamon Araujo Date: Fri, 24 Feb 2023 15:52:17 -0300 Subject: [PATCH 12/49] return with copying python3.7 site-packages for extra missing deps --- Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Dockerfile b/Dockerfile index a0565351..d54e51f7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -80,6 +80,8 @@ RUN ldconfig # Create the zip file RUN cd /opt/app \ && zip -r9 --exclude="*test*" /opt/app/build/lambda.zip *.py *.conf bin \ + && cd /usr/local/lib/python3.7/site-packages \ + && zip -r9 /opt/app/build/lambda.zip * \ && cd /opt/app/python_deps \ && zip -r9 /opt/app/build/lambda.zip * From 3e9b2e3e4ebc10136ea06052b12339b99f79e6be Mon Sep 17 00:00:00 2001 From: Gabriel Chamon Araujo Date: Fri, 24 Feb 2023 16:30:23 -0300 Subject: [PATCH 13/49] update urllib3 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index ca337bbf..b2b76ba3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,7 +5,7 @@ decorator==4.3 idna==2.8 requests==2.21 simplejson==3.16 -urllib3==1.24.2 +urllib3==1.26.7 pytz==2019.3 fangfrisch==1.6.1 awscli==1.27.78 From cf304110ff67d3bd124565f07c96bd4df316eda9 Mon Sep 17 00:00:00 2001 From: Gabriel Chamon Araujo Date: Fri, 24 Feb 2023 16:30:45 -0300 Subject: [PATCH 14/49] remove python3.7 site packages, as it doesn't really exist --- Dockerfile | 2 -- 1 file changed, 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index d54e51f7..a0565351 100644 --- a/Dockerfile +++ b/Dockerfile @@ -80,8 +80,6 @@ RUN ldconfig # Create the zip file RUN cd /opt/app \ && zip -r9 --exclude="*test*" /opt/app/build/lambda.zip *.py *.conf bin \ - && cd /usr/local/lib/python3.7/site-packages \ - && zip -r9 /opt/app/build/lambda.zip * \ && cd /opt/app/python_deps \ && zip -r9 /opt/app/build/lambda.zip * From 43baf438354be75e925caf987096ee9f8787de10 Mon Sep 17 00:00:00 2001 From: Gabriel Chamon Araujo Date: Tue, 28 Feb 2023 09:15:42 -0300 Subject: [PATCH 15/49] run aws from bin folder --- update.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/update.py b/update.py index 8727d034..9da86f45 100644 --- a/update.py +++ b/update.py @@ -48,7 +48,7 @@ def lambda_handler(event, context): if AV_USE_FANGFRISCH: bucket_extra_defs_path = os.path.join("s3://", AV_DEFINITION_S3_BUCKET, AV_DEFINITION_S3_EXTRA_PREFIX) - sync_command = f"aws s3 sync {bucket_extra_defs_path} {AV_DEFINITION_EXTRA_PATH}" + sync_command = f"bin/aws s3 sync {bucket_extra_defs_path} {AV_DEFINITION_EXTRA_PATH}" subprocess.run(sync_command, shell=True) fangfrisch_base_command = "bin/fangfrisch --conf fangfrisch.conf" @@ -57,7 +57,7 @@ def lambda_handler(event, context): subprocess.run(f"{fangfrisch_base_command} initdb", shell=True, env=fangfrisch_env) subprocess.run(f"{fangfrisch_base_command} refresh", shell=True, env=fangfrisch_env) - sync_after_command = f"aws s3 sync {AV_DEFINITION_EXTRA_PATH} {bucket_extra_defs_path}" + sync_after_command = f"bin/aws s3 sync {AV_DEFINITION_EXTRA_PATH} {bucket_extra_defs_path}" subprocess.run(sync_after_command, shell=True) else: print("Skip downloading extra virus definitions with Fangfrisch") From f59ec401b27e78a18c13d8c5ec42f7e5705cfffb Mon Sep 17 00:00:00 2001 From: Gabriel Chamon Araujo Date: Tue, 28 Feb 2023 09:30:56 -0300 Subject: [PATCH 16/49] add pythonpath to awscli invocation --- update.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/update.py b/update.py index 9da86f45..74441965 100644 --- a/update.py +++ b/update.py @@ -47,18 +47,18 @@ def lambda_handler(event, context): print("Downloading definition file %s complete!" % (local_path)) if AV_USE_FANGFRISCH: + env_with_pythonpath = os.environ.copy() + env_with_pythonpath["PYTHONPATH"] = env_with_pythonpath["LAMBDA_TASK_ROOT"] bucket_extra_defs_path = os.path.join("s3://", AV_DEFINITION_S3_BUCKET, AV_DEFINITION_S3_EXTRA_PREFIX) sync_command = f"bin/aws s3 sync {bucket_extra_defs_path} {AV_DEFINITION_EXTRA_PATH}" - subprocess.run(sync_command, shell=True) + subprocess.run(sync_command, shell=True, env=env_with_pythonpath) fangfrisch_base_command = "bin/fangfrisch --conf fangfrisch.conf" - fangfrisch_env = os.environ.copy() - fangfrisch_env["PYTHONPATH"] = fangfrisch_env["LAMBDA_TASK_ROOT"] - subprocess.run(f"{fangfrisch_base_command} initdb", shell=True, env=fangfrisch_env) - subprocess.run(f"{fangfrisch_base_command} refresh", shell=True, env=fangfrisch_env) + subprocess.run(f"{fangfrisch_base_command} initdb", shell=True, env=env_with_pythonpath) + subprocess.run(f"{fangfrisch_base_command} refresh", shell=True, env=env_with_pythonpath) sync_after_command = f"bin/aws s3 sync {AV_DEFINITION_EXTRA_PATH} {bucket_extra_defs_path}" - subprocess.run(sync_after_command, shell=True) + subprocess.run(sync_after_command, shell=True, env=env_with_pythonpath) else: print("Skip downloading extra virus definitions with Fangfrisch") From cb5f840f77a317fb18b453b1ec37afcab1d85ae5 Mon Sep 17 00:00:00 2001 From: Gabriel Chamon Araujo Date: Tue, 28 Feb 2023 10:14:12 -0300 Subject: [PATCH 17/49] separate fangfrisch requirements.txt; optimized docker build image --- Dockerfile | 102 +++++++++++++++++------------------- requirements-fangfrisch.txt | 1 + requirements.txt | 1 - update.py | 16 +++--- 4 files changed, 58 insertions(+), 62 deletions(-) create mode 100644 requirements-fangfrisch.txt diff --git a/Dockerfile b/Dockerfile index a0565351..2ef16153 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,10 +1,12 @@ FROM amazonlinux:2 # Set up working directories -RUN mkdir -p /opt/app -RUN mkdir -p /opt/app/build -RUN mkdir -p /opt/app/bin -RUN mkdir -p /opt/app/python_deps +RUN mkdir -p \ + /opt/app \ + /opt/app/build \ + /opt/app/bin \ + /opt/app/python_deps \ + /opt/app/fangfrisch # Copy in the lambda source WORKDIR /opt/app @@ -12,74 +14,66 @@ COPY ./*.py /opt/app/ COPY requirements.txt /opt/app/requirements.txt # Install packages -RUN yum update -y -RUN amazon-linux-extras install epel -y -RUN yum install -y cpio yum-utils tar.x86_64 gzip zip python3-pip +RUN yum update -y \ + && amazon-linux-extras install epel -y \ + && yum install -y cpio yum-utils tar.x86_64 gzip zip python3-pip shadow-utils.x86_64 # This had --no-cache-dir, tracing through multiple tickets led to a problem in wheel -RUN pip3 install --requirement requirements.txt --target /opt/app/python_deps -RUN rm -rf /root/.cache/pip +RUN pip3 install --requirement requirements.txt --target /opt/app/python_deps \ + && rm -rf /root/.cache/pip + +COPY requirements-fangfrisch.txt /opt/app/ +RUN pip3 install --requirement requirements-fangfrisch.txt --target /opt/app/fangfrisch \ + && rm -rf /root/.cache/pip # Download libraries we need to run in lambda WORKDIR /tmp -RUN yumdownloader -x \*i686 --archlist=x86_64 clamav -RUN rpm2cpio clamav-0*.rpm | cpio -vimd - -RUN yumdownloader -x \*i686 --archlist=x86_64 clamav-lib -RUN rpm2cpio clamav-lib*.rpm | cpio -vimd - -RUN yumdownloader -x \*i686 --archlist=x86_64 clamav-update -RUN rpm2cpio clamav-update*.rpm | cpio -vimd - -RUN yumdownloader -x \*i686 --archlist=x86_64 json-c -RUN rpm2cpio json-c*.rpm | cpio -vimd - -RUN yumdownloader -x \*i686 --archlist=x86_64 pcre2 -RUN rpm2cpio pcre*.rpm | cpio -vimd - -RUN yumdownloader -x \*i686 --archlist=x86_64 libtool-ltdl -RUN rpm2cpio libtool-ltdl*.rpm | cpio -vimd - -RUN yumdownloader -x \*i686 --archlist=x86_64 libxml2 -RUN rpm2cpio libxml2*.rpm | cpio -vimd - -RUN yumdownloader -x \*i686 --archlist=x86_64 bzip2-libs -RUN rpm2cpio bzip2-libs*.rpm | cpio -vimd - -RUN yumdownloader -x \*i686 --archlist=x86_64 xz-libs -RUN rpm2cpio xz-libs*.rpm | cpio -vimd - -RUN yumdownloader -x \*i686 --archlist=x86_64 libprelude -RUN rpm2cpio libprelude*.rpm | cpio -vimd - -RUN yumdownloader -x \*i686 --archlist=x86_64 gnutls -RUN rpm2cpio gnutls*.rpm | cpio -vimd - -RUN yumdownloader -x \*i686 --archlist=x86_64 nettle -RUN rpm2cpio nettle*.rpm | cpio -vimd +RUN yumdownloader -x \*i686 --archlist=x86_64 \ + clamav \ + clamav-lib \ + clamav-update \ + json-c \ + pcre2 \ + libtool-ltdl \ + libxml2 \ + bzip2-libs \ + xz-libs \ + libprelude \ + gnutls \ + nettle +RUN rpm2cpio clamav-0*.rpm | cpio -vimd \ + && rpm2cpio clamav-lib*.rpm | cpio -vimd \ + && rpm2cpio clamav-update*.rpm | cpio -vimd \ + && rpm2cpio json-c*.rpm | cpio -vimd \ + && rpm2cpio pcre*.rpm | cpio -vimd \ + && rpm2cpio libtool-ltdl*.rpm | cpio -vimd \ + && rpm2cpio libxml2*.rpm | cpio -vimd \ + && rpm2cpio bzip2-libs*.rpm | cpio -vimd \ + && rpm2cpio xz-libs*.rpm | cpio -vimd \ + && rpm2cpio libprelude*.rpm | cpio -vimd \ + && rpm2cpio gnutls*.rpm | cpio -vimd \ + && rpm2cpio nettle*.rpm | cpio -vimd # Copy over the binaries and libraries RUN cp /tmp/usr/bin/clamscan /tmp/usr/bin/freshclam /tmp/usr/lib64/* /usr/lib64/libpcre.so.1 /opt/app/bin/ # Fix the freshclam.conf settings -RUN echo "DatabaseMirror database.clamav.net" > /opt/app/bin/freshclam.conf -RUN echo "CompressLocalDatabase yes" >> /opt/app/bin/freshclam.conf -RUN echo "ScriptedUpdates no" >> /opt/app/bin/freshclam.conf -RUN echo "DatabaseDirectory /var/lib/clamav" >> /opt/app/bin/freshclam.conf - -RUN yum install shadow-utils.x86_64 -y +RUN echo "DatabaseMirror database.clamav.net" > /opt/app/bin/freshclam.conf \ + && echo "CompressLocalDatabase yes" >> /opt/app/bin/freshclam.conf \ + && echo "ScriptedUpdates no" >> /opt/app/bin/freshclam.conf \ + && echo "DatabaseDirectory /var/lib/clamav" >> /opt/app/bin/freshclam.conf -RUN groupadd clamav -RUN useradd -g clamav -s /bin/false -c "Clam Antivirus" clamav -RUN useradd -g clamav -s /bin/false -c "Clam Antivirus" clamupdate +RUN groupadd clamav \ + && useradd -g clamav -s /bin/false -c "Clam Antivirus" clamav \ + && useradd -g clamav -s /bin/false -c "Clam Antivirus" clamupdate ENV LD_LIBRARY_PATH=/opt/app/bin RUN ldconfig # Create the zip file RUN cd /opt/app \ - && zip -r9 --exclude="*test*" /opt/app/build/lambda.zip *.py *.conf bin \ + && zip -r9 --exclude="*test*" /opt/app/build/lambda.zip *.py *.conf bin fangfrisch \ && cd /opt/app/python_deps \ && zip -r9 /opt/app/build/lambda.zip * diff --git a/requirements-fangfrisch.txt b/requirements-fangfrisch.txt new file mode 100644 index 00000000..44ddf5e7 --- /dev/null +++ b/requirements-fangfrisch.txt @@ -0,0 +1 @@ +fangfrisch==1.6.1 diff --git a/requirements.txt b/requirements.txt index b2b76ba3..26820d30 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,5 +7,4 @@ requests==2.21 simplejson==3.16 urllib3==1.26.7 pytz==2019.3 -fangfrisch==1.6.1 awscli==1.27.78 diff --git a/update.py b/update.py index 74441965..a5be8eb5 100644 --- a/update.py +++ b/update.py @@ -47,18 +47,20 @@ def lambda_handler(event, context): print("Downloading definition file %s complete!" % (local_path)) if AV_USE_FANGFRISCH: - env_with_pythonpath = os.environ.copy() - env_with_pythonpath["PYTHONPATH"] = env_with_pythonpath["LAMBDA_TASK_ROOT"] + awscli_env = os.environ.copy() + awscli_env["PYTHONPATH"] = awscli_env["LAMBDA_TASK_ROOT"] bucket_extra_defs_path = os.path.join("s3://", AV_DEFINITION_S3_BUCKET, AV_DEFINITION_S3_EXTRA_PREFIX) sync_command = f"bin/aws s3 sync {bucket_extra_defs_path} {AV_DEFINITION_EXTRA_PATH}" - subprocess.run(sync_command, shell=True, env=env_with_pythonpath) + subprocess.run(sync_command, shell=True, env=awscli_env) - fangfrisch_base_command = "bin/fangfrisch --conf fangfrisch.conf" - subprocess.run(f"{fangfrisch_base_command} initdb", shell=True, env=env_with_pythonpath) - subprocess.run(f"{fangfrisch_base_command} refresh", shell=True, env=env_with_pythonpath) + fangfrisch_env = os.environ.copy() + fangfrisch_env["PYTHONPATH"] = os.path.join(fangfrisch_env["LAMBDA_TASK_ROOT"], "fangfrisch") + fangfrisch_base_command = "fangfrisch/bin/fangfrisch --conf fangfrisch.conf" + subprocess.run(f"{fangfrisch_base_command} initdb", shell=True, env=fangfrisch_env) + subprocess.run(f"{fangfrisch_base_command} refresh", shell=True, env=fangfrisch_env) sync_after_command = f"bin/aws s3 sync {AV_DEFINITION_EXTRA_PATH} {bucket_extra_defs_path}" - subprocess.run(sync_after_command, shell=True, env=env_with_pythonpath) + subprocess.run(sync_after_command, shell=True, env=awscli_env) else: print("Skip downloading extra virus definitions with Fangfrisch") From 02fef58e8a66f4bfc95414bb393efdf472eb092d Mon Sep 17 00:00:00 2001 From: Gabriel Chamon Araujo Date: Tue, 28 Feb 2023 13:29:03 -0300 Subject: [PATCH 18/49] sed in-place the shebang for correct python interpreter for lambdas --- Dockerfile | 15 +++++++------ ...nts-fangfrisch.txt => requirements-cli.txt | 1 + requirements.txt | 3 +-- update.py | 21 +++++++++---------- 4 files changed, 21 insertions(+), 19 deletions(-) rename requirements-fangfrisch.txt => requirements-cli.txt (52%) diff --git a/Dockerfile b/Dockerfile index 2ef16153..da171699 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,7 +6,7 @@ RUN mkdir -p \ /opt/app/build \ /opt/app/bin \ /opt/app/python_deps \ - /opt/app/fangfrisch + /opt/app/cli # Copy in the lambda source WORKDIR /opt/app @@ -20,11 +20,14 @@ RUN yum update -y \ # This had --no-cache-dir, tracing through multiple tickets led to a problem in wheel RUN pip3 install --requirement requirements.txt --target /opt/app/python_deps \ - && rm -rf /root/.cache/pip + && rm -rf /root/.cache/pip -COPY requirements-fangfrisch.txt /opt/app/ -RUN pip3 install --requirement requirements-fangfrisch.txt --target /opt/app/fangfrisch \ - && rm -rf /root/.cache/pip +COPY requirements-cli.txt /opt/app/ +RUN pip3 install --requirement requirements-cli.txt --target /opt/app/cli \ + && rm -rf /root/.cache/pip \ + && sed -i 's~/usr/bin/python3~/var/lang/bin/python3~g' \ + /opt/app/cli/bin/fangfrisch \ + /opt/app/cli/bin/aws # Download libraries we need to run in lambda WORKDIR /tmp @@ -73,7 +76,7 @@ RUN ldconfig # Create the zip file RUN cd /opt/app \ - && zip -r9 --exclude="*test*" /opt/app/build/lambda.zip *.py *.conf bin fangfrisch \ + && zip -r9 --exclude="*test*" /opt/app/build/lambda.zip *.py *.conf bin cli \ && cd /opt/app/python_deps \ && zip -r9 /opt/app/build/lambda.zip * diff --git a/requirements-fangfrisch.txt b/requirements-cli.txt similarity index 52% rename from requirements-fangfrisch.txt rename to requirements-cli.txt index 44ddf5e7..df29e787 100644 --- a/requirements-fangfrisch.txt +++ b/requirements-cli.txt @@ -1 +1,2 @@ fangfrisch==1.6.1 +awscli==1.27.78 diff --git a/requirements.txt b/requirements.txt index 26820d30..01c63d97 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,6 +5,5 @@ decorator==4.3 idna==2.8 requests==2.21 simplejson==3.16 -urllib3==1.26.7 +urllib3==1.24.2 pytz==2019.3 -awscli==1.27.78 diff --git a/update.py b/update.py index a5be8eb5..c8a75e82 100644 --- a/update.py +++ b/update.py @@ -47,20 +47,19 @@ def lambda_handler(event, context): print("Downloading definition file %s complete!" % (local_path)) if AV_USE_FANGFRISCH: - awscli_env = os.environ.copy() - awscli_env["PYTHONPATH"] = awscli_env["LAMBDA_TASK_ROOT"] + subprocess.run("/usr/bin/python3 --version", shell=True) + env_pythonpath = os.environ.copy() + env_pythonpath["PYTHONPATH"] = os.path.join(env_pythonpath["LAMBDA_TASK_ROOT"], "cli") bucket_extra_defs_path = os.path.join("s3://", AV_DEFINITION_S3_BUCKET, AV_DEFINITION_S3_EXTRA_PREFIX) - sync_command = f"bin/aws s3 sync {bucket_extra_defs_path} {AV_DEFINITION_EXTRA_PATH}" - subprocess.run(sync_command, shell=True, env=awscli_env) + sync_command = f"cli/bin/aws s3 sync {bucket_extra_defs_path} {AV_DEFINITION_EXTRA_PATH}" + subprocess.run(sync_command, shell=True, env=env_pythonpath) - fangfrisch_env = os.environ.copy() - fangfrisch_env["PYTHONPATH"] = os.path.join(fangfrisch_env["LAMBDA_TASK_ROOT"], "fangfrisch") - fangfrisch_base_command = "fangfrisch/bin/fangfrisch --conf fangfrisch.conf" - subprocess.run(f"{fangfrisch_base_command} initdb", shell=True, env=fangfrisch_env) - subprocess.run(f"{fangfrisch_base_command} refresh", shell=True, env=fangfrisch_env) + fangfrisch_base_command = "cli/bin/fangfrisch --conf fangfrisch.conf" + subprocess.run(f"{fangfrisch_base_command} initdb", shell=True, env=env_pythonpath) + subprocess.run(f"{fangfrisch_base_command} refresh", shell=True, env=env_pythonpath) - sync_after_command = f"bin/aws s3 sync {AV_DEFINITION_EXTRA_PATH} {bucket_extra_defs_path}" - subprocess.run(sync_after_command, shell=True, env=awscli_env) + sync_after_command = f"cli/bin/aws s3 sync {AV_DEFINITION_EXTRA_PATH} {bucket_extra_defs_path}" + subprocess.run(sync_after_command, shell=True, env=env_pythonpath) else: print("Skip downloading extra virus definitions with Fangfrisch") From 52d4137c84a166c9932619931f944dba1219d3b9 Mon Sep 17 00:00:00 2001 From: Gabriel Chamon Araujo Date: Tue, 28 Feb 2023 13:34:01 -0300 Subject: [PATCH 19/49] fangfrisch.conf absolute path --- update.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/update.py b/update.py index c8a75e82..11d39af8 100644 --- a/update.py +++ b/update.py @@ -47,14 +47,14 @@ def lambda_handler(event, context): print("Downloading definition file %s complete!" % (local_path)) if AV_USE_FANGFRISCH: - subprocess.run("/usr/bin/python3 --version", shell=True) env_pythonpath = os.environ.copy() env_pythonpath["PYTHONPATH"] = os.path.join(env_pythonpath["LAMBDA_TASK_ROOT"], "cli") bucket_extra_defs_path = os.path.join("s3://", AV_DEFINITION_S3_BUCKET, AV_DEFINITION_S3_EXTRA_PREFIX) sync_command = f"cli/bin/aws s3 sync {bucket_extra_defs_path} {AV_DEFINITION_EXTRA_PATH}" subprocess.run(sync_command, shell=True, env=env_pythonpath) - fangfrisch_base_command = "cli/bin/fangfrisch --conf fangfrisch.conf" + fangfrisch_base_command = ("cli/bin/fangfrisch " + f"--conf {os.path.join(os.environ['LAMBDA_TASK_ROOT'], 'fangfrisch.conf')}") subprocess.run(f"{fangfrisch_base_command} initdb", shell=True, env=env_pythonpath) subprocess.run(f"{fangfrisch_base_command} refresh", shell=True, env=env_pythonpath) From af499af883e3fdc37f83301449d776d6f0eea794 Mon Sep 17 00:00:00 2001 From: Gabriel Chamon Araujo Date: Tue, 28 Feb 2023 13:41:55 -0300 Subject: [PATCH 20/49] add missing fangfrisch.conf file --- Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Dockerfile b/Dockerfile index da171699..c42c668e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -75,6 +75,7 @@ ENV LD_LIBRARY_PATH=/opt/app/bin RUN ldconfig # Create the zip file +COPY fangfrisch.conf /opt/app/fangfrisch.conf RUN cd /opt/app \ && zip -r9 --exclude="*test*" /opt/app/build/lambda.zip *.py *.conf bin cli \ && cd /opt/app/python_deps \ From 0180c60871aa2031e4df337ac22f5738fe9b619d Mon Sep 17 00:00:00 2001 From: Gabriel Chamon Araujo Date: Tue, 28 Feb 2023 13:47:58 -0300 Subject: [PATCH 21/49] check shell return; sync without progress --- update.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/update.py b/update.py index 11d39af8..2a4eebe1 100644 --- a/update.py +++ b/update.py @@ -50,16 +50,17 @@ def lambda_handler(event, context): env_pythonpath = os.environ.copy() env_pythonpath["PYTHONPATH"] = os.path.join(env_pythonpath["LAMBDA_TASK_ROOT"], "cli") bucket_extra_defs_path = os.path.join("s3://", AV_DEFINITION_S3_BUCKET, AV_DEFINITION_S3_EXTRA_PREFIX) - sync_command = f"cli/bin/aws s3 sync {bucket_extra_defs_path} {AV_DEFINITION_EXTRA_PATH}" - subprocess.run(sync_command, shell=True, env=env_pythonpath) + aws_s3_sync_command = "cli/bin/aws s3 sync --no-progress" + sync_command = f"{aws_s3_sync_command} {bucket_extra_defs_path} {AV_DEFINITION_EXTRA_PATH}" + subprocess.run(sync_command, shell=True, env=env_pythonpath, check=True) fangfrisch_base_command = ("cli/bin/fangfrisch " f"--conf {os.path.join(os.environ['LAMBDA_TASK_ROOT'], 'fangfrisch.conf')}") - subprocess.run(f"{fangfrisch_base_command} initdb", shell=True, env=env_pythonpath) - subprocess.run(f"{fangfrisch_base_command} refresh", shell=True, env=env_pythonpath) + subprocess.run(f"{fangfrisch_base_command} initdb", shell=True, env=env_pythonpath, check=True) + subprocess.run(f"{fangfrisch_base_command} refresh", shell=True, env=env_pythonpath, check=True) - sync_after_command = f"cli/bin/aws s3 sync {AV_DEFINITION_EXTRA_PATH} {bucket_extra_defs_path}" - subprocess.run(sync_after_command, shell=True, env=env_pythonpath) + sync_after_command = f"{aws_s3_sync_command} {AV_DEFINITION_EXTRA_PATH} {bucket_extra_defs_path}" + subprocess.run(sync_after_command, shell=True, env=env_pythonpath, check=True) else: print("Skip downloading extra virus definitions with Fangfrisch") From b253208e7106558e3a430c06521fd25ebb0a5065 Mon Sep 17 00:00:00 2001 From: Gabriel Chamon Araujo Date: Tue, 28 Feb 2023 14:16:31 -0300 Subject: [PATCH 22/49] use update_defs_from_s3 to download extra definitions --- clamav.py | 2 +- common.py | 43 +++++++++++++++++++++++++++++++++++++++++-- scan.py | 4 ++++ 3 files changed, 46 insertions(+), 3 deletions(-) diff --git a/clamav.py b/clamav.py index a44ab3a2..217ebca1 100644 --- a/clamav.py +++ b/clamav.py @@ -54,7 +54,7 @@ def update_defs_from_s3(s3_client, bucket, prefix): s3_best_time = None for file_suffix in AV_DEFINITION_FILE_SUFFIXES: filename = file_prefix + "." + file_suffix - s3_path = os.path.join(AV_DEFINITION_S3_PREFIX, filename) + s3_path = os.path.join(prefix, filename) local_path = os.path.join(AV_DEFINITION_PATH, filename) s3_md5 = md5_from_s3_tags(s3_client, bucket, s3_path) s3_time = time_from_s3(s3_client, bucket, s3_path) diff --git a/common.py b/common.py index e81b8cea..912d6ed8 100644 --- a/common.py +++ b/common.py @@ -64,8 +64,47 @@ def str_to_bool(s): )) AV_DELETE_INFECTED_FILES = str_to_bool(os.getenv("AV_DELETE_INFECTED_FILES", "False")) -AV_DEFINITION_FILE_PREFIXES = ["main", "daily", "bytecode"] -AV_DEFINITION_FILE_SUFFIXES = ["cld", "cvd"] +AV_DEFINITION_FILE_PREFIXES = [ + "main", + "daily", + "bytecode", + 'badmacro', + 'blurl', + 'bofhland_cracked_URL', + 'bofhland_malware_URL', + 'bofhland_malware_attach', + 'bofhland_phishing_URL', + 'foxhole_filename', + 'foxhole_generic', + 'foxhole_js', + 'hackingteam', + 'junk', + 'jurlbl', + 'jurlbla', + 'lott', + 'malware.expert', + 'malwarehash', + 'phish', + 'phishtank', + 'porcupine', + 'rogue', + 'scam', + 'shelter', + 'spamattach', + 'spamimg', + 'spear', + 'spearl', + 'urlhaus', + 'winnow.attachments', + 'winnow_bad_cw', + 'winnow_extended_malware', + 'winnow_extended_malware_links', + 'winnow_malware', + 'winnow_malware_links', + 'winnow_phish_complete_url', + 'winnow_spam_complete' +] +AV_DEFINITION_FILE_SUFFIXES = ['cdb', 'fp', 'hdb', 'hsb', 'ldb', 'ndb', "cld", "cvd"] SNS_ENDPOINT = os.getenv("SNS_ENDPOINT", None) S3_ENDPOINT = os.getenv("S3_ENDPOINT", None) LAMBDA_ENDPOINT = os.getenv("LAMBDA_ENDPOINT", None) diff --git a/scan.py b/scan.py index 0eff952e..d919ce9d 100644 --- a/scan.py +++ b/scan.py @@ -24,6 +24,7 @@ import metrics from common import AV_DEFINITION_S3_BUCKET from common import AV_DEFINITION_S3_PREFIX +from common import AV_DEFINITION_S3_EXTRA_PREFIX from common import AV_DELETE_INFECTED_FILES from common import AV_PROCESS_ORIGINAL_VERSION_ONLY from common import AV_SCAN_START_METADATA @@ -225,6 +226,9 @@ def lambda_handler(event, context): to_download = clamav.update_defs_from_s3( s3_client, AV_DEFINITION_S3_BUCKET, AV_DEFINITION_S3_PREFIX ) + to_download.update(clamav.update_defs_from_s3( + s3_client, AV_DEFINITION_S3_BUCKET, AV_DEFINITION_S3_EXTRA_PREFIX + )) for download in to_download.values(): s3_path = download["s3_path"] From 1ec5169787d682c03c3c784a6328acac7694b04f Mon Sep 17 00:00:00 2001 From: Gabriel Chamon Araujo Date: Tue, 28 Feb 2023 15:39:58 -0300 Subject: [PATCH 23/49] ditch awscli for custom down/upload functions --- Dockerfile | 3 +- README.md | 4 +-- clamav.py | 7 ++-- common.py | 85 ++++++++++++++++++++++++-------------------- requirements-cli.txt | 1 - scan.py | 7 +--- update.py | 12 ++----- 7 files changed, 54 insertions(+), 65 deletions(-) diff --git a/Dockerfile b/Dockerfile index c42c668e..bdb61fa9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -26,8 +26,7 @@ COPY requirements-cli.txt /opt/app/ RUN pip3 install --requirement requirements-cli.txt --target /opt/app/cli \ && rm -rf /root/.cache/pip \ && sed -i 's~/usr/bin/python3~/var/lang/bin/python3~g' \ - /opt/app/cli/bin/fangfrisch \ - /opt/app/cli/bin/aws + /opt/app/cli/bin/fangfrisch # Download libraries we need to run in lambda WORKDIR /tmp diff --git a/README.md b/README.md index a8fc5f04..7da043ec 100644 --- a/README.md +++ b/README.md @@ -85,9 +85,7 @@ the table below for reference. |----------------------------------|-------------------------------------------------------------------------------------------------|------------------------|----------| | AV_DEFINITION_S3_BUCKET | Bucket containing antivirus definition files | | Yes | | AV_DEFINITION_S3_PREFIX | Prefix for antivirus definition files | clamav_defs | No | -| AV_DEFINITION_S3_EXTRA_PREFIX | Prefix for extra antivirus definition files (downloaded with fangfrisch) | clamav_extra_defs | No | | AV_DEFINITION_PATH | Path containing virus definition files at runtime | /tmp/clamav_defs | No | -| AV_DEFINITION_EXTRA_PATH | Path containing extra virus definition files at runtime | /tmp/clamav_extra_defs | No | | AV_SCAN_START_SNS_ARN | SNS topic ARN to publish notification about start of scan | | No | | AV_SCAN_START_METADATA | The tag/metadata indicating the start of the scan | av-scan-start | No | | AV_SIGNATURE_METADATA | The tag/metadata name representing file's AV type | av-signature | No | @@ -98,7 +96,7 @@ the table below for reference. | AV_STATUS_SNS_PUBLISH_CLEAN | Publish AV_STATUS_CLEAN results to AV_STATUS_SNS_ARN | True | No | | AV_STATUS_SNS_PUBLISH_INFECTED | Publish AV_STATUS_INFECTED results to AV_STATUS_SNS_ARN | True | No | | AV_TIMESTAMP_METADATA | The tag/metadata name representing file's scan time | av-timestamp | No | -| AV_USE_FANGFRISCH | Uses fangfrisch for extra download definitions | False | No | +| AV_EXTRA_VIRUS_DEFINITIONS | Uses fangfrisch for extra antivirus definitions | False | No | | CLAMAVLIB_PATH | Path to ClamAV library files | ./bin | No | | CLAMSCAN_PATH | Path to ClamAV clamscan binary | ./bin/clamscan | No | | FRESHCLAM_PATH | Path to ClamAV freshclam binary | ./bin/freshclam | No | diff --git a/clamav.py b/clamav.py index 217ebca1..8b211d01 100644 --- a/clamav.py +++ b/clamav.py @@ -24,10 +24,9 @@ import botocore from pytz import utc -from common import AV_DEFINITION_S3_PREFIX, S3_ENDPOINT -from common import AV_DEFINITION_PATH from common import AV_DEFINITION_FILE_PREFIXES from common import AV_DEFINITION_FILE_SUFFIXES +from common import AV_DEFINITION_PATH from common import AV_SIGNATURE_OK from common import AV_SIGNATURE_UNKNOWN from common import AV_STATUS_CLEAN @@ -35,9 +34,9 @@ from common import CLAMAVLIB_PATH from common import CLAMSCAN_PATH from common import FRESHCLAM_PATH +from common import S3_ENDPOINT from common import create_dir - RE_SEARCH_DIR = r"SEARCH_DIR\(\"=([A-z0-9\/\-_]*)\"\)" @@ -144,7 +143,7 @@ def md5_from_file(filename): def md5_from_s3_tags(s3_client, bucket, key): try: - tags = s3_client.get_object_tagging(Bucket=bucket, Key=key)["TagSet"] + tags = s3_client.get_object_tagging(Bucket=bucket, Key=key)["ETag"] except botocore.exceptions.ClientError as e: expected_errors = { "404", # Object does not exist diff --git a/common.py b/common.py index 912d6ed8..160228ed 100644 --- a/common.py +++ b/common.py @@ -40,9 +40,7 @@ def str_to_bool(s): AV_DEFINITION_S3_BUCKET = os.getenv("AV_DEFINITION_S3_BUCKET") AV_DEFINITION_S3_PREFIX = os.getenv("AV_DEFINITION_S3_PREFIX", "clamav_defs") -AV_DEFINITION_S3_EXTRA_PREFIX = os.getenv("AV_DEFINITION_S3_EXTRA_PREFIX", "clamav_extra_defs") AV_DEFINITION_PATH = os.getenv("AV_DEFINITION_PATH", "/tmp/clamav_defs") -AV_DEFINITION_EXTRA_PATH = os.getenv("AV_DEFINITION_EXTRA_PATH", "/tmp/clamav_extra_defs") AV_SCAN_START_SNS_ARN = os.getenv("AV_SCAN_START_SNS_ARN") AV_SCAN_START_METADATA = os.getenv("AV_SCAN_START_METADATA", "av-scan-start") AV_SIGNATURE_METADATA = os.getenv("AV_SIGNATURE_METADATA", "av-signature") @@ -55,7 +53,7 @@ def str_to_bool(s): AV_STATUS_SNS_PUBLISH_CLEAN = str_to_bool(os.getenv("AV_STATUS_SNS_PUBLISH_CLEAN", "True")) AV_STATUS_SNS_PUBLISH_INFECTED = str_to_bool(os.getenv("AV_STATUS_SNS_PUBLISH_INFECTED", "True")) AV_TIMESTAMP_METADATA = os.getenv("AV_TIMESTAMP_METADATA", "av-timestamp") -AV_USE_FANGFRISCH = str_to_bool(os.getenv("AV_USE_FANGFRISCH", "False")) +AV_EXTRA_VIRUS_DEFINITIONS = str_to_bool(os.getenv("AV_EXTRA_VIRUS_DEFINITIONS", "False")) CLAMAVLIB_PATH = os.getenv("CLAMAVLIB_PATH", "./bin") CLAMSCAN_PATH = os.getenv("CLAMSCAN_PATH", "./bin/clamscan") FRESHCLAM_PATH = os.getenv("FRESHCLAM_PATH", "./bin/freshclam") @@ -68,43 +66,52 @@ def str_to_bool(s): "main", "daily", "bytecode", - 'badmacro', - 'blurl', - 'bofhland_cracked_URL', - 'bofhland_malware_URL', - 'bofhland_malware_attach', - 'bofhland_phishing_URL', - 'foxhole_filename', - 'foxhole_generic', - 'foxhole_js', - 'hackingteam', - 'junk', - 'jurlbl', - 'jurlbla', - 'lott', - 'malware.expert', - 'malwarehash', - 'phish', - 'phishtank', - 'porcupine', - 'rogue', - 'scam', - 'shelter', - 'spamattach', - 'spamimg', - 'spear', - 'spearl', - 'urlhaus', - 'winnow.attachments', - 'winnow_bad_cw', - 'winnow_extended_malware', - 'winnow_extended_malware_links', - 'winnow_malware', - 'winnow_malware_links', - 'winnow_phish_complete_url', - 'winnow_spam_complete' ] -AV_DEFINITION_FILE_SUFFIXES = ['cdb', 'fp', 'hdb', 'hsb', 'ldb', 'ndb', "cld", "cvd"] +AV_DEFINITION_FILE_SUFFIXES = ["cld", "cvd"] + +if AV_EXTRA_VIRUS_DEFINITIONS is True: + AV_DEFINITION_FILE_PREFIXES = list(set(AV_DEFINITION_FILE_PREFIXES + [ + 'badmacro', + 'blurl', + 'bofhland_cracked_URL', + 'bofhland_malware_URL', + 'bofhland_malware_attach', + 'bofhland_phishing_URL', + 'foxhole_filename', + 'foxhole_generic', + 'foxhole_js', + 'db', + 'hackingteam', + 'junk', + 'jurlbl', + 'jurlbla', + 'lott', + 'malware.expert', + 'malwarehash', + 'phish', + 'phishtank', + 'porcupine', + 'rogue', + 'scam', + 'shelter', + 'spamattach', + 'spamimg', + 'spear', + 'spearl', + 'urlhaus', + 'winnow.attachments', + 'winnow_bad_cw', + 'winnow_extended_malware', + 'winnow_extended_malware_links', + 'winnow_malware', + 'winnow_malware_links', + 'winnow_phish_complete_url', + 'winnow_spam_complete' + ])) + AV_DEFINITION_FILE_SUFFIXES = list(set( + AV_DEFINITION_FILE_SUFFIXES + ['cdb', 'fp', 'hdb', 'hsb', 'ldb', 'ndb', 'sqlite'] + )) + SNS_ENDPOINT = os.getenv("SNS_ENDPOINT", None) S3_ENDPOINT = os.getenv("S3_ENDPOINT", None) LAMBDA_ENDPOINT = os.getenv("LAMBDA_ENDPOINT", None) diff --git a/requirements-cli.txt b/requirements-cli.txt index df29e787..44ddf5e7 100644 --- a/requirements-cli.txt +++ b/requirements-cli.txt @@ -1,2 +1 @@ fangfrisch==1.6.1 -awscli==1.27.78 diff --git a/scan.py b/scan.py index d919ce9d..32e17060 100644 --- a/scan.py +++ b/scan.py @@ -24,7 +24,6 @@ import metrics from common import AV_DEFINITION_S3_BUCKET from common import AV_DEFINITION_S3_PREFIX -from common import AV_DEFINITION_S3_EXTRA_PREFIX from common import AV_DELETE_INFECTED_FILES from common import AV_PROCESS_ORIGINAL_VERSION_ONLY from common import AV_SCAN_START_METADATA @@ -37,14 +36,13 @@ from common import AV_STATUS_SNS_PUBLISH_CLEAN from common import AV_STATUS_SNS_PUBLISH_INFECTED from common import AV_TIMESTAMP_METADATA -from common import SNS_ENDPOINT from common import S3_ENDPOINT +from common import SNS_ENDPOINT from common import create_dir from common import get_timestamp def event_object(event, event_source="s3"): - # SNS events are slightly different if event_source.upper() == "SNS": event = json.loads(event["Records"][0]["Sns"]["Message"]) @@ -226,9 +224,6 @@ def lambda_handler(event, context): to_download = clamav.update_defs_from_s3( s3_client, AV_DEFINITION_S3_BUCKET, AV_DEFINITION_S3_PREFIX ) - to_download.update(clamav.update_defs_from_s3( - s3_client, AV_DEFINITION_S3_BUCKET, AV_DEFINITION_S3_EXTRA_PREFIX - )) for download in to_download.values(): s3_path = download["s3_path"] diff --git a/update.py b/update.py index 2a4eebe1..f867faf3 100644 --- a/update.py +++ b/update.py @@ -20,11 +20,9 @@ import clamav from common import AV_DEFINITION_PATH -from common import AV_DEFINITION_EXTRA_PATH from common import AV_DEFINITION_S3_BUCKET from common import AV_DEFINITION_S3_PREFIX -from common import AV_DEFINITION_S3_EXTRA_PREFIX -from common import AV_USE_FANGFRISCH +from common import AV_EXTRA_VIRUS_DEFINITIONS from common import CLAMAVLIB_PATH from common import S3_ENDPOINT from common import get_timestamp @@ -46,21 +44,15 @@ def lambda_handler(event, context): s3.Bucket(AV_DEFINITION_S3_BUCKET).download_file(s3_path, local_path) print("Downloading definition file %s complete!" % (local_path)) - if AV_USE_FANGFRISCH: + if AV_EXTRA_VIRUS_DEFINITIONS: env_pythonpath = os.environ.copy() env_pythonpath["PYTHONPATH"] = os.path.join(env_pythonpath["LAMBDA_TASK_ROOT"], "cli") - bucket_extra_defs_path = os.path.join("s3://", AV_DEFINITION_S3_BUCKET, AV_DEFINITION_S3_EXTRA_PREFIX) - aws_s3_sync_command = "cli/bin/aws s3 sync --no-progress" - sync_command = f"{aws_s3_sync_command} {bucket_extra_defs_path} {AV_DEFINITION_EXTRA_PATH}" - subprocess.run(sync_command, shell=True, env=env_pythonpath, check=True) fangfrisch_base_command = ("cli/bin/fangfrisch " f"--conf {os.path.join(os.environ['LAMBDA_TASK_ROOT'], 'fangfrisch.conf')}") subprocess.run(f"{fangfrisch_base_command} initdb", shell=True, env=env_pythonpath, check=True) subprocess.run(f"{fangfrisch_base_command} refresh", shell=True, env=env_pythonpath, check=True) - sync_after_command = f"{aws_s3_sync_command} {AV_DEFINITION_EXTRA_PATH} {bucket_extra_defs_path}" - subprocess.run(sync_after_command, shell=True, env=env_pythonpath, check=True) else: print("Skip downloading extra virus definitions with Fangfrisch") From af8746799c7131d43391f99e9d417b874706d6f8 Mon Sep 17 00:00:00 2001 From: Gabriel Chamon Araujo Date: Tue, 28 Feb 2023 15:50:59 -0300 Subject: [PATCH 24/49] use sed to reflect AV_DEFINITION_PATH in fangfrisch.conf --- fangfrisch.conf | 4 ++-- update.py | 8 +++++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/fangfrisch.conf b/fangfrisch.conf index 6d8b2871..98d55e8e 100644 --- a/fangfrisch.conf +++ b/fangfrisch.conf @@ -1,6 +1,6 @@ [DEFAULT] -db_url = sqlite:////tmp/clamav_extra_defs/db.sqlite -local_directory = /tmp/clamav_extra_defs +db_url = sqlite:///AV_DEFINITION_PATH/db.sqlite +local_directory = AV_DEFINITION_PATH on_update_timeout = 120 log_level = INFO log_method = console diff --git a/update.py b/update.py index f867faf3..0a8574f3 100644 --- a/update.py +++ b/update.py @@ -44,12 +44,14 @@ def lambda_handler(event, context): s3.Bucket(AV_DEFINITION_S3_BUCKET).download_file(s3_path, local_path) print("Downloading definition file %s complete!" % (local_path)) - if AV_EXTRA_VIRUS_DEFINITIONS: + if AV_EXTRA_VIRUS_DEFINITIONS is True: env_pythonpath = os.environ.copy() env_pythonpath["PYTHONPATH"] = os.path.join(env_pythonpath["LAMBDA_TASK_ROOT"], "cli") - fangfrisch_base_command = ("cli/bin/fangfrisch " - f"--conf {os.path.join(os.environ['LAMBDA_TASK_ROOT'], 'fangfrisch.conf')}") + fangfrisch_conf_filepath = os.path.join(os.environ['LAMBDA_TASK_ROOT'], 'fangfrisch.conf') + fangfrisch_base_command = f"cli/bin/fangfrisch --conf {fangfrisch_conf_filepath}" + subprocess.run(f"sed -i 's~AV_DEFINITION_PATH~{AV_DEFINITION_PATH}~g' {fangfrisch_conf_filepath}", + shell=True, check=True) subprocess.run(f"{fangfrisch_base_command} initdb", shell=True, env=env_pythonpath, check=True) subprocess.run(f"{fangfrisch_base_command} refresh", shell=True, env=env_pythonpath, check=True) From d1c3ac89b25846bf251b7731f3f586b96d0376c6 Mon Sep 17 00:00:00 2001 From: Gabriel Chamon Araujo Date: Tue, 28 Feb 2023 16:21:28 -0300 Subject: [PATCH 25/49] revert changes to md5 from s3 tags function --- clamav.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clamav.py b/clamav.py index 8b211d01..24a59e67 100644 --- a/clamav.py +++ b/clamav.py @@ -143,7 +143,7 @@ def md5_from_file(filename): def md5_from_s3_tags(s3_client, bucket, key): try: - tags = s3_client.get_object_tagging(Bucket=bucket, Key=key)["ETag"] + tags = s3_client.get_object_tagging(Bucket=bucket, Key=key)["TagSet"] except botocore.exceptions.ClientError as e: expected_errors = { "404", # Object does not exist From 89d6c80683121c1c77a3b088dfddfd47201a2752 Mon Sep 17 00:00:00 2001 From: Gabriel Chamon Araujo Date: Tue, 28 Feb 2023 16:30:29 -0300 Subject: [PATCH 26/49] changing fangfrisch.conf in /tmp folder --- update.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/update.py b/update.py index 0a8574f3..692a16c9 100644 --- a/update.py +++ b/update.py @@ -49,9 +49,11 @@ def lambda_handler(event, context): env_pythonpath["PYTHONPATH"] = os.path.join(env_pythonpath["LAMBDA_TASK_ROOT"], "cli") fangfrisch_conf_filepath = os.path.join(os.environ['LAMBDA_TASK_ROOT'], 'fangfrisch.conf') - fangfrisch_base_command = f"cli/bin/fangfrisch --conf {fangfrisch_conf_filepath}" - subprocess.run(f"sed -i 's~AV_DEFINITION_PATH~{AV_DEFINITION_PATH}~g' {fangfrisch_conf_filepath}", - shell=True, check=True) + fangfrisch_base_command = f"cli/bin/fangfrisch --conf /tmp/fangfrisch.conf" + subprocess.run(f"cp {fangfrisch_conf_filepath} /tmp/fangfrisch.conf &&" + f"sed -i 's~AV_DEFINITION_PATH~{AV_DEFINITION_PATH}~g' /tmp/fangfrisch.conf", + shell=True, + check=True) subprocess.run(f"{fangfrisch_base_command} initdb", shell=True, env=env_pythonpath, check=True) subprocess.run(f"{fangfrisch_base_command} refresh", shell=True, env=env_pythonpath, check=True) From 177d3dcad7975d9dfb64ea0caa6a4708de8507eb Mon Sep 17 00:00:00 2001 From: Gabriel Chamon Araujo Date: Tue, 28 Feb 2023 19:37:38 -0300 Subject: [PATCH 27/49] add hardened database with false-positives avoiding configs --- Dockerfile | 8 +++++- common.py | 15 ++++++++--- fangfrisch.conf | 70 ++++++++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 86 insertions(+), 7 deletions(-) diff --git a/Dockerfile b/Dockerfile index bdb61fa9..fc17f6dc 100644 --- a/Dockerfile +++ b/Dockerfile @@ -64,7 +64,13 @@ RUN cp /tmp/usr/bin/clamscan /tmp/usr/bin/freshclam /tmp/usr/lib64/* /usr/lib64/ RUN echo "DatabaseMirror database.clamav.net" > /opt/app/bin/freshclam.conf \ && echo "CompressLocalDatabase yes" >> /opt/app/bin/freshclam.conf \ && echo "ScriptedUpdates no" >> /opt/app/bin/freshclam.conf \ - && echo "DatabaseDirectory /var/lib/clamav" >> /opt/app/bin/freshclam.conf + && echo "DatabaseDirectory /var/lib/clamav" >> /opt/app/bin/freshclam.conf \ + && echo "DetectPUA yes" >> /opt/app/bin/freshclam.conf \ + && echo "ExcludePUA PUA.Win.Packer" >> /opt/app/bin/freshclam.conf \ + && echo "ExcludePUA PUA.Win.Trojan.Packed" >> /opt/app/bin/freshclam.conf \ + && echo "ExcludePUA PUA.Win.Trojan.Molebox" >> /opt/app/bin/freshclam.conf \ + && echo "ExcludePUA PUA.Win.Packer.Upx" >> /opt/app/bin/freshclam.conf \ + && echo "ExcludePUA PUA.Doc.Packed" >> /opt/app/bin/freshclam.conf RUN groupadd clamav \ && useradd -g clamav -s /bin/false -c "Clam Antivirus" clamav \ diff --git a/common.py b/common.py index 160228ed..335c6cce 100644 --- a/common.py +++ b/common.py @@ -71,34 +71,43 @@ def str_to_bool(s): if AV_EXTRA_VIRUS_DEFINITIONS is True: AV_DEFINITION_FILE_PREFIXES = list(set(AV_DEFINITION_FILE_PREFIXES + [ + 'MiscreantPunch099-Low', 'badmacro', 'blurl', 'bofhland_cracked_URL', 'bofhland_malware_URL', 'bofhland_malware_attach', 'bofhland_phishing_URL', + 'ditekshen', + 'exexor99', 'foxhole_filename', 'foxhole_generic', 'foxhole_js', - 'db', 'hackingteam', + 'interserver256', + 'interservertopline', 'junk', 'jurlbl', 'jurlbla', 'lott', - 'malware.expert', 'malwarehash', + 'miscreantpunch', 'phish', 'phishtank', 'porcupine', + 'rfxn', 'rogue', 'scam', + 'shell', 'shelter', 'spamattach', 'spamimg', 'spear', 'spearl', + 'twinclams', + 'twinwave', 'urlhaus', + 'whitelist', 'winnow.attachments', 'winnow_bad_cw', 'winnow_extended_malware', @@ -109,7 +118,7 @@ def str_to_bool(s): 'winnow_spam_complete' ])) AV_DEFINITION_FILE_SUFFIXES = list(set( - AV_DEFINITION_FILE_SUFFIXES + ['cdb', 'fp', 'hdb', 'hsb', 'ldb', 'ndb', 'sqlite'] + AV_DEFINITION_FILE_SUFFIXES + ['cdb', 'db', 'fp', 'hdb', 'hsb', 'ign2', 'ldb', 'ndb', 'yara'] )) SNS_ENDPOINT = os.getenv("SNS_ENDPOINT", None) diff --git a/fangfrisch.conf b/fangfrisch.conf index 98d55e8e..8ee1c365 100644 --- a/fangfrisch.conf +++ b/fangfrisch.conf @@ -1,15 +1,33 @@ [DEFAULT] db_url = sqlite:///AV_DEFINITION_PATH/db.sqlite local_directory = AV_DEFINITION_PATH -on_update_timeout = 120 -log_level = INFO -log_method = console + +# The following settings are optional. Other sections inherit +# values from DEFAULT and may also overwrite values. + +max_size = 5MB +log_level = info [malwarepatrol] enabled = no +# Replace with your personal Malwarepatrol receipt +# receipt = XXXXXXXXX +# change product id if necessary. +# 32 = free guard, 33 = Basic Defense yearly, 34 = Basic Defense monthly, 37 = Basic Defense EDU/Contributor +# product = 32 + +[malwareexpert] +enabled = no [sanesecurity] +prefix = https://ftp.swin.edu.au/sanesecurity/ +max_size = 10M enabled = yes +interval = 1h +url_malwareexpert_fp = disabled +url_malwareexpert_hdb = disabled +url_malwareexpert_ldb = disabled +url_malwareexpert_ndb = disabled [securiteinfo] enabled = no @@ -17,3 +35,49 @@ enabled = no [urlhaus] enabled = yes max_size = 2MB + +[twinwave] +enabled = yes +max_size = 2M +integrity_check = disabled +interval = 1h +prefix = https://raw.githubusercontent.com/twinwave-security/twinclams/master/ +url_twinclams = ${prefix}twinclams.ldb +url_twinwave_ign2 = ${prefix}twinwave.ign2 + +[clampunch] +enabled = yes +max_size = 2M +integrity_check = disabled +interval = 24h +prefix = https://raw.githubusercontent.com/wmetcalf/clam-punch/master/ +url_miscreantpunch099low = ${prefix}MiscreantPunch099-Low.ldb +url_exexor99 = ${prefix}exexor99.ldb +url_miscreantpuchhdb = ${prefix}miscreantpunch.hdb + +[rfxn] +enabled = yes +interval= 4h +integrity_check = disabled +prefix = https://www.rfxn.com/downloads/ +url_rfxn_ndb = ${prefix}rfxn.ndb +url_rfxn_hdb = ${prefix}rfxn.hdb +url_rfxn_yara = ${prefix}rfxn.yara + +[interserver] +enabled = yes +interval = 1d +integrity_check = disabled +prefix = https://rbldata.interserver.net/ +url_interserver_sha256 = ${prefix}interserver256.hdb +url_interserver_topline = ${prefix}interservertopline.db +url_interserver_shell = ${prefix}shell.ldb +url_interserver_whitelist = ${prefix}whitelist.fp + +[ditekshen] +enabled = yes +interval = 1d +integrity_check = disabled +prefix = https://raw.githubusercontent.com/ditekshen/detection/master/clamav/ +url_ditekshen_ldb = ${prefix}clamav.ldb +filename_ditekshen_ldb = ditekshen.ldb From 1ac4456cb1f767d878d0b1788a29b877e772a29b Mon Sep 17 00:00:00 2001 From: Gabriel Chamon Araujo Date: Tue, 28 Feb 2023 19:45:35 -0300 Subject: [PATCH 28/49] minor fix for logging download definition --- scan.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scan.py b/scan.py index 32e17060..92feb81d 100644 --- a/scan.py +++ b/scan.py @@ -228,7 +228,8 @@ def lambda_handler(event, context): for download in to_download.values(): s3_path = download["s3_path"] local_path = download["local_path"] - print("Downloading definition file %s from s3://%s" % (local_path, s3_path)) + s3_url = os.path.join("s3://", AV_DEFINITION_S3_BUCKET, s3_path) + print("Downloading definition file %s from %s" % (local_path, s3_url)) s3.Bucket(AV_DEFINITION_S3_BUCKET).download_file(s3_path, local_path) print("Downloading definition file %s complete!" % local_path) scan_result, scan_signature = clamav.scan_file(file_path) From ffa3c6b0d13aefec641ea8bafa905a05c0743f1f Mon Sep 17 00:00:00 2001 From: Gabriel Chamon Araujo Date: Wed, 1 Mar 2023 11:00:47 -0300 Subject: [PATCH 29/49] merging clamdscan for scan.conf support and lower scan timers must still verify if extra virus definitions get pulled form S3 from https://github.com/bluesentry/bucket-antivirus-function/pull/112 --- .envrc.local.template | 2 +- Dockerfile | 33 ++++++++++--- README.md | 52 ++++++++++---------- clamav.py | 110 +++++++++++++++++++++++++++++++++++++----- common.py | 4 +- scan.py | 66 ++++++++++++++++--------- 6 files changed, 197 insertions(+), 70 deletions(-) diff --git a/.envrc.local.template b/.envrc.local.template index a196e5d3..4007b13c 100644 --- a/.envrc.local.template +++ b/.envrc.local.template @@ -44,5 +44,5 @@ export TEST_KEY="" # export AV_STATUS_SNS_PUBLISH_INFECTED # export AV_TIMESTAMP_METADATA # export CLAMAVLIB_PATH -# export CLAMSCAN_PATH +# export CLAMDSCAN_PATH # export FRESHCLAM_PATH diff --git a/Dockerfile b/Dockerfile index fc17f6dc..429e0c09 100644 --- a/Dockerfile +++ b/Dockerfile @@ -34,8 +34,12 @@ RUN yumdownloader -x \*i686 --archlist=x86_64 \ clamav \ clamav-lib \ clamav-update \ + clamav-scanner-systemd \ + elfutils-libs \ json-c \ + lz4 \ pcre2 \ + systemd-libs \ libtool-ltdl \ libxml2 \ bzip2-libs \ @@ -54,11 +58,21 @@ RUN rpm2cpio clamav-0*.rpm | cpio -vimd \ && rpm2cpio xz-libs*.rpm | cpio -vimd \ && rpm2cpio libprelude*.rpm | cpio -vimd \ && rpm2cpio gnutls*.rpm | cpio -vimd \ - && rpm2cpio nettle*.rpm | cpio -vimd + && rpm2cpio nettle*.rpm | cpio -vimd \ + && rpm2cpio clamd-0*.rpm | cpio -idmv \ + && rpm2cpio elfutils-libs*.rpm | cpio -idmv \ + && rpm2cpio lz4*.rpm | cpio -idmv \ + && rpm2cpio systemd-libs*.rpm | cpio -idmv # Copy over the binaries and libraries -RUN cp /tmp/usr/bin/clamscan /tmp/usr/bin/freshclam /tmp/usr/lib64/* /usr/lib64/libpcre.so.1 /opt/app/bin/ +RUN cp -r \ + /tmp/usr/bin/clamdscan \ + /tmp/usr/sbin/clamd \ + /tmp/usr/bin/freshclam \ + /tmp/usr/lib64/* \ + /usr/lib64/libpcre.so.1 \ + /opt/app/bin/ # Fix the freshclam.conf settings RUN echo "DatabaseMirror database.clamav.net" > /opt/app/bin/freshclam.conf \ @@ -66,11 +80,16 @@ RUN echo "DatabaseMirror database.clamav.net" > /opt/app/bin/freshclam.conf \ && echo "ScriptedUpdates no" >> /opt/app/bin/freshclam.conf \ && echo "DatabaseDirectory /var/lib/clamav" >> /opt/app/bin/freshclam.conf \ && echo "DetectPUA yes" >> /opt/app/bin/freshclam.conf \ - && echo "ExcludePUA PUA.Win.Packer" >> /opt/app/bin/freshclam.conf \ - && echo "ExcludePUA PUA.Win.Trojan.Packed" >> /opt/app/bin/freshclam.conf \ - && echo "ExcludePUA PUA.Win.Trojan.Molebox" >> /opt/app/bin/freshclam.conf \ - && echo "ExcludePUA PUA.Win.Packer.Upx" >> /opt/app/bin/freshclam.conf \ - && echo "ExcludePUA PUA.Doc.Packed" >> /opt/app/bin/freshclam.conf + && echo "DatabaseDirectory /tmp/clamav_defs" > /opt/app/bin/scan.conf \ + && echo "PidFile /tmp/clamd.pid" >> /opt/app/bin/scan.conf \ + && echo "LogFile /tmp/clamd.log" >> /opt/app/bin/scan.conf \ + && echo "LocalSocket /tmp/clamd.sock" >> /opt/app/bin/scan.conf \ + && echo "FixStaleSocket yes" >> /opt/app/bin/scan.conf \ + && echo "ExcludePUA PUA.Win.Packer" >> /opt/app/bin/scan.conf \ + && echo "ExcludePUA PUA.Win.Trojan.Packed" >> /opt/app/bin/scan.conf \ + && echo "ExcludePUA PUA.Win.Trojan.Molebox" >> /opt/app/bin/scan.conf \ + && echo "ExcludePUA PUA.Win.Packer.Upx" >> /opt/app/bin/scan.conf \ + && echo "ExcludePUA PUA.Doc.Packed" >> /opt/app/bin/scan.conf RUN groupadd clamav \ && useradd -g clamav -s /bin/false -c "Clam Antivirus" clamav \ diff --git a/README.md b/README.md index 7da043ec..aba98f84 100644 --- a/README.md +++ b/README.md @@ -81,32 +81,32 @@ can cause a continuous loop of scanning if improperly configured. Runtime configuration is accomplished using environment variables. See the table below for reference. -| Variable | Description | Default | Required | -|----------------------------------|-------------------------------------------------------------------------------------------------|------------------------|----------| -| AV_DEFINITION_S3_BUCKET | Bucket containing antivirus definition files | | Yes | -| AV_DEFINITION_S3_PREFIX | Prefix for antivirus definition files | clamav_defs | No | -| AV_DEFINITION_PATH | Path containing virus definition files at runtime | /tmp/clamav_defs | No | -| AV_SCAN_START_SNS_ARN | SNS topic ARN to publish notification about start of scan | | No | -| AV_SCAN_START_METADATA | The tag/metadata indicating the start of the scan | av-scan-start | No | -| AV_SIGNATURE_METADATA | The tag/metadata name representing file's AV type | av-signature | No | -| AV_STATUS_CLEAN | The value assigned to clean items inside of tags/metadata | CLEAN | No | -| AV_STATUS_INFECTED | The value assigned to clean items inside of tags/metadata | INFECTED | No | -| AV_STATUS_METADATA | The tag/metadata name representing file's AV status | av-status | No | -| AV_STATUS_SNS_ARN | SNS topic ARN to publish scan results (optional) | | No | -| AV_STATUS_SNS_PUBLISH_CLEAN | Publish AV_STATUS_CLEAN results to AV_STATUS_SNS_ARN | True | No | -| AV_STATUS_SNS_PUBLISH_INFECTED | Publish AV_STATUS_INFECTED results to AV_STATUS_SNS_ARN | True | No | -| AV_TIMESTAMP_METADATA | The tag/metadata name representing file's scan time | av-timestamp | No | -| AV_EXTRA_VIRUS_DEFINITIONS | Uses fangfrisch for extra antivirus definitions | False | No | -| CLAMAVLIB_PATH | Path to ClamAV library files | ./bin | No | -| CLAMSCAN_PATH | Path to ClamAV clamscan binary | ./bin/clamscan | No | -| FRESHCLAM_PATH | Path to ClamAV freshclam binary | ./bin/freshclam | No | -| DATADOG_API_KEY | API Key for pushing metrics to DataDog (optional) | | No | -| AV_PROCESS_ORIGINAL_VERSION_ONLY | Controls that only original version of an S3 key is processed (if bucket versioning is enabled) | False | No | -| AV_DELETE_INFECTED_FILES | Controls whether infected files should be automatically deleted | False | No | -| EVENT_SOURCE | The source of antivirus scan event "S3" or "SNS" (optional) | S3 | No | -| S3_ENDPOINT | The Endpoint to use when interacting wth S3 | None | No | -| SNS_ENDPOINT | The Endpoint to use when interacting wth SNS | None | No | -| LAMBDA_ENDPOINT | The Endpoint to use when interacting wth Lambda | None | No | +| Variable | Description | Default | Required | +|----------------------------------|-------------------------------------------------------------------------------------------------|------------------|----------| +| AV_DEFINITION_S3_BUCKET | Bucket containing antivirus definition files | | Yes | +| AV_DEFINITION_S3_PREFIX | Prefix for antivirus definition files | clamav_defs | No | +| AV_DEFINITION_PATH | Path containing virus definition files at runtime | /tmp/clamav_defs | No | +| AV_SCAN_START_SNS_ARN | SNS topic ARN to publish notification about start of scan | | No | +| AV_SCAN_START_METADATA | The tag/metadata indicating the start of the scan | av-scan-start | No | +| AV_SIGNATURE_METADATA | The tag/metadata name representing file's AV type | av-signature | No | +| AV_STATUS_CLEAN | The value assigned to clean items inside of tags/metadata | CLEAN | No | +| AV_STATUS_INFECTED | The value assigned to clean items inside of tags/metadata | INFECTED | No | +| AV_STATUS_METADATA | The tag/metadata name representing file's AV status | av-status | No | +| AV_STATUS_SNS_ARN | SNS topic ARN to publish scan results (optional) | | No | +| AV_STATUS_SNS_PUBLISH_CLEAN | Publish AV_STATUS_CLEAN results to AV_STATUS_SNS_ARN | True | No | +| AV_STATUS_SNS_PUBLISH_INFECTED | Publish AV_STATUS_INFECTED results to AV_STATUS_SNS_ARN | True | No | +| AV_TIMESTAMP_METADATA | The tag/metadata name representing file's scan time | av-timestamp | No | +| AV_EXTRA_VIRUS_DEFINITIONS | Uses fangfrisch for extra antivirus definitions | False | No | +| CLAMAVLIB_PATH | Path to ClamAV library files | ./bin | No | +| CLAMDSCAN_PATH | Path to ClamAV clamdscan binary | ./bin/clamdscan | No | +| FRESHCLAM_PATH | Path to ClamAV freshclam binary | ./bin/freshclam | No | +| DATADOG_API_KEY | API Key for pushing metrics to DataDog (optional) | | No | +| AV_PROCESS_ORIGINAL_VERSION_ONLY | Controls that only original version of an S3 key is processed (if bucket versioning is enabled) | False | No | +| AV_DELETE_INFECTED_FILES | Controls whether infected files should be automatically deleted | False | No | +| EVENT_SOURCE | The source of antivirus scan event "S3" or "SNS" (optional) | S3 | No | +| S3_ENDPOINT | The Endpoint to use when interacting wth S3 | None | No | +| SNS_ENDPOINT | The Endpoint to use when interacting wth SNS | None | No | +| LAMBDA_ENDPOINT | The Endpoint to use when interacting wth Lambda | None | No | ## S3 Bucket Policy Examples diff --git a/clamav.py b/clamav.py index 24a59e67..71fba2d4 100644 --- a/clamav.py +++ b/clamav.py @@ -19,23 +19,29 @@ import pwd import re import subprocess +import socket +import errno import boto3 import botocore from pytz import utc +from common import AV_DEFINITION_S3_BUCKET +from common import AV_DEFINITION_S3_PREFIX +from common import AV_DEFINITION_PATH from common import AV_DEFINITION_FILE_PREFIXES from common import AV_DEFINITION_FILE_SUFFIXES -from common import AV_DEFINITION_PATH from common import AV_SIGNATURE_OK from common import AV_SIGNATURE_UNKNOWN from common import AV_STATUS_CLEAN from common import AV_STATUS_INFECTED from common import CLAMAVLIB_PATH -from common import CLAMSCAN_PATH +from common import CLAMDSCAN_PATH from common import FRESHCLAM_PATH -from common import S3_ENDPOINT +from common import CLAMDSCAN_TIMEOUT from common import create_dir +from common import CLAMD_SOCKET + RE_SEARCH_DIR = r"SEARCH_DIR\(\"=([A-z0-9\/\-_]*)\"\)" @@ -53,7 +59,7 @@ def update_defs_from_s3(s3_client, bucket, prefix): s3_best_time = None for file_suffix in AV_DEFINITION_FILE_SUFFIXES: filename = file_prefix + "." + file_suffix - s3_path = os.path.join(prefix, filename) + s3_path = os.path.join(AV_DEFINITION_S3_PREFIX, filename) local_path = os.path.join(AV_DEFINITION_PATH, filename) s3_md5 = md5_from_s3_tags(s3_client, bucket, s3_path) s3_time = time_from_s3(s3_client, bucket, s3_path) @@ -89,7 +95,7 @@ def upload_defs_to_s3(s3_client, bucket, prefix, local_path): "Uploading %s to s3://%s" % (local_file_path, os.path.join(bucket, prefix, filename)) ) - s3 = boto3.resource("s3", endpoint_url=S3_ENDPOINT) + s3 = boto3.resource("s3") s3_object = s3.Object(bucket, os.path.join(prefix, filename)) s3_object.upload_file(os.path.join(local_path, filename)) s3_client.put_object_tagging( @@ -118,7 +124,7 @@ def update_defs_from_freshclam(path, library_path=""): fc_proc = subprocess.Popen( [ FRESHCLAM_PATH, - "--config-file=./bin/freshclam.conf", + "--config-file=%s/freshclam.conf" % CLAMAVLIB_PATH, "-u %s" % pwd.getpwuid(os.getuid())[0], "--datadir=%s" % path, ], @@ -186,24 +192,102 @@ def scan_output_to_json(output): def scan_file(path): av_env = os.environ.copy() av_env["LD_LIBRARY_PATH"] = CLAMAVLIB_PATH - print("Starting clamscan of %s." % path) + print("Starting clamdscan of %s." % path) av_proc = subprocess.Popen( - [CLAMSCAN_PATH, "-v", "-a", "--stdout", "-d", AV_DEFINITION_PATH, path], + [ + CLAMDSCAN_PATH, + "-v", + "--stdout", + "--config-file", + "%s/scan.conf" % CLAMAVLIB_PATH, + path, + ], stderr=subprocess.STDOUT, stdout=subprocess.PIPE, env=av_env, ) - output = av_proc.communicate()[0].decode() - print("clamscan output:\n%s" % output) - # Turn the output into a data source we can read - summary = scan_output_to_json(output) + try: + output, errors = av_proc.communicate(timeout=CLAMDSCAN_TIMEOUT) + except subprocess.TimeoutExpired: + av_proc.kill() + output, errors = av_proc.communicate() + + decoded_output = output.decode() + print("clamdscan output:\n%s" % decoded_output) + if av_proc.returncode == 0: return AV_STATUS_CLEAN, AV_SIGNATURE_OK elif av_proc.returncode == 1: + # Turn the output into a data source we can read + summary = scan_output_to_json(decoded_output) signature = summary.get(path, AV_SIGNATURE_UNKNOWN) return AV_STATUS_INFECTED, signature else: - msg = "Unexpected exit code from clamscan: %s.\n" % av_proc.returncode + msg = "Unexpected exit code from clamdscan: %s.\n" % av_proc.returncode + + if errors: + msg += "Errors: %s\n" % errors.decode() + print(msg) raise Exception(msg) + +def is_clamd_running(): + print("Checking if clamd is running on %s" % CLAMD_SOCKET) + + if os.path.exists(CLAMD_SOCKET): + with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as s: + s.settimeout(10) + s.connect(CLAMD_SOCKET) + s.send(b"PING") + try: + data = s.recv(32) + except (socket.timeout, socket.error) as e: + print("Failed to read from socket: %s\n" % e) + return False + + print("Received %s in response to PING" % repr(data)) + return data == b"PONG\n" + + print("Clamd is not running on %s" % CLAMD_SOCKET) + return False + +def start_clamd_daemon(): + s3 = boto3.resource("s3") + s3_client = boto3.client("s3") + + to_download = update_defs_from_s3( + s3_client, AV_DEFINITION_S3_BUCKET, AV_DEFINITION_S3_PREFIX + ) + + for download in to_download.values(): + s3_path = download["s3_path"] + local_path = download["local_path"] + print("Downloading definition file %s from s3://%s" % (local_path, s3_path)) + s3.Bucket(AV_DEFINITION_S3_BUCKET).download_file(s3_path, local_path) + print("Downloading definition file %s complete!" % (local_path)) + + av_env = os.environ.copy() + av_env["LD_LIBRARY_PATH"] = CLAMAVLIB_PATH + + print("Starting clamd") + + if os.path.exists(CLAMD_SOCKET): + try: + os.unlink(CLAMD_SOCKET) + except OSError as e: + if e.errno != errno.ENOENT: + print("Could not unlink clamd socket %s" % CLAMD_SOCKET) + raise + + clamd_proc = subprocess.Popen( + ["%s/clamd" % CLAMAVLIB_PATH, "-c", "%s/scan.conf" % CLAMAVLIB_PATH], + env=av_env, + ) + + clamd_proc.wait() + + clamd_log_file = open("/tmp/clamd.log") + print(clamd_log_file.read()) + + return clamd_proc.pid diff --git a/common.py b/common.py index 335c6cce..740a20e3 100644 --- a/common.py +++ b/common.py @@ -55,8 +55,10 @@ def str_to_bool(s): AV_TIMESTAMP_METADATA = os.getenv("AV_TIMESTAMP_METADATA", "av-timestamp") AV_EXTRA_VIRUS_DEFINITIONS = str_to_bool(os.getenv("AV_EXTRA_VIRUS_DEFINITIONS", "False")) CLAMAVLIB_PATH = os.getenv("CLAMAVLIB_PATH", "./bin") -CLAMSCAN_PATH = os.getenv("CLAMSCAN_PATH", "./bin/clamscan") +CLAMDSCAN_PATH = os.getenv("CLAMDSCAN_PATH", "./bin/clamdscan") FRESHCLAM_PATH = os.getenv("FRESHCLAM_PATH", "./bin/freshclam") +CLAMDSCAN_TIMEOUT = os.getenv("CLAMDSCAN_TIMEOUT", 240) +CLAMD_SOCKET = os.getenv("CLAMD_SOCKET", "/tmp/clamd.sock") AV_PROCESS_ORIGINAL_VERSION_ONLY = str_to_bool(os.getenv( "AV_PROCESS_ORIGINAL_VERSION_ONLY", "False" )) diff --git a/scan.py b/scan.py index 92feb81d..c065e89b 100644 --- a/scan.py +++ b/scan.py @@ -16,7 +16,9 @@ import copy import json import os +import signal from urllib.parse import unquote_plus +from distutils.util import strtobool import boto3 @@ -36,13 +38,14 @@ from common import AV_STATUS_SNS_PUBLISH_CLEAN from common import AV_STATUS_SNS_PUBLISH_INFECTED from common import AV_TIMESTAMP_METADATA -from common import S3_ENDPOINT -from common import SNS_ENDPOINT from common import create_dir from common import get_timestamp +clamd_pid = None + def event_object(event, event_source="s3"): + # SNS events are slightly different if event_source.upper() == "SNS": event = json.loads(event["Records"][0]["Sns"]["Message"]) @@ -73,7 +76,7 @@ def event_object(event, event_source="s3"): raise Exception("Unable to retrieve object from event.\n{}".format(event)) # Create and return the object - s3 = boto3.resource("s3", endpoint_url=S3_ENDPOINT) + s3 = boto3.resource("s3") return s3.Object(bucket_name, key_name) @@ -169,10 +172,12 @@ def sns_scan_results( sns_client, s3_object, sns_arn, scan_result, scan_signature, timestamp ): # Don't publish if scan_result is CLEAN and CLEAN results should not be published - if scan_result == AV_STATUS_CLEAN and not AV_STATUS_SNS_PUBLISH_CLEAN: + if scan_result == AV_STATUS_CLEAN and not str_to_bool(AV_STATUS_SNS_PUBLISH_CLEAN): return # Don't publish if scan_result is INFECTED and INFECTED results should not be published - if scan_result == AV_STATUS_INFECTED and not AV_STATUS_SNS_PUBLISH_INFECTED: + if scan_result == AV_STATUS_INFECTED and not str_to_bool( + AV_STATUS_SNS_PUBLISH_INFECTED + ): return message = { "bucket": s3_object.bucket_name, @@ -196,20 +201,44 @@ def sns_scan_results( ) +def kill_process_by_pid(pid): + # Check if process is running on PID + try: + os.kill(clamd_pid, 0) + except OSError: + return + + print("Killing the process by PID %s" % clamd_pid) + + try: + os.kill(clamd_pid, signal.SIGTERM) + except OSError: + os.kill(clamd_pid, signal.SIGKILL) + + def lambda_handler(event, context): - s3 = boto3.resource("s3", endpoint_url=S3_ENDPOINT) - s3_client = boto3.client("s3", endpoint_url=S3_ENDPOINT) - sns_client = boto3.client("sns", endpoint_url=SNS_ENDPOINT) + global clamd_pid + + s3 = boto3.resource("s3") + s3_client = boto3.client("s3") + sns_client = boto3.client("sns") # Get some environment variables ENV = os.getenv("ENV", "") EVENT_SOURCE = os.getenv("EVENT_SOURCE", "S3") + if not clamav.is_clamd_running(): + if clamd_pid is not None: + kill_process_by_pid(clamd_pid) + + clamd_pid = clamav.start_clamd_daemon() + print("Clamd PID: %s" % clamd_pid) + start_time = get_timestamp() - print("Script starting at %s\n" % start_time) + print("Script starting at %s\n" % (start_time)) s3_object = event_object(event, event_source=EVENT_SOURCE) - if AV_PROCESS_ORIGINAL_VERSION_ONLY: + if str_to_bool(AV_PROCESS_ORIGINAL_VERSION_ONLY): verify_s3_object_version(s3, s3_object) # Publish the start time of the scan @@ -221,17 +250,6 @@ def lambda_handler(event, context): create_dir(os.path.dirname(file_path)) s3_object.download_file(file_path) - to_download = clamav.update_defs_from_s3( - s3_client, AV_DEFINITION_S3_BUCKET, AV_DEFINITION_S3_PREFIX - ) - - for download in to_download.values(): - s3_path = download["s3_path"] - local_path = download["local_path"] - s3_url = os.path.join("s3://", AV_DEFINITION_S3_BUCKET, s3_path) - print("Downloading definition file %s from %s" % (local_path, s3_url)) - s3.Bucket(AV_DEFINITION_S3_BUCKET).download_file(s3_path, local_path) - print("Downloading definition file %s complete!" % local_path) scan_result, scan_signature = clamav.scan_file(file_path) print( "Scan of s3://%s resulted in %s\n" @@ -263,7 +281,11 @@ def lambda_handler(event, context): os.remove(file_path) except OSError: pass - if AV_DELETE_INFECTED_FILES and scan_result == AV_STATUS_INFECTED: + if str_to_bool(AV_DELETE_INFECTED_FILES) and scan_result == AV_STATUS_INFECTED: delete_s3_object(s3_object) stop_scan_time = get_timestamp() print("Script finished at %s\n" % stop_scan_time) + + +def str_to_bool(s): + return bool(strtobool(str(s))) From 6c1d36d414ef36f1c1548447fcef2b65e7dd7a18 Mon Sep 17 00:00:00 2001 From: Gabriel Chamon Araujo Date: Wed, 1 Mar 2023 11:16:11 -0300 Subject: [PATCH 30/49] fix sqlite db persistence for fangfrisch --- common.py | 5 +++-- update.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/common.py b/common.py index 740a20e3..98c324b2 100644 --- a/common.py +++ b/common.py @@ -117,10 +117,11 @@ def str_to_bool(s): 'winnow_malware', 'winnow_malware_links', 'winnow_phish_complete_url', - 'winnow_spam_complete' + 'winnow_spam_complete', + 'db' ])) AV_DEFINITION_FILE_SUFFIXES = list(set( - AV_DEFINITION_FILE_SUFFIXES + ['cdb', 'db', 'fp', 'hdb', 'hsb', 'ign2', 'ldb', 'ndb', 'yara'] + AV_DEFINITION_FILE_SUFFIXES + ['cdb', 'db', 'fp', 'hdb', 'hsb', 'ign2', 'ldb', 'ndb', 'yara', 'sqlite'] )) SNS_ENDPOINT = os.getenv("SNS_ENDPOINT", None) diff --git a/update.py b/update.py index 692a16c9..edd0a896 100644 --- a/update.py +++ b/update.py @@ -54,7 +54,7 @@ def lambda_handler(event, context): f"sed -i 's~AV_DEFINITION_PATH~{AV_DEFINITION_PATH}~g' /tmp/fangfrisch.conf", shell=True, check=True) - subprocess.run(f"{fangfrisch_base_command} initdb", shell=True, env=env_pythonpath, check=True) + subprocess.run(f"{fangfrisch_base_command} initdb", shell=True, env=env_pythonpath) subprocess.run(f"{fangfrisch_base_command} refresh", shell=True, env=env_pythonpath, check=True) else: From 97037a064b3991a20ec6fef25e2ed4ef81cdfb17 Mon Sep 17 00:00:00 2001 From: Gabriel Chamon Araujo Date: Wed, 1 Mar 2023 11:20:29 -0300 Subject: [PATCH 31/49] optimize build times --- Dockerfile | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/Dockerfile b/Dockerfile index 429e0c09..92246bfe 100644 --- a/Dockerfile +++ b/Dockerfile @@ -8,25 +8,12 @@ RUN mkdir -p \ /opt/app/python_deps \ /opt/app/cli -# Copy in the lambda source -WORKDIR /opt/app -COPY ./*.py /opt/app/ -COPY requirements.txt /opt/app/requirements.txt - # Install packages RUN yum update -y \ && amazon-linux-extras install epel -y \ - && yum install -y cpio yum-utils tar.x86_64 gzip zip python3-pip shadow-utils.x86_64 - -# This had --no-cache-dir, tracing through multiple tickets led to a problem in wheel -RUN pip3 install --requirement requirements.txt --target /opt/app/python_deps \ - && rm -rf /root/.cache/pip - -COPY requirements-cli.txt /opt/app/ -RUN pip3 install --requirement requirements-cli.txt --target /opt/app/cli \ - && rm -rf /root/.cache/pip \ - && sed -i 's~/usr/bin/python3~/var/lang/bin/python3~g' \ - /opt/app/cli/bin/fangfrisch + && yum install -y cpio yum-utils tar.x86_64 gzip zip python3-pip shadow-utils.x86_64 \ + && yum clean all \ + && rm -rf /var/cache/yum # Download libraries we need to run in lambda WORKDIR /tmp @@ -98,7 +85,22 @@ RUN groupadd clamav \ ENV LD_LIBRARY_PATH=/opt/app/bin RUN ldconfig +# Copy in the lambda source +WORKDIR /opt/app +COPY requirements.txt /opt/app/requirements.txt + +# This had --no-cache-dir, tracing through multiple tickets led to a problem in wheel +RUN pip3 install --requirement requirements.txt --target /opt/app/python_deps \ + && rm -rf /root/.cache/pip + +COPY requirements-cli.txt /opt/app/ +RUN pip3 install --requirement requirements-cli.txt --target /opt/app/cli \ + && rm -rf /root/.cache/pip \ + && sed -i 's~/usr/bin/python3~/var/lang/bin/python3~g' \ + /opt/app/cli/bin/fangfrisch + # Create the zip file +COPY ./*.py /opt/app/ COPY fangfrisch.conf /opt/app/fangfrisch.conf RUN cd /opt/app \ && zip -r9 --exclude="*test*" /opt/app/build/lambda.zip *.py *.conf bin cli \ From 64fbd466ef4e2ffbd527924bf36197a701b43b4c Mon Sep 17 00:00:00 2001 From: Gabriel Chamon Araujo Date: Wed, 1 Mar 2023 11:26:56 -0300 Subject: [PATCH 32/49] optimize dockerfile --- Dockerfile | 66 +++++++++++++++++++++++++++++------------------------- 1 file changed, 35 insertions(+), 31 deletions(-) diff --git a/Dockerfile b/Dockerfile index 92246bfe..f017df79 100644 --- a/Dockerfile +++ b/Dockerfile @@ -11,30 +11,37 @@ RUN mkdir -p \ # Install packages RUN yum update -y \ && amazon-linux-extras install epel -y \ - && yum install -y cpio yum-utils tar.x86_64 gzip zip python3-pip shadow-utils.x86_64 \ + && yum install -y \ + cpio \ + yum-utils \ + tar.x86_64 \ + gzip \ + zip \ + python3-pip \ + shadow-utils.x86_64 \ && yum clean all \ && rm -rf /var/cache/yum # Download libraries we need to run in lambda WORKDIR /tmp RUN yumdownloader -x \*i686 --archlist=x86_64 \ - clamav \ - clamav-lib \ - clamav-update \ - clamav-scanner-systemd \ - elfutils-libs \ - json-c \ - lz4 \ - pcre2 \ - systemd-libs \ - libtool-ltdl \ - libxml2 \ - bzip2-libs \ - xz-libs \ - libprelude \ - gnutls \ - nettle -RUN rpm2cpio clamav-0*.rpm | cpio -vimd \ + clamav \ + clamav-lib \ + clamav-update \ + clamav-scanner-systemd \ + elfutils-libs \ + json-c \ + lz4 \ + pcre2 \ + systemd-libs \ + libtool-ltdl \ + libxml2 \ + bzip2-libs \ + xz-libs \ + libprelude \ + gnutls \ + nettle \ + && rpm2cpio clamav-0*.rpm | cpio -vimd \ && rpm2cpio clamav-lib*.rpm | cpio -vimd \ && rpm2cpio clamav-update*.rpm | cpio -vimd \ && rpm2cpio json-c*.rpm | cpio -vimd \ @@ -49,17 +56,15 @@ RUN rpm2cpio clamav-0*.rpm | cpio -vimd \ && rpm2cpio clamd-0*.rpm | cpio -idmv \ && rpm2cpio elfutils-libs*.rpm | cpio -idmv \ && rpm2cpio lz4*.rpm | cpio -idmv \ - && rpm2cpio systemd-libs*.rpm | cpio -idmv - - -# Copy over the binaries and libraries -RUN cp -r \ - /tmp/usr/bin/clamdscan \ - /tmp/usr/sbin/clamd \ - /tmp/usr/bin/freshclam \ - /tmp/usr/lib64/* \ - /usr/lib64/libpcre.so.1 \ - /opt/app/bin/ + && rpm2cpio systemd-libs*.rpm | cpio -idmv \ + && cp -r \ + /tmp/usr/bin/clamdscan \ + /tmp/usr/sbin/clamd \ + /tmp/usr/bin/freshclam \ + /tmp/usr/lib64/* \ + /usr/lib64/libpcre.so.1 \ + /opt/app/bin/ \ + && rm -rf /tmp/usr # Fix the freshclam.conf settings RUN echo "DatabaseMirror database.clamav.net" > /opt/app/bin/freshclam.conf \ @@ -96,8 +101,7 @@ RUN pip3 install --requirement requirements.txt --target /opt/app/python_deps \ COPY requirements-cli.txt /opt/app/ RUN pip3 install --requirement requirements-cli.txt --target /opt/app/cli \ && rm -rf /root/.cache/pip \ - && sed -i 's~/usr/bin/python3~/var/lang/bin/python3~g' \ - /opt/app/cli/bin/fangfrisch + && sed -i 's~/usr/bin/python3~/var/lang/bin/python3~g' /opt/app/cli/bin/fangfrisch # Create the zip file COPY ./*.py /opt/app/ From 7962ef50368e6e84cbb99d5815b7a7c3cda45281 Mon Sep 17 00:00:00 2001 From: Gabriel Chamon Araujo Date: Wed, 1 Mar 2023 11:53:45 -0300 Subject: [PATCH 33/49] fix libpcre.so not being added to zip --- Dockerfile | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index f017df79..5e3ebac6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -62,7 +62,7 @@ RUN yumdownloader -x \*i686 --archlist=x86_64 \ /tmp/usr/sbin/clamd \ /tmp/usr/bin/freshclam \ /tmp/usr/lib64/* \ - /usr/lib64/libpcre.so.1 \ + /usr/lib64/libpcre.so* \ /opt/app/bin/ \ && rm -rf /tmp/usr @@ -106,8 +106,7 @@ RUN pip3 install --requirement requirements-cli.txt --target /opt/app/cli \ # Create the zip file COPY ./*.py /opt/app/ COPY fangfrisch.conf /opt/app/fangfrisch.conf -RUN cd /opt/app \ - && zip -r9 --exclude="*test*" /opt/app/build/lambda.zip *.py *.conf bin cli \ +RUN zip -r9 --exclude="*test*" /opt/app/build/lambda.zip *.py *.conf bin cli \ && cd /opt/app/python_deps \ && zip -r9 /opt/app/build/lambda.zip * From 310d112a54bc3111e9a89352e7be4bca42f484ef Mon Sep 17 00:00:00 2001 From: Gabriel Chamon Araujo Date: Wed, 1 Mar 2023 11:59:19 -0300 Subject: [PATCH 34/49] compress missing files into a single log line --- clamav.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/clamav.py b/clamav.py index 71fba2d4..11cab198 100644 --- a/clamav.py +++ b/clamav.py @@ -14,34 +14,34 @@ # limitations under the License. import datetime +import errno import hashlib +import json import os import pwd import re -import subprocess import socket -import errno +import subprocess import boto3 import botocore from pytz import utc -from common import AV_DEFINITION_S3_BUCKET -from common import AV_DEFINITION_S3_PREFIX -from common import AV_DEFINITION_PATH from common import AV_DEFINITION_FILE_PREFIXES from common import AV_DEFINITION_FILE_SUFFIXES +from common import AV_DEFINITION_PATH +from common import AV_DEFINITION_S3_BUCKET +from common import AV_DEFINITION_S3_PREFIX from common import AV_SIGNATURE_OK from common import AV_SIGNATURE_UNKNOWN from common import AV_STATUS_CLEAN from common import AV_STATUS_INFECTED from common import CLAMAVLIB_PATH from common import CLAMDSCAN_PATH -from common import FRESHCLAM_PATH from common import CLAMDSCAN_TIMEOUT -from common import create_dir from common import CLAMD_SOCKET - +from common import FRESHCLAM_PATH +from common import create_dir RE_SEARCH_DIR = r"SEARCH_DIR\(\"=([A-z0-9\/\-_]*)\"\)" @@ -82,6 +82,7 @@ def update_defs_from_s3(s3_client, bucket, prefix): def upload_defs_to_s3(s3_client, bucket, prefix, local_path): + non_existent_files = set() for file_prefix in AV_DEFINITION_FILE_PREFIXES: for file_suffix in AV_DEFINITION_FILE_SUFFIXES: filename = file_prefix + "." + file_suffix @@ -109,7 +110,9 @@ def upload_defs_to_s3(s3_client, bucket, prefix, local_path): % filename ) else: - print("File does not exist: %s" % filename) + non_existent_files.add(filename) + print("The following files do not exist for upload:") + print(json.dumps(list(non_existent_files))) def update_defs_from_freshclam(path, library_path=""): @@ -232,6 +235,7 @@ def scan_file(path): print(msg) raise Exception(msg) + def is_clamd_running(): print("Checking if clamd is running on %s" % CLAMD_SOCKET) @@ -252,6 +256,7 @@ def is_clamd_running(): print("Clamd is not running on %s" % CLAMD_SOCKET) return False + def start_clamd_daemon(): s3 = boto3.resource("s3") s3_client = boto3.client("s3") From 6fc216a50b32cfe4f6c137c4e4d4dd3ce7561749 Mon Sep 17 00:00:00 2001 From: Gabriel Chamon Araujo Date: Wed, 1 Mar 2023 11:59:54 -0300 Subject: [PATCH 35/49] fix orphan config in freshclam.conf --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 5e3ebac6..c90c7101 100644 --- a/Dockerfile +++ b/Dockerfile @@ -71,12 +71,12 @@ RUN echo "DatabaseMirror database.clamav.net" > /opt/app/bin/freshclam.conf \ && echo "CompressLocalDatabase yes" >> /opt/app/bin/freshclam.conf \ && echo "ScriptedUpdates no" >> /opt/app/bin/freshclam.conf \ && echo "DatabaseDirectory /var/lib/clamav" >> /opt/app/bin/freshclam.conf \ - && echo "DetectPUA yes" >> /opt/app/bin/freshclam.conf \ && echo "DatabaseDirectory /tmp/clamav_defs" > /opt/app/bin/scan.conf \ && echo "PidFile /tmp/clamd.pid" >> /opt/app/bin/scan.conf \ && echo "LogFile /tmp/clamd.log" >> /opt/app/bin/scan.conf \ && echo "LocalSocket /tmp/clamd.sock" >> /opt/app/bin/scan.conf \ && echo "FixStaleSocket yes" >> /opt/app/bin/scan.conf \ + && echo "DetectPUA yes" >> /opt/app/bin/scan.conf \ && echo "ExcludePUA PUA.Win.Packer" >> /opt/app/bin/scan.conf \ && echo "ExcludePUA PUA.Win.Trojan.Packed" >> /opt/app/bin/scan.conf \ && echo "ExcludePUA PUA.Win.Trojan.Molebox" >> /opt/app/bin/scan.conf \ From 22d0b4e59d8b44577ed83afe0e039e7a57d3d638 Mon Sep 17 00:00:00 2001 From: Gabriel Chamon Araujo Date: Wed, 1 Mar 2023 12:00:45 -0300 Subject: [PATCH 36/49] fix config in freshclam.conf; separate freshclam and scan conf runs --- Dockerfile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index c90c7101..5969d892 100644 --- a/Dockerfile +++ b/Dockerfile @@ -70,8 +70,9 @@ RUN yumdownloader -x \*i686 --archlist=x86_64 \ RUN echo "DatabaseMirror database.clamav.net" > /opt/app/bin/freshclam.conf \ && echo "CompressLocalDatabase yes" >> /opt/app/bin/freshclam.conf \ && echo "ScriptedUpdates no" >> /opt/app/bin/freshclam.conf \ - && echo "DatabaseDirectory /var/lib/clamav" >> /opt/app/bin/freshclam.conf \ - && echo "DatabaseDirectory /tmp/clamav_defs" > /opt/app/bin/scan.conf \ + && echo "DatabaseDirectory /var/lib/clamav" >> /opt/app/bin/freshclam.conf +# clamd conf with hardened configs to avoid false positives +RUN echo "DatabaseDirectory /tmp/clamav_defs" > /opt/app/bin/scan.conf \ && echo "PidFile /tmp/clamd.pid" >> /opt/app/bin/scan.conf \ && echo "LogFile /tmp/clamd.log" >> /opt/app/bin/scan.conf \ && echo "LocalSocket /tmp/clamd.sock" >> /opt/app/bin/scan.conf \ From bd196a9ec03f8b43f1b17089ff2ca283b821469f Mon Sep 17 00:00:00 2001 From: Gabriel Chamon Araujo Date: Wed, 1 Mar 2023 12:07:21 -0300 Subject: [PATCH 37/49] logging freshclam output as a list of strings --- clamav.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/clamav.py b/clamav.py index 11cab198..53bf63b7 100644 --- a/clamav.py +++ b/clamav.py @@ -135,8 +135,9 @@ def update_defs_from_freshclam(path, library_path=""): stdout=subprocess.PIPE, env=fc_env, ) - output = fc_proc.communicate()[0] - print("freshclam output:\n%s" % output) + output = fc_proc.communicate()[0].decode() + print("freshclam output:") + print(json.dumps(output.split("/n"))) if fc_proc.returncode != 0: print("Unexpected exit code from freshclam: %s." % fc_proc.returncode) return fc_proc.returncode From e6102f967d4655e7a23da2897cfddf2391a311dc Mon Sep 17 00:00:00 2001 From: Gabriel Chamon Araujo Date: Wed, 1 Mar 2023 12:09:42 -0300 Subject: [PATCH 38/49] compressing not downloading and md5 matches into single line --- clamav.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/clamav.py b/clamav.py index 53bf63b7..6c09b0b2 100644 --- a/clamav.py +++ b/clamav.py @@ -55,6 +55,8 @@ def current_library_search_path(): def update_defs_from_s3(s3_client, bucket, prefix): create_dir(AV_DEFINITION_PATH) to_download = {} + older_files = set() + md5_matches = set() for file_prefix in AV_DEFINITION_FILE_PREFIXES: s3_best_time = None for file_suffix in AV_DEFINITION_FILE_SUFFIXES: @@ -65,19 +67,23 @@ def update_defs_from_s3(s3_client, bucket, prefix): s3_time = time_from_s3(s3_client, bucket, s3_path) if s3_best_time is not None and s3_time < s3_best_time: - print("Not downloading older file in series: %s" % filename) + older_files.add(filename) continue else: s3_best_time = s3_time if os.path.exists(local_path) and md5_from_file(local_path) == s3_md5: - print("Not downloading %s because local md5 matches s3." % filename) + md5_matches.add(filename) continue if s3_md5: to_download[file_prefix] = { "s3_path": s3_path, "local_path": local_path, } + print("Not downloading the following older files in series:") + print(json.dumps(list(older_files))) + print("Not downloading the following files because local md5 matches s3:") + print(json.dumps(list(md5_matches))) return to_download From d4ebd2de8711a3daecf0849b425d4355e8a24811 Mon Sep 17 00:00:00 2001 From: Gabriel Chamon Araujo Date: Wed, 1 Mar 2023 12:12:56 -0300 Subject: [PATCH 39/49] fix freshclam output split --- clamav.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clamav.py b/clamav.py index 6c09b0b2..ecdb7f7a 100644 --- a/clamav.py +++ b/clamav.py @@ -143,7 +143,7 @@ def update_defs_from_freshclam(path, library_path=""): ) output = fc_proc.communicate()[0].decode() print("freshclam output:") - print(json.dumps(output.split("/n"))) + print(json.dumps(output.split("\n"))) if fc_proc.returncode != 0: print("Unexpected exit code from freshclam: %s." % fc_proc.returncode) return fc_proc.returncode From 0e95de65f03963e6a7efab7148e8da08b55ff588 Mon Sep 17 00:00:00 2001 From: Gabriel Chamon Araujo Date: Wed, 1 Mar 2023 12:16:40 -0300 Subject: [PATCH 40/49] download freshclam defs before running fangfrisch --- update.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/update.py b/update.py index edd0a896..72ef2ec2 100644 --- a/update.py +++ b/update.py @@ -44,6 +44,8 @@ def lambda_handler(event, context): s3.Bucket(AV_DEFINITION_S3_BUCKET).download_file(s3_path, local_path) print("Downloading definition file %s complete!" % (local_path)) + clamav.update_defs_from_freshclam(AV_DEFINITION_PATH, CLAMAVLIB_PATH) + if AV_EXTRA_VIRUS_DEFINITIONS is True: env_pythonpath = os.environ.copy() env_pythonpath["PYTHONPATH"] = os.path.join(env_pythonpath["LAMBDA_TASK_ROOT"], "cli") @@ -54,13 +56,13 @@ def lambda_handler(event, context): f"sed -i 's~AV_DEFINITION_PATH~{AV_DEFINITION_PATH}~g' /tmp/fangfrisch.conf", shell=True, check=True) + print("running fangfrisch refresh...") subprocess.run(f"{fangfrisch_base_command} initdb", shell=True, env=env_pythonpath) subprocess.run(f"{fangfrisch_base_command} refresh", shell=True, env=env_pythonpath, check=True) else: print("Skip downloading extra virus definitions with Fangfrisch") - clamav.update_defs_from_freshclam(AV_DEFINITION_PATH, CLAMAVLIB_PATH) # If main.cvd gets updated (very rare), we will need to force freshclam # to download the compressed version to keep file sizes down. # The existence of main.cud is the trigger to know this has happened. From bb8fe9fc65695de51259c765d4216680fe626bfb Mon Sep 17 00:00:00 2001 From: Gabriel Chamon Araujo Date: Wed, 1 Mar 2023 12:17:44 -0300 Subject: [PATCH 41/49] breaking updater if freshclam panics --- update.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/update.py b/update.py index 72ef2ec2..79148605 100644 --- a/update.py +++ b/update.py @@ -44,7 +44,8 @@ def lambda_handler(event, context): s3.Bucket(AV_DEFINITION_S3_BUCKET).download_file(s3_path, local_path) print("Downloading definition file %s complete!" % (local_path)) - clamav.update_defs_from_freshclam(AV_DEFINITION_PATH, CLAMAVLIB_PATH) + if clamav.update_defs_from_freshclam(AV_DEFINITION_PATH, CLAMAVLIB_PATH) != 0: + return 1 if AV_EXTRA_VIRUS_DEFINITIONS is True: env_pythonpath = os.environ.copy() From d0209e09242b9973483b8497754bd53ec99e6039 Mon Sep 17 00:00:00 2001 From: Gabriel Chamon Araujo Date: Thu, 2 Mar 2023 10:14:05 -0300 Subject: [PATCH 42/49] downloading ALL extra files definitions, even if same prefix --- clamav.py | 17 +++++++++ common.py | 104 ++++++++++++++++++++++++++---------------------------- 2 files changed, 68 insertions(+), 53 deletions(-) diff --git a/clamav.py b/clamav.py index ecdb7f7a..6bfaf399 100644 --- a/clamav.py +++ b/clamav.py @@ -29,6 +29,8 @@ from common import AV_DEFINITION_FILE_PREFIXES from common import AV_DEFINITION_FILE_SUFFIXES +from common import AV_EXTRA_VIRUS_DEFINITIONS +from common import AV_DETINITION_EXTRA_FILES from common import AV_DEFINITION_PATH from common import AV_DEFINITION_S3_BUCKET from common import AV_DEFINITION_S3_PREFIX @@ -80,6 +82,21 @@ def update_defs_from_s3(s3_client, bucket, prefix): "s3_path": s3_path, "local_path": local_path, } + + if AV_EXTRA_VIRUS_DEFINITIONS is True: + for filename in AV_DETINITION_EXTRA_FILES: + s3_path = os.path.join(AV_DEFINITION_S3_PREFIX, filename) + local_path = os.path.join(AV_DEFINITION_PATH, filename) + s3_md5 = md5_from_s3_tags(s3_client, bucket, s3_path) + if os.path.exists(local_path) and md5_from_file(local_path) == s3_md5: + md5_matches.add(filename) + continue + if s3_md5: + to_download[filename] = { + "s3_path": s3_path, + "local_path": local_path, + } + print("Not downloading the following older files in series:") print(json.dumps(list(older_files))) print("Not downloading the following files because local md5 matches s3:") diff --git a/common.py b/common.py index 98c324b2..08d8017a 100644 --- a/common.py +++ b/common.py @@ -13,8 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -import errno import datetime +import errno import os import os.path from distutils.util import strtobool @@ -71,58 +71,56 @@ def str_to_bool(s): ] AV_DEFINITION_FILE_SUFFIXES = ["cld", "cvd"] -if AV_EXTRA_VIRUS_DEFINITIONS is True: - AV_DEFINITION_FILE_PREFIXES = list(set(AV_DEFINITION_FILE_PREFIXES + [ - 'MiscreantPunch099-Low', - 'badmacro', - 'blurl', - 'bofhland_cracked_URL', - 'bofhland_malware_URL', - 'bofhland_malware_attach', - 'bofhland_phishing_URL', - 'ditekshen', - 'exexor99', - 'foxhole_filename', - 'foxhole_generic', - 'foxhole_js', - 'hackingteam', - 'interserver256', - 'interservertopline', - 'junk', - 'jurlbl', - 'jurlbla', - 'lott', - 'malwarehash', - 'miscreantpunch', - 'phish', - 'phishtank', - 'porcupine', - 'rfxn', - 'rogue', - 'scam', - 'shell', - 'shelter', - 'spamattach', - 'spamimg', - 'spear', - 'spearl', - 'twinclams', - 'twinwave', - 'urlhaus', - 'whitelist', - 'winnow.attachments', - 'winnow_bad_cw', - 'winnow_extended_malware', - 'winnow_extended_malware_links', - 'winnow_malware', - 'winnow_malware_links', - 'winnow_phish_complete_url', - 'winnow_spam_complete', - 'db' - ])) - AV_DEFINITION_FILE_SUFFIXES = list(set( - AV_DEFINITION_FILE_SUFFIXES + ['cdb', 'db', 'fp', 'hdb', 'hsb', 'ign2', 'ldb', 'ndb', 'yara', 'sqlite'] - )) +AV_DETINITION_EXTRA_FILES = [ + "MiscreantPunch099-Low.ldb", + "badmacro.ndb", + "blurl.ndb", + "bofhland_cracked_URL.ndb", + "bofhland_malware_URL.ndb", + "bofhland_malware_attach.hdb", + "bofhland_phishing_URL.ndb", + "ditekshen.ldb", + "exexor99.ldb", + "foxhole_filename.cdb", + "foxhole_generic.cdb", + "foxhole_js.cdb", + "foxhole_js.ndb", + "hackingteam.hsb", + "interserver256.hdb", + "interservertopline.db", + "junk.ndb", + "jurlbl.ndb", + "jurlbla.ndb", + "lott.ndb", + "malwarehash.hsb", + "miscreantpunch.hdb", + "phish.ndb", + "phishtank.ndb", + "porcupine.ndb", + "rfxn.hdb", + "rfxn.ndb", + "rfxn.yara", + "rogue.hdb", + "scam.ndb", + "shell.ldb", + "shelter.ldb", + "spamattach.hdb", + "spamimg.hdb", + "spear.ndb", + "spearl.ndb", + "twinclams.ldb", + "twinwave.ign2", + "urlhaus.ndb", + "whitelist.fp", + "winnow.attachments.hdb", + "winnow_bad_cw.hdb", + "winnow_extended_malware.hdb", + "winnow_extended_malware_links.ndb", + "winnow_malware.hdb", + "winnow_malware_links.ndb", + "winnow_phish_complete_url.ndb", + "winnow_spam_complete.ndb" +] SNS_ENDPOINT = os.getenv("SNS_ENDPOINT", None) S3_ENDPOINT = os.getenv("S3_ENDPOINT", None) From 02cc4fa477fd592708cb75e8974af47ecb3355a0 Mon Sep 17 00:00:00 2001 From: Gabriel Chamon Araujo Date: Thu, 2 Mar 2023 10:16:21 -0300 Subject: [PATCH 43/49] only print older files and md5 matches if they exist --- clamav.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/clamav.py b/clamav.py index 6bfaf399..4c14195d 100644 --- a/clamav.py +++ b/clamav.py @@ -97,10 +97,12 @@ def update_defs_from_s3(s3_client, bucket, prefix): "local_path": local_path, } - print("Not downloading the following older files in series:") - print(json.dumps(list(older_files))) - print("Not downloading the following files because local md5 matches s3:") - print(json.dumps(list(md5_matches))) + if older_files: + print("Not downloading the following older files in series:") + print(json.dumps(list(older_files))) + if md5_matches: + print("Not downloading the following files because local md5 matches s3:") + print(json.dumps(list(md5_matches))) return to_download From 2ba7b19468ebff85442433a479cae4aa4d73b182 Mon Sep 17 00:00:00 2001 From: Gabriel Chamon Araujo Date: Thu, 2 Mar 2023 10:24:59 -0300 Subject: [PATCH 44/49] fix infection deletion log --- scan.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scan.py b/scan.py index c065e89b..43a1340f 100644 --- a/scan.py +++ b/scan.py @@ -114,7 +114,7 @@ def delete_s3_object(s3_object): % (s3_object.bucket_name, s3_object.key) ) else: - print("Infected file deleted: %s.%s" % (s3_object.bucket_name, s3_object.key)) + print("Infected file deleted: %s" % os.path.join("s3://", s3_object.bucket_name, s3_object.key)) def set_av_metadata(s3_object, scan_result, scan_signature, timestamp): From 5ac75933612097dedf6919f0e35d6749751b55ed Mon Sep 17 00:00:00 2001 From: Gabriel Chamon Araujo Date: Thu, 2 Mar 2023 10:30:54 -0300 Subject: [PATCH 45/49] using aws lambda stage image for fangfrisch to avoid shebang rewrite --- Dockerfile | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/Dockerfile b/Dockerfile index 5969d892..fb8ed274 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,3 +1,10 @@ +FROM public.ecr.aws/lambda/python:3.7 AS cli_deps + +COPY requirements-cli.txt requirements-cli.txt +RUN mkdir -p /opt/app/cli \ + && pip3 install --requirement requirements-cli.txt --target /opt/app/cli \ + && rm -rf /root/.cache/pip + FROM amazonlinux:2 # Set up working directories @@ -99,10 +106,8 @@ COPY requirements.txt /opt/app/requirements.txt RUN pip3 install --requirement requirements.txt --target /opt/app/python_deps \ && rm -rf /root/.cache/pip -COPY requirements-cli.txt /opt/app/ -RUN pip3 install --requirement requirements-cli.txt --target /opt/app/cli \ - && rm -rf /root/.cache/pip \ - && sed -i 's~/usr/bin/python3~/var/lang/bin/python3~g' /opt/app/cli/bin/fangfrisch +# Copy fangfrisch CLI from lambda image +COPY --from=cli_deps /opt/app/cli /opt/app/cli # Create the zip file COPY ./*.py /opt/app/ From 2b76b8c579eb2b4754a387507d8facae1755fe64 Mon Sep 17 00:00:00 2001 From: Gabriel Chamon Araujo Date: Thu, 2 Mar 2023 10:53:31 -0300 Subject: [PATCH 46/49] uploading all extra definition files - refactored upload function to extract core upload logic - using all filenames to upload to s3 - refactored core upload logic to throw errors when no such file or md5 matches happen --- clamav.py | 84 ++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 56 insertions(+), 28 deletions(-) diff --git a/clamav.py b/clamav.py index 4c14195d..acddda76 100644 --- a/clamav.py +++ b/clamav.py @@ -29,11 +29,11 @@ from common import AV_DEFINITION_FILE_PREFIXES from common import AV_DEFINITION_FILE_SUFFIXES -from common import AV_EXTRA_VIRUS_DEFINITIONS -from common import AV_DETINITION_EXTRA_FILES from common import AV_DEFINITION_PATH from common import AV_DEFINITION_S3_BUCKET from common import AV_DEFINITION_S3_PREFIX +from common import AV_DETINITION_EXTRA_FILES +from common import AV_EXTRA_VIRUS_DEFINITIONS from common import AV_SIGNATURE_OK from common import AV_SIGNATURE_UNKNOWN from common import AV_STATUS_CLEAN @@ -106,38 +106,66 @@ def update_defs_from_s3(s3_client, bucket, prefix): return to_download +class Md5Matches(Exception): + pass + + +class NoSuchFile(Exception): + pass + + def upload_defs_to_s3(s3_client, bucket, prefix, local_path): + md5_matches = set() non_existent_files = set() for file_prefix in AV_DEFINITION_FILE_PREFIXES: for file_suffix in AV_DEFINITION_FILE_SUFFIXES: filename = file_prefix + "." + file_suffix - local_file_path = os.path.join(local_path, filename) - if os.path.exists(local_file_path): - local_file_md5 = md5_from_file(local_file_path) - if local_file_md5 != md5_from_s3_tags( - s3_client, bucket, os.path.join(prefix, filename) - ): - print( - "Uploading %s to s3://%s" - % (local_file_path, os.path.join(bucket, prefix, filename)) - ) - s3 = boto3.resource("s3") - s3_object = s3.Object(bucket, os.path.join(prefix, filename)) - s3_object.upload_file(os.path.join(local_path, filename)) - s3_client.put_object_tagging( - Bucket=s3_object.bucket_name, - Key=s3_object.key, - Tagging={"TagSet": [{"Key": "md5", "Value": local_file_md5}]}, - ) - else: - print( - "Not uploading %s because md5 on remote matches local." - % filename - ) - else: + try: + upload_new_file_to_s3(bucket, filename, local_path, non_existent_files, prefix, s3_client) + except Md5Matches: + md5_matches.add(filename) + except NoSuchFile: non_existent_files.add(filename) - print("The following files do not exist for upload:") - print(json.dumps(list(non_existent_files))) + + for filename in AV_DETINITION_EXTRA_FILES: + try: + upload_new_file_to_s3(bucket, filename, local_path, non_existent_files, prefix, s3_client) + except Md5Matches: + md5_matches.add(filename) + except NoSuchFile: + non_existent_files.add(filename) + + if non_existent_files: + print("The following files do not exist for upload:") + print(json.dumps(list(non_existent_files))) + if md5_matches: + print("The following files MD5 hashes matches those in S3:") + print(json.dumps(list(md5_matches))) + + +def upload_new_file_to_s3(bucket, filename, local_path, non_existent_files, prefix, s3_client): + local_file_path = os.path.join(local_path, filename) + + if not os.path.exists(local_file_path): + raise NoSuchFile + + local_file_md5 = md5_from_file(local_file_path) + + if local_file_md5 == md5_from_s3_tags(s3_client, bucket, os.path.join(prefix, filename)): + raise Md5Matches + + print( + "Uploading %s to s3://%s" + % (local_file_path, os.path.join(bucket, prefix, filename)) + ) + s3 = boto3.resource("s3") + s3_object = s3.Object(bucket, os.path.join(prefix, filename)) + s3_object.upload_file(os.path.join(local_path, filename)) + s3_client.put_object_tagging( + Bucket=s3_object.bucket_name, + Key=s3_object.key, + Tagging={"TagSet": [{"Key": "md5", "Value": local_file_md5}]}, + ) def update_defs_from_freshclam(path, library_path=""): From 111af04828c4b0c932166586f446928fcc84c6a8 Mon Sep 17 00:00:00 2001 From: Gabriel Chamon Araujo Date: Thu, 2 Mar 2023 11:04:45 -0300 Subject: [PATCH 47/49] refactored upload defs to merge all files into single list --- clamav.py | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/clamav.py b/clamav.py index acddda76..c96ba8a3 100644 --- a/clamav.py +++ b/clamav.py @@ -117,19 +117,14 @@ class NoSuchFile(Exception): def upload_defs_to_s3(s3_client, bucket, prefix, local_path): md5_matches = set() non_existent_files = set() - for file_prefix in AV_DEFINITION_FILE_PREFIXES: - for file_suffix in AV_DEFINITION_FILE_SUFFIXES: - filename = file_prefix + "." + file_suffix - try: - upload_new_file_to_s3(bucket, filename, local_path, non_existent_files, prefix, s3_client) - except Md5Matches: - md5_matches.add(filename) - except NoSuchFile: - non_existent_files.add(filename) + official_databases = [file_prefix + "." + file_suffix + for file_prefix in AV_DEFINITION_FILE_PREFIXES + for file_suffix in AV_DEFINITION_FILE_SUFFIXES] + all_databases = official_databases + AV_DETINITION_EXTRA_FILES - for filename in AV_DETINITION_EXTRA_FILES: + for filename in all_databases: try: - upload_new_file_to_s3(bucket, filename, local_path, non_existent_files, prefix, s3_client) + upload_new_file_to_s3(bucket, filename, local_path, prefix, s3_client) except Md5Matches: md5_matches.add(filename) except NoSuchFile: @@ -139,11 +134,11 @@ def upload_defs_to_s3(s3_client, bucket, prefix, local_path): print("The following files do not exist for upload:") print(json.dumps(list(non_existent_files))) if md5_matches: - print("The following files MD5 hashes matches those in S3:") + print("The following MD5 hashes match those in S3:") print(json.dumps(list(md5_matches))) -def upload_new_file_to_s3(bucket, filename, local_path, non_existent_files, prefix, s3_client): +def upload_new_file_to_s3(bucket, filename, local_path, prefix, s3_client): local_file_path = os.path.join(local_path, filename) if not os.path.exists(local_file_path): From c6eb89172fda0a9b66a60011d0ec1c5ad5c03616 Mon Sep 17 00:00:00 2001 From: Gabriel Chamon Araujo Date: Thu, 2 Mar 2023 11:14:32 -0300 Subject: [PATCH 48/49] fix typo --- clamav.py | 6 +++--- common.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/clamav.py b/clamav.py index c96ba8a3..4ab1561a 100644 --- a/clamav.py +++ b/clamav.py @@ -32,7 +32,7 @@ from common import AV_DEFINITION_PATH from common import AV_DEFINITION_S3_BUCKET from common import AV_DEFINITION_S3_PREFIX -from common import AV_DETINITION_EXTRA_FILES +from common import AV_DEFINITION_EXTRA_FILES from common import AV_EXTRA_VIRUS_DEFINITIONS from common import AV_SIGNATURE_OK from common import AV_SIGNATURE_UNKNOWN @@ -84,7 +84,7 @@ def update_defs_from_s3(s3_client, bucket, prefix): } if AV_EXTRA_VIRUS_DEFINITIONS is True: - for filename in AV_DETINITION_EXTRA_FILES: + for filename in AV_DEFINITION_EXTRA_FILES: s3_path = os.path.join(AV_DEFINITION_S3_PREFIX, filename) local_path = os.path.join(AV_DEFINITION_PATH, filename) s3_md5 = md5_from_s3_tags(s3_client, bucket, s3_path) @@ -120,7 +120,7 @@ def upload_defs_to_s3(s3_client, bucket, prefix, local_path): official_databases = [file_prefix + "." + file_suffix for file_prefix in AV_DEFINITION_FILE_PREFIXES for file_suffix in AV_DEFINITION_FILE_SUFFIXES] - all_databases = official_databases + AV_DETINITION_EXTRA_FILES + all_databases = official_databases + AV_DEFINITION_EXTRA_FILES for filename in all_databases: try: diff --git a/common.py b/common.py index 08d8017a..bfeefe2f 100644 --- a/common.py +++ b/common.py @@ -71,7 +71,7 @@ def str_to_bool(s): ] AV_DEFINITION_FILE_SUFFIXES = ["cld", "cvd"] -AV_DETINITION_EXTRA_FILES = [ +AV_DEFINITION_EXTRA_FILES = [ "MiscreantPunch099-Low.ldb", "badmacro.ndb", "blurl.ndb", From 750f7f0336fc185e407c2cd88ce2976d450f34b7 Mon Sep 17 00:00:00 2001 From: Gabriel Chamon Araujo Date: Thu, 2 Mar 2023 11:38:44 -0300 Subject: [PATCH 49/49] only add extra definition files to databases to upload if necessary --- clamav.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/clamav.py b/clamav.py index 4ab1561a..298cb1fa 100644 --- a/clamav.py +++ b/clamav.py @@ -120,7 +120,9 @@ def upload_defs_to_s3(s3_client, bucket, prefix, local_path): official_databases = [file_prefix + "." + file_suffix for file_prefix in AV_DEFINITION_FILE_PREFIXES for file_suffix in AV_DEFINITION_FILE_SUFFIXES] - all_databases = official_databases + AV_DEFINITION_EXTRA_FILES + all_databases = (official_databases + AV_DEFINITION_EXTRA_FILES + if AV_EXTRA_VIRUS_DEFINITIONS is True + else official_databases) for filename in all_databases: try: