diff --git a/.gitignore b/.gitignore index 844524c30..b7776e0b0 100644 --- a/.gitignore +++ b/.gitignore @@ -6,11 +6,11 @@ docs/build __pycache__ # Terraform files -*.terraform/ -terraform.tfvars -terraform/terraform.tfstate* -terraform/*.zip -terraform/*.tf.json +streamalert_cli/_infrastructure/.terraform/ +streamalert_cli/_infrastructure/terraform.tfvars +streamalert_cli/_infrastructure/terraform.tfstate* +streamalert_cli/_infrastructure/*.zip +streamalert_cli/_infrastructure/*.tf.json # Coveralls repo token .coveralls.yml diff --git a/Vagrantfile b/Vagrantfile index d06e9a9a1..7c0e9ac57 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -33,9 +33,6 @@ def configure_python(machine, version) end STREAMALERT_SHELL_ENV = %{ -export AWS_DEFAULT_REGION='#{ENV.fetch('SA_AWS_DEFAULT_REGION', 'Your region here!')}' -export AWS_ACCESS_KEY_ID='#{ENV.fetch('SA_AWS_ACCESS_KEY_ID', 'Your access key ID here!')}' -export AWS_SECRET_ACCESS_KEY='#{ENV.fetch('SA_AWS_SECRET_ACCESS_KEY', 'Your secret access key here!')}' export SA_EMAIL='#{ENV.fetch('SA_EMAIL', 'example@example.com')}' } @@ -114,4 +111,11 @@ Vagrant.configure(2) do |config| configure_streamalert(py3) final_message(py3) end + + config.ssh.forward_env = [ + 'AWS_DEFAULT_REGION', + 'AWS_ACCESS_KEY_ID', + 'AWS_SECRET_ACCESS_KEY', + 'AWS_SESSION_TOKEN' + ] end diff --git a/conf/clusters/prod.json b/conf/clusters/prod.json index 57c889286..ed04a32ab 100644 --- a/conf/clusters/prod.json +++ b/conf/clusters/prod.json @@ -40,7 +40,8 @@ "prefix.cluster.sample.bucket": [ "cloudtrail", "carbonblack", - "fleet" + "fleet", + "packetbeat" ] }, "sns": { @@ -84,4 +85,4 @@ "lambda_alarms_enabled": true } } -} \ No newline at end of file +} diff --git a/conf/global.json b/conf/global.json index f6c10b855..a2d260916 100644 --- a/conf/global.json +++ b/conf/global.json @@ -10,6 +10,9 @@ ], "rule_locations": [ "rules" + ], + "scheduled_query_locations": [ + "scheduled_queries" ] }, "infrastructure": { @@ -21,7 +24,6 @@ "use_prefix": true, "buffer_interval": 900, "buffer_size": 128, - "compression_format": "GZIP", "enabled": false, "enabled_logs": {} }, diff --git a/conf/lambda.json b/conf/lambda.json index 919140b6d..6396c1ae8 100644 --- a/conf/lambda.json +++ b/conf/lambda.json @@ -52,6 +52,7 @@ }, "athena_partition_refresh_config": { "concurrency_limit": 10, + "file_format": null, "log_level": "info" }, "classifier_config": {}, @@ -79,7 +80,7 @@ "timeout": 120 }, "rules_engine_config": { - "concurrency_limit": 200, + "concurrency_limit": 10, "enable_custom_metrics": true, "log_level": "info", "log_retention_days": 14, diff --git a/conf/outputs.json b/conf/outputs.json index f6f886acb..bfa1ebcc9 100644 --- a/conf/outputs.json +++ b/conf/outputs.json @@ -5,6 +5,9 @@ "aws-s3": { "bucket": "aws-s3-bucket" }, + "aws-ses": [ + "sample-integration" + ], "aws-sns": { "sample-topic": "sample-topic-name" }, @@ -25,5 +28,8 @@ ], "slack": [ "sample-channel" + ], + "teams": [ + "sample-webhook" ] } \ No newline at end of file diff --git a/conf/scheduled_queries.json b/conf/scheduled_queries.json new file mode 100644 index 000000000..e8d0e2e7f --- /dev/null +++ b/conf/scheduled_queries.json @@ -0,0 +1,24 @@ +{ + "enabled": false, + "config": { + "destination_kinesis_stream": "KINESIS_STREAM", + "sfn_timeout_secs": 300, + "sfn_wait_secs": 30 + }, + "packs": { + "sample": { + "description": "Runs sample queries once per hour", + "schedule_expression": "rate(1 hour)" + } + }, + "lambda_config": { + "log_level": "info", + "log_retention_days": 14, + "memory": 128, + "timeout": 60, + 
"alarms_enabled": false, + "error_threshold": 1, + "error_period_secs": 3600, + "error_evaluation_periods": 2 + } +} \ No newline at end of file diff --git a/conf/schemas/carbonblack.json b/conf/schemas/carbonblack.json index 83a408de1..c9d4e88de 100644 --- a/conf/schemas/carbonblack.json +++ b/conf/schemas/carbonblack.json @@ -22,7 +22,7 @@ }, "carbonblack:alert.watchlist.hit.feedsearch.bin": { "schema": { - "alert_severity": "float", + "alert_severity": "string", "alert_type": "string", "assigned_to": "string", "cb_server": "string", @@ -48,7 +48,7 @@ "segment_id": "integer", "sensor_criticality": "integer", "status": "string", - "timestamp": "float", + "timestamp": "string", "type": "string", "unique_id": "string", "watchlist_id": "string", @@ -71,7 +71,7 @@ }, "carbonblack:alert.watchlist.hit.ingress.binary": { "schema": { - "alert_severity": "float", + "alert_severity": "string", "alert_type": "string", "assigned_to": "string", "cb_server": "string", @@ -97,7 +97,7 @@ "segment_id": "integer", "sensor_criticality": "integer", "status": "string", - "timestamp": "float", + "timestamp": "string", "type": "string", "unique_id": "string", "watchlist_id": "string", @@ -107,7 +107,7 @@ }, "carbonblack:alert.watchlist.hit.ingress.process": { "schema": { - "alert_severity": "integer", + "alert_severity": "string", "alert_type": "string", "assigned_to": "string", "cb_server": "string", @@ -145,8 +145,9 @@ "segment_id": "string", "sensor_criticality": "integer", "sensor_id": "integer", + "sha256": "string", "status": "string", - "timestamp": "float", + "timestamp": "string", "type": "string", "unique_id": "string", "username": "string", @@ -166,7 +167,8 @@ "ioc_query_index", "ioc_query_string", "ioc_value_facet", - "resolved_time" + "resolved_time", + "sha256" ] } }, @@ -234,8 +236,9 @@ "segment_id": "string", "sensor_criticality": "integer", "sensor_id": "integer", + "sha256": "string", "status": "string", - "timestamp": "float", + "timestamp": "string", "type": "string", "unique_id": "string", "username": "string", @@ -280,7 +283,8 @@ "alliance_updated_virustotalconnector", "assigned_to", "ioc_attr", - "resolved_time" + "resolved_time", + "sha256" ] } }, @@ -298,7 +302,8 @@ "group": "string", "md5": "string", "scores": {}, - "timestamp": "float", + "sha256": "string", + "timestamp": "string", "type": "string", "watchlists": {} }, @@ -312,18 +317,25 @@ "md5": "string", "scores": {}, "sensor_id": "integer", - "timestamp": "float", + "sha256": "string", + "timestamp": "string", "type": "string", "watchlists": {} }, - "parser": "json" + "parser": "json", + "configuration": { + "optional_top_level_keys": [ + "sha256" + ] + } }, "carbonblack:binaryinfo.observed": { "schema": { "cb_server": "string", "md5": "string", "scores": {}, - "timestamp": "float", + "sha256": "string", + "timestamp": "string", "type": "string", "watchlists": {} }, @@ -361,7 +373,7 @@ "report_score": "integer", "sensor_id": "integer", "server_name": "string", - "timestamp": "float", + "timestamp": "string", "type": "string" }, "parser": "json" @@ -384,7 +396,7 @@ "report_score": "integer", "sensor_id": "integer", "server_name": "string", - "timestamp": "float", + "timestamp": "string", "type": "string" }, "parser": "json" @@ -410,7 +422,7 @@ "segment_id": "string", "sensor_id": "integer", "server_name": "string", - "timestamp": "float", + "timestamp": "string", "type": "string" }, "parser": "json", @@ -475,7 +487,7 @@ "report_id": "string", "report_score": "integer", "sensor_id": "integer", - "timestamp": "float", + "timestamp": 
"string", "type": "string" }, "json_path": "docs[*]", @@ -486,6 +498,7 @@ }, "carbonblack:feed.query.hit.process": { "schema": { + "alliance_data_attackframework": [], "alliance_data_bit9endpointvisibility": [], "alliance_data_bit9suspiciousindicators": [], "alliance_data_nvd": [], @@ -493,6 +506,7 @@ "alliance_data_srstrust": [], "alliance_data_virustotal": [], "alliance_data_virustotalconnector": [], + "alliance_link_attackframework": "string", "alliance_link_bit9endpointvisibility": "string", "alliance_link_bit9suspiciousindicators": "string", "alliance_link_nvd": "string", @@ -500,6 +514,7 @@ "alliance_link_srstrust": "string", "alliance_link_virustotal": "string", "alliance_link_virustotalconnector": "string", + "alliance_score_attackframework": "integer", "alliance_score_bit9endpointvisibility": "integer", "alliance_score_bit9suspiciousindicators": "integer", "alliance_score_nvd": "integer", @@ -507,6 +522,7 @@ "alliance_score_srstrust": "integer", "alliance_score_virustotal": "integer", "alliance_score_virustotalconnector": "integer", + "alliance_updated_attackframework": "string", "alliance_updated_bit9endpointvisibility": "string", "alliance_updated_bit9suspiciousindicators": "string", "alliance_updated_nvd": "string", @@ -539,6 +555,7 @@ "process_md5": "string", "process_name": "string", "process_pid": "integer", + "process_sha256": "string", "processblock_count": "integer", "regmod_count": "integer", "segment_id": "string", @@ -567,11 +584,12 @@ "report_score": "integer", "segment_id": "string", "sensor_id": "integer", - "timestamp": "float", + "timestamp": "string", "type": "string" }, "json_path": "docs[*]", "optional_top_level_keys": [ + "alliance_data_attackframework", "alliance_data_bit9endpointvisibility", "alliance_data_bit9suspiciousindicators", "alliance_data_nvd", @@ -579,6 +597,7 @@ "alliance_data_srstrust", "alliance_data_virustotal", "alliance_data_virustotalconnector", + "alliance_link_attackframework", "alliance_link_bit9endpointvisibility", "alliance_link_bit9suspiciousindicators", "alliance_link_nvd", @@ -586,6 +605,7 @@ "alliance_link_srstrust", "alliance_link_virustotal", "alliance_link_virustotalconnector", + "alliance_score_attackframework", "alliance_score_bit9endpointvisibility", "alliance_score_bit9suspiciousindicators", "alliance_score_nvd", @@ -593,6 +613,7 @@ "alliance_score_srstrust", "alliance_score_virustotal", "alliance_score_virustotalconnector", + "alliance_updated_attackframework", "alliance_updated_bit9endpointvisibility", "alliance_updated_bit9suspiciousindicators", "alliance_updated_nvd", @@ -602,6 +623,7 @@ "alliance_updated_virustotalconnector", "interface_ip", "process_guid", + "process_sha256", "segment_id" ] } @@ -627,13 +649,15 @@ "report_score": "integer", "sensor_id": "integer", "server_name": "string", - "timestamp": "float", + "sha256": "string", + "timestamp": "string", "type": "string" }, "configuration": { "optional_top_level_keys": [ "ioc_query_index", - "ioc_query_string" + "ioc_query_string", + "sha256" ] }, "parser": "json" @@ -663,7 +687,7 @@ "segment_id": "string", "sensor_id": "integer", "server_name": "string", - "timestamp": "float", + "timestamp": "string", "type": "string" }, "parser": "json", @@ -681,7 +705,7 @@ "feed_name": "string", "feed_update_time": "string", "scan_start_time": "string", - "timestamp": "float", + "timestamp": "string", "type": "string" }, "parser": "json" @@ -703,17 +727,19 @@ "path": "string", "pid": "integer", "process_guid": "string", + "process_path": "string", "sensor_id": "integer", 
"sha256": "string", "tamper": "boolean", "tamper_sent": "boolean", - "timestamp": "float", + "timestamp": "string", "type": "string" }, "configuration": { "optional_top_level_keys": [ "child_command_line", - "child_username" + "child_username", + "process_path" ] }, "parser": "json" @@ -738,7 +764,7 @@ "target_pid": "integer", "target_process_guid": "string", "target_sha256": "string", - "timestamp": "float", + "timestamp": "string", "type": "string" }, "parser": "json", @@ -769,7 +795,7 @@ "sha256": "string", "tamper": "boolean", "tamper_sent": "boolean", - "timestamp": "float", + "timestamp": "string", "type": "string" }, "parser": "json", @@ -794,7 +820,7 @@ "sensor_id": "integer", "sha256": "string", "size": "integer", - "timestamp": "float", + "timestamp": "string", "type": "string", "utf8_comments": "string", "utf8_company_name": "string", @@ -827,7 +853,7 @@ "process_path": "string", "sensor_id": "integer", "sha256": "string", - "timestamp": "float", + "timestamp": "string", "type": "string" }, "parser": "json", @@ -868,7 +894,7 @@ "remote_port": "string", "sensor_id": "string", "sha256": "string", - "timestamp": "float", + "timestamp": "string", "type": "string" }, "parser": "json", @@ -912,7 +938,7 @@ "process_path": "string", "sensor_id": "integer", "sha256": "string", - "timestamp": "integer", + "timestamp": "string", "type": "string", "uid": "string", "username": "string" @@ -952,7 +978,7 @@ "process_create_time": "integer", "process_guid": "string", "sensor_id": "integer", - "timestamp": "float", + "timestamp": "string", "type": "string", "uid": "string", "username": "string" @@ -1036,7 +1062,7 @@ "sha256": "string", "tamper": "boolean", "tamper_sent": "boolean", - "timestamp": "float", + "timestamp": "string", "type": "string" }, "parser": "json", @@ -1065,7 +1091,7 @@ "target_pid": "integer", "target_process_guid": "string", "target_sha256": "string", - "timestamp": "float", + "timestamp": "string", "type": "string" }, "parser": "json", @@ -1082,7 +1108,7 @@ "event_type": "string", "sensor_id": "integer", "tamper_type": "string", - "timestamp": "float", + "timestamp": "string", "type": "string" }, "parser": "json", @@ -1166,7 +1192,7 @@ "cb_version": "string", "highlights_by_doc": {}, "server_name": "string", - "timestamp": "float", + "timestamp": "string", "type": "string", "watchlist_id": "integer", "watchlist_name": "string" @@ -1214,7 +1240,7 @@ }, "carbonblack:watchlist.hit.ingress.binary": { "schema": { - "alert_severity": "float", + "alert_severity": "string", "alert_type": "string", "assigned_to": "string", "cb_server": "string", @@ -1238,7 +1264,7 @@ "report_score": "integer", "sensor_criticality": "integer", "status": "string", - "timestamp": "float", + "timestamp": "string", "type": "string", "unique_id": "string", "watchlist_id": "string", @@ -1258,6 +1284,7 @@ "alliance_data_nvd": [], "alliance_data_srsthreat": [], "alliance_data_srstrust": [], + "alliance_data_tor": [], "alliance_data_virustotal": [], "alliance_data_virustotalconnector": [], "alliance_link_bit9endpointvisibility": "string", @@ -1265,6 +1292,7 @@ "alliance_link_nvd": "string", "alliance_link_srsthreat": "string", "alliance_link_srstrust": "string", + "alliance_link_tor": "string", "alliance_link_virustotal": "string", "alliance_link_virustotalconnector": "string", "alliance_score_bit9endpointvisibility": "integer", @@ -1272,6 +1300,7 @@ "alliance_score_nvd": "integer", "alliance_score_srsthreat": "integer", "alliance_score_srstrust": "integer", + "alliance_score_tor": "integer", 
"alliance_score_virustotal": "integer", "alliance_score_virustotalconnector": "integer", "alliance_updated_bit9endpointvisibility": "string", @@ -1279,6 +1308,7 @@ "alliance_updated_nvd": "string", "alliance_updated_srsthreat": "string", "alliance_updated_srstrust": "string", + "alliance_updated_tor": "string", "alliance_updated_virustotal": "string", "alliance_updated_virustotalconnector": "string", "childproc_count": "integer", @@ -1311,6 +1341,7 @@ "process_md5": "string", "process_name": "string", "process_pid": "integer", + "process_sha256": "string", "processblock_count": "integer", "regmod_count": "integer", "segment_id": "integer", @@ -1326,7 +1357,7 @@ "cb_version": "string", "highlights_by_doc": {}, "server_name": "string", - "timestamp": "float", + "timestamp": "string", "type": "string", "watchlist_id": "integer", "watchlist_name": "string" @@ -1338,6 +1369,7 @@ "alliance_data_nvd", "alliance_data_srsthreat", "alliance_data_srstrust", + "alliance_data_tor", "alliance_data_virustotal", "alliance_data_virustotalconnector", "alliance_link_bit9endpointvisibility", @@ -1345,6 +1377,7 @@ "alliance_link_nvd", "alliance_link_srsthreat", "alliance_link_srstrust", + "alliance_link_tor", "alliance_link_virustotal", "alliance_link_virustotalconnector", "alliance_score_bit9endpointvisibility", @@ -1352,6 +1385,7 @@ "alliance_score_nvd", "alliance_score_srsthreat", "alliance_score_srstrust", + "alliance_score_tor", "alliance_score_virustotal", "alliance_score_virustotalconnector", "alliance_updated_bit9endpointvisibility", @@ -1359,9 +1393,11 @@ "alliance_updated_nvd", "alliance_updated_srsthreat", "alliance_updated_srstrust", + "alliance_updated_tor", "alliance_updated_virustotal", "alliance_updated_virustotalconnector", - "process_guid" + "process_guid", + "process_sha256" ] } }, @@ -1372,7 +1408,7 @@ "docs": [], "md5": "string", "server_name": "string", - "timestamp": "float", + "timestamp": "string", "type": "string", "watchlist_id": "integer", "watchlist_name": "string" @@ -1395,7 +1431,7 @@ "process_id": "string", "segment_id": "string", "server_name": "string", - "timestamp": "float", + "timestamp": "string", "type": "string", "watchlist_id": "integer", "watchlist_name": "string" diff --git a/conf/schemas/duo.json b/conf/schemas/duo.json index afb4bd2a0..f649cc7e9 100644 --- a/conf/schemas/duo.json +++ b/conf/schemas/duo.json @@ -17,6 +17,7 @@ "factor": "string", "integration": "string", "ip": "string", + "isotimestamp": "string", "location": {}, "new_enrollment": "boolean", "reason": "string", diff --git a/conf/schemas/osquery.json b/conf/schemas/osquery.json index 396585fa3..5b4f8041e 100644 --- a/conf/schemas/osquery.json +++ b/conf/schemas/osquery.json @@ -4,6 +4,7 @@ "calendarTime": "string", "counter": "integer", "decorations": {}, + "log_type": "string", "diffResults": { "added": [], "removed": [] @@ -18,6 +19,7 @@ "optional_top_level_keys": [ "decorations", "epoch", + "log_type", "counter" ] } @@ -70,6 +72,7 @@ "decorations", "epoch", "counter", + "log_type", "logNumericsAsNumbers", "numerics" ] @@ -91,8 +94,9 @@ "parser": "json", "configuration": { "optional_top_level_keys": [ - "decorations" + "decorations", + "log_type" ] } } -} \ No newline at end of file +} diff --git a/conf/schemas/packetbeat.json b/conf/schemas/packetbeat.json new file mode 100644 index 000000000..314c6bf40 --- /dev/null +++ b/conf/schemas/packetbeat.json @@ -0,0 +1,32 @@ +{ + "packetbeat:dns": { + "schema": { + "@timestamp": "string", + "client_ip": "string", + "type": "string", + "transport": 
"string", + "bytes_in": "integer", + "bytes_out": "integer", + "dns": {} + }, + "parser": "json", + "configuration": { + "optional_top_level_keys": [ + "bytes_in", + "bytes_out" + ] + } + }, + "packetbeat:flow": { + "schema": { + "@timestamp": "string", + "start_time": "string", + "last_time": "string", + "type": "string", + "final": "boolean", + "dest": {}, + "source": {} + }, + "parser": "json" + } +} diff --git a/conf/schemas/streamquery.json b/conf/schemas/streamquery.json new file mode 100644 index 000000000..f79fd0033 --- /dev/null +++ b/conf/schemas/streamquery.json @@ -0,0 +1,17 @@ +{ + "streamquery:version1": { + "parser": "json", + "schema": { + "streamquery_schema_version": "string", + "execution": {}, + "data": {} + }, + "configuration": { + "log_patterns": { + "streamquery_schema_version": [ + "1.*.*" + ] + } + } + } +} \ No newline at end of file diff --git a/docs/images/alerts-query.png b/docs/images/alerts-query.png deleted file mode 100644 index 37118c71c..000000000 Binary files a/docs/images/alerts-query.png and /dev/null differ diff --git a/docs/images/athena-alerts-search.png b/docs/images/athena-alerts-search.png new file mode 100644 index 000000000..88218cb10 Binary files /dev/null and b/docs/images/athena-alerts-search.png differ diff --git a/docs/images/athena-data-search.png b/docs/images/athena-data-search.png new file mode 100644 index 000000000..ded566e08 Binary files /dev/null and b/docs/images/athena-data-search.png differ diff --git a/docs/images/athena-refresh-arch.png b/docs/images/athena-refresh-arch.png deleted file mode 100644 index 1c78141d9..000000000 Binary files a/docs/images/athena-refresh-arch.png and /dev/null differ diff --git a/docs/images/athena-usage-1.png b/docs/images/athena-usage-1.png deleted file mode 100644 index 79b5e7255..000000000 Binary files a/docs/images/athena-usage-1.png and /dev/null differ diff --git a/docs/images/athena-usage-2.png b/docs/images/athena-usage-2.png deleted file mode 100644 index 02b082af6..000000000 Binary files a/docs/images/athena-usage-2.png and /dev/null differ diff --git a/docs/images/athena-usage-3.png b/docs/images/athena-usage-3.png deleted file mode 100644 index 51e3a16ac..000000000 Binary files a/docs/images/athena-usage-3.png and /dev/null differ diff --git a/docs/images/athena-usage-4.png b/docs/images/athena-usage-4.png deleted file mode 100644 index b167040e2..000000000 Binary files a/docs/images/athena-usage-4.png and /dev/null differ diff --git a/docs/images/historical-search.png b/docs/images/historical-search.png new file mode 100644 index 000000000..151bfac5e Binary files /dev/null and b/docs/images/historical-search.png differ diff --git a/docs/source/config-clusters.rst b/docs/source/config-clusters.rst index 6420a7041..cbbb54697 100644 --- a/docs/source/config-clusters.rst +++ b/docs/source/config-clusters.rst @@ -275,6 +275,7 @@ Options ``is_global_trail`` ``true`` If ``true``, the CloudTrail is applied to all regions ``send_to_cloudwatch`` ``false`` Enable CloudTrail delivery to CloudWatch Logs. Logs sent to CloudWatch Logs are forwarded to this cluster's Kinesis stream for processing. If this is enabled, the ``enable_s3_events`` option should be disabled to avoid duplicative processing. ``cloudwatch_destination_arn`` (Computed from CloudWatch Logs Destination module) CloudWatch Destination ARN used for forwarding data to this cluster's Kinesis stream. 
This has a default value but can be overriden here with a different CloudWatch Logs Destination ARN +``send_to_sns`` ``false`` Create an SNS topic to which notifications should be sent when CloudTrail puts a new object in the S3 bucket. The topic name will be the same as the S3 bucket name ``enable_s3_events`` ``false`` Enable S3 events for the logs sent to the S3 bucket. These will invoke this cluster's classifier for every new object in the CloudTrail S3 bucket ``s3_bucket_name`` ``prefix-cluster-streamalert-cloudtrail`` Name of the S3 bucket to be used for the CloudTrail logs. This can be overriden, but defaults to ``prefix-cluster-streamalert-cloudtrail`` ``s3_event_selector_type`` ``""`` An S3 event selector to enable object level logging for the account's S3 buckets. Choices are: "ReadOnly", "WriteOnly", "All", or "", where "" disables object level logging for S3 diff --git a/docs/source/config-global.rst b/docs/source/config-global.rst index 172acf49a..8b074375a 100644 --- a/docs/source/config-global.rst +++ b/docs/source/config-global.rst @@ -66,6 +66,9 @@ Configuration ], "rule_locations": [ "rules" + ], + "scheduled_query_locations": [ + "scheduled_queries" ] } } @@ -73,12 +76,13 @@ Configuration Options ------- -====================== ============ ================= =============== -**Key** **Required** **Default** **Description** ----------------------- ------------ ----------------- --------------- -``matcher_locations`` Yes ``["matchers"]`` List of local paths where ``matchers`` are defined -``rule_locations`` Yes ``["rules"]`` List of local paths where ``rules`` are defined -====================== ============ ================= =============== +============================= ============= ========================= =============== +**Key** **Required** **Default** **Description** +----------------------------- ------------- ------------------------- --------------- +``matcher_locations`` Yes ``["matchers"]`` List of local paths where ``matchers`` are defined +``rule_locations`` Yes ``["rules"]`` List of local paths where ``rules`` are defined +``scheduled_query_locations`` Yes ``["scheduled_queries"]`` List of local paths where ``scheduled_queries`` are defined +============================= ============= ========================= =============== ************** @@ -97,6 +101,8 @@ was triggered, the source of the log, the date/time the alert was triggered, the which the log came, and a variety of other fields. +.. _alerts_firehose_configuration: + Configuration ------------- The following ``alerts_firehose`` configuration settings can be defined within the ``infrastructure`` @@ -110,8 +116,7 @@ section of ``global.json``: "bucket_name": "-streamalerts", "buffer_size": 64, "buffer_interval": 300, - "cloudwatch_log_retention": 14, - "compression_format": "GZIP" + "cloudwatch_log_retention": 14 } } } @@ -127,7 +132,6 @@ Options before delivering it to S3 ``buffer_interval`` No ``300`` (seconds) Buffer incoming data for the specified period of time, in seconds, before delivering it to S3 -``compression_format`` No ``GZIP`` The compression algorithm to use on data stored in S3 ``cloudwatch_log_retention`` No ``14`` (days) Days for which to retain error logs that are sent to CloudWatch in relation to this Kinesis Firehose Delivery Stream ============================= ============ ========================== =============== @@ -206,6 +210,8 @@ Options =============== ============ =========== =============== +.. 
_firehose_configuration:
+
 Firehose (Historical Data Retention)
 ====================================
 StreamAlert also supports sending all logs to S3 for historical retention and searching based on
@@ -228,7 +234,6 @@ section of ``global.json``:
         "bucket_name": "-streamalert-data",
         "buffer_size": 64,
         "buffer_interval": 300,
-        "compression_format": "GZIP",
         "enabled_logs": {
           "osquery": {
             "enable_alarm": true
@@ -258,7 +263,6 @@ Options
 ``bucket_name``         No           ``-streamalert-data``          Bucket name to override the default name
 ``buffer_size``         No           ``64`` (MB)                    Buffer incoming data to the specified size, in megabytes, before delivering it to S3
 ``buffer_interval``     No           ``300`` (seconds)              Buffer incoming data for the specified period of time, in seconds, before delivering it to S3
-``compression_format``  No           ``GZIP``                       The compression algorithm to use on data stored in S3
 ``enabled_logs``        No           ``{}``                         Which classified log types to send to Kinesis Firehose from the Classifier function, along with specific settings per log type
 ======================= ============ ============================== ===============
@@ -353,9 +357,9 @@ For instance, suppose the following schemas are defined across one or more files
 Supposing also that the above ``enabled_logs`` :ref:`example ` is used, the following Firehose resources will be created:
-* ``_streamalert_data_cloudwatch_cloudtrail``
-* ``_streamalert_data_osquery_differential``
-* ``_streamalert_data_osquery_status``
+* ``_streamalert_cloudwatch_cloudtrail``
+* ``_streamalert_osquery_differential``
+* ``_streamalert_osquery_status``
 .. note::
diff --git a/docs/source/dynamic-outputs.rst b/docs/source/dynamic-outputs.rst
new file mode 100644
index 000000000..f5dc76a98
--- /dev/null
+++ b/docs/source/dynamic-outputs.rst
@@ -0,0 +1,141 @@
+###############
+Dynamic Outputs
+###############
+
+
+*************
+Prerequisites
+*************
+
+* Any output assigned must be added with ``python manage.py output``
+* ``functions`` must return ``None``, ``str`` or ``List[str]`` which maps to an output configured with the above.
+* Only pass ``context`` if the ``rule`` sets context.
+
+
+********
+Overview
+********
+
+Dynamic outputs add the ability to run custom logic that defines an ``output`` or ``outputs`` based on information within the ``record``.
+For information on supported outputs and how to add support for additional outputs, see `outputs`_.
+
+As can be seen in the examples below, they are easy to configure, but they add a very useful feature to StreamAlert.
+
+- StreamAlert sends to all outputs defined within a rule's ``outputs=[]`` and ``dynamic_outputs=[]`` when sending ``Alerts``.
+- It is also possible to pass ``context`` to the ``dynamic_function`` if the ``rule`` sets it.
+
+.. note::
+  Any ``output`` passed must be configured with ``./manage.py output -h``
+
+
+Example: Simple
+===============
+
+The code block below is considered a simple ``dynamic_output`` function, because the outputs are dynamically configured, but the information used still lives within the code. It also:
+
+  - allows you to maintain a static list of information inside your code
+  - will return the outputs relevant to the team that "owns" the account
+  - ``Alerts`` are sent to the ``aws-sns:security`` output as well as those returned by the function
+
+.. code-block:: python
+
+  from streamalert.shared.rule import rule
+
+  def map_account_to_team(record):
+      teams = {
+          "team_a": {"accounts": ["123", "456", ...], "outputs": ["aws-sns:team_a"]},
+          "team_b": {"accounts": ["789", ...], "outputs": ["aws-sns:team_b", "slack:team_b"]},
+      }
+
+      account_id = record.get('recipientaccountid')
+
+      for team in teams:
+          if account_id in team["accounts"]:
+              return team["outputs"]
+      # None is guarded against by StreamAlert
+
+  @rule(
+    logs=['cloudwatch:events'],
+    req_subkeys={
+      'detail': ['userIdentity', 'eventType']
+    },
+    outputs=["aws-sns:security"],
+    dynamic_outputs=[map_account_to_team]
+  )
+  def cloudtrail_root_account_usage(rec):
+      # Rule logic
+
+
+Example: With LookupTables
+==========================
+
+With the simple addition of a `lookup-table`_ you can take a rule like ``cloudtrail_root_account_usage`` and configure it as such:
+
+.. code-block:: python
+
+  from streamalert.shared.rule import rule
+  from streamalert.shared.lookup_tables.core import LookupTables
+
+  def dynamic_output_with_context(record, context):  # pass context only if the rule added context
+      account_id = context["account_id"]
+
+      return LookupTables.get(
+          'my_lookup_table',
+          'aws-account-owner:{}'.format(account_id),
+          None
+      )  # potentially returns [aws-sns:team_a]
+
+  @rule(
+    logs=['cloudwatch:events'],
+    outputs=["aws-sns:security"],
+    dynamic_outputs=[dynamic_output_with_context],
+    context={"account_id": "valid_account_id"},
+  )
+  def cloudtrail_root_account_usage(rec, context):
+      context["account_id"] = rec.get('recipientaccountid')
+      # Rule logic
+
+The above has the benefit of using information that lives outside of StreamAlert, which means teams can acquire new accounts and get ``Alerts``
+without having to alter StreamAlert code.
+
+
+Example: With Other Data Source
+================================
+
+.. code-block:: python
+
+  from streamalert.shared.rule import rule
+  import requests
+
+  def dynamic_output(record):
+      account_id = record.get('recipientaccountid')
+
+      # invoke an external API to get data back
+      response = requests.get("API/team_map")
+
+      for team in response.json():
+          if account_id in team["accounts"]:
+              return team["outputs"]  # potentially "aws-lambda:team_a"
+
+  @rule(
+    logs=['cloudwatch:events'],
+    outputs=["aws-sns:security"],
+    dynamic_outputs=[dynamic_output],
+  )
+  def cloudtrail_root_account_usage(rec):
+      # Rule logic
+
+The above example uses an external API to get the output map, which is queried with the ``account_id`` from the record.
+This is just an example, but hopefully it highlights the many ways in which ``dynamic_outputs`` can be used.
+
+.. warning::
+  The above example could result in many queries to the API in use and could potentially slow down StreamAlert
+  Lambdas when processing ``Alerts``.
+
+
+..
+  All references should be placed here for easy updating
+  This section is not included in the generated documentation
+
+.. _`lookup-table`: lookup-tables.html
+.. _`outputs`: outputs.html
\ No newline at end of file
diff --git a/docs/source/getting-started.rst b/docs/source/getting-started.rst
index 608aef9bf..1eb425bc1 100644
--- a/docs/source/getting-started.rst
+++ b/docs/source/getting-started.rst
@@ -103,6 +103,20 @@ Deploy
    python manage.py configure aws_account_id 111111111111  # Replace with your 12-digit AWS account ID
    python manage.py configure prefix                        # Choose a unique name prefix (alphanumeric characters only)
+.. note::
+
+  * Update the ``file_format`` value in ``conf/lambda.json``.
    Valid options are ``parquet`` or ``json``. The default value will be ``parquet`` in a future release, but it must be manually configured at this time.
+
+    .. code-block:: bash
+
+      "athena_partition_refresh_config": {
+        "concurrency_limit": 10,
+        "file_format": "parquet",
+        "log_level": "info"
+      }
+
+  * More information can be found on the `historical search `_ page.
+
 2. Build the StreamAlert infrastructure for the first time:
 .. code-block:: bash
@@ -264,10 +278,10 @@ If not, look for any errors in the CloudWatch Logs for the StreamAlert Lambda fu
 `Amazon Athena `_. Select your StreamAlert database in the dropdown on the left and preview the ``alerts`` table:
-.. figure:: ../images/alerts-query.png
+.. figure:: ../images/athena-alerts-search.png
    :alt: Query Alerts Table in Athena
    :align: center
-   :target: _images/alerts-query.png
+   :target: _images/athena-alerts-search.png
 (Here, my name prefix is ``testv2``.) If no records are returned, look for errors in the ``athena_partition_refresh`` function or try invoking it directly.
diff --git a/docs/source/historical-search.rst b/docs/source/historical-search.rst
index cc317efa9..cbafac739 100644
--- a/docs/source/historical-search.rst
+++ b/docs/source/historical-search.rst
@@ -1,103 +1,96 @@
-#################
 Historical Search
 #################
-The historical data retention and search feature in StreamAlert is backed by Amazon Athena and S3.
-Amazon Athena is a serverless query service used to analyze large volumes of data stored in S3.
-
-Data in Athena is searchable via ANSI SQL and powered by Presto.
+StreamAlert's historical search feature is backed by Amazon S3 and `Athena `_. By default, StreamAlert sends all alerts to S3, where they are searchable through an Athena table. Users also have the option to enable historical search for data as well.
-StreamAlert uses Amazon Athena for historical searching of:
+As of StreamAlert v3.1.0, a new field, ``file_format``, has been added to ``athena_partition_refresh_config`` in ``conf/lambda.json``, defaulting to ``null``. This field allows users to configure how the data processed by the Classifier is stored in the S3 bucket: either ``parquet`` or ``json``. Prior to v3.1.0, all data was stored in ``json``. With that format, Athena's search performance degrades greatly as partition sizes grow. To address this, we've introduced support for ``parquet`` to provide better Athena search performance and cost savings.
-* Generated alerts from StreamAlert, enabled within StreamAlert out of the box
-* All incoming log data sent to StreamAlert, configurable after StreamAlert initialization
+.. note::
-This works by:
+  * When upgrading to StreamAlert v3.1.0, it is required to change the default ``file_format`` value to either ``parquet`` or ``json``; otherwise, StreamAlert will raise a ``MisconfigurationError`` exception when running ``python manage.py build``.
+  * For existing deployments, ``file_format`` can be set to ``json`` and nothing will change. However, if ``file_format`` is changed to ``parquet``, the Athena tables need to be recreated to load the ``parquet`` format. The existing JSON data won't be searchable anymore unless separate tables are built to process the data in JSON format. (All data stays in the S3 bucket; there is no data loss.)
+  * For new StreamAlert deployments, it is recommended to set ``file_format`` to ``parquet`` to take advantage of better Athena search performance and lower the cost of scanning data.
+  * In a future release, the default value of ``file_format`` will change to ``parquet``. So let's change now!
-* Creating a ``streamalert`` Athena database
-* Creating Athena tables to read S3 data
-* Using a Lambda function to periodically refresh Athena to make the data searchable
+************
+Architecture
+************
+.. image:: ../images/historical-search.png
+  :align: left
-****************
-General Concepts
-****************
-* `Amazon Athena details `_
-* `Amazon Athena tables `_
-* `AWS Lambda FAQ `_
-* `AWS Lambda pricing `_
+The pipeline is:
+* StreamAlert creates an Athena database, an alerts Kinesis Firehose, and the ``alerts`` table during the initial deployment
+* Optionally, Firehose delivery streams and Athena tables can be created for data as well
+* S3 events are sent to SQS to invoke the ``athena_partition_refresh`` Lambda function, which adds new partitions whenever new alerts or data are saved to the S3 bucket via Firehose
+* New alerts and data are then available for searching via the Athena console or SDK
+.. _alerts_search:
-***************
-Getting Started
-***************
-Searching of alerts is enabled within StreamAlert out of the box, and can be further extended to search all incoming log data.
+*************
+Alerts Search
+*************
-To create tables for searching data sent to StreamAlert, run:
+* Review the alerts Firehose configuration, see :ref:`alerts_firehose_configuration` in the ``CONFIGURATION`` section. The Athena database and the Athena alerts table are created automatically when you first deploy StreamAlert.
+* If ``file_format`` is set to ``parquet``, you can run the ``MSCK REPAIR TABLE alerts`` command in Athena to load all available partitions, after which alerts become searchable. However, the ``MSCK REPAIR`` command cannot load new partitions automatically.
+* StreamAlert provides a Lambda function, ``athena_partition_refresh``, to automatically load new partitions into the Athena tables once data arrives in the S3 buckets. Update ``athena_partition_refresh_config`` in ``conf/lambda.json`` if necessary. See :ref:`configure_athena_partition_refresh_lambda` for more settings.
-.. code-block:: bash
-   python manage.py athena create-table \
-     --bucket -streamalert-data \
-     --table-name
+  .. code-block:: bash
+
+    {
+      "athena_partition_refresh_config": {
+        "concurrency_limit": 10,
+        "file_format": "parquet",
+        "log_level": "info"
+      }
+    }
-The log name above reflects an enabled log type in your StreamAlert deployment. These are also top level keys in the various files under the ``schemas`` directory.
+* Deploy the ``athena_partition_refresh`` Lambda function
-For example, if you have 'cloudwatch' in your sources, you would want to create tables for all possible subtypes. This includes ``cloudwatch:control_message``, ``cloudwatch:events``, and ``cloudwatch:flow_logs``. The ``:`` character is not an acceptable character in table names due to a Hive limitation, but your arguments can be either ``cloudwatch:events`` **or** ``cloudwatch_events``. Both will be handled properly by StreamAlert.
+  .. code-block:: bash
-Repeat this process for all relevant data tables in your deployment.
+    python manage.py deploy --function athena
+* Search alerts in the `Athena Console `_
-Deploying
-=========
-Once the options above are set, deploy the infrastructure with the following commands:
+  * Choose your ``Database`` from the dropdown on the left. The database name is ``_streamalert``
+  * Write a SQL query statement in the ``Query Editor`` on the right
-.. code-block:: bash
+  ..
image:: ../images/athena-alerts-search.png
+
+***********
+Data Search
+***********
+Storing data in the S3 bucket and making it searchable in Athena tables is optional.
-*******************
-Athena Architecture
-*******************
-The Athena Partition Refresh function exists to periodically refresh Athena tables, enabling the searchability of alerts and log data.
+* Enable Firehose in ``conf/global.json``, see :ref:`firehose_configuration`
+* Build the Firehose delivery streams and Athena tables
-The default refresh interval is 10 minutes but can be configured by the user.
+  .. code-block:: bash
+    python manage.py build
-Concepts
-========
-The Athena Partition Refresh function utilizes:
+* Deploy the classifier so it knows to send data to the S3 bucket via Firehose
-* `Amazon S3 Event Notifications `_
-* `Amazon SQS `_
-* `AWS Lambda Invocations by Schedule `_
-* `Amazon Athena Repair Table `_
+  .. code-block:: bash
+    python manage.py deploy --function classifier
-Diagram
--------
-.. figure:: ../images/athena-refresh-arch.png
-   :alt: StreamAlert Athena Refresh Partition Diagram
-   :align: center
-   :target: _images/athena-refresh-arch.png
+* Search data in the `Athena Console `_
-Internals
----------
-Each time the Athena Partition Refresh Lambda function is invoked, it does the following:
+  * Choose your ``Database`` from the dropdown on the left. The database name is ``_streamalert``
+  * Write a SQL query statement in the ``Query Editor`` on the right
-* Polls the SQS queue for the latest S3 event notifications (up to 100)
-* S3 event notifications contain context around any new object written to a data bucket (as configured below)
-* A set of unique S3 Bucket IDs is deduplicated from the notifications
-* Queries Athena to verify the ``streamalert`` database exists
-* Refreshes the Athena tables for data in the relevant S3 buckets, as specified below in the list of ``buckets``
-* Deletes messages off the queue once partitions are created
+  .. image:: ../images/athena-data-search.png
+.. _configure_athena_partition_refresh_lambda:
+*************************
 Configure Lambda Settings
-=========================
+*************************
+
 Open ``conf/lambda.json``, and fill in the following options:
 =================================== ======== ==================== ===========
@@ -108,8 +101,8 @@ Key                                 Required Default              Description
 ``log_level``                       No       ``info``             The log level for the Lambda function, can be either ``info`` or ``debug``. Debug will help with diagnosing errors with polling SQS or sending Athena queries.
 ``memory``                          No       ``128``              The amount of memory (in MB) allocated to the Lambda function
 ``timeout``                         No       ``60``               The maximum duration of the Lambda function (in seconds)
-``schedule_expression``             No       ``rate(10 minutes)`` The rate of which the Athena Partition Refresh Lambda function is invoked in the form of a `CloudWatch schedule expression `_.
-``buckets``                         Yes      ``{}``               Key value pairs of S3 buckets and associated Athena table names. By default, the alerts bucket will exist in each deployment.
+``file_format``                     Yes      ``null``             The format in which alerts and data are stored in the S3 bucket via Firehose; can be either ``parquet`` (preferred) or ``json``
+``buckets``                         No       ``{}``               Key value pairs of S3 buckets and associated Athena table names. By default, the alerts bucket will exist in each deployment.
=================================== ======== ==================== =========== **Example:** @@ -123,93 +116,26 @@ Key Required Default Descriptio "buckets": { "alternative_bucket": "data" }, - "...": "...", + "file_format": "parquet", "timeout": 60 } } -Deployment -========== -If any of the settings above are changed from the initialized defaults, the Lambda function will need to be deployed in order for them to take effect: - -.. code-block:: bash - - python manage.py deploy --function athena - -Going forward, if the deploy flag ``--function all`` is used, it will redeploy this function along with the ``rule`` function and ``alert`` function. - - -Monitoring ----------- -To ensure the function is operating as expected, monitor the following SQS metrics for ``_streamalert_athena_s3_notifications``: - -* ``NumberOfMessagesReceived`` -* ``NumberOfMessagesSent`` -* ``NumberOfMessagesDeleted`` - -All three of these metrics should have very close values. - -If the ``NumberOfMessagesSent`` is much higher than the other two metrics, the ``schedule_expression`` should be increased in the configuration. - -For high throughput production environments, an interval of 1 to 2 minutes is recommended. - - ***************** -Athena User Guide +Athena References ***************** -Concepts -======== -* `SQL `_ -* `Athena Partitions `_ - - -Querying Data -============= -All alerts generated by StreamAlert will be sent to an ``alerts`` S3 bucket via Firehose. These will then be searchable within Athena. - -To get started with querying of this data, navigate to the AWS Console, click Services, and type 'Athena'. - -When the service loads, switch the ``DATABASE`` option in the dropdown to ``streamalert``: - -.. figure:: ../images/athena-usage-1.png - :alt: StreamAlert Athena Database Selection - :align: center - :target: _images/athena-usage-1.png - -To view the schema of the ``alerts`` table, click the eye icon: - -.. figure:: ../images/athena-usage-2.png - :alt: StreamAlert Athena Alerts Schema - :align: center - :target: _images/athena-usage-2. - -To make a query, type a SQL statement in the Query Editor, and click Run Query: - -.. figure:: ../images/athena-usage-3.png - :alt: StreamAlert Athena Run Query - :align: center - :target: _images/athena-usage-3. - -The query shown above will show the most recent 10 alerts. - - -Tips -==== -Data is partitioned in the following format ``YYYY-MM-DD-hh-mm``. - -An example is ``2017-08-01-22-00``. - -To increase query performance, filter data within a specific partition or range of partitions. +* `Introduction to SQL `_ +* `Amazon Athena Getting Started `_ +* `Presto Documenation `_ -With StreamAlert tables, the date partition is the ``dt`` column. +.. tip:: -As an example, the query below counts all alerts during a given minute: + * Alerts and data are partitioned by ``dt`` in the format ``YYYY-MM-DD-hh`` + * To improve query performance, filter data within a specific partition or range of partitions -.. figure:: ../images/athena-usage-4.png - :alt: StreamAlert Athena Run Query with Partition - :align: center - :target: _images/athena-usage-4. + .. code-block:: sql -For additional guidance on using SQL, visit the link under Concepts. 
+ SELECT * FROM "_streamalert"."alerts" + WHERE dt BETWEEN 2020-02-28-00 AND 2020-02-29-00 diff --git a/docs/source/index.rst b/docs/source/index.rst index 4ca6bb549..45059978c 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -73,6 +73,7 @@ Table of Contents rules testing outputs + dynamic-outputs publishers lookup-tables apps @@ -80,6 +81,7 @@ Table of Contents rule-staging rule-promotion historical-search + scheduled-queries conf-schemas-examples troubleshooting faq diff --git a/docs/source/outputs.rst b/docs/source/outputs.rst index 3f39af653..6d4482303 100644 --- a/docs/source/outputs.rst +++ b/docs/source/outputs.rst @@ -8,9 +8,10 @@ Out of the box, StreamAlert supports: * **Amazon CloudWatch Logs** * **Amazon Kinesis Firehose** * **AWS Lambda** -* **Amazon Simple Storage Service (S3)** -* **Amazon Simple Notification Service (SNS)** -* **Amazon Simple Queue Service (SQS)** +* **AWS S3** +* **AWS SES** +* **AWS SNS** +* **AWS SQS** * **Carbon Black** * **Demisto** * **GitHub** @@ -19,6 +20,7 @@ Out of the box, StreamAlert supports: * **PagerDuty** * **Phantom** * **Slack** +* **Microsoft Teams** StreamAlert can be extended to support any API. Creating a new output to send alerts to is easily accomplished through inheritance from the ``StreamOutputBase`` class. More on that in the `Adding Support for New Services`_ section below. @@ -35,7 +37,6 @@ Configuration ************* Adding a new configuration for a currently supported service is handled using ``manage.py``: - .. code-block:: bash python manage.py output diff --git a/docs/source/publishers.rst b/docs/source/publishers.rst index 65ceb61b0..57959ffc4 100644 --- a/docs/source/publishers.rst +++ b/docs/source/publishers.rst @@ -266,7 +266,7 @@ integration, leaving the Slack integration the same. Registering the publisher c @rule( logs=['ssh'], - output=['slack:engineering', 'pagerduty:engineering'], + outputs=['slack:engineering', 'pagerduty:engineering'], publishers={ 'pagerduty:engineering': simplify_pagerduty_output, } diff --git a/docs/source/rules.rst b/docs/source/rules.rst index 40a193f3b..38e7801b0 100644 --- a/docs/source/rules.rst +++ b/docs/source/rules.rst @@ -139,6 +139,7 @@ The following table provides an overview of each rule option, with more details ``merge_by_keys`` ``List[str]`` List of key names that must match in value before merging alerts ``merge_window_mins`` ``int`` Merge related alerts at this interval rather than sending immediately ``outputs`` ``List[str]`` List of alert outputs +``dynamic_outputs`` ``List[function]`` List of functions which return valid outputs ``req_subkeys`` ``Dict[str, List[str]]`` Subkeys which must be present in the record ===================== ======================== =============== @@ -255,10 +256,15 @@ The following table provides an overview of each rule option, with more details The original (unmerged) alert will always be sent to `Athena `_. +:dynamic_outputs: + + The ``dynamic_outputs`` keyword argument defines additional `outputs `_ to an Alert which are dynamically generated. + See `dynamic_outputs `_ for more info + :outputs: - Defines the alert destination if the return value of a rule is ``True``. - Alerts are always sent to an `Athena table `_ which is easy to query. + The ``outputs`` keyword argument defines the alert destination if the return value of a rule is ``True``. + Alerts are always sent to an :ref:`Athena alerts table ` which is easy to query. Any number of additional `outputs `_ can be specified. 
:req_subkeys: diff --git a/docs/source/scheduled-queries.rst b/docs/source/scheduled-queries.rst new file mode 100644 index 000000000..071258416 --- /dev/null +++ b/docs/source/scheduled-queries.rst @@ -0,0 +1,297 @@ +Scheduled Queries +================= + +Overview +-------- +Originally dubbed "StreamQuery", this system allows you to execute Athena queries on a schedule, and +funnel their results back into StreamAlert for rules analysis. + +Because StreamAlert is mostly stateless, scheduled queries can allow you to correlate data together +and analyze them automatically. Rules that were not previously possible can be written: + +* Detect X failed logins within Y minutes +* Detect spike in API behavior that is correlated with an increase in # of a different alert/rule +* Detect elevated API % error rates from specific IP address + + +How do scheduled queries work? +`````````````````````````````` +This system leverages two main components: AWS Lambda and AWS Step Functions. + +First, a CloudWatch scheduled event triggers the execution of a new AWS Step Function State Machine. +This State Machine manages the lifecycle of Athena queries through the Lambda function. Its sole +responsibility is to execute the Lambda, wait a predefined window of time, and execute the Lambda again, +repeating until the Lambda reports it is finished. + +The Lambda function is a simple function that starts Athena queries, caches their execution ids, checks +on their execution status, and uploads results to StreamAlert via Kinesis. Instead of doing all of these +steps in a blocking fashion and sleeping while it waits for Athena, it runs through all queries in a single +nonblocking pass, and returns the result of its execution to the State Machine. Once all queries have +completed and their results sent to StreamAlert, the Lambda returns a "done" flag to the State Machine, +signalling that this job has been finished. + + + +Configuration +------------- +Scheduled Queries is configured via a single file, ``conf/scheduled_queries.json``. + +.. code-block:: json + + { + "enabled": true, + "config": { + "destination_kinesis": "prefix_prod_streamalert", + "sfn_timeout_secs": 3600, + "sfn_wait_secs": 30 + }, + "packs": { + "hourly": { + "description": "Runs all hourly queries", + "schedule_expression": "rate(1 hour)" + }, + "two_hour": { + "description": "Runs all queries run every two hours", + "schedule_expression": "rate(2 hours)" + }, + "daily": { + "description": "Runs all daily queries", + "schedule_expression": "rate(24 hours)" + }, + "two_day": { + "description": "Runs all queries that are run once every 2 days", + "schedule_expression": "rate(2 days)" + } + }, + "lambda_config": {} + } + +* ``enabled`` — (bool) Pass `true` to activate ScheduledQueries. Leave `false` to disable. +* ``config.destination_kinesis`` — (str) The name of the Kinesis stream to upload results to. +* ``config.sfn_timeout_secs`` - (int) Max number of seconds for the state machine to execute. +* ``config.sfn_wait_secs`` - (int) Time to wait between checks of query status. +* ``query_packs`` - (dict) The keys of this dict are the **names** of the query packs. This section is discussed in more depth below. + + +Query Packs +``````````` +Query Packs are batches of scheduled Athena queries that are executed together. + +.. code-block:: + + "query_packs": { + ... + "hourly": { + "description": "Runs all hourly queries", + "schedule_expression": "rate(1 hour)" + }, + ... 
+ } + +- ``description`` - (str) A string summary of what queries belong in this pack. +- ``schedule_expression`` - (str) A CloudWatch schedule expression defining how frequently to execute this query pack. + +Again, the keys to the ``query_packs`` dict are the **names** of the query packs. These names are used below. + + +Writing Queries +``````````````` +After you've defined a few Query Packs, it's time to add actual scheduled queries. + +All scheduled queries are located in the ``scheduled_queries/`` directory, located in the root of the project. + + +.. code-block:: python + + from streamalert.scheduled_queries.query_packs.configuration import QueryPackConfiguration + + QueryPackConfiguration( + name='NAME_OF_QUERY_PACK', + description='Hey, hey! This is a description!', + + # Make sure to edit the database name properly or this query will error with some + # "insufficient privileges errors" + query=""" + SELECT + * + FROM + "ATHENA_DATABASE_NAME"."cloudwatch_cloudtrail" + WHERE + dt = '{utcdatehour_minus1hour}' + + AND eventsource = 'athena.amazonaws.com' + AND eventname = 'StartQueryExecution' + """, + params=['utcdatehour_minus1hour'], + tags=['sample'] + ) + +* ``name`` - (str) The name of this query. This name is published in the final result, and is useful when writing rules. +* ``description`` - (str) Description of this query. This is published in the final result. +* ``query`` - (str) A template SQL statement sent to Athena, with query parameters identified ``{like_this}``. +* ``params`` - (list[str]) A list of query parameters to pass to the query string. These have special values that are calculated at runtime, and are interpolated into the template SQL string. +* ``tags`` - (list[str]) Tags required by this query to be run. The simplest way to use this is to put the **Query pack name** into this array. + + + +Writing Rules for StreamQuery +----------------------------- + +Classifier Schema +````````````````` +We provide an out-of-box sample schema for scheduled query v1.0.0 results. It is located at ``conf/schemas/streamquery.json``. + + +What does a scheduled query result look like? +````````````````````````````````````````````` +Below is an example of what StreamAlert may receive as a result from a scheduled query. + +.. code-block:: json + + { + "streamquery_schema_version": "1.0.0", + "execution": { + "name": "query_name_goes_here", + "description": "This is an example", + "query": "SELECT *\nFROM my_database.my_table\nWHERE dt = '2020-01-01-01' LIMIT 10", + "query_parameters": { + "dt": "2020-01-01-01" + }, + "data_scanned_in_bytes": 4783293, + "execution_time_ms": 12374, + "tags": [ "query_pack_1" ], + "query_execution_id": "123845ac-273b-ad3b-2812-9812739789", + "console_link": "https://console.amazonaws.com/athena/somethingsomething", + }, + "data": { + "headers": [ + "username", + "time" + ], + "rows": [ + { + "username": "bob", + "time": 1, + }, + { + "username": "sally", + "time": 2, + }, + { + "username": "joe", + "time": 3, + }, + ], + "count": 3, + }, + } + +Because the **data** of each query may be different it is generally advisable to write a StreamAlert +matcher on the ``execution.name`` value of the data, first. The rest is up to you! + + +Deployment +---------- +Deploying the various components of scheduled_queries is easy. + +Building the Step Function, Lambda, and Query Packs +``````````````````````````````````````````````````` + +Anytime you change the configured query packs, you will need to run this to update the AWS Resources. + +.. 
code-block:: bash
+
+  % ./manage.py build -t scheduled_queries
+
+
+Deploying Python Code to Lambda
+```````````````````````````````
+
+.. code-block:: bash
+
+  % ./manage.py deploy -f scheduled_queries
+
+
+
+Best Practices
+--------------
+
+Use cron() instead of rate()
+````````````````````````````
+When defining ``schedule_expressions``, it's safer to use ``cron(1 * * * *)`` than ``rate(1 hour)``. The reason for
+this is, if you use Terraform to build or rebuild your scheduled queries resources, you may end up recreating the
+query pack. When using ``rate(1 hour)``, this will cause the CloudWatch event to trigger immediately, then wait in
+1 hour increments. With ``cron(1 * * * *)``, it is easier to determine exactly when a query pack will be executed. In this case:
+"1st minute of every hour".
+
+
+Be mindful of how much data is being sent
+`````````````````````````````````````````
+Athena queries can return a TON of data. Remember that this data has to fit in Lambda memory or it will crash your application.
+Try to structure your queries with GROUP BY statements or restrict the fields they operate on.
+
+
+CAREFULLY CONSIDER Firehose'ing Scheduled Query results into Athena
+```````````````````````````````````````````````````````````````````
+It is theoretically possible to Firehose all StreamQuery results received by StreamAlert back into S3, using scheduled
+queries for data transformation.
+
+We don't really recommend doing this. This can add significantly more data to the pipeline, and usage of ``CREATE TABLE AS SELECT``
+is likely a more cost-efficient choice.
+
+
+Use dt BETWEEN, not dt > Queries
+````````````````````````````````
+In queries, prefer to be explicit about which partitions to scan. Use clauses like these:
+
+* ``dt = {datehour}``
+* ``dt BETWEEN {datehour_minus1hour} AND {datehour}``
+
+Avoid things like ``dt > {datehour_minus1hour}``. This creates time-sensitivity in your query, and
+may cause it to return different results than expected if there is a delay in Step Function execution (see below).
+
+
+
+Neat Little Details
+-------------------
+
+Athena Queries are Incredibly Cheap
+```````````````````````````````````
+At $5 per 1 Terabyte scanned, Athena is absurdly cheap. Go nuts with your scheduled queries!
+
+
+Failed State Machine Executions are Retriable
+`````````````````````````````````````````````
+AWS Step Functions record every single execution of each State Machine, as well as each state change.
+Going to the console, you can observe that the Input event of a State Machine execution is simply a JSON blob:
+
+.. code-block:: json
+
+  {
+    "name": "streamalert_scheduled_queries_cloudwatch_trigger",
+    "event_id": "12345678-53e7-b479-0601-1234567890",
+    "source_arn": "arn:aws:events:us-east-1:123456789012:rule/myprefix_streamalert_scheduled_queries_event_0",
+    "streamquery_configuration": {
+      "clock": "2020-02-13T22:06:20Z",
+      "tags": [
+        "hourly"
+      ]
+    }
+  }
+
+Notice the "clock". This value is generated at the time the CloudWatch scheduled event is triggered. Thus,
+if you start a new State Machine execution using the exact same Input event (with the same clock), the
+results of that execution will be exactly (mostly...) the same.
+
+This is useful for replaying failed State Machine executions that result from Athena downtime or
+deployed bugs. Simply use the AWS Console, navigate to any failed executions, and click the ``New Execution``
+button, whereupon a form will be shown with a copy of the Input event already pre-populated!
+ + +You manually trigger query executions +````````````````````````````````````` +Knowing the above, you can force StreamQuery to execute ad hoc queries simply by manually triggering State +Machine executions, and passing in a correctly formatted Input event! + +Make sure the Input event's tags and clock are populated correctly to ensure the correct queries are +executed. diff --git a/docs/source/testing.rst b/docs/source/testing.rst index 396ab1c18..6e35d6b60 100644 --- a/docs/source/testing.rst +++ b/docs/source/testing.rst @@ -9,15 +9,25 @@ The ``manage.py`` CLI tool comes built-in with a ``test`` command which does exa ************* Configuration ************* -To test a new rule, first create a new JSON file anywhere within the 'tests/integration/rules/' directory with the ``.json`` extension. +To test a new rule, first create a new JSON file next to your rule file. The suggested convention is +to use the same name as the rule you are testing, but you can choose any name you would like. This +will help with organization, but you may also create test events to test your rules anywhere within +the same top-level directory where your rules are stored. -This file should contain the following structure: + +Basic Configuration +=================== + +Each test event file should contain the following structure: .. code-block:: json [ { - "data": "Either a string, or JSON object", + "data": { + "key_01": "value_01", + "key_02": "value_02" + }, "description": "This test should trigger or not trigger an alert", "log": "The log name declared in a json file under the conf/schemas directory", "service": "The service sending the log - kinesis, s3, sns, or streamalert_app", @@ -31,6 +41,10 @@ This file should contain the following structure: .. note:: Multiple tests can be included in one file by adding them to the array above. + +Specifying Test Data +==================== + When specifying the test data, it can be either of two fields: 1. ``"data"``: An entire example record, with all necessary fields to properly classify @@ -100,6 +114,105 @@ Both test events would have the same result, but with much less effort. Either ``override_record`` or ``data`` is required in the test event +Testing Classification +====================== + +Classification tests are always run on each test. Consider these two fields in the test configuration: + +.. code-block:: json + + [ + { + "log": "cloudwatch:events", + "classify_only": true + } + ] + + +The ``log`` field in each test specifies the expected classified type of the test record. The test will fail +if the classified log type differs. + +By default, the test runner will continue on to test rules. If you only wish to test classification, +specify ``classify_only`` as ``true``. + + +Testing Rules +============= + +Assuming a test is not ``classify_only``, rules are run after classification. Consider this field in the test file: + +.. code-block:: json + + [ + { + "trigger_rules": [ + "my_first_fake_rule", + "my_second_fake_rule" + ] + } + ] + +All rules are run on each set of test data. The ``trigger_rules`` field specifies an array of rule names that should +be triggered as a result. An empty array implies that the test data should not trigger any rules. + + +Publisher Tests +=============== + +Consider the following rule: + +.. code-block:: python + + @rule( + logs=['cloudwatch:events'], + outputs=['slack:sample-channel'], + publishers={'slack': my_publisher} + ) + def my_rule(record): + # .. 
something logic + return True + +To test the output of the Alert Publisher framework, you can specify ``publisher_tests``. Consider this field: + +.. code-block:: json + + [ + { + "trigger_rules": ["my_rule"], + "publisher_tests": { + "slack:sample-channel": [ + { + "jmespath_expression": "path.to.record", + "condition": "is", + "value": 4 + }, + [ "path.to.other.record", "is", 5 ] + ] + } + } + ] + +This field is a dictionary, where keys specify outputs to test. Each key's value is an array of publisher tests. +These tests compare the Alert Publisher's output to a configured expectation. + +Each publisher test can be a dict with 3 keys: + +- ``jmespath_expression``: A jmespath search expression. This is run on the Alert Publisher output for the given OutputDispatcher. +- ``condition``: Either "is" or "in", for equality or substring/subset matching, respectively. +- ``value``: The expected value of the field. + +The field that is extract via the ``jmespath_expression`` is tested against the expected value, using the conditional. + + +.. note:: + + An alternate shorthand syntax to the above is to specify a triple of strings: + + .. code-block:: json + + ["path.to.field", "is", "value"] + + Rule Test Reference =================== ========================= ====================== ======== =========== @@ -123,11 +236,75 @@ Key Type Required Description provided in the ``data_sources`` field defined within a cluster in ``conf/clusters/.json`` ``trigger_rules`` ``list`` No A list of zero or more rule names that this test record should trigger. An empty list implies this record should not trigger any alerts -``validate_schema_only`` ``boolean`` No Whether or not the test record should go through the rule processing engine. - If set to ``true``, this record will only have validation performed +``classify_only`` ``boolean`` No Whether or not the test record should go through the rule processing engine. + If set to ``true``, this record will only be tested for valid classification +``publisher_tests`` ``dict`` No This is a dict of tests to run against the Alert's published representation. + The keys of the dict are output descriptors. The values of the dict should be + arrays of individual tests. Publisher tests use jmespath to extract values from + the final publication dictionary for testing. At least one rule should be triggered, + or publisher tests will do nothing. +``test_fixtures`` ``dict`` No Values to be mocked out for use within rules for the ``threat_intel`` and + ``lookup_tables`` features. See below for examples of this. ========================= ====================== ======== =========== -For more examples, see the provided default rule tests in ``tests/integration/rules`` + +Test Fixtures Configuration +=========================== + +Fixtures for tests events should be configured as part of the event itself. These should be +added within the ``threat_intel`` or ``lookup_tables`` keys under a ``test_fixtures`` section +of the test event. Usage of these two sections is outlined below. + + +Threat Intel Fixtures +--------------------- + +The below format should be used to "mock" out threat intel data to test rules that leverage this feature. + +.. 
code-block:: json + + [ + { + "test_fixtures": { + "threat_intel": [ + { + "ioc_value": "1.2.3.4", + "ioc_type": "ip", + "sub_type": "mal_ip" + }, + { + "ioc_value": "0123456789abcdef0123456789abcdef", + "ioc_type": "md5", + "sub_type": "mal_md5" + } + ] + } + } + ] + +Lookup Tables Fixtures +---------------------- + +The below format should be used to "mock" out lookup table data to test rules that leverage this feature. + +.. code-block:: json + + [ + { + "test_fixtures": { + "lookup_tables": { + "dynamodb-table-name": { + "lookup_key": [ + "value_for_rule" + ] + } + } + } + } + ] + + +For more examples of how to configure tests for rules, see the provided default rules and tests in the ``rules/`` directory ************* @@ -210,18 +387,18 @@ Here is a sample command showing how to run tests against two test event files i .. code-block:: bash - python manage.py test rules --test-files cloudtrail_put_bucket_acl.json cloudtrail_root_account_usage.json + python manage.py test rules --test-files rules/community/cloudwatch_events/cloudtrail_put_bucket_acl.json rules/community/cloudwatch_events/cloudtrail_root_account_usage.json This will produce output similar to the following:: - Running tests for files found in: tests/integration/rules/ + Running tests for files found in: rules - File: cloudtrail/cloudtrail_put_bucket_acl.json + File: rules/community/cloudwatch_events/cloudtrail_put_bucket_acl.json Test #01: Pass Test #02: Pass - File: cloudtrail/cloudtrail_root_account_usage.json + File: rules/community/cloudwatch_events/cloudtrail_root_account_usage.json Test #01: Pass Test #02: Pass @@ -235,47 +412,68 @@ This will produce output similar to the following:: To see more verbose output for any of the test commands, add the ``--verbose`` flag. The previous command, with the addition of the ``--verbose`` flag, produces the following output:: - Running tests for files found in: tests/integration/rules/ + Running tests for files found in: rules + + File: rules/community/cloudwatch_events/cloudtrail_put_bucket_acl.json + + Test #01: + + Description: Modifying an S3 bucket to have a bucket ACL of AllUsers or AuthenticatedUsers should create an alert. + + Classification: Pass + Classified Type: cloudwatch:events + Expected Type: cloudwatch:events + + Rules: Pass + Triggered Rules: cloudtrail_put_bucket_acl + Expected Rules: cloudtrail_put_bucket_acl + + Test #02: + + Description: Modifying an S3 bucket ACL without use of AllUsers or AuthenticatedUsers should not create an alert. - File: cloudtrail/cloudtrail_put_bucket_acl.json + Classification: Pass + Classified Type: cloudwatch:events + Expected Type: cloudwatch:events - Test #01: Pass - Description: Modifying an S3 bucket to have a bucket ACL of AllUsers or AuthenticatedUsers should create an alert. - Classified Type: cloudwatch:events - Expected Type: cloudwatch:events - Triggered Rules: cloudtrail_put_bucket_acl - Expected Rules: cloudtrail_put_bucket_acl + Rules: Pass + Triggered Rules: + Expected Rules: - Test #02: Pass - Description: Modifying an S3 bucket ACL without use of AllUsers or AuthenticatedUsers should not create an alert. - Classified Type: cloudwatch:events - Expected Type: cloudwatch:events - Triggered Rules: - Expected Rules: + File: rules/community/cloudwatch_events/cloudtrail_root_account_usage.json - File: cloudtrail/cloudtrail_root_account_usage.json + Test #01: - Test #01: Pass - Description: Use of the AWS 'Root' account will create an alert. 
- Classified Type: cloudwatch:events - Expected Type: cloudwatch:events - Triggered Rules: cloudtrail_root_account_usage - Expected Rules: cloudtrail_root_account_usage + Description: Use of the AWS 'Root' account will create an alert. - Test #02: Pass - Description: AWS 'Root' account activity initiated automatically by an AWS service on your behalf will not create an alert. - Classified Type: cloudwatch:events - Expected Type: cloudwatch:events - Triggered Rules: - Expected Rules: + Classification: Pass + Classified Type: cloudwatch:events + Expected Type: cloudwatch:events + Rules: Pass + Triggered Rules: cloudtrail_root_account_usage + Expected Rules: cloudtrail_root_account_usage - Summary: + Test #02: + + Description: AWS 'Root' account activity initiated automatically by an AWS service on your behalf will not create an alert. + + Classification: Pass + Classified Type: cloudwatch:events + Expected Type: cloudwatch:events + + Rules: Pass + Triggered Rules: + Expected Rules: + + + Summary: + + Total Tests: 4 + Pass: 4 + Fail: 0 - Total Tests: 4 - Pass: 4 - Fail: 0 Additionally, any given test that results in a status of **Fail** will, by default, print verbosely. In the below example, the ``cloudtrail_put_bucket_acl.json`` file has been altered to include a triggering @@ -283,24 +481,29 @@ rule that does not actually exist. .. code-block:: bash - python manage.py test rules --test-files cloudtrail_put_bucket_acl.json cloudtrail_root_account_usage.json + python manage.py test rules --test-files rules/community/cloudwatch_events/cloudtrail_put_bucket_acl.json rules/community/cloudwatch_events/cloudtrail_root_account_usage.json :: - Running tests for files found in: tests/integration/rules/ + Running tests for files found in: rules - File: cloudtrail/cloudtrail_put_bucket_acl.json + File: rules/community/cloudwatch_events/cloudtrail_put_bucket_acl.json + + Test #01: - Test #01: Fail Description: Modifying an S3 bucket to have a bucket ACL of AllUsers or AuthenticatedUsers should create an alert. 
- Classified Type: cloudwatch:events - Expected Type: cloudwatch:events - Triggered Rules: cloudtrail_put_bucket_acl - Expected Rules: cloudtrail_put_bucket_acl, nonexistent_rule (does not exist) + + Classification: Pass + Classified Type: cloudwatch:events + Expected Type: cloudwatch:events + + Rules: Fail + Triggered Rules: cloudtrail_put_bucket_acl + Expected Rules: cloudtrail_put_bucket_acl, nonexistent_rule (does not exist) Test #02: Pass - File: cloudtrail/cloudtrail_root_account_usage.json + File: rules/community/cloudwatch_events/cloudtrail_root_account_usage.json Test #01: Pass Test #02: Pass diff --git a/manage.py b/manage.py index 1b6b6159b..912a9e911 100755 --- a/manage.py +++ b/manage.py @@ -73,7 +73,7 @@ def build_parser(): # Dynamically generate subparsers, and create a 'commands' block for the prog description command_block = [] - subparsers = parser.add_subparsers(dest="command", required=True) + subparsers = parser.add_subparsers(dest='command', required=True) command_col_size = max([len(command) for command in commands]) + 10 for command in sorted(commands): setup_subparser_func, description = commands[command] @@ -109,8 +109,8 @@ def main(): options = parser.parse_args() # Exit with the result, which will be False if an error occurs, or True otherwise - sys.exit(not cli_runner(options)) + return not cli_runner(options) if __name__ == "__main__": - main() + sys.exit(main()) diff --git a/requirements-top-level.txt b/requirements-top-level.txt index 8082d8970..7755e21d6 100644 --- a/requirements-top-level.txt +++ b/requirements-top-level.txt @@ -21,6 +21,7 @@ pathlib2 policyuniverse pyfakefs pylint==2.3.1 +pymsteams requests Sphinx sphinx-rtd-theme diff --git a/requirements.txt b/requirements.txt index 90824c2ec..7683ce1b8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -83,6 +83,7 @@ pycparser==2.19 pyflakes==2.1.1 Pygments==2.4.2 PyJWT==1.7.1 +pymsteams==0.1.12 pyparsing==2.4.2 pyrsistent==0.15.5 pytest==5.0.0 diff --git a/tests/integration/rules/aliyun/aliyun_basic.json b/rules/classifier/aliyun/aliyun_actiontrail.json similarity index 98% rename from tests/integration/rules/aliyun/aliyun_basic.json rename to rules/classifier/aliyun/aliyun_actiontrail.json index 84f741008..0a8eb9eef 100644 --- a/tests/integration/rules/aliyun/aliyun_basic.json +++ b/rules/classifier/aliyun/aliyun_actiontrail.json @@ -45,7 +45,7 @@ "log": "aliyun:actiontrail", "service": "streamalert_app", "source": "prefix_cluster_aliyun_actiontrail_sm-app-name_app", - "trigger_rules": [] + "classify_only": true }, { "data": { @@ -81,7 +81,7 @@ "log": "aliyun:actiontrail", "service": "streamalert_app", "source": "prefix_cluster_aliyun_actiontrail_sm-app-name_app", - "trigger_rules": [] + "classify_only": true }, { "data": { @@ -118,7 +118,7 @@ "log": "aliyun:actiontrail", "service": "streamalert_app", "source": "prefix_cluster_aliyun_actiontrail_sm-app-name_app", - "trigger_rules": [] + "classify_only": true }, { "data": { @@ -155,6 +155,6 @@ "log": "aliyun:actiontrail", "service": "streamalert_app", "source": "prefix_cluster_aliyun_actiontrail_sm-app-name_app", - "trigger_rules": [] + "classify_only": true } ] \ No newline at end of file diff --git a/tests/integration/rules/box/box_admin_events.json b/rules/classifier/box/box_admin_events.json similarity index 93% rename from tests/integration/rules/box/box_admin_events.json rename to rules/classifier/box/box_admin_events.json index d7d546919..3b8dfa47d 100644 --- a/tests/integration/rules/box/box_admin_events.json +++ 
b/rules/classifier/box/box_admin_events.json @@ -25,7 +25,6 @@ "log": "box:admin_events", "service": "streamalert_app", "source": "prefix_cluster_box_admin_events_sm-app-name_app", - "trigger_rules": [], - "validate_schema_only": true + "classify_only": true } ] \ No newline at end of file diff --git a/rules/classifier/cloudtrail/cloudtrail_events.json b/rules/classifier/cloudtrail/cloudtrail_events.json new file mode 100644 index 000000000..d91d0a489 --- /dev/null +++ b/rules/classifier/cloudtrail/cloudtrail_events.json @@ -0,0 +1,41 @@ +[ + { + "data": { + "Records": [ + { + "eventVersion": "1.05", + "userIdentity": { + "arn": "arn", + "accountId": "accountId", + "userName": "userName", + "type": "type" + }, + "eventTime": "eventTime", + "eventSource": "quicksight.amazonaws.com", + "eventName": "QueryDatabase", + "awsRegion": "awsRegion", + "requestParameters": null, + "responseElements": null, + "eventID": "eventID", + "readOnly": true, + "eventType": "AwsServiceEvent", + "recipientAccountId": "recipientAccountId", + "serviceEventDetails": { + "eventRequestDetails": { + "dataSourceId": "dataSourceId", + "queryId": "queryId", + "resourceId": "resourceId", + "dataSetId": "dataSetId", + "dataSetMode": "dataSetMode" + } + } + } + ] + }, + "description": "quicksight event via cloudtrail", + "log": "cloudtrail:events", + "service": "s3", + "source": "prefix.cluster.sample.bucket", + "classify_only": true + } +] \ No newline at end of file diff --git a/tests/integration/rules/cloudwatch/cloudtrail_via_cloudwatch.json b/rules/classifier/cloudwatch/cloudwatch_cloudtrail.json similarity index 95% rename from tests/integration/rules/cloudwatch/cloudtrail_via_cloudwatch.json rename to rules/classifier/cloudwatch/cloudwatch_cloudtrail.json index 3eb8de534..11fddbd10 100644 --- a/tests/integration/rules/cloudwatch/cloudtrail_via_cloudwatch.json +++ b/rules/classifier/cloudwatch/cloudwatch_cloudtrail.json @@ -20,7 +20,6 @@ "log": "cloudwatch:cloudtrail", "service": "kinesis", "source": "prefix_cluster1_streamalert", - "trigger_rules": [], - "validate_schema_only": true + "classify_only": true } ] \ No newline at end of file diff --git a/tests/integration/rules/cloudwatch/cloudwatch_control_message.json b/rules/classifier/cloudwatch/cloudwatch_control_message.json similarity index 89% rename from tests/integration/rules/cloudwatch/cloudwatch_control_message.json rename to rules/classifier/cloudwatch/cloudwatch_control_message.json index 65c54e9b4..7a4d7e735 100644 --- a/tests/integration/rules/cloudwatch/cloudwatch_control_message.json +++ b/rules/classifier/cloudwatch/cloudwatch_control_message.json @@ -15,7 +15,6 @@ "log": "cloudwatch:control_message", "service": "kinesis", "source": "prefix_cluster1_streamalert", - "trigger_rules": [], - "validate_schema_only": true + "classify_only": true } ] \ No newline at end of file diff --git a/tests/integration/rules/cloudwatch/rds_aurora_via_cloudwatch.json b/rules/classifier/cloudwatch/cloudwatch_rds_aurora.json similarity index 93% rename from tests/integration/rules/cloudwatch/rds_aurora_via_cloudwatch.json rename to rules/classifier/cloudwatch/cloudwatch_rds_aurora.json index 4c10df566..36eccbe45 100644 --- a/tests/integration/rules/cloudwatch/rds_aurora_via_cloudwatch.json +++ b/rules/classifier/cloudwatch/cloudwatch_rds_aurora.json @@ -20,8 +20,7 @@ "log": "cloudwatch:rds_aurora", "service": "kinesis", "source": "prefix_cluster1_streamalert", - "trigger_rules": [], - "validate_schema_only": true + "classify_only": true }, { "data": { @@ -44,8 +43,7 @@ 
"log": "cloudwatch:rds_aurora", "service": "kinesis", "source": "prefix_cluster1_streamalert", - "trigger_rules": [], - "validate_schema_only": true + "classify_only": true }, { "data": { @@ -68,7 +66,6 @@ "log": "cloudwatch:rds_aurora", "service": "kinesis", "source": "prefix_cluster1_streamalert", - "trigger_rules": [], - "validate_schema_only": true + "classify_only": true } ] \ No newline at end of file diff --git a/tests/integration/rules/gsuite/gsuite_admin.json b/rules/classifier/gsuite/gsuite_reports.json similarity index 95% rename from tests/integration/rules/gsuite/gsuite_admin.json rename to rules/classifier/gsuite/gsuite_reports.json index 3e72799e9..56037dfb5 100644 --- a/tests/integration/rules/gsuite/gsuite_admin.json +++ b/rules/classifier/gsuite/gsuite_reports.json @@ -35,7 +35,6 @@ "log": "gsuite:reports", "service": "streamalert_app", "source": "prefix_cluster_gsuite_admin_sm-app-name_app", - "trigger_rules": [], - "validate_schema_only": true + "classify_only": true } ] \ No newline at end of file diff --git a/rules/classifier/osquery/osquery_snapshot.json b/rules/classifier/osquery/osquery_snapshot.json new file mode 100644 index 000000000..7b6116ff1 --- /dev/null +++ b/rules/classifier/osquery/osquery_snapshot.json @@ -0,0 +1,25 @@ +[ + { + "data": { + "numerics": false, + "name": "pack/windows-hardening/Disallowed", + "calendarTime": "Thu Feb 27 14:34:21 2020 UTC", + "counter": 0, + "epoch": 0, + "snapshot": [], + "decorations": { + "hostname": "foo-hostname", + "hardware_serial": "8Q394Y2", + "uuid": "4C4C4544-0051-XXXX-YYYY-ZZZZZZZZZZZZ" + }, + "unixTime": 1582814061, + "action": "snapshot", + "hostIdentifier": "4C4C4544-0051-XXXX-YYYY-ZZZZZZZZZZZZ" + }, + "log": "osquery:snapshot", + "description": "OSQuery event.", + "classify_only": true, + "source": "prefix_cluster1_streamalert", + "service": "kinesis" + } +] diff --git a/rules/classifier/packbeat/packetbeat_dns.json b/rules/classifier/packbeat/packetbeat_dns.json new file mode 100644 index 000000000..a2afc0e4a --- /dev/null +++ b/rules/classifier/packbeat/packetbeat_dns.json @@ -0,0 +1,23 @@ +[ + { + "data": { + "@timestamp": "2018-02-06T07:22:59.991Z", + "bytes_in": 39, + "bytes_out": 71, + "client_ip": "172.16.1.10", + "dns": { + "answers_count": 2, + "opt": { "udp_size": 512 }, + "question": { "name": "evil.com.", "type": "A" }, + "response_code": "NOERROR" + }, + "transport": "udp", + "type": "dns" + }, + "description": "basic schema validation check for packetbeat:dns", + "log": "packetbeat:dns", + "source": "prefix.cluster.sample.bucket", + "service": "s3", + "classify_only": true + } +] diff --git a/rules/classifier/packbeat/packetbeat_flow.json b/rules/classifier/packbeat/packetbeat_flow.json new file mode 100644 index 000000000..8efb049dd --- /dev/null +++ b/rules/classifier/packbeat/packetbeat_flow.json @@ -0,0 +1,25 @@ +[ + { + "data": { + "@timestamp": "2018-02-06T07:23:00.010Z", + "dest": { + "ip": "172.16.2.3", + "port": 50717 + }, + "final": false, + "last_time": "2018-02-06T07:22:34.933Z", + "source": { + "ip": "35.195.65.23", + "mac": "00:08:a2:09:e4:6a", + "port": 443 + }, + "start_time": "2018-02-06T07:22:34.933Z", + "type": "flow" + }, + "description": "basic schema validation check for packetbeat:flow", + "log": "packetbeat:flow", + "source": "prefix.cluster.sample.bucket", + "service": "s3", + "classify_only": true + } +] diff --git a/rules/classifier/slack/slack_access.json b/rules/classifier/slack/slack_access.json new file mode 100644 index 000000000..245fb4dd5 --- /dev/null +++ 
b/rules/classifier/slack/slack_access.json @@ -0,0 +1,21 @@ +[ + { + "data": { + "user_id": "U12345", + "username": "bob", + "date_first": 1422922864, + "date_last": 1422922864, + "count": 1, + "ip": "127.0.0.1", + "user_agent": "SlackWeb Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.35 Safari/537.36", + "isp": "BigCo ISP", + "country": "US", + "region": "CA" + }, + "description": "basic schema validation check for slack:access", + "log": "slack:access", + "service": "streamalert_app", + "source": "prefix_cluster_slack_access_sm-app-name_app", + "classify_only": true + } +] \ No newline at end of file diff --git a/tests/integration/rules/slack/slack_basic.json b/rules/classifier/slack/slack_integration.json similarity index 59% rename from tests/integration/rules/slack/slack_basic.json rename to rules/classifier/slack/slack_integration.json index 13ebb5d2b..9d84d15f0 100644 --- a/tests/integration/rules/slack/slack_basic.json +++ b/rules/classifier/slack/slack_integration.json @@ -1,23 +1,4 @@ [ - { - "data": { - "user_id": "U12345", - "username": "bob", - "date_first": 1422922864, - "date_last": 1422922864, - "count": 1, - "ip": "127.0.0.1", - "user_agent": "SlackWeb Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.35 Safari/537.36", - "isp": "BigCo ISP", - "country": "US", - "region": "CA" - }, - "description": "basic schema validation check for slack:access", - "log": "slack:access", - "service": "streamalert_app", - "source": "prefix_cluster_slack_access_sm-app-name_app", - "trigger_rules": [] - }, { "data": { "service_id": "1234567890", @@ -33,7 +14,7 @@ "log": "slack:integration", "service": "streamalert_app", "source": "prefix_cluster_slack_integration_sm-app-name_app", - "trigger_rules": [] + "classify_only": true }, { "data": { @@ -49,6 +30,6 @@ "log": "slack:integration", "service": "streamalert_app", "source": "prefix_cluster_slack_integration_sm-app-name_app", - "trigger_rules": [] + "classify_only": true } ] \ No newline at end of file diff --git a/tests/integration/rules/binaryalert/binaryalert_yara_match.json b/rules/community/binaryalert/binaryalert_yara_match.json similarity index 100% rename from tests/integration/rules/binaryalert/binaryalert_yara_match.json rename to rules/community/binaryalert/binaryalert_yara_match.json diff --git a/rules/community/cloudtrail/cloudtrail_aws_config.json b/rules/community/cloudtrail/cloudtrail_aws_config.json new file mode 100644 index 000000000..a3419ed0c --- /dev/null +++ b/rules/community/cloudtrail/cloudtrail_aws_config.json @@ -0,0 +1,199 @@ +[ + { + "data": { + "Records": [ + { + "additionalEventData": { + "configRuleArn": "...", + "configRuleInputParameters": "{}", + "configRuleName": "s3-bucket-logging-enabled", + "notificationJobType": "SCHEDULED_NOTIFICATION" + }, + "awsRegion": "...", + "eventID": "...", + "eventName": "PutEvaluations", + "eventSource": "config.amazonaws.com", + "eventTime": "...", + "eventType": "AwsApiCall", + "eventVersion": "1.05", + "recipientAccountId": "...", + "requestID": "...", + "requestParameters": { + "evaluations": [ + { + "complianceResourceId": "BUCKET_ONE", + "complianceResourceType": "AWS::S3::Bucket", + "complianceType": "NON_COMPLIANT", + "orderingTimestamp": "..." + }, + { + "complianceResourceId": "BUCKET_TWO", + "complianceResourceType": "AWS::S3::Bucket", + "complianceType": "COMPLIANT", + "orderingTimestamp": "..." 
+ } + ], + "resultToken": "...", + "testMode": false + }, + "responseElements": null, + "sourceIPAddress": "...", + "userAgent": "...", + "userIdentity": { + "accessKeyId": "...", + "accountId": "...", + "arn": "...", + "principalId": "...", + "sessionContext": { + "attributes": { + "creationDate": "..." + }, + "sessionIssuer": { + "accountId": "...", + "arn": "...", + "principalId": "...", + "type": "Role", + "userName": "..." + }, + "webIdFederationData": {} + }, + "type": "AssumedRole" + } + } + ] + }, + "description": "Triggers an alert caused by a config compliance change of NON_COMPLIANT", + "log": "cloudtrail:events", + "service": "s3", + "source": "prefix.cluster.sample.bucket", + "trigger_rules": [ + "config_compliance" + ] + }, + { + "data": { + "Records": [ + { + "additionalEventData": { + "configRuleArn": "...", + "configRuleInputParameters": "{}", + "configRuleName": "s3-bucket-logging-enabled", + "notificationJobType": "SCHEDULED_NOTIFICATION" + }, + "awsRegion": "...", + "eventID": "...", + "eventName": "PutEvaluations", + "eventSource": "config.amazonaws.com", + "eventTime": "...", + "eventType": "AwsApiCall", + "eventVersion": "1.05", + "recipientAccountId": "...", + "requestID": "...", + "requestParameters": { + "evaluations": [ + { + "complianceResourceId": "BUCKET_ONE", + "complianceResourceType": "AWS::S3::Bucket", + "complianceType": "COMPLIANT", + "orderingTimestamp": "..." + }, + { + "complianceResourceId": "BUCKET_TWO", + "complianceResourceType": "AWS::S3::Bucket", + "complianceType": "COMPLIANT", + "orderingTimestamp": "..." + } + ], + "resultToken": "...", + "testMode": false + }, + "responseElements": null, + "sourceIPAddress": "...", + "userAgent": "...", + "userIdentity": { + "accessKeyId": "...", + "accountId": "...", + "arn": "...", + "principalId": "...", + "sessionContext": { + "attributes": { + "creationDate": "..." + }, + "sessionIssuer": { + "accountId": "...", + "arn": "...", + "principalId": "...", + "type": "Role", + "userName": "..." + }, + "webIdFederationData": {} + }, + "type": "AssumedRole" + } + } + ] + }, + "description": "Will not trigger an alert caused by a config compliance change of COMPLIANT", + "log": "cloudtrail:events", + "service": "s3", + "source": "prefix.cluster.sample.bucket", + "trigger_rules": [] + }, + { + "data": { + "Records": [ + { + "awsRegion": "eu-west-1", + "eventID": "...", + "eventName": "StartAutomationExecution", + "eventSource": "ssm.amazonaws.com", + "eventTime": "...", + "eventType": "...", + "eventVersion": "1.05", + "recipientAccountId": "...", + "requestID": "...", + "requestParameters": { + "domain": "vpc" + }, + "responseElements": { + "allocationId": "..", + "domain": "vpc", + "networkBorderGroup": "...", + "publicIp": "...", + "publicIpv4Pool": "amazon", + "requestId": "..." + }, + "sourceIPAddress": "config.amazonaws.com", + "userAgent": "...", + "userIdentity": { + "accessKeyId": "...", + "accountId": "...", + "arn": "...", + "principalId": "...", + "sessionContext": { + "attributes": { + "creationDate": "..." + }, + "sessionIssuer": { + "accountId": "...", + "arn": "...", + "principalId": "...", + "type": "Role", + "userName": "..." 
+ }, + "webIdFederationData": {} + }, + "type": "AssumedRole" + } + } + ] + }, + "description": "Triggers an alert when auto-remediation of Config NON_COMPLIANT takes place", + "log": "cloudtrail:events", + "service": "s3", + "source": "prefix.cluster.sample.bucket", + "trigger_rules": [ + "config_auto_remediation" + ] + } +] \ No newline at end of file diff --git a/rules/community/cloudtrail/cloudtrail_aws_config.py b/rules/community/cloudtrail/cloudtrail_aws_config.py new file mode 100644 index 000000000..3103cf687 --- /dev/null +++ b/rules/community/cloudtrail/cloudtrail_aws_config.py @@ -0,0 +1,47 @@ +"""Alert on AWS Config""" +from rules.matchers.matchers import AwsConfigMatcher +from streamalert.shared.rule import rule + + +# Populate this list to alert on specific Config Rules, otherwise all rules will be in-scope +# Also consider the use of Lookup-Tables +RULES_TO_ALERT_ON = [] + + +@rule(logs=["cloudtrail:events"], matchers=[AwsConfigMatcher.is_config_compliance]) +def config_compliance(record): + """ + author: jack (jack1902) + description: Alert on AWS Config Complaince Change events of NON_COMPLIANT + testing: From the Config page (https://console.aws.amazon.com/config/home) + ensure recording is turned on. And you have a basic rule you can + trigger as compliant or non-compliant. + """ + + non_compliance_present = any( + evaluation["complianceType"] == "NON_COMPLIANT" + for evaluation in record["requestParameters"]["evaluations"] + ) + + if RULES_TO_ALERT_ON: + # Alert on specific rule names. Useful when some Config Rules are just TOO noisy. + rule_name = record["additionalEventData"]["configRuleName"] + result = rule_name in RULES_TO_ALERT_ON and non_compliance_present + else: + # Alert on ALL config rules regardless of their name + result = non_compliance_present + + return result + + +@rule(logs=["cloudtrail:events"], matchers=[AwsConfigMatcher.is_auto_remediation]) +def config_auto_remediation(_): + """ + author: jack (jack1902) + description: Alert on AWS Config Auto Remediation + testing: From the Config page (https://console.aws.amazon.com/config/home) + ensure recording is turned on. And you have a basic rule you can + trigger as compliant or non-compliant. Then trigger the remediation + either manually or have it done automatically. + """ + return True diff --git a/rules/community/cloudtrail/__init__.py b/rules/community/cloudwatch_events/__init__.py similarity index 100% rename from rules/community/cloudtrail/__init__.py rename to rules/community/cloudwatch_events/__init__.py diff --git a/tests/integration/rules/cloudtrail/cloudtrail_critical_api_calls.json b/rules/community/cloudwatch_events/cloudtrail_critical_api_calls.json similarity index 93% rename from tests/integration/rules/cloudtrail/cloudtrail_critical_api_calls.json rename to rules/community/cloudwatch_events/cloudtrail_critical_api_calls.json index 81a6d22f1..335afc12b 100644 --- a/tests/integration/rules/cloudtrail/cloudtrail_critical_api_calls.json +++ b/rules/community/cloudwatch_events/cloudtrail_critical_api_calls.json @@ -797,55 +797,5 @@ "trigger_rules": [ "cloudtrail_critical_api_calls" ] - }, - { - "data": { - "Records": [ - { - "eventVersion": "1.05", - "userIdentity": { - "accessKeyId": "...", - "accountId": "12345", - "arn": "...", - "invokedBy": "...", - "principalId": "12345", - "sessionContext": { - "attributes": { - "creationDate": "...", - "mfaAuthenticated": "true" - } - }, - "type": "..." 
- }, - "eventTime": "2019-07-09T16:59:01Z", - "eventSource": "ec2.amazonaws.com", - "eventName": "DisableEbsEncryptionByDefault", - "awsRegion": "us-east-1", - "sourceIPAddress": "...", - "userAgent": "...", - "requestParameters": { - "DisableEbsEncryptionByDefaultRequest": {} - }, - "responseElements": { - "DisableEbsEncryptionByDefaultResponse": { - "xmlns": "http://ec2.amazonaws.com/doc/2016-11-15/", - "ebsEncryptionByDefault": false, - "requestId": "19a19cd8-5f1b-4d5e-8af4-3e826fa03d0f" - } - }, - "requestID": "...", - "eventID": "...", - "eventType": "AwsApiCall", - "recipientAccountId": "123456789123" - } - ] - }, - "description": "Disabling default EBS encryption", - "log": "cloudtrail:events", - "service": "s3", - "source": "prefix.cluster.sample.bucket", - "trigger_rules": [ - "cloudtrail_critical_api_calls" - ] } ] \ No newline at end of file diff --git a/rules/community/cloudtrail/cloudtrail_critical_api_calls.py b/rules/community/cloudwatch_events/cloudtrail_critical_api_calls.py similarity index 100% rename from rules/community/cloudtrail/cloudtrail_critical_api_calls.py rename to rules/community/cloudwatch_events/cloudtrail_critical_api_calls.py diff --git a/tests/integration/rules/cloudtrail/cloudtrail_ec2_image_creation.json b/rules/community/cloudwatch_events/cloudtrail_ec2_image_creation.json similarity index 100% rename from tests/integration/rules/cloudtrail/cloudtrail_ec2_image_creation.json rename to rules/community/cloudwatch_events/cloudtrail_ec2_image_creation.json diff --git a/rules/community/cloudtrail/cloudtrail_ec2_image_creation.py b/rules/community/cloudwatch_events/cloudtrail_ec2_image_creation.py similarity index 100% rename from rules/community/cloudtrail/cloudtrail_ec2_image_creation.py rename to rules/community/cloudwatch_events/cloudtrail_ec2_image_creation.py diff --git a/tests/integration/rules/cloudtrail/cloudtrail_mfa_policy_abuse_attempt.json b/rules/community/cloudwatch_events/cloudtrail_mfa_policy_abuse_attempt.json similarity index 100% rename from tests/integration/rules/cloudtrail/cloudtrail_mfa_policy_abuse_attempt.json rename to rules/community/cloudwatch_events/cloudtrail_mfa_policy_abuse_attempt.json diff --git a/rules/community/cloudtrail/cloudtrail_mfa_policy_abuse_attempt.py b/rules/community/cloudwatch_events/cloudtrail_mfa_policy_abuse_attempt.py similarity index 100% rename from rules/community/cloudtrail/cloudtrail_mfa_policy_abuse_attempt.py rename to rules/community/cloudwatch_events/cloudtrail_mfa_policy_abuse_attempt.py diff --git a/tests/integration/rules/cloudtrail/cloudtrail_network_acl_ingress_anywhere.json b/rules/community/cloudwatch_events/cloudtrail_network_acl_ingress_anywhere.json similarity index 100% rename from tests/integration/rules/cloudtrail/cloudtrail_network_acl_ingress_anywhere.json rename to rules/community/cloudwatch_events/cloudtrail_network_acl_ingress_anywhere.json diff --git a/rules/community/cloudtrail/cloudtrail_network_acl_ingress_anywhere.py b/rules/community/cloudwatch_events/cloudtrail_network_acl_ingress_anywhere.py similarity index 100% rename from rules/community/cloudtrail/cloudtrail_network_acl_ingress_anywhere.py rename to rules/community/cloudwatch_events/cloudtrail_network_acl_ingress_anywhere.py diff --git a/tests/integration/rules/cloudtrail/cloudtrail_public_resources.json b/rules/community/cloudwatch_events/cloudtrail_public_resources.json similarity index 100% rename from tests/integration/rules/cloudtrail/cloudtrail_public_resources.json rename to 
rules/community/cloudwatch_events/cloudtrail_public_resources.json diff --git a/rules/community/cloudtrail/cloudtrail_public_resources.py b/rules/community/cloudwatch_events/cloudtrail_public_resources.py similarity index 100% rename from rules/community/cloudtrail/cloudtrail_public_resources.py rename to rules/community/cloudwatch_events/cloudtrail_public_resources.py diff --git a/tests/integration/rules/cloudtrail/cloudtrail_put_bucket_acl.json b/rules/community/cloudwatch_events/cloudtrail_put_bucket_acl.json similarity index 100% rename from tests/integration/rules/cloudtrail/cloudtrail_put_bucket_acl.json rename to rules/community/cloudwatch_events/cloudtrail_put_bucket_acl.json diff --git a/rules/community/cloudtrail/cloudtrail_put_bucket_acl.py b/rules/community/cloudwatch_events/cloudtrail_put_bucket_acl.py similarity index 100% rename from rules/community/cloudtrail/cloudtrail_put_bucket_acl.py rename to rules/community/cloudwatch_events/cloudtrail_put_bucket_acl.py diff --git a/tests/integration/rules/cloudtrail/cloudtrail_put_object_acl_public.json b/rules/community/cloudwatch_events/cloudtrail_put_object_acl_public.json similarity index 100% rename from tests/integration/rules/cloudtrail/cloudtrail_put_object_acl_public.json rename to rules/community/cloudwatch_events/cloudtrail_put_object_acl_public.json diff --git a/rules/community/cloudtrail/cloudtrail_put_object_acl_public.py b/rules/community/cloudwatch_events/cloudtrail_put_object_acl_public.py similarity index 100% rename from rules/community/cloudtrail/cloudtrail_put_object_acl_public.py rename to rules/community/cloudwatch_events/cloudtrail_put_object_acl_public.py diff --git a/rules/community/cloudwatch_events/cloudtrail_put_object_acl_public_publisher_example.json b/rules/community/cloudwatch_events/cloudtrail_put_object_acl_public_publisher_example.json new file mode 100644 index 000000000..b8b6729fa --- /dev/null +++ b/rules/community/cloudwatch_events/cloudtrail_put_object_acl_public_publisher_example.json @@ -0,0 +1,188 @@ +[ + { + "data": { + "account": 12345, + "detail": { + "additionalEventData": { + "x_amz_id_2": "..." + }, + "awsRegion": "...", + "eventID": "...", + "eventName": "PutObjectAcl", + "eventSource": "s3.amazonaws.com", + "eventTime": "2017-01-01T00:20:50Z", + "eventType": "...", + "eventVersion": "...", + "readOnly": false, + "recipientAccountId": "12345", + "requestID": "19a19cd8-5f1b-4d5e-8af4-3e826fa03d0f", + "requestParameters": { + "AccessControlPolicy": { + "AccessControlList": { + "Grant": [ + { + "Grantee": { + "URI": "http://acs.amazonaws.com/groups/global/AllUsers", + "xmlns:xsi": "http://www.w3.org/2001/XMLSchema-instance", + "xsi:type": "Group" + }, + "permission": "READ" + } + ] + }, + "Owner": { + "ID": "..." + }, + "xmlns": "http://s3.amazonaws.com/doc/2006-03-01/" + }, + "acl": "", + "bucketName": "example-bucket-to-ignore", + "key": "..." + }, + "resources": [ + { + "ARN": "...", + "type": "..." + }, + { + "ARN": "...", + "accountId": "12345", + "type": "..." + } + ], + "responseElements": {}, + "sourceIPAddress": "1.2.3.4", + "userAgent": "...", + "userIdentity": { + "accessKeyId": "...", + "accountId": "12345", + "arn": "...", + "principalId": "...:...", + "sessionContext": { + "attributes": { + "creationDate": "...", + "mfaAuthenticated": "..." + }, + "sessionIssuer": { + "accountId": "12345", + "arn": "...", + "principalId": "...", + "type": "...", + "userName": "test_user" + } + }, + "type": "..." 
+ } + }, + "detail-type": "...", + "id": "123", + "region": "...", + "resources": [], + "source": "...", + "time": "...", + "version": "0" + }, + "description": "A PutObjectAcl alert to demonstrate the testing of publishers", + "log": "cloudwatch:events", + "service": "kinesis", + "source": "prefix_cluster1_streamalert", + "trigger_rules": [ + "cloudtrail_put_object_acl_public_publisher_example" + ], + "publisher_tests": { + "slack:sample-channel": [ + { + "jmespath_expression": "keys(@)", + "condition": "is", + "value": [ + "@slack.text", + "@slack.attachments", + "@slack._previous_publication" + ] + }, + [ + "\"@slack.text\"", + "is", + "Rule triggered" + ], + [ + "\"@slack.attachments\"[0].title", + "is", + "cloudtrail_put_object_acl_public_publisher_example" + ], + [ + "\"@slack.attachments\"[0].title_link", + "is", + "https://github.com/airbnb/streamalert/search?q=cloudtrail_put_object_acl_public_publisher_example+path%3A%2Frules" + ], + [ + "\"@slack.attachments\"[1].fields[0].title", + "is", + "Note" + ], + [ + "\"@slack.attachments\"[1].fields[0].value", + "is", + "This is purely for example purposes in testing, and is not meant to be used as-is" + ], + [ + "\"@slack.attachments\"[2].title", + "is", + "Record" + ], + [ + "\"@slack.attachments\"[2].text", + "contains", + "\"requestID\": \"19a19cd8-5f1b-4d5e-8af4-3e826fa03d0f\"" + ], + [ + "\"@slack.attachments\"[2].author", + "is", + "prefix_cluster1_streamalert" + ], + [ + "\"@slack.attachments\"[2].fields[0].title", + "is", + "Alert Id" + ] + ], + "pagerduty:sample-integration": [ + [ + "\"@pagerduty.description\"", + "is", + "cloudtrail_put_object_acl_public_publisher_example" + ], + [ + "\"@pagerduty.details\".eventName", + "contains", + "PutObjectAcl" + ], + [ + "\"@pagerduty.details\".eventSource", + "contains", + "s3.amazonaws.com" + ], + [ + "\"@pagerduty.details\".userName", + "is", + "test_user" + ], + [ + "\"@pagerduty.details\".sourceIPAddress", + "is", + "1.2.3.4" + ], + [ + "\"@pagerduty.details\".bucketName", + "is", + "example-bucket-to-ignore" + ], + [ + "\"@pagerduty.details\".eventTime", + "is", + "2017-01-01T00:20:50Z" + ] + ] + } + } +] \ No newline at end of file diff --git a/rules/community/cloudwatch_events/cloudtrail_put_object_acl_public_publisher_example.py b/rules/community/cloudwatch_events/cloudtrail_put_object_acl_public_publisher_example.py new file mode 100644 index 000000000..6a4e8e375 --- /dev/null +++ b/rules/community/cloudwatch_events/cloudtrail_put_object_acl_public_publisher_example.py @@ -0,0 +1,62 @@ +"""Identifies new S3 object ACLs that grant access to the public.""" +from publishers.community.generic import add_record, populate_fields +from publishers.community.pagerduty.pagerduty_layout import ( + ShortenTitle, as_custom_details, + PrettyPrintArrays, +) +from publishers.community.slack.slack_layout import Summary, AttachRuleInfo, AttachFullRecord +from rules.helpers.base import data_has_value_from_substring_list +from streamalert.shared.rule import rule + + +_PUBLIC_ACLS = { + 'http://acs.amazonaws.com/groups/global/AuthenticatedUsers', + 'http://acs.amazonaws.com/groups/global/AllUsers' +} + + +@rule( + logs=['cloudwatch:events'], + req_subkeys={ + 'detail': ['eventName', 'requestParameters', 'sourceIPAddress'] + }, + outputs=['slack:sample-channel', 'pagerduty:sample-integration'], + publishers={ + 'slack': [Summary, AttachRuleInfo, AttachFullRecord], + 'pagerduty': [ + add_record, + populate_fields, + PrettyPrintArrays, + ShortenTitle, + as_custom_details + ], + }, + context={ + 
'populate_fields': [ + 'userName', + 'sourceIPAddress', + 'eventTime', + 'eventName', + 'eventSource', + 'bucketName', + ] + } +) +def cloudtrail_put_object_acl_public_publisher_example(rec, _): + """ + description: Identifies a change to an S3 object ACL that grants access + to AllUsers (anyone on the internet) or AuthenticatedUsers + (any user with an AWS account). + + note: This is purely for example purposes in testing, and is not meant to be used as-is + """ + if rec['detail']['sourceIPAddress'] != '1.2.3.4': + return False # Hack to avoid triggering for other tests events + + request_params = rec['detail']['requestParameters'] + return ( + rec['detail']['eventName'] == 'PutObjectAcl' and + # note: substring is used because it can exist as: + # "http://acs.amazonaws.com/groups/global/AllUsers" or + # "uri=http://acs.amazonaws.com/groups/global/AllUsers" + data_has_value_from_substring_list(request_params, _PUBLIC_ACLS)) diff --git a/tests/integration/rules/cloudtrail/cloudtrail_root_account_usage.json b/rules/community/cloudwatch_events/cloudtrail_root_account_usage.json similarity index 100% rename from tests/integration/rules/cloudtrail/cloudtrail_root_account_usage.json rename to rules/community/cloudwatch_events/cloudtrail_root_account_usage.json diff --git a/rules/community/cloudtrail/cloudtrail_root_account_usage.py b/rules/community/cloudwatch_events/cloudtrail_root_account_usage.py similarity index 100% rename from rules/community/cloudtrail/cloudtrail_root_account_usage.py rename to rules/community/cloudwatch_events/cloudtrail_root_account_usage.py diff --git a/tests/integration/rules/cloudtrail/cloudtrail_security_group_ingress_anywhere.json b/rules/community/cloudwatch_events/cloudtrail_security_group_ingress_anywhere.json similarity index 100% rename from tests/integration/rules/cloudtrail/cloudtrail_security_group_ingress_anywhere.json rename to rules/community/cloudwatch_events/cloudtrail_security_group_ingress_anywhere.json diff --git a/rules/community/cloudtrail/cloudtrail_security_group_ingress_anywhere.py b/rules/community/cloudwatch_events/cloudtrail_security_group_ingress_anywhere.py similarity index 100% rename from rules/community/cloudtrail/cloudtrail_security_group_ingress_anywhere.py rename to rules/community/cloudwatch_events/cloudtrail_security_group_ingress_anywhere.py diff --git a/tests/integration/rules/cloudtrail/cloudtrail_snapshot_or_ami_made_public.json b/rules/community/cloudwatch_events/cloudtrail_snapshot_or_ami_made_public.json similarity index 100% rename from tests/integration/rules/cloudtrail/cloudtrail_snapshot_or_ami_made_public.json rename to rules/community/cloudwatch_events/cloudtrail_snapshot_or_ami_made_public.json diff --git a/rules/community/cloudtrail/cloudtrail_snapshot_or_ami_made_public.py b/rules/community/cloudwatch_events/cloudtrail_snapshot_or_ami_made_public.py similarity index 100% rename from rules/community/cloudtrail/cloudtrail_snapshot_or_ami_made_public.py rename to rules/community/cloudwatch_events/cloudtrail_snapshot_or_ami_made_public.py diff --git a/tests/integration/rules/duo/duo_bypass_code_create_non_auto_generated.json b/rules/community/duo_administrator/duo_bypass_code_create_non_auto_generated.json similarity index 100% rename from tests/integration/rules/duo/duo_bypass_code_create_non_auto_generated.json rename to rules/community/duo_administrator/duo_bypass_code_create_non_auto_generated.json diff --git a/tests/integration/rules/duo/duo_bypass_code_create_non_expiring.json 
b/rules/community/duo_administrator/duo_bypass_code_create_non_expiring.json similarity index 100% rename from tests/integration/rules/duo/duo_bypass_code_create_non_expiring.json rename to rules/community/duo_administrator/duo_bypass_code_create_non_expiring.json diff --git a/tests/integration/rules/duo/duo_bypass_code_create_unlimited_use.json b/rules/community/duo_administrator/duo_bypass_code_create_unlimited_use.json similarity index 100% rename from tests/integration/rules/duo/duo_bypass_code_create_unlimited_use.json rename to rules/community/duo_administrator/duo_bypass_code_create_unlimited_use.json diff --git a/tests/integration/rules/duo/duo_anonymous_ip_failure.json b/rules/community/duo_authentication/duo_anonymous_ip_failure.json similarity index 96% rename from tests/integration/rules/duo/duo_anonymous_ip_failure.json rename to rules/community/duo_authentication/duo_anonymous_ip_failure.json index a7c999200..04f70bb83 100644 --- a/tests/integration/rules/duo/duo_anonymous_ip_failure.json +++ b/rules/community/duo_authentication/duo_anonymous_ip_failure.json @@ -15,6 +15,7 @@ "factor": "Duo Push", "integration": "Test Integration", "ip": "12.123.123.12", + "isotimestamp": "2017-09-13T15:28:19.000Z", "location": { "city": "Place", "country": "US", diff --git a/tests/integration/rules/duo/duo_fraud.json b/rules/community/duo_authentication/duo_fraud.json similarity index 95% rename from tests/integration/rules/duo/duo_fraud.json rename to rules/community/duo_authentication/duo_fraud.json index b5bd01896..3cec6c74c 100644 --- a/tests/integration/rules/duo/duo_fraud.json +++ b/rules/community/duo_authentication/duo_fraud.json @@ -15,6 +15,7 @@ "factor": "Duo Push", "integration": "Test Integration", "ip": "12.123.123.12", + "isotimestamp": "2017-09-13T15:28:19.000Z", "location": { "city": "Place", "country": "US", @@ -50,6 +51,7 @@ "factor": "Duo Push", "integration": "Test Integration", "ip": "12.123.123.12", + "isotimestamp": "2017-09-13T15:28:19.000Z", "location": { "city": "Place", "country": "US", diff --git a/rules/community/duo_authentication/duo_lookup_tables_example.json b/rules/community/duo_authentication/duo_lookup_tables_example.json new file mode 100644 index 000000000..12ce67c72 --- /dev/null +++ b/rules/community/duo_authentication/duo_lookup_tables_example.json @@ -0,0 +1,47 @@ +[ + { + "data": { + "access_device": { + "browser": "Netscape", + "browser_version": "60.0.0000.80", + "flash_version": "27.0.0.0", + "java_version": "uninstalled", + "os": "Mac OS X", + "os_version": "10.12.6", + "trusted_endpoint_status": "not trusted" + }, + "alias": "", + "device": "555-123-4567", + "factor": "Duo Push", + "integration": "Test Integration", + "ip": "12.123.123.12", + "isotimestamp": "2017-09-13T15:28:19.000Z", + "location": { + "city": "Place", + "country": "US", + "state": "State" + }, + "new_enrollment": false, + "reason": "", + "result": "SUCCESS", + "timestamp": 1505316499, + "username": "user.name@email.com" + }, + "description": "Duo authentication log marked as failure as a result of 'Anonymous IP' that will create an alert", + "log": "duo:authentication", + "service": "streamalert_app", + "source": "prefix_cluster_duo_auth_sm-app-name_app", + "trigger_rules": [ + "duo_lookup_tables_example" + ], + "test_fixtures": { + "lookup_tables": { + "dynamo-backed-table": { + "duo_blacklisted_browsers": [ + "Netscape" + ] + } + } + } + } +] \ No newline at end of file diff --git a/rules/community/duo_authentication/duo_lookup_tables_example.py 
b/rules/community/duo_authentication/duo_lookup_tables_example.py new file mode 100644 index 000000000..12740ca22 --- /dev/null +++ b/rules/community/duo_authentication/duo_lookup_tables_example.py @@ -0,0 +1,18 @@ +"""Alert on any Duo auth logs marked as a failure due to an Anonymous IP.""" +from streamalert.shared.rule import rule +from streamalert.shared.lookup_tables.core import LookupTables + + +@rule(logs=['duo:authentication']) +def duo_lookup_tables_example(rec): + """ + description: Alert on Duo auth logs from blacklisted browsers, as defined by a lookup table + note: This is purely for example purposes in testing, and is not meant to be used as-is + """ + # The 'global' fixture file at rules/test_fixtures/lookup_tables/dynamo-backed-table.json + # creates the 'dynamo-backed-table' containing the 'duo_blacklisted_browsers' value + blacklisted_browsers = LookupTables.get('dynamo-backed-table', 'duo_blacklisted_browsers', []) + + # The test event contains a browser of 'Netscape', which is + # included in the lookup table blacklist + return rec['access_device'].get('browser') in set(blacklisted_browsers) diff --git a/rules/community/fleet/fleet_bad_action.json b/rules/community/fleet/fleet_bad_action.json new file mode 100644 index 000000000..60ef00908 --- /dev/null +++ b/rules/community/fleet/fleet_bad_action.json @@ -0,0 +1,52 @@ +[ + { + "data": { + "name": "Bad action query", + "hostIdentifier": "pc-name", + "calendarTime": "Wed May 8 15:28:02 2019 UTC", + "unixTime": 1557329282, + "epoch": 0, + "counter": 19, + "decorations": { + "host_uuid": "ABCDEFG-XXXX-YYYY-ZZZZZZZZ", + "hostname": "pc-name" + }, + "columns": { + "mtime": "1557323732", + "bad_action": "0" + }, + "action": "added" + }, + "log": "fleet:results", + "description": "no bad actions have occoured", + "trigger_rules": [], + "source": "prefix.cluster.sample.bucket", + "service": "s3" + }, + { + "data": { + "name": "Bad action query", + "hostIdentifier": "pc-name", + "calendarTime": "Wed May 8 15:28:02 2019 UTC", + "unixTime": 1557329282, + "epoch": 0, + "counter": 19, + "decorations": { + "host_uuid": "ABCDEFG-XXXX-YYYY-ZZZZZZZZ", + "hostname": "pc-name" + }, + "columns": { + "mtime": "1557323732", + "bad_action": "1" + }, + "action": "added" + }, + "log": "fleet:results", + "description": "bad action", + "trigger_rules": [ + "fleet_bad_action" + ], + "source": "prefix.cluster.sample.bucket", + "service": "s3" + } +] \ No newline at end of file diff --git a/tests/integration/rules/github/github_disable_dismiss_stale_pull_request_approvals.json b/rules/community/github/github_disable_dismiss_stale_pull_request_approvals.json similarity index 100% rename from tests/integration/rules/github/github_disable_dismiss_stale_pull_request_approvals.json rename to rules/community/github/github_disable_dismiss_stale_pull_request_approvals.json diff --git a/tests/integration/rules/github/github_disable_protect_this_branch.json b/rules/community/github/github_disable_protect_this_branch.json similarity index 100% rename from tests/integration/rules/github/github_disable_protect_this_branch.json rename to rules/community/github/github_disable_protect_this_branch.json diff --git a/tests/integration/rules/github/github_disable_required_pull_request_reviews.json b/rules/community/github/github_disable_required_pull_request_reviews.json similarity index 100% rename from tests/integration/rules/github/github_disable_required_pull_request_reviews.json rename to rules/community/github/github_disable_required_pull_request_reviews.json diff 
--git a/tests/integration/rules/github/github_disable_required_status_checks.json b/rules/community/github/github_disable_required_status_checks.json similarity index 100% rename from tests/integration/rules/github/github_disable_required_status_checks.json rename to rules/community/github/github_disable_required_status_checks.json diff --git a/tests/integration/rules/github/github_disable_two_factor_requirement_org.json b/rules/community/github/github_disable_two_factor_requirement_org.json similarity index 100% rename from tests/integration/rules/github/github_disable_two_factor_requirement_org.json rename to rules/community/github/github_disable_two_factor_requirement_org.json diff --git a/tests/integration/rules/github/github_disable_two_factor_requirement_user.json b/rules/community/github/github_disable_two_factor_requirement_user.json similarity index 100% rename from tests/integration/rules/github/github_disable_two_factor_requirement_user.json rename to rules/community/github/github_disable_two_factor_requirement_user.json diff --git a/tests/integration/rules/github/github_oauth_application_create.json b/rules/community/github/github_oauth_application_create.json similarity index 100% rename from tests/integration/rules/github/github_oauth_application_create.json rename to rules/community/github/github_oauth_application_create.json diff --git a/tests/integration/rules/github/github_site_admin_action.json b/rules/community/github/github_site_admin_action.json similarity index 100% rename from tests/integration/rules/github/github_site_admin_action.json rename to rules/community/github/github_site_admin_action.json diff --git a/tests/integration/rules/github/github_site_admin_user_promotion.json b/rules/community/github/github_site_admin_user_promotion.json similarity index 100% rename from tests/integration/rules/github/github_site_admin_user_promotion.json rename to rules/community/github/github_site_admin_user_promotion.json diff --git a/tests/integration/rules/guardduty/guard_duty_all.json b/rules/community/guardduty/guard_duty_all.json similarity index 100% rename from tests/integration/rules/guardduty/guard_duty_all.json rename to rules/community/guardduty/guard_duty_all.json diff --git a/rules/community/guardduty/guard_duty_all.py b/rules/community/guardduty/guard_duty_all.py index a43020efc..4f2de6d8b 100644 --- a/rules/community/guardduty/guard_duty_all.py +++ b/rules/community/guardduty/guard_duty_all.py @@ -1,9 +1,9 @@ """Alert on GuardDuty""" -from rules.matchers import matchers +from rules.matchers.matchers import AwsGuardDutyMatcher from streamalert.shared.rule import rule -@rule(logs=['cloudwatch:events'], matchers=[matchers.guard_duty]) +@rule(logs=['cloudwatch:events'], matchers=[AwsGuardDutyMatcher.guard_duty]) def guard_duty_all(*_): """ author: spiper diff --git a/tests/integration/rules/mitre_attack/right_to_left_character.json b/rules/community/mitre_attack/defense_evasion/multi/obfuscated_files_or_information/right_to_left_character.json similarity index 100% rename from tests/integration/rules/mitre_attack/right_to_left_character.json rename to rules/community/mitre_attack/defense_evasion/multi/obfuscated_files_or_information/right_to_left_character.json diff --git a/tests/integration/rules/onelogin/onelogin_events_assumed_role.json b/rules/community/onelogin/onelogin_events_assumed_role.json similarity index 100% rename from tests/integration/rules/onelogin/onelogin_events_assumed_role.json rename to rules/community/onelogin/onelogin_events_assumed_role.json 
diff --git a/rules/community/onelogin/onelogin_events_threat_intel_example.json b/rules/community/onelogin/onelogin_events_threat_intel_example.json new file mode 100644 index 000000000..d4ca9597b --- /dev/null +++ b/rules/community/onelogin/onelogin_events_threat_intel_example.json @@ -0,0 +1,68 @@ +[ + { + "data": { + "account_id": 1234, + "actor_system": "System", + "actor_user_id": 987, + "actor_user_name": "", + "app_id": 123456, + "app_name": "App Name", + "assuming_acting_user_id": 654, + "client_id": 11223344, + "created_at": "2017-10-05T18:11:32Z", + "custom_message": "Message", + "directory_id": 6666, + "directory_sync_run_id": 7777, + "error_description": "ERROR ERROR", + "event_type_id": 1, + "group_id": 98765, + "group_name": "Group Name", + "id": 123, + "ipaddr": "1.1.1.2", + "notes": "Notes", + "operation_name": "Operation Name", + "otp_device_id": 11111, + "otp_device_name": "OTP Device Name", + "policy_id": 22222, + "policy_name": "Policy Name", + "proxy_ip": "0.0.0.0", + "resolution": "Resolved", + "resource_type_id": 44332211, + "role_id": 456, + "role_name": "Role", + "user_id": 123456789, + "user_name": "username" + }, + "description": "OneLogin generated event from a malicious IP address as defined by threat intel", + "log": "onelogin:events", + "service": "streamalert_app", + "source": "prefix_cluster_onelogin-events-app-name_app", + "trigger_rules": [ + "onelogin_events_threat_intel_example" + ], + "test_fixtures": { + "threat_intel": [ + { + "ioc_value": "1.1.1.2", + "ioc_type": "ip", + "sub_type": "mal_ip" + }, + { + "ioc_value": "0123456789abcdef0123456789abcdef", + "ioc_type": "md5", + "sub_type": "mal_md5" + }, + { + "ioc_value": "evil.com", + "ioc_type": "domain", + "sub_type": "c2_domain" + }, + { + "ioc_value": "false.positive", + "ioc_type": "domain", + "sub_type": "c2_domain" + } + ] + } + } +] \ No newline at end of file diff --git a/rules/community/onelogin/onelogin_events_threat_intel_example.py b/rules/community/onelogin/onelogin_events_threat_intel_example.py new file mode 100644 index 000000000..ea2911cb0 --- /dev/null +++ b/rules/community/onelogin/onelogin_events_threat_intel_example.py @@ -0,0 +1,24 @@ +"""Alert on the OneLogin event that a user has assumed the role of someone else.""" +from streamalert.shared.rule import disable, rule +from streamalert.rules_engine.threat_intel import ThreatIntel + + +# This example is disabled because it requires the threat_intel feature to be +# enabled in the following locations: +# https://github.com/airbnb/streamalert/blob/ +# 791abf892983eedbaf30ff5aeb1f55e46e20d82a/conf/threat_intel.json#L3 +# and +# https://github.com/airbnb/streamalert/blob/ +# 791abf892983eedbaf30ff5aeb1f55e46e20d82a/conf/clusters/prod.json#L80 +@disable +@rule(logs=['onelogin:events']) +def onelogin_events_threat_intel_example(rec): + """ + description: Alert on OneLogin activity from a malicious IP address using threat intel + note: This is purely for example purposes in testing, and is not meant to be used as-is + """ + # The 'local' fixture file at rules/community/onelogin/test_fixtures/threat_intel/example.json + # mocks out the threat intel values used by this rule + + # In this case, the rec['ipaddr'] value is a "known" malicious IP, so this will alert + return ThreatIntel.IOC_KEY in rec and 'ip' in rec[ThreatIntel.IOC_KEY] diff --git a/tests/unit/streamalert/alert_processor/publishers/__init__.py b/rules/community/osquery/__init__.py similarity index 100% rename from tests/unit/streamalert/alert_processor/publishers/__init__.py 
rename to rules/community/osquery/__init__.py diff --git a/rules/community/osquery/ssh_login_activity.json b/rules/community/osquery/ssh_login_activity.json new file mode 100644 index 000000000..36c89f9ae --- /dev/null +++ b/rules/community/osquery/ssh_login_activity.json @@ -0,0 +1,88 @@ +[ + { + "data": { + "action": "added", + "calendarTime": "Wed Feb 12 21:38:11 2020 UTC", + "columns": { + "host": "10.0.2.2", + "pid": 12345, + "time": 1581542540, + "tty": "ttys001", + "type": "7", + "username": "vagrant" + }, + "decorations": { + "envIdentifier": "fake-environment", + "roleIdentifier": "fake-role" + }, + "epoch": "0", + "hostIdentifier": "...", + "log_type": "result", + "name": "pack_incident-response_last", + "unixTime": "1581543491" + }, + "description": "This rule alerts on ssh logins to a linux host", + "log": "osquery:differential", + "service": "kinesis", + "source": "prefix_cluster1_streamalert", + "trigger_rules": [ + "ssh_login_activity" + ] + }, + { + "data": { + "action": "added", + "calendarTime": "Wed Feb 12 21:38:11 2020 UTC", + "columns": { + "host": "10.0.2.2", + "pid": 12345, + "time": 1581542540, + "tty": "ttys001", + "type": "7", + "username": "runlevel" + }, + "decorations": { + "envIdentifier": "fake-environment", + "roleIdentifier": "fake-role" + }, + "epoch": "0", + "hostIdentifier": "...", + "log_type": "result", + "name": "pack_incident-response_last", + "unixTime": "1581543491" + }, + "description": "This rule will not alert on runlevel ssh logins", + "log": "osquery:differential", + "service": "kinesis", + "source": "prefix_cluster1_streamalert", + "trigger_rules": [] + }, + { + "data": { + "action": "added", + "calendarTime": "Wed Feb 12 21:38:11 2020 UTC", + "columns": { + "host": "10.0.2.2", + "pid": 12345, + "time": 1581542540, + "tty": "ttys001", + "type": "8", + "username": "runlevel" + }, + "decorations": { + "envIdentifier": "fake-environment", + "roleIdentifier": "fake-role" + }, + "epoch": "0", + "hostIdentifier": "...", + "log_type": "result", + "name": "pack_incident-response_last", + "unixTime": "1581543491" + }, + "description": "This rule will not alert on ssh logout(type: 8)", + "log": "osquery:differential", + "service": "kinesis", + "source": "prefix_cluster1_streamalert", + "trigger_rules": [] + } +] \ No newline at end of file diff --git a/rules/community/osquery/ssh_login_activity.py b/rules/community/osquery/ssh_login_activity.py new file mode 100644 index 000000000..d3c7d440d --- /dev/null +++ b/rules/community/osquery/ssh_login_activity.py @@ -0,0 +1,18 @@ +"""Detect ssh login activity based on osquery last table""" +from rules.matchers.matchers import OsqueryMatcher +from streamalert.shared.rule import rule + + +@rule(logs=['osquery:differential'], + matchers=[OsqueryMatcher.added, OsqueryMatcher.user_login]) +def ssh_login_activity(_): + """ + author: chunyong-lin + description: Detect on ssh login activity to the linux host based on osquery + last table. This rule assumes we use default osquery pack + shipped with osquery package located at + /usr/share/osquery/packs/incident-response.conf on the linux + host. Update the pack name in rules/matchers/matchers.py if different. 
+ reference: https://osquery.io/schema/4.1.2#last + """ + return True diff --git a/tests/unit/streamalert/alert_processor/publishers/community/__init__.py b/rules/community/packetbeat/__init__.py similarity index 100% rename from tests/unit/streamalert/alert_processor/publishers/community/__init__.py rename to rules/community/packetbeat/__init__.py diff --git a/rules/community/packetbeat/packetbeat_blacklisted_domain.json b/rules/community/packetbeat/packetbeat_blacklisted_domain.json new file mode 100644 index 000000000..0ca71a2fc --- /dev/null +++ b/rules/community/packetbeat/packetbeat_blacklisted_domain.json @@ -0,0 +1,24 @@ +[ + { + "data": { + "@timestamp": "2018-02-06T07:23:54.827Z", + "bytes_in": 32, + "bytes_out": 64, + "client_ip": "172.16.3.33", + "dns": { + "answers_count": 2, + "question": { "name": "evil.com.", "type": "A" }, + "response_code": "NOERROR" + }, + "transport": "udp", + "type": "dns" + }, + "description": "basic schema validation check for packetbeat:flow", + "log": "packetbeat:dns", + "source": "prefix.cluster.sample.bucket", + "service": "s3", + "trigger_rules": [ + "packetbeat_blacklisted_domain" + ] + } +] diff --git a/rules/community/packetbeat/packetbeat_blacklisted_domain.py b/rules/community/packetbeat/packetbeat_blacklisted_domain.py new file mode 100644 index 000000000..bba303d76 --- /dev/null +++ b/rules/community/packetbeat/packetbeat_blacklisted_domain.py @@ -0,0 +1,18 @@ +"""Alert on PacketBeat events""" + +from streamalert.shared.rule import rule + + +DNS_BLACKLIST = [ + 'evil.com.' +] + +@rule(logs=['packetbeat:dns']) +def packetbeat_blacklisted_domain(rec): + """ + author: gavin (gavinelder) + description: Lookup for BlackListed DNS (CnC). + testing: (a) Review traffic logs for machine in question. + reference: https://www.elastic.co/guide/en/beats/packetbeat/master/packetbeat-overview.html + """ + return rec['dns']['question']['name'] in DNS_BLACKLIST diff --git a/rules/community/packetbeat/packetbeat_blacklisted_ip.json b/rules/community/packetbeat/packetbeat_blacklisted_ip.json new file mode 100644 index 000000000..460476738 --- /dev/null +++ b/rules/community/packetbeat/packetbeat_blacklisted_ip.json @@ -0,0 +1,27 @@ +[ + { + "data": { + "@timestamp": "2018-02-06T07:24:00.006Z", + "dest": { + "ip": "222.173.190.239", + "port": 44172 + }, + "final": true, + "last_time": "2018-02-06T07:23:00.620Z", + "source": { + "ip": "192.30.253.125", + "mac": "00:08:a2:09:e4:6a", + "port": 443 + }, + "start_time": "2018-02-06T07:23:00.620Z", + "type": "flow" + }, + "description": "packetbeat:flow showing outbound connection to bad domain", + "log": "packetbeat:flow", + "source": "prefix.cluster.sample.bucket", + "service": "s3", + "trigger_rules": [ + "packetbeat_blacklisted_ip" + ] + } +] diff --git a/rules/community/packetbeat/packetbeat_blacklisted_ip.py b/rules/community/packetbeat/packetbeat_blacklisted_ip.py new file mode 100644 index 000000000..c5fd9e31f --- /dev/null +++ b/rules/community/packetbeat/packetbeat_blacklisted_ip.py @@ -0,0 +1,18 @@ +"""Alert on PacketBeat events""" +import ipaddress +from streamalert.shared.rule import rule + +IP_BLACKLIST = [ + '222.173.190.239', +] + + +@rule(logs=['packetbeat:flow']) +def packetbeat_blacklisted_ip(rec): + """ + author: gavin (gavinelder) + description: Network connection to blacklisted IP. + testing: (a) Review traffic logs for machine in question. 
+ reference: https://www.elastic.co/guide/en/beats/packetbeat/master/packetbeat-overview.html + """ + return ipaddress.IPv4Address(rec['source']['ip']) and rec['dest']['ip'] in IP_BLACKLIST diff --git a/rules/community/packetbeat/packetbeat_dns_lookup.json b/rules/community/packetbeat/packetbeat_dns_lookup.json new file mode 100644 index 000000000..4bdfeacb9 --- /dev/null +++ b/rules/community/packetbeat/packetbeat_dns_lookup.json @@ -0,0 +1,27 @@ +[ + { + "data": { + "@timestamp": "2018-02-06T07:24:03.251Z", + "bytes_in": 54, + "bytes_out": 148, + "client_ip": "172.16.2.97", + "dns": { + "answers_count": 0, + "question": { + "name": "foo.evil.com.", + "type": "A" + }, + "response_code": "NXDOMAIN" + }, + "transport": "udp", + "type": "dns" + }, + "description": "packetbeat:dns showing lookup to bad domain", + "log": "packetbeat:dns", + "source": "prefix.cluster.sample.bucket", + "service": "s3", + "trigger_rules": [ + "packetbeat_dns_lookup" + ] + } +] diff --git a/rules/community/packetbeat/packetbeat_dns_lookup.py b/rules/community/packetbeat/packetbeat_dns_lookup.py new file mode 100644 index 000000000..633340e2a --- /dev/null +++ b/rules/community/packetbeat/packetbeat_dns_lookup.py @@ -0,0 +1,13 @@ +"""Alert on PacketBeat events""" +from streamalert.shared.rule import rule + + +@rule(logs=['packetbeat:dns']) +def packetbeat_dns_lookup(rec): + """ + author: gavin (gavinelder) + description: Alert on DNS lookup for Blacklisted domain + testing: (a) Review traffic logs for machine in question. + reference: https://www.elastic.co/guide/en/beats/packetbeat/master/packetbeat-overview.html + """ + return rec['dns']['question']['name'].endswith('.evil.com.') diff --git a/tests/integration/rules/trendmicro/trendmicro_schema.json b/rules/community/trendmicro/trendmicro_malware_event.json similarity index 97% rename from tests/integration/rules/trendmicro/trendmicro_schema.json rename to rules/community/trendmicro/trendmicro_malware_event.json index 1ccc8b5de..25e96630a 100644 --- a/tests/integration/rules/trendmicro/trendmicro_schema.json +++ b/rules/community/trendmicro/trendmicro_malware_event.json @@ -5,7 +5,6 @@ "log": "trendmicro:malwareevent", "service": "sns", "source": "prefix_cluster_sample_topic", - "validate_schema_only": false, "trigger_rules": [ "trendmicro_malware_event" ] @@ -16,7 +15,6 @@ "log": "trendmicro:malwareevent", "service": "sns", "source": "prefix_cluster_sample_topic", - "validate_schema_only": false, "trigger_rules": [ "trendmicro_malware_event" ] @@ -27,7 +25,6 @@ "log": "trendmicro:malwareevent", "service": "sns", "source": "prefix_cluster_sample_topic", - "validate_schema_only": false, "trigger_rules": [ "trendmicro_malware_event" ] @@ -38,7 +35,6 @@ "log": "trendmicro:malwareevent", "service": "sns", "source": "prefix_cluster_sample_topic", - "validate_schema_only": false, "trigger_rules": [ "trendmicro_malware_event" ] @@ -49,7 +45,6 @@ "log": "trendmicro:malwareevent", "service": "sns", "source": "prefix_cluster_sample_topic", - "validate_schema_only": false, "trigger_rules": [ "trendmicro_malware_event" ] diff --git a/rules/matchers/matchers.py b/rules/matchers/matchers.py index 31154608c..3f924ebbf 100644 --- a/rules/matchers/matchers.py +++ b/rules/matchers/matchers.py @@ -14,7 +14,75 @@ @rule('root_logins', logs=['osquery:differential'], matchers=[matchers.prod, matchers.pci], outputs=['pagerduty:sample-integration']) """ +class AwsGuardDutyMatcher: + """A class contains matchers for AWS GuardDuty service""" + @classmethod + def guard_duty(cls, rec): + 
return rec['detail-type'] == 'GuardDuty Finding' -def guard_duty(record): - return record['detail-type'] == 'GuardDuty Finding' +class OsqueryMatcher: + """A class defines contains matchers for Osquery events""" + + _EVENT_TYPE_LOGIN = 7 + _RUNLEVELS = { + '', + 'LOGIN', + 'reboot', + 'shutdown', + 'runlevel' + } + + + @classmethod + def added(cls, rec): + return rec['action'] == 'added' + + + @classmethod + def user_login(cls, rec): + """Capture user logins from the osquery last table + This matcher assumes we use default osquery pack shipped with osquery package + located at /usr/share/osquery/packs/incident-response.conf on the linux host. + Update the pack name (rec['name']) if it is different. + """ + return ( + rec['name'] == 'pack_incident-response_last' and + int(rec['columns']['type']) == cls._EVENT_TYPE_LOGIN and + (rec['columns']['username'] not in cls._RUNLEVELS) + ) + + +class AwsConfigMatcher: + """Contains Matchers relevant to AWS Config""" + + @staticmethod + def is_config_compliance(rec): + """Check if the record event is from config compliance + + Args: + rec (dict): Parsed log to check key/value pairs + + Returns: + bool: True if from config and not in testMode else False + """ + return ( + rec['eventSource'] == 'config.amazonaws.com' + and rec['eventName'] == 'PutEvaluations' + and not rec['requestParameters']['testMode'] + ) + + @staticmethod + def is_auto_remediation(rec): + """Check if the record is an auto-remediation event + + Args: + rec (dict): Parsed log to check key/value pairs + Returns: + bool: True if auto_remediation event else False + """ + return ( + rec['eventName'] == 'StartAutomationExecution' + and rec['eventSource'] == 'ssm.amazonaws.com' + and rec['sourceIPAddress'] == 'config.amazonaws.com' + ) diff --git a/tests/unit/streamalert/alert_processor/publishers/pagerduty/__init__.py b/scheduled_queries/__init__.py similarity index 100% rename from tests/unit/streamalert/alert_processor/publishers/pagerduty/__init__.py rename to scheduled_queries/__init__.py diff --git a/tests/unit/streamalert/alert_processor/publishers/slack/__init__.py b/scheduled_queries/sample/__init__.py similarity index 100% rename from tests/unit/streamalert/alert_processor/publishers/slack/__init__.py rename to scheduled_queries/sample/__init__.py diff --git a/scheduled_queries/sample/athena.py b/scheduled_queries/sample/athena.py new file mode 100644 index 000000000..4d79f77b7 --- /dev/null +++ b/scheduled_queries/sample/athena.py @@ -0,0 +1,50 @@ +""" +Copyright 2017-present, Airbnb Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from streamalert.scheduled_queries.query_packs.configuration import QueryPackConfiguration + +QueryPackConfiguration( + name='athena_any_query', + description='This query returns all Athena queries... 
how meta!', + + # Make sure to edit the database name properly or this query will error with some + # "insufficient privileges errors" + query=""" +SELECT + eventtime, + json_extract(requestparameters['queryexecutioncontext'], '$.database') as database_name, + requestparameters['querystring'] as querystring, + useridentity['type'] as user_identity_type, + useridentity['arn'] as user_identity_arn, + dt +FROM + "ATHENA_DATABASE_NAME"."cloudwatch_cloudtrail" +WHERE + dt = '{utcdatehour_minus1hour}' + + -- Only Events from Athena + AND eventsource = 'athena.amazonaws.com' + AND eventname = 'StartQueryExecution' + + -- Only on the CSIRT Prod account + AND recipientaccountid = '123456789012' + + -- Filter out noisy ALTER and SHOW queries. SHOW queries are commonly run in automation + -- by API clients, and ALTER queries are run commonly by the Athena partition function. + AND upper(substr(requestparameters['querystring'], 1, 5)) NOT IN ('ALTER', 'SHOW ') +""", + params=['utcdatehour_minus1hour'], + tags=['sample'] +) diff --git a/streamalert/__init__.py b/streamalert/__init__.py index 80ec03492..3d25a00d1 100644 --- a/streamalert/__init__.py +++ b/streamalert/__init__.py @@ -1,2 +1,2 @@ """StreamAlert version.""" -__version__ = '3.0.0' +__version__ = '3.1.0' diff --git a/streamalert/alert_processor/outputs/aws.py b/streamalert/alert_processor/outputs/aws.py index d5324bbb4..af6193162 100644 --- a/streamalert/alert_processor/outputs/aws.py +++ b/streamalert/alert_processor/outputs/aws.py @@ -15,6 +15,9 @@ """ from abc import abstractmethod from collections import OrderedDict +from email.mime.application import MIMEApplication +from email.mime.multipart import MIMEMultipart +from email.mime.text import MIMEText from datetime import datetime import json import uuid @@ -477,3 +480,180 @@ def _dispatch(self, alert, descriptor): LOGGER.info('New Alert:\n%s', json.dumps(publication, indent=2)) return True + +@StreamAlertOutput +class SESOutput(OutputDispatcher): + """Handle all alert dispatching for AWS SES""" + __service__ = "aws-ses" + + @staticmethod + def _add_attachment(msg, name, content): + """Add attachments to the msg + + Args: + msg (MIMEMultipart): email to attach to + name (str): name for the file to be attached + content (str): content of the file to be attached (should be string) + + Returns: + msg (MIMEMultipart): Email with the relevant attachments + """ + LOGGER.debug("Attaching %s to msg", name) + + att = MIMEApplication(content) + + att.add_header("Content-Disposition", "attachment", filename=name) + msg.attach(att) + + return msg + + @staticmethod + def _construct_body(msg, body): + """ Create the body of the email + + Args: + msg (MIMEMultipart): Email object used to construct the body + body (str): the body is represented as a string + body (dict): dictionary of message_type/message for the body (for use with HTML) + """ + if isinstance(body, str): + # For use with string based body + LOGGER.debug("body is a string of: %s", body) + + msg.attach(MIMEText(body)) + elif isinstance(body, dict): + # For use with HTML body + LOGGER.debug("body is not a string, attaching body of: %s", body) + + textual_message = MIMEMultipart("alternative") + for m_type, message in body.items(): + part = MIMEText(message, m_type) + textual_message.attach(part) + msg.attach(textual_message) + + return msg + + @classmethod + def _build_email(cls, alert, publication, creds): + """Construct the email to be sent using the alert, publication and creds + + Args: + alert (Alert): The alert + publication (dict): Alert
relevant to the triggered rule + creds (dict): Information relevant to send the alert + + Returns: + msg (MIMEMultipart): The constructed email ready to be sent + """ + + # Presentation defaults + default_subject = "{} triggered alert {}".format( + alert.rule_name, alert.alert_id + ) + default_body = "Please review the attached record.json" + + # Presentation values + subject = publication.get("@aws-ses.subject", default_subject) + body = publication.get("@aws-ses.body", default_body) + + msg = MIMEMultipart("mixed") + + # Setup to, from and subject + msg["To"] = creds["to_emails"] + msg["From"] = creds["from_email"] + msg["Subject"] = subject + + # Attach the record to the email + if publication.get("@aws-ses.attach_record", True): + record = json.dumps(alert.record, sort_keys=True, indent=2) + msg = cls._add_attachment(msg, "record.json", record) + + # Attach additional attachments to the email + if "@aws-ses.attachments" in publication: + for name, content in publication["@aws-ses.attachments"].items(): + msg = cls._add_attachment(msg, name, content) + + # Attach the body and return + return cls._construct_body(msg, body) + + @classmethod + def get_user_defined_properties(cls): + """Properties assigned by the user when configuring a new SES output. + + Returns: + OrderedDict: With 'descriptor' and 'aws_value' OutputProperty tuples + """ + return OrderedDict( + [ + ( + "descriptor", + OutputProperty( + description="a short and unique descriptor for this SES Output." + ), + ), + ( + "from_email", + OutputProperty( + description="the SES Verified email address to send from", + cred_requirement=True, + ), + ), + ( + "to_emails", + OutputProperty( + description="the SES Verified recipient email addresses, comma-separated", + cred_requirement=True, + ), + ), + ] + ) + + def _dispatch(self, alert, descriptor): + """Send alert to an SES Output + + Publishing: + By default the aws-ses output sends an email comprising some default intro text + and an attachment containing: + * alert.record (record.json) + + - @aws-ses.subject (str): + Replaces the default subject + - @aws-ses.attach_record (bool): + True (default): Attach the alert.record to the email + False: Don't attach the alert.record to the email + - @aws-ses.attachments (dict): + A dict of attachments to include in the message. + - @aws-ses.body (str): + Replaces the default intro text + + @see cls._construct_body() for some insight into how you can customize the body + + Args: + alert (Alert): Alert instance which triggered a rule + descriptor (str): Output descriptor + + Returns: + bool: True if alert was sent successfully, False otherwise + """ + creds = self._load_creds(descriptor) + if not creds: + return False + + publication = compose_alert(alert, self, descriptor) + + msg = self._build_email(alert, publication, creds) + + ses = boto3.client('ses', region_name=self.region) + + try: + response = ses.send_raw_email( + Source=msg['From'], + Destinations=msg['To'].split(','), + RawMessage={'Data': msg.as_string()}, + ) + except ClientError as e: + LOGGER.error(e.response['Error']['Message']) + return False + else: + LOGGER.info('Email sent!
Message ID: %s', response['MessageId']) + return True diff --git a/streamalert/alert_processor/outputs/phantom.py b/streamalert/alert_processor/outputs/phantom.py index 18338fac0..230a3ef8d 100644 --- a/streamalert/alert_processor/outputs/phantom.py +++ b/streamalert/alert_processor/outputs/phantom.py @@ -63,6 +63,7 @@ def get_user_defined_properties(cls): ('url', OutputProperty(description='the endpoint url for this Phantom integration', mask_input=True, + input_restrictions={' '}, cred_requirement=True)) ]) diff --git a/streamalert/alert_processor/outputs/slack.py b/streamalert/alert_processor/outputs/slack.py index 79dd56a04..a78092dcd 100644 --- a/streamalert/alert_processor/outputs/slack.py +++ b/streamalert/alert_processor/outputs/slack.py @@ -60,6 +60,7 @@ def get_user_defined_properties(cls): ('url', OutputProperty(description='the full Slack webhook url, including the secret', mask_input=True, + input_restrictions={' '}, cred_requirement=True)) ]) diff --git a/streamalert/alert_processor/outputs/teams.py b/streamalert/alert_processor/outputs/teams.py new file mode 100644 index 000000000..e97a615fe --- /dev/null +++ b/streamalert/alert_processor/outputs/teams.py @@ -0,0 +1,290 @@ +""" +Copyright 2017-present, Airbnb Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from collections import OrderedDict + +import pymsteams +from pymsteams import TeamsWebhookException + +from streamalert.alert_processor.helpers import compose_alert +from streamalert.alert_processor.outputs.output_base import ( + OutputDispatcher, + OutputProperty, + StreamAlertOutput, +) +from streamalert.shared.logger import get_logger + +LOGGER = get_logger(__name__) + + +@StreamAlertOutput +class TeamsOutput(OutputDispatcher): + """TeamsOutput handles all alert dispatching for Microsoft Teams""" + + __service__ = "teams" + + @classmethod + def get_user_defined_properties(cls): + """Get properties that must be assigned by the user when configuring a new Microsoft Teams + output. This should be sensitive or unique information for this use-case that needs + to come from the user. + + Every output should return a dict that contains a 'descriptor' with a description of the + integration being configured. + + Microsoft Teams also requires a user provided 'webhook' url that is composed of the Team's + api url and the unique integration key for this output. This value should be + masked during input and is a credential requirement.
+ + Returns: + OrderedDict: Contains various OutputProperty items + """ + return OrderedDict( + [ + ( + "descriptor", + OutputProperty( + description="a short and unique descriptor for this service configuration " + "(ie: name of Team the webhook relates to)" + ), + ), + ( + "url", + OutputProperty( + description="the full teams webhook url, including the secret", + mask_input=True, + cred_requirement=True, + ), + ), + ] + ) + + @classmethod + def _format_message(cls, alert, publication, webhook_url): + """Format the message to be sent to Teams + + Args: + alert (Alert): The alert + publication (dict): Alert relevant to the triggered rule + webhook_url (str): The webhook_url to send the card to + + Returns: + pymsteams.connectorcard: The message to be sent to Teams + The card will look like (by Default): + StreamAlert Rule Triggered: rule_name + Rule Description: + This will be the docstring from the rule, sent as the rule_description + + Record: + key value + key value + ... + """ + # Presentation defaults + default_title = "StreamAlert Rule Triggered: {}".format(alert.rule_name) + default_description = alert.rule_description + default_color = "E81123" # Red in Hexstring format + + # Special field that Publishers can use to customize the message + title = publication.get("@teams.title", default_title) + description = publication.get("@teams.description", default_description) + card_color = publication.get("@teams.card_color", default_color) + with_record = publication.get("@teams.with_record", True) + + # Instantiate the card with the url + teams_card = pymsteams.connectorcard(webhook_url) + + # Set the card's title, text and color + teams_card.title(title) + teams_card.text(description) + teams_card.color(card_color) + + # Add the Alert Section + teams_card.addSection(cls._generate_alert_section(alert)) + + if with_record: + # Add the record Section + teams_card.addSection(cls._generate_record_section(alert.record)) + + if "@teams.additional_card_sections" in publication: + teams_card = cls._add_additional_sections( + teams_card, publication["@teams.additional_card_sections"] + ) + + if "@teams.buttons" in publication: + teams_card = cls._add_buttons( + teams_card, publication["@teams.buttons"] + ) + + return teams_card + + @classmethod + def _generate_record_section(cls, record): + """Generate the record section + + This adds the entire record to a section as key/value pairs + + Args: + record (dict): The record that triggered the alert + + Returns: + record_section (pymsteams.cardsection): record section for the outgoing card + """ + # Instantiate the card section + record_section = pymsteams.cardsection() + + # Set the title + record_section.activityTitle("StreamAlert Alert Record") + + # Add the record as key/value pairs + for key, value in record.items(): + record_section.addFact(key, str(value)) + + return record_section + + @classmethod + def _generate_alert_section(cls, alert): + """Generate the alert section + + Args: + alert (Alert): The alert + + Returns: + alert_section (pymsteams.cardsection): alert section for the outgoing card + """ + + # Instantiate the card + alert_section = pymsteams.cardsection() + + # Set the title + alert_section.activityTitle("Alert Info") + + # Add basic information to the alert section + alert_section.addFact("rule_name", alert.rule_name) + alert_section.addFact("alert_id", alert.alert_id) + + return alert_section + + @staticmethod + def _add_additional_sections(teams_card, additional_sections): + """Add additional card sections to the teams card
+ + Args: + teams_card (pymsteams.connectorcard): Teams connector card + additional_sections (list[pymsteams.cardsection]): + Additional sections to be added to the card. Each section should be of + type: pymsteams.cardsection and have their relevant fields filled out. + Please review the pymsteams documentation for additional information. + + Returns: + teams_card (pymsteams.connectorcard): teams_card with additional sections added + """ + if not isinstance(additional_sections, list): + LOGGER.debug("additional_sections is not a list, converting") + + additional_sections = [additional_sections] + + for additional_section in additional_sections: + if not isinstance(additional_section, pymsteams.cardsection): + LOGGER.error( + "additional_section: %s is not an instance of %s", + additional_section, + pymsteams.cardsection, + ) + continue + + teams_card.addSection(additional_section) + + return teams_card + + @staticmethod + def _add_buttons(teams_card, buttons): + """Add buttons to the teams card + + Args: + teams_card (pymsteams.connectorcard): Teams connector card + buttons (list[(text, url)]): + Buttons to place on the card, should be a list of tuples containing + the text and the url + + Returns: + teams_card (pymsteams.connectorcard): teams_card with buttons added + """ + for button_text, button_url in buttons: + teams_card.addLinkButton(button_text, button_url) + + return teams_card + + def _dispatch(self, alert, descriptor): + """Sends the Teams Card to Teams + + Publishing: + By default the teams output sends a teams card comprising some default intro text + and a section containing: + * title with rule name + * alert description + * alert record (as a section of key/value pairs) + + To override this behavior use the following fields: + + - @teams.title (str): + Replaces the title of the teams connector card. + + - @teams.description (str): + Replaces the text of the team connector card + + - @teams.card_color (str): + Replaces the default color of the connector card (red) + Note: colors are represented by hex string + + - @teams.with_record (bool): + Set to False, to remove the alert record section. Useful if you want to have a + more targeted approach for the alert + + - @teams.additional_card_sections (list[pymsteams.cardsection]): + Pass in additional sections you want to send on the message. 
+ + @see cls._add_additional_sections() for more info + + - @teams.buttons (list[(text, url)]) + Pass a list of tuples containing the button text and url + + These will be placed at the bottom of a teams card + + Args: + alert (Alert): Alert instance which triggered a rule + descriptor (str): Output descriptor + + Returns: + bool: True if alert was sent successfully, False otherwise + """ + creds = self._load_creds(descriptor) + if not creds: + LOGGER.error("No credentials found for descriptor: %s", descriptor) + return False + + # Create the publication + publication = compose_alert(alert, self, descriptor) + + # Format the message + teams_card = self._format_message(alert, publication, creds["url"]) + + try: + teams_card.send() + except TeamsWebhookException as err: + LOGGER.error("Error Sending Alert to Teams: %s", err) + return False + + return True diff --git a/streamalert/athena_partition_refresh/main.py b/streamalert/athena_partition_refresh/main.py index 8f37df2e9..91acffb28 100644 --- a/streamalert/athena_partition_refresh/main.py +++ b/streamalert/athena_partition_refresh/main.py @@ -21,8 +21,10 @@ import urllib.parse import urllib.error +from streamalert.shared.utils import get_database_name, get_data_file_format from streamalert.shared.athena import AthenaClient from streamalert.shared.config import firehose_alerts_bucket, firehose_data_bucket, load_config +from streamalert.shared.exceptions import ConfigError from streamalert.shared.logger import get_logger @@ -36,17 +38,26 @@ class AthenaRefreshError(Exception): class AthenaRefresher: """Handle polling an SQS queue and running Athena queries for updating tables""" - STREAMALERTS_REGEX = re.compile(r'alerts/dt=(?P\d{4})' + ALERTS_REGEX = re.compile(r'alerts/dt=(?P\d{4})' + r'\-(?P\d{2})' + r'\-(?P\d{2})' + r'\-(?P\d{2})' + r'\/.*.json') + DATA_REGEX = re.compile(r'(?P\d{4})' + r'\/(?P\d{2})' + r'\/(?P\d{2})' + r'\/(?P\d{2})\/.*') + + ALERTS_REGEX_PARQUET = re.compile(r'alerts/dt=(?P\d{4})' + r'\-(?P\d{2})' + r'\-(?P\d{2})' + r'\-(?P\d{2})' + r'\/.*.parquet') + DATA_REGEX_PARQUET = re.compile(r'dt=(?P\d{4})' r'\-(?P\d{2})' r'\-(?P\d{2})' - r'\-(?P\d{2})' - r'\/.*.json') - FIREHOSE_REGEX = re.compile(r'(?P\d{4})' - r'\/(?P\d{2})' - r'\/(?P\d{2})' - r'\/(?P\d{2})\/.*') - - STREAMALERT_DATABASE = '{}_streamalert' + r'\-(?P\d{2})\/.*') + ATHENA_S3_PREFIX = 'athena_partition_refresh' _ATHENA_CLIENT = None @@ -55,13 +66,26 @@ def __init__(self): config = load_config(include={'lambda.json', 'global.json'}) prefix = config['global']['account']['prefix'] athena_config = config['lambda']['athena_partition_refresh_config'] + self._file_format = get_data_file_format(config) + + if self._file_format == 'parquet': + self._alerts_regex = self.ALERTS_REGEX_PARQUET + self._data_regex = self.DATA_REGEX_PARQUET + + elif self._file_format == 'json': + self._alerts_regex = self.ALERTS_REGEX + self._data_regex = self.DATA_REGEX + else: + message = ( + 'file format "{}" is not supported. Supported file format are ' + '"parquet", "json". 
Please update the setting in athena_partition_refresh_config ' + 'in "conf/lambda.json"'.format(self._file_format) + ) + raise ConfigError(message) self._athena_buckets = self.buckets_from_config(config) - db_name = athena_config.get( - 'database_name', - self.STREAMALERT_DATABASE.format(prefix) - ) + db_name = get_database_name(config) # Get the S3 bucket to store Athena query results results_bucket = athena_config.get( @@ -131,7 +155,7 @@ def _get_partitions_from_keys(self): for key in keys: match = None key = key.decode('utf-8') - for pattern in (self.FIREHOSE_REGEX, self.STREAMALERTS_REGEX): + for pattern in (self._data_regex, self._alerts_regex): match = pattern.search(key) if match: break @@ -150,7 +174,14 @@ def _get_partitions_from_keys(self): # first element in the S3 path, as that's how log types # are configured to send to Firehose. if athena_table != 'alerts': - athena_table = path.split('/')[0] + athena_table = ( + # when file_format is json, s3 file path is + # s3://bucketname/[data-type]/YYYY/MM/DD/hh/*.gz + # when file_format is parquet, s3 file path is + # s3://bucketname/parquet/[data-type]/dt=YYYY-MM-DD-hh/*.parquet + path.split('/')[1] if self._file_format == 'parquet' + else path.split('/')[0] + ) # Example: # PARTITION (dt = '2017-01-01-01') LOCATION 's3://bucket/path/' diff --git a/streamalert/classifier/clients/firehose.py b/streamalert/classifier/clients/firehose.py index 2519f6d44..f6c3c2443 100644 --- a/streamalert/classifier/clients/firehose.py +++ b/streamalert/classifier/clients/firehose.py @@ -15,6 +15,7 @@ """ from collections import defaultdict import json +import hashlib import re import backoff @@ -52,7 +53,7 @@ class FirehoseClient: MAX_RECORD_SIZE = 1000 * 1000 - 2 # Default firehose name format, should be formatted with deployment prefix - DEFAULT_FIREHOSE_FMT = '{}streamalert_data_{}' + DEFAULT_FIREHOSE_FMT = '{}streamalert_{}' # Exception for which backoff operations should be performed EXCEPTIONS_TO_BACKOFF = (ClientError, BotocoreConnectionError, HTTPClientError) @@ -60,13 +61,15 @@ class FirehoseClient: # Set of enabled log types for firehose, loaded from configs _ENABLED_LOGS = dict() + # The max length of the firehose stream name is 64. For streamalert data firehose, + # we reserve 12 chars to have `streamalert_` as part of prefix. 
Please refer to + terraform/modules/tf_kinesis_firehose_delivery_stream/main.tf + AWS_FIREHOSE_NAME_MAX_LEN = 64 + + FIREHOSE_NAME_MIN_HASH_LEN = 8 + def __init__(self, prefix, firehose_config=None, log_sources=None): - self._prefix = ( - '{}_'.format(prefix) - # This default value must be consistent with the classifier Terraform config - if firehose_config and firehose_config.get('use_prefix', True) - else '' - ) + self._prefix = prefix if firehose_config.get('use_prefix', True) else '' self._client = boto3.client('firehose', config=boto_helpers.default_config()) self.load_enabled_log_sources(firehose_config, log_sources, force_load=True) @@ -124,6 +127,18 @@ def _record_batches(cls, records): if current_batch: yield current_batch + @classmethod + def sanitized_value(cls, key): + """Sanitize a key by replacing non-word characters with '_' + + Args: + key (str): a string that needs to be sanitized + + Returns: + str: sanitized string + """ + return re.sub(cls.SPECIAL_CHAR_REGEX, cls.SPECIAL_CHAR_SUB, key) + @classmethod def sanitize_keys(cls, record): """Remove special characters from parsed record keys @@ -139,7 +154,7 @@ def sanitize_keys(cls, record): """ new_record = {} for key, value in record.items(): - sanitized_key = re.sub(cls.SPECIAL_CHAR_REGEX, cls.SPECIAL_CHAR_SUB, key) + sanitized_key = cls.sanitized_value(key) # Handle nested objects if isinstance(value, dict): @@ -287,16 +302,37 @@ def _firehose_request_helper(data): self._log_failed(len(records_data)) @classmethod - def firehose_log_name(cls, log_name): - """Convert conventional log names into Firehose delivery stream names + def generate_firehose_name(cls, prefix, log_stream_name): + """Generate a Firehose stream name compliant with the naming restriction of no + longer than 64 characters Args: - log_name: The name of the log from logs.json + prefix (str): The prefix defined in conf/global.json for the firehose stream name + log_stream_name (str): The name of the log from conf/logs.json or conf/schemas/*.json - Returns - str: Converted name which corresponds to a Firehose delivery Stream + Returns: + str: generated stream name """ - return re.sub(cls.SPECIAL_CHAR_REGEX, cls.SPECIAL_CHAR_SUB, log_name) + if prefix: + prefix += '_' + + # This same substitution method is used when naming the Delivery Streams + stream_name = cls.sanitized_value(cls.DEFAULT_FIREHOSE_FMT.format(prefix, log_stream_name)) + if len(stream_name) <= cls.AWS_FIREHOSE_NAME_MAX_LEN: + return stream_name + + base_name = stream_name[:cls.AWS_FIREHOSE_NAME_MAX_LEN - cls.FIREHOSE_NAME_MIN_HASH_LEN] + if not base_name.endswith('_'): + # make sure this ends in an underscore, but not 2 + base_name = '{}_'.format( + base_name[:-1] + ) if base_name[-2] != '_' else '{}_'.format(base_name[:-2]) + + # combine the base_name and first 8 chars of hash result together as new + # stream name.
+ return '{}{}'.format( + base_name, hashlib.md5(stream_name.encode()).hexdigest() # nosec + )[:cls.AWS_FIREHOSE_NAME_MAX_LEN] @classmethod def enabled_log_source(cls, log_source_name): @@ -312,7 +348,7 @@ def enabled_log_source(cls, log_source_name): LOGGER.error('Enabled logs not loaded') return False - return cls.firehose_log_name(log_source_name) in cls._ENABLED_LOGS + return cls.sanitized_value(log_source_name) in cls._ENABLED_LOGS @classmethod def load_enabled_log_sources(cls, firehose_config, log_sources, force_load=False): @@ -342,7 +378,7 @@ def load_enabled_log_sources(cls, firehose_config, log_sources, force_load=False # Expand to all subtypes if len(enabled_log_parts) == 1: expanded_logs = { - cls.firehose_log_name(log_name): log_name + cls.sanitized_value(log_name): log_name for log_name in log_sources if log_name.split(':')[0] == enabled_log_parts[0] } @@ -358,7 +394,7 @@ def load_enabled_log_sources(cls, firehose_config, log_sources, force_load=False LOGGER.error('Enabled Firehose log %s not declared in logs.json', enabled_log) continue - cls._ENABLED_LOGS[cls.firehose_log_name('_'.join(enabled_log_parts))] = enabled_log + cls._ENABLED_LOGS[cls.sanitized_value(enabled_log)] = enabled_log return cls._ENABLED_LOGS @@ -390,12 +426,11 @@ def send(self, payloads): # Each batch will be processed to their specific Firehose, which lands the data # in a specific prefix in S3. for log_type, records in records.items(): - # This same substitution method is used when naming the Delivery Streams - formatted_log_type = self.firehose_log_name(log_type) - stream_name = self.DEFAULT_FIREHOSE_FMT.format(self._prefix, formatted_log_type) + # firehose stream name has the length limit, no longer than 64 characters + formatted_stream_name = self.generate_firehose_name(self._prefix, log_type) # Process each record batch in the categorized payload set for record_batch in self._record_batches(records): batch_size = len(record_batch) - response = self._send_batch(stream_name, record_batch) - self._finalize(response, stream_name, batch_size) + response = self._send_batch(formatted_stream_name, record_batch) + self._finalize(response, formatted_stream_name, batch_size) diff --git a/streamalert/rule_promotion/promoter.py b/streamalert/rule_promotion/promoter.py index 2b3b35591..11e1a8dca 100644 --- a/streamalert/rule_promotion/promoter.py +++ b/streamalert/rule_promotion/promoter.py @@ -17,6 +17,7 @@ from streamalert.rule_promotion.publisher import StatsPublisher from streamalert.rule_promotion.statistic import StagingStatistic +from streamalert.shared.utils import get_database_name from streamalert.shared.athena import AthenaClient from streamalert.shared.config import load_config from streamalert.shared.logger import get_logger @@ -30,7 +31,6 @@ class RulePromoter: """Run queries to generate statistics on alerts.""" ATHENA_S3_PREFIX = 'rule_promoter' - STREAMALERT_DATABASE = '{}_streamalert' def __init__(self): self._config = load_config() @@ -42,7 +42,7 @@ def __init__(self): athena_config = self._config['lambda']['athena_partition_refresh_config'] # Get the name of the athena database to access - db_name = athena_config.get('database_name', self.STREAMALERT_DATABASE.format(prefix)) + db_name = athena_config.get('database_name', get_database_name(self._config)) # Get the S3 bucket to store Athena query results results_bucket = athena_config.get( diff --git a/streamalert/rules_engine/rules_engine.py b/streamalert/rules_engine/rules_engine.py index 5c4c0c02a..14bde8caa 100644 --- 
a/streamalert/rules_engine/rules_engine.py +++ b/streamalert/rules_engine/rules_engine.py @@ -192,15 +192,18 @@ def _rule_analysis(self, payload, rule): if not rule_result: return + # Define the outputs + outputs = self._configure_outputs(payload['record'], rule) + alert = Alert( - rule.name, payload['record'], self._configure_outputs(rule), + rule.name, payload['record'], outputs, cluster=payload['cluster'], context=rule.context, log_source=payload['log_schema_type'], log_type=payload['data_type'], merge_by_keys=rule.merge_by_keys, merge_window=timedelta(minutes=rule.merge_window_mins), - publishers=self._configure_publishers(rule), + publishers=self._configure_publishers(rule, outputs), rule_description=rule.description, source_entity=payload['resource'], source_service=payload['service'], @@ -213,17 +216,129 @@ def _rule_analysis(self, payload, rule): return alert - def _configure_outputs(self, rule): + def _configure_outputs(self, record, rule): + """Configure the outputs for the rule + + Args: + record (dict): Record to pass through to dynamic_outputs + rule (rule.Rule): Attributes for the rule which triggered the alert + Returns: + set: unique set of outputs, only required outputs if the rule is staged + """ # Check if the rule is staged and, if so, only use the required alert outputs if rule.is_staged(self._rule_table): - all_outputs = self._required_outputs_set - else: # Otherwise, combine the required alert outputs with the ones for this rule - all_outputs = self._required_outputs_set.union(rule.outputs_set) + output_sources = [self._required_outputs_set] + else: # Otherwise, combine all outputs into one + output_sources = [self._required_outputs_set, rule.outputs_set] + if rule.dynamic_outputs: + # append dynamic_outputs to output sources if they exist + dynamic_outputs = self._configure_dynamic_outputs(record, rule) + output_sources.append(dynamic_outputs) + + return { + output + for output_source in output_sources + for output in output_source + if self._check_valid_output(output) + } + + @classmethod + def _configure_dynamic_outputs(cls, record, rule): + """Generate list of outputs from dynamic_outputs + + Args: + record (dict): Record to pass through to the dynamic_output function + rule (rule.Rule): Attributes for the rule which triggered the alert + Returns: + list: list of additional outputs to append to the current set + """ + args_list = [record] + if rule.context: + # Pass context to dynamic_output function if context exists + args_list.append(rule.context) + + return [ + output + for dynamic_output_function in rule.dynamic_outputs_set + for output in cls._call_dynamic_output_function( + dynamic_output_function, rule.name, args_list + ) + ] + + @staticmethod + def _call_dynamic_output_function(function, rule_name, args_list): + """Call the dynamic_output function + + Args: + function (func): Callable function which returns None, str or List[str] + rule_name (str): The name of the rule the functions belong to + args_list (list): list of args to be passed to the dynamic function + should be (record or record and context) + Returns: + list: list of additional outputs + """ + LOGGER.debug("invoking function %s", function.__name__) + + outputs = [] + + try: + outputs = function(*args_list) + except Exception: # pylint: disable=broad-except + # Log the error and return [] + LOGGER.error( + "Exception when calling dynamic_output %s for rule %s", + function.__name__, rule_name + ) + else: + LOGGER.debug("function %s returned: %s", function.__name__, outputs) + + if
isinstance(outputs, str): + # Case 1: outputs is a string + # return outputs wrapped in a list + outputs = [outputs] + elif isinstance(outputs, list): + # Case 2: outputs is a list + # return outputs + pass + else: + # Case 3: outputs is neither a string nor a list + # return an empty list + outputs = [] + + return outputs - return all_outputs + @staticmethod + def _check_valid_output(output): + """Verify output is valid + + Args: + output (str): The output to check if it's valid + Returns: + True (bool): Output is valid + False (bool): Output is invalid + """ + valid = False + + if not isinstance(output, str): + # Case 1: output is not a string + # return False + LOGGER.warning("Output (%s) is not a string", output) + valid = False + elif isinstance(output, str) and ":" not in output: + # Case 2: output is a string but missing ":" + # Log warning and return False + LOGGER.warning("Output (%s) is missing ':'", output) + + valid = False + else: + # Case 3: output is a string and contains ":" + # return True + valid = True + + return valid @classmethod - def _configure_publishers(cls, rule): + def _configure_publishers(cls, rule, requested_outputs): """Assigns publishers to each output. The @Rule publisher syntax accepts several formats, including a more permissive blanket @@ -234,11 +349,11 @@ def _configure_publishers(cls, rule): Args: rule (Rule): The rule to create publishers for + requested_outputs (set): A set containing the outputs Returns: dict: Maps string outputs names to lists of strings of their fully qualified publishers """ - requested_outputs = rule.outputs_set requested_publishers = rule.publishers if not requested_publishers: return None @@ -246,6 +361,10 @@ def _configure_publishers(cls, rule): configured_publishers = {} for output in requested_outputs: + if output == "aws-firehose:alerts": + # This output doesn't require a publisher + continue + assigned_publishers = [] if cls.is_publisher_declaration(requested_publishers): diff --git a/streamalert/scheduled_queries/__init__.py b/streamalert/scheduled_queries/__init__.py new file mode 100644 index 000000000..0c1ee3e8f --- /dev/null +++ b/streamalert/scheduled_queries/__init__.py @@ -0,0 +1,18 @@ +""" +Copyright 2017-present, Airbnb Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + + +StreamQuery base module +""" diff --git a/terraform/modules/tf_threat_intel_downloader/README.md b/streamalert/scheduled_queries/command/__init__.py similarity index 100% rename from terraform/modules/tf_threat_intel_downloader/README.md rename to streamalert/scheduled_queries/command/__init__.py diff --git a/streamalert/scheduled_queries/command/application.py b/streamalert/scheduled_queries/command/application.py new file mode 100644 index 000000000..d3707402c --- /dev/null +++ b/streamalert/scheduled_queries/command/application.py @@ -0,0 +1,88 @@ +""" +Copyright 2017-present, Airbnb Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License.
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from streamalert.scheduled_queries.config.lambda_conf import get_streamquery_env_vars +from streamalert.scheduled_queries.config.services import ApplicationServices + + +class ScheduledQueries: + + def __init__(self): + # Ready all services + self._services = ApplicationServices() + + def run(self, event): + """The main application execution. + + StreamQuery executions are configured by two external sources. ENVIRONMENT variables and + the input event. ENVIRONMENT variables help configure the application at deployment, + whereas the input event tracks state within a single state machine. + + FIXME (Ryxias) + We should re-evaluate which environment variables can be deployed via configuration files + instead of being embedded into Terraform configurations. + + By design, StreamQuery's executions should be nonblocking. Waiting on Athena to complete + many query executions is a waste of Lambda execution time, so StreamQuery is designed to + fire-and-forget Athena queries. Upon first execution, query execution ids are saved into + the state machine. Subsequent executions check the statuses of these queries, and dispatch + the results of successful queries to StreamAlert. This process repeats until all scheduled + queries are dispatched. + + Params: + event (dict) + The input event, which represents the state of the state machine. + + StreamQuery expects a very specific structure to the event. See StateManager or + StepFunctionStateManager for more details. + + Returns: + dict: The final state of the state machine. + """ + + # Start the function + self._services.logger.info('Running scheduled_queries lambda handler') + self._services.logger.debug( + 'Invocation event: %s', event + ) + self._services.logger.debug( + 'ServiceContainer parameters: %s', get_streamquery_env_vars() + ) + + # Load up any prior state from the event passed in from the StepFunction + state_manager_loader = self._services.create_step_function_state_manager() + state_manager_loader.load_from_step_function_event(event) + + # Execute a single pass of the StreamQuery runtime + done = self._services.command_processor.nonblocking_single_pass() + + # Set the updated state into the response + # The step function as-written currently looks specifically for $.done and + # $.continue and expects both of them to be present AND to be adopt exact + # numeric values + # + # When 'continue' is set to 1, the state machine will go into a waiting state, then + # re-execute this Lambda function again. When 'done' is set to 1, the state machine + # is considered complete and will not execute again. This should only happen if all + # scheduled queries have completed or failed. 
+ # + # @see terraform/modules/tf_scheduled_queries/step_function.tf + response = { + 'done': 1 if done else 0, + 'continue': 1, + } + state_manager_loader.write_to_step_function_response(response) + + return response diff --git a/streamalert/scheduled_queries/command/processor.py b/streamalert/scheduled_queries/command/processor.py new file mode 100644 index 000000000..5c531ba26 --- /dev/null +++ b/streamalert/scheduled_queries/command/processor.py @@ -0,0 +1,129 @@ +""" +Copyright 2017-present, Airbnb Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from logging import Logger + +from streamalert.scheduled_queries.query_packs.manager import QueryPacksManager +from streamalert.scheduled_queries.state.state_manager import StateManager +from streamalert.scheduled_queries.streamalert.kinesis import KinesisClient + + +class CommandProcessor: + + def __init__(self, + logger=None, kinesis=None, state_manager=None, manager_factory=None): + self._logger = logger # type: Logger + self._kinesis = kinesis # type: KinesisClient + self._state_manager = state_manager # type: StateManager + self._manager = manager_factory.new_manager() # type: QueryPacksManager + + def nonblocking_single_pass(self): + """Make a single, nonblocking pass through all Queries that are configured to run. + + It is up to the caller to call this method over and over. + + Return: + bool: True when all work is finished. False otherwise. + """ + self._logger.info( + 'Discovered {} query packs to execute'.format(self._manager.num_registered_queries) + ) + + self._manager.initialize_query_packs() + self._manager.start_queries() + + finished_queries = self._manager.finished_query_packs + + for query_pack in finished_queries: + self._handle_finished_query(query_pack) + + if len(finished_queries) == self._manager.num_registered_queries: + self._logger.info('All queries completed.') + return True + + return False + + def _handle_finished_query(self, query_pack): + """Figures out what to do with a QueryPack that has finished running. + + This method is Idempotent. 
+ + Arguments: + query_pack (QueryPack) + """ + query_execution = query_pack.query_execution + query_execution_id = query_pack.query_execution_id + + # If query pack is sent + if self._query_pack_already_sent(query_pack): + self._logger.debug(' Already sent to Kinesis.') + return + + if not query_execution.is_succeeded(): + # uh o + self._logger.error('ENCOUNTERED ERROR') + self._logger.error( + 'QUERY FOR {} (Execution Id = {}) HAS FAILED'.format( + query_pack.query_pack_configuration.name, + query_execution_id + ) + ) + self._logger.error(query_execution.status_description) + + self._kinesis.send_error_results(query_pack) + + self._mark_query_pack_sent(query_pack) + self._mark_query_pack_error(query_pack) + return + + result = query_pack.fetch_results() + + self._logger.debug('Query Completed:') + self._logger.debug( + 'Execution Id: %s', + result.query_execution.query_execution_id + ) + self._logger.debug('Query: %s', result.query_execution.query) + self._logger.debug( + 'Runtime: %d', + result.query_execution.engine_execution_time_in_millis + ) + self._logger.debug( + 'Bytes: %d', + result.query_execution.data_scanned_in_bytes + ) + self._logger.debug('Status: %s', result.query_execution.status) + self._logger.debug('Reason: %s', result.query_execution.status_description) + + self._kinesis.send_query_results(query_pack) + + self._mark_query_pack_sent(query_pack) + + def _query_pack_already_sent(self, query_pack): + cache_key = query_pack.unique_id + cache_entry = self._state_manager.get(cache_key) + return cache_entry.get('sent_to_streamalert', False) + + def _mark_query_pack_sent(self, query_pack): + cache_key = query_pack.unique_id + cache_entry = self._state_manager.get(cache_key) + cache_entry['sent_to_streamalert'] = True + self._state_manager.set(cache_key, cache_entry) + + def _mark_query_pack_error(self, query_pack): + cache_key = query_pack.unique_id + cache_entry = self._state_manager.get(cache_key) + cache_entry['error'] = True + self._state_manager.set(cache_key, cache_entry) diff --git a/streamalert/scheduled_queries/config/__init__.py b/streamalert/scheduled_queries/config/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/streamalert/scheduled_queries/config/lambda_conf.py b/streamalert/scheduled_queries/config/lambda_conf.py new file mode 100644 index 000000000..6067a1345 --- /dev/null +++ b/streamalert/scheduled_queries/config/lambda_conf.py @@ -0,0 +1,28 @@ +""" +Copyright 2017-present, Airbnb Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +import os + + +def get_streamquery_env_vars(): + """Returns environment variables pertinent to StreamQuery""" + return { + 'command_name': 'StreamQuery', + 'aws_region': os.environ['REGION'], + 'log_level': os.environ['LOGGER_LEVEL'], + 'athena_database': os.environ['ATHENA_DATABASE'], + 'athena_results_bucket': os.environ['ATHENA_RESULTS_BUCKET'], + 'kinesis_stream': os.environ['KINESIS_STREAM'], + } diff --git a/streamalert/scheduled_queries/config/services.py b/streamalert/scheduled_queries/config/services.py new file mode 100644 index 000000000..27ac57ba3 --- /dev/null +++ b/streamalert/scheduled_queries/config/services.py @@ -0,0 +1,214 @@ +""" +Copyright 2017-present, Airbnb Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +import logging + +import boto3 +from botocore import client as botocore_client +from botocore.exceptions import ProfileNotFound + +from streamalert.scheduled_queries.command.processor import CommandProcessor +from streamalert.scheduled_queries.config.lambda_conf import get_streamquery_env_vars +from streamalert.scheduled_queries.container.container import ServiceDefinition, ServiceContainer +from streamalert.scheduled_queries.handlers.athena import AthenaClient +from streamalert.scheduled_queries.query_packs.configuration import QueryPackRepository +from streamalert.scheduled_queries.query_packs.manager import ( + QueryPackExecutionContext, + QueryPacksManagerFactory, + QueryParameterGenerator, +) +from streamalert.scheduled_queries.state.state_manager import StateManager, StepFunctionStateManager +from streamalert.scheduled_queries.streamalert.kinesis import KinesisClient +from streamalert.scheduled_queries.support.clock import Clock +from streamalert.shared.config import load_config + + +# FIXME (Ryxias) +# Eventually we should get rid of the ServiceContainer. This pattern isn't really in the spirit +# of StreamAlert and is a relic from when StreamQuery was a separately maintained project. 
+class ApplicationServices: + def __init__(self): + # Boot the service container + self._service_container = ServiceContainer(get_streamquery_env_vars()) + configure_container(self._service_container) + + @property + def logger(self): + return self._service_container.get('logger') + + @property + def command_processor(self): + return self._service_container.get('command_processor') + + @property + def state_manager(self): + return self._service_container.get('state_manager') + + @property + def clock(self): + return self._service_container.get('clock') + + def create_step_function_state_manager(self): + return StepFunctionStateManager( + self.state_manager, + self.logger, + self.clock + ) + + +# pylint: disable=too-many-statements +def configure_container(container): + """Configures the container + + Params: + container (ServiceContainer) + """ + container.register(ServiceDefinition('command_processor', _make_command_processor)) + container.register(ServiceDefinition('logger', _make_logger)) + container.register(ServiceDefinition('streamalert_forwarder', _make_kinesis)) + container.register(ServiceDefinition('state_manager', _make_cache)) + container.register(ServiceDefinition('athena', _make_athena)) + container.register(ServiceDefinition('query_parameter_generator', _make_param_generator)) + container.register(ServiceDefinition('query_pack_repository', _make_query_pack_repo)) + container.register(ServiceDefinition('query_pack_manager_factory', _make_query_pack_factory)) + container.register(ServiceDefinition('query_pack_execution_context', _make_execution_context)) + container.register(ServiceDefinition('clock', _make_clock)) + container.register(ServiceDefinition('boto3_athena_client', _make_boto3_athena_client)) + container.register(ServiceDefinition('boto3_kinesis_client', _make_boto3_kinesis_client)) + container.register(ServiceDefinition('config', _load_config)) + + +def _load_config(_): + return load_config() + + +def _make_command_processor(container): + return CommandProcessor( + logger=container.get('logger'), + kinesis=container.get('streamalert_forwarder'), + state_manager=container.get('state_manager'), + manager_factory=container.get('query_pack_manager_factory') + ) + + +def _make_logger(container): + logger = logging.getLogger(container.get_parameter('command_name')) + logger.setLevel(container.get_parameter('log_level').upper()) + logging.basicConfig( + format='%(name)s [%(levelname)s]: [%(module)s.%(funcName)s] %(message)s' + ) + return logger + + +def _make_kinesis(container): + return KinesisClient( + logger=container.get('logger'), + client=container.get('boto3_kinesis_client'), + kinesis_stream=container.get_parameter('kinesis_stream') + ) + + +def _make_cache(container): + cache = StateManager( + logger=container.get('logger') + ) + + return cache + + +def _make_athena(container): + return AthenaClient( + logger=container.get('logger'), + client=container.get('boto3_athena_client'), + database=container.get_parameter('athena_database'), + results_bucket=container.get_parameter('athena_results_bucket') + ) + + +def _make_param_generator(container): + return QueryParameterGenerator(container.get('logger'), container.get('clock')) + + +def _make_query_pack_repo(container): + repo = QueryPackRepository + + config = container.get('config') + query_directories = [ + item + for item in config['global']['general'].get('scheduled_query_locations', []) + ] + + repo.load_packs(query_directories) + return repo + + +def _make_query_pack_factory(container): + return 
QueryPacksManagerFactory( + container.get('query_pack_execution_context') + ) + + +def _make_execution_context(container): + return QueryPackExecutionContext( + cache=container.get('state_manager'), + athena=container.get('athena'), + logger=container.get('logger'), + params=container.get('query_parameter_generator'), + repository=container.get('query_pack_repository'), + clock=container.get('clock') + ) + + +def _make_clock(_): + return Clock() + + +def _make_boto3_athena_client(container): + region = container.get_parameter('aws_region') + logger = container.get('logger') + + config = botocore_client.Config( + connect_timeout=5, + read_timeout=5, + region_name=region + ) + + session_kwargs = {} + try: + session = boto3.Session(**session_kwargs) + return session.client( + 'athena', + config=config, + ) + except ProfileNotFound: + logger.error('AWS Athena Connection via Profile Failed') + + +def _make_boto3_kinesis_client(container): + region = container.get_parameter('aws_region') + logger = container.get('logger') + + config = botocore_client.Config( + connect_timeout=5, + read_timeout=5, + region_name=region + ) + + session_kwargs = {} + try: + session = boto3.Session(**session_kwargs) + return session.client('kinesis', config=config) + except ProfileNotFound: + logger.error('AWS Kinesis Connection via Profile Failed') diff --git a/streamalert/scheduled_queries/container/__init__.py b/streamalert/scheduled_queries/container/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/streamalert/scheduled_queries/container/container.py b/streamalert/scheduled_queries/container/container.py new file mode 100644 index 000000000..9eabe18d6 --- /dev/null +++ b/streamalert/scheduled_queries/container/container.py @@ -0,0 +1,85 @@ +""" +Copyright 2017-present, Airbnb Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + + +Classes related to dependency injection container. +""" + + +class ServiceContainer: + """A container that houses all configurations and services for the application runtime. + + @see https://symfony.com/doc/current/service_container.html + """ + + def __init__(self, parameters): + self._services = {} + self._parameters = parameters + self._definitions = {} + + def get(self, service_id): + """Returns a service + + All instances of a unique service id are singleton. 
+ """ + if service_id not in self._services: + self._services[service_id] = self._instantiate(service_id) + return self._services[service_id] + + def get_parameter(self, parameter_name): + """Returns a parameter registered in the service container""" + if parameter_name not in self._parameters: + raise ValueError('ServiceContainer no such parameter: "{}"'.format(parameter_name)) + + return self._parameters[parameter_name] + + @property + def parameters(self): + return self._parameters + + def register(self, definition): + """ + + Params: + definition (ServiceDefinition): + """ + service_id = definition.service_id + if service_id in self._definitions: + raise ValueError( + 'ServiceContainer registering duplicate definition: "{}"'.format(service_id) + ) + + self._definitions[service_id] = definition + + def _instantiate(self, service_id): + if service_id in self._definitions: + return self._definitions[service_id].instantiate(self) + + raise ValueError( + 'ServiceContainer does not know how to create: "{}"'.format(service_id) + ) + + +class ServiceDefinition: + def __init__(self, service_id, definition): + self._service_id = service_id + self._definition = definition + + @property + def service_id(self): + return self._service_id + + def instantiate(self, service_container): + return self._definition(service_container) diff --git a/streamalert/scheduled_queries/handlers/__init__.py b/streamalert/scheduled_queries/handlers/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/streamalert/scheduled_queries/handlers/athena.py b/streamalert/scheduled_queries/handlers/athena.py new file mode 100644 index 000000000..c5799303f --- /dev/null +++ b/streamalert/scheduled_queries/handlers/athena.py @@ -0,0 +1,258 @@ +""" +Copyright 2017-present, Airbnb Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +import json +import uuid + +from botocore.exceptions import ClientError + + +class AthenaQueryExecutionError(Exception): + """Exception to be raised when an Athena query fails""" + + +# FIXME (ryxias) +# At some point we should DRY out the implementation of this API client with the one in +# streamalert/shared/athena.py +class AthenaClient: + """A StreamAlert Athena Client for creating tables, databases, and executing queries""" + + def __init__(self, logger=None, client=None, database=None, results_bucket=None): + """Initialize the Boto3 Athena Client, and S3 results bucket/key""" + self._logger = logger + self._client = client + self._database = database + self._s3_results_bucket = results_bucket + + def _execute_query(self, query, options): + """Execute an Athena query on the current database. This operation is non-blocking + + See: + https://docs.aws.amazon.com/cli/latest/reference/athena/start-query-execution.html + + Args: + query (str): SQL query to execute + options (dict): Configuration options + + - database (str): The Athena database to connect to. 
+ + Returns: + str: Athena execution ID for the query that was started + + Raises: + AthenaQueryExecutionError: If any failure occurs during the execution of the + query, this exception will be raised + """ + self._logger.debug('Executing query: %s', query) + try: + output_location = 's3://{bucket}/{key}.csv'.format( + bucket=self._s3_results_bucket, + key=uuid.uuid4() + ) + result = self._client.start_query_execution( + QueryString=query, + QueryExecutionContext={'Database': options.get('database', self._database)}, + ResultConfiguration={'OutputLocation': output_location} + ) + query_execution_id = result['QueryExecutionId'] + self._logger.debug('Query dispatched. ID returned: %s', query_execution_id) + + return query_execution_id + except ClientError as err: + raise AthenaQueryExecutionError('Athena query failed:\n{}'.format(err)) + + def get_query_execution(self, query_execution_id): + """Gets an AthenaQueryExecution object encapsulating the result of a query + + Check the result.is_still_running() and result.is_succeeded() for the statuses + of the queries. + + Args: + query_execution_id (str): The Athena-returned query execution id + + Returns: + AthenaQueryExecution + """ + return AthenaQueryExecution(self._client.get_query_execution( + QueryExecutionId=query_execution_id + )) + + def get_query_result(self, query_execution): + """Returns a query result payload, wrapped in a AthenaQueryResult object + + Args: + query_execution (AthenaQueryExecution) + + Returns: + AthenaQueryResult + Returns None if the given query_execution is not completed + """ + if not query_execution.is_succeeded(): + return None + return AthenaQueryResult( + query_execution, + self._client.get_query_results(QueryExecutionId=query_execution.query_execution_id) + ) + + def run_async_query(self, query, options=None): + """Run an Athena query in an asynchronous fashion. 
This operation is non-blocking + + Args: + query (str): SQL query to execute + options (dict): Configuration options + + Returns: + str: Athena query execution ID + + Raises: + AthenaQueryExecutionError: If any failure occurs during the execution of the + query, this exception will be raised + """ + if options is None: + options = {} + + return self._execute_query(query, options) + + +class AthenaQueryExecution: + """Encapsulation of a query execution response + + See: + https://docs.aws.amazon.com/cli/latest/reference/athena/get-query-execution.html + """ + + def __init__(self, response): + self._response = response + + @property + def query_execution_id(self): + return self._response['QueryExecution']['QueryExecutionId'] + + @property + def database(self): + return self._response['QueryExecution']['QueryExecutionContext']['Database'] + + @property + def status(self): + return self._response['QueryExecution']['Status']['State'] + + @property + def status_description(self): + return self._response['QueryExecution']['Status'].get('StateChangeReason', None) + + @property + def completion_datetime(self): + return self._response['QueryExecution']['Status']['CompletionDateTime'] + + @property + def data_scanned_in_bytes(self): + return self._response['QueryExecution']['Statistics']['DataScannedInBytes'] + + @property + def engine_execution_time_in_millis(self): + return self._response['QueryExecution']['Statistics']['EngineExecutionTimeInMillis'] + + @property + def output_location(self): + return self._response['QueryExecution']['ResultConfiguration']['OutputLocation'] + + @property + def query(self): + return self._response['QueryExecution']['Query'] + + def is_still_running(self): + return self.status in {'QUEUED', 'RUNNING'} + + def is_succeeded(self): + return self.status == 'SUCCEEDED' + + +class AthenaQueryResult: + """Encapsulation of a query execution's result""" + + def __init__(self, query_execution, result): + self._query_execution = query_execution + self._result = result + + @property + def query_execution(self): + """ + Returns: + AthenaQueryExecution + """ + return self._query_execution + + @property + def headers(self): + """Returns the headers of the query result, as a list + + Returns: + list + """ + return self._raw_row_to_list(self.raw_rows[0]) + + @property + def data_as_list(self): + """Returns the data of the query result, as a list of lists + + The result set is a list of rows, in the order they appear in the query result. + Each row is a list of column values, in the order they appear from left-to-right. This + should match the ordering in the "headers". + + Returns: + list[list] + """ + return [self._raw_row_to_list(row) for row in self.raw_rows[1:]] + + @property + def data_as_dicts(self): + """Returns the data of the query results as a list of dicts mapping headers to values + + An alternative to data_as_list. The returned result is a list of rows, but in this method + the rows are dicts, mapping the headers (keys) to their respective values. + + This method results in a larger data set and is more CPU intensive but the returned data + is easier to use. 
+ + Returns: + list[dict] + """ + headers = self.headers + + data = [] + for row in self.data_as_list: + dict_row = {} + for index, header in enumerate(headers): + dict_row[header] = row[index] + data.append(dict_row) + return data + + @property + def data_as_human_string(self): + return json.dumps(self.data_as_dicts, indent=2, separators=(',', ': ')) + + @property + def raw_rows(self): + return self._result['ResultSet']['Rows'] + + @property + def count(self): + """Returns the number of rows in the result set""" + return len(self.raw_rows) - 1 # Remove 1 to account for the header, which is always around + + @staticmethod + def _raw_row_to_list(row): + # For empty cells, there is no VarCharValue key + return [cell.get('VarCharValue', None) for cell in row['Data']] diff --git a/streamalert/scheduled_queries/main.py b/streamalert/scheduled_queries/main.py new file mode 100644 index 000000000..136101a32 --- /dev/null +++ b/streamalert/scheduled_queries/main.py @@ -0,0 +1,23 @@ +""" +Copyright 2017-present, Airbnb Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + + +This file is the entry point for AWS Lambda. +""" +from streamalert.scheduled_queries.command.application import ScheduledQueries + + +def handler(event, _): + return ScheduledQueries().run(event) diff --git a/streamalert/scheduled_queries/query_packs/__init__.py b/streamalert/scheduled_queries/query_packs/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/streamalert/scheduled_queries/query_packs/configuration.py b/streamalert/scheduled_queries/query_packs/configuration.py new file mode 100644 index 000000000..5088af0e9 --- /dev/null +++ b/streamalert/scheduled_queries/query_packs/configuration.py @@ -0,0 +1,114 @@ +""" +Copyright 2017-present, Airbnb Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from streamalert.shared.importer import import_folders + + +class QueryPackConfiguration: + + def __init__(self, query=None, params=None, name=None, + description=None, tags=None): + if not name: + raise RuntimeError('Query Pack missing name') + + if not query: + raise RuntimeError('Query Pack "{}" missing query template'.format(name)) + + if not tags: + raise RuntimeError('Query Pack "{}" has no tags?'.format(name)) + + self._query_template = query + self._query_parameters = params + self._name = name + self._description = description + self._tags = tags if tags else [] + + QueryPackRepository.register(self) + + def generate_query(self, **kwargs): + """Returns a raw SQL query string""" + try: + return self.query_template.format(**kwargs) + except KeyError as e: + msg = ''' +Failed to generate query for pack: "{name}" +The provided query parameters were: +{kwargs} + +Error: +{error} +'''.strip().format(name=self.name, error=e, kwargs=kwargs) + raise KeyError(msg) + + + @property + def query_template(self): + return self._query_template + + @property + def query_parameters(self): + return self._query_parameters + + @property + def handler(self): + """ + @deprecated + Returns one of the, signally which DBMS handles this query + """ + return None + + @property + def name(self): + """Returns a name for this query pack""" + return self._name + + @property + def description(self): + """Returns a short description of what this query pack does""" + return self._description + + @property + def tags(self): + """Returns a list of string tags belonging to this query pack""" + return self._tags + + +class QueryPackRepository: + """A repository of all packs""" + QUERY_PACKS = {} + + @classmethod + def get_packs(cls): + """ + Returns: + list[QueryPack] + """ + return cls.QUERY_PACKS.values() + + @classmethod + def register(cls, config): + """ + Args: + config (QueryPackConfiguration) + """ + name = config.name + if name in cls.QUERY_PACKS: + raise RuntimeError('ERROR: Duplicate query pack name: "{}"'.format(name)) + + cls.QUERY_PACKS[name] = config + + @classmethod + def load_packs(cls, directories): + import_folders(*directories) diff --git a/streamalert/scheduled_queries/query_packs/manager.py b/streamalert/scheduled_queries/query_packs/manager.py new file mode 100644 index 000000000..0f3e2c12c --- /dev/null +++ b/streamalert/scheduled_queries/query_packs/manager.py @@ -0,0 +1,313 @@ +""" +Copyright 2017-present, Airbnb Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +import json +from logging import Logger + +from streamalert.scheduled_queries.handlers.athena import AthenaClient +from streamalert.scheduled_queries.query_packs.configuration import ( + QueryPackConfiguration, QueryPackRepository +) +from streamalert.scheduled_queries.query_packs.parameters import QueryParameterGenerator +from streamalert.scheduled_queries.state.state_manager import StateManager +from streamalert.scheduled_queries.support.clock import Clock + + +class QueryPackExecutionContext: + """A convenience service bundle for multiple services related to querying""" + + def __init__(self, cache=None, athena=None, logger=None, params=None, + repository=None, clock=None): + self._cache = cache # type: StateManager + self._athena = athena # type: AthenaClient + self._logger = logger # type: Logger + self._params = params # type: QueryParameterGenerator + self._repo = repository # type: QueryPackRepository + self._clock = clock # type: Clock + + @property + def state_manager(self): + return self._cache + + @property + def athena_client(self): + return self._athena + + @property + def logger(self): + return self._logger + + @property + def parameter_generator(self): + return self._params + + @property + def query_pack_repository(self): + return self._repo + + @property + def clock(self): + return self._clock + + +class QueryPack: + """An encapsulation of both the query configuration as well as the intent to execute it + + This "pack" includes any additional state, parameters, and other stuff. + """ + + def __init__(self, query_pack_configuration, execution_context): + self._configuration = query_pack_configuration # type: QueryPackConfiguration + self._execution_context = execution_context + + self._query_execution = None + self._query_execution_id = None + self._query_result = None + + self._query_parameters = { + param: self._execution_context.parameter_generator.generate(param) + for param in self._configuration.query_parameters + } + self._query_string = None + + @property + def unique_id(self): + return self._configuration.name + + @property + def query_pack_configuration(self): + """ + Returns: + QueryPackConfiguration + """ + return self._configuration + + @property + def query_execution(self): + """ + Returns: + AthenaQueryExecution + """ + return self._query_execution + + @property + def query_execution_id(self): + return self._query_execution_id + + @property + def query_result(self): + return self._query_result + + @property + def is_previously_started(self): + return self._query_execution_id is not None + + @property + def query_parameters(self): + return self._query_parameters + + @property + def query_string(self): + return self._query_string + + def load_from_cache(self): + cache_key = self.unique_id + if self._execution_context.state_manager.has(cache_key): + entry = self._execution_context.state_manager.get(cache_key) + query_execution_id = entry['query_execution_id'] + self._query_execution_id = query_execution_id + + def start_query(self): + """Kicks off the current query to Athena, returning a query execution id + + Calls to this method internally modify ths query pack, setting the query_execution_id + property. Calling this method when is_previous_started=True will do not thing. 
+ """ + if self.is_previously_started: + return None + + self._query_execution_id = self._execution_context.athena_client.run_async_query( + self.generate_query_string() + ) + self.save_to_cache() + return self._query_execution_id + + def load_query_execution(self): + """Refreshes the query_execution property of this query pack + + Returns: + AthenaQueryExecution + """ + if not self.is_previously_started: + return None + + self._query_execution = self._execution_context.athena_client.get_query_execution( + self._query_execution_id + ) + return self._query_execution + + def fetch_results(self): + """Refreshes the query_result property of this query pack + + Returns: + AthenaQueryResult + """ + if not self._query_execution.is_succeeded(): + return None + + self._query_result = self._execution_context.athena_client.get_query_result( + self._query_execution + ) + return self._query_result + + def save_to_cache(self): + entry = { + 'query_execution_id': self._query_execution_id, + # 'query_string': self.generate_query_string(), + } + + self._execution_context.state_manager.set(self.unique_id, entry) + + def generate_query_string(self): + params = self._query_parameters + self._execution_context.logger.debug( + 'Generated Parameters: {}'.format(json.dumps(params, indent=2)) + ) + self._query_string = self._configuration.generate_query(**params) + return self._query_string + + +class QueryPacksManagerFactory: + """A factory service for generating QueryPacksManager instances""" + + def __init__(self, execution_context): + self._execution_context = execution_context # type: QueryPackExecutionContext + + def new_manager(self): + """ + Return: + QueryPacksManager + """ + manager = QueryPacksManager(self._execution_context) + manager.load_query_configurations() + + return manager + + +class QueryPacksManager: + """This class manages multiple query packs that are firing off simultaneously + + This class is not a service--it is a stateful container for QueryPacks, which themselves + can be stateful. + """ + + def __init__(self, execution_context): + self._execution_context = execution_context + + self._query_configs = [] # type: list[QueryPackConfiguration] + + self._query_packs = [] # type: list[QueryPack] + + def load_query_configurations(self): + repo_packs = self._execution_context.query_pack_repository.get_packs() + + # Tags are an inclusive filter + # If no tags are provided, then it includes all packs + # If multiple tags are provided, then only the packs that contain ALL OF THE TAGS + # will be run + configured_tags = self._execution_context.state_manager.get( + 'streamquery_configuration', {} + ).get('tags', []) + + for tag in configured_tags: + repo_packs = [ + pack + for pack in repo_packs + if tag in pack.tags + ] + + self._query_configs = repo_packs + + def initialize_query_packs(self): + """Sets up query packs for this manager. + + QueryPacks are a list of queries that this manager is intended to manage. + """ + + self._query_packs = [] + + for pack_config in self._query_configs: + query_pack = QueryPack(pack_config, self._execution_context) + query_pack.load_from_cache() + + self._query_packs.append(query_pack) + + def start_queries(self): + """Kicks off all query packs, if necessary + + This method is idempotent. 
+ """ + for query_pack in self._query_packs: + self._kickoff_query(query_pack) + + @property + def query_packs(self): + return self._query_packs + + @property + def finished_query_packs(self): + return [ + query + for query in self._query_packs + if not query.load_query_execution().is_still_running() + ] + + @property + def num_registered_queries(self): + """This property is the number of configured queries, NOT the number of running ones""" + return len(self._query_configs) + + @property + def all_queries_finished(self): + return self.num_queries_still_running == 0 + + @property + def num_queries_still_running(self): + return len(self.query_packs) - len(self.finished_query_packs) + + def _kickoff_query(self, query_pack): + """Begins executing a query, given the QueryPackConfiguration + + Args: + query_pack (QueryPack) + + Returns: + QueryPack + """ + if query_pack.is_previously_started: + self._execution_context.logger.debug( + 'Existing Query Execution exists for "%s": [%s]', + query_pack.query_pack_configuration.name, + query_pack.query_execution_id + ) + return query_pack + + self._execution_context.logger.info( + 'Executing Query Pack "%s"...', query_pack.query_pack_configuration.name + ) + + query_pack.start_query() + + return query_pack diff --git a/streamalert/scheduled_queries/query_packs/parameters.py b/streamalert/scheduled_queries/query_packs/parameters.py new file mode 100644 index 000000000..d014e734d --- /dev/null +++ b/streamalert/scheduled_queries/query_packs/parameters.py @@ -0,0 +1,66 @@ +""" +Copyright 2017-present, Airbnb Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from datetime import timedelta + +from streamalert.scheduled_queries.support.clock import Clock + + +# FIXME (derek.wang) +# In the future we can evaluate making this into a more customizable system similar to query +# packs. Users can define their own custom parameters. 
+class QueryParameterGenerator: + """This service helps queries generate dynamic parameters.""" + + def __init__(self, logger, clock): + self._logger = logger + self._clock = clock # type: Clock + + def generate(self, parameter): + if parameter == 'utcdatehour_minus7day': + # https://docs.python.org/2/library/datetime.html#strftime-strptime-behavior + time = self._clock.now - timedelta(days=7) + return time.strftime('%Y-%m-%d-%H') + + if parameter == 'utcdatehour_minus1hour': + time = self._clock.now - timedelta(hours=1) + return time.strftime('%Y-%m-%d-%H') + + if parameter == 'utctimestamp_minus1hour': + time = self._clock.now - timedelta(hours=1) + return str(round(time.timestamp())) + + if parameter == 'utcdatehour_minus2hour': + time = self._clock.now - timedelta(hours=2) + return time.strftime('%Y-%m-%d-%H') + + if parameter == 'utcdatehour_minus1day': + time = self._clock.now - timedelta(days=1) + return time.strftime('%Y-%m-%d-%H') + + if parameter == 'utcdatehour_minus2day': + time = self._clock.now - timedelta(days=2) + return time.strftime('%Y-%m-%d-%H') + + if parameter == 'utcdatehour': + return self._clock.now.strftime('%Y-%m-%d-%H') + + if parameter == 'utctimestamp': + return str(round(self._clock.now.timestamp())) + + self._logger.error( + 'Parameter generator does not know how to handle "{}"'.format(parameter) + ) + return None diff --git a/streamalert/scheduled_queries/state/__init__.py b/streamalert/scheduled_queries/state/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/streamalert/scheduled_queries/state/state_manager.py b/streamalert/scheduled_queries/state/state_manager.py new file mode 100644 index 000000000..b0e2a2a22 --- /dev/null +++ b/streamalert/scheduled_queries/state/state_manager.py @@ -0,0 +1,163 @@ +""" +Copyright 2017-present, Airbnb Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from datetime import datetime + + +class StateManager: + """Encapsulation of a caching system that is currently backed by the filesystem + + The "state" of a StreamQuery execution is encapsulated by + """ + + def __init__(self, logger=None): + self._logger = logger + + self._data = {} + + def set(self, key, value): + self._data[key] = value + + def has(self, key): + return key in self._data + + def get(self, key, fallback=None): + return self._data.get(key, fallback) + + def delete(self, key): + del self._data[key] + + @property + def keys(self): + return list(self._data.keys()) + + def _dangerously_set_all_data(self, data): + """ + This method is NOT intended to be used by any classes outside of this module. + """ + self._data = data + + def _dangerously_get_all_data(self): + """ + This method is NOT intended to be used by any classes outside of this module. + """ + return self._data + + +class StepFunctionStateManager: + """State management when using AWS Step Functions + + The State of a step function is stored in a JSON blob that is passed from one State Machine + state to the next. 
In states that execute Lambda functions, the state is passed in via the + JSON event trigger. + """ + + def __init__(self, state_manager, logger, clock): + self._state_manager = state_manager + self._logger = logger + self._clock = clock + + def load_from_step_function_event(self, event): + """Given a lambda input event, loads the execution state of this StreamQuery iteration. + + When using Step Functions, lambda receives the state machine's state as the input event. + + ON FIRST execution, the expected event looks like this: + + { + "name": "streamquery_cloudwatch_trigger", + "event_id": "abcdabcd-1234-5678-1234-000001200000", + "source_arn": "arn:aws:events:us-east-1:123456789012:rule/myprefix_schedule_thing", + "streamquery_configuration": { + "clock": "2020-02-18T23:55:16Z", + "tags": [ + "hourly", + "production" + ] + } + } + + This represents the state of the Step Function state machine when it is first triggered + by CloudWatch. In the above event, the event is generated via CloudWatch. The + "streamquery_configuration" node is used to configure the lambda execution. + + @see terraform/modules/tf_scheduled_queries/cloudwatch_schedule.tf + + + Henceforth, the "state" is always stored under a single key, "step_function_state". + In these subsequent executions, the expected input event looks like this: + + { + "done": 0, + "continue": 1, + "step_function_state": { + "streamquery_configuration": { + "clock": "2020-02-18T23:55:16Z", + "tags": [ + "hourly", + "production" + ] + }, + "my_query": { + "query_execution_id": "70e509ed-c992-4096-8882-6bb070578347" + }, + "my_other_query": { + "query_execution_id": "b56cf6f3-d760-4abe-9345-fccd9cfa05e8" + }, + "my_done_query": { + "query_execution_id": "beeffc15-7608-48b4-89a4-a8e7ea81c5e6", + "sent_to_streamalert": true + } + ... + } + } + + This "step_function_state" stores both the configuration (tags & clock), as well as the + execution states of the scheduled queries. + + The "done" and "continue" flags at the stop of the event are + """ + # pylint: disable=protected-access + self._state_manager._dangerously_set_all_data(event.get('step_function_state', {})) + self._logger.info('Successfully loaded from Step Function Event') + + # Special; The first time we execute the function, our "step_function_state" is empty, so + # we will not have the streamquery_configuration set up. This code loads it from the + # input event. Henceforth, this "streamquery_configuration" will be saved to and loaded + # from "step_function_state". + if 'streamquery_configuration' in event: + # We expect 2 keys to exist, passed in from the CloudWatch rule input transformer: + # - clock: ISO timestamp in UTC + # - tags: Array of strings + self._logger.info('Loading configuration from first-run...') + self._state_manager.set('streamquery_configuration', event['streamquery_configuration']) + + # Now, wind the clock to the correct time, based upon the configuration + isotime = self._state_manager.get('streamquery_configuration', {}).get('clock', False) + if isotime: + clock_datetime = datetime.strptime(isotime, "%Y-%m-%dT%H:%M:%SZ") + self._clock.time_machine(clock_datetime) + self._logger.info('Winding clock to %s...', self._clock.now) + else: + self._logger.warning( + 'No clock configuration provided. 
Defaulting to %s', + self._clock.now + ) + + def write_to_step_function_response(self, response): + response.update({ + # pylint: disable=protected-access + 'step_function_state': self._state_manager._dangerously_get_all_data(), + }) diff --git a/streamalert/scheduled_queries/streamalert/__init__.py b/streamalert/scheduled_queries/streamalert/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/streamalert/scheduled_queries/streamalert/kinesis.py b/streamalert/scheduled_queries/streamalert/kinesis.py new file mode 100644 index 000000000..57a9d7900 --- /dev/null +++ b/streamalert/scheduled_queries/streamalert/kinesis.py @@ -0,0 +1,145 @@ +""" +Copyright 2017-present, Airbnb Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +import json + + +class KinesisClient: + """Encapsulation of all communication with and data structures sent to StreamAlert Kinesis""" + + STREAMQUERY_SCHEMA_VERSION = '1.0.0' + + def __init__(self, logger, client=None, kinesis_stream=None): + self._logger = logger + self._kinesis_stream = kinesis_stream + self._client = client + + def send_query_results(self, query_pack): + """Generates a request to Kinesis given the streamquery results, and dispatches them. + + Args: + query_pack (QueryPack): The QueryPack that successfully completed. + """ + result = query_pack.query_result # type: AthenaQueryResult + query = query_pack.query_pack_configuration # type: QueryPackConfiguration + + query_execution_id = query_pack.query_execution.query_execution_id + console_link = ( + 'https://us-east-1.console.aws.amazon.com/athena/home' + '?region=us-east-1#query/history/{}' + ).format(query_execution_id) + streamquery_result = { + "streamquery_schema_version": self.STREAMQUERY_SCHEMA_VERSION, + "execution": { + "name": query.name, + "description": query.description, + "query": query_pack.query_execution.query, + "query_parameters": query_pack.query_parameters, + "data_scanned_in_bytes": query_pack.query_execution.data_scanned_in_bytes, + "execution_time_ms": query_pack.query_execution.engine_execution_time_in_millis, + "tags": query.tags, + "query_execution_id": query_execution_id, + "console_link": console_link, + }, + "data": { + "headers": result.headers, + "rows": result.data_as_dicts, + "count": result.count, + }, + } + + self._logger.info( + 'Sending StreamQuery record to kinesis stream: {}'.format(self._kinesis_stream) + ) + self._logger.debug(json.dumps(streamquery_result, indent=2, separators=(', ', ': '))) + + response = self._client.put_records( + Records=[ + { + 'Data': json.dumps(streamquery_result), + 'PartitionKey': 'partitionKeyFoo' + }, + ], + StreamName=self._kinesis_stream + ) + self._logger.debug(response) + + if response['ResponseMetadata']['HTTPStatusCode'] == 200: + self._logger.info(' Success.') + else: + self._logger.info(' ERROR!') + + self._logger.info('Done.') + + def send_error_results(self, query_pack): + """Send Kinesis record to StreamAlert upon query failure + + In this case, there is no result. 
+ + Args: + query_pack (QueryPack): The QueryPack that failed to complete. + """ + query = query_pack.query_pack_configuration # type: QueryPackConfiguration + + query_execution_id = query_pack.query_execution.query_execution_id + console_link = ( + 'https://us-east-1.console.aws.amazon.com/athena/home' + '?region=us-east-1#query/history/{}' + ).format(query_execution_id) + streamquery_result = { + "streamquery_schema_version": self.STREAMQUERY_SCHEMA_VERSION, + "execution": { + "name": query.name, + "description": query.description, + "query": query_pack.query_execution.query, + "query_parameters": query_pack.query_parameters, + "data_scanned_in_bytes": query_pack.query_execution.data_scanned_in_bytes, + "execution_time_ms": query_pack.query_execution.engine_execution_time_in_millis, + "tags": query.tags, + "query_execution_id": query_execution_id, + "console_link": console_link, + "error": { + "description": query_pack.query_execution.status_description + }, + }, + "data": { + "headers": [], + "rows": [], + "count": 0, + }, + } + + self._logger.info( + 'Sending StreamQuery record to kinesis stream: {}'.format(self._kinesis_stream) + ) + self._logger.debug(json.dumps(streamquery_result, indent=2, separators=(', ', ': '))) + + response = self._client.put_records( + Records=[ + { + 'Data': json.dumps(streamquery_result), + 'PartitionKey': 'partitionKeyFoo' + }, + ], + StreamName=self._kinesis_stream + ) + self._logger.debug(response) + + if response['ResponseMetadata']['HTTPStatusCode'] == 200: + self._logger.info(' Success.') + else: + self._logger.info(' ERROR!') + + self._logger.info('Done.') diff --git a/streamalert/scheduled_queries/support/__init__.py b/streamalert/scheduled_queries/support/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/streamalert/scheduled_queries/support/clock.py b/streamalert/scheduled_queries/support/clock.py new file mode 100644 index 000000000..5bf1cc817 --- /dev/null +++ b/streamalert/scheduled_queries/support/clock.py @@ -0,0 +1,30 @@ +from copy import copy +from datetime import datetime + + +class Clock: + """A service that provides time and time-manipulation methods""" + + def __init__(self): + self._internal_time = datetime.utcnow() + + @property + def now(self): + """Returns current time as a datetime object. + + (!) EXTREMELY IMPORTANT DETAIL: While this returns a modification-safe copy of the time, + the internal clock will ALWAYS BE THE SAME and corresponds to the Clock's + "_internal_time" property. 
+ + Returns: + datetime + """ + return copy(self._internal_time) + + def time_machine(self, new_time): + """Changes the Clock's internal time + + Args: + new_time (datetime) + """ + self._internal_time = new_time diff --git a/streamalert/shared/__init__.py b/streamalert/shared/__init__.py index d079ce690..3832c9e6e 100644 --- a/streamalert/shared/__init__.py +++ b/streamalert/shared/__init__.py @@ -5,5 +5,6 @@ CLASSIFIER_FUNCTION_NAME = 'classifier' RULES_ENGINE_FUNCTION_NAME = 'rules_engine' RULE_PROMOTION_NAME = 'rule_promotion' +THREAT_INTEL_DOWNLOADER_NAME = 'threat_intel_downloader' CLUSTERED_FUNCTIONS = {CLASSIFIER_FUNCTION_NAME} diff --git a/streamalert/shared/config.py b/streamalert/shared/config.py index 57783fa1c..5ea70c995 100644 --- a/streamalert/shared/config.py +++ b/streamalert/shared/config.py @@ -16,6 +16,7 @@ from collections import defaultdict, OrderedDict import json import os +import re from streamalert.shared import CLUSTERED_FUNCTIONS from streamalert.shared.exceptions import ConfigError @@ -25,6 +26,12 @@ SUPPORTED_SOURCES = {'kinesis', 's3', 'sns', 'streamalert_app'} +# Used to detect special characters in log names. Log names can not contain special +# characters except "_" (underscore) because the log names will be referenced when +# create Athena tables and Firehose. +SPECIAL_CHAR_REGEX = re.compile(r'\W') +SPECIAL_CHAR_SUB = '_' + class TopLevelConfigKeys: """Define the available top level keys in the loaded config""" diff --git a/streamalert/shared/exceptions.py b/streamalert/shared/exceptions.py index b4dd9af7c..fb3fd085b 100644 --- a/streamalert/shared/exceptions.py +++ b/streamalert/shared/exceptions.py @@ -19,5 +19,5 @@ class StreamAlertError(Exception): """Base streamalert exception for inheritance""" -class ConfigError(StreamAlertError): +class ConfigError(StreamAlertError, ValueError): """Exception to be used for config related errors""" diff --git a/streamalert/shared/logger.py b/streamalert/shared/logger.py index 8379ccd4b..2c91bcc35 100644 --- a/streamalert/shared/logger.py +++ b/streamalert/shared/logger.py @@ -18,6 +18,8 @@ LOCAL_LOGGER_FMT = '[%(levelname)s %(asctime)s (%(name)s:%(lineno)d)]: %(message)s' +logging.basicConfig(level=logging.INFO, format=LOCAL_LOGGER_FMT) + class LogFormatter(logging.Formatter): @@ -41,20 +43,15 @@ def set_formatter(logger): Args: logger (logging.Logger): An instance of a logger for which to update the formatter """ - # Update the LambdaLoggerHandler formatter - if logger.hasHandlers(): - for handler in logger.handlers + logger.parent.handlers: - # pylint: disable=protected-access - # Retain the handlers format spec if it has one - fmt = handler.formatter._fmt if handler.formatter else None - handler.setFormatter(LogFormatter(fmt=fmt)) + # Update the LambdaLoggerHandler formatter if there is one + if not logger.hasHandlers(): return - # Otherwise, create a handler with the desired formatter - formatter = LogFormatter(fmt=LOCAL_LOGGER_FMT) - handler = logging.StreamHandler() - handler.setFormatter(formatter) - logger.addHandler(handler) + for handler in logger.handlers + logger.parent.handlers: + # pylint: disable=protected-access + # Retain the handlers format spec if it has one + fmt = handler.formatter._fmt if handler.formatter else None + handler.setFormatter(LogFormatter(fmt=fmt)) def get_logger(name, level=None): diff --git a/streamalert/shared/rule.py b/streamalert/shared/rule.py index 4009df9c6..f348045d8 100644 --- a/streamalert/shared/rule.py +++ b/streamalert/shared/rule.py @@ -59,6 +59,7 @@ def 
__init__(self, func, **kwargs): self.merge_by_keys = kwargs.get('merge_by_keys') self.merge_window_mins = kwargs.get('merge_window_mins') or 0 self.outputs = kwargs.get('outputs') + self.dynamic_outputs = kwargs.get('dynamic_outputs') self.publishers = kwargs.get('publishers') self.req_subkeys = kwargs.get('req_subkeys') self.initial_context = kwargs.get('context') @@ -199,6 +200,11 @@ def description(self, description): def outputs_set(self): return set(self.outputs or []) + + @property + def dynamic_outputs_set(self): + return set(self.dynamic_outputs or []) + @classmethod def disabled_rules(cls): return { diff --git a/streamalert/shared/utils.py b/streamalert/shared/utils.py index 67ca7dd47..30457a180 100644 --- a/streamalert/shared/utils.py +++ b/streamalert/shared/utils.py @@ -143,3 +143,26 @@ def get_keys(data, search_key, max_matches=-1): if val and isinstance(val, _CONTAINER_TYPES): containers.append(val) return results + +def get_database_name(config): + """Get the name of the athena database using the current config settings + Args: + config (CLIConfig): Loaded StreamAlert config + Returns: + str: The name of the athena database + """ + prefix = config['global']['account']['prefix'] + athena_config = config['lambda'].get('athena_partition_refresh_config') + + return athena_config.get('database_name', '{}_streamalert'.format(prefix)) + +def get_data_file_format(config): + """Get the data store format using the current config settings + Args: + config (CLIConfig): Loaded StreamAlert config + Returns: + str: The data store format either "parquet" or "json" + """ + athena_config = config['lambda'].get('athena_partition_refresh_config', {}) + + return athena_config.get('file_format') diff --git a/streamalert_cli/__init__.py b/streamalert_cli/__init__.py index e69de29bb..d97a878cb 100644 --- a/streamalert_cli/__init__.py +++ b/streamalert_cli/__init__.py @@ -0,0 +1,18 @@ +""" +Copyright 2017-present Airbnb, Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +import os + +STREAMALERT_CLI_ROOT = os.path.dirname(os.path.abspath(__file__)) diff --git a/terraform/modules/tf_alert_merger_iam/README.md b/streamalert_cli/_infrastructure/modules/tf_alert_merger_iam/README.md similarity index 100% rename from terraform/modules/tf_alert_merger_iam/README.md rename to streamalert_cli/_infrastructure/modules/tf_alert_merger_iam/README.md diff --git a/terraform/modules/tf_alert_merger_iam/main.tf b/streamalert_cli/_infrastructure/modules/tf_alert_merger_iam/main.tf similarity index 100% rename from terraform/modules/tf_alert_merger_iam/main.tf rename to streamalert_cli/_infrastructure/modules/tf_alert_merger_iam/main.tf diff --git a/terraform/modules/tf_alert_merger_iam/variables.tf b/streamalert_cli/_infrastructure/modules/tf_alert_merger_iam/variables.tf similarity index 100% rename from terraform/modules/tf_alert_merger_iam/variables.tf rename to streamalert_cli/_infrastructure/modules/tf_alert_merger_iam/variables.tf diff --git a/terraform/modules/tf_alert_processor_iam/README.md b/streamalert_cli/_infrastructure/modules/tf_alert_processor_iam/README.md similarity index 100% rename from terraform/modules/tf_alert_processor_iam/README.md rename to streamalert_cli/_infrastructure/modules/tf_alert_processor_iam/README.md diff --git a/terraform/modules/tf_alert_processor_iam/main.tf b/streamalert_cli/_infrastructure/modules/tf_alert_processor_iam/main.tf similarity index 92% rename from terraform/modules/tf_alert_processor_iam/main.tf rename to streamalert_cli/_infrastructure/modules/tf_alert_processor_iam/main.tf index f7c5e5134..13133e0de 100644 --- a/terraform/modules/tf_alert_processor_iam/main.tf +++ b/streamalert_cli/_infrastructure/modules/tf_alert_processor_iam/main.tf @@ -198,3 +198,24 @@ data "aws_iam_policy_document" "send_to_sqs_queues" { resources = ["${local.sqs_arn_prefix}:${element(local.sqs_outputs, count.index)}"] } } + +// Allow the Alert Processor to use ses:SendRawEmail +resource "aws_iam_role_policy" "send_raw_emails" { + name = "SendRawEmails" + role = var.role_id + policy = data.aws_iam_policy_document.send_raw_emails.json +} + +data "aws_iam_policy_document" "send_raw_emails" { + statement { + effect = "Allow" + + actions = [ + "ses:SendRawEmail" + ] + + // * because there isn't a way to state the emails or + // domains before the user puts them in as a secret + resources = ["*"] + } +} diff --git a/terraform/modules/tf_alert_processor_iam/variables.tf b/streamalert_cli/_infrastructure/modules/tf_alert_processor_iam/variables.tf similarity index 100% rename from terraform/modules/tf_alert_processor_iam/variables.tf rename to streamalert_cli/_infrastructure/modules/tf_alert_processor_iam/variables.tf diff --git a/terraform/modules/tf_app_iam/README.md b/streamalert_cli/_infrastructure/modules/tf_app_iam/README.md similarity index 100% rename from terraform/modules/tf_app_iam/README.md rename to streamalert_cli/_infrastructure/modules/tf_app_iam/README.md diff --git a/terraform/modules/tf_app_iam/main.tf b/streamalert_cli/_infrastructure/modules/tf_app_iam/main.tf similarity index 100% rename from terraform/modules/tf_app_iam/main.tf rename to streamalert_cli/_infrastructure/modules/tf_app_iam/main.tf diff --git a/terraform/modules/tf_app_iam/variables.tf b/streamalert_cli/_infrastructure/modules/tf_app_iam/variables.tf similarity index 100% rename from terraform/modules/tf_app_iam/variables.tf rename to streamalert_cli/_infrastructure/modules/tf_app_iam/variables.tf diff --git a/terraform/modules/tf_athena/README.md 
b/streamalert_cli/_infrastructure/modules/tf_athena/README.md similarity index 100% rename from terraform/modules/tf_athena/README.md rename to streamalert_cli/_infrastructure/modules/tf_athena/README.md diff --git a/terraform/modules/tf_athena/iam.tf b/streamalert_cli/_infrastructure/modules/tf_athena/iam.tf similarity index 100% rename from terraform/modules/tf_athena/iam.tf rename to streamalert_cli/_infrastructure/modules/tf_athena/iam.tf diff --git a/terraform/modules/tf_athena/kms.tf b/streamalert_cli/_infrastructure/modules/tf_athena/kms.tf similarity index 100% rename from terraform/modules/tf_athena/kms.tf rename to streamalert_cli/_infrastructure/modules/tf_athena/kms.tf diff --git a/terraform/modules/tf_athena/main.tf b/streamalert_cli/_infrastructure/modules/tf_athena/main.tf similarity index 100% rename from terraform/modules/tf_athena/main.tf rename to streamalert_cli/_infrastructure/modules/tf_athena/main.tf diff --git a/terraform/modules/tf_athena/outputs.tf b/streamalert_cli/_infrastructure/modules/tf_athena/outputs.tf similarity index 100% rename from terraform/modules/tf_athena/outputs.tf rename to streamalert_cli/_infrastructure/modules/tf_athena/outputs.tf diff --git a/terraform/modules/tf_athena/variables.tf b/streamalert_cli/_infrastructure/modules/tf_athena/variables.tf similarity index 100% rename from terraform/modules/tf_athena/variables.tf rename to streamalert_cli/_infrastructure/modules/tf_athena/variables.tf diff --git a/terraform/modules/tf_classifier/README.md b/streamalert_cli/_infrastructure/modules/tf_classifier/README.md similarity index 100% rename from terraform/modules/tf_classifier/README.md rename to streamalert_cli/_infrastructure/modules/tf_classifier/README.md diff --git a/terraform/modules/tf_classifier/firehose.tf b/streamalert_cli/_infrastructure/modules/tf_classifier/firehose.tf similarity index 96% rename from terraform/modules/tf_classifier/firehose.tf rename to streamalert_cli/_infrastructure/modules/tf_classifier/firehose.tf index c777fabb9..8fdfa89f8 100644 --- a/terraform/modules/tf_classifier/firehose.tf +++ b/streamalert_cli/_infrastructure/modules/tf_classifier/firehose.tf @@ -6,7 +6,7 @@ resource "aws_iam_role_policy" "classifier_firehose" { } locals { - stream_prefix = "${var.firehose_use_prefix ? "${var.prefix}_" : ""}streamalert_data_" + stream_prefix = "${var.firehose_use_prefix ? 
"${var.prefix}_" : ""}streamalert_" } // IAM Policy Doc: Allow the Classifier to PutRecord* on any StreamAlert Data Firehose diff --git a/terraform/modules/tf_classifier/iam.tf b/streamalert_cli/_infrastructure/modules/tf_classifier/iam.tf similarity index 100% rename from terraform/modules/tf_classifier/iam.tf rename to streamalert_cli/_infrastructure/modules/tf_classifier/iam.tf diff --git a/terraform/modules/tf_classifier/sns.tf b/streamalert_cli/_infrastructure/modules/tf_classifier/sns.tf similarity index 100% rename from terraform/modules/tf_classifier/sns.tf rename to streamalert_cli/_infrastructure/modules/tf_classifier/sns.tf diff --git a/terraform/modules/tf_classifier/variables.tf b/streamalert_cli/_infrastructure/modules/tf_classifier/variables.tf similarity index 100% rename from terraform/modules/tf_classifier/variables.tf rename to streamalert_cli/_infrastructure/modules/tf_classifier/variables.tf diff --git a/terraform/modules/tf_cloudtrail/README.md b/streamalert_cli/_infrastructure/modules/tf_cloudtrail/README.md similarity index 100% rename from terraform/modules/tf_cloudtrail/README.md rename to streamalert_cli/_infrastructure/modules/tf_cloudtrail/README.md diff --git a/terraform/modules/tf_cloudtrail/main.tf b/streamalert_cli/_infrastructure/modules/tf_cloudtrail/main.tf similarity index 84% rename from terraform/modules/tf_cloudtrail/main.tf rename to streamalert_cli/_infrastructure/modules/tf_cloudtrail/main.tf index 60d149a85..931562579 100644 --- a/terraform/modules/tf_cloudtrail/main.tf +++ b/streamalert_cli/_infrastructure/modules/tf_cloudtrail/main.tf @@ -94,6 +94,7 @@ resource "aws_cloudtrail" "streamalert" { s3_bucket_name = aws_s3_bucket.cloudtrail_bucket.id cloud_watch_logs_role_arn = var.cloudwatch_logs_role_arn // defaults to null cloud_watch_logs_group_arn = var.cloudwatch_logs_group_arn // defaults to null + sns_topic_name = var.send_to_sns ? aws_sns_topic.cloudtrail[0].name : null enable_log_file_validation = true enable_logging = var.enable_logging include_global_service_events = true @@ -224,3 +225,42 @@ data "aws_iam_policy_document" "cloudtrail_bucket" { } } } + +// Replace any noncompliant characters with hyphens for the topic name +locals { + sanitized_topic_name = replace(var.s3_bucket_name, "/[^a-zA-Z0-9_-]/", "-") +} + +resource "aws_sns_topic" "cloudtrail" { + count = var.send_to_sns ? 1 : 0 + + name = local.sanitized_topic_name +} + +// SNS topic policy document for cloudtrail to sns +resource "aws_sns_topic_policy" "cloudtrail" { + count = var.send_to_sns ? 1 : 0 + + arn = aws_sns_topic.cloudtrail[0].arn + policy = data.aws_iam_policy_document.cloudtrail[0].json +} + +data "aws_iam_policy_document" "cloudtrail" { + count = var.send_to_sns ? 
1 : 0 + + statement { + sid = "AWSCloudTrailSNSPublish" + effect = "Allow" + + principals { + type = "Service" + identifiers = ["cloudtrail.amazonaws.com"] + } + + actions = ["SNS:Publish"] + + resources = [ + aws_sns_topic.cloudtrail[0].arn, + ] + } +} diff --git a/terraform/modules/tf_cloudtrail/modules/tf_cloudtrail_cloudwatch/README.md b/streamalert_cli/_infrastructure/modules/tf_cloudtrail/modules/tf_cloudtrail_cloudwatch/README.md similarity index 100% rename from terraform/modules/tf_cloudtrail/modules/tf_cloudtrail_cloudwatch/README.md rename to streamalert_cli/_infrastructure/modules/tf_cloudtrail/modules/tf_cloudtrail_cloudwatch/README.md diff --git a/terraform/modules/tf_cloudtrail/modules/tf_cloudtrail_cloudwatch/main.tf b/streamalert_cli/_infrastructure/modules/tf_cloudtrail/modules/tf_cloudtrail_cloudwatch/main.tf similarity index 100% rename from terraform/modules/tf_cloudtrail/modules/tf_cloudtrail_cloudwatch/main.tf rename to streamalert_cli/_infrastructure/modules/tf_cloudtrail/modules/tf_cloudtrail_cloudwatch/main.tf diff --git a/terraform/modules/tf_cloudtrail/modules/tf_cloudtrail_cloudwatch/output.tf b/streamalert_cli/_infrastructure/modules/tf_cloudtrail/modules/tf_cloudtrail_cloudwatch/output.tf similarity index 100% rename from terraform/modules/tf_cloudtrail/modules/tf_cloudtrail_cloudwatch/output.tf rename to streamalert_cli/_infrastructure/modules/tf_cloudtrail/modules/tf_cloudtrail_cloudwatch/output.tf diff --git a/terraform/modules/tf_cloudtrail/modules/tf_cloudtrail_cloudwatch/variables.tf b/streamalert_cli/_infrastructure/modules/tf_cloudtrail/modules/tf_cloudtrail_cloudwatch/variables.tf similarity index 100% rename from terraform/modules/tf_cloudtrail/modules/tf_cloudtrail_cloudwatch/variables.tf rename to streamalert_cli/_infrastructure/modules/tf_cloudtrail/modules/tf_cloudtrail_cloudwatch/variables.tf diff --git a/terraform/modules/tf_cloudtrail/variables.tf b/streamalert_cli/_infrastructure/modules/tf_cloudtrail/variables.tf similarity index 89% rename from terraform/modules/tf_cloudtrail/variables.tf rename to streamalert_cli/_infrastructure/modules/tf_cloudtrail/variables.tf index aa7c30dec..e16fbe550 100644 --- a/terraform/modules/tf_cloudtrail/variables.tf +++ b/streamalert_cli/_infrastructure/modules/tf_cloudtrail/variables.tf @@ -49,6 +49,12 @@ variable "s3_event_selector_type" { description = "Type of S3 object level logging to enable via CloudTrail. Choices are: 'ReadOnly', 'WriteOnly', 'All', or '', where '' disables this feature" } +variable "send_to_sns" { + type = bool + default = false + description = "Whether or not events should be sent to SNS when objects are created in S3. 
This creates an SNS topic when set to true" +} + variable "cloudwatch_logs_role_arn" { type = string default = null diff --git a/terraform/modules/tf_cloudwatch_events/README.md b/streamalert_cli/_infrastructure/modules/tf_cloudwatch_events/README.md similarity index 100% rename from terraform/modules/tf_cloudwatch_events/README.md rename to streamalert_cli/_infrastructure/modules/tf_cloudwatch_events/README.md diff --git a/terraform/modules/tf_cloudwatch_events/main.tf b/streamalert_cli/_infrastructure/modules/tf_cloudwatch_events/main.tf similarity index 100% rename from terraform/modules/tf_cloudwatch_events/main.tf rename to streamalert_cli/_infrastructure/modules/tf_cloudwatch_events/main.tf diff --git a/terraform/modules/tf_cloudwatch_events/variables.tf b/streamalert_cli/_infrastructure/modules/tf_cloudwatch_events/variables.tf similarity index 100% rename from terraform/modules/tf_cloudwatch_events/variables.tf rename to streamalert_cli/_infrastructure/modules/tf_cloudwatch_events/variables.tf diff --git a/terraform/modules/tf_cloudwatch_logs_destination/README.md b/streamalert_cli/_infrastructure/modules/tf_cloudwatch_logs_destination/README.md similarity index 100% rename from terraform/modules/tf_cloudwatch_logs_destination/README.md rename to streamalert_cli/_infrastructure/modules/tf_cloudwatch_logs_destination/README.md diff --git a/terraform/modules/tf_cloudwatch_logs_destination/iam.tf b/streamalert_cli/_infrastructure/modules/tf_cloudwatch_logs_destination/iam.tf similarity index 100% rename from terraform/modules/tf_cloudwatch_logs_destination/iam.tf rename to streamalert_cli/_infrastructure/modules/tf_cloudwatch_logs_destination/iam.tf diff --git a/terraform/modules/tf_cloudwatch_logs_destination/modules/destination/iam.tf b/streamalert_cli/_infrastructure/modules/tf_cloudwatch_logs_destination/modules/destination/iam.tf similarity index 100% rename from terraform/modules/tf_cloudwatch_logs_destination/modules/destination/iam.tf rename to streamalert_cli/_infrastructure/modules/tf_cloudwatch_logs_destination/modules/destination/iam.tf diff --git a/terraform/modules/tf_cloudwatch_logs_destination/modules/destination/main.tf b/streamalert_cli/_infrastructure/modules/tf_cloudwatch_logs_destination/modules/destination/main.tf similarity index 100% rename from terraform/modules/tf_cloudwatch_logs_destination/modules/destination/main.tf rename to streamalert_cli/_infrastructure/modules/tf_cloudwatch_logs_destination/modules/destination/main.tf diff --git a/terraform/modules/tf_cloudwatch_logs_destination/modules/destination/output.tf b/streamalert_cli/_infrastructure/modules/tf_cloudwatch_logs_destination/modules/destination/output.tf similarity index 100% rename from terraform/modules/tf_cloudwatch_logs_destination/modules/destination/output.tf rename to streamalert_cli/_infrastructure/modules/tf_cloudwatch_logs_destination/modules/destination/output.tf diff --git a/terraform/modules/tf_cloudwatch_logs_destination/modules/destination/variables.tf b/streamalert_cli/_infrastructure/modules/tf_cloudwatch_logs_destination/modules/destination/variables.tf similarity index 100% rename from terraform/modules/tf_cloudwatch_logs_destination/modules/destination/variables.tf rename to streamalert_cli/_infrastructure/modules/tf_cloudwatch_logs_destination/modules/destination/variables.tf diff --git a/terraform/modules/tf_cloudwatch_logs_destination/output.tf b/streamalert_cli/_infrastructure/modules/tf_cloudwatch_logs_destination/output.tf similarity index 100% rename from 
terraform/modules/tf_cloudwatch_logs_destination/output.tf rename to streamalert_cli/_infrastructure/modules/tf_cloudwatch_logs_destination/output.tf diff --git a/terraform/modules/tf_cloudwatch_logs_destination/variables.tf b/streamalert_cli/_infrastructure/modules/tf_cloudwatch_logs_destination/variables.tf similarity index 100% rename from terraform/modules/tf_cloudwatch_logs_destination/variables.tf rename to streamalert_cli/_infrastructure/modules/tf_cloudwatch_logs_destination/variables.tf diff --git a/terraform/modules/tf_flow_logs/README.md b/streamalert_cli/_infrastructure/modules/tf_flow_logs/README.md similarity index 100% rename from terraform/modules/tf_flow_logs/README.md rename to streamalert_cli/_infrastructure/modules/tf_flow_logs/README.md diff --git a/terraform/modules/tf_flow_logs/iam.tf b/streamalert_cli/_infrastructure/modules/tf_flow_logs/iam.tf similarity index 100% rename from terraform/modules/tf_flow_logs/iam.tf rename to streamalert_cli/_infrastructure/modules/tf_flow_logs/iam.tf diff --git a/terraform/modules/tf_flow_logs/main.tf b/streamalert_cli/_infrastructure/modules/tf_flow_logs/main.tf similarity index 100% rename from terraform/modules/tf_flow_logs/main.tf rename to streamalert_cli/_infrastructure/modules/tf_flow_logs/main.tf diff --git a/terraform/modules/tf_flow_logs/output.tf b/streamalert_cli/_infrastructure/modules/tf_flow_logs/output.tf similarity index 100% rename from terraform/modules/tf_flow_logs/output.tf rename to streamalert_cli/_infrastructure/modules/tf_flow_logs/output.tf diff --git a/terraform/modules/tf_flow_logs/variables.tf b/streamalert_cli/_infrastructure/modules/tf_flow_logs/variables.tf similarity index 100% rename from terraform/modules/tf_flow_logs/variables.tf rename to streamalert_cli/_infrastructure/modules/tf_flow_logs/variables.tf diff --git a/terraform/modules/tf_globals/README.md b/streamalert_cli/_infrastructure/modules/tf_globals/README.md similarity index 100% rename from terraform/modules/tf_globals/README.md rename to streamalert_cli/_infrastructure/modules/tf_globals/README.md diff --git a/terraform/modules/tf_globals/alerts_firehose/iam.tf b/streamalert_cli/_infrastructure/modules/tf_globals/alerts_firehose/iam.tf similarity index 81% rename from terraform/modules/tf_globals/alerts_firehose/iam.tf rename to streamalert_cli/_infrastructure/modules/tf_globals/alerts_firehose/iam.tf index 30b31385b..25594de7e 100644 --- a/terraform/modules/tf_globals/alerts_firehose/iam.tf +++ b/streamalert_cli/_infrastructure/modules/tf_globals/alerts_firehose/iam.tf @@ -51,6 +51,27 @@ data "aws_iam_policy_document" "firehose_s3" { } } +// IAM Policy: Interact with the Glue Catalog +resource "aws_iam_role_policy" "stream_alert_firehose_glue" { + name = "streamalert_firehose_read_glue_catalog" + role = "${aws_iam_role.firehose.id}" + + policy = "${data.aws_iam_policy_document.firehose_glue_catalog.json}" +} + +// IAM Policy Document: Interact with the Glue Catalog +data "aws_iam_policy_document" "firehose_glue_catalog" { + statement { + effect = "Allow" + + actions = [ + "glue:GetTableVersions" + ] + + resources = ["*"] + } +} + // CloudWatch Log Stream: S3Delivery resource "aws_cloudwatch_log_stream" "s3_delivery" { name = "S3Delivery" diff --git a/streamalert_cli/_infrastructure/modules/tf_globals/alerts_firehose/main.tf b/streamalert_cli/_infrastructure/modules/tf_globals/alerts_firehose/main.tf new file mode 100644 index 000000000..cb5d4a26a --- /dev/null +++ 
b/streamalert_cli/_infrastructure/modules/tf_globals/alerts_firehose/main.tf @@ -0,0 +1,135 @@ +locals { + # Athena reads all data stored under the 's3://bucketname/prefix/'. When the file + # format is Parquet, Athena will throw "HIVE_CANNOT_OPEN_SPLIT" errors if there are + # *.gz files. + # https://docs.aws.amazon.com/athena/latest/ug/tables-location-format.html + # So all data in Parquet format is saved to the S3 bucket under the prefix + # "s3://bucketname/parquet/alerts". + s3_path_prefix = "parquet/alerts" +} + +locals { + stream_name = "${var.prefix}_streamalert_alert_delivery" + bucket_arn = "arn:aws:s3:::${var.bucket_name}" + alerts_location = "s3://${var.bucket_name}/${local.s3_path_prefix}" + ser_de_params_key = var.file_format == "parquet" ? "serialization.format" : "ignore.malformed.json" + ser_de_params_value = var.file_format == "parquet" ? "1" : "true" +} + +resource "aws_kinesis_firehose_delivery_stream" "streamalerts" { + name = local.stream_name + destination = var.file_format == "parquet" ? "extended_s3" : "s3" + + // AWS Firehose Stream for Alerts to S3 and saved in JSON format + dynamic "s3_configuration" { + for_each = var.file_format == "parquet" ? [] : [var.file_format] + content { + role_arn = aws_iam_role.firehose.arn + bucket_arn = local.bucket_arn + prefix = "alerts/" + buffer_size = var.buffer_size + buffer_interval = var.buffer_interval + compression_format = "GZIP" + kms_key_arn = var.kms_key_arn + + cloudwatch_logging_options { + enabled = true + log_group_name = aws_cloudwatch_log_group.firehose.name + log_stream_name = "S3Delivery" + } + } + } + + // AWS Firehose Stream for Alerts to S3 and saved in Parquet format + dynamic "extended_s3_configuration" { + for_each = var.file_format == "parquet" ? [var.file_format] : [] + content { + role_arn = aws_iam_role.firehose.arn + bucket_arn = local.bucket_arn + prefix = "${local.s3_path_prefix}/dt=!{timestamp:yyyy-MM-dd-HH}/" + error_output_prefix = "${local.s3_path_prefix}/!{firehose:error-output-type}/" + buffer_size = var.buffer_size + buffer_interval = var.buffer_interval + + # The S3 destination's compression format must be set to UNCOMPRESSED + # when data format conversion is enabled. + compression_format = "UNCOMPRESSED" + kms_key_arn = var.kms_key_arn + + data_format_conversion_configuration { + input_format_configuration { + deserializer { + # more resilient than hive_json_ser_de for log schemas that contain nested JSON + open_x_json_ser_de {} + } + } + output_format_configuration { + serializer { + parquet_ser_de {} + } + } + schema_configuration { + database_name = var.alerts_db_name + role_arn = aws_iam_role.firehose.arn + table_name = "alerts" + } + } + + cloudwatch_logging_options { + enabled = true + log_group_name = aws_cloudwatch_log_group.firehose.name + log_stream_name = "S3Delivery" + } + } + } + + depends_on = [aws_glue_catalog_table.alerts] + + tags = { + Name = "StreamAlert" + } +} + +// CloudWatch Log Group: Firehose +resource "aws_cloudwatch_log_group" "firehose" { + name = "/aws/kinesisfirehose/${local.stream_name}" + retention_in_days = var.cloudwatch_log_retention + + tags = { + Name = "StreamAlert" + } +} + +// Alerts Athena table +resource "aws_glue_catalog_table" "alerts" { + count = var.file_format == "parquet" ? 1 : 0 + name = "alerts" + database_name = var.alerts_db_name + + table_type = "EXTERNAL_TABLE" + + partition_keys { + name = "dt" + type = "string" + } + + storage_descriptor { + location = local.alerts_location + input_format = var.file_format == "parquet" ?
"org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat" : "org.apache.hadoop.mapred.TextInputFormat" + output_format = var.file_format == "parquet" ? "org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat" : "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat" + + ser_de_info { + name = "${var.file_format}_ser_de" + serialization_library = var.file_format == "parquet" ? "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe" : "org.openx.data.jsonserde.JsonSerDe" + parameters = map(local.ser_de_params_key, local.ser_de_params_value) + } + + dynamic "columns" { + for_each = var.alerts_schema + content { + name = columns.value[0] + type = columns.value[1] + } + } + } +} diff --git a/terraform/modules/tf_globals/alerts_firehose/variables.tf b/streamalert_cli/_infrastructure/modules/tf_globals/alerts_firehose/variables.tf similarity index 72% rename from terraform/modules/tf_globals/alerts_firehose/variables.tf rename to streamalert_cli/_infrastructure/modules/tf_globals/alerts_firehose/variables.tf index 139d74f36..4eb29096c 100644 --- a/terraform/modules/tf_globals/alerts_firehose/variables.tf +++ b/streamalert_cli/_infrastructure/modules/tf_globals/alerts_firehose/variables.tf @@ -26,10 +26,18 @@ variable "cloudwatch_log_retention" { type = number } -variable "compression_format" { +variable "file_format" { type = string } variable "kms_key_arn" { type = string } + +variable "alerts_db_name" { + type = string +} + +variable "alerts_schema" { + type = list(tuple([string, string])) +} diff --git a/terraform/modules/tf_globals/classifier_queue/iam.tf b/streamalert_cli/_infrastructure/modules/tf_globals/classifier_queue/iam.tf similarity index 100% rename from terraform/modules/tf_globals/classifier_queue/iam.tf rename to streamalert_cli/_infrastructure/modules/tf_globals/classifier_queue/iam.tf diff --git a/terraform/modules/tf_globals/classifier_queue/kms.tf b/streamalert_cli/_infrastructure/modules/tf_globals/classifier_queue/kms.tf similarity index 100% rename from terraform/modules/tf_globals/classifier_queue/kms.tf rename to streamalert_cli/_infrastructure/modules/tf_globals/classifier_queue/kms.tf diff --git a/terraform/modules/tf_globals/classifier_queue/output.tf b/streamalert_cli/_infrastructure/modules/tf_globals/classifier_queue/output.tf similarity index 100% rename from terraform/modules/tf_globals/classifier_queue/output.tf rename to streamalert_cli/_infrastructure/modules/tf_globals/classifier_queue/output.tf diff --git a/terraform/modules/tf_globals/classifier_queue/sqs.tf b/streamalert_cli/_infrastructure/modules/tf_globals/classifier_queue/sqs.tf similarity index 100% rename from terraform/modules/tf_globals/classifier_queue/sqs.tf rename to streamalert_cli/_infrastructure/modules/tf_globals/classifier_queue/sqs.tf diff --git a/terraform/modules/tf_globals/classifier_queue/variables.tf b/streamalert_cli/_infrastructure/modules/tf_globals/classifier_queue/variables.tf similarity index 100% rename from terraform/modules/tf_globals/classifier_queue/variables.tf rename to streamalert_cli/_infrastructure/modules/tf_globals/classifier_queue/variables.tf diff --git a/terraform/modules/tf_globals/main.tf b/streamalert_cli/_infrastructure/modules/tf_globals/main.tf similarity index 92% rename from terraform/modules/tf_globals/main.tf rename to streamalert_cli/_infrastructure/modules/tf_globals/main.tf index d61bdf1fd..300eec537 100644 --- a/terraform/modules/tf_globals/main.tf +++ b/streamalert_cli/_infrastructure/modules/tf_globals/main.tf @@ -6,9 +6,11 
@@ module "alerts_firehose" { buffer_size = var.alerts_firehose_buffer_size buffer_interval = var.alerts_firehose_buffer_interval cloudwatch_log_retention = var.alerts_firehose_cloudwatch_log_retention - compression_format = var.alerts_firehose_compression_format kms_key_arn = var.kms_key_arn bucket_name = var.alerts_firehose_bucket_name == "" ? "${var.prefix}-streamalerts" : var.alerts_firehose_bucket_name + alerts_db_name = var.alerts_db_name + file_format = var.alerts_file_format + alerts_schema = var.alerts_schema } module "classifier_queue" { diff --git a/terraform/modules/tf_globals/output.tf b/streamalert_cli/_infrastructure/modules/tf_globals/output.tf similarity index 100% rename from terraform/modules/tf_globals/output.tf rename to streamalert_cli/_infrastructure/modules/tf_globals/output.tf diff --git a/terraform/modules/tf_globals/variables.tf b/streamalert_cli/_infrastructure/modules/tf_globals/variables.tf similarity index 77% rename from terraform/modules/tf_globals/variables.tf rename to streamalert_cli/_infrastructure/modules/tf_globals/variables.tf index 108f42503..7eaee49a7 100644 --- a/terraform/modules/tf_globals/variables.tf +++ b/streamalert_cli/_infrastructure/modules/tf_globals/variables.tf @@ -34,11 +34,6 @@ variable "alerts_firehose_cloudwatch_log_retention" { default = 14 } -variable "alerts_firehose_compression_format" { - type = string - default = "GZIP" -} - variable "alerts_table_read_capacity" { type = number default = 5 @@ -49,6 +44,18 @@ variable "alerts_table_write_capacity" { default = 5 } +variable "alerts_db_name" {} + +variable "alerts_file_format" { + type = string + description = "Either parquet or json" +} + +variable "alerts_schema" { + type = list(tuple([string, string])) + description = "Schema used to create Athena alerts table in terraform" +} + variable "enable_rule_staging" { default = false } diff --git a/terraform/modules/tf_kinesis_events/README.md b/streamalert_cli/_infrastructure/modules/tf_kinesis_events/README.md similarity index 100% rename from terraform/modules/tf_kinesis_events/README.md rename to streamalert_cli/_infrastructure/modules/tf_kinesis_events/README.md diff --git a/terraform/modules/tf_kinesis_events/main.tf b/streamalert_cli/_infrastructure/modules/tf_kinesis_events/main.tf similarity index 100% rename from terraform/modules/tf_kinesis_events/main.tf rename to streamalert_cli/_infrastructure/modules/tf_kinesis_events/main.tf diff --git a/terraform/modules/tf_kinesis_events/variables.tf b/streamalert_cli/_infrastructure/modules/tf_kinesis_events/variables.tf similarity index 100% rename from terraform/modules/tf_kinesis_events/variables.tf rename to streamalert_cli/_infrastructure/modules/tf_kinesis_events/variables.tf diff --git a/terraform/modules/tf_kinesis_firehose_delivery_stream/README.md b/streamalert_cli/_infrastructure/modules/tf_kinesis_firehose_delivery_stream/README.md similarity index 100% rename from terraform/modules/tf_kinesis_firehose_delivery_stream/README.md rename to streamalert_cli/_infrastructure/modules/tf_kinesis_firehose_delivery_stream/README.md diff --git a/streamalert_cli/_infrastructure/modules/tf_kinesis_firehose_delivery_stream/main.tf b/streamalert_cli/_infrastructure/modules/tf_kinesis_firehose_delivery_stream/main.tf new file mode 100644 index 000000000..b1f2b53ca --- /dev/null +++ b/streamalert_cli/_infrastructure/modules/tf_kinesis_firehose_delivery_stream/main.tf @@ -0,0 +1,151 @@ +// AWS Firehose Stream: StreamAlert Data +// +// This resource is broken out into its own module due 
to the way +// Terraform handles list interpolation on resources. +// + +locals { + # Athena reads all data stored under the 's3://bucketname/prefix/'. When the file + # format is Parquet, Athena will throw "HIVE_CANNOT_OPEN_SPLIT" errors if there are + # *.gz files. + # https://docs.aws.amazon.com/athena/latest/ug/tables-location-format.html + # So all data in Parquet format is saved to the S3 bucket under the prefix + # "s3://bucketname/parquet/[data-type]". + # glue_catalog_table_name maps to data-type if the length of data-type is not too long. + s3_path_prefix = "parquet/${var.glue_catalog_table_name}" +} + +locals { + # Athena reads all data stored under the 's3://bucketname/prefix/'. When the file + # format is Parquet, Athena will throw "HIVE_CANNOT_OPEN_SPLIT" errors if there are + # *.gz files. + # https://docs.aws.amazon.com/athena/latest/ug/tables-location-format.html + # So all data in Parquet format is saved to the S3 bucket under the prefix "parquet/[data-type]". + data_location = "s3://${var.s3_bucket_name}/${local.s3_path_prefix}" + ser_de_params_key = var.file_format == "parquet" ? "serialization.format" : "ignore.malformed.json" + ser_de_params_value = var.file_format == "parquet" ? "1" : "true" +} + +resource "aws_kinesis_firehose_delivery_stream" "streamalert_data" { + name = var.stream_name + destination = var.file_format == "parquet" ? "extended_s3" : "s3" + + // AWS Firehose Stream for data to S3 and saved in JSON format + dynamic "s3_configuration" { + for_each = var.file_format == "parquet" ? [] : [var.file_format] + content { + role_arn = var.role_arn + bucket_arn = "arn:aws:s3:::${var.s3_bucket_name}" + prefix = "${var.glue_catalog_table_name}/" + buffer_size = var.buffer_size + buffer_interval = var.buffer_interval + compression_format = "GZIP" + kms_key_arn = var.kms_key_arn + } + } + + // AWS Firehose Stream for data to S3 and saved in Parquet format + dynamic "extended_s3_configuration" { + for_each = var.file_format == "parquet" ? [var.file_format] : [] + content { + role_arn = var.role_arn + bucket_arn = "arn:aws:s3:::${var.s3_bucket_name}" + prefix = "${local.s3_path_prefix}/dt=!{timestamp:yyyy-MM-dd-HH}/" + error_output_prefix = "${local.s3_path_prefix}/!{firehose:error-output-type}/" + buffer_size = var.buffer_size + buffer_interval = var.buffer_interval + + # The S3 destination's compression format must be set to UNCOMPRESSED + # when data format conversion is enabled. + compression_format = "UNCOMPRESSED" + kms_key_arn = var.kms_key_arn + + data_format_conversion_configuration { + input_format_configuration { + deserializer { + # more resilient than hive_json_ser_de for log schemas that contain nested JSON + open_x_json_ser_de {} + } + } + output_format_configuration { + serializer { + parquet_ser_de {} + } + } + schema_configuration { + database_name = var.glue_catalog_db_name + role_arn = var.role_arn + table_name = var.glue_catalog_table_name + } + } + } + } + + depends_on = [aws_glue_catalog_table.data] + + tags = { + Name = "StreamAlert" + } +} + +// AWS CloudWatch Metric Alarm for this Firehose +resource "aws_cloudwatch_metric_alarm" "firehose_records_alarm" { + count = var.enable_alarm ?
1 : 0 + alarm_name = "${aws_kinesis_firehose_delivery_stream.streamalert_data.name}_record_count" + namespace = "AWS/Firehose" + metric_name = "IncomingRecords" + statistic = "Sum" + comparison_operator = "LessThanThreshold" + threshold = var.alarm_threshold + evaluation_periods = var.evaluation_periods + period = var.period_seconds + alarm_description = "StreamAlert Firehose record count less than expected threshold: ${var.stream_name}" + alarm_actions = var.alarm_actions + + dimensions = { + DeliveryStreamName = aws_kinesis_firehose_delivery_stream.streamalert_data.name + } + + tags = { + Name = "StreamAlert" + } +} + +// data athena table +resource "aws_glue_catalog_table" "data" { + count = var.file_format == "parquet" ? 1 : 0 + name = var.glue_catalog_table_name + database_name = var.glue_catalog_db_name + + table_type = "EXTERNAL_TABLE" + + # parameters = { + # EXTERNAL = "TRUE" + # "parquet.compression" = "UNCOMPRESSED" + # } + + partition_keys { + name = "dt" + type = "string" + } + + storage_descriptor { + location = local.data_location + input_format = var.file_format == "parquet" ? "org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat" : "org.apache.hadoop.mapred.TextInputFormat" + output_format = var.file_format == "parquet" ? "org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat" : "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat" + + ser_de_info { + name = "${var.file_format}_ser_de" + serialization_library = var.file_format == "parquet" ? "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe" : "org.openx.data.jsonserde.JsonSerDe" + parameters = map(local.ser_de_params_key, local.ser_de_params_value) + } + + dynamic "columns" { + for_each = var.schema + content { + name = columns.value[0] + type = columns.value[1] + } + } + } +} diff --git a/terraform/modules/tf_kinesis_firehose_delivery_stream/outputs.tf b/streamalert_cli/_infrastructure/modules/tf_kinesis_firehose_delivery_stream/outputs.tf similarity index 100% rename from terraform/modules/tf_kinesis_firehose_delivery_stream/outputs.tf rename to streamalert_cli/_infrastructure/modules/tf_kinesis_firehose_delivery_stream/outputs.tf diff --git a/terraform/modules/tf_kinesis_firehose_delivery_stream/variables.tf b/streamalert_cli/_infrastructure/modules/tf_kinesis_firehose_delivery_stream/variables.tf similarity index 75% rename from terraform/modules/tf_kinesis_firehose_delivery_stream/variables.tf rename to streamalert_cli/_infrastructure/modules/tf_kinesis_firehose_delivery_stream/variables.tf index f093dcd14..12e7729b3 100644 --- a/terraform/modules/tf_kinesis_firehose_delivery_stream/variables.tf +++ b/streamalert_cli/_infrastructure/modules/tf_kinesis_firehose_delivery_stream/variables.tf @@ -1,9 +1,7 @@ -variable "prefix" { - type = string -} -variable "use_prefix" { - description = "When true, prepends the StreamAlert prefix to the AWS Firehose resource name" +variable "stream_name" { + type = string + description = "Fully qualified name to use for delivery stream" } variable "buffer_size" { @@ -14,13 +12,9 @@ variable "buffer_interval" { default = 300 } -variable "compression_format" { - type = string - default = "GZIP" -} - -variable "log_name" { - type = string +variable "file_format" { + type = string + description = "Either parquet or json" } variable "role_arn" { @@ -60,3 +54,15 @@ variable "alarm_actions" { default = [] description = "Optional list of CloudWatch alarm actions (e.g. 
SNS topic ARNs)" } + +variable "glue_catalog_db_name" { + type = string +} + +variable "glue_catalog_table_name" { + type = string +} + +variable "schema" { + type = list(tuple([string, string])) +} diff --git a/terraform/modules/tf_kinesis_firehose_setup/README.md b/streamalert_cli/_infrastructure/modules/tf_kinesis_firehose_setup/README.md similarity index 100% rename from terraform/modules/tf_kinesis_firehose_setup/README.md rename to streamalert_cli/_infrastructure/modules/tf_kinesis_firehose_setup/README.md diff --git a/terraform/modules/tf_kinesis_firehose_setup/iam.tf b/streamalert_cli/_infrastructure/modules/tf_kinesis_firehose_setup/iam.tf similarity index 80% rename from terraform/modules/tf_kinesis_firehose_setup/iam.tf rename to streamalert_cli/_infrastructure/modules/tf_kinesis_firehose_setup/iam.tf index 2a759dffa..01c220b46 100644 --- a/terraform/modules/tf_kinesis_firehose_setup/iam.tf +++ b/streamalert_cli/_infrastructure/modules/tf_kinesis_firehose_setup/iam.tf @@ -84,3 +84,24 @@ data "aws_iam_policy_document" "firehose_cloudwatch" { ] } } + +// IAM Policy: Interact with the Glue Catalog +resource "aws_iam_role_policy" "streamalert_firehose_glue" { + name = "streamalert_firehose_read_glue_catalog" + role = "${aws_iam_role.streamalert_kinesis_firehose.id}" + + policy = "${data.aws_iam_policy_document.firehose_glue_catalog.json}" +} + +// IAM Policy Document: Interact with the Glue Catalog +data "aws_iam_policy_document" "firehose_glue_catalog" { + statement { + effect = "Allow" + + actions = [ + "glue:GetTableVersions" + ] + + resources = ["*"] + } +} diff --git a/terraform/modules/tf_kinesis_firehose_setup/main.tf b/streamalert_cli/_infrastructure/modules/tf_kinesis_firehose_setup/main.tf similarity index 100% rename from terraform/modules/tf_kinesis_firehose_setup/main.tf rename to streamalert_cli/_infrastructure/modules/tf_kinesis_firehose_setup/main.tf diff --git a/terraform/modules/tf_kinesis_firehose_setup/outputs.tf b/streamalert_cli/_infrastructure/modules/tf_kinesis_firehose_setup/outputs.tf similarity index 100% rename from terraform/modules/tf_kinesis_firehose_setup/outputs.tf rename to streamalert_cli/_infrastructure/modules/tf_kinesis_firehose_setup/outputs.tf diff --git a/terraform/modules/tf_kinesis_firehose_setup/variables.tf b/streamalert_cli/_infrastructure/modules/tf_kinesis_firehose_setup/variables.tf similarity index 100% rename from terraform/modules/tf_kinesis_firehose_setup/variables.tf rename to streamalert_cli/_infrastructure/modules/tf_kinesis_firehose_setup/variables.tf diff --git a/terraform/modules/tf_kinesis_streams/README.md b/streamalert_cli/_infrastructure/modules/tf_kinesis_streams/README.md similarity index 100% rename from terraform/modules/tf_kinesis_streams/README.md rename to streamalert_cli/_infrastructure/modules/tf_kinesis_streams/README.md diff --git a/terraform/modules/tf_kinesis_streams/iam.tf b/streamalert_cli/_infrastructure/modules/tf_kinesis_streams/iam.tf similarity index 100% rename from terraform/modules/tf_kinesis_streams/iam.tf rename to streamalert_cli/_infrastructure/modules/tf_kinesis_streams/iam.tf diff --git a/terraform/modules/tf_kinesis_streams/main.tf b/streamalert_cli/_infrastructure/modules/tf_kinesis_streams/main.tf similarity index 100% rename from terraform/modules/tf_kinesis_streams/main.tf rename to streamalert_cli/_infrastructure/modules/tf_kinesis_streams/main.tf diff --git a/terraform/modules/tf_kinesis_streams/outputs.tf b/streamalert_cli/_infrastructure/modules/tf_kinesis_streams/outputs.tf 
similarity index 100% rename from terraform/modules/tf_kinesis_streams/outputs.tf rename to streamalert_cli/_infrastructure/modules/tf_kinesis_streams/outputs.tf diff --git a/terraform/modules/tf_kinesis_streams/variables.tf b/streamalert_cli/_infrastructure/modules/tf_kinesis_streams/variables.tf similarity index 100% rename from terraform/modules/tf_kinesis_streams/variables.tf rename to streamalert_cli/_infrastructure/modules/tf_kinesis_streams/variables.tf diff --git a/terraform/modules/tf_lambda/README.md b/streamalert_cli/_infrastructure/modules/tf_lambda/README.md similarity index 100% rename from terraform/modules/tf_lambda/README.md rename to streamalert_cli/_infrastructure/modules/tf_lambda/README.md diff --git a/terraform/modules/tf_lambda/cloudwatch.tf b/streamalert_cli/_infrastructure/modules/tf_lambda/cloudwatch.tf similarity index 100% rename from terraform/modules/tf_lambda/cloudwatch.tf rename to streamalert_cli/_infrastructure/modules/tf_lambda/cloudwatch.tf diff --git a/terraform/modules/tf_lambda/iam.tf b/streamalert_cli/_infrastructure/modules/tf_lambda/iam.tf similarity index 100% rename from terraform/modules/tf_lambda/iam.tf rename to streamalert_cli/_infrastructure/modules/tf_lambda/iam.tf diff --git a/terraform/modules/tf_lambda/main.tf b/streamalert_cli/_infrastructure/modules/tf_lambda/main.tf similarity index 100% rename from terraform/modules/tf_lambda/main.tf rename to streamalert_cli/_infrastructure/modules/tf_lambda/main.tf diff --git a/terraform/modules/tf_lambda/output.tf b/streamalert_cli/_infrastructure/modules/tf_lambda/output.tf similarity index 100% rename from terraform/modules/tf_lambda/output.tf rename to streamalert_cli/_infrastructure/modules/tf_lambda/output.tf diff --git a/terraform/modules/tf_lambda/variables.tf b/streamalert_cli/_infrastructure/modules/tf_lambda/variables.tf similarity index 100% rename from terraform/modules/tf_lambda/variables.tf rename to streamalert_cli/_infrastructure/modules/tf_lambda/variables.tf diff --git a/terraform/modules/tf_lookup_tables_dynamodb/README.md b/streamalert_cli/_infrastructure/modules/tf_lookup_tables_dynamodb/README.md similarity index 100% rename from terraform/modules/tf_lookup_tables_dynamodb/README.md rename to streamalert_cli/_infrastructure/modules/tf_lookup_tables_dynamodb/README.md diff --git a/terraform/modules/tf_lookup_tables_dynamodb/main.tf b/streamalert_cli/_infrastructure/modules/tf_lookup_tables_dynamodb/main.tf similarity index 100% rename from terraform/modules/tf_lookup_tables_dynamodb/main.tf rename to streamalert_cli/_infrastructure/modules/tf_lookup_tables_dynamodb/main.tf diff --git a/terraform/modules/tf_lookup_tables_dynamodb/variables.tf b/streamalert_cli/_infrastructure/modules/tf_lookup_tables_dynamodb/variables.tf similarity index 100% rename from terraform/modules/tf_lookup_tables_dynamodb/variables.tf rename to streamalert_cli/_infrastructure/modules/tf_lookup_tables_dynamodb/variables.tf diff --git a/terraform/modules/tf_lookup_tables_policy/README.md b/streamalert_cli/_infrastructure/modules/tf_lookup_tables_policy/README.md similarity index 100% rename from terraform/modules/tf_lookup_tables_policy/README.md rename to streamalert_cli/_infrastructure/modules/tf_lookup_tables_policy/README.md diff --git a/terraform/modules/tf_lookup_tables_policy/main.tf b/streamalert_cli/_infrastructure/modules/tf_lookup_tables_policy/main.tf similarity index 100% rename from terraform/modules/tf_lookup_tables_policy/main.tf rename to 
streamalert_cli/_infrastructure/modules/tf_lookup_tables_policy/main.tf diff --git a/terraform/modules/tf_lookup_tables_policy/variables.tf b/streamalert_cli/_infrastructure/modules/tf_lookup_tables_policy/variables.tf similarity index 100% rename from terraform/modules/tf_lookup_tables_policy/variables.tf rename to streamalert_cli/_infrastructure/modules/tf_lookup_tables_policy/variables.tf diff --git a/terraform/modules/tf_lookup_tables_s3/README.md b/streamalert_cli/_infrastructure/modules/tf_lookup_tables_s3/README.md similarity index 100% rename from terraform/modules/tf_lookup_tables_s3/README.md rename to streamalert_cli/_infrastructure/modules/tf_lookup_tables_s3/README.md diff --git a/terraform/modules/tf_lookup_tables_s3/main.tf b/streamalert_cli/_infrastructure/modules/tf_lookup_tables_s3/main.tf similarity index 100% rename from terraform/modules/tf_lookup_tables_s3/main.tf rename to streamalert_cli/_infrastructure/modules/tf_lookup_tables_s3/main.tf diff --git a/terraform/modules/tf_lookup_tables_s3/variables.tf b/streamalert_cli/_infrastructure/modules/tf_lookup_tables_s3/variables.tf similarity index 100% rename from terraform/modules/tf_lookup_tables_s3/variables.tf rename to streamalert_cli/_infrastructure/modules/tf_lookup_tables_s3/variables.tf diff --git a/terraform/modules/tf_metric_alarms/README.md b/streamalert_cli/_infrastructure/modules/tf_metric_alarms/README.md similarity index 100% rename from terraform/modules/tf_metric_alarms/README.md rename to streamalert_cli/_infrastructure/modules/tf_metric_alarms/README.md diff --git a/terraform/modules/tf_metric_alarms/main.tf b/streamalert_cli/_infrastructure/modules/tf_metric_alarms/main.tf similarity index 100% rename from terraform/modules/tf_metric_alarms/main.tf rename to streamalert_cli/_infrastructure/modules/tf_metric_alarms/main.tf diff --git a/terraform/modules/tf_metric_alarms/variables.tf b/streamalert_cli/_infrastructure/modules/tf_metric_alarms/variables.tf similarity index 100% rename from terraform/modules/tf_metric_alarms/variables.tf rename to streamalert_cli/_infrastructure/modules/tf_metric_alarms/variables.tf diff --git a/terraform/modules/tf_metric_filters/README.md b/streamalert_cli/_infrastructure/modules/tf_metric_filters/README.md similarity index 100% rename from terraform/modules/tf_metric_filters/README.md rename to streamalert_cli/_infrastructure/modules/tf_metric_filters/README.md diff --git a/terraform/modules/tf_metric_filters/main.tf b/streamalert_cli/_infrastructure/modules/tf_metric_filters/main.tf similarity index 100% rename from terraform/modules/tf_metric_filters/main.tf rename to streamalert_cli/_infrastructure/modules/tf_metric_filters/main.tf diff --git a/terraform/modules/tf_metric_filters/variables.tf b/streamalert_cli/_infrastructure/modules/tf_metric_filters/variables.tf similarity index 100% rename from terraform/modules/tf_metric_filters/variables.tf rename to streamalert_cli/_infrastructure/modules/tf_metric_filters/variables.tf diff --git a/terraform/modules/tf_monitoring/main.tf b/streamalert_cli/_infrastructure/modules/tf_monitoring/main.tf similarity index 100% rename from terraform/modules/tf_monitoring/main.tf rename to streamalert_cli/_infrastructure/modules/tf_monitoring/main.tf diff --git a/terraform/modules/tf_monitoring/variables.tf b/streamalert_cli/_infrastructure/modules/tf_monitoring/variables.tf similarity index 100% rename from terraform/modules/tf_monitoring/variables.tf rename to streamalert_cli/_infrastructure/modules/tf_monitoring/variables.tf diff 
--git a/terraform/modules/tf_rule_promotion_iam/README.md b/streamalert_cli/_infrastructure/modules/tf_rule_promotion_iam/README.md similarity index 100% rename from terraform/modules/tf_rule_promotion_iam/README.md rename to streamalert_cli/_infrastructure/modules/tf_rule_promotion_iam/README.md diff --git a/terraform/modules/tf_rule_promotion_iam/main.tf b/streamalert_cli/_infrastructure/modules/tf_rule_promotion_iam/main.tf similarity index 100% rename from terraform/modules/tf_rule_promotion_iam/main.tf rename to streamalert_cli/_infrastructure/modules/tf_rule_promotion_iam/main.tf diff --git a/terraform/modules/tf_rule_promotion_iam/variables.tf b/streamalert_cli/_infrastructure/modules/tf_rule_promotion_iam/variables.tf similarity index 97% rename from terraform/modules/tf_rule_promotion_iam/variables.tf rename to streamalert_cli/_infrastructure/modules/tf_rule_promotion_iam/variables.tf index 3958095c1..03b64de7a 100644 --- a/terraform/modules/tf_rule_promotion_iam/variables.tf +++ b/streamalert_cli/_infrastructure/modules/tf_rule_promotion_iam/variables.tf @@ -28,7 +28,7 @@ variable "athena_results_bucket_arn" { variable "alerts_bucket" { description = "Name of S3 bucket where alerts are stored and queryable by Athena" - type = list(string) + type = string } variable "s3_kms_key_arn" { diff --git a/terraform/modules/tf_rules_engine/README.md b/streamalert_cli/_infrastructure/modules/tf_rules_engine/README.md similarity index 100% rename from terraform/modules/tf_rules_engine/README.md rename to streamalert_cli/_infrastructure/modules/tf_rules_engine/README.md diff --git a/terraform/modules/tf_rules_engine/iam.tf b/streamalert_cli/_infrastructure/modules/tf_rules_engine/iam.tf similarity index 100% rename from terraform/modules/tf_rules_engine/iam.tf rename to streamalert_cli/_infrastructure/modules/tf_rules_engine/iam.tf diff --git a/terraform/modules/tf_rules_engine/lambda.tf b/streamalert_cli/_infrastructure/modules/tf_rules_engine/lambda.tf similarity index 100% rename from terraform/modules/tf_rules_engine/lambda.tf rename to streamalert_cli/_infrastructure/modules/tf_rules_engine/lambda.tf diff --git a/terraform/modules/tf_rules_engine/variables.tf b/streamalert_cli/_infrastructure/modules/tf_rules_engine/variables.tf similarity index 100% rename from terraform/modules/tf_rules_engine/variables.tf rename to streamalert_cli/_infrastructure/modules/tf_rules_engine/variables.tf diff --git a/terraform/modules/tf_s3_events/main.tf b/streamalert_cli/_infrastructure/modules/tf_s3_events/main.tf similarity index 100% rename from terraform/modules/tf_s3_events/main.tf rename to streamalert_cli/_infrastructure/modules/tf_s3_events/main.tf diff --git a/terraform/modules/tf_s3_events/variables.tf b/streamalert_cli/_infrastructure/modules/tf_s3_events/variables.tf similarity index 100% rename from terraform/modules/tf_s3_events/variables.tf rename to streamalert_cli/_infrastructure/modules/tf_s3_events/variables.tf diff --git a/streamalert_cli/_infrastructure/modules/tf_scheduled_queries/cloudwatch_schedule.tf b/streamalert_cli/_infrastructure/modules/tf_scheduled_queries/cloudwatch_schedule.tf new file mode 100644 index 000000000..8bfc7fee1 --- /dev/null +++ b/streamalert_cli/_infrastructure/modules/tf_scheduled_queries/cloudwatch_schedule.tf @@ -0,0 +1,63 @@ +/* + * CloudWatch schedules + */ +resource "aws_cloudwatch_event_rule" "event" { + count = length(var.query_packs) + + name = "${var.prefix}_streamalert_scheduled_queries_event_${count.index}" + description = 
var.query_packs[count.index].description + schedule_expression = var.query_packs[count.index].schedule_expression +} + +resource "aws_cloudwatch_event_target" "run_step_function" { + count = length(var.query_packs) + + rule = aws_cloudwatch_event_rule.event[count.index].name + arn = aws_sfn_state_machine.state_machine.id + role_arn = aws_iam_role.iam_for_cloudwatch_schedule.arn + + /* + * The input transformer takes the CloudWatch event, which looks something like this... + * { + * "version": "0", + * "id": "91190ee0-a078-9c42-15b6-f3d418fae67d", + * "detail-type": "Scheduled Event", + * "source": "aws.events", + * "account": "123456789012", + * "time": "2019-06-14T18:39:21Z", + * "region": "us-east-1", + * "resources": [ + * "arn:aws:events:us-east-1:123456789012:rule/something_streamalert_schedule_hourly" + * ], + * "detail": {} + * } + * + * And transforms it into something more like this: + * { + * "name": "streamalert_scheduled_queries_cloudwatch_trigger", + * "event_id": "9119abcd-abcd-abcd-abcd-f3d418fae67d", + * "source_arn": "arn:aws:events:us-east-1:123456789012:rule/something_streamalert_scheduled_queries", + * "function_start_time": "2019-06-14T18:39:21Z", + * "tags": ["tag1", "tag2"] + * } + */ + input_transformer { + input_paths = { + time = "$.time" + id = "$.id" + source_arn = "$.resources[0]" + } + input_template = <, + "source_arn": , + "streamquery_configuration": { + "clock":