diff --git a/metron-deployment/packaging/docker/rpm-docker/SPECS/metron.spec b/metron-deployment/packaging/docker/rpm-docker/SPECS/metron.spec index e4b99ca4dc..ea9f8b086c 100644 --- a/metron-deployment/packaging/docker/rpm-docker/SPECS/metron.spec +++ b/metron-deployment/packaging/docker/rpm-docker/SPECS/metron.spec @@ -311,6 +311,7 @@ This package installs the Metron Solr Common files %dir %{metron_home}/config %{metron_home}/bin/create_collection.sh %{metron_home}/bin/delete_collection.sh +%{metron_home}/bin/create_configset.sh %{metron_home}/bin/install_solr.sh %{metron_home}/bin/start_solr.sh %{metron_home}/bin/start_solr_topology.sh diff --git a/metron-platform/metron-solr/metron-solr-common/README.md b/metron-platform/metron-solr/metron-solr-common/README.md index 88f74375db..2cc4f0e47f 100644 --- a/metron-platform/metron-solr/metron-solr-common/README.md +++ b/metron-platform/metron-solr/metron-solr-common/README.md @@ -164,3 +164,142 @@ The `create_collection.sh` script depends on schemas installed in `$METRON_HOME/ Additional schemas should be installed in that location if using the `create_collection.sh` script. Any collection can be deleted with the `delete_collection.sh` script. These scripts use the [Solr Collection API](http://lucene.apache.org/solr/guide/7_4/collections-api.html). + +## Time routed alias support +An alias is a pointer that points to a Collection. Sending a document to an alias sends it to the collection the alias points to. +The collection an alias points to can be changed with a single, low-cost operation. Time Routed Aliases (TRAs) are a SolrCloud feature +that manages an alias and a time sequential series of collections. + +A TRA automatically creates new collections and (optionally) deletes old ones as it routes documents to the correct collection +based on the timestamp of the event.
This approach allows for indefinite indexing of data without degradation of performance otherwise +experienced due to the continuous growth of a single index. + +A TRA is defined with a minimum time and a defined interval period and SOLR provides a collection for each interval for a +contiguous set of datetime intervals from the start date to the maximum received document date. Collections are created to host documents based on examining the document's event-time. If a document does not currently +have a collection created for it, then starting at the minimum date SOLR will create a collection for each interval that does not have one + up until the interval period needed to store the current document. + +See SOLR documentation [\(1\)](https://lucene.apache.org/solr/guide/7_4/time-routed-aliases.html) +[\(2\)](https://lucene.apache.org/solr/guide/7_4/collections-api.html#createalias) for more information. + +### Setting up Time routed alias support + +Using SOLR's time-based routing requires using SOLR's native datetime types. At the moment, Metron uses the LongTrie field type +to store dates, which is not a SOLR native datetime type. At a later stage the Metron code-base will be changed to use SOLR native datetime types +(as the LongTrie type is deprecated), but for now a workaround procedure has been created to allow for the use of time-based routing, while at the + same time allowing for Metron to continue to use the LongTrie type. This procedure only works for new collections, and is as follows: + +1. Add the following field type definition near the end of the schema.xml document (the entry must be inside the schema tags) + ``` + + ``` + + +1. Add the following field definition near the start of the schema.xml document (the entry must be inside the schema tags) + ``` + + ``` + + +1. 
Create the configset for the collection: Assuming that the relevant collection's schema.xml and solrconfig.xml are located in the +`$METRON_HOME/config/schema/$COLLECTION_NAME` folder, use the following command: + ``` + $METRON_HOME/bin/create_configset.sh $COLLECTION_NAME + ``` + + +1. Create the time-based routing alias for the collection: +Assuming the following values: + * SOLR_HOST: Host SOLR is installed on + + * ALIAS_NAME: Name of the new alias + + * ROUTER_START: Beginning time-period datetime in ISO-8601 standard - milliseconds portion of the date must be 0, some examples are +'2018-01-14T21:00:00Z', 'NOW/SECOND', 'NOW/DAY' + + * ROUTER_FIELD: The name of the field in the incoming document that contains the datetime to route on - field must be of SOLR type DateTrie or DatePoint. + For METRON this is standardised as field `datetime`. + + * ROUTER_INTERVAL: SOLR Date math format. The interval of time that each collection holds. e.g. "+1DAY", "+6HOUR", "+1WEEK" (`+` must be URL encoded to `%2B` ) + + * ROUTER_MAXFUTUREMS: Optional field containing the number of milliseconds into the future that it is considered valid to have an event time for. + Documents with an event time exceeding this time period in the future are considered invalid and an error is returned. Used as a sanity check to prevent + the creation of unnecessary collections due to corrupted datetime values in events. The default is to ignore anything more than 10 minutes into the future. + + * ROUTER_AUTODELETEAGE: Optional field in SOLR Date math format. If this field is present, any time a collection is created, + the oldest collections are assessed for deletion. Collections are deleted if the datetime interval they represent is older than + NOW - ROUTER_AUTODELETEAGE. e.g. -2WEEK, -3MONTH, -1YEAR. (`-` is a valid URL character that does not need to be URL encoded.) + + * CONFIGSET: Name of the collection configset that was created in the previous step - this is used as a template for new collections. 
+ + * CREATE-COLLECTION.*: These allow for Create collection options (e.g. numShards or replicationFactor) to be specified directly in the + create alias command. + + Then the following command will create a time-routed alias: + ``` + curl "http://$SOLR_HOST:8983/solr/admin/collections?action=CREATEALIAS\ + &name=$ALIAS_NAME\ + &router.start=$ROUTER_START\ + &router.field=$ROUTER_FIELD\ + &router.name=time\ + &router.interval=$ROUTER_INTERVAL\ + &router.maxFutureMs=$ROUTER_MAXFUTUREMS\ + &create-collection.collection.configName=$CONFIGSET\ + &create-collection.numShards=2" + ``` + + +1. Add a Metron Parser Stellar field transformation to the parser config that adds a correctly formatted datetime string to the event as it is being parsed: + 1. Set environment variables for later reference + ``` + source /etc/default/metron + export HDP_HOME="/usr/hdp/current" + export PARSER_NAME= + ``` + + 1. Pull the most recent sensor parser config from zookeeper + ``` + ${METRON_HOME}/bin/zk_load_configs.sh -o ${METRON_HOME}/config/zookeeper -m PULL -c PARSER -n $PARSER_NAME -z $ZOOKEEPER + ``` + + 1. Open the file for the relevant sensor parser at `$METRON_HOME/config/zookeeper/parsers/$PARSER_NAME.json` + + 1. Add to the sensor parser config json field the following transformation: + ``` + "fieldTransformations" : [{ + input + "transformation" : "STELLAR" + ,"output" : [ "datetime" ] + ,"config" : { + "datetime" : "DATE_FORMAT(\"yyyy-MM-dd'T'HH:mm:ss.SSSX\",timestamp)" + } + }] + ``` + + 1. Push the configuration back to zookeeper + ``` + ${METRON_HOME}/bin/zk_load_configs.sh -i ${METRON_HOME}/config/zookeeper -m PUSH -c PARSER -n $PARSER_NAME -z $ZOOKEEPER + ``` + + 1. Run the Kafka console consumer to monitor correct operation of the field transformation + ``` + ${HDP_HOME}/kafka-broker/bin/kafka-console-consumer.sh --bootstrap-server $BROKERLIST --topic $PARSER_NAME + ``` + + +1. Configure Metron SOLR indexing to push documents to the newly created Collection Alias. + 1. 
Pull the most recent index config from zookeeper + ``` + ${METRON_HOME}/bin/zk_load_configs.sh -o ${METRON_HOME}/config/zookeeper -m PULL -c INDEXING -n $PARSER_NAME -z $ZOOKEEPER + ``` + + 1. Edit the file ${METRON_HOME}/config/zookeeper/indexing/$PARSER_NAME.json + + 1. Update the solr/index field to the `ALIAS_NAME` value you configured for the SOLR time-based routing alias. + + 1. Push the configuration back to zookeeper + ``` + ${METRON_HOME}/bin/zk_load_configs.sh -i ${METRON_HOME}/config/zookeeper -m PUSH -c INDEXING -n $PARSER_NAME -z $ZOOKEEPER + ``` + + diff --git a/metron-platform/metron-solr/metron-solr-common/src/main/config/schema/bro/schema.xml b/metron-platform/metron-solr/metron-solr-common/src/main/config/schema/bro/schema.xml index 6be76a0e43..81de9576a1 100644 --- a/metron-platform/metron-solr/metron-solr-common/src/main/config/schema/bro/schema.xml +++ b/metron-platform/metron-solr/metron-solr-common/src/main/config/schema/bro/schema.xml @@ -36,6 +36,8 @@ + + guid @@ -697,5 +699,6 @@ + diff --git a/metron-platform/metron-solr/metron-solr-common/src/main/config/schema/error/schema.xml b/metron-platform/metron-solr/metron-solr-common/src/main/config/schema/error/schema.xml index 4aa80efc96..743753b201 100644 --- a/metron-platform/metron-solr/metron-solr-common/src/main/config/schema/error/schema.xml +++ b/metron-platform/metron-solr/metron-solr-common/src/main/config/schema/error/schema.xml @@ -55,6 +55,7 @@ + diff --git a/metron-platform/metron-solr/metron-solr-common/src/main/config/schema/metaalert/schema.xml b/metron-platform/metron-solr/metron-solr-common/src/main/config/schema/metaalert/schema.xml index 6555bf61d7..154a3d7c3d 100644 --- a/metron-platform/metron-solr/metron-solr-common/src/main/config/schema/metaalert/schema.xml +++ b/metron-platform/metron-solr/metron-solr-common/src/main/config/schema/metaalert/schema.xml @@ -26,6 +26,8 @@ + + @@ -58,5 +60,6 @@ + \ No newline at end of file diff --git 
a/metron-platform/metron-solr/metron-solr-common/src/main/config/schema/snort/schema.xml b/metron-platform/metron-solr/metron-solr-common/src/main/config/schema/snort/schema.xml index 3c57574a0b..dd66bd9eb8 100644 --- a/metron-platform/metron-solr/metron-solr-common/src/main/config/schema/snort/schema.xml +++ b/metron-platform/metron-solr/metron-solr-common/src/main/config/schema/snort/schema.xml @@ -21,6 +21,8 @@ + + @@ -90,5 +92,6 @@ + diff --git a/metron-platform/metron-solr/metron-solr-common/src/main/config/schema/yaf/schema.xml b/metron-platform/metron-solr/metron-solr-common/src/main/config/schema/yaf/schema.xml index 37e5f12d7b..9665e135fb 100644 --- a/metron-platform/metron-solr/metron-solr-common/src/main/config/schema/yaf/schema.xml +++ b/metron-platform/metron-solr/metron-solr-common/src/main/config/schema/yaf/schema.xml @@ -21,6 +21,8 @@ + + @@ -96,5 +98,6 @@ + diff --git a/metron-platform/metron-solr/metron-solr-common/src/main/scripts/create_configset.sh b/metron-platform/metron-solr/metron-solr-common/src/main/scripts/create_configset.sh new file mode 100755 index 0000000000..59f8602908 --- /dev/null +++ b/metron-platform/metron-solr/metron-solr-common/src/main/scripts/create_configset.sh @@ -0,0 +1,53 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# +METRON_VERSION=${project.version} +METRON_HOME=/usr/metron/$METRON_VERSION +ZOOKEEPER=${ZOOKEEPER:-localhost:2181} +ZOOKEEPER_HOME=${ZOOKEEPER_HOME:-/usr/hdp/current/zookeeper-client} +SECURITY_ENABLED=${SECURITY_ENABLED:-false} +NEGOTIATE='' +if [ ${SECURITY_ENABLED,,} == 'true' ]; then + NEGOTIATE=' --negotiate -u : ' +fi + +# Get the first Solr node from the list of live nodes in Zookeeper +SOLR_NODE=`$ZOOKEEPER_HOME/bin/zkCli.sh -server $ZOOKEEPER ls /live_nodes | tail -n 1 | sed 's/\[\([^,]*\).*\]/\1/' | sed 's/_solr//'` + +# test for errors in SOLR URL +if [[ ${SOLR_NODE} =~ .*:null ]]; then + echo "Error occurred while attempting to read SOLR Cloud configuration data from Zookeeper."; + if ! [[ ${ZOOKEEPER} =~ .*/solr ]]; then + echo "Warning! Environment variable ZOOKEEPER=$ZOOKEEPER does not contain a chrooted zookeeper ensemble address - are you sure you do not mean ZOOKEEPER=$ZOOKEEPER/solr?"; + fi + exit 1; +fi + +# test for presence of datetime field in schema collection +DQT='"' +DATETIME_SCHEMA=" fieldTypes = dao.getColumnMetadata(Collections.singletonList("bro")); // Don't test all fields, just test a sample of different fields - Assert.assertEquals(263, fieldTypes.size()); + Assert.assertEquals(264, fieldTypes.size()); // Fields present in both with same type Assert.assertEquals(FieldType.TEXT, fieldTypes.get("guid")); @@ -148,7 +148,7 @@ public void returns_column_metadata_for_specified_indices() throws Exception { // getColumnMetadata with only snort { Map fieldTypes = dao.getColumnMetadata(Collections.singletonList("snort")); - Assert.assertEquals(33, fieldTypes.size()); + Assert.assertEquals(34, fieldTypes.size()); // Fields present in both with same type Assert.assertEquals(FieldType.TEXT, fieldTypes.get("guid"));