From 47b9aeb65f3e52e1192ab5a9fd93ff00d413cc3e Mon Sep 17 00:00:00 2001 From: Zhiguo Wu Date: Sat, 21 Dec 2024 21:29:10 +0800 Subject: [PATCH] BIGTOP-4132: Add Hive component on Bigtop-3.3.0 stack (#132) --- .../hadoop/configuration/hdfs-site.xml | 4 + .../bigtop/3.3.0/services/hadoop/order.json | 7 +- .../hive/configuration/beeline-log4j2.xml | 80 ++ .../services/hive/configuration/hive-env.xml | 112 ++ .../hive/configuration/hive-exec-log4j2.xml | 101 ++ .../hive/configuration/hive-log4j2.xml | 118 ++ .../services/hive/configuration/hive-site.xml | 1253 +++++++++++++++++ .../services/hive/configuration/hive.conf.xml | 64 + .../hive/configuration/llap-cli-log4j2.xml | 126 ++ .../hive/configuration/llap-daemon-log4j2.xml | 192 +++ .../bigtop/3.3.0/services/hive/metainfo.xml | 76 + .../bigtop/3.3.0/services/hive/order.json | 22 + .../bigtop/3.3.0/services/tez/metainfo.xml | 2 +- .../infra/1.0.0/services/mysql/order.json | 6 +- .../bigtop/v3_3_0/hive/HiveClientScript.java | 52 + .../v3_3_0/hive/HiveMetastoreScript.java | 142 ++ .../stack/bigtop/v3_3_0/hive/HiveParams.java | 162 +++ .../bigtop/v3_3_0/hive/HiveServer2Script.java | 105 ++ .../stack/bigtop/v3_3_0/hive/HiveSetup.java | 126 ++ .../stack/core/tarball/TarballDownloader.java | 2 +- .../core/utils/linux/LinuxFileUtils.java | 23 + 21 files changed, 2771 insertions(+), 4 deletions(-) create mode 100644 bigtop-manager-server/src/main/resources/stacks/bigtop/3.3.0/services/hive/configuration/beeline-log4j2.xml create mode 100644 bigtop-manager-server/src/main/resources/stacks/bigtop/3.3.0/services/hive/configuration/hive-env.xml create mode 100644 bigtop-manager-server/src/main/resources/stacks/bigtop/3.3.0/services/hive/configuration/hive-exec-log4j2.xml create mode 100644 bigtop-manager-server/src/main/resources/stacks/bigtop/3.3.0/services/hive/configuration/hive-log4j2.xml create mode 100644 bigtop-manager-server/src/main/resources/stacks/bigtop/3.3.0/services/hive/configuration/hive-site.xml create mode 100644 
bigtop-manager-server/src/main/resources/stacks/bigtop/3.3.0/services/hive/configuration/hive.conf.xml create mode 100644 bigtop-manager-server/src/main/resources/stacks/bigtop/3.3.0/services/hive/configuration/llap-cli-log4j2.xml create mode 100644 bigtop-manager-server/src/main/resources/stacks/bigtop/3.3.0/services/hive/configuration/llap-daemon-log4j2.xml create mode 100644 bigtop-manager-server/src/main/resources/stacks/bigtop/3.3.0/services/hive/metainfo.xml create mode 100644 bigtop-manager-server/src/main/resources/stacks/bigtop/3.3.0/services/hive/order.json create mode 100644 bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/hive/HiveClientScript.java create mode 100644 bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/hive/HiveMetastoreScript.java create mode 100644 bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/hive/HiveParams.java create mode 100644 bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/hive/HiveServer2Script.java create mode 100644 bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/hive/HiveSetup.java diff --git a/bigtop-manager-server/src/main/resources/stacks/bigtop/3.3.0/services/hadoop/configuration/hdfs-site.xml b/bigtop-manager-server/src/main/resources/stacks/bigtop/3.3.0/services/hadoop/configuration/hdfs-site.xml index 85fd05d4c..13f2c2d06 100644 --- a/bigtop-manager-server/src/main/resources/stacks/bigtop/3.3.0/services/hadoop/configuration/hdfs-site.xml +++ b/bigtop-manager-server/src/main/resources/stacks/bigtop/3.3.0/services/hadoop/configuration/hdfs-site.xml @@ -92,6 +92,10 @@ regardless of whether 'dfs.namenode.checkpoint.period' has expired. 
+ + hadoop.proxyuser.hive.hosts + * + dfs.replication.max 512 diff --git a/bigtop-manager-server/src/main/resources/stacks/bigtop/3.3.0/services/hadoop/order.json b/bigtop-manager-server/src/main/resources/stacks/bigtop/3.3.0/services/hadoop/order.json index 88d0990f1..4d7b8d3c7 100644 --- a/bigtop-manager-server/src/main/resources/stacks/bigtop/3.3.0/services/hadoop/order.json +++ b/bigtop-manager-server/src/main/resources/stacks/bigtop/3.3.0/services/hadoop/order.json @@ -11,7 +11,8 @@ "NAMENODE-STOP": [ "DATANODE-STOP", "SECONDARYNAMENODE-STOP", - "HBASE_MASTER-STOP" + "HBASE_MASTER-STOP", + "HIVE_METASTORE-STOP" ], "NAMENODE-START": [ "ZKFC-START", @@ -43,6 +44,10 @@ "NODEMANAGER-RESTART": [ "NAMENODE-RESTART" ], + "NODEMANAGER-STOP": [ + "HIVE_METASTORE-STOP", + "HIVESERVER2-STOP" + ], "HISTORY_SERVER-START": ["NAMENODE-START", "DATANODE-START"], "HISTORY_SERVER-RESTART": ["NAMENODE-RESTART"] } \ No newline at end of file diff --git a/bigtop-manager-server/src/main/resources/stacks/bigtop/3.3.0/services/hive/configuration/beeline-log4j2.xml b/bigtop-manager-server/src/main/resources/stacks/bigtop/3.3.0/services/hive/configuration/beeline-log4j2.xml new file mode 100644 index 000000000..11140c1d2 --- /dev/null +++ b/bigtop-manager-server/src/main/resources/stacks/bigtop/3.3.0/services/hive/configuration/beeline-log4j2.xml @@ -0,0 +1,80 @@ + + + + + + content + beeline-log4j2 template + Custom beeline-log4j2.properties + +# list of properties +property.hive.log.level = WARN +property.hive.root.logger = console + +# list of all appenders +appenders = console + +# console appender +appender.console.type = Console +appender.console.name = console +appender.console.target = SYSTEM_ERR +appender.console.layout.type = PatternLayout +appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} [%t]: %p %c{2}: %m%n + +# list of all loggers +loggers = HiveConnection + +# HiveConnection logs useful info for dynamic service discovery +logger.HiveConnection.name = 
org.apache.hive.jdbc.HiveConnection +logger.HiveConnection.level = INFO + +# root logger +rootLogger.level = ${sys:hive.log.level} +rootLogger.appenderRefs = root +rootLogger.appenderRef.root.ref = ${sys:hive.root.logger} + +]]> + + + longtext + + + diff --git a/bigtop-manager-server/src/main/resources/stacks/bigtop/3.3.0/services/hive/configuration/hive-env.xml b/bigtop-manager-server/src/main/resources/stacks/bigtop/3.3.0/services/hive/configuration/hive-env.xml new file mode 100644 index 000000000..73a638915 --- /dev/null +++ b/bigtop-manager-server/src/main/resources/stacks/bigtop/3.3.0/services/hive/configuration/hive-env.xml @@ -0,0 +1,112 @@ + + + + + + hive_log_dir + /var/log/hive + Hive Log Dir + Directory for Hive Log files. + + + hive_pid_dir + /var/run/hive + Hive PID Dir + Hive PID Dir. + + + hive_heapsize + 512 + Hive Java heap size + HiveServer2 Heap Size + + + hive_metastore_heapsize + 1024 + Hive Metastore Java heap size + Metastore Heap Size + + + heap_dump_path + /tmp + Path for heap dump file + Heap dump path + + + + + content + hive-env template + This is the freemarker template for hive-env.sh file + +export HADOOP_CLIENT_OPTS="$HADOOP_CLIENT_OPTS -Xms${HADOOP_HEAPSIZE}m -Xmx${HADOOP_HEAPSIZE}m" + +export HADOOP_CLIENT_OPTS="$HADOOP_CLIENT_OPTS -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=${heap_dump_path}" + +# Larger heap size may be required when running queries over large number of files or partitions. +# By default hive shell scripts use a heap size of 256 (MB). Larger heap size would also be +# appropriate for hive server (hwi etc). 
+ + +# Set HADOOP_HOME to point to a specific hadoop install directory +HADOOP_HOME=${hadoop_home} + +export HIVE_HOME=${hive_home} + +# Hive Configuration Directory can be controlled by: +export HIVE_CONF_DIR=${hive_conf_dir} + +# Folder containing extra libraries required for hive compilation/execution can be controlled by: +if [ "$HIVE_AUX_JARS_PATH" != "" ]; then +export HIVE_AUX_JARS_PATH=$HIVE_AUX_JARS_PATH +fi + +export METASTORE_PORT=${hive_metastore_port} +]]> + + + longtext + + + diff --git a/bigtop-manager-server/src/main/resources/stacks/bigtop/3.3.0/services/hive/configuration/hive-exec-log4j2.xml b/bigtop-manager-server/src/main/resources/stacks/bigtop/3.3.0/services/hive/configuration/hive-exec-log4j2.xml new file mode 100644 index 000000000..1568c59ab --- /dev/null +++ b/bigtop-manager-server/src/main/resources/stacks/bigtop/3.3.0/services/hive/configuration/hive-exec-log4j2.xml @@ -0,0 +1,101 @@ + + + + + + content + hive-exec-log4j2 template + Custom hive-exec-log4j2.properties + +# list of properties +property.hive.log.level = INFO +property.hive.root.logger = FA +property.hive.query.id = hadoop +property.hive.log.dir = ${sys:java.io.tmpdir}/${sys:user.name} +property.hive.log.file = ${sys:hive.query.id}.log + +# list of all appenders +appenders = console, FA + +# console appender +appender.console.type = Console +appender.console.name = console +appender.console.target = SYSTEM_ERR +appender.console.layout.type = PatternLayout +appender.console.layout.pattern = %d{ISO8601} %5p [%t] %c{2}: %m%n + +# simple file appender +appender.FA.type = RandomAccessFile +appender.FA.name = FA +appender.FA.fileName = ${sys:hive.log.dir}/${sys:hive.log.file} +appender.FA.layout.type = PatternLayout +appender.FA.layout.pattern = %d{ISO8601} %5p [%t] %c{2}: %m%n + +# list of all loggers +loggers = NIOServerCnxn, ClientCnxnSocketNIO, DataNucleus, Datastore, JPOX + +logger.NIOServerCnxn.name = org.apache.zookeeper.server.NIOServerCnxn +logger.NIOServerCnxn.level = 
WARN + +logger.ClientCnxnSocketNIO.name = org.apache.zookeeper.ClientCnxnSocketNIO +logger.ClientCnxnSocketNIO.level = WARN + +logger.DataNucleus.name = DataNucleus +logger.DataNucleus.level = ERROR + +logger.Datastore.name = Datastore +logger.Datastore.level = ERROR + +logger.JPOX.name = JPOX +logger.JPOX.level = ERROR + +# root logger +rootLogger.level = ${sys:hive.log.level} +rootLogger.appenderRefs = root +rootLogger.appenderRef.root.ref = ${sys:hive.root.logger} + +]]> + + + longtext + + + diff --git a/bigtop-manager-server/src/main/resources/stacks/bigtop/3.3.0/services/hive/configuration/hive-log4j2.xml b/bigtop-manager-server/src/main/resources/stacks/bigtop/3.3.0/services/hive/configuration/hive-log4j2.xml new file mode 100644 index 000000000..f8796d0ce --- /dev/null +++ b/bigtop-manager-server/src/main/resources/stacks/bigtop/3.3.0/services/hive/configuration/hive-log4j2.xml @@ -0,0 +1,118 @@ + + + + + + content + hive-log4j2 template + Custom hive-log4j2.properties + +# list of properties +property.hive.log.level = INFO +property.hive.root.logger = DRFA +property.hive.log.dir = ${sys:java.io.tmpdir}/${sys:user.name} +property.hive.log.file = hive.log +property.hive.perflogger.log.level = INFO + +# list of all appenders +appenders = console, DRFA + +# console appender +appender.console.type = Console +appender.console.name = console +appender.console.target = SYSTEM_ERR +appender.console.layout.type = PatternLayout +appender.console.layout.pattern = %d{ISO8601} %5p [%t] %c{2}: %m%n + +# daily rolling file appender +appender.DRFA.type = RollingRandomAccessFile +appender.DRFA.name = DRFA +appender.DRFA.fileName = ${sys:hive.log.dir}/${sys:hive.log.file} +# Use %pid in the filePattern to append @ to the filename if you want separate log files for different CLI session +appender.DRFA.filePattern = ${sys:hive.log.dir}/${sys:hive.log.file}.%d{yyyy-MM-dd} +appender.DRFA.layout.type = PatternLayout +appender.DRFA.layout.pattern = %d{ISO8601} %5p [%t] %c{2}: %m%n 
+appender.DRFA.policies.type = Policies +appender.DRFA.policies.time.type = TimeBasedTriggeringPolicy +appender.DRFA.policies.time.interval = 1 +appender.DRFA.policies.time.modulate = true +appender.DRFA.strategy.type = DefaultRolloverStrategy +appender.DRFA.strategy.max = 30 + +# list of all loggers +loggers = NIOServerCnxn, ClientCnxnSocketNIO, DataNucleus, Datastore, JPOX, PerfLogger, AmazonAws, ApacheHttp + +logger.NIOServerCnxn.name = org.apache.zookeeper.server.NIOServerCnxn +logger.NIOServerCnxn.level = WARN + +logger.ClientCnxnSocketNIO.name = org.apache.zookeeper.ClientCnxnSocketNIO +logger.ClientCnxnSocketNIO.level = WARN + +logger.DataNucleus.name = DataNucleus +logger.DataNucleus.level = ERROR + +logger.Datastore.name = Datastore +logger.Datastore.level = ERROR + +logger.JPOX.name = JPOX +logger.JPOX.level = ERROR + +logger.AmazonAws.name=com.amazonaws +logger.AmazonAws.level = INFO + +logger.ApacheHttp.name=org.apache.http +logger.ApacheHttp.level = INFO + +logger.PerfLogger.name = org.apache.hadoop.hive.ql.log.PerfLogger +logger.PerfLogger.level = ${sys:hive.perflogger.log.level} + +# root logger +rootLogger.level = ${sys:hive.log.level} +rootLogger.appenderRefs = root +rootLogger.appenderRef.root.ref = ${sys:hive.root.logger} + +]]> + + + longtext + + + diff --git a/bigtop-manager-server/src/main/resources/stacks/bigtop/3.3.0/services/hive/configuration/hive-site.xml b/bigtop-manager-server/src/main/resources/stacks/bigtop/3.3.0/services/hive/configuration/hive-site.xml new file mode 100644 index 000000000..32aa7b702 --- /dev/null +++ b/bigtop-manager-server/src/main/resources/stacks/bigtop/3.3.0/services/hive/configuration/hive-site.xml @@ -0,0 +1,1253 @@ + + + + + + tez.session.am.dag.submit.timeout.secs + 0 + + Time (in seconds) for which the Tez AM should wait for a DAG to be submitted before shutting down + + + + hive.server2.materializedviews.registry.impl + DUMMY + + Expects one of [default, dummy]. 
+ The implementation that we should use for the materialized views registry. + DEFAULT: Default cache for materialized views + DUMMY: Do not cache materialized views and hence forward requests to metastore + + + + hive.server2.max.start.attempts + + The number of times HiveServer2 will attempt to start before exiting, sleeping 60 seconds between retries. + + 5 + + + hive.server2.transport.mode + binary + Expects one of [binary, http]. Transport mode of HiveServer2. + + + hive.default.fileformat + TextFile + Default file format for CREATE TABLE statement. + Default File Format + + + hive.metastore.sasl.enabled + false + + If true, the metastore thrift interface will be secured with SASL. Clients must authenticate with Kerberos. + + + + hive.metastore.execute.setugi + true + + In unsecure mode, setting this property to true will cause the metastore to execute DFS operations + using the client's reported user and group permissions. Note that this property must be set on both the + client and server sides. Further note that it's best effort. If client sets it to true and server sets it to + false, client setting will be ignored. + + + + hive.optimize.bucketmapjoin.sortedmerge + false + + If the tables being joined are sorted and bucketized on the join columns, and they have the same number + of buckets, a sort-merge join can be performed by setting this parameter as true. + + + + hive.tez.container.size + 682 + + By default, Tez spawns containers of the size of a mapper. Use this property to override that value. + + Tez Container Size + + + hive.tez.input.format + org.apache.hadoop.hive.ql.io.HiveInputFormat + The default input format for Tez. Tez groups splits in the Application Master. + + + hive.tez.java.opts + -server -Djava.net.preferIPv4Stack=true -XX:NewRatio=8 -XX:+UseNUMA -XX:+UseG1GC -XX:+ResizeTLAB -XX:+PrintGCDetails -verbose:gc -XX:+PrintGCTimeStamps + Java command line options for Tez. 
+ + + hive.txn.timeout + 300 + + Time after which transactions are declared aborted if the client has not sent a heartbeat, in seconds. + + + + hive.compactor.initiator.on + true + + Whether to run the compactor's initiator thread in this metastore instance or not. If there is more + than one instance of the thrift metastore this should be set to true on only one instance. Setting true on + only one host can be achieved by creating a config-group containing the metastore host, and overriding the + default value to true in it. + + Run Compactor + + + hive.compactor.worker.threads + 5 + + Number of compactor worker threads to run on this metastore instance. Can be different values on different metastore instances. + + Number of threads used by Compactor + + + hive.create.as.insert.only + false + + Whether the eligible tables should be created as ACID insert-only by default. + Does not apply to external tables, the ones using storage handlers, etc. + + Create Tables as ACID Insert Only + + + metastore.create.as.acid + false + + Whether the eligible tables should be created as full ACID by default. + Does not apply to external tables, the ones using storage handlers, etc. + + Create Tables as Full ACID + + + hive.compactor.delta.num.threshold + 10 + + Number of delta files that must exist in a directory before the compactor will attempt a minor compaction. + + + + hive.compactor.abortedtxn.threshold + 1000 + + Number of aborted transactions involving a particular table or partition before major compaction is initiated. + + + + datanucleus.cache.level2.type + none + + Determines caching mechanism DataNucleus L2 cache will use. It is strongly recommended to use + default value of 'none' as other values may cause consistency errors in Hive. 
+ + + + hive.metastore.connect.retries + 24 + Number of retries while opening a connection to metastore + + + hive.metastore.failure.retries + 24 + Number of retries upon failure of Thrift metastore calls + + + hive.metastore.client.connect.retry.delay + 5s + + Expects a time value with unit (d/day, h/hour, m/min, s/sec, ms/msec, us/usec, ns/nsec), which is sec if not specified. + Number of seconds for the client to wait between consecutive connection attempts + + + + hive.metastore.client.socket.timeout + 1800s + + Expects a time value with unit (d/day, h/hour, m/min, s/sec, ms/msec, us/usec, ns/nsec), which is sec if not specified. + MetaStore Client socket timeout in seconds + + + + hive.mapjoin.bucket.cache.size + 10000 + + + + hive.cluster.delegation.token.store.class + org.apache.hadoop.hive.thrift.ZooKeeperTokenStore + + The delegation token store implementation. Set to org.apache.hadoop.hive.thrift.ZooKeeperTokenStore for load-balanced cluster. + + + + hive.server2.support.dynamic.service.discovery + true + + Whether HiveServer2 supports dynamic service discovery for its clients. + To support this, each instance of HiveServer2 currently uses ZooKeeper to register itself, + when it is brought up. JDBC/ODBC clients should use the ZooKeeper ensemble: hive.zookeeper.quorum + in their connection string. + + + + hive.exec.scratchdir + Hive Exec Scratchdir + /tmp/hive + + HDFS root scratch dir for Hive jobs which gets created with write all (733) permission. For each + connecting user, an HDFS scratch dir: ${hive.exec.scratchdir}/<username> is created, with + ${hive.scratch.dir.permission}. + + + + hive.exec.submitviachild + false + + + + hive.exec.submit.local.task.via.child + true + + Determines whether local tasks (typically mapjoin hashtable generation phase) runs in + separate JVM (true recommended) or not. + Avoids the overhead of spawning new JVM, but can lead to out-of-memory issues. 
+ + + + hive.exec.compress.output + false + + This controls whether the final outputs of a query (to a local/HDFS file or a Hive table) is compressed. + The compression codec and other options are determined from Hadoop config variables mapred.output.compress* + + + + hive.exec.compress.intermediate + false + + This controls whether intermediate files produced by Hive between multiple map-reduce jobs are compressed. + The compression codec and other options are determined from Hadoop config variables mapred.output.compress* + + + + hive.exec.reducers.bytes.per.reducer + 67108864 + + Defines the size per reducer. For example, if it is set to 64M, given 256M input size, 4 reducers will be used. + + Data per Reducer + + + hive.exec.reducers.max + 1009 + + max number of reducers will be used. If the one specified in the configuration parameter mapred.reduce.tasks is + negative, Hive will use this one as the max number of reducers when automatically determine number of reducers. + + + + hive.exec.pre.hooks + + + Comma-separated list of pre-execution hooks to be invoked for each statement. + A pre-execution hook is specified as the name of a Java class which implements the + org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface. + + + + hive.exec.post.hooks + + + Comma-separated list of post-execution hooks to be invoked for each statement. + A post-execution hook is specified as the name of a Java class which implements the + org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface. + + + + hive.exec.failure.hooks + + + Comma-separated list of on-failure hooks to be invoked for each statement. + An on-failure hook is specified as the name of Java class which implements the + org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface. 
+ + + + hive.exec.parallel + false + Whether to execute jobs in parallel + + + hive.exec.parallel.thread.number + 8 + How many jobs at most can be executed in parallel + + + hive.mapred.reduce.tasks.speculative.execution + false + Whether speculative execution for reducers should be turned on. + + + hive.exec.dynamic.partition + true + Whether or not to allow dynamic partitions in DML/DDL. + + + hive.exec.dynamic.partition.mode + nonstrict + + In strict mode, the user must specify at least one static partition + in case the user accidentally overwrites all partitions. + NonStrict allows all partitions of a table to be dynamic. + + Allow all partitions to be Dynamic + + + hive.exec.max.dynamic.partitions + 5000 + Maximum number of dynamic partitions allowed to be created in total. + + + hive.exec.max.dynamic.partitions.pernode + 2000 + Maximum number of dynamic partitions allowed to be created in each mapper/reducer node. + + + hive.exec.max.created.files + 100000 + Maximum number of HDFS files created by all mappers/reducers in a MapReduce job. + + + hive.metastore.warehouse.dir + Hive Metastore Warehouse directory + /warehouse/tablespace/managed/hive + location of default database for the warehouse + + + hive.metastore.warehouse.external.dir + Hive Metastore Warehouse External directory + /warehouse/tablespace/external/hive + location of default database for the warehouse of external tables + + + hive.lock.manager + + + + hive.metastore.uris + thrift://localhost:9083 + + Thrift URI for the remote metastore. Used by metastore client to connect to remote metastore. + + + + hive.metastore.server.max.threads + 100000 + Maximum number of worker threads in the Thrift server's pool. + + + hive.metastore.kerberos.keytab.file + /etc/security/keytabs/hive.service.keytab + + The path to the Kerberos Keytab file containing the metastore Thrift server's service principal. 
+ + + + hive.cluster.delegation.token.store.zookeeper.znode + /hive/cluster/delegation + The root path for token store data. + + + hive.metastore.cache.pinobjtypes + Table,Database,Type,FieldSchema,Order + List of comma separated metastore object types that should be pinned in the cache + + + hive.metastore.pre.event.listeners + org.apache.hadoop.hive.ql.security.authorization.AuthorizationPreEventListener + List of comma separated listeners for metastore events. + + + hive.metastore.authorization.storage.checks + false + + Should the metastore do authorization checks against the underlying storage (usually hdfs) + for operations like drop-partition (disallow the drop-partition if the user in + question doesn't have permissions to delete the corresponding directory + on the storage). + + + + hive.server2.idle.session.timeout + 1d + + + hive.server2.idle.operation.timeout + 6h + + + hive.strict.managed.tables + false + + + hive.txn.strict.locking.mode + false + + + hive.materializedview.rewriting.incremental + false + + + hive.map.aggr + true + Whether to use map-side aggregation in Hive Group By queries + + + hive.cbo.enable + true + Flag to control enabling Cost Based Optimizations using Calcite framework. + Enable Cost Based Optimizer + + + hive.mapjoin.optimized.hashtable + true + + Whether Hive should use memory-optimized hash table for MapJoin. Only works on Tez, + because memory-optimized hashtable cannot be serialized. + + + + hive.smbjoin.cache.rows + 10000 + How many rows with the same key value should be cached in memory per smb joined table. + + + hive.map.aggr.hash.percentmemory + 0.5 + Portion of total memory to be used by map-side group aggregation hash table + + + hive.map.aggr.hash.force.flush.memory.threshold + 0.9 + + The max memory to be used by map-side group aggregation hash table. 
+ If the memory usage is higher than this number, force to flush data + + + + hive.map.aggr.hash.min.reduction + 0.5 + + Hash aggregation will be turned off if the ratio between hash table size and input rows is bigger than this number. + Set to 1 to make sure hash aggregation is never turned off. + + + + hive.merge.mapfiles + true + Merge small files at the end of a map-only job + + + hive.merge.mapredfiles + false + Merge small files at the end of a map-reduce job + + + hive.merge.tezfiles + false + Merge small files at the end of a Tez DAG + + + hive.merge.size.per.task + 256000000 + Size of merged files at the end of the job + + + hive.merge.smallfiles.avgsize + 16000000 + + When the average output file size of a job is less than this number, Hive will start an additional + map-reduce job to merge the output files into bigger files. This is only done for map-only jobs + if hive.merge.mapfiles is true, and for map-reduce jobs if hive.merge.mapredfiles is true. + + + + hive.merge.rcfile.block.level + true + + + + hive.merge.orcfile.stripe.level + true + + When hive.merge.mapfiles or hive.merge.mapredfiles is enabled while writing a + table with ORC file format, enabling this config will do stripe level fast merge + for small ORC files. Note that enabling this config will not honor padding tolerance + config (hive.exec.orc.block.padding.tolerance). + + + + hive.orc.splits.include.file.footer + false + + If turned on splits generated by orc will include metadata about the stripes in the file. This + data is read remotely (from the client or HS2 machine) and sent to all the tasks. + + + + hive.orc.compute.splits.num.threads + 10 + How many threads orc should use to create splits in parallel. 
+ + + hive.auto.convert.join + true + + Whether Hive enables the optimization about converting common join into mapjoin based on the input file size + + + + hive.auto.convert.join.noconditionaltask + true + + Whether Hive enables the optimization about converting common join into mapjoin based on the input file size. + If this parameter is on, and the sum of size for n-1 of the tables/partitions for a n-way join is smaller than the + specified size, the join is directly converted to a mapjoin (there is no conditional task). + + + + hive.limit.optimize.enable + true + Whether to enable the optimization of trying a smaller subset of data for simple LIMIT first. + + + hive.tez.cpu.vcores + -1 + + By default Tez will ask for however many cpus map-reduce is configured to use per container. This can be used to overwrite. + + + + hive.tez.log.level + INFO + + The log level to use for tasks executing as part of the DAG. + Used only if hive.tez.java.opts is used to configure Java options. + + + + hive.enforce.sortmergebucketmapjoin + true + + If the user asked for sort-merge bucketed map-side join, and it cannot be performed, should the query fail or not? + + + + hive.auto.convert.sortmerge.join + true + + Will the join be automatically converted to a sort-merge join, if the joined tables pass the criteria for sort-merge join. + + + + hive.auto.convert.sortmerge.join.to.mapjoin + true + + If hive.auto.convert.sortmerge.join is set to true, and a join was converted to a sort-merge join, + this parameter decides whether each table should be tried as a big table, and effectively a map-join should be + tried. That would create a conditional task with n+1 children for a n-way join (1 child for each table as the + big table), and the backup task will be the sort-merge join. In some cases, a map-join would be faster than a + sort-merge join, if there is no advantage of having the output bucketed and sorted. 
For example, if a very big sorted + and bucketed table with few files (say 10 files) are being joined with a very small sorted and bucketed table + with few files (10 files), the sort-merge join will only use 10 mappers, and a simple map-only join might be faster + if the complete small table can fit in memory, and a map-join can be performed. + + + + hive.optimize.constant.propagation + true + Whether to enable constant propagation optimizer + + + hive.optimize.metadataonly + true + + + + hive.optimize.null.scan + true + Don't scan relations which are guaranteed to not generate any rows + + + hive.optimize.bucketmapjoin + true + Whether to try bucket mapjoin + + + hive.optimize.reducededuplication + true + + Remove extra map-reduce jobs if the data is already clustered by the same key which needs to be used again. + This should always be set to true. Since it is a new feature, it has been made configurable. + + + + hive.optimize.reducededuplication.min.reducer + 4 + + Reduce deduplication merges two RSs by moving key/parts/reducer-num of the child RS to parent RS. + That means if reducer-num of the child RS is fixed (order by or forced bucketing) and small, it can make + very slow, single MR. + The optimization will be automatically disabled if number of reducers would be less than specified value. + + + + hive.optimize.sort.dynamic.partition + false + + When enabled dynamic partitioning column will be globally sorted. + This way we can keep only one record writer open for each partition value + in the reducer thereby reducing the memory pressure on reducers. + + Sort Partitions Dynamically + + + hive.stats.autogather + true + A flag to gather statistics automatically during the INSERT OVERWRITE command. + + + hive.stats.dbclass + fs + + Expects one of the pattern in [jdbc(:.*), hbase, counter, custom, fs]. + The storage that stores temporary Hive statistics. Currently, jdbc, hbase, counter and custom type are + supported. 
+ + + + hive.stats.fetch.partition.stats + true + + Annotation of operator tree with statistics information requires partition level basic + statistics like number of rows, data size and file size. Partition statistics are fetched from + metastore. Fetching partition statistics for each needed partition can be expensive when the + number of partitions is high. This flag can be used to disable fetching of partition statistics + from metastore. When this flag is disabled, Hive will make calls to filesystem to get file sizes + and will estimate the number of rows from row schema. + + Fetch partition stats at compiler + + + hive.stats.fetch.column.stats + false + + Annotation of operator tree with statistics information requires column statistics. + Column statistics are fetched from metastore. Fetching column statistics for each needed column + can be expensive when the number of columns is high. This flag can be used to disable fetching + of column statistics from metastore. + + Fetch column stats at compiler + + + hive.zookeeper.namespace + hive_zookeeper_namespace + The parent node under which all ZooKeeper nodes are created. + + + hive.txn.manager + org.apache.hadoop.hive.ql.lockmgr.DbTxnManager + + Transaction Manager + + + hive.txn.max.open.batch + 1000 + + Maximum number of transactions that can be fetched in one call to open_txns(). + Increasing this will decrease the number of delta files created when + streaming data into Hive. But it will also increase the number of + open transactions at any given time, possibly impacting read performance. + + + + hive.support.concurrency + true + Support concurrency and use locks, needed for Transactions. Requires Zookeeper. + Use Locking + + + hive.cli.print.header + false + Whether to print the names of the columns in query output. + + + hive.compactor.worker.timeout + 86400 + + Expects a time value with unit (d/day, h/hour, m/min, s/sec, ms/msec, us/usec, ns/nsec), which is sec if not + specified. 
+ Time before a given compaction in working state is declared a failure + and returned to the initiated state. + + + + hive.compactor.check.interval + 300 + + Expects a time value with unit (d/day, h/hour, m/min, s/sec, ms/msec, us/usec, ns/nsec), which is sec if not + specified. + Time between checks to see if any partitions need to be compacted. + This should be kept high because each check for compaction requires many calls against the NameNode. + + + + hive.compactor.delta.pct.threshold + 0.1f + Percentage (by size) of base that deltas can be before major compaction is initiated. + + + hive.fetch.task.conversion + more + + Expects one of [none, minimal, more]. + Some select queries can be converted to single FETCH task minimizing latency. + Currently the query should be single sourced not having any subquery and should not have + any aggregations or distincts (which incurs RS), lateral views and joins. + 0. none : disable hive.fetch.task.conversion + 1. minimal : SELECT STAR, FILTER on partition columns, LIMIT only + 2. more : SELECT, FILTER, LIMIT only (support TABLESAMPLE and virtual columns) + + + + hive.fetch.task.conversion.threshold + 1073741824 + + Input threshold for applying hive.fetch.task.conversion. If target table is native, input length + is calculated by summation of file lengths. If it's not native, storage handler for the table + can optionally implement org.apache.hadoop.hive.ql.metadata.InputEstimator interface. + + + + hive.fetch.task.aggr + false + + Aggregation queries with no group-by clause (for example, select count(*) from src) execute + final aggregations in single reduce task. If this is set true, Hive delegates final aggregation + stage to fetch task, possibly decreasing the query time. + + + + hive.security.metastore.authorization.manager + Hive Authorization Manager + org.apache.hadoop.hive.ql.security.authorization.StorageBasedAuthorizationProvider + + authorization manager class name to be used in the metastore for authorization. 
+ The user defined authorization class should implement interface + org.apache.hadoop.hive.ql.security.authorization.HiveMetastoreAuthorizationProvider. + + + + hive.security.metastore.authorization.auth.reads + true + If this is true, metastore authorizer authorizes read actions on database, table + + + hive.security.metastore.authenticator.manager + org.apache.hadoop.hive.ql.security.HadoopDefaultMetastoreAuthenticator + + authenticator manager class name to be used in the metastore for authentication. + The user defined authenticator should implement interface + org.apache.hadoop.hive.ql.security.HiveAuthenticationProvider. + + + + hive.server2.logging.operation.enabled + true + When true, HS2 will save operation logs + + + hive.server2.logging.operation.log.location + HiveServer2 Logging Operation Log Location + /tmp/hive/operation_logs + Top level directory where operation logs are stored if logging functionality is enabled + + + hive.server2.zookeeper.namespace + hiveserver2 + The parent node in ZooKeeper used by HiveServer2 when supporting dynamic service discovery. + + + hive.server2.thrift.http.port + 10001 + Port number of HiveServer2 Thrift interface when hive.server2.transport.mode is 'http'. + + + hive.server2.thrift.port + 10000 + HiveServer2 Port + TCP port number to listen on, default 10000. + + + hive.server2.thrift.sasl.qop + auth + + Expects one of [auth, auth-int, auth-conf]. + Sasl QOP value; Set it to one of following values to enable higher levels of + protection for HiveServer2 communication with clients. + "auth" - authentication only (default) + "auth-int" - authentication plus integrity protection + "auth-conf" - authentication plus integrity and confidentiality protection + This is applicable only if HiveServer2 is configured to use Kerberos authentication. 
+ + + + hive.server2.thrift.max.worker.threads + 500 + Maximum number of Thrift worker threads + + + hive.server2.allow.user.substitution + true + Allow alternate user to be specified as part of HiveServer2 open connection request. + + + hive.server2.authentication.spnego.keytab + /etc/security/keytabs/spnego.service.keytab + + keytab file for SPNego principal, optional, + typical value would look like /etc/security/keytabs/spnego.service.keytab, + This keytab would be used by HiveServer2 when Kerberos security is enabled and + HTTP transport mode is used. + This needs to be set only if SPNEGO is to be used in authentication. + SPNego authentication would be honored only if valid + hive.server2.authentication.spnego.principal + and + hive.server2.authentication.spnego.keytab + are specified. + + + + hive.server2.authentication + Authentication mode, default NONE. Options are NONE, NOSASL, KERBEROS, LDAP, PAM and CUSTOM + + NONE + HiveServer2 Authentication + + + hive.metastore.event.db.notification.api.auth + true + + Should metastore do authorization against database notification related APIs such as get_next_notification. + If set to true, then only the superusers in proxy settings have the permission. + + + + hive.server2.enable.doAs + true + + Setting this property to true will have HiveServer2 execute + Hive operations as the user making the calls to it. + + Run as end user instead of Hive user + + + hive.server2.table.type.mapping + CLASSIC + + Expects one of [classic, hive]. + This setting reflects how HiveServer2 will report the table types for JDBC and other + client implementations that retrieve the available tables and supported table types + HIVE : Exposes Hive's native table types like MANAGED_TABLE, EXTERNAL_TABLE, VIRTUAL_VIEW + CLASSIC : More generic types like TABLE and VIEW + + + + hive.server2.use.SSL + false + Set this to true for using SSL encryption in HiveServer2. 
+ Use SSL + + + hive.user.install.directory + Hive User Install directory + /user/ + + If hive (in tez mode only) cannot find a usable hive jar in "hive.jar.directory", + it will upload the hive jar to "hive.user.install.directory/user.name" + and use it to run queries. + + + + hive.vectorized.groupby.maxentries + 100000 + + Max number of entries in the vector group by aggregation hashtables. + Exceeding this will trigger a flush irrelevant of memory pressure condition. + + + + hive.merge.nway.joins + false + Merge adjacent joins into a single n-way join + + + hive.prewarm.enabled + false + Enables container prewarm for Tez (Hadoop 2 only) + Hold Containers to Reduce Latency + + + hive.prewarm.numcontainers + 3 + Controls the number of containers to prewarm for Tez (Hadoop 2 only) + Number of Containers Held + + + hive.convert.join.bucket.mapjoin.tez + false + + Whether joins can be automatically converted to bucket map joins in hive + when tez is used as the execution engine. + + + + hive.tez.auto.reducer.parallelism + true + + Turn on Tez' auto reducer parallelism feature. When enabled, Hive will still estimate data sizes + and set parallelism estimates. Tez will sample source vertices' output sizes and adjust the estimates at + runtime as necessary. + + Allow dynamic numbers of reducers + + + hive.tez.max.partition.factor + 2.0 + + When auto reducer parallelism is enabled this factor will be used to over-partition data in shuffle edges. + + + + hive.tez.min.partition.factor + 0.25 + + When auto reducer parallelism is enabled this factor will be used to put a lower limit to the number + of reducers that tez specifies. + + + + hive.tez.dynamic.partition.pruning + true + + When dynamic pruning is enabled, joins on partition keys will be processed by sending events from + the processing vertices to the tez application master. These events will be used to prune unnecessary + partitions. 
+ + Allow dynamic partition pruning + + + hive.tez.dynamic.partition.pruning.max.event.size + 1048576 + + Maximum size of events sent by processors in dynamic pruning. If this size is crossed no pruning will take place. + + + + hive.tez.dynamic.partition.pruning.max.data.size + 104857600 + Maximum total data size of events in dynamic pruning. + + + hive.tez.smb.number.waves + 0.5 + + The number of waves in which to run the SMB join. Account for cluster being occupied. Ideally should be 1 wave. + + + + hive.vectorized.execution.enabled + true + + This flag should be set to true to enable vectorized mode of query execution. + The default value is false. + + Enable Vectorization and Map Vectorization + + + hive.auto.convert.join.noconditionaltask.size + 52428800 + + If hive.auto.convert.join.noconditionaltask is off, this parameter does not take effect. However, if it + is on, and the sum of size for n-1 of the tables/partitions for a n-way join is smaller than this size, the join is directly + converted to a mapjoin(there is no conditional task). + + For Map Join, per Map memory threshold + + + hive.optimize.index.filter + true + Whether to enable automatic use of indexes + Push Filters to Storage + + + hive.vectorized.groupby.checkinterval + 4096 + + Number of entries added to the group by aggregation hash before a recomputation of average entry size is performed. + + + + hive.vectorized.groupby.flush.percent + 0.1 + Percent of entries in the group by aggregation hash flushed when the memory threshold is exceeded. + + + hive.compute.query.using.stats + true + + When set to true Hive will answer a few queries like count(1) purely using stats + stored in metastore. For basic stats collection turn on the config hive.stats.autogather to true. + For more advanced stats collection need to run analyze table queries. 
+ + Compute simple queries using stats only + + + hive.limit.pushdown.memory.usage + 0.04 + The max memory to be used for hash in RS operator for top K selection. + + + hive.server2.tez.sessions.per.default.queue + 1 + + A positive integer that determines the number of Tez sessions that should be + launched on each of the queues specified by "hive.server2.tez.default.queues". + Determines the parallelism on each queue. + + Session per queue + + + hive.driver.parallel.compilation + true + This flag allows HiveServer2 to compile queries in parallel. + Compile queries in parallel + + + hive.server2.tez.initialize.default.sessions + false + + This flag is used in HiveServer2 to enable a user to use HiveServer2 without + turning on Tez for HiveServer2. The user could potentially want to run queries + over Tez without the pool of sessions. + + Start Tez session at Initialization + + + hive.server2.tez.default.queues + Default query queues + default + + A list of comma separated values corresponding to YARN queues of the same name. + When HiveServer2 is launched in Tez mode, this configuration needs to be set + for multiple Tez sessions to run in parallel on the cluster. + + + + hive.server2.webui.port + 10002 + Web UI port address + + + hive.server2.webui.use.ssl + false + Enable SSL for HiveServer2 Interactive + + + hive.server2.webui.enable.cors + true + Enable cross origin requests (CORS) + + + hive.server2.webui.cors.allowed.headers + X-Requested-With,Content-Type,Accept,Origin,X-Requested-By,x-requested-by + Comma separated list of http headers that are allowed when CORS is enabled. + + + hive.exec.orc.split.strategy + HYBRID + + This is not a user level config. BI strategy is used when the requirement is to spend less time in split + generation + as opposed to query execution (split generation does not read or cache file footers). 
+ ETL strategy is used when spending little more time in split generation is acceptable + (split generation reads and caches file footers). HYBRID chooses between the above strategies + based on heuristics. + + + + hive.vectorized.execution.reduce.enabled + true + + This flag should be set to true to enable vectorized mode of the reduce-side of + query execution. + + Enable Reduce Vectorization + + + hive.default.fileformat.managed + ORC + + Default file format for CREATE TABLE statement applied to managed tables only. + External tables will be created with default file format. Leaving this null + will result in using the default file format for all tables. + + + + hive.hook.proto.base-directory + ${hive.metastore.warehouse.external.dir}/sys.db/query_data/ + Base directory for hive proto hook. + + + hive.execution.mode + container + Chooses whether query fragments will run in container or in llap + + + hive.tez.input.generate.consistent.splits + true + Whether to generate consistent split locations when generating splits in the AM + + + hive.tez.exec.print.summary + true + Display breakdown of execution steps, for every query executed by the shell. + + + hive.vectorized.execution.mapjoin.native.enabled + true + + This flag should be set to true to enable native (i.e. non-pass through) vectorization + of queries using MapJoin. + + + + hive.vectorized.execution.mapjoin.minmax.enabled + true + + This flag should be set to true to enable vector map join hash tables to + use min / max filtering for integer join queries using MapJoin. + + + + hive.vectorized.execution.mapjoin.native.fast.hashtable.enabled + true + + This flag should be set to true to enable use of native fast vector map join hash tables in + queries using MapJoin. + + + + hive.optimize.dynamic.partition.hashjoin + true + + Whether to enable dynamically partitioned hash join optimization. 
+ This setting is also dependent on enabling hive.auto.convert.join + + + + hive.metastore.event.listeners + + Listeners for metastore events + + + hive.mapjoin.hybridgrace.hashtable + false + + Whether to use hybrid grace hash join as the join method for mapjoin. + Applies to dynamically partitioned joins when running in LLAP, but not to regular + broadcast(map) joins. hive.llap.enable.grace.join.in.llap is used for this. + + + + hive.tez.cartesian-product.enabled + true + Use Tez cartesian product edge for Hive cartesian product + + + hive.tez.bucket.pruning + true + + When pruning is enabled, filters on bucket columns will be processed by + filtering the splits against a bitset of included buckets. This needs predicates + produced by hive.optimize.ppd and hive.optimize.index.filters. + + + + hive.service.metrics.codahale.reporter.classes + org.apache.hadoop.hive.common.metrics.metrics2.JsonFileMetricsReporter,org.apache.hadoop.hive.common.metrics.metrics2.JmxMetricsReporter,org.apache.hadoop.hive.common.metrics.metrics2.Metrics2Reporter + Comma separated list of reporter implementation classes for metric class + + + hive.metastore.dml.events + true + If true, the metastore will be asked to fire events for DML operations + + + hive.repl.cm.enabled + + Turn on ChangeManager, so delete files will go to cmrootdir. + + + hive.metastore.transactional.event.listeners + org.apache.hive.hcatalog.listener.DbNotificationListener + + A comma separated list of Java classes that implement the + org.apache.hadoop.hive.metastore.MetaStoreEventListener interface. Both the metastore event and + corresponding listener method will be invoked in the same JDO transaction. + + + + hive.repl.cmrootdir + + Root dir for ChangeManager, used for deleted files. + + + hive.repl.rootdir + + HDFS root dir for all replication dumps. 
+ + + hive.vectorized.adaptor.usage.mode + chosen + + Specifies the extent to which the VectorUDFAdaptor will be used for UDFs that do not have a corresponding + vectorized class. + 0. none : disable any usage of VectorUDFAdaptor + 1. chosen : use VectorUDFAdaptor for a small set of UDFs that were chosen for good performance + 2. all : use VectorUDFAdaptor for all UDFs + + + diff --git a/bigtop-manager-server/src/main/resources/stacks/bigtop/3.3.0/services/hive/configuration/hive.conf.xml b/bigtop-manager-server/src/main/resources/stacks/bigtop/3.3.0/services/hive/configuration/hive.conf.xml new file mode 100644 index 000000000..c7bce78fc --- /dev/null +++ b/bigtop-manager-server/src/main/resources/stacks/bigtop/3.3.0/services/hive/configuration/hive.conf.xml @@ -0,0 +1,64 @@ + + + + + + hive_user_nofile_limit + 128000 + Max open files limit setting for Hive user. + + + hive_user_nproc_limit + 65536 + Max number of processes limit setting for Hive user. + + + content + hive.conf template + This is the freemarker template for Hive file + + + + longtext + + + \ No newline at end of file diff --git a/bigtop-manager-server/src/main/resources/stacks/bigtop/3.3.0/services/hive/configuration/llap-cli-log4j2.xml b/bigtop-manager-server/src/main/resources/stacks/bigtop/3.3.0/services/hive/configuration/llap-cli-log4j2.xml new file mode 100644 index 000000000..901816f7f --- /dev/null +++ b/bigtop-manager-server/src/main/resources/stacks/bigtop/3.3.0/services/hive/configuration/llap-cli-log4j2.xml @@ -0,0 +1,126 @@ + + + + + + content + llap-cli-log4j2 template + Custom llap-cli-log4j2.properties + +# list of properties +property.hive.log.level = WARN +property.hive.root.logger = console +property.hive.log.dir = ${sys:java.io.tmpdir}/${sys:user.name} +property.hive.log.file = llap-cli.log +property.hive.llapstatus.consolelogger.level = INFO + +# list of all appenders +appenders = console, DRFA, llapstatusconsole + +# console appender +appender.console.type = Console 
+appender.console.name = console +appender.console.target = SYSTEM_ERR +appender.console.layout.type = PatternLayout +appender.console.layout.pattern = %p %c{2}: %m%n + +# llapstatusconsole appender +appender.llapstatusconsole.type = Console +appender.llapstatusconsole.name = llapstatusconsole +appender.llapstatusconsole.target = SYSTEM_ERR +appender.llapstatusconsole.layout.type = PatternLayout +appender.llapstatusconsole.layout.pattern = %m%n + +# daily rolling file appender +appender.DRFA.type = RollingRandomAccessFile +appender.DRFA.name = DRFA +appender.DRFA.fileName = ${sys:hive.log.dir}/${sys:hive.log.file} +# Use %pidn in the filePattern to append @ to the filename if you want separate log files for different CLI session +appender.DRFA.filePattern = ${sys:hive.log.dir}/${sys:hive.log.file}.%d{yyyy-MM-dd} +appender.DRFA.layout.type = PatternLayout +appender.DRFA.layout.pattern = %d{ISO8601} %5p [%t] %c{2}: %m%n +appender.DRFA.policies.type = Policies +appender.DRFA.policies.time.type = TimeBasedTriggeringPolicy +appender.DRFA.policies.time.interval = 1 +appender.DRFA.policies.time.modulate = true +appender.DRFA.strategy.type = DefaultRolloverStrategy +appender.DRFA.strategy.max = 30 + +# list of all loggers +loggers = ZooKeeper, DataNucleus, Datastore, JPOX, HadoopConf, LlapStatusServiceDriverConsole + +logger.ZooKeeper.name = org.apache.zookeeper +logger.ZooKeeper.level = WARN + +logger.DataNucleus.name = DataNucleus +logger.DataNucleus.level = ERROR + +logger.Datastore.name = Datastore +logger.Datastore.level = ERROR + +logger.JPOX.name = JPOX +logger.JPOX.level = ERROR + +logger.HadoopConf.name = org.apache.hadoop.conf.Configuration +logger.HadoopConf.level = ERROR + +logger.LlapStatusServiceDriverConsole.name = LlapStatusServiceDriverConsole +logger.LlapStatusServiceDriverConsole.additivity = false +logger.LlapStatusServiceDriverConsole.level = ${sys:hive.llapstatus.consolelogger.level} + + +# root logger +rootLogger.level = ${sys:hive.log.level} 
+rootLogger.appenderRefs = root, DRFA +rootLogger.appenderRef.root.ref = ${sys:hive.root.logger} +rootLogger.appenderRef.DRFA.ref = DRFA +logger.LlapStatusServiceDriverConsole.appenderRefs = llapstatusconsole, DRFA +logger.LlapStatusServiceDriverConsole.appenderRef.llapstatusconsole.ref = llapstatusconsole +logger.LlapStatusServiceDriverConsole.appenderRef.DRFA.ref = DRFA + +]]> + + + longtext + + + diff --git a/bigtop-manager-server/src/main/resources/stacks/bigtop/3.3.0/services/hive/configuration/llap-daemon-log4j2.xml b/bigtop-manager-server/src/main/resources/stacks/bigtop/3.3.0/services/hive/configuration/llap-daemon-log4j2.xml new file mode 100644 index 000000000..0ef8491e4 --- /dev/null +++ b/bigtop-manager-server/src/main/resources/stacks/bigtop/3.3.0/services/hive/configuration/llap-daemon-log4j2.xml @@ -0,0 +1,192 @@ + + + + + + content + llap-daemon-log4j2 template + Custom llap-daemon-log4j2.properties + +# list of properties +property.llap.daemon.log.level = INFO +property.llap.daemon.root.logger = console +property.llap.daemon.log.dir = . 
+property.llap.daemon.log.file = llapdaemon.log +property.llap.daemon.historylog.file = llapdaemon_history.log +property.llap.daemon.log.maxfilesize = 256MB +property.llap.daemon.log.maxbackupindex = 240 + +# list of all appenders +appenders = console, RFA, HISTORYAPPENDER, query-routing + +# console appender +appender.console.type = Console +appender.console.name = console +appender.console.target = SYSTEM_ERR +appender.console.layout.type = PatternLayout +appender.console.layout.pattern = %d{ISO8601} %5p [%t (%X{fragmentId})] %c{2}: %m%n + +# rolling file appender +appender.RFA.type = RollingRandomAccessFile +appender.RFA.name = RFA +appender.RFA.fileName = ${sys:llap.daemon.log.dir}/${sys:llap.daemon.log.file} +appender.RFA.filePattern = ${sys:llap.daemon.log.dir}/${sys:llap.daemon.log.file}_%d{yyyy-MM-dd-HH}_%i.done +appender.RFA.layout.type = PatternLayout +appender.RFA.layout.pattern = %d{ISO8601} %-5p [%t (%X{fragmentId})] %c: %m%n +appender.RFA.policies.type = Policies +appender.RFA.policies.time.type = TimeBasedTriggeringPolicy +appender.RFA.policies.time.interval = 1 +appender.RFA.policies.time.modulate = true +appender.RFA.policies.size.type = SizeBasedTriggeringPolicy +appender.RFA.policies.size.size = ${sys:llap.daemon.log.maxfilesize} +appender.RFA.strategy.type = DefaultRolloverStrategy +appender.RFA.strategy.max = ${sys:llap.daemon.log.maxbackupindex} + +# history file appender +appender.HISTORYAPPENDER.type = RollingRandomAccessFile +appender.HISTORYAPPENDER.name = HISTORYAPPENDER +appender.HISTORYAPPENDER.fileName = ${sys:llap.daemon.log.dir}/${sys:llap.daemon.historylog.file} +appender.HISTORYAPPENDER.filePattern = ${sys:llap.daemon.log.dir}/${sys:llap.daemon.historylog.file}_%d{yyyy-MM-dd-HH}_%i.done +appender.HISTORYAPPENDER.layout.type = PatternLayout +appender.HISTORYAPPENDER.layout.pattern = %m%n +appender.HISTORYAPPENDER.policies.type = Policies +appender.HISTORYAPPENDER.policies.size.type = SizeBasedTriggeringPolicy 
+appender.HISTORYAPPENDER.policies.size.size = ${sys:llap.daemon.log.maxfilesize} +appender.HISTORYAPPENDER.policies.time.type = TimeBasedTriggeringPolicy +appender.HISTORYAPPENDER.policies.time.interval = 1 +appender.HISTORYAPPENDER.policies.time.modulate = true +appender.HISTORYAPPENDER.strategy.type = DefaultRolloverStrategy +appender.HISTORYAPPENDER.strategy.max = ${sys:llap.daemon.log.maxbackupindex} + +# queryId based routing file appender +appender.query-routing.type = Routing +appender.query-routing.name = query-routing +appender.query-routing.routes.type = Routes +appender.query-routing.routes.pattern = $${ctx:queryId} +#Purge policy for query-based Routing Appender +appender.query-routing.purgePolicy.type = IdlePurgePolicy +appender.query-routing.purgePolicy.timeToLive = 60 +appender.query-routing.purgePolicy.timeUnit = SECONDS +# default route +appender.query-routing.routes.route-default.type = Route +appender.query-routing.routes.route-default.key = $${ctx:queryId} +appender.query-routing.routes.route-default.ref = RFA +# queryId based route +appender.query-routing.routes.route-mdc.type = Route +appender.query-routing.routes.route-mdc.file-mdc.type = LlapRandomAccessFileAppender +appender.query-routing.routes.route-mdc.file-mdc.name = query-file-appender +appender.query-routing.routes.route-mdc.file-mdc.fileName = ${sys:llap.daemon.log.dir}/${ctx:queryId}-${ctx:dagId}.log +appender.query-routing.routes.route-mdc.file-mdc.layout.type = PatternLayout +appender.query-routing.routes.route-mdc.file-mdc.layout.pattern = %d{ISO8601} %5p [%t (%X{fragmentId})] %c{2}: %m%n + +# list of all loggers +loggers = PerfLogger, EncodedReader, NIOServerCnxn, ClientCnxnSocketNIO, DataNucleus, Datastore, JPOX, HistoryLogger, LlapIoImpl, LlapIoOrc, LlapIoCache, LlapIoLocking, TezSM, TezSS, TezHC, LlapDaemon + +logger.LlapDaemon.name = org.apache.hadoop.hive.llap.daemon.impl.LlapDaemon +logger.LlapDaemon.level = INFO + +# shut up the Tez logs that log debug-level stuff on 
INFO + +logger.TezSM.name = org.apache.tez.runtime.library.common.shuffle.impl.ShuffleManager.fetch +logger.TezSM.level = WARN +logger.TezSS.name = org.apache.tez.runtime.library.common.shuffle.orderedgrouped.ShuffleScheduler.fetch +logger.TezSS.level = WARN +logger.TezHC.name = org.apache.tez.http.HttpConnection.url +logger.TezHC.level = WARN + +logger.PerfLogger.name = org.apache.hadoop.hive.ql.log.PerfLogger +logger.PerfLogger.level = DEBUG + +logger.EncodedReader.name = org.apache.hadoop.hive.ql.io.orc.encoded.EncodedReaderImpl +logger.EncodedReader.level = INFO + +logger.LlapIoImpl.name = LlapIoImpl +logger.LlapIoImpl.level = INFO + +logger.LlapIoOrc.name = LlapIoOrc +logger.LlapIoOrc.level = WARN + +logger.LlapIoCache.name = LlapIoCache +logger.LlapIoCache.level = WARN + +logger.LlapIoLocking.name = LlapIoLocking +logger.LlapIoLocking.level = WARN + +logger.NIOServerCnxn.name = org.apache.zookeeper.server.NIOServerCnxn +logger.NIOServerCnxn.level = WARN + +logger.ClientCnxnSocketNIO.name = org.apache.zookeeper.ClientCnxnSocketNIO +logger.ClientCnxnSocketNIO.level = WARN + +logger.DataNucleus.name = DataNucleus +logger.DataNucleus.level = ERROR + +logger.Datastore.name = Datastore +logger.Datastore.level = ERROR + +logger.JPOX.name = JPOX +logger.JPOX.level = ERROR + +logger.HistoryLogger.name = org.apache.hadoop.hive.llap.daemon.HistoryLogger +logger.HistoryLogger.level = INFO +logger.HistoryLogger.additivity = false +logger.HistoryLogger.appenderRefs = HistoryAppender +logger.HistoryLogger.appenderRef.HistoryAppender.ref = HISTORYAPPENDER + +# root logger +rootLogger.level = ${sys:llap.daemon.log.level} +rootLogger.appenderRefs = root +rootLogger.appenderRef.root.ref = ${sys:llap.daemon.root.logger} + +]]> + + + longtext + + + diff --git a/bigtop-manager-server/src/main/resources/stacks/bigtop/3.3.0/services/hive/metainfo.xml b/bigtop-manager-server/src/main/resources/stacks/bigtop/3.3.0/services/hive/metainfo.xml new file mode 100644 index 
000000000..7ce431aaf --- /dev/null +++ b/bigtop-manager-server/src/main/resources/stacks/bigtop/3.3.0/services/hive/metainfo.xml @@ -0,0 +1,76 @@ + + + + + + hive + Hive + + The Apache Hive is a distributed, fault-tolerant data warehouse system + that enables analytics at a massive scale and facilitates reading, writing, + and managing petabytes of data residing in distributed storage using SQL. + + 3.1.3-1 + hive + + + + hiveserver2 + HiveServer2 + server + 1+ + + + + hive_metastore + Hive Metastore + server + 1+ + + + + hive_client + Hive Client + client + 1+ + + + + + + + x86_64 + aarch64 + + + + hive-3.1.3-1.tar.gz + SHA-256:1118e8c485ccc52dbf06a54604659c2bdbd7b4d4ba366aa40c000585303fcbf9 + + + + + + + mysql + hadoop + + + \ No newline at end of file diff --git a/bigtop-manager-server/src/main/resources/stacks/bigtop/3.3.0/services/hive/order.json b/bigtop-manager-server/src/main/resources/stacks/bigtop/3.3.0/services/hive/order.json new file mode 100644 index 000000000..19452cf0d --- /dev/null +++ b/bigtop-manager-server/src/main/resources/stacks/bigtop/3.3.0/services/hive/order.json @@ -0,0 +1,22 @@ +{ + "HIVE_METASTORE-START": [ + "MYSQL_SERVER-START", + "NAMENODE-START", + "NODEMANAGER-START" + ], + "HIVE_METASTORE-RESTART": [ + "MYSQL_SERVER-RESTART", + "NAMENODE-RESTART", + "NODEMANAGER-RESTART" + ], + "HIVESERVER2-START": [ + "NODEMANAGER-START", + "ZOOKEEPER_SERVER-START", + "HIVE_METASTORE-START" + ], + "HIVESERVER2-RESTART": [ + "NODEMANAGER-RESTART", + "ZOOKEEPER_SERVER-RESTART", + "HIVE_METASTORE-RESTART" + ] +} \ No newline at end of file diff --git a/bigtop-manager-server/src/main/resources/stacks/bigtop/3.3.0/services/tez/metainfo.xml b/bigtop-manager-server/src/main/resources/stacks/bigtop/3.3.0/services/tez/metainfo.xml index 509db8315..d35a1973f 100644 --- a/bigtop-manager-server/src/main/resources/stacks/bigtop/3.3.0/services/tez/metainfo.xml +++ b/bigtop-manager-server/src/main/resources/stacks/bigtop/3.3.0/services/tez/metainfo.xml @@ -51,7 
+51,7 @@ - hadoop + hive diff --git a/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/mysql/order.json b/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/mysql/order.json index 9e26dfeeb..2462a50ac 100644 --- a/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/mysql/order.json +++ b/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/mysql/order.json @@ -1 +1,5 @@ -{} \ No newline at end of file +{ + "MYSQL_SERVER-STOP": [ + "HIVE_METASTORE-STOP" + ] +} \ No newline at end of file diff --git a/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/hive/HiveClientScript.java b/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/hive/HiveClientScript.java new file mode 100644 index 000000000..2456ddf0d --- /dev/null +++ b/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/hive/HiveClientScript.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+package org.apache.bigtop.manager.stack.bigtop.v3_3_0.hive;
+
+import org.apache.bigtop.manager.common.shell.ShellResult;
+import org.apache.bigtop.manager.stack.core.spi.param.Params;
+import org.apache.bigtop.manager.stack.core.spi.script.AbstractClientScript;
+import org.apache.bigtop.manager.stack.core.spi.script.Script;
+
+import com.google.auto.service.AutoService;
+import lombok.extern.slf4j.Slf4j;
+
+import java.util.Properties;
+
+/**
+ * Client-side script for the Hive client component.
+ *
+ * <p>Adding the component is delegated to the parent class and configuration
+ * is delegated to {@link HiveSetup}; this class only supplies the add options
+ * and the component name.
+ */
+@Slf4j
+@AutoService(Script.class)
+public class HiveClientScript extends AbstractClientScript {
+
+    /** Name returned by {@link #getComponentName()}. */
+    private static final String COMPONENT_NAME = "hive_client";
+
+    /** Value stored under {@code PROPERTY_KEY_SKIP_LEVELS} when the client is added. */
+    private static final String SKIP_LEVELS = "1";
+
+    /**
+     * Adds the Hive client through the parent implementation, passing the
+     * options built by {@link #addOptions()}.
+     *
+     * @param params parameters of the command being executed
+     * @return the result reported by the parent add step
+     */
+    @Override
+    public ShellResult add(Params params) {
+        return super.add(params, addOptions());
+    }
+
+    /**
+     * Runs the Hive configuration step implemented in {@link HiveSetup}.
+     *
+     * @param params parameters of the command being executed
+     * @return the result reported by {@code HiveSetup.configure}
+     */
+    @Override
+    public ShellResult configure(Params params) {
+        return HiveSetup.configure(params);
+    }
+
+    /**
+     * Returns the name of the component handled by this script.
+     *
+     * @return the literal {@code hive_client}
+     */
+    @Override
+    public String getComponentName() {
+        return COMPONENT_NAME;
+    }
+
+    /**
+     * Builds the options handed to the parent add step.
+     *
+     * <p>NOTE(review): a skip-levels value of "1" presumably drops the top-level
+     * directory of the unpacked tarball - confirm against the parent script class.
+     */
+    private Properties addOptions() {
+        Properties options = new Properties();
+        options.setProperty(PROPERTY_KEY_SKIP_LEVELS, SKIP_LEVELS);
+        return options;
+    }
+}
diff --git a/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/hive/HiveMetastoreScript.java b/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/hive/HiveMetastoreScript.java
new file mode 100644
index 000000000..181ab29a4
--- /dev/null
+++ b/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/hive/HiveMetastoreScript.java
@@ -0,0 +1,142 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.bigtop.manager.stack.bigtop.v3_3_0.hive; + +import org.apache.bigtop.manager.common.constants.Constants; +import org.apache.bigtop.manager.common.constants.MessageConstants; +import org.apache.bigtop.manager.common.message.entity.pojo.RepoInfo; +import org.apache.bigtop.manager.common.shell.ShellResult; +import org.apache.bigtop.manager.common.utils.os.OSDetection; +import org.apache.bigtop.manager.stack.core.exception.StackException; +import org.apache.bigtop.manager.stack.core.spi.param.Params; +import org.apache.bigtop.manager.stack.core.spi.script.AbstractServerScript; +import org.apache.bigtop.manager.stack.core.spi.script.Script; +import org.apache.bigtop.manager.stack.core.tarball.TarballDownloader; +import org.apache.bigtop.manager.stack.core.utils.LocalSettings; +import org.apache.bigtop.manager.stack.core.utils.linux.LinuxFileUtils; +import org.apache.bigtop.manager.stack.core.utils.linux.LinuxOSUtils; + +import com.google.auto.service.AutoService; +import lombok.extern.slf4j.Slf4j; + +import java.io.IOException; +import java.text.MessageFormat; +import java.util.Properties; + +@Slf4j +@AutoService(Script.class) +public class HiveMetastoreScript extends AbstractServerScript { + + @Override + public ShellResult add(Params params) { + Properties properties = new Properties(); + properties.setProperty(PROPERTY_KEY_SKIP_LEVELS, "1"); + + ShellResult shellResult = super.add(params, properties); + + // Download mysql jdbc driver + RepoInfo repoInfo = LocalSettings.repos().stream() + .filter(r -> 
OSDetection.getArch().equals(r.getArch()) && r.getType() == 2) + .findFirst() + .orElseThrow(() -> new RuntimeException("Cannot find repo for os: [" + OSDetection.getOS() + + "] and arch: [" + OSDetection.getArch() + "]")); + String mysqlDriver = repoInfo.getBaseUrl() + "/mysql-connector-j-8.0.33.jar"; + TarballDownloader.download(mysqlDriver, params.stackHome()); + LinuxFileUtils.moveFile(params.stackHome() + "/mysql-connector-j-8.0.33.jar", params.serviceHome() + "/lib/"); + LinuxFileUtils.updateOwner(params.serviceHome() + "/lib", params.user(), params.group(), true); + LinuxFileUtils.updatePermissions(params.serviceHome() + "/lib", Constants.PERMISSION_755, true); + + return shellResult; + } + + @Override + public ShellResult configure(Params params) { + return HiveSetup.configure(params); + } + + @Override + public ShellResult start(Params params) { + configure(params); + HiveParams hiveParams = (HiveParams) params; + try { + initSchema(params); + String cmd = MessageFormat.format( + "{0}/hive-service.sh metastore " + hiveParams.getHiveMetastorePidFile(), + hiveParams.serviceHome() + "/bin"); + ShellResult shellResult = LinuxOSUtils.sudoExecCmd(cmd, hiveParams.user()); + if (shellResult.getExitCode() != 0) { + throw new StackException("Failed to start HiveMetastore: {0}", shellResult.getErrMsg()); + } + long startTime = System.currentTimeMillis(); + long maxWaitTime = 5000; + long pollInterval = 500; + + while (System.currentTimeMillis() - startTime < maxWaitTime) { + ShellResult statusResult = status(params); + if (statusResult.getExitCode() == 0) { + return statusResult; + } + Thread.sleep(pollInterval); + } + return status(params); + } catch (Exception e) { + throw new StackException(e); + } + } + + @Override + public ShellResult stop(Params params) { + HiveParams hiveParams = (HiveParams) params; + int pid = Integer.parseInt( + LinuxFileUtils.readFile(hiveParams.getHiveMetastorePidFile()).replaceAll("\r|\n", "")); + String cmd = "kill -9 " + pid; + try { + 
return LinuxOSUtils.sudoExecCmd(cmd, hiveParams.user()); + } catch (IOException e) { + throw new StackException(e); + } + } + + @Override + public ShellResult status(Params params) { + HiveParams hiveParams = (HiveParams) params; + return LinuxOSUtils.checkProcess(hiveParams.getHiveMetastorePidFile()); + } + + private void initSchema(Params params) throws Exception { + HiveParams hiveParams = (HiveParams) params; + String cmd = hiveParams.serviceHome() + "/bin/schematool -validate -dbType mysql"; + ShellResult shellResult = LinuxOSUtils.sudoExecCmd(cmd, hiveParams.user()); + String clusterName = LocalSettings.cluster().getName(); + if (shellResult.getExitCode() != MessageConstants.SUCCESS_CODE + && shellResult.getErrMsg().contains("Table '" + clusterName + "_hive.VERSION' doesn't exist")) { + // init schema + cmd = hiveParams.serviceHome() + "/bin/schematool -initSchema -dbType mysql"; + shellResult = LinuxOSUtils.sudoExecCmd(cmd, hiveParams.user()); + if (shellResult.getExitCode() != MessageConstants.SUCCESS_CODE) { + throw new StackException(shellResult.getErrMsg()); + } + } + } + + @Override + public String getComponentName() { + return "hive_metastore"; + } +} diff --git a/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/hive/HiveParams.java b/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/hive/HiveParams.java new file mode 100644 index 000000000..68abf5b28 --- /dev/null +++ b/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/hive/HiveParams.java @@ -0,0 +1,162 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.bigtop.manager.stack.bigtop.v3_3_0.hive; + +import org.apache.bigtop.manager.common.message.entity.payload.CommandPayload; +import org.apache.bigtop.manager.stack.bigtop.param.BigtopParams; +import org.apache.bigtop.manager.stack.core.annotations.GlobalParams; +import org.apache.bigtop.manager.stack.core.spi.param.Params; +import org.apache.bigtop.manager.stack.core.utils.LocalSettings; + +import com.google.auto.service.AutoService; +import lombok.Getter; +import lombok.NoArgsConstructor; +import lombok.extern.slf4j.Slf4j; + +import java.util.List; +import java.util.Map; + +@Getter +@Slf4j +@AutoService(Params.class) +@NoArgsConstructor +public class HiveParams extends BigtopParams { + + private String hiveLogDir = "/var/log/hive"; + private String hivePidDir = "/var/run/hive"; + + private String hiveserver2PidFile; + private String hiveMetastorePidFile; + + private Integer metastorePort; + private String hiveEnvContent; + private String hiveLog4j2Content; + private String beelineLog4j2Content; + private String hiveExecLog4j2Content; + private String llapCliLog4j2Content; + private String llapDaemonLog4j2Content; + + private final String hiveShellContent = + "dir=$(dirname $0)\n$dir/hive --service $1 > /dev/null 2>&1 &\necho $! 
> $2"; + + public HiveParams(CommandPayload commandPayload) { + super(commandPayload); + globalParamsMap.put("java_home", javaHome()); + globalParamsMap.put("hadoop_home", hadoopHome()); + globalParamsMap.put("hive_home", serviceHome()); + globalParamsMap.put("hive_conf_dir", confDir()); + globalParamsMap.put("security_enabled", false); + globalParamsMap.put("hive_user", user()); + globalParamsMap.put("hive_group", group()); + + hiveserver2PidFile = hivePidDir + "/hiveserver2.pid"; + hiveMetastorePidFile = hivePidDir + "/hive-metastore.pid"; + } + + public String hiveLimits() { + Map hiveConf = LocalSettings.configurations(getServiceName(), "hive.conf"); + return (String) hiveConf.get("content"); + } + + @GlobalParams + public Map hiveSite() { + Map configurations = LocalSettings.configurations(getServiceName(), "hive-site"); + String metastoreUris = configurations.get("hive.metastore.uris").toString(); + + String[] split = metastoreUris.split(":"); + metastorePort = Integer.parseInt(split[split.length - 1]); + globalParamsMap.put("hive_metastore_port", metastorePort); + + // Auto generate zookeeper properties for hive-site.xml + Map zooCfg = LocalSettings.configurations("zookeeper", "zoo.cfg"); + List zookeeperQuorum = LocalSettings.hosts("zookeeper_server"); + + configurations.put("hive.zookeeper.client.port", zooCfg.get("clientPort")); + configurations.put("hive.zookeeper.quorum", String.join(",", zookeeperQuorum)); + + // Auto generate database properties for hive-site.xml + String mysqlHost = LocalSettings.hosts("mysql_server").get(0); + String mysqlPassword = LocalSettings.configurations("mysql", "common") + .get("root_password") + .toString(); + String clusterName = LocalSettings.cluster().getName(); + configurations.put("hive.metastore.db.type", "mysql"); + configurations.put( + "javax.jdo.option.ConnectionURL", + "jdbc:mysql://" + mysqlHost + ":3306/" + clusterName + + "_hive?createDatabaseIfNotExist=true&useSSL=false&allowPublicKeyRetrieval=true"); + 
configurations.put("javax.jdo.option.ConnectionDriverName", "com.mysql.cj.jdbc.Driver"); + configurations.put("javax.jdo.option.ConnectionUserName", "root"); + configurations.put("javax.jdo.option.ConnectionPassword", mysqlPassword); + return configurations; + } + + @GlobalParams + public Map hiveEnv() { + Map hiveEnv = LocalSettings.configurations(getServiceName(), "hive-env"); + hivePidDir = (String) hiveEnv.get("hive_pid_dir"); + hiveLogDir = (String) hiveEnv.get("hive_log_dir"); + hiveEnvContent = (String) hiveEnv.get("content"); + return hiveEnv; + } + + @GlobalParams + public Map hiveLog4j2() { + Map configurations = LocalSettings.configurations(getServiceName(), "hive-log4j2"); + hiveLog4j2Content = (String) configurations.get("content"); + return configurations; + } + + @GlobalParams + public Map beelineLog4j2() { + Map configurations = LocalSettings.configurations(getServiceName(), "beeline-log4j2"); + beelineLog4j2Content = (String) configurations.get("content"); + return configurations; + } + + @GlobalParams + public Map hiveExecLog4j2() { + Map configurations = LocalSettings.configurations(getServiceName(), "hive-exec-log4j2"); + hiveExecLog4j2Content = (String) configurations.get("content"); + return configurations; + } + + @GlobalParams + public Map llapCliLog4j2() { + Map configurations = LocalSettings.configurations(getServiceName(), "llap-cli-log4j2"); + llapCliLog4j2Content = (String) configurations.get("content"); + return configurations; + } + + @GlobalParams + public Map llapDaemonLog4j2() { + Map configurations = LocalSettings.configurations(getServiceName(), "llap-daemon-log4j2"); + llapDaemonLog4j2Content = (String) configurations.get("content"); + return configurations; + } + + public String hadoopHome() { + return stackHome() + "/hadoop"; + } + + @Override + public String getServiceName() { + return "hive"; + } +} diff --git 
a/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/hive/HiveServer2Script.java b/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/hive/HiveServer2Script.java new file mode 100644 index 000000000..20b65b4ec --- /dev/null +++ b/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/hive/HiveServer2Script.java @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.bigtop.manager.stack.bigtop.v3_3_0.hive; + +import org.apache.bigtop.manager.common.shell.ShellResult; +import org.apache.bigtop.manager.stack.core.exception.StackException; +import org.apache.bigtop.manager.stack.core.spi.param.Params; +import org.apache.bigtop.manager.stack.core.spi.script.AbstractServerScript; +import org.apache.bigtop.manager.stack.core.spi.script.Script; +import org.apache.bigtop.manager.stack.core.utils.linux.LinuxFileUtils; +import org.apache.bigtop.manager.stack.core.utils.linux.LinuxOSUtils; + +import com.google.auto.service.AutoService; +import lombok.extern.slf4j.Slf4j; + +import java.io.IOException; +import java.text.MessageFormat; +import java.util.Properties; + +@Slf4j +@AutoService(Script.class) +public class HiveServer2Script extends AbstractServerScript { + + @Override + public ShellResult add(Params params) { + Properties properties = new Properties(); + properties.setProperty(PROPERTY_KEY_SKIP_LEVELS, "1"); + + return super.add(params, properties); + } + + @Override + public ShellResult configure(Params params) { + return HiveSetup.configure(params); + } + + @Override + public ShellResult start(Params params) { + configure(params); + HiveParams hiveParams = (HiveParams) params; + String cmd = MessageFormat.format( + "{0}/hive-service.sh hiveserver2 " + hiveParams.getHiveserver2PidFile(), + hiveParams.serviceHome() + "/bin"); + try { + ShellResult shellResult = LinuxOSUtils.sudoExecCmd(cmd, hiveParams.user()); + if (shellResult.getExitCode() != 0) { + throw new StackException("Failed to start HiveServer2: {0}", shellResult.getErrMsg()); + } + long startTime = System.currentTimeMillis(); + long maxWaitTime = 5000; + long pollInterval = 500; + + while (System.currentTimeMillis() - startTime < maxWaitTime) { + ShellResult statusResult = status(params); + if (statusResult.getExitCode() == 0) { + return statusResult; + } + Thread.sleep(pollInterval); + } + return status(params); + } catch (Exception e) { + 
throw new StackException(e); + } + } + + @Override + public ShellResult stop(Params params) { + HiveParams hiveParams = (HiveParams) params; + int pid = Integer.parseInt( + LinuxFileUtils.readFile(hiveParams.getHiveserver2PidFile()).replaceAll("\r|\n", "")); + String cmd = "kill -9 " + pid; + try { + return LinuxOSUtils.sudoExecCmd(cmd, hiveParams.user()); + } catch (IOException e) { + throw new StackException(e); + } + } + + @Override + public ShellResult status(Params params) { + HiveParams hiveParams = (HiveParams) params; + return LinuxOSUtils.checkProcess(hiveParams.getHiveserver2PidFile()); + } + + @Override + public String getComponentName() { + return "hiveserver2"; + } +} diff --git a/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/hive/HiveSetup.java b/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/hive/HiveSetup.java new file mode 100644 index 000000000..6ba97337c --- /dev/null +++ b/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/hive/HiveSetup.java @@ -0,0 +1,126 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.bigtop.manager.stack.bigtop.v3_3_0.hive; + +import org.apache.bigtop.manager.common.constants.Constants; +import org.apache.bigtop.manager.common.shell.ShellResult; +import org.apache.bigtop.manager.stack.bigtop.v3_3_0.hadoop.HadoopParams; +import org.apache.bigtop.manager.stack.core.enums.ConfigType; +import org.apache.bigtop.manager.stack.core.spi.param.Params; +import org.apache.bigtop.manager.stack.core.utils.linux.LinuxFileUtils; + +import lombok.AccessLevel; +import lombok.NoArgsConstructor; +import lombok.extern.slf4j.Slf4j; + +import java.text.MessageFormat; + +import static org.apache.bigtop.manager.common.constants.Constants.PERMISSION_755; + +@Slf4j +@NoArgsConstructor(access = AccessLevel.PRIVATE) +public class HiveSetup { + + public static ShellResult configure(Params params) { + log.info("Configuring Hive"); + HiveParams hiveParams = (HiveParams) params; + + String confDir = hiveParams.confDir(); + String hiveUser = hiveParams.user(); + String hiveGroup = hiveParams.group(); + + LinuxFileUtils.createDirectories(hiveParams.getHiveLogDir(), hiveUser, hiveGroup, PERMISSION_755, true); + LinuxFileUtils.createDirectories(hiveParams.getHivePidDir(), hiveUser, hiveGroup, PERMISSION_755, true); + + LinuxFileUtils.toFile( + ConfigType.CONTENT, + MessageFormat.format("{0}/hive-service.sh", hiveParams.serviceHome() + "/bin"), + hiveUser, + hiveGroup, + Constants.PERMISSION_755, + hiveParams.getHiveShellContent()); + + LinuxFileUtils.toFileByTemplate( + hiveParams.hiveLimits(), + MessageFormat.format("{0}/hive.conf", HadoopParams.LIMITS_CONF_DIR), + Constants.ROOT_USER, + Constants.ROOT_USER, + Constants.PERMISSION_644, + hiveParams.getGlobalParamsMap()); + + LinuxFileUtils.toFileByTemplate( + hiveParams.getHiveEnvContent(), + MessageFormat.format("{0}/hive-env.sh", confDir), + hiveUser, + hiveGroup, + Constants.PERMISSION_644, + hiveParams.getGlobalParamsMap()); + + LinuxFileUtils.toFile( + ConfigType.XML, + 
MessageFormat.format("{0}/hive-site.xml", confDir), + hiveUser, + hiveGroup, + Constants.PERMISSION_644, + hiveParams.hiveSite()); + + LinuxFileUtils.toFileByTemplate( + hiveParams.getHiveLog4j2Content(), + MessageFormat.format("{0}/hive-log4j2.properties", confDir), + hiveUser, + hiveGroup, + Constants.PERMISSION_644, + hiveParams.getGlobalParamsMap()); + + LinuxFileUtils.toFileByTemplate( + hiveParams.getBeelineLog4j2Content(), + MessageFormat.format("{0}/beeline-log4j2.properties", confDir), + hiveUser, + hiveGroup, + Constants.PERMISSION_644, + hiveParams.getGlobalParamsMap()); + + LinuxFileUtils.toFileByTemplate( + hiveParams.getHiveExecLog4j2Content(), + MessageFormat.format("{0}/hive-exec-log4j2.properties", confDir), + hiveUser, + hiveGroup, + Constants.PERMISSION_644, + hiveParams.getGlobalParamsMap()); + + LinuxFileUtils.toFileByTemplate( + hiveParams.getLlapCliLog4j2Content(), + MessageFormat.format("{0}/llap-cli-log4j2.properties", confDir), + hiveUser, + hiveGroup, + Constants.PERMISSION_644, + hiveParams.getGlobalParamsMap()); + + LinuxFileUtils.toFileByTemplate( + hiveParams.getLlapDaemonLog4j2Content(), + MessageFormat.format("{0}/llap-daemon-log4j2.properties", confDir), + hiveUser, + hiveGroup, + Constants.PERMISSION_644, + hiveParams.getGlobalParamsMap()); + + log.info("Successfully configured Hive"); + return ShellResult.success(); + } +} diff --git a/bigtop-manager-stack/bigtop-manager-stack-core/src/main/java/org/apache/bigtop/manager/stack/core/tarball/TarballDownloader.java b/bigtop-manager-stack/bigtop-manager-stack-core/src/main/java/org/apache/bigtop/manager/stack/core/tarball/TarballDownloader.java index 363ce53e3..4bf316659 100644 --- a/bigtop-manager-stack/bigtop-manager-stack-core/src/main/java/org/apache/bigtop/manager/stack/core/tarball/TarballDownloader.java +++ b/bigtop-manager-stack/bigtop-manager-stack-core/src/main/java/org/apache/bigtop/manager/stack/core/tarball/TarballDownloader.java @@ -61,7 +61,7 @@ public static void 
download(String remoteUrl, String saveDir, PackageInfo packag log.info("Checksum validate successfully for [{}]", localFile.getAbsolutePath()); } - private static void download(String remoteUrl, String saveDir) { + public static void download(String remoteUrl, String saveDir) { int i = 1; while (true) { Boolean downloaded = downloadFile(remoteUrl, saveDir); diff --git a/bigtop-manager-stack/bigtop-manager-stack-core/src/main/java/org/apache/bigtop/manager/stack/core/utils/linux/LinuxFileUtils.java b/bigtop-manager-stack/bigtop-manager-stack-core/src/main/java/org/apache/bigtop/manager/stack/core/utils/linux/LinuxFileUtils.java index c0912fc91..87070926a 100644 --- a/bigtop-manager-stack/bigtop-manager-stack-core/src/main/java/org/apache/bigtop/manager/stack/core/utils/linux/LinuxFileUtils.java +++ b/bigtop-manager-stack/bigtop-manager-stack-core/src/main/java/org/apache/bigtop/manager/stack/core/utils/linux/LinuxFileUtils.java @@ -275,6 +275,29 @@ public static String readFile(String source) { } } + public static String writeFile(String source, String content) { + if (StringUtils.isBlank(source)) { + throw new StackException("source must not be empty"); + } + + List builderParameters = new ArrayList<>(); + builderParameters.add("echo"); + builderParameters.add(content); + builderParameters.add(">"); + builderParameters.add(source); + + try { + ShellResult shellResult = sudoExecCmd(builderParameters); + if (shellResult.getExitCode() != MessageConstants.SUCCESS_CODE) { + throw new StackException(shellResult.getErrMsg()); + } + + return shellResult.getOutput(); + } catch (IOException e) { + throw new StackException(e); + } + } + /** * create symbolic link *