diff --git a/apps/backend/agent/solution_maker.py b/apps/backend/agent/solution_maker.py index ef7d6d3eb..ae6bfab10 100644 --- a/apps/backend/agent/solution_maker.py +++ b/apps/backend/agent/solution_maker.py @@ -104,12 +104,20 @@ def choose_script_file(cls, host: models.Host, is_execute_on_target: bool) -> st return script_file_name @staticmethod - def get_gse_extra_config_dir(os_type: str): + def get_gse_extra_config_dir(host: models.Host): extra_config_sub_dir: str = "user_conf" + os_type: str = host.os_type + extra_dir: str = settings.GSE_ENVIRON_DIR if os_type.upper() == constants.OsType.WINDOWS: - return json.dumps(PathHandler(os_type).join(settings.GSE_ENVIRON_WIN_DIR, extra_config_sub_dir))[1:-1] + extra_dir = settings.GSE_ENVIRON_WIN_DIR + + if not host.ap.is_use_sudo: + extra_dir: str = host.ap.get_agent_config(os_type)["setup_path"] + + if os_type.upper() == constants.OsType.WINDOWS: + return json.dumps(PathHandler(os_type).join(extra_dir, extra_config_sub_dir))[1:-1] else: - return PathHandler(os_type).join(settings.GSE_ENVIRON_DIR, extra_config_sub_dir) + return PathHandler(os_type).join(extra_dir, extra_config_sub_dir) class BaseExecutionSolutionMaker(metaclass=abc.ABCMeta): @@ -271,6 +279,12 @@ def get_run_cmd_base_params(self) -> typing.List[str]: f"-s {self.pipeline_id}", ] + if self.host_ap.is_use_sudo: + run_dir = f'GSE_AGENT_RUN_DIR={self.agent_config["run_path"]}' + data_dir = f'GSE_AGENT_DATA_DIR={self.agent_config["data_path"]}' + log_dir = f'GSE_AGENT_LOG_DIR={self.agent_config["log_path"]}' + run_cmd_params.append(f"-v {run_dir} {data_dir} {log_dir}") + # 系统开启使用密码注册 Windows 服务时,需额外传入 -U -P 参数,用于注册 Windows 服务,详见 setup_agent.bat 脚本 if self.need_encrypted_password(): # GSE 密码注册场景暂不启用国密,使用固定 RSA 的方式 @@ -313,6 +327,7 @@ def add_sudo_to_cmds(self, execution_solution: ExecutionSolution): self.host.os_type == constants.OsType.WINDOWS, self.identity_data.account in [constants.LINUX_ACCOUNT], self.script_file_name == 
constants.SetupScriptFileName.SETUP_PAGENT_PY.value, + self.host_ap.is_use_sudo is False, ] ): return @@ -366,7 +381,7 @@ def get_create_pre_dirs_step(self, is_shell_adapter: bool = False) -> ExecutionS if not self.agent_setup_info.is_legacy: # GSE 1.0 不需要创建额外配置目录 - filepath_necessary_names.append(ExecutionSolutionTools.get_gse_extra_config_dir(self.host.os_type)) + filepath_necessary_names.append(ExecutionSolutionTools.get_gse_extra_config_dir(self.host)) dirs_to_be_created: typing.Set[str] = {self.dest_dir} for filepath_necessary_name in filepath_necessary_names: @@ -533,6 +548,7 @@ def make(self) -> ExecutionSolution: execution_solution: ExecutionSolution = self._make() if self.is_combine_cmd_step: self.combine_cmd_step(execution_solution) + self.add_sudo_to_cmds(execution_solution) return execution_solution @@ -587,7 +603,11 @@ def shell_cmd_adapter( shell: str = "bash" else: shell: str = suffix - run_cmd = f"nohup {shell} {run_cmd} &> {self.dest_dir}nm.nohup.out &" + + if self.host.os_type.lower() == backend_api_constants.OS.AIX: + run_cmd = f"nohup {shell} {run_cmd} > {self.dest_dir}nm.nohup.out 2>&1 &" + else: + run_cmd = f"nohup {shell} {run_cmd} &> {self.dest_dir}nm.nohup.out &" curl_cmd: str = ("curl", f"{dest_dir}curl.exe")[self.host.os_type == constants.OsType.WINDOWS] download_cmd = ( diff --git a/apps/backend/components/collections/agent_new/install.py b/apps/backend/components/collections/agent_new/install.py index c9314e30b..92f071590 100644 --- a/apps/backend/components/collections/agent_new/install.py +++ b/apps/backend/components/collections/agent_new/install.py @@ -821,6 +821,8 @@ def handle_report_data(self, host: models.Host, sub_inst_id: int, success_callba untreated_healthz_result = base64.b64decode(untreated_healthz_result.encode()).decode() except binascii.Error: pass + except UnicodeDecodeError: + pass # 去除可能存在的前缀 if untreated_healthz_result.startswith("healthz:"): diff --git a/apps/backend/components/collections/job.py 
b/apps/backend/components/collections/job.py index 5fee80cd0..5fc092bc6 100644 --- a/apps/backend/components/collections/job.py +++ b/apps/backend/components/collections/job.py @@ -130,6 +130,22 @@ def request_single_job_and_create_map( account_alias = (settings.BACKEND_UNIX_ACCOUNT, settings.BACKEND_WINDOWS_ACCOUNT)[ os_type == constants.OsType.WINDOWS ] + + account_set: set = set() + for host in job_params["target_server"][host_interaction_from]: + if host_interaction_from == "host_id_list": + account = models.Host.objects.get(bk_host_id=host).identity.account + account_set.add(account) + + if host_interaction_from == "ip_list": + account = models.Host.objects.get(inner_ip=host["ip"]).identity.account + account_set.add(account) + + if len(account_set) > 1: + raise AppBaseException(_("目标机器账户不一致,请检查")) + + account_alias = account_set.pop() + script_language = (constants.ScriptLanguageType.SHELL.value, constants.ScriptLanguageType.BAT.value)[ os_type == constants.OsType.WINDOWS ] diff --git a/apps/backend/subscription/steps/agent_adapter/config_context/context_helper.py b/apps/backend/subscription/steps/agent_adapter/config_context/context_helper.py index 7e355732b..aebd6faae 100644 --- a/apps/backend/subscription/steps/agent_adapter/config_context/context_helper.py +++ b/apps/backend/subscription/steps/agent_adapter/config_context/context_helper.py @@ -112,7 +112,7 @@ def __post_init__(self): ] ) ], - extra_config_directory=ExecutionSolutionTools.get_gse_extra_config_dir(self.host.os_type), + extra_config_directory=ExecutionSolutionTools.get_gse_extra_config_dir(self.host), ), context_dataclass.AccessConfigContext( cluster_endpoints=",".join( diff --git a/apps/node_man/migrations/0084_accesspoint_is_use_sudo.py b/apps/node_man/migrations/0084_accesspoint_is_use_sudo.py new file mode 100644 index 000000000..5959e21cf --- /dev/null +++ b/apps/node_man/migrations/0084_accesspoint_is_use_sudo.py @@ -0,0 +1,18 @@ +# Generated by Django 3.2.4 on 2024-08-26 08:39 + +from 
django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("node_man", "0083_subscription_operate_info"), + ] + + operations = [ + migrations.AddField( + model_name="accesspoint", + name="is_use_sudo", + field=models.BooleanField(default=True, verbose_name="是否使用sudo"), + ), + ] diff --git a/apps/node_man/models.py b/apps/node_man/models.py index 8dc074de9..2d0cf201b 100644 --- a/apps/node_man/models.py +++ b/apps/node_man/models.py @@ -566,6 +566,7 @@ class AccessPoint(models.Model): proxy_package = JSONField(_("Proxy上的安装包"), default=list) outer_callback_url = models.CharField(_("节点管理外网回调地址"), max_length=128, blank=True, null=True, default="") callback_url = models.CharField(_("节点管理内网回调地址"), max_length=128, blank=True, null=True, default="") + is_use_sudo = models.BooleanField(_("是否使用sudo"), default=True) @property def file_endpoint_info(self) -> EndpointInfo: diff --git a/apps/node_man/serializers/ap.py b/apps/node_man/serializers/ap.py index 9adf3fd8a..596c87df6 100644 --- a/apps/node_man/serializers/ap.py +++ b/apps/node_man/serializers/ap.py @@ -52,6 +52,7 @@ class ListSerializer(serializers.ModelSerializer): is_default = serializers.BooleanField(label=_("是否默认接入点,不可删除")) proxy_package = serializers.JSONField(label=_("Proxy上的安装包")) file_cache_dirs = serializers.SerializerMethodField(label=_("文件缓存目录")) + is_use_sudo = serializers.BooleanField(label=_("是否使用sudo")) def to_representation(self, instance): ret = super(ListSerializer, self).to_representation(instance) @@ -117,6 +118,7 @@ class ZKSerializer(serializers.Serializer): bscp_config = serializers.DictField(_("BSCP配置"), required=False) outer_callback_url = serializers.CharField(label=_("节点管理外网回调地址"), required=False, allow_blank=True) callback_url = serializers.CharField(label=_("节点管理内网回调地址"), required=False, allow_blank=True) + is_use_sudo = serializers.BooleanField(label=_("是否使用sudo"), required=False, default=True) def validate(self, data): gse_version_list: 
List[str] = list(set(AccessPoint.objects.values_list("gse_version", flat=True))) diff --git a/docs/solution/how_use_no_sudo_user_install_agent.md b/docs/solution/how_use_no_sudo_user_install_agent.md new file mode 100644 index 000000000..8e19f8412 --- /dev/null +++ b/docs/solution/how_use_no_sudo_user_install_agent.md @@ -0,0 +1,86 @@ +# 使用无免密sudo账户安装agent注意事项 + +*请确保使用无免密sudo权限账户安装agent的机器未曾使用过非该账户安装过agent* + +*或在使用无免密sudo权限账户安装agent前将原agent卸载并清理痕迹* + +```shell +# 基于默认接入点包括但不限于 +/tmp/nm* +/tmp/xuoasefasd.err +/tmp/bkjob +/usr/local/gse +/var/run/ipc.stat +/var/log/gse +/var/run/gse +/var/run/ipc.state.report +/var/lib/gse +``` + +*请在配置接入点时确认agent的安装账户拥有hostid文件路径的读写操作权限* + +hostid文件的路径默认为`Linux: /var/lib/gse/host Windows: c:/gse/data/host` + +> 二进制部署版本中默认位于cmdb的/data/bkee/cmdb/server/conf/common.yaml文件eventServer-hostIdentifier中配置 +> +> 容器化版本中默认位于cmdb的/data/bkhelmfile/blueking/environments/default/bkcmdb-values.yaml.gotmpl文件中的common-eventServer-hostIdentifier中配置 + +*请确保所用账户拥有以下系统程序操作权限* + +```shell +/usr/bin/curl, +/usr/bin/mkdir, +/usr/bin/ls, +/usr/bin/cat, +/usr/bin/which, +/usr/bin/ping, +/usr/bin/echo, +/usr/bin/chmod, +/usr/bin/nohup, +/usr/bin/tail, +/usr/bin/ps, +/usr/bin/date, +/usr/bin/tee, +/usr/bin/uname, +/usr/bin/rm, +/usr/bin/awk, +/usr/bin/lsof, +/usr/bin/stat, +/usr/bin/readlink, +/usr/bin/grep, +/usr/bin/read, +/usr/bin/hash, +/usr/bin/timeout, +/usr/bin/bash, +/usr/bin/sed, +/usr/bin/chattr, +/usr/bin/cd, +/usr/bin/cp, +/usr/bin/wait, +/usr/bin/tr, +/usr/bin/wc, +/usr/bin/mktemp, +/usr/bin/seq, +/usr/bin/sleep, +/usr/bin/df, +/usr/bin/pidof, +/usr/bin/tar, +/usr/bin/gzip, +/usr/bin/pgrep, +/usr/bin/xargs, +``` + +## 接入点配置 + +接入点配置中 `hostid路径` 必须与上述 cmdb 的配置文件中指定的路径一致 + +创建接入点后根据接入点页面中显示的接入点id到节点管理数据库中执行下述语句 + +```sql +use bk_nodeman; +update node_man_accesspoint set is_use_sudo=0 where id={接入点页面中显示的id}; +``` + +## 作业平台 + +作业平台需要新建该无免密sudo权限的账户的执行账户 \ No newline at end of file diff --git a/script_tools/agent_tools/agent2/setup_agent.ksh 
b/script_tools/agent_tools/agent2/setup_agent.ksh new file mode 100644 index 000000000..4b15c85a4 --- /dev/null +++ b/script_tools/agent_tools/agent2/setup_agent.ksh @@ -0,0 +1,855 @@ +#!/bin/ksh +# vim:ft=sh expandtab sts=4 ts=4 sw=4 nu +# gse agent 2.0 安装脚本, 仅在节点管理2.0中使用 + +# DEFAULT DEFINITION +NODE_TYPE=agent + +GSE_AGENT_RUN_DIR=/var/run/gse +GSE_AGENT_DATA_DIR=/var/lib/gse +GSE_AGENT_LOG_DIR=/var/log/gse + + +GSE_AGENT_CONFIG="gse_agent.conf" +set -A AGENT_CONFIGS gse_agent.conf +set -A AGENT_CLEAN_UP_DIRS bin + +# 收到如下信号或者exit退出时,执行清理逻辑 +#trap quit 1 2 3 4 5 6 7 8 10 11 12 13 14 15 +trap 'cleanup' HUP INT QUIT ABRT SEGV PIPE ALRM TERM EXIT +trap 'report_err $LINENO; exit 1; ' ERR + +log () { local L=INFO D; D="$(date +%F\ %T)"; echo "$D $L $*" | tee -a "$LOG_FILE"; bulk_report_step_status "$LOG_FILE" "$BULK_LOG_SIZE" ; return 0; } +warn () { local L=WARN D; D="$(date +%F\ %T)"; echo "$D $L $*" | tee -a "$LOG_FILE"; bulk_report_step_status "$LOG_FILE" "$BULK_LOG_SIZE" ; return 0; } +err () { local L=ERROR D; D="$(date +%F\ %T)"; echo "$D $L $*" | tee -a "$LOG_FILE"; bulk_report_step_status "$LOG_FILE" "$BULK_LOG_SIZE" ; return 1; } +fail () { local L=ERROR D; D="$(date +%F\ %T)"; echo "$D $L $*" | tee -a "$LOG_FILE"; bulk_report_step_status "$LOG_FILE" "$BULK_LOG_SIZE" URG; exit 1; } + +get_cpu_arch () { + local cmd=$1 + CPU_ARCH=$($cmd) + CPU_ARCH=$(echo ${CPU_ARCH} | tr 'A-Z' 'a-z') + if [[ "$CPU_ARCH" == *x86_64* ]]; then + return 0 + elif [[ "$CPU_ARCH" == *x86* ]]; then + return 0 + elif [[ "$CPU_ARCH" == *aarch* ]]; then + return 0 + elif [[ "$CPU_ARCH" == *powerpc* ]]; then + return 0 + else + return 1 + fi +} + +get_cpu_arch "uname -p" || get_cpu_arch "uname -m" || arch || fail get_cpu_arch "Failed to get CPU arch, please contact the developer." + +# 清理逻辑:保留本次的LOG_FILE,下次运行时会删除历史的LOG_FILE。 +# 保留安装脚本本身 +cleanup () { + bulk_report_step_status "$LOG_FILE" "$BULK_LOG_SIZE" URG # 上报所有剩余的日志 + rm -rf /tmp/logpipe + + if ! 
[[ $DEBUG = "true" ]]; then + local GLOBIGNORE="$LOG_FILE*" + for file in "$TMP_DIR"/nm.* ; do + if [ -e "$file" ]; then + echo "removing $file" + rm -r "$file" + fi + done + fi + + exit 0 +} + +# 打印错误行数信息 +report_err () { + awk -v LN="$1" -v L="ERROR" -v D="$(date +%F\ %T)" \ + 'NR>LN-3 && NR>>":""), $0 }' $0 +} + +validate_setup_path () { + set -A invalid_path_prefix /tmp /var /etc /bin /lib /lib64 /boot /mnt /proc /dev /run /sys /sbin /root + + set -A invalid_path /usr /usr/bin /usr/sbin /usr/local/lib /usr/include /usr/lib /usr/lib64 /usr/libexec + + + local p1="${AGENT_SETUP_PATH%/$NODE_TYPE*}" + local p2="${p1%/gse*}" + local p p3 + + if [[ "$p1" == "${AGENT_SETUP_PATH}" ]] || [[ "$p2" == "$AGENT_SETUP_PATH" ]]; then + fail check_env FAILED "$AGENT_SETUP_PATH is not allowed to install agent" + fi + for p in "${invalid_path[@]}"; do + if [[ "${p2}" == "$p" ]]; then + fail check_env FAILED "$AGENT_SETUP_PATH is not allowed to install agent" + fi + done + for p in "${invalid_path_prefix[@]}"; do + p3=$(echo "$p2" |sed 's/$p/$p2/g') + if [[ "$p3" != "$p2" ]]; then + fail check_env FAILED "$AGENT_SETUP_PATH is not allowed to install agent" + fi + done +} + +is_port_listen () { + local BT_PORT_START=$1 + local BT_PORT_END=$2 + sleep 1 + + while (("$BT_PORT_START" <= "$BT_PORT_END")) + do + netstat -aon |grep "${BT_PORT_START}" |grep LISTEN 2>/dev/null && return 0 + let BT_PORT_START=BT_PORT_START+1 + done + return 1 +} + +is_connected () { + local i port=$1 + + for i in {0..15}; do + sleep 1 + netstat -aon |grep ${port} |grep ESTABLISHED 2>/dev/null && return 0 + done + + return 1 +} + +# 用法:通过ps的comm字段获取pid,pgid和pid相同是为gse_master +get_pid () { + local proc=${1:-agent} + + pattern1=$(ps -eo pid,pgid,comm | grep gse_${proc} | sed -n 1p |awk '{print$1,$2}') + pattern2=$(ps -eo pid,pgid,comm | grep gse_${proc} | sed -n 2p |awk '{print$1,$2}') + + set -A pids1 $pattern1 + set -A pids2 $pattern2 + + if [[ ${pids1[0]} == ${pids1[1]} ]];then + set -A gse_master 
${pids1[0]} + set -A gse_workers ${pids2[0]} + elif [[ ${pids2[0]} == ${pids2[1]} ]];then + set -A gse_master ${pids2[0]} + set -A gse_workers ${pids1[0]} + else + echo 'no gse_master' + fi + + printf "%d\n" "${pids[@]}" +} + +is_base64_command_exist() { + if ! command -v base64 >/dev/null 2>&1; then + return 1 + else + return 0 + fi +} + +is_process_ok () { + local proc=${1:-agent} + + sleep 5 + get_pid "$proc" + + if [ "${#gse_master[@]}" -eq 0 ]; then + fail setup_agent FAILED "process check: no gseMaster found. gse_${proc} process abnormal (node type:$NODE_TYPE)" + fi + + if [ "${#gse_master[@]}" -gt 1 ]; then + fail setup_agent FAILED "process check: multi gseMaster found. gse_${proc} process abnormal (node type:$NODE_TYPE)" + fi + + # worker 进程在某些任务情况下可能不只一个,只要都是一个爹,多个worker也是正常,不为0即可 + if [ "${#gse_workers[@]}" -eq 0 ]; then + fail setup_agent FAILED "process check: ${proc}Worker not found (node type:$NODE_TYPE)" + fi +} + +check_heathz_by_gse () { + local SLEEP_TIME=1 RETRY_COUNT=0 + + for i in {0..2}; do + local result execution_code + if [ -f "${GSE_AGENT_CONFIG_PATH}" ]; then + result=$("${AGENT_SETUP_PATH}"/bin/gse_agent -f "${GSE_AGENT_CONFIG_PATH}" --healthz 1) + else + result=$("${AGENT_SETUP_PATH}"/bin/gse_agent --healthz 1) + fi + execution_code=$? 
+ if [[ "${execution_code}" -eq 0 ]]; then + break + else + sleep "${SLEEP_TIME}" + RETRY_COUNT=$((RETRY_COUNT + 1)) + if [[ "${RETRY_COUNT}" -ge 3 ]]; then + log healthz_check INFO "gse_agent healthz check return code: ${execution_code}" + report_result=$(echo "$result" | awk -F': ' '{print $2}') + if is_base64_command_exist; then + report_result=$(echo "$result" | base64 -w 0) + else + report_result=$(echo "$result" | tr "\"" "\'") + fi + log report_healthz INFO "${report_result}" + fail healthz_check FAILED "gse healthz check failed with retry count: $RETRY_COUNT" + fi + fi + done + report_result=$(echo "$result" | awk -F': ' '{print $2}') + if is_base64_command_exist; then + report_result=$(echo "$result" | base64 -w 0) + else + report_result=$(echo "$result" | tr "\"" "\'") + fi + log report_healthz - "${report_result}" + log healthz_check INFO "gse_agent healthz check success" +} + +remove_crontab () { + if [ $IS_SUPER == false ]; then + return + fi + + local tmpcron + local datatemp=$(date +%s) + + crontab -l | grep -v "$AGENT_SETUP_PATH/bin/gsectl" > /tmp/cron.$datatemp + crontab /tmp/cron.$datatemp && rm -f /tmp/cron.$datatemp + + # 下面这段代码是为了确保修改的crontab能立即生效 + ps -eo pid,comm | grep cron |awk '{print$1}' | xargs kill -9 +} + +setup_startup_scripts () { + if [ $IS_SUPER == false ]; then + return + fi + + local rcfile=/etc/rc.local + + if [ -f $rcfile ];then + # 先删后加,避免重复 + #sed -i "\|${AGENT_SETUP_PATH}/bin/gsectl|d" $rcfile + tmp_rcfile=$(grep -v "${AGENT_SETUP_PATH}/bin/gsectl") + echo "$tmp_rcfile" >$rcfile + else + touch "$rcfile" && chmod 755 "$rcfile" + fi + + echo "[ -f $AGENT_SETUP_PATH/bin/gsectl ] && $AGENT_SETUP_PATH/bin/gsectl start >/var/log/gse_start.log 2>&1" >>$rcfile +} + +registe_agent_with_excepte () { + local SLEEP_TIME=1 RETRY_COUNT=0 + + for i in {0..2}; do + local registe_result registe_code + if [ -f "${GSE_AGENT_CONFIG_PATH}" ]; then + registe_result=$($AGENT_SETUP_PATH/bin/gse_agent -f "${GSE_AGENT_CONFIG_PATH}" --register 2>&1) + 
else + registe_result=$($AGENT_SETUP_PATH/bin/gse_agent --register 2>&1) + fi + registe_code=$? + if [[ "${registe_code}" -eq 0 ]] && [[ ! "${registe_result}" == *overwrite* ]]; then + log report_agent_id DONE "$registe_result" + break + else + sleep "${SLEEP_TIME}" + RETRY_COUNT=$((RETRY_COUNT + 1)) + if [[ "${RETRY_COUNT}" -ge 3 ]]; then + fail register_agent_id FAILED "register agent id failed, error: ${registe_result}" + fi + fi + done +} + +register_agent_id () { + if [ ! -f "$AGENT_SETUP_PATH/bin/gse_agent" ]; then + fail register_agent_id FAILED "gse_agent file not exists in $AGENT_SETUP_PATH/bin" + fi + + if [[ "${UNREGISTER_AGENT_ID}" == "TRUE" ]]; then + log register_agent_id - "trying to unregister agent id" + unregister_agent_id SKIP + fi + + log register_agent_id - "trying to register agent id" + registe_agent_with_excepte +} + +unregister_agent_id () { + local skip="$1" + log unregister_agent_id - "trying to unregister agent id" + if [ -f "$AGENT_SETUP_PATH/bin/gse_agent" ]; then + if [ -f "${GSE_AGENT_CONFIG_PATH}" ]; then + unregister_agent_id_result=$("$AGENT_SETUP_PATH"/bin/gse_agent -f "${GSE_AGENT_CONFIG_PATH}" --unregister 2>&1) + else + unregister_agent_id_result=$("$AGENT_SETUP_PATH"/bin/gse_agent --unregister 2>&1) + fi + + if [[ $? -eq 0 ]]; then + log unregister_agent_id SUCCESS "unregister agent id succeed" + else + if [[ "${skip}" == "SKIP" ]]; then + warn unregister_agent_id - "unregister agent id failed, but skip it. error: ${unregister_agent_id_result}" + else + fail unregister_agent_id FAILED "unregister agent id failed, error: ${unregister_agent_id_result}" + fi + fi + else + warn unregister_agent_id - "gse_agent file not exists in $AGENT_SETUP_PATH/bin" + fi +} + +start_agent () { + local i p + + "$AGENT_SETUP_PATH"/bin/gsectl start || fail setup_agent FAILED "start gse agent failed" + + sleep 3 + is_process_ok agent +} + +remove_proxy_if_exists () { + local i pids + local path=${AGENT_SETUP_PATH%/*}/proxy + + ! 
[[ -d $path ]] && return 0 + "$path/bin/gsectl" stop + + for p in agent transit btsvr opts; do + for i in {0..10}; do + set -A pids $(ps -ef pid,comm | grep gse_"$p" | awk '{print$1}') + if [ ${#pids[@]} -eq 0 ]; then + # 进程已退,继续检查下一个进程 + break + elif [ "$i" == 10 ]; then + # 强杀 + kill -9 "${pids[@]}" + else + sleep 1 + fi + done + done + + rm -rf "$path" +} + +stop_agent () { + local i pids + ! [[ -d $AGENT_SETUP_PATH ]] && return 0 + "$AGENT_SETUP_PATH/bin/gsectl" stop + + for i in 1 2 3 4 5 6 7 8 9 10; do + set -A pids $(ps -eo pid,comm | grep gse_agent |awk '{print$1}') + #read -r -a pids <<< "$(pidof "$AGENT_SETUP_PATH"/bin/gse_agent)" + if [[ ${#pids[@]} -eq 0 ]]; then + log setup_agent SUCCESS 'old agent has been stopped successfully' + break + elif [[ $i -eq 10 ]]; then + kill -9 "${pids[@]}" + else + sleep 1 + fi + done +} + +clean_up_agent_directory () { + for dir in "${AGENT_CLEAN_UP_DIRS[@]}"; do + rm -rf "${AGENT_SETUP_PATH}"/"${dir}" + done +} + +remove_agent () { + log remove_agent - 'trying to stop old agent' + stop_agent + + log remove_agent - "trying to remove old agent directory(${AGENT_SETUP_PATH}/${AGENT_CLEAN_UP_DIRS[@]})" + + if [[ "$REMOVE" == "TRUE" ]]; then + unregister_agent_id + clean_up_agent_directory + log remove_agent DONE "agent removed" + exit 0 + fi + clean_up_agent_directory +} + +get_config () { + local filename http_status + + log get_config - "request $NODE_TYPE config file(s)" + + for filename in "${AGENT_CONFIGS[@]}"; do + tmp_time=$(date +%Y%m%d_%H%M%S) + tmp_date=$(date +%s) + touch "/tmp/nm.reqbody."$tmp_time"."$tmp_date".json" + touch "/tmp/nm.reqresp."$tmp_time"."$tmp_date".json" + tmp_json_body="/tmp/nm.reqbody."$tmp_time"."$b_date".json" + tmp_json_resp="/tmp/nm.reqresp."$tmp_time"."$b_date".json" + cat > "$tmp_json_body" <<_OO_ +{ + "bk_cloud_id": ${CLOUD_ID}, + "filename": "${filename}", + "node_type": "${NODE_TYPE}", + "inner_ip": "${LAN_ETH_IP}", + "token": "${TOKEN}" +} +_OO_ + + 
http_status=$(http_proxy=$HTTP_PROXY https_proxy=$HTTP_PROXY \ + curl -s -g -S -X POST --retry 5 -d@"$tmp_json_body" "$CALLBACK_URL"/get_gse_config/ -o "$TMP_DIR/$filename" --silent -w "%{http_code}") + rm -f "$tmp_json_body" "$tmp_json_resp" + + if [[ "$http_status" != "200" ]]; then + fail get_config FAILED "request config $filename failed. request info:$CLOUD_ID,$LAN_ETH_IP,$NODE_TYPE,$filename,$TOKEN. http status:$http_status, file content: $(cat "$TMP_DIR/$filename")" + fi + done +} + +setup_agent () { + log setup_agent START "setup agent. (extract, render config)" + report_mkdir "$AGENT_SETUP_PATH"/etc + + cd "$AGENT_SETUP_PATH/.." && ( gunzip -dc "$TMP_DIR/$PKG_NAME" | tar xf - || fail setup_proxy FAILED "decompress package $PKG_NAME failed" ) + + get_config + + for f in "${AGENT_CONFIGS[@]}"; do + if [[ -f $TMP_DIR/$f ]]; then + cp -fp "$TMP_DIR/${f}" "${AGENT_SETUP_PATH}"/etc/${f} + else + fail setup_agent FAILED "agent config file ${f} lost. please check." + fi + done + + # create dir + report_mkdir "$GSE_AGENT_RUN_DIR" "$GSE_AGENT_DATA_DIR" "$GSE_AGENT_LOG_DIR" + + register_agent_id + + check_heathz_by_gse + + start_agent + + log setup_agent DONE "gse agent is setup successfully." +} + +download_pkg () { + local f http_status path + local tmp_stdout tmp_stderr curl_pid + if [[ "${REMOVE}" == "TRUE" ]]; then + log download_pkg - "remove agent, no need to download package" + return 0 + fi + + log download_pkg START "download gse agent package from $COMPLETE_DOWNLOAD_URL/$PKG_NAME)." + cd "$TMP_DIR" && rm -f "$PKG_NAME" + + for f in $PKG_NAME; do + http_status=$(http_proxy=$HTTP_PROXY https_proxy=$HTTPS_PROXY curl -O $COMPLETE_DOWNLOAD_URL/$f \ + --silent -w "%{http_code}") + # HTTP status 000需要进一步研究 + if [[ $http_status != "200" ]] && [[ "$http_status" != "000" ]]; then + fail download_pkg FAILED "file $f download failed. 
(url:$COMPLETE_DOWNLOAD_URL/$f, http_status:$http_status)" + fi + done + + log download_pkg DONE "gse_agent package download succeeded" + log report_cpu_arch DONE "${CPU_ARCH}" +} + +check_deploy_result () { + # 端口监听状态 + local ret=0 + + get_pid + is_connected "$IO_PORT" || { fail check_deploy_result FAILED "agent(PID:$gse_master) is not connect to gse server"; ((ret++)); } + is_connected "$DATA_PORT" || { fail check_deploy_result FAILED "agent(PID:$gse_master) is not connect to gse server"; ((ret++)); } + + [ $ret -eq 0 ] && log check_deploy_result DONE "gse agent has bean deployed successfully" +} + +# 日志行转为json格式函数 +log_to_json() { + local date _time log_level step status message + local input="$1" + + # 使用 awk 分离各字段 + # 假设 date、_time、log_level、step 和 status 是分隔符之间的字段 + echo "$input" | awk ' + { + # 假设前五个字段是 date、_time、log_level、step 和 status + date = $1 + _time = $2 + log_level = $3 + step = $4 + status = $5 + message = "" + for (i = 6; i <= NF; i++) { + if (i == 6) { + message = $i + } else { + message = message " " $i + } + } + printf("%s %s %s %s %s %s\n", date, _time, log_level, step, status, message) + } + ' | { + read -r date _time log_level step status message + + # 合成完整的日期时间字符串 + datetime="$date $_time" + + # 使用 Perl 计算 Unix 时间戳 + timestamp=$(perl -e 'use Time::Piece; print Time::Piece->strptime($ARGV[0], "%Y-%m-%d %H:%M:%S")->epoch' "$datetime") + + # 输出 JSON 格式 + printf '{"timestamp": "%s", "level": "%s", "step":"%s", "log":"%s","status":"%s"}\n' \ + "$timestamp" "$log_level" "$step" "$message" "$status" + } +} + +# 读入LOG_FILE的日志然后批量上报 +# 用法:bulk_report_step_status +bulk_report_step_status () { + local log_file=$1 + local bulk_size=${2:-3} # 默认设置为累积三条报一次 + local is_urg=${3:-""} # 设置URG后立即上报 + local log_total_line diff + local bulk_log log line json_log + local tmp_json_body tmp_json_resp + + # 未设置上报API时,直接忽略 + [[ -z "$CALLBACK_URL" ]] && return 0 + log_total_line=$(wc -l <"$log_file" | tr -d ' ') + diff=$(( log_total_line - LOG_RPT_CNT )) + + if (( 
diff >= bulk_size )) || [[ $is_urg = "URG" ]]; then + LOG_RPT_CNT=$(expr $LOG_RPT_CNT + 1) #always report from next line + bulk_log=$(sed -n "${LOG_RPT_CNT},${log_total_line}p" "$log_file") + # 如果刚好 log_total_line能整除 bulk_size时,最后EXIT的URG调用会触发一个空行 + # 判断如果是空字符串则不上报 + if [[ -z "$bulk_log" ]]; then + return 0 + fi + else + return 0 + fi + LOG_RPT_CNT=$log_total_line + + # 构建log数组 + echo "$bulk_log" | while read -r line; do + log_json=$(log_to_json "$line") + log[${#log[@]}]=$log_json + done + # 生成log json array + json_log=$(printf "%s," "${log[@]}") + json_log=${json_log%,} + + tmp_time=$(date +%Y%m%d_%H%M%S) + tmp_date=$(date +%s) + touch "/tmp/nm.reqbody."$tmp_time"."$tmp_date".json" + touch "/tmp/nm.reqresp."$tmp_time"."$tmp_date".json" + tmp_json_body="/tmp/nm.reqbody."$tmp_time"."$tmp_date".json" + tmp_json_resp="/tmp/nm.reqresp."$tmp_time"."$tmp_date".json" + + cat > "$tmp_json_body" <<_OO_ +{ + "task_id": "$TASK_ID", + "token": "$TOKEN", + "logs": [ $json_log ] +} +_OO_ + + http_proxy=$HTTP_PROXY https_proxy=$HTTP_PROXY \ + curl -g -s -S -X POST --retry 5 -d@"$tmp_json_body" "$CALLBACK_URL"/report_log/ -o "$tmp_json_resp" + rm -f "$tmp_json_body" "$tmp_json_resp" +} + +report_step_status () { + local date _time log_level step status message + local tmp_json_body tmp_json_resp + + # 未设置上报API时,直接忽略 + [ -z "$CALLBACK_URL" ] && return 0 + + echo "$@" | read date _time log_level step status message + + tmp_time=$(date +%Y%m%d_%H%M%S) + tmp_date=$(date +%s) + touch "/tmp/nm.reqbody."$tmp_time"."$tmp_date".json" + touch "/tmp/nm.reqresp."$tmp_time"."$tmp_date".json" + tmp_json_body="/tmp/nm.reqbody."$tmp_time"."$tmp_date".json" + tmp_json_resp="/tmp/nm.reqresp."$tmp_time"."$tmp_date".json" + + cat > "$tmp_json_body" <<_OO_ +{ + "task_id": "$TASK_ID", + "token": "$TOKEN", + "logs": [ + { + "timestamp": "$(date +%s)", + "level": "$log_level", + "step": "$step", + "log": "$message", + "status": "$status" + } + ] +} +_OO_ + http_proxy=$HTTP_PROXY https_proxy=$HTTP_PROXY 
\ + curl -s -S -X POST -d@"$tmp_json_body" "$CALLBACK_URL"/report_log/ -o "$tmp_json_resp" + rm -f "$tmp_json_body" "$tmp_json_resp" +} + +validate_vars_string () { + echo "$1" | grep -Pq '^[a-zA-Z_][a-zA-Z0-9_]*=' +} + +check_pkgtool () { + local stderr_to + stderr_to=$(touch /tmp/nm.chkpkg.`date +%s`) + _yum=$(command -v yum) + _apt=$(command -v apt) + _dnf=$(command -v dnf) + + _curl=$(command -v curl) + + if [ -f "$_curl" ]; then + return 0 + else + log check_env - "trying to install curl by package management tool" + if [ -f "$_yum" ]; then + # yum 的报错可能有多行,此时错误信息的展示和上报需要单独处理 + yum -y -q install curl 2>"$stderr_to" || \ + fail check_env FAILED "install curl failed." + elif [ -f "$_apt" ]; then + apt-get -y install curl 2>"$stderr_to" || \ + fail check_env FAILED "install curl failed." + elif [ -f "$_dnf" ]; then + dnf -y -q install curl 2>"$stderr_to" || \ + fail check_env FAILED "install curl failed." + else + fail check_env FAILED "no curl command found and can not be installed by neither yum,dnf nor apt-get" + fi + + log check_env - "curl has been installed" + fi +} + +check_disk_space () { + if df -k "$TMP_DIR" | awk 'NR!=1 && $3 >= 300*1024 {x=1;}END{if (x== 1) {exit 0} else {exit 1}}'; then + log check_env - "check free disk space. done" + else + fail check_env FAILED "no enough space left on $TMP_DIR" + fi +} + +report_mkdir () { + local dirs="$@" + for dir in ${dirs[@]}; do + local result + if [[ -d "${dir}" ]]; then + continue + else + result="$(mkdir -p ${dir} 2>&1)" + if [ $? -ne 0 ]; then + if [[ -f "${dir}" ]]; then + fail check_env FAILED "create directory $dir failed. error: ${dir} exists and is a normal file" + else + fail check_env FAILED "create directory $dir failed. error: ${result}" + fi + fi + fi + done +} + +check_dir_permission () { + mkdir -p "$TMP_DIR" || fail check-env FAILED "custom temprary dir '$TMP_DIR' create failed." + datatemp=$(date +%s) + if ! 
`touch "$TMP_DIR/nm.test.$datatemp" &>/dev/null` ; then + rm "$TMP_DIR"/nm.test.* + fail check_env FAILED "create temp files failed in $TMP_DIR" + else + log check_env - "check temp dir write access: yes" + fi +} + +check_download_url () { + local http_status f + + if [[ "${REMOVE}" == "TRUE" ]]; then + return 0 + fi + + for f in $PKG_NAME; do + log check_env - "checking resource($COMPLETE_DOWNLOAD_URL/$f) url's validality" + http_status=$(curl -g -o /dev/null --silent -Iw '%{http_code}' "$COMPLETE_DOWNLOAD_URL/$f") + if [[ "$http_status" == "200" ]] || [[ "$http_status" == "000" ]]; then + log check_env - "check resource($COMPLETE_DOWNLOAD_URL/$f) url succeed" + else + fail check_env FAILED "check resource($COMPLETE_DOWNLOAD_URL/$f) url failed, http_status:$http_status" + fi + done +} + +check_target_clean () { + if [[ -d $AGENT_SETUP_PATH/ ]]; then + warn check_env - "directory $AGENT_SETUP_PATH is not clean. everything will be wiped unless -u was specified" + fi +} + +_help () { + + echo "${0%*/} -i CLOUD_ID -l URL -I LAN_IP [OPTIONS]" + + echo " -n NAME" + echo " -t VERSION" + echo " -I lan ip address on ethernet " + echo " -i CLOUD_ID" + echo " -l DOWNLOAD_URL" + echo " -s TASK_ID. [optional]" + echo " -c TOKEN. [optional]" + echo " -r CALLBACK_URL, [optional]" + echo " -x HTTP_PROXY, [optional]" + echo " -p AGENT_SETUP_PATH, [optional]" + echo " -e BT_FILE_SERVER_IP, [optional]" + echo " -a DATA_SERVER_IP, [optional]" + echo " -k TASK_SERVER_IP, [optional]" + echo " -N UPSTREAM_TYPE, 'server' or 'proxy' [optional]" + echo " -T TEMP directory, [optional]" + echo " -v CUSTOM VARIABLES ASSIGNMENT LISTS. [optional]" + echo " valid variables:" + echo " GSE_AGENT_RUN_DIR" + echo " GSE_AGENT_DATA_DIR" + echo " GSE_AGENT_LOG_DIR" + echo " -o enable override OPTION DEFINED VARIABLES by -v. 
[optional]" + echo " -O IO_PORT" + echo " -E FILE_SVR_PORT" + echo " -A DATA_PORT" + echo " -V BTSVR_THRIFT_PORT" + echo " -B BT_PORT" + echo " -S BT_PORT_START" + echo " -Z BT_PORT_END" + echo " -K TRACKER_PORT" + echo " -F UNREGISTER_AGENT_ID [optional]" + + exit 0 +} + +check_env () { + local node_type=${1:-$NODE_TYPE} + + log check_env START "checking prerequisite. NETWORK_POLICY,DISK_SPACE,PERMISSION,RESOURCE etc.[PID:$CURR_PID]" + + [ "$CLOUD_ID" != "0" ] && node_type=pagent + validate_setup_path + check_disk_space + check_dir_permission + check_pkgtool + check_download_url + check_target_clean + + log check_env DONE "checking prerequisite done, result: SUCCESS" +} + +# DEFAULT SETTINGS +CLOUD_ID=0 +TMP_DIR=/tmp +AGENT_SETUP_PATH="/usr/local/gse/${NODE_TYPE}" +CURR_PID=$$ +OVERIDE=false +REMOVE=false +UNREGISTER_AGENT_ID=false +CALLBACK_URL= +AGENT_PID= +DEBUG= + +# 已上报的日志行数 +LOG_RPT_CNT=0 +BULK_LOG_SIZE=3 + +# main program +while getopts n:t:I:i:l:s:uc:r:x:p:e:a:k:N:v:oT:RDO:E:A:V:B:S:Z:K:F arg; do + case $arg in + n) NAME="$OPTARG" ;; + t) VERSION="$OPTARG" ;; + I) LAN_ETH_IP=$OPTARG ;; + i) CLOUD_ID=$OPTARG ;; + l) DOWNLOAD_URL=${OPTARG%/} ;; + s) TASK_ID=$OPTARG ;; + c) TOKEN=$OPTARG ;; + r) CALLBACK_URL=$OPTARG ;; + x) HTTP_PROXY=$OPTARG; HTTPS_PROXY=$OPTARG ;; + p) AGENT_SETUP_PATH=$(echo "$OPTARG/$NODE_TYPE" | sed 's|//*|/|g') ;; + e) BT_FILE_SERVER_IP=$(echo "$OPTARG" | awk -F , '{print$1}') ;; + a) DATA_SERVER_IP=$(echo "$OPTARG" | awk -F , '{print$1}') ;; + k) TASK_SERVER_IP=$(echo "$OPTARG" | awk -F , '{print$1}') ;; + N) UPSTREAM_TYPE=$OPTARG ;; + v) VARS_LIST="$OPTARG" ;; + o) OVERIDE=TRUE ;; + T) TMP_DIR=$OPTARG; mkdir -p "$TMP_DIR" ;; + R) REMOVE=TRUE ;; + D) DEBUG=TRUE ;; + O) IO_PORT=$OPTARG ;; + E) FILE_SVR_PORT=$OPTARG ;; + A) DATA_PORT=$OPTARG ;; + V) BTSVR_THRIFT_PORT=$OPTARG ;; + B) BT_PORT=$OPTARG ;; + S) BT_PORT_START=$OPTARG ;; + Z) BT_PORT_END=$OPTARG ;; + K) TRACKER_PORT=$OPTARG ;; + F) UNREGISTER_AGENT_ID=TRUE ;; + *) _help ;; + 
esac +done + +IS_SUPER=true +if sudo -n true 2>/dev/null; then + IS_SUPER=true +else + IS_SUPER=false +fi + +## 检查自定义环境变量 +for var_name in ${VARS_LIST}; do + validate_vars_string "$var_name" || fail "$var_name is not a valid name" + + case ${var_name%=*} in + CLOUD_ID | DOWNLOAD_URL | TASK_ID | CALLBACK_URL | HOST_LIST_FILE | NODEMAN_PROXY | AGENT_SETUP_PATH) + [ "$OVERIDE" == "TRUE" ] || continue ;; + VARS_LIST) continue ;; + esac + + eval "$var_name" +done + +# 获取包名 +PKG_NAME=${NAME}-${VERSION}.tgz +COMPLETE_DOWNLOAD_URL="${DOWNLOAD_URL}/agent/aix/${CPU_ARCH}" +GSE_AGENT_CONFIG_PATH="${AGENT_SETUP_PATH}/etc/${GSE_AGENT_CONFIG}" + +LOG_FILE="$TMP_DIR"/nm.${0##*/}.$TASK_ID +DEBUG_LOG_FILE=${TMP_DIR}/nm.${0##*/}.${TASK_ID}.debug + +# redirect STDOUT & STDERR to DEBUG +mkfifo /tmp/logpipe +tee "$DEBUG_LOG_FILE" < /tmp/logpipe & +exec > /tmp/logpipe 2>&1 + +log check_env - "Args are: $*" + +# removed remove_crontab、setup_startup_scripts -> 由 gsectl 判断是否添加 / 移除 + +for step in check_env \ + download_pkg \ + remove_agent \ + remove_proxy_if_exists \ + setup_agent \ + check_deploy_result; do + $step +done diff --git a/script_tools/agent_tools/agent2/setup_agent.sh b/script_tools/agent_tools/agent2/setup_agent.sh index 88c0e9de0..ce140f5a1 100755 --- a/script_tools/agent_tools/agent2/setup_agent.sh +++ b/script_tools/agent_tools/agent2/setup_agent.sh @@ -130,7 +130,6 @@ validate_setup_path () { /sys /sbin /root - /home ) local invalid_path=( @@ -314,6 +313,10 @@ check_heathz_by_gse () { } remove_crontab () { + if [ $IS_SUPER == false ]; then + return + fi + local tmpcron tmpcron=$(mktemp "$TMP_DIR"/cron.XXXXXXX) @@ -327,6 +330,10 @@ remove_crontab () { } setup_startup_scripts () { + if [ $IS_SUPER == false ]; then + return + fi + check_rc_file local rcfile=$RC_LOCAL_FILE @@ -475,7 +482,10 @@ remove_agent () { log remove_agent - "trying to remove old agent directory(${AGENT_SETUP_PATH}/${AGENT_CLEAN_UP_DIRS[@]})" cd "${AGENT_SETUP_PATH}" || return 0 - for file in `lsattr -R 
|egrep "i-" |awk '{print $NF}'`;do echo "--- $file" && chattr -i $file ;done + + if [ $IS_SUPER == true ]; then + for file in `lsattr -R |egrep "i-" |awk '{print $NF}'`;do echo "--- $file" && chattr -i $file ;done + fi cd - if [[ "$REMOVE" == "TRUE" ]]; then @@ -686,7 +696,7 @@ _OO_ } validate_vars_string () { - echo "$1" | grep -Pq '^[a-zA-Z_][a-zA-Z0-9]+=' + echo "$1" | grep -Pq '^[a-zA-Z_][a-zA-Z0-9_]*=' } check_pkgtool () { @@ -886,6 +896,13 @@ while getopts n:t:I:i:l:s:uc:r:x:p:e:a:k:N:v:oT:RDO:E:A:V:B:S:Z:K:F arg; do esac done +IS_SUPER=true +if sudo -n true 2>/dev/null; then + IS_SUPER=true +else + IS_SUPER=false +fi + ## 检查自定义环境变量 for var_name in ${VARS_LIST//;/ /}; do validate_vars_string "$var_name" || fail "$var_name is not a valid name" diff --git a/script_tools/agent_tools/agent2/setup_agent.zsh b/script_tools/agent_tools/agent2/setup_agent.zsh index 468c19bd9..56d8f73f3 100644 --- a/script_tools/agent_tools/agent2/setup_agent.zsh +++ b/script_tools/agent_tools/agent2/setup_agent.zsh @@ -133,7 +133,6 @@ validate_setup_path () { /sys /sbin /root - /home ) local invalid_path=( @@ -314,6 +313,10 @@ check_heathz_by_gse () { } remove_crontab () { + if [ $IS_SUPER == false ]; then + return + fi + local tmpcron tmpcron=$(mktemp "$TMP_DIR"/cron.XXXXXXX) @@ -332,6 +335,10 @@ get_daemon_file () { } setup_startup_scripts () { + if [ $IS_SUPER == false ]; then + return + fi + get_daemon_file touch $DAEMON_FILE_PATH$DAEMON_FILE_NAME bash -c "cat >$DAEMON_FILE_NAME" << EOF @@ -487,7 +494,9 @@ remove_agent () { log remove_agent - "trying to remove old agent directory(${AGENT_SETUP_PATH}/${AGENT_CLEAN_UP_DIRS[@]})" cd "${AGENT_SETUP_PATH}" - for file in `ls -lR@ |ggrep -E "i-" |awk '{print $NF}'`;do echo "--- $file" && chattr -i $file ;done + if [ $IS_SUPER == true ]; then + for file in `ls -lR@ |ggrep -E "i-" |awk '{print $NF}'`;do echo "--- $file" && chattr -i $file ;done + fi cd - if [[ "$REMOVE" == "TRUE" ]]; then @@ -696,7 +705,7 @@ _OO_ } validate_vars_string () 
{ - echo "$1" | grep -Pq '^[a-zA-Z_][a-zA-Z0-9]+=' + echo "$1" | grep -Pq '^[a-zA-Z_][a-zA-Z0-9_]*=' } check_pkgtool () { @@ -897,6 +906,13 @@ while getopts n:t:I:i:l:s:uc:r:x:p:e:a:k:N:v:oT:RDO:E:A:V:B:S:Z:K:F arg; do esac done +IS_SUPER=true +if sudo -n true 2>/dev/null; then + IS_SUPER=true +else + IS_SUPER=false +fi + ## 检查自定义环境变量 for var_name in ${VARS_LIST//;/ /}; do validate_vars_string "$var_name" || fail "$var_name is not a valid name" diff --git a/script_tools/agent_tools/agent2/setup_proxy.sh b/script_tools/agent_tools/agent2/setup_proxy.sh index 8ed7b13c8..096f13e21 100755 --- a/script_tools/agent_tools/agent2/setup_proxy.sh +++ b/script_tools/agent_tools/agent2/setup_proxy.sh @@ -311,6 +311,10 @@ report_mkdir () { } remove_crontab () { + if [ $IS_SUPER == false ]; then + return + fi + local tmpcron tmpcron=$(mktemp "$TMP_DIR"/cron.XXXXXXX) @@ -324,6 +328,10 @@ remove_crontab () { } setup_startup_scripts () { + if [ $IS_SUPER == false ]; then + return + fi + check_rc_file local rcfile=$RC_LOCAL_FILE @@ -663,7 +671,7 @@ _OO_ } validate_vars_string () { - echo "$1" | grep -Pq '^[a-zA-Z_][a-zA-Z0-9]+=' + echo "$1" | grep -Pq '^[a-zA-Z_][a-zA-Z0-9_]*=' } check_pkgtool () { @@ -875,6 +883,12 @@ while getopts n:t:I:i:l:s:uc:r:x:p:e:a:k:N:g:v:oT:RO:E:A:V:B:S:Z:K:F arg; do esac done +IS_SUPER=true +if sudo -n true 2>/dev/null; then + IS_SUPER=true +else + IS_SUPER=false +fi ## 检查自定义环境变量 for var_name in ${VARS_LIST//;/ /}; do diff --git a/script_tools/gsectl/agent/aix/gsectl b/script_tools/gsectl/agent/aix/gsectl new file mode 100644 index 000000000..e510330b1 --- /dev/null +++ b/script_tools/gsectl/agent/aix/gsectl @@ -0,0 +1,672 @@ +#!/bin/ksh +# vim:ft=sh sts=4 ts=4 expandtab + +# 切换到本脚本(gsectl)所在的目录,并设置WORK_HOME变量为上一级目录 +export LC_ALL=C +cd ${0%/*} 2>/dev/null +WORK_HOME=${PWD%/bin} +WORK_HOME=`echo $WORK_HOME |sed 's/\/$//g'` +INSTALL_ENV=`echo $WORK_HOME |awk -F/ '{print $(NF-1)}'` + +IS_SUPER=true +if sudo -n true 2>/dev/null; then + IS_SUPER=true +else + 
IS_SUPER=false +fi + +VAR_RUN_DIR=/var/run +if [ $IS_SUPER == false ]; then + if [ ! -d ${PWD}/run ]; then + mkdir -p ${PWD}/run + fi + VAR_RUN_DIR=${PWD}/run +fi + +# 设置agent的max open files +ulimit -n 409600 2>/dev/null +ulimit -c unlimited + +usage () { + echo "useage: gsectl ACTION [MODULE_NAME ... ]" + echo "" + echo "ACTION list: start, stop, restart" + echo " start start gse_agent" + echo " stop stop gse_agent" + echo " restart restart gse_agent" + echo " reload reload gse_agent" + echo " watch watch gse_agent without systemd" +} + +# 启动agent +start_by_binary () { + + local ret=0 + local rt + local info + + info=$(_status) + rt=$? + case $rt in + 0) status="process:gse_agent pid:${info[0]} etime:${info[1]} Already RUNNING" ;; + 1) status="ERROR STATUS" ;; + 2) status="EXIT" ;; + 3) status="Reload failed" ;; + 4) status="have more than one ppid equal 1" ;; + esac + + if [ $rt -eq 0 ];then + printf "%s: %s\n" "gse_agent" "$status" + exit 0 + else + echo "have no gse_agent Running, status: $status, then starting" + fi + + if [ $rt -eq 4 ];then + if [ `ps -ef |egrep gse_agent |egrep -w $WORK_HOME |awk '$3 == 1' |egrep -v grep |wc -l` -ge 1 ];then + echo "have more than one agentWorker process with ppid equal 1, need to kill" + #ps -ef |egrep gse_agent |egrep -w $WORK_HOME |awk '$3 == 1' |awk '{print $2}' |xargs kill -9 + fi + fi + + echo "start gse_agent ..." + ( ./gse_agent -f $WORK_HOME/etc/gse_agent.conf ) 1>/tmp/start_${node_type}_tmp.log 2>&1; sleep 3 + + + __status "start"; + if [ $? -ne 0 ];then + tail /tmp/start_${node_type}_tmp.log + return 1 + fi +} + +# 停止agent +stop_by_binary () { + # 调用gse_agent --quit停止进程,并等待它退出 + if [ -f ./gse_agent ]; then + ( ./gse_agent --quit ) >/dev/null 2>&1 + sleep 3 + else + echo "no such file: gse_agent. " + return 1 + fi + + _status stop + # 状态码为2的时候,表示进程不存在的了 + if [[ $? 
-eq 2 ]]; then + echo "gse agent stop successful" + return 0 + else + echo "gse agent stop failed" + return 1 + fi +} + +# 重启agent +restart_by_binary () { + stop_by_binary $module && start_by_binary $module +} + +# 重载agent +reload_by_binary () { + echo "reload gse_agent ..." + ( ./gse_agent --reload ) >/dev/null 2>&1; sleep 5 + + __status "reload"; +} + +# 检测agent状态 +status_by_binary () { + local rt + local info + + info=$(_status) + rt=$? + case $rt in + 0) status="pid:${info[0]} etime:${info[1]} RUNNING" ;; + 1) status="ERROR STATUS" ;; + 2) status="EXIT" ;; + 3) status="Reload failed" ;; + 4) status="have more than one ppid equal 1" ;; + esac + printf "%s: %s\n" "gse_agent" "$status" + return $rt +} + +# 检测agent健康状态 +healthz_by_binary () { + local rt + local info + + info=$(_healthz) + printf "%s\n" "$info" + return $rt +} + +red_echo () { [ "$HASTTY" != "1" ] && echo "$@" || echo -e "\033[031;1m$*\033[0m"; } +blue_echo () { [ "$HASTTY" != "1" ] && echo "$@" || echo -e "\033[034;1m$*\033[0m"; } +green_echo () { [ "$HASTTY" != "1" ] && echo "$@" || echo -e "\033[032;1m$*\033[0m"; } + +log () { + # 打印消息, 并记录到日志, 日志文件由 LOG_FILE 变量定义 + local retval=$? + local timestamp=$(date +%Y%m%d-%H%M%S) + local level=INFO + local func_seq=$(echo "${FUNCNAME[@]}" | sed 's/ /-/g') + local logfile=${LOG_FILE:=/tmp/watch_${INSTALL_ENV}_${node_type}.log} + local minute + local firstday + + # 如果当前时间为当月1号0点时间,则重命名日志文件名称 + # 获取当前时间的分钟数及当月1号 + minute=$(date +%M) + firstday=$(date +%d) + + # 判断是否为当月1号0点时间 + if [ "$minute" == "00" -a "$firstday" == "01" ]; then + if [ -f ${LOG_FILE}_$(date -d "last month" '+%Y%m').log ];then + echo "backup log already exists" + else + echo "[$(blue_echo ${EXTERNAL_IP}-$LAN_IP)]$timestamp $level|$BASH_LINENO|${func_seq} The current day is first day of month, reset the log file to new one ." 
>>$logfile + [ -f $LOG_FILE ] && mv $LOG_FILE ${LOG_FILE}_$(date -d "last month" '+%Y%m').log + touch $LOG_FILE + if [ -f /tmp/watch_gse2_agent.log ];then + mv /tmp/watch_gse2_agent.log /tmp/watch_gse2_agent_$(date -d "last month" '+%Y%m').log + fi + fi + fi + + local opt= + + if [ "${1:0:1}" == "-" ]; then + opt=$1 + shift 1 + else + opt="" + fi + + echo -e $opt "[$(blue_echo ${EXTERNAL_IP:-$LAN_IP})]$timestamp|$BASH_LINENO\t$*" + echo "[$(blue_echo ${EXTERNAL_IP}-$LAN_IP)]$timestamp $level|$BASH_LINENO|${func_seq} $*" >>$logfile + + return $retval +} + +watch_by_binary () { + log "=================================" + log "Start detecting..." + local module="agent" + + # 设置记录上次脚本运行的文件 + LAST_RUN_FILE=$VAR_RUN_DIR/already_run_times_$module + + # 如果文件存在,则读取文件中记录的次数 + if [ -f $LAST_RUN_FILE ]; then + run_count=$(cat $LAST_RUN_FILE) + else + run_count=0 + fi + + # 如果当前时间为整点时间,则重置计数,重新开始检测 + # 获取当前时间的分钟数 + minute=$(date +%M) + + # 判断是否为整点时间 + if [ "$minute" == "00" ]; then + if [ -f $LAST_RUN_FILE -a $run_count -gt 0 ];then + log "The current time is on the hour, reset the counter $run_count -> 0, and restart the detection." 
+ echo 0 > $LAST_RUN_FILE + fi + fi + + # 设置告警阈值 + THRESHOLD=5 + + # 检查上一次脚本是否存在 + if [ -f $VAR_RUN_DIR/gsectl_check_agent_status.pid ]; then + pid=`cat $VAR_RUN_DIR/gsectl_check_agent_status.pid` + if [ -d "/proc/$pid" ]; then + log "`date +'%F %T.%N'` Last Script: $0 Detection status: PID:$pid is until running , no longer checking the status of the module: ${module}" + return + else + # 如果超过阈值,则发出告警 + if [ $run_count -ge $THRESHOLD ]; then + log "`date +'%F %T.%N'` Script: $0 Detection status: Failed to start the process, exceeded $run_count cycles, no longer checking the status of the module: ${module}" + return + else + log "`date +'%F %T.%N'` The previous script: $0 watch has ended, starting a new detection" + fi + fi + fi + + # 记录当前脚本的 PID + echo $$ > $VAR_RUN_DIR/gsectl_check_agent_status.pid + + # 检测gse_agent是否正常存在的逻辑 + if [ -z "${module}" ]; then + echo "watch: get module: ${module} failed" + log "watch: get module: ${module} failed" + else + if ! _status ${module}; then + stop_by_binary + start_by_binary + if [ $? 
-ne 0 ];then + log "`date +'%F %T.%N'` Process failed to start, increment counter" + run_count=$((run_count + 1)) + echo $run_count > $LAST_RUN_FILE + fi + else + if [ $run_count -ne 0 ];then + log "`date +'%F %T.%N'` The previous script: $0 Detection ${module} status is Running , then reset the count" + echo 0 > $LAST_RUN_FILE + fi + fi + fi + return +} + +start_by_crontab () { + start_by_binary + add_startup_to_boot + setup_crontab + return +} + +stop_by_crontab () { + remove_crontab + stop_by_binary + return +} + + +reload_by_crontab () { + reload_by_binary + add_startup_to_boot + setup_crontab + return +} + +restart_by_crontab () { + restart_by_binary + add_startup_to_boot + setup_crontab + return +} + + +status_by_crontab () { + status_by_binary + return +} + +healthz_by_crontab () { + healthz_by_binary + return +} + +watch_by_crontab () { + watch_by_binary + return +} + + +start_by_rclocal () { + remove_crontab + start_by_binary + + add_startup_to_boot + return +} + +stop_by_rclocal () { + stop_by_binary + return +} + +reload_by_rclocal () { + remove_crontab + reload_by_binary + add_startup_to_boot + return +} + +restart_by_rclocal () { + remove_crontab + restart_by_binary + add_startup_to_boot + return +} + + +status_by_rclocal () { + status_by_binary + return +} + +healthz_by_rclocal () { + healthz_by_binary + return +} + +check_rc_file () { + RC_LOCAL_FILE="/etc/rc.local" + if [ -f "$RC_LOCAL_FILE" ]; then + return 0 + elif [ -f "/etc/rc.d/rc.local" ]; then + RC_LOCAL_FILE="/etc/rc.d/rc.local" + elif [ -f "/etc/init.d/rc.local" ]; then + RC_LOCAL_FILE="/etc/init.d/rc.local" + elif [ -f "/etc/init.d/boot.local" ]; then + RC_LOCAL_FILE="/etc/init.d/boot.local" + else + RC_LOCAL_FILE="`ls -l "/etc/rc.local" | awk '{print $NF}'`" + fi +} + +add_startup_to_boot () { + + # 非root用户无法操作rclocal + if [ $IS_SUPER == false ]; then + echo "Not root user, can't operate rc.local" + return + fi + + local module=agent + + # 添加启动项到 rc.local + echo "Check startup items, and 
if not existing, add the [${module}] startup item to rc.local" + + check_rc_file + local rcfile=$RC_LOCAL_FILE + + chmod +x $rcfile + + # 先删后加,避免重复 + rm ${rcfile}.bak && cp ${rcfile} ${rcfile}.bak + sed "\|${WORK_HOME}/bin/gsectl start ${module}|d" $rcfile > ${rcfile}.tmp + rm $rcfile && mv ${rcfile}.tmp ${rcfile} + + echo "[ -f ${WORK_HOME}/bin/gsectl ] && ${WORK_HOME}/bin/gsectl start ${module} 1>>/var/log/${INSTALL_ENV}_${node_type}.log 2>&1" >>$rcfile +} + +setup_crontab () { + local tmpcron + + if [ -n "`crontab -l | grep \"$WORK_HOME/bin/gsectl\" |egrep -v \"^#|\s+#\"`" ];then + echo "The watch detection entry is already in the crontab..." + return 0 + fi + + tmpcron=/tmp/cron.XXXXXXX + + ( + crontab -l | grep -v "$WORK_HOME/bin/gsectl" + echo "#$WORK_HOME/bin/gsectl Agent check, add by NodeMan @ `date +'%F %T'`" + echo "* * * * * $WORK_HOME/bin/gsectl watch agent 1>>/tmp/watch_gse2_agent.log 2>&1" + ) > "$tmpcron" + + crontab "$tmpcron" && rm -f "$tmpcron" + crontab -l |egrep "$WORK_HOME" +} + +remove_crontab () { + local tmpcron + local datatemp=$(date +%s) + + crontab -l | grep -v "$AGENT_SETUP_PATH/bin/gsectl" > /tmp/cron.$datatemp + crontab /tmp/cron.$datatemp && rm -f /tmp/cron.$datatemp + + # 下面这段代码是为了确保修改的crontab能立即生效 + if [ $IS_SUPER == true ]; then + ps -eo pid,comm | grep cron |awk '{print$1}' | xargs kill -9 + else + crontab -l | crontab - + fi +} + +get_process_runtime () { + local p_status tmp_gse_master_pid_info tmp_gse_agent_master_pids _pid PID + p_status=1 + + sleep 3 + + for i in {1..20} + do + set -A tmp_gse_agent_master_pids $(ps -eo ppid,pid,args | awk '$1 == 1 && $3 ~ /gse_agent/ {print $2}' | xargs) + + for _pid in "${tmp_gse_agent_master_pids[@]}"; do + tmp_abs_path=$(ps -eo pid,args | grep $_pid | grep -v grep | awk '{for (i=2; i<=NF; i++) printf "%s ", $i; print ""}') + # 两个路径都用readlink -f 防止有软链接目录 + # master既然存在,先判断路径是否包含WORK_HOME + if [[ "$tmp_abs_path" == *"${WORK_HOME}"* ]]; then + # 找到了匹配的pid + # 获取进程pid的启动时间 + PID=$_pid + 
ETIME=$(ps -o etime= -p $PID | tr -d ' ') + TIME_DIFF=$(convert_to_seconds $ETIME) + + if [ $TIME_DIFF -le 20 ]; then + echo "gse_agent -> $PID has been running for $TIME_DIFF seconds, check $i times" + p_status=0 + break 2 + else + echo "gse_agent -> $PID has been running for $TIME_DIFF seconds, restart not yet successful, check $i times" + sleep 1 + fi + fi + done + done + return $p_status +} + +convert_to_seconds() { + local etime="$1" + local days=0 + local hours=0 + local minutes=0 + local seconds=0 + + if [[ "$etime" == *-*-* ]]; then + # 格式是 D-HH:MM:SS + days=$(echo "$etime" | awk -F '-' '{print $1}') + etime=$(echo "$etime" | awk -F '-' '{print $2}') + fi + + if [[ "$etime" == *:* ]]; then + # 格式是 HH:MM:SS 或 MM:SS + if [[ "$etime" == *:*:* ]]; then + # HH:MM:SS + hours=$(echo "$etime" | awk -F ':' '{print $1}') + minutes=$(echo "$etime" | awk -F ':' '{print $2}') + seconds=$(echo "$etime" | awk -F ':' '{print $3}') + else + # MM:SS + minutes=$(echo "$etime" | awk -F ':' '{print $1}') + seconds=$(echo "$etime" | awk -F ':' '{print $2}') + fi + fi + + # 转换为秒 + total_seconds=$((days * 86400 + hours * 3600 + minutes * 60 + seconds)) + echo "$total_seconds" +} + +__status (){ + local action=$1 + + # 最多等待20s来判断是否真正启动成功 + for i in {0..20}; do + if [ "$action" == "stop" ];then + if [ $(ps -eo pid,comm,args | grep gse_agent |egrep "${WORK_HOME}" |wc -l) -eq 0 ];then + echo "gse_agent $action $action success" + break + elif [ $i -eq 20 ];then + echo "gse_agent $action $action failed" + return 1 + else + sleep 1 + fi + else + if _status $action >/dev/null; then + # 启动正常,直接退出,返回码0 + echo "gse agent start successful" + + if [ "$action" == "start" -o "$action" == "restart" ];then + get_process_runtime + if [ $? -ne 0 ];then + echo "gse_agent $action failed" + return 3 + fi + elif [ "$action" == "reload" ];then + for i in {0..5}; do + get_process_runtime + if [ $? -eq 0 ];then + break + elif [ $? 
-ne 0 ];then + sleep 2 + elif [ $i -eq 5 ];then + echo "gse_agent $action failed" + return 3 + fi + done + fi + + return 0 + elif [ $i -eq 20 ]; then + # i等于20,超时退出,返回码1 + echo "gse agent start failed" + return 1 + else + sleep 2 + fi + fi + done +} + +# 返回码: +# 0: 正常,且成对出现 +# 1:异常,存在master进程但是worker不存在 +# 2: 异常,没有master进程存在 +# 3: 异常,进程重启、reload、启动失败 +_status () { + local gse_master_info _pid pid abs_path + local action=$1 + + if [ "$action" == "reload" ];then + # 如果是reload,需要新的进程启动,才能继续判断进程是否符合正常情况 + get_process_runtime + if [ $? -ne 0 ];then + echo "gse_agent $action failed" + return 3 + fi + fi + + # 初筛,考虑到gse组件的父、子进程都是名为gse_agent的,且它的父进程应该是等于1 + # ps的-o参数指定输出字段%P(ppid)、%p(pid)、%a(args) + # 所以下面命令是拉出所有进程名为gse_agent,且父进程为1,进程参数包含gse_agent的进程信息 + set -A gse_agent_master_pids $(ps -eo ppid,pid,args | awk '{print $1 "|" $2 "|" substr($0, index($0,$3))}' | awk -F'|' '$1 == 1 && $3 ~ /gse_agent/' | awk -F'|' '{print $2}' | xargs) + + if [[ -z "$gse_agent_master_pids" ]]; then + # 连master都没有,那不用做更深入的判断,直接返回false + return 2 + fi + set -A gse_master_pids_by_exe_path + + for _pid in "${gse_agent_master_pids[@]}"; do + abs_path=$(ps -eo pid,args | grep $_pid | grep -v grep | awk '{for (i=2; i<=NF; i++) printf "%s ", $i; print ""}') + # 两个路径都用readlink -f 防止有软链接目录 + # master既然存在,先判断路径是否包含WORK_HOME + if [[ "$abs_path" == *"${WORK_HOME}"* ]]; then + # 找到了匹配的pid + gse_master_pids_by_exe_path[${#gse_master_pids_by_exe_path[@]}]=$_pid + fi + done + + agent_id_file=${WORK_HOME}/bin/run/agent.pid + if [[ ${#gse_master_pids_by_exe_path} -eq 0 ]]; then + # 连master都没有,那不用做更深入的判断,直接返回false + return 2 + elif [[ ${#gse_master_pids_by_exe_path[@]} -gt 1 && -f ${agent_id_file} ]]; then + # 兼容存在游离gse_agent worker进程的场景 + gse_master_pid=$(cat $agent_id_file) + else + gse_master_pid=$gse_master_pids_by_exe_path + fi + + # 查看该gseMaster进程是否子进程Worker(>=1) + if [[ $(ps -eo ppid=,pid= | grep "^ *$gse_master_pid " | wc -l) -eq 0 ]]; then + return 1 + fi + # 运行到这里时就可以获取进程状态详细信息输出到STDOUT,并返回0了 + ps -p 
$gse_master_pid -o pid,etime | tail -n +2 | while read -r pid etime; do + echo "PID: $pid, Elapsed Time: $etime" + done + return 0 +} + +_healthz () { + ./gse_agent --healthz +} + +get_auto_type () { + # 由节点管理进行渲染,当前环境使用 {{ AUTO_TYPE }} + echo "{{ AUTO_TYPE }}" + return + echo "crontab" +} + +detect_node_type () { + case $WORK_HOME in + *"$INSTALL_ENV"/proxy) node_type=proxy ;; + *"$INSTALL_ENV"/agent) node_type=agent ;; + *) node_type=unknown ;; + esac + + echo $node_type >$WORK_HOME/.gse_node_type +} + +# main +action="$1"; shift +module="agent" + +auto_type=$(get_auto_type) +if [ "${auto_type}" == "systemd" ]; then + echo "AIX systems do not support systemd" + exit +fi + +if [ -s $WORK_HOME/.gse_node_type ]; then + read node_type ignore <$WORK_HOME/.gse_node_type +else + detect_node_type +fi + +if [ "${node_type}" == "unknown" ];then + echo "wrong node type: ${node_type}" + exit +fi + +if [ $auto_type == "crontab" ]; then + case $action in + start) start_by_crontab 2>&1 | tee /tmp/nm_"${auto_type}"_"${action}".log ;; + stop) stop_by_crontab 2>&1 | tee /tmp/nm_"${auto_type}"_"${action}".log ;; + restart) restart_by_crontab 2>&1 | tee /tmp/nm_"${auto_type}"_"${action}".log ;; + status) status_by_crontab 2>&1 | tee /tmp/nm_"${auto_type}"_"${action}".log ;; + reload) reload_by_crontab 2>&1 | tee /tmp/nm_"${auto_type}"_"${action}".log ;; + healthz) healthz_by_crontab 2>&1 | tee /tmp/nm_"${auto_type}"_"${action}".log ;; + watch) watch_by_crontab 2>&1 | tee /tmp/nm_"${auto_type}"_"${action}".log ;; + -h|*) usage ; exit 255 ;; + esac +elif [ $auto_type == "rclocal" ]; then + case $action in + start) start_by_rclocal 2>&1 | tee /tmp/nm_"${auto_type}"_"${action}".log ;; + stop) stop_by_rclocal 2>&1 | tee /tmp/nm_"${auto_type}"_"${action}".log ;; + restart) restart_by_rclocal 2>&1 | tee /tmp/nm_"${auto_type}"_"${action}".log ;; + status) status_by_rclocal 2>&1 | tee /tmp/nm_"${auto_type}"_"${action}".log ;; + reload) reload_by_rclocal 2>&1 | tee 
/tmp/nm_"${auto_type}"_"${action}".log ;; + healthz) healthz_by_rclocal 2>&1 | tee /tmp/nm_"${auto_type}"_"${action}".log ;; + -h|*) usage ; exit 255 ;; + esac +fi + + +exit $? diff --git a/script_tools/gsectl/agent/darwin/gsectl b/script_tools/gsectl/agent/darwin/gsectl index 86bf7909b..2a35f3292 100755 --- a/script_tools/gsectl/agent/darwin/gsectl +++ b/script_tools/gsectl/agent/darwin/gsectl @@ -8,6 +8,21 @@ WORK_HOME=${PWD%/bin} WORK_HOME=`echo $WORK_HOME |sed 's/\/$//g'` INSTALL_ENV=`echo $WORK_HOME |awk -F/ '{print $(NF-1)}'` +IS_SUPER=true +if sudo -n true 2>/dev/null; then + IS_SUPER=true +else + IS_SUPER=false +fi + +VAR_RUN_DIR=/var/run +if [ $IS_SUPER == false ]; then + if [ ! -d ${PWD}/run ]; then + mkdir -p ${PWD}/run + fi + VAR_RUN_DIR=${PWD}/run +fi + # 设置agent的max open files ulimit -n 409600 2>/dev/null ulimit -c unlimited @@ -186,7 +201,7 @@ watch_by_binary () { local module="agent" # 设置记录上次脚本运行的文件 - LAST_RUN_FILE=/var/run/already_run_times_$module + LAST_RUN_FILE=$VAR_RUN_DIR/already_run_times_$module # 如果文件存在,则读取文件中记录的次数 if [ -f $LAST_RUN_FILE ]; then @@ -211,8 +226,8 @@ watch_by_binary () { THRESHOLD=5 # 检查上一次脚本是否存在 - if [ -f /var/run/gsectl_check_agent_status.pid ]; then - pid=`cat /var/run/gsectl_check_agent_status.pid` + if [ -f $VAR_RUN_DIR/gsectl_check_agent_status.pid ]; then + pid=`cat $VAR_RUN_DIR/gsectl_check_agent_status.pid` if lsof -p $pid >/dev/null; then log "`date +'%F %T.%N'` Last Script: $0 Detection status: PID:$pid is until running , no longer checking the status of the module: ${module}" return @@ -228,7 +243,7 @@ watch_by_binary () { fi # 记录当前脚本的 PID - echo $$ > /var/run/gsectl_check_agent_status.pid + echo $$ > $VAR_RUN_DIR/gsectl_check_agent_status.pid # 检测gse_agent是否正常存在的逻辑 if [ -z "${module}" ]; then @@ -444,10 +459,18 @@ is_systemd_supported () { is_use_systemd () { local module="agent" - if [ -f /usr/lib/systemd/system/${INSTALL_ENV}_${module}.service ];then - return 0 + if [ $IS_SUPER == false ]; then + if [ -f 
$HOME/.config/systemd/user/${INSTALL_ENV}_${module}.service ];then + return 0 + else + return 1 + fi else - return 1 + if [ -f /usr/lib/systemd/system/${INSTALL_ENV}_${module}.service ];then + return 0 + else + return 1 + fi fi } @@ -509,7 +532,11 @@ check_rc_file () { } get_daemon_file () { - DAEMON_FILE_PATH="/Library/LaunchDaemons/" + if [ $IS_SUPER == true ]; then + DAEMON_FILE_PATH="/Library/LaunchDaemons/" + else + DAEMON_FILE_PATH="~/Library/LaunchAgents/" + fi DAEMON_FILE_NAME="com.tencent.$(echo ${WORK_HOME%*/} | tr '/' '.' | awk -F '.' '{print $(NF-1)"."$NF}').Daemon.plist" } @@ -550,7 +577,7 @@ LimitNOFILE=512000 LimitCORE=infinity WorkingDirectory=${WORK_HOME}/bin PIDFile=${WORK_HOME}/bin/run/${module}.pid -ExecStart=${WORK_HOME}/bin/gse_agent -f /usr/local/${INSTALL_ENV}/${node_type}/etc/gse_agent.conf +ExecStart=${WORK_HOME}/bin/gse_agent -f ${WORK_HOME}/etc/gse_agent.conf ExecReload=${WORK_HOME}/bin/gse_agent --reload ExecStop=${WORK_HOME}/bin/gse_agent --quit Type=forking @@ -563,26 +590,48 @@ RestartSec=10 WantedBy=multi-user.target EOF - if [ -f /usr/lib/systemd/system/${INSTALL_ENV}_${module}.service ];then - if [ `md5sum /tmp/${INSTALL_ENV}_${module}.service |awk '{print $1}'` == `md5sum /usr/lib/systemd/system/${INSTALL_ENV}_${module}.service |awk '{print $1}'` ];then - echo "${INSTALL_ENV}_${module}.service have no change..." + if [ $IS_SUPER == true ]; then + if [ -f /usr/lib/systemd/system/${INSTALL_ENV}_${module}.service ];then + if [ `md5sum /tmp/${INSTALL_ENV}_${module}.service |awk '{print $1}'` == `md5sum /usr/lib/systemd/system/${INSTALL_ENV}_${module}.service |awk '{print $1}'` ];then + echo "${INSTALL_ENV}_${module}.service have no change..." 
+ else + echo "update ${INSTALL_ENV}_${module}.service" + cp /tmp/${INSTALL_ENV}_${module}.service /usr/lib/systemd/system/${INSTALL_ENV}_${module}.service + systemctl daemon-reload + systemctl enable ${INSTALL_ENV}_${module}.service + fi else - echo "update ${INSTALL_ENV}_${module}.service" + echo "copy ${INSTALL_ENV}_${module}.service" cp /tmp/${INSTALL_ENV}_${module}.service /usr/lib/systemd/system/${INSTALL_ENV}_${module}.service systemctl daemon-reload systemctl enable ${INSTALL_ENV}_${module}.service fi + + # 删除rc.local里的启动项 + check_rc_file + sed -i "\|${WORK_HOME}/bin/gsectl start ${module}|d" $RC_LOCAL_FILE else - echo "copy ${INSTALL_ENV}_${module}.service" - cp /tmp/${INSTALL_ENV}_${module}.service /usr/lib/systemd/system/${INSTALL_ENV}_${module}.service - systemctl daemon-reload - systemctl enable ${INSTALL_ENV}_${module}.service + if [ -f $HOME/.config/systemd/user/${INSTALL_ENV}_${module}.service ];then + if [ `md5sum /tmp/${INSTALL_ENV}_${module}.service |awk '{print $1}'` == `md5sum $HOME/.config/systemd/user/${INSTALL_ENV}_${module}.service |awk '{print $1}'` ];then + echo "${INSTALL_ENV}_${module}.service have no change..." + else + echo "update ${INSTALL_ENV}_${module}.service" + cp /tmp/${INSTALL_ENV}_${module}.service $HOME/.config/systemd/user/${INSTALL_ENV}_${module}.service + systemctl --user daemon-reload + systemctl --user enable ${INSTALL_ENV}_${module}.service + fi + else + if [ ! -d "$HOME/.config/systemd" ]; then + echo "文件夹 ~/.config/systemd 不存在,正在创建..." 
+ mkdir -p $HOME/.config/systemd/user + fi + echo "copy ${INSTALL_ENV}_${module}.service" + cp /tmp/${INSTALL_ENV}_${module}.service $HOME/.config/systemd/user/${INSTALL_ENV}_${module}.service + systemctl --user daemon-reload + systemctl --user enable ${INSTALL_ENV}_${module}.service + fi fi - # 删除rc.local里的启动项 - check_rc_file - sed -i "\|${WORK_HOME}/bin/gsectl start ${module}|d" $RC_LOCAL_FILE - # 删除crontab里的watch条目 remove_crontab } @@ -590,10 +639,18 @@ EOF remove_systemd_config (){ local module="agent" - if [ -f /usr/lib/systemd/system/${INSTALL_ENV}_${module}.service ];then - systemctl stop ${INSTALL_ENV}_${module}.service - systemctl disable ${INSTALL_ENV}_${module}.service - rm /usr/lib/systemd/system/${INSTALL_ENV}_${module}.service + if [ $IS_SUPER == true ]; then + if [ -f /usr/lib/systemd/system/${INSTALL_ENV}_${module}.service ];then + systemctl stop ${INSTALL_ENV}_${module}.service + systemctl disable ${INSTALL_ENV}_${module}.service + rm /usr/lib/systemd/system/${INSTALL_ENV}_${module}.service + fi + else + if [ -f $HOME/.config/systemd/user/${INSTALL_ENV}_${module}.service ];then + systemctl --user stop ${INSTALL_ENV}_${module}.service + systemctl --user disable ${INSTALL_ENV}_${module}.service + rm $HOME/.config/systemd/user/${INSTALL_ENV}_${module}.service + fi fi } @@ -619,14 +676,18 @@ setup_crontab () { remove_crontab (){ local tmpcron - tmpcron=/tmp/cron.XXXXXX + tmpcron= /tmp/cron.XXXXXX crontab -l |grep -E -v "$WORK_HOME" >$tmpcron crontab $tmpcron && rm -f $tmpcron # 下面这段代码是为了确保修改的crontab立即生效 - if pgrep -x crond &>/dev/null; then - pkill -HUP -x crond + if [ $IS_SUPER == true ]; then + if pgrep -x crond &>/dev/null; then + pkill -HUP -x crond + fi + else + crontab -l | crontab - fi } @@ -830,33 +891,33 @@ if [ "${node_type}" == "unknown" ];then fi if [ $auto_type == "systemd" ]; then case $action in - start) start_by_systemd 2>&1 | tee /tmp/nm_"${auto_type}"_"${action}".log ;; - stop) stop_by_systemd 2>&1 | tee 
/tmp/nm_"${auto_type}"_"${action}".log ;; - restart) restart_by_systemd 2>&1 | tee /tmp/nm_"${auto_type}"_"${action}".log ;; - status) status_by_systemd 2>&1 | tee /tmp/nm_"${auto_type}"_"${action}".log ;; - reload) reload_by_systemd 2>&1 | tee /tmp/nm_"${auto_type}"_"${action}".log ;; - healthz) healthz_by_systemd 2>&1 | tee /tmp/nm_"${auto_type}"_"${action}".log ;; + start) start_by_systemd 2>&1 | tee /tmp/nm_"${auto_type}"_"${action}".log ;; + stop) stop_by_systemd 2>&1 | tee /tmp/nm_"${auto_type}"_"${action}".log ;; + restart) restart_by_systemd 2>&1 | tee /tmp/nm_"${auto_type}"_"${action}".log ;; + status) status_by_systemd 2>&1 | tee /tmp/nm_"${auto_type}"_"${action}".log ;; + reload) reload_by_systemd 2>&1 | tee /tmp/nm_"${auto_type}"_"${action}".log ;; + healthz) healthz_by_systemd 2>&1 | tee /tmp/nm_"${auto_type}"_"${action}".log ;; -h|*) usage ; exit 255 ;; esac elif [ $auto_type == "crontab" ]; then case $action in - start) start_by_crontab 2>&1 | tee /tmp/nm_"${auto_type}"_"${action}".log ;; - stop) stop_by_crontab 2>&1 | tee /tmp/nm_"${auto_type}"_"${action}".log ;; - restart) restart_by_crontab 2>&1 | tee /tmp/nm_"${auto_type}"_"${action}".log ;; - status) status_by_crontab 2>&1 | tee /tmp/nm_"${auto_type}"_"${action}".log ;; - reload) reload_by_crontab 2>&1 | tee /tmp/nm_"${auto_type}"_"${action}".log ;; - healthz) healthz_by_crontab 2>&1 | tee /tmp/nm_"${auto_type}"_"${action}".log ;; - watch) watch_by_crontab 2>&1 | tee /tmp/nm_"${auto_type}"_"${action}".log ;; + start) start_by_crontab 2>&1 | tee /tmp/nm_"${auto_type}"_"${action}".log ;; + stop) stop_by_crontab 2>&1 | tee /tmp/nm_"${auto_type}"_"${action}".log ;; + restart) restart_by_crontab 2>&1 | tee /tmp/nm_"${auto_type}"_"${action}".log ;; + status) status_by_crontab 2>&1 | tee /tmp/nm_"${auto_type}"_"${action}".log ;; + reload) reload_by_crontab 2>&1 | tee /tmp/nm_"${auto_type}"_"${action}".log ;; + healthz) healthz_by_crontab 2>&1 | tee /tmp/nm_"${auto_type}"_"${action}".log ;; + watch) 
watch_by_crontab 2>&1 | tee /tmp/nm_"${auto_type}"_"${action}".log ;;
             -h|*) usage ; exit 255 ;;
         esac
     elif [ $auto_type == "rclocal" ]; then
         case $action in
-            start) start_by_rclocal 2>&1 | tee /tmp/nm_"${auto_type}"_"${action}".log ;;
-            stop) stop_by_rclocal 2>&1 | tee /tmp/nm_"${auto_type}"_"${action}".log ;;
-            restart) restart_by_rclocal 2>&1 | tee /tmp/nm_"${auto_type}"_"${action}".log ;;
-            status) status_by_rclocal 2>&1 | tee /tmp/nm_"${auto_type}"_"${action}".log ;;
-            reload) reload_by_rclocal 2>&1 | tee /tmp/nm_"${auto_type}"_"${action}".log ;;
-            healthz) healthz_by_rclocal 2>&1 | tee /tmp/nm_"${auto_type}"_"${action}".log ;;
+            start) start_by_rclocal 2>&1 | tee /tmp/nm_"${auto_type}"_"${action}".log ;;
+            stop) stop_by_rclocal 2>&1 | tee /tmp/nm_"${auto_type}"_"${action}".log ;;
+            restart) restart_by_rclocal 2>&1 | tee /tmp/nm_"${auto_type}"_"${action}".log ;;
+            status) status_by_rclocal 2>&1 | tee /tmp/nm_"${auto_type}"_"${action}".log ;;
+            reload) reload_by_rclocal 2>&1 | tee /tmp/nm_"${auto_type}"_"${action}".log ;;
+            healthz) healthz_by_rclocal 2>&1 | tee /tmp/nm_"${auto_type}"_"${action}".log ;;
             -h|*) usage ; exit 255 ;;
         esac
     fi
diff --git a/script_tools/gsectl/agent/linux/gsectl b/script_tools/gsectl/agent/linux/gsectl
index 8d9ca1b60..a6ef443bc 100755
--- a/script_tools/gsectl/agent/linux/gsectl
+++ b/script_tools/gsectl/agent/linux/gsectl
@@ -7,6 +7,21 @@ WORK_HOME=${PWD%/bin}
 WORK_HOME=`echo $WORK_HOME |sed 's/\/$//g'`
 INSTALL_ENV=`echo $WORK_HOME |awk -F/ '{print $(NF-1)}'`
 
+# Privileged means: running as root (even without sudo installed), or passwordless sudo is available.
+if [ "$(id -u)" -eq 0 ] || sudo -n true 2>/dev/null; then
+    IS_SUPER=true
+else
+    IS_SUPER=false
+fi
+
+VAR_RUN_DIR=/var/run
+if [ $IS_SUPER == false ]; then
+    if [ ! -d ${PWD}/run ]; then
+        mkdir -p ${PWD}/run
+    fi
+    VAR_RUN_DIR=${PWD}/run
+fi
+
 # 设置agent的max open files
 ulimit -n 409600 2>/dev/null
 ulimit -c unlimited
@@ -185,7 +200,7 @@ watch_by_binary () {
     local module="agent"
 
     # 设置记录上次脚本运行的文件
-    LAST_RUN_FILE=/var/run/already_run_times_$module
+    LAST_RUN_FILE=$VAR_RUN_DIR/already_run_times_$module
 
     # 如果文件存在,则读取文件中记录的次数
     if [ -f $LAST_RUN_FILE ]; then
@@ -210,8 +225,8 @@ watch_by_binary () {
     THRESHOLD=5
 
     # 检查上一次脚本是否存在
-    if [ -f /var/run/gsectl_check_agent_status.pid ]; then
-        pid=`cat /var/run/gsectl_check_agent_status.pid`
+    if [ -f $VAR_RUN_DIR/gsectl_check_agent_status.pid ]; then
+        pid=`cat $VAR_RUN_DIR/gsectl_check_agent_status.pid`
         if [ -d "/proc/$pid" ]; then
             log "`date +'%F %T.%N'` Last Script: $0 Detection status: PID:$pid is until running , no longer checking the status of the module: ${module}"
             return
@@ -227,7 +242,7 @@ watch_by_binary () {
     fi
 
     # 记录当前脚本的 PID
-    echo $$ > /var/run/gsectl_check_agent_status.pid
+    echo $$ > $VAR_RUN_DIR/gsectl_check_agent_status.pid
 
     # 检测gse_agent是否正常存在的逻辑
     if [ -z "${module}" ]; then
@@ -443,10 +458,18 @@ is_systemd_supported () {
 is_use_systemd () {
     local module="agent"
 
-    if [ -f /usr/lib/systemd/system/${INSTALL_ENV}_${module}.service ];then
-        return 0
+    if [ $IS_SUPER == false ]; then
+        if [ -f $HOME/.config/systemd/user/${INSTALL_ENV}_${module}.service ];then
+            return 0
+        else
+            return 1
+        fi
     else
-        return 1
+        if [ -f /usr/lib/systemd/system/${INSTALL_ENV}_${module}.service ];then
+            return 0
+        else
+            return 1
+        fi
     fi
 }
 
@@ -506,6 +529,12 @@ check_rc_file () {
 
 add_startup_to_boot () {
 
+    # Non-root users cannot modify rc.local
+    if [ $IS_SUPER == false ]; then
+        echo "Not root user, can't operate rc.local"
+        return
+    fi
+
     local module=agent
 
     # 添加启动项到 rc.local
@@ -541,7 +570,7 @@ LimitNOFILE=512000
 LimitCORE=infinity
 WorkingDirectory=${WORK_HOME}/bin
 PIDFile=${WORK_HOME}/bin/run/${module}.pid
-ExecStart=${WORK_HOME}/bin/gse_agent -f /usr/local/${INSTALL_ENV}/${node_type}/etc/gse_agent.conf
+ExecStart=${WORK_HOME}/bin/gse_agent -f ${WORK_HOME}/etc/gse_agent.conf
 ExecReload=${WORK_HOME}/bin/gse_agent --reload
 ExecStop=${WORK_HOME}/bin/gse_agent --quit
 Type=forking
@@ -554,26 +583,48 @@ RestartSec=10
 WantedBy=multi-user.target
 EOF
 
-    if [ -f /usr/lib/systemd/system/${INSTALL_ENV}_${module}.service ];then
-        if [ `md5sum /tmp/${INSTALL_ENV}_${module}.service |awk '{print $1}'` == `md5sum /usr/lib/systemd/system/${INSTALL_ENV}_${module}.service |awk '{print $1}'` ];then
-            echo "${INSTALL_ENV}_${module}.service have no change..."
+    if [ $IS_SUPER == true ]; then
+        if [ -f /usr/lib/systemd/system/${INSTALL_ENV}_${module}.service ];then
+            if [ `md5sum /tmp/${INSTALL_ENV}_${module}.service |awk '{print $1}'` == `md5sum /usr/lib/systemd/system/${INSTALL_ENV}_${module}.service |awk '{print $1}'` ];then
+                echo "${INSTALL_ENV}_${module}.service have no change..."
+            else
+                echo "update ${INSTALL_ENV}_${module}.service"
+                cp /tmp/${INSTALL_ENV}_${module}.service /usr/lib/systemd/system/${INSTALL_ENV}_${module}.service
+                systemctl daemon-reload
+                systemctl enable ${INSTALL_ENV}_${module}.service
+            fi
         else
-            echo "update ${INSTALL_ENV}_${module}.service"
+            echo "copy ${INSTALL_ENV}_${module}.service"
             cp /tmp/${INSTALL_ENV}_${module}.service /usr/lib/systemd/system/${INSTALL_ENV}_${module}.service
             systemctl daemon-reload
             systemctl enable ${INSTALL_ENV}_${module}.service
         fi
+
+        # Remove the startup entry from rc.local (only a privileged user can edit it)
+        check_rc_file
+        sed -i "\|${WORK_HOME}/bin/gsectl start ${module}|d" $RC_LOCAL_FILE
     else
-        echo "copy ${INSTALL_ENV}_${module}.service"
-        cp /tmp/${INSTALL_ENV}_${module}.service /usr/lib/systemd/system/${INSTALL_ENV}_${module}.service
-        systemctl daemon-reload
-        systemctl enable ${INSTALL_ENV}_${module}.service
+        if [ -f $HOME/.config/systemd/user/${INSTALL_ENV}_${module}.service ];then
+            if [ `md5sum /tmp/${INSTALL_ENV}_${module}.service |awk '{print $1}'` == `md5sum $HOME/.config/systemd/user/${INSTALL_ENV}_${module}.service |awk '{print $1}'` ];then
+                echo "${INSTALL_ENV}_${module}.service have no change..."
+            else
+                echo "update ${INSTALL_ENV}_${module}.service"
+                cp /tmp/${INSTALL_ENV}_${module}.service $HOME/.config/systemd/user/${INSTALL_ENV}_${module}.service
+                systemctl --user daemon-reload
+                systemctl --user enable ${INSTALL_ENV}_${module}.service
+            fi
+        else
+            if [ ! -d "$HOME/.config/systemd/user" ]; then
+                echo "文件夹 ~/.config/systemd/user 不存在,正在创建..."
+                mkdir -p $HOME/.config/systemd/user
+            fi
+            echo "copy ${INSTALL_ENV}_${module}.service"
+            cp /tmp/${INSTALL_ENV}_${module}.service $HOME/.config/systemd/user/${INSTALL_ENV}_${module}.service
+            systemctl --user daemon-reload
+            systemctl --user enable ${INSTALL_ENV}_${module}.service
+        fi
     fi
 
-    # 删除rc.local里的启动项
-    check_rc_file
-    sed -i "\|${WORK_HOME}/bin/gsectl start ${module}|d" $RC_LOCAL_FILE
-
     # 删除crontab里的watch条目
     remove_crontab
 }
@@ -581,10 +632,18 @@ EOF
 remove_systemd_config (){
     local module="agent"
 
-    if [ -f /usr/lib/systemd/system/${INSTALL_ENV}_${module}.service ];then
-        systemctl stop ${INSTALL_ENV}_${module}.service
-        systemctl disable ${INSTALL_ENV}_${module}.service
-        rm /usr/lib/systemd/system/${INSTALL_ENV}_${module}.service
+    if [ $IS_SUPER == true ]; then
+        if [ -f /usr/lib/systemd/system/${INSTALL_ENV}_${module}.service ];then
+            systemctl stop ${INSTALL_ENV}_${module}.service
+            systemctl disable ${INSTALL_ENV}_${module}.service
+            rm /usr/lib/systemd/system/${INSTALL_ENV}_${module}.service
+        fi
+    else
+        if [ -f $HOME/.config/systemd/user/${INSTALL_ENV}_${module}.service ];then
+            systemctl --user stop ${INSTALL_ENV}_${module}.service
+            systemctl --user disable ${INSTALL_ENV}_${module}.service
+            rm $HOME/.config/systemd/user/${INSTALL_ENV}_${module}.service
+        fi
     fi
 }
 
@@ -616,8 +675,12 @@ remove_crontab (){
     crontab $tmpcron && rm -f $tmpcron
 
     # 下面这段代码是为了确保修改的crontab立即生效
-    if pgrep -x crond &>/dev/null; then
-        pkill -HUP -x crond
+    if [ $IS_SUPER == true ]; then
+        if pgrep -x crond &>/dev/null; then
+            pkill -HUP -x crond
+        fi
+    else
+        crontab -l | crontab -
     fi
 }
 
diff --git a/script_tools/setup_agent.ksh b/script_tools/setup_agent.ksh
index 7c99e064b..a7a8082c1 100644
--- a/script_tools/setup_agent.ksh
+++ b/script_tools/setup_agent.ksh
@@ -101,7 +101,7 @@ cleanup () {
 }
 
 validate_setup_path () {
-    set -A invalid_path_prefix /tmp /var /etc /bin /lib /lib64 /boot /mnt /proc /dev /run /sys /sbin /root /home
+    set -A invalid_path_prefix /tmp /var /etc /bin /lib /lib64 /boot /mnt /proc /dev /run /sys /sbin /root
 
     set -A invalid_path /usr /usr/bin /usr/sbin /usr/local/lib /usr/include /usr/lib /usr/lib64 /usr/libexec
 
@@ -305,6 +305,11 @@ setup_crontab () {
 }
 
 remove_crontab () {
+
+    if [ $IS_SUPER == false ]; then
+        return
+    fi
+
     local tmpcron
     local datatemp=$(date +%s)
 
@@ -316,6 +321,10 @@ remove_crontab () {
 }
 
 setup_startup_scripts () {
+    if [ $IS_SUPER == false ]; then
+        return
+    fi
+
     local rcfile=/etc/rc.local
 
     if [ -f $rcfile ];then
@@ -391,7 +400,9 @@ backup_config_file () {
             tmp_backup_file=$(mktemp "${TMP_DIR}"/nodeman_${file}_config.XXXXXXX)
             log backup_config_file - "backup $file to $tmp_backup_file"
             cp -rf "${AGENT_SETUP_PATH}"/etc/"${file}" "${tmp_backup_file}"
-            chattr +i "${tmp_backup_file}"
+            if [ $IS_SUPER == true ]; then
+                chattr +i "${tmp_backup_file}"
+            fi
         fi
     done
 }
@@ -402,7 +413,9 @@ recovery_config_file () {
         time_filter_config_file=$(find "${TMP_DIR}" -ctime -1 -name "nodeman_${file}_config*")
         [ -z "${time_filter_config_file}" ] && return 0
         latest_config_file=$(find "${TMP_DIR}" -ctime -1 -name "nodeman_${file}_config*" | xargs ls -rth | tail -n 1)
-        chattr -i "${latest_config_file}"
+        if [ $IS_SUPER == true ]; then
+            chattr -i "${latest_config_file}"
+        fi
         cp -rf "${latest_config_file}" "${AGENT_SETUP_PATH}"/etc/"${file}"
         rm -f "${latest_config_file}"
         log recovery_config_file - "recovery ${AGENT_SETUP_PATH}/etc/${file} from $latest_config_file"
@@ -537,7 +550,7 @@ check_deploy_result () {
 }
 
 validate_vars_string () {
-    echo "$1" | grep -Pq '^[a-zA-Z_][a-zA-Z0-9]+='
+    echo "$1" | grep -Eq 
'^[a-zA-Z_][a-zA-Z0-9_]*='
 }
 
 check_pkgtool () {
@@ -731,6 +744,13 @@ while getopts I:i:l:s:uc:r:x:p:e:a:k:N:v:oT:RO:E:A:V:B:S:Z:K: arg; do
     esac
 done
 
+# Privileged means: running as root (even without sudo installed), or passwordless sudo is available.
+if [ "$(id -u)" -eq 0 ] || sudo -n true 2>/dev/null; then
+    IS_SUPER=true
+else
+    IS_SUPER=false
+fi
+
 ## 检查自定义环境变量
 VARS_LIST=$(echo "$VARS_LIST" | sed 's/;/ /g')
 for var_name in ${VARS_LIST}; do
diff --git a/script_tools/setup_agent.sh b/script_tools/setup_agent.sh
index 6479d2d23..3933e93ba 100644
--- a/script_tools/setup_agent.sh
+++ b/script_tools/setup_agent.sh
@@ -109,7 +109,7 @@ cleanup () {
 # 打印错误行数信息
 report_err () {
     awk -v LN="$1" -v L="ERROR" -v D="$(date +%F\ %T)" \
-        'NR>LN-3 && NR>>":""), $0 }' $0
+        'NR>LN-3 && NR>>":""), $0 }' $0
 }
 
 validate_setup_path () {
@@ -128,7 +128,6 @@ validate_setup_path () {
         /sys
         /sbin
         /root
-        /home
     )
 
     local invalid_path=(
@@ -235,7 +234,7 @@ is_connected () {
 }
 
 is_gsecmdline_ok () {
-    /bin/gsecmdline -d 1430 -s test
+    "$AGENT_SETUP_PATH"/../plugins/bin/gsecmdline -d 1430 -s test
 }
 
 # 用法:通过ps的comm字段和二进制的绝对路径来精确获取pid
@@ -392,6 +391,10 @@ pre_view () {
 }
 
 remove_crontab () {
+    if [ $IS_SUPER == false ]; then
+        return
+    fi
+
     local tmpcron
     tmpcron=$(mktemp "$TMP_DIR"/cron.XXXXXXX)
 
@@ -401,11 +404,15 @@ remove_crontab () {
 
     # 下面这段代码是为了确保修改的crontab能立即生效
     if pgrep -x crond &>/dev/null; then
-        pkill -HUP -x crond
+        pkill -HUP -x crond
     fi
 }
 
 setup_startup_scripts () {
+    if [ $IS_SUPER == false ]; then
+        return
+    fi
+
     check_rc_file
     local rcfile=$RC_LOCAL_FILE
 
@@ -487,7 +494,9 @@ backup_config_file () {
             tmp_backup_file=$(mktemp "${TMP_DIR}"/nodeman_${file}_config.XXXXXXX)
             log backup_config_file - "backup $file to $tmp_backup_file"
             cp -rf "${AGENT_SETUP_PATH}"/etc/"${file}" "${tmp_backup_file}"
-            chattr +i "${tmp_backup_file}"
+            if [ $IS_SUPER == true ]; then
+                chattr +i "${tmp_backup_file}"
+            fi
         fi
     done
 }
@@ -498,7 +507,9 @@ recovery_config_file () {
         time_filter_config_file=$(find "${TMP_DIR}" -ctime -1 -name "nodeman_${file}_config*")
         [ -z "${time_filter_config_file}" ] && return 0
         latest_config_file=$(find "${TMP_DIR}" -ctime -1 -name "nodeman_${file}_config*" | xargs ls -rth | tail -n 1)
-        chattr -i "${latest_config_file}"
+        if [ $IS_SUPER == true ]; then
+            chattr -i "${latest_config_file}"
+        fi
         cp -rf "${latest_config_file}" "${AGENT_SETUP_PATH}"/etc/"${file}"
         rm -f "${latest_config_file}"
         log recovery_config_file - "recovery ${AGENT_SETUP_PATH}/etc/${file} from $latest_config_file"
@@ -512,7 +523,9 @@ remove_agent () {
     backup_config_file
     log remove_agent - "trying to remove old agent directory(${AGENT_SETUP_PATH})"
     cd "${AGENT_SETUP_PATH}" || return 0
-    for file in `lsattr -R |egrep "i-" |awk '{print $NF}'`;do echo "--- $file" && chattr -i $file ;done
+    if [ $IS_SUPER == true ]; then
+        for file in `lsattr -R |egrep "i-" |awk '{print $NF}'`;do echo "--- $file" && chattr -i $file ;done
+    fi
     cd -
 
     rm -rf "${AGENT_SETUP_PATH}"
@@ -557,11 +570,13 @@ setup_agent () {
 
     cd "$AGENT_SETUP_PATH/.." && tar xf "$TMP_DIR/$PKG_NAME"
 
-    # update gsecmdline under /bin
-    cp -fp plugins/bin/gsecmdline /bin/
-    # 注意这里 /bin/ 可能是软链
-    cp -fp plugins/etc/gsecmdline.conf /bin/../etc/
-    chmod 775 /bin/gsecmdline
+    if [ $IS_SUPER == true ]; then
+        # update gsecmdline under /bin
+        cp -fp plugins/bin/gsecmdline /bin/
+        # Note: /bin/ may be a symlink here
+        cp -fp plugins/etc/gsecmdline.conf /bin/../etc/
+        chmod 775 /bin/gsecmdline
+    fi
 
     # setup config file
     get_config
@@ -726,7 +741,7 @@ _OO_
 }
 
 validate_vars_string () {
-    echo "$1" | grep -Pq '^[a-zA-Z_][a-zA-Z0-9]+='
+    echo "$1" | grep -Eq '^[a-zA-Z_][a-zA-Z0-9_]*='
 }
 
 check_pkgtool () {
@@ -914,6 +929,13 @@ while getopts I:i:l:s:uc:r:x:p:e:a:k:N:v:oT:RDO:E:A:V:B:S:Z:K: arg; do
     esac
 done
 
+# Privileged means: running as root (even without sudo installed), or passwordless sudo is available.
+if [ "$(id -u)" -eq 0 ] || sudo -n true 2>/dev/null; then
+    IS_SUPER=true
+else
+    IS_SUPER=false
+fi
+
 ## 检查自定义环境变量
 for var_name in ${VARS_LIST//;/ /}; do
     validate_vars_string "$var_name" || fail "$var_name is not a valid name"
diff --git a/script_tools/setup_agent.zsh b/script_tools/setup_agent.zsh
index 25e65f8b5..315c9b2cc 100644
--- a/script_tools/setup_agent.zsh
+++ b/script_tools/setup_agent.zsh
@@ -25,7 +25,7 @@ report_step_status () {
     [ -z "$CALLBACK_URL" ] && return 0
 
     # echo "$@" | read date _time log_level step status message
-    echo "$@" | read date _time log_level step
+    echo "$@" | read date _time log_level step
     tmp_time=$(date +%Y%m%d_%H%M%S)
     tmp_date=$(date +%s)
 
@@ -107,7 +107,6 @@ validate_setup_path () {
         /lib
         /dev
         /sbin
-        /home
     )
 
     local invalid_path=(
@@ -211,7 +210,7 @@ is_connected () {
 }
 
 is_gsecmdline_ok () {
-    /bin/gsecmdline -d 1430 -s test
+    "$AGENT_SETUP_PATH"/../plugins/bin/gsecmdline -d 1430 -s test
 }
 
 # 用法:通过ps的comm字段和二进制的绝对路径来精确获取pid
@@ -370,6 +369,10 @@ pre_view () {
 }
 
 remove_crontab () {
+    if [ $IS_SUPER == false ]; then
+        return
+    fi
+
     local tmpcron
     tmpcron=$(mktemp "$TMP_DIR"/cron.XXXXXXX)
 
@@ -379,11 +382,15 @@ remove_crontab () {
 
     # 下面这段代码是为了确保修改的crontab能立即生效
     if pgrep -x crond &>/dev/null; then
-        pkill -HUP -x crond
+        pkill -HUP -x crond
     fi
 }
 
 setup_startup_scripts () {
+    if [ $IS_SUPER == false ]; then
+        return
+    fi
+
     get_daemon_file
     local damonfile=$DAEMON_FILE_NAME
 
@@ -478,7 +485,9 @@ backup_config_file () {
             tmp_backup_file=$(mktemp "${TMP_DIR}"/nodeman_${file}_config.XXXXXXX)
             log backup_config_file - "backup $file to $tmp_backup_file"
             cp -rf "${AGENT_SETUP_PATH}"/etc/"${file}" "${tmp_backup_file}"
-            chattr +i "${tmp_backup_file}"
+            if [ $IS_SUPER == true ]; then
+                chattr +i "${tmp_backup_file}"
+            fi
         fi
     done
 }
@@ -489,7 +498,9 @@ recovery_config_file () {
         time_filter_config_file=$(find "${TMP_DIR}" -ctime -1 -name "nodeman_${file}_config*")
         [ -z "${time_filter_config_file}" ] && return 0
         latest_config_file=$(find "${TMP_DIR}" -ctime -1 -name "nodeman_${file}_config*" | xargs ls -rth | tail -n 1)
-        chattr -i "${latest_config_file}"
+        if [ $IS_SUPER == true ]; then
+            chattr -i "${latest_config_file}"
+        fi
         cp -rf "${latest_config_file}" "${AGENT_SETUP_PATH}"/etc/"${file}"
         rm -f "${latest_config_file}"
         log recovery_config_file - "recovery ${AGENT_SETUP_PATH}/etc/${file} 
from $latest_config_file"
@@ -546,11 +557,13 @@ setup_agent () {
 
     cd "$AGENT_SETUP_PATH/.." && tar xf "$TMP_DIR/$PKG_NAME"
 
-    # update gsecmdline under /bin
-    cp -fp plugins/bin/gsecmdline /usr/bin/
-    # 注意这里 /bin/ 可能是软链
-    cp -fp plugins/etc/gsecmdline.conf /usr/bin/../etc/
-    chmod 775 /bin/gsecmdline
+    if [ $IS_SUPER == true ]; then
+        # install gsecmdline under /usr/bin
+        cp -fp plugins/bin/gsecmdline /usr/bin/
+        # Note: /bin/ may be a symlink here
+        cp -fp plugins/etc/gsecmdline.conf /usr/bin/../etc/
+        chmod 775 /usr/bin/gsecmdline
+    fi
 
     # setup config file
     get_config
@@ -622,7 +635,7 @@ check_deploy_result () {
 
 
 validate_vars_string () {
-    echo "$1" | grep -Pq '^[a-zA-Z_][a-zA-Z0-9]+='
+    echo "$1" | grep -Eq '^[a-zA-Z_][a-zA-Z0-9_]*='
 }
 
 check_pkgtool () {
@@ -811,6 +824,13 @@ while getopts I:i:l:s:uc:r:x:p:e:a:k:N:v:oT:RDO:E:A:V:B:S:Z:K: arg; do
     esac
 done
 
+# Privileged means: running as root (even without sudo installed), or passwordless sudo is available.
+if [ "$(id -u)" -eq 0 ] || sudo -n true 2>/dev/null; then
+    IS_SUPER=true
+else
+    IS_SUPER=false
+fi
+
 ## 检查自定义环境变量
 for var_name in ${VARS_LIST//;/ /}; do
     validate_vars_string "$var_name" || fail "$var_name is not a valid name"
diff --git a/script_tools/setup_proxy.sh b/script_tools/setup_proxy.sh
index 1a3ec146e..608d2ec1e 100755
--- a/script_tools/setup_proxy.sh
+++ b/script_tools/setup_proxy.sh
@@ -117,7 +117,6 @@ validate_setup_path () {
         /sys
         /sbin
         /root
-        /home
     )
 
     local invalid_path=(
@@ -366,6 +365,10 @@ pre_view () {
 }
 
 remove_crontab () {
+    if [ $IS_SUPER == false ]; then
+        return
+    fi
+
     local tmpcron
     tmpcron=$(mktemp "$TMP_DIR"/cron.XXXXXXX)
 
@@ -380,6 +383,10 @@ remove_crontab () {
 }
 
 setup_startup_scripts () {
+    if [ $IS_SUPER == false ]; then
+        return
+    fi
+
     check_rc_file
     local rcfile=$RC_LOCAL_FILE
 
@@ -516,9 +523,11 @@ setup_proxy () {
 
     cd "$AGENT_SETUP_PATH/.." && tar xf "$TMP_DIR/$PKG_NAME"
 
-    # update gsecmdline under /bin
-    cp -fp plugins/bin/gsecmdline /bin/
-    chmod 775 /bin/gsecmdline
+    if [ $IS_SUPER == true ]; then
+        # update gsecmdline under /bin
+        cp -fp plugins/bin/gsecmdline /bin/
+        chmod 775 /bin/gsecmdline
+    fi
 
     # setup config file
     get_config
@@ -622,7 +631,7 @@ _OO_
 }
 
 validate_vars_string () {
-    echo "$1" | grep -Pq '^[a-zA-Z_][a-zA-Z0-9]+='
+    echo "$1" | grep -Eq '^[a-zA-Z_][a-zA-Z0-9_]*='
 }
 
 check_pkgtool () {
@@ -717,7 +726,9 @@ backup_config_file () {
             tmp_backup_file=$(mktemp "${TMP_DIR}"/nodeman_${file}_config.XXXXXXX)
             log backup_config_file - "backup $file to $tmp_backup_file"
             cp -rf "${AGENT_SETUP_PATH}"/etc/"${file}" "${tmp_backup_file}"
-            chattr +i "${tmp_backup_file}"
+            if [ $IS_SUPER == true ]; then
+                chattr +i "${tmp_backup_file}"
+            fi
         fi
     done
 }
@@ -728,7 +739,9 @@ recovery_config_file () {
         time_filter_config_file=$(find "${TMP_DIR}" -ctime -1 -name "nodeman_${file}_config*")
         [ -z "${time_filter_config_file}" ] && return 0
         latest_config_file=$(find "${TMP_DIR}" -ctime -1 -name "nodeman_${file}_config*" | xargs ls -rth | tail -n 1)
-        chattr -i "${latest_config_file}"
+        if [ $IS_SUPER == true ]; then
+            chattr -i "${latest_config_file}"
+        fi
         cp -rf "${latest_config_file}" "${AGENT_SETUP_PATH}"/etc/"${file}"
         rm -f "${latest_config_file}"
         log recovery_config_file - "recovery ${AGENT_SETUP_PATH}/etc/${file} from $latest_config_file"
@@ -827,6 +840,12 @@ while getopts I:i:l:s:uc:r:x:p:e:a:k:N:g:v:oT:RO:E:A:V:B:S:Z:K: arg; do
     esac
 done
 
+# Privileged means: running as root (even without sudo installed), or passwordless sudo is available.
+if [ "$(id -u)" -eq 0 ] || sudo -n true 2>/dev/null; then
+    IS_SUPER=true
+else
+    IS_SUPER=false
+fi
 ## 检查自定义环境变量
 for var_name in ${VARS_LIST//;/ /}; do