From a4aa0bcd6375bdaffd25979694cd6f17582dcecf Mon Sep 17 00:00:00 2001 From: Chengxiong Ruan Date: Thu, 23 May 2024 18:34:00 -0700 Subject: [PATCH] cfgen: disable senpai on host with rotational root disk Summary: 1. adding a method to `Node` so that we can check if the root block device is ssd or not. 2. update oomd cfgen logic to disable senpai when root disk is hdd. Differential Revision: D57678416 fbshipit-source-id: 1f69d94db7219201172f3b50e86b78ebadc4adf9 --- src/oomd/cfgen/src/cfgen.rs | 18 +- .../cfgen/test/cfgen_test_inputs/devvm.json | 5 +- .../twshared_senpai_disabled.json | 97 ++++ .../twshared_t20_zionex.json | 432 ++++++++++++++++++ .../twshared_vll_shard00.json | 5 +- src/oomd/cfgen/test/cfgen_test_manifest.yml | 9 +- .../50-change-propagator.conf | 4 + .../twshared_senpai_disabled/oomd2.json | 177 +++++++ .../50-change-propagator.conf | 4 + .../twshared_t20_zionex/oomd2.json | 202 ++++++++ 10 files changed, 939 insertions(+), 14 deletions(-) create mode 100644 src/oomd/cfgen/test/cfgen_test_inputs/twshared_senpai_disabled.json create mode 100644 src/oomd/cfgen/test/cfgen_test_inputs/twshared_t20_zionex.json create mode 100644 src/oomd/cfgen/test/cfgen_test_outputs/twshared_senpai_disabled/50-change-propagator.conf create mode 100644 src/oomd/cfgen/test/cfgen_test_outputs/twshared_senpai_disabled/oomd2.json create mode 100644 src/oomd/cfgen/test/cfgen_test_outputs/twshared_t20_zionex/50-change-propagator.conf create mode 100644 src/oomd/cfgen/test/cfgen_test_outputs/twshared_t20_zionex/oomd2.json diff --git a/src/oomd/cfgen/src/cfgen.rs b/src/oomd/cfgen/src/cfgen.rs index 2702a07d..2ce0d9b6 100644 --- a/src/oomd/cfgen/src/cfgen.rs +++ b/src/oomd/cfgen/src/cfgen.rs @@ -16,7 +16,6 @@ fn oomd_json(node: &Node) -> json::JsonValue { HostType::OnDemand => od_json_config(&attrs), _ => default_json_config(&attrs), } - // TODO(chengxiong) add other templates } fn oomd_dropin(node: &Node) -> Dropin { @@ -44,7 +43,6 @@ fn default_json_config(attrs: &ConfigParams) -> json::JsonValue { rulesets.push(rule_senpai_drop_in_ruleset(attrs)); rulesets.push(rule_tw_container_drop_in_ruleset(attrs)); - // TODO(chengxiong): add more rule sections json::object! { "rulesets": rulesets, "version": CONFIG_VERSION, @@ -731,9 +729,8 @@ fn oomd2_oomd_restart_threshold() -> BTreeMap { } } -fn on_ssd(_node: &Node) -> bool { - true - // TODO(chengxiong): add this logic https://fburl.com/code/dqdu7ves +fn on_ssd(node: &Node) -> bool { + node.has_ssd_root() } fn io_latency_supported(_node: &Node) -> bool { @@ -783,7 +780,7 @@ fn fbtax2_blacklisted_jobs(node: &Node) -> Vec<&'static str> { } fn senpai_targets(node: &Node) -> Option { - if get_host_type(node) == HostType::TwShared { + if should_enable_senpai(node) { return Some(String::from( "system.slice,workload.slice/workload-wdb.slice,hostcritical.slice,workload.slice/workload-wdb.slice/*,hostcritical.slice/*", )); @@ -816,13 +813,16 @@ fn disable_senpai_dropin(node: &Node) -> bool { false } +fn should_enable_senpai(node: &Node) -> bool { + get_host_type(node) == HostType::TwShared && on_ssd(node) +} + fn get_host_type(node: &Node) -> HostType { - // TODO(chengxiong): add logic to determine host types. - if node.hostname_prefix() == "twshared".into() { + if node.hostname_prefix() == TWSHARED { return HostType::TwShared; } - if node.hostname_prefix() == "od".into() { + if node.hostname_prefix() == OD { return HostType::OnDemand; } diff --git a/src/oomd/cfgen/test/cfgen_test_inputs/devvm.json b/src/oomd/cfgen/test/cfgen_test_inputs/devvm.json index eef94b7b..52addf49 100644 --- a/src/oomd/cfgen/test/cfgen_test_inputs/devvm.json +++ b/src/oomd/cfgen/test/cfgen_test_inputs/devvm.json @@ -1,4 +1,4 @@ -@generated SignedSource<> +@generated SignedSource<> @codegen-command arc cfgen update-inputs fb-oomd { "fqdn": "devvm3170.cln0.facebook.com", @@ -45,7 +45,8 @@ "driver": "virtio_net", "driver_version": "1.0.0", "firmware_version": "", - "bus_info": "0000:00:02.0" + "bus_info": "0000:00:02.0", + "speed_mbps": 0 } } }, diff --git a/src/oomd/cfgen/test/cfgen_test_inputs/twshared_senpai_disabled.json b/src/oomd/cfgen/test/cfgen_test_inputs/twshared_senpai_disabled.json new file mode 100644 index 00000000..11d36f8c --- /dev/null +++ b/src/oomd/cfgen/test/cfgen_test_inputs/twshared_senpai_disabled.json @@ -0,0 +1,97 @@ +@generated SignedSource<<4e1840d2c3dc9925b88a8f546af46041>> +@codegen-command arc cfgen update-inputs fb-oomd +{ + "fqdn": "twshared44829.07.ash9.facebook.com", + "region": "east", + "clusterType": "SERVICE_GENERIC_NON_MEMCACHE", + "modelId": 333245, + "kernelRelease": "5.12.0-0_fbk13_clang_7455_gb24de3bdb045", + "serverType": "TYPE_VI_FEED", + "experiments": [], + "cpuArchitecture": "skylake", + "metalosRootfs": false, + "provisioningConfig": { + "ethtoolByInterface": { + "eth0": { + "maxChannelsCombined": 128, + "maxChannelsRx": 64, + "maxChannelsTx": 64 + } + }, + "cpuCoreCount": 40, + "parentModelId": 336939, + "recoveryEnvironment": false, + "deviceType": "SERVER", + "datacenter": "ash9", + "cluster": "07", + "memTotal": 270023245824, + "osVersion": { + "distribution_name": "CentOS Stream release", + "version": 9, + "is_in_ramdisk": false, + "is_metalos": false + }, + "pciByAddress": { + "0000:5e:00.0": { + "vendor_id": 5348, + "device_id": 5833, + "class_code": 131072, + "board_part_number": "BCM957302M3023CBK" + } + }, + "static_smc_tiers": [], + "machine": "x86_64" + }, + "bootConfig": { + "ethtoolByInterface": { + "eth0": { + "driver": "bnxt_en", + "driver_version": "5.12.0-0_fbk13_clang_7455_gb24d", + "firmware_version": "20.6.167.0/pkg 20.6.4.12", + "bus_info": "0000:5e:00.0" + } + } + }, + "runtimeConfig": { + "hasHighPrivCert": true, + "regionRoutableCluster": "ash9.02", + "block_devices": { + "block_devices": { + "nvme0n1": { + "size_bytes": 1800360124416, + "is_rotational": false, + "model": "SAMSUNG MZ1LB1T9HALS-000FB", + "serial": "S4JTNA0M941126", + "physical_block_size": 4096, + "logical_block_size": 4096, + "is_root": false + }, + "sda": { + "size_bytes": 2000398934016, + "is_rotational": true, + "model": "ST2000NM0008-2F3100", + "serial": "ZDS16SA4", + "physical_block_size": 512, + "logical_block_size": 512, + "is_root": true + } + } + }, + "dynamic_smc_tiers": [], + "cluster_state": "CLUSTER_IN_USE", + "installed_platforms": [ + "platform009", + "platform010", + "platform010-aarch64", + "platform010-compat" + ], + "device_nics_enum": [ + "ETH0", + "SVC0" + ] + }, + "reservationConfig": { + "active_machine_materialization_id": "6321b66383934", + "current_reservation_host_profile_id": "COMMON:NO_CPU_CONTROLLER|KERNEL:kernel_5.12.0-0_fbk13_clang_7455_gb24de3bdb045_base.v8|STORAGE:XFS_FULL|RUNTIME:DATASTORES_GENERIC_SYSCTLS|KERNEL_ARGS:kargs_default.v2|KERNEL_POLICY:KERNEL_POLICY_DATABASES|BOOT_CONFIG:bootloader814_initrd230_swap16G_native_arch" + } +} diff --git a/src/oomd/cfgen/test/cfgen_test_inputs/twshared_t20_zionex.json b/src/oomd/cfgen/test/cfgen_test_inputs/twshared_t20_zionex.json new file mode 100644 index 00000000..27f0633a --- /dev/null +++ b/src/oomd/cfgen/test/cfgen_test_inputs/twshared_t20_zionex.json @@ -0,0 +1,432 @@ +@generated SignedSource<<36a93be5decb387899c480a3e1be4ac5>> +@codegen-command arc cfgen update-inputs fb-oomd +{ + "fqdn": "twshared30691.02.ftw6.facebook.com", + "region": "texas", + "clusterType": "SERVICE_GENERIC_NON_MEMCACHE", + "modelId": 341690, + "kernelRelease": "6.4.3-0_fbk10_zion_rc9_2482_g96807ccfca35", + "serverType": "TYPE_XX_GPU_TC", + "experiments": [], + "cpuArchitecture": "cooperlake", + "metalosRootfs": false, + "provisioningConfig": { + "ethtoolByInterface": { + "eth0": { + "maxChannelsCombined": 63 + }, + "eth1": { + "maxChannelsCombined": 63 + }, + "eth10": { + "maxChannelsCombined": 63 + }, + "eth11": { + "maxChannelsCombined": 63 + }, + "eth2": { + "maxChannelsCombined": 63 + }, + "eth3": { + "maxChannelsCombined": 63 + }, + "eth4": { + "maxChannelsCombined": 63 + }, + "eth5": { + "maxChannelsCombined": 63 + }, + "eth6": { + "maxChannelsCombined": 63 + }, + "eth7": { + "maxChannelsCombined": 63 + }, + "eth8": { + "maxChannelsCombined": 63 + }, + "eth9": { + "maxChannelsCombined": 63 + } + }, + "cpuCoreCount": 96, + "parentModelId": 341983, + "recoveryEnvironment": false, + "deviceType": "SERVER", + "datacenter": "ftw6", + "cluster": "02", + "memTotal": 1621459365888, + "osVersion": { + "distribution_name": "CentOS Stream release", + "version": 9, + "is_in_ramdisk": false, + "is_metalos": false + }, + "pciByAddress": { + "0000:16:00.0": { + "vendor_id": 5555, + "device_id": 4125, + "class_code": 131072, + "board_part_number": "MCX623435AC-VDA_FB" + }, + "0000:17:00.0": { + "vendor_id": 5555, + "device_id": 4125, + "class_code": 131072, + "board_part_number": "MCX623435AC-VDA_FB" + }, + "0000:30:00.0": { + "vendor_id": 5555, + "device_id": 4125, + "class_code": 131072, + "board_part_number": "MCX623435AC-CDAB_FB" + }, + "0000:4d:00.0": { + "vendor_id": 5555, + "device_id": 4125, + "class_code": 131072, + "board_part_number": "MCX623435AC-VDA_FB" + }, + "0000:4e:00.0": { + "vendor_id": 5555, + "device_id": 4125, + "class_code": 131072, + "board_part_number": "MCX623435AC-VDA_FB" + }, + "0000:6d:00.0": { + "vendor_id": 5555, + "device_id": 4125, + "class_code": 131072, + "board_part_number": "MCX623435AC-CDAB_FB" + }, + "0000:8d:00.0": { + "vendor_id": 5555, + "device_id": 4125, + "class_code": 131072, + "board_part_number": "MCX623435AC-VDA_FB" + }, + "0000:8e:00.0": { + "vendor_id": 5555, + "device_id": 4125, + "class_code": 131072, + "board_part_number": "MCX623435AC-VDA_FB" + }, + "0000:ad:00.0": { + "vendor_id": 5555, + "device_id": 4125, + "class_code": 131072, + "board_part_number": "MCX623435AC-CDAB_FB" + }, + "0000:cd:00.0": { + "vendor_id": 5555, + "device_id": 4125, + "class_code": 131072, + "board_part_number": "MCX623435AC-VDA_FB" + }, + "0000:ce:00.0": { + "vendor_id": 5555, + "device_id": 4125, + "class_code": 131072, + "board_part_number": "MCX623435AC-VDA_FB" + }, + "0000:ed:00.0": { + "vendor_id": 5555, + "device_id": 4125, + "class_code": 131072, + "board_part_number": "MCX623435AC-CDAB_FB" + } + }, + "static_smc_tiers": [], + "machine": "x86_64" + }, + "bootConfig": { + "ethtoolByInterface": { + "eth0": { + "driver": "mlx5_core", + "driver_version": "6.4.3-0_fbk10_zion_rc9_2482_g96", + "firmware_version": "22.32.1206 (FB_0000000025)", + "bus_info": "0000:30:00.0", + "speed_mbps": 100000 + }, + "eth1": { + "driver": "mlx5_core", + "driver_version": "6.4.3-0_fbk10_zion_rc9_2482_g96", + "firmware_version": "22.32.1206 (FB_0000000025)", + "bus_info": "0000:6d:00.0", + "speed_mbps": 100000 + }, + "eth10": { + "driver": "mlx5_core", + "driver_version": "6.4.3-0_fbk10_zion_rc9_2482_g96", + "firmware_version": "22.38.1002 (FB_0000000028)", + "bus_info": "0000:ce:00.0", + "speed_mbps": 200000 + }, + "eth11": { + "driver": "mlx5_core", + "driver_version": "6.4.3-0_fbk10_zion_rc9_2482_g96", + "firmware_version": "22.38.1002 (FB_0000000028)", + "bus_info": "0000:4d:00.0", + "speed_mbps": 200000 + }, + "eth2": { + "driver": "mlx5_core", + "driver_version": "6.4.3-0_fbk10_zion_rc9_2482_g96", + "firmware_version": "22.32.1206 (FB_0000000025)", + "bus_info": "0000:ad:00.0", + "speed_mbps": 100000 + }, + "eth3": { + "driver": "mlx5_core", + "driver_version": "6.4.3-0_fbk10_zion_rc9_2482_g96", + "firmware_version": "22.32.1206 (FB_0000000025)", + "bus_info": "0000:ed:00.0", + "speed_mbps": 100000 + }, + "eth4": { + "driver": "mlx5_core", + "driver_version": "6.4.3-0_fbk10_zion_rc9_2482_g96", + "firmware_version": "22.38.1002 (FB_0000000028)", + "bus_info": "0000:4e:00.0", + "speed_mbps": 200000 + }, + "eth5": { + "driver": "mlx5_core", + "driver_version": "6.4.3-0_fbk10_zion_rc9_2482_g96", + "firmware_version": "22.38.1002 (FB_0000000028)", + "bus_info": "0000:17:00.0", + "speed_mbps": 200000 + }, + "eth6": { + "driver": "mlx5_core", + "driver_version": "6.4.3-0_fbk10_zion_rc9_2482_g96", + "firmware_version": "22.38.1002 (FB_0000000028)", + "bus_info": "0000:8d:00.0", + "speed_mbps": 200000 + }, + "eth7": { + "driver": "mlx5_core", + "driver_version": "6.4.3-0_fbk10_zion_rc9_2482_g96", + "firmware_version": "22.38.1002 (FB_0000000028)", + "bus_info": "0000:8e:00.0", + "speed_mbps": 200000 + }, + "eth8": { + "driver": "mlx5_core", + "driver_version": "6.4.3-0_fbk10_zion_rc9_2482_g96", + "firmware_version": "22.38.1002 (FB_0000000028)", + "bus_info": "0000:16:00.0", + "speed_mbps": 200000 + }, + "eth9": { + "driver": "mlx5_core", + "driver_version": "6.4.3-0_fbk10_zion_rc9_2482_g96", + "firmware_version": "22.38.1002 (FB_0000000028)", + "bus_info": "0000:cd:00.0", + "speed_mbps": 200000 + } + } + }, + "runtimeConfig": { + "hasHighPrivCert": true, + "regionRoutableCluster": "ftw1.02", + "block_devices": { + "block_devices": { + "nvme0n1": { + "size_bytes": 256055095296, + "is_rotational": false, + "model": "WDC CL SN720 SDAQNTW-512G-1020", + "serial": "20477M801720", + "physical_block_size": 512, + "logical_block_size": 512, + "is_root": false + }, + "nvme10n1": { + "size_bytes": 1800360124416, + "is_rotational": false, + "model": "MZ1LB1T9HBLS-000FB", + "serial": "S5XANE0N800501", + "physical_block_size": 4096, + "logical_block_size": 4096, + "is_root": false + }, + "nvme11n1": { + "size_bytes": 1800360124416, + "is_rotational": false, + "model": "MZ1LB1T9HBLS-000FB", + "serial": "S5XANE0N800535", + "physical_block_size": 4096, + "logical_block_size": 4096, + "is_root": false + }, + "nvme12n1": { + "size_bytes": 1800360124416, + "is_rotational": false, + "model": "MZ1LB1T9HBLS-000FB", + "serial": "S5XANE0N800503", + "physical_block_size": 4096, + "logical_block_size": 4096, + "is_root": false + }, + "nvme13n1": { + "size_bytes": 1800360124416, + "is_rotational": false, + "model": "MZ1LB1T9HBLS-000FB", + "serial": "S5XANE0N800504", + "physical_block_size": 4096, + "logical_block_size": 4096, + "is_root": false + }, + "nvme14n1": { + "size_bytes": 1800360124416, + "is_rotational": false, + "model": "MZ1LB1T9HBLS-000FB", + "serial": "S5XANE0N702073", + "physical_block_size": 4096, + "logical_block_size": 4096, + "is_root": true + }, + "nvme15n1": { + "size_bytes": 1800360124416, + "is_rotational": false, + "model": "MZ1LB1T9HBLS-000FB", + "serial": "S5XANE0N702067", + "physical_block_size": 4096, + "logical_block_size": 4096, + "is_root": true + }, + "nvme16n1": { + "size_bytes": 1800360124416, + "is_rotational": false, + "model": "MZ1LB1T9HBLS-000FB", + "serial": "S5XANE0N800498", + "physical_block_size": 4096, + "logical_block_size": 4096, + "is_root": false + }, + "nvme17n1": { + "size_bytes": 1800360124416, + "is_rotational": false, + "model": "MZ1LB1T9HBLS-000FB", + "serial": "S5XANE0N800495", + "physical_block_size": 4096, + "logical_block_size": 4096, + "is_root": false + }, + "nvme18n1": { + "size_bytes": 1800360124416, + "is_rotational": false, + "model": "MZ1LB1T9HBLS-000FB", + "serial": "S5XANE0N801008", + "physical_block_size": 4096, + "logical_block_size": 4096, + "is_root": false + }, + "nvme1n1": { + "size_bytes": 1800360124416, + "is_rotational": false, + "model": "MZ1LB1T9HBLS-000FB", + "serial": "S5XANE0N800533", + "physical_block_size": 4096, + "logical_block_size": 4096, + "is_root": false + }, + "nvme2n1": { + "size_bytes": 1800360124416, + "is_rotational": false, + "model": "MZ1LB1T9HBLS-000FB", + "serial": "S5XANE0N800515", + "physical_block_size": 4096, + "logical_block_size": 4096, + "is_root": false + }, + "nvme3n1": { + "size_bytes": 1800360124416, + "is_rotational": false, + "model": "MZ1LB1T9HBLS-000FB", + "serial": "S5XANE0N800114", + "physical_block_size": 4096, + "logical_block_size": 4096, + "is_root": false + }, + "nvme4n1": { + "size_bytes": 1800360124416, + "is_rotational": false, + "model": "MZ1LB1T9HBLS-000FB", + "serial": "S5XANE0N800123", + "physical_block_size": 4096, + "logical_block_size": 4096, + "is_root": false + }, + "nvme5n1": { + "size_bytes": 1800360124416, + "is_rotational": false, + "model": "MZ1LB1T9HBLS-000FB", + "serial": "S5XANE0N800520", + "physical_block_size": 4096, + "logical_block_size": 4096, + "is_root": false + }, + "nvme6n1": { + "size_bytes": 1800360124416, + "is_rotational": false, + "model": "MZ1LB1T9HBLS-000FB", + "serial": "S5XANE0N800521", + "physical_block_size": 4096, + "logical_block_size": 4096, + "is_root": false + }, + "nvme7n1": { + "size_bytes": 1800360124416, + "is_rotational": false, + "model": "MZ1LB1T9HBLS-000FB", + "serial": "S5XANE0N800500", + "physical_block_size": 4096, + "logical_block_size": 4096, + "is_root": false + }, + "nvme8n1": { + "size_bytes": 1800360124416, + "is_rotational": false, + "model": "MZ1LB1T9HBLS-000FB", + "serial": "S5XANE0N800483", + "physical_block_size": 4096, + "logical_block_size": 4096, + "is_root": false + }, + "nvme9n1": { + "size_bytes": 1800360124416, + "is_rotational": false, + "model": "MZ1LB1T9HBLS-000FB", + "serial": "S5XANE0N800532", + "physical_block_size": 4096, + "logical_block_size": 4096, + "is_root": false + } + } + }, + "dynamic_smc_tiers": [ + "shadowrun.canaries.fb-tupperware-agent" + ], + "cluster_state": "CLUSTER_IN_USE", + "installed_platforms": [ + "platform009", + "platform010", + "platform010-compat" + ], + "device_nics_enum": [ + "ETH0", + "ETH1", + "ETH2", + "ETH3", + "SVC0", + "SVC0_1", + "SVC0_2", + "SVC0_3" + ] + }, + "reservationConfig": { + "active_machine_materialization_id": "61df5d85bd682", + "current_reservation_host_profile_id": "KERNEL:kernel_6.4.3-0_fbk10_zion_rc9_2482_g96807ccfca35_nvidia_535.154.05_base.v0|POWER:CPU_PERF_OPT|KERNEL_ARGS:kargs_crashkernel384M_multinic.v0|BOOT_CONFIG:bootloader814_initrd230_swap16G_native_arch|DRIVER_ARGS:t20_zionex_nvidia_driver_args.v1" + } +} diff --git a/src/oomd/cfgen/test/cfgen_test_inputs/twshared_vll_shard00.json b/src/oomd/cfgen/test/cfgen_test_inputs/twshared_vll_shard00.json index a8050a47..83ab69b0 100644 --- a/src/oomd/cfgen/test/cfgen_test_inputs/twshared_vll_shard00.json +++ b/src/oomd/cfgen/test/cfgen_test_inputs/twshared_vll_shard00.json @@ -1,4 +1,4 @@ -@generated SignedSource<<5d13b21f7d62653cb915aa1c8f76bbe0>> +@generated SignedSource<<227f1d4294407faa34c09b9e9650c02c>> @codegen-command arc cfgen update-inputs fb-oomd { "fqdn": "twshared2871.09.vll2.facebook.com", @@ -47,7 +47,8 @@ "driver": "mlx5_core", "driver_version": "6.4.3-0_fbk2_785_gacbb203ea6ff", "firmware_version": "14.32.1206 (FB_0000000006)", - "bus_info": "0000:02:00.0" + "bus_info": "0000:02:00.0", + "speed_mbps": 50000 } } }, diff --git a/src/oomd/cfgen/test/cfgen_test_manifest.yml b/src/oomd/cfgen/test/cfgen_test_manifest.yml index fb04a6d1..a0ecc2ed 100644 --- a/src/oomd/cfgen/test/cfgen_test_manifest.yml +++ b/src/oomd/cfgen/test/cfgen_test_manifest.yml @@ -11,8 +11,15 @@ library_samples: - devvm - twshared_vll_shard00 + - twshared_t20_zionex # Add more samples from https://fburl.com/code/vjwmkoa1 if needed samples: ondemand: - # A random host with od hostname prefix. + # A random host with od hostname prefix from `randbox sys.od` production_host: od2228.eag1.facebook.com + twshared_senpai_disabled: + # A host in twshared pool with hdd root + # serf get --fields name,storage_capacity,components[disk].is_rootdrive,components[disk].disk_obj.is_flash --limit 100 'hostnameScheme_obj.name=twshared,components[disk].is_rootdrive=1,components[disk].disk_obj.is_flash=0' + # If the query is too slow, just do serf get --fields name,storage_capacity,components[disk].is_rootdrive,components[disk].disk_obj.is_flash --limit 100 'hostnameScheme_obj.name=twshared' + # And then pick a record that has `components[disk].is_rootdrive=1` and `components[disk].disk_obj.is_flash=0` + production_host: twshared44829.07.ash9 diff --git a/src/oomd/cfgen/test/cfgen_test_outputs/twshared_senpai_disabled/50-change-propagator.conf b/src/oomd/cfgen/test/cfgen_test_outputs/twshared_senpai_disabled/50-change-propagator.conf new file mode 100644 index 00000000..240e90fc --- /dev/null +++ b/src/oomd/cfgen/test/cfgen_test_outputs/twshared_senpai_disabled/50-change-propagator.conf @@ -0,0 +1,4 @@ +@generated SignedSource<<31b3f2f747768088bd5523d8e690bfac>> +@codegen-command arc cfgen update-outputs fb-oomd +[Service] +[Unit] diff --git a/src/oomd/cfgen/test/cfgen_test_outputs/twshared_senpai_disabled/oomd2.json b/src/oomd/cfgen/test/cfgen_test_outputs/twshared_senpai_disabled/oomd2.json new file mode 100644 index 00000000..1e9fcc2f --- /dev/null +++ b/src/oomd/cfgen/test/cfgen_test_outputs/twshared_senpai_disabled/oomd2.json @@ -0,0 +1,177 @@ +@generated SignedSource<<2b86656315099f05b2d1754bef6c5220>> +@codegen-command arc cfgen update-outputs fb-oomd +{ + "rulesets": [ + { + "name": "system overview", + "silence-logs": "engine", + "detectors": [ + [ + "records system stats", + { + "name": "dump_cgroup_overview", + "args": { + "cgroup": "workload.slice" + } + } + ] + ], + "actions": [ + { + "name": "continue", + "args": {} + } + ] + }, + { + "name": "restart smc_proxy.service on memory threshold", + "detectors": [ + [ + "memory usage above", + { + "name": "memory_above", + "args": { + "cgroup": "smc_proxy.service", + "threshold_anon": "15G", + "duration": "10" + } + } + ] + ], + "actions": [ + { + "name": "systemd_restart", + "args": { + "service": "smc_proxy.service", + "post_action_delay": "20", + "dry": "false" + } + } + ] + }, + { + "name": "protection against heavy workload thrashing", + "drop-in": { + "disable-on-drop-in": true, + "detectors": true, + "actions": true + }, + "detectors": [ + [ + "sustained high workload memory pressure", + { + "name": "exists", + "args": { + "cgroup": "workload.slice/workload-tw.slice/sigrid_online_trainer*,workload.slice/workload-tw.slice/*.reservation.slice/sigrid_online_trainer*,workload.slice/workload-tw.slice/*.allotment.slice/sigrid_online_trainer*,workload.slice/workload-tw.slice/analyzer*,workload.slice/workload-tw.slice/*.reservation.slice/analyzer*,workload.slice/workload-tw.slice/*.allotment.slice/analyzer*,workload.slice/workload-tw.slice/bumblebee.*,workload.slice/workload-tw.slice/*.reservation.slice/bumblebee.*,workload.slice/workload-tw.slice/*.allotment.slice/bumblebee.*", + "negate": true + } + }, + { + "name": "pressure_above", + "args": { + "cgroup": "workload.slice/workload-tw.slice", + "resource": "memory", + "threshold": "80", + "duration": "180" + } + }, + { + "name": "memory_reclaim", + "args": { + "cgroup": "workload.slice/workload-tw.slice", + "duration": "10" + } + } + ] + ], + "actions": [ + { + "name": "kill_by_pg_scan", + "args": { + "cgroup": "workload.slice/workload-tw.slice/*", + "recursive": "true" + } + } + ] + }, + { + "name": "protection against low swap", + "detectors": [ + [ + "free swap goes below 10 percent", + { + "name": "exists", + "args": { + "cgroup": "workload.slice/workload-tw.slice/sigrid_online_trainer*,workload.slice/workload-tw.slice/*.reservation.slice/sigrid_online_trainer*,workload.slice/workload-tw.slice/*.allotment.slice/sigrid_online_trainer*,workload.slice/workload-tw.slice/analyzer*,workload.slice/workload-tw.slice/*.reservation.slice/analyzer*,workload.slice/workload-tw.slice/*.allotment.slice/analyzer*,workload.slice/workload-tw.slice/bumblebee.*,workload.slice/workload-tw.slice/*.reservation.slice/bumblebee.*,workload.slice/workload-tw.slice/*.allotment.slice/bumblebee.*", + "negate": true + } + }, + { + "name": "swap_free", + "args": { + "threshold_pct": "10" + } + } + ] + ], + "actions": [ + { + "name": "kill_by_swap_usage", + "args": { + "cgroup": "system.slice/*,workload.slice/workload-wdb.slice/*,workload.slice/workload-tw.slice/*", + "biased_swap_kill": "true", + "recursive": "true" + } + } + ] + }, + { + "name": "senpai drop-in ruleset", + "silence-logs": "engine", + "drop-in": { + "actions": true, + "disable-on-drop-in": true + }, + "detectors": [ + [ + "continue detector group", + { + "name": "continue", + "args": {} + } + ] + ], + "actions": [ + { + "name": "continue", + "args": {} + } + ] + }, + { + "name": "tw_container drop-in ruleset", + "drop-in": { + "detectors": true, + "actions": true, + "disable-on-drop-in": true + }, + "detectors": [ + [ + "continue", + { + "name": "stop", + "args": {} + } + ] + ], + "actions": [ + { + "name": "continue", + "args": {} + } + ], + "prekill_hook_timeout": "45" + } + ], + "version": "1.0.0" +} \ No newline at end of file diff --git a/src/oomd/cfgen/test/cfgen_test_outputs/twshared_t20_zionex/50-change-propagator.conf b/src/oomd/cfgen/test/cfgen_test_outputs/twshared_t20_zionex/50-change-propagator.conf new file mode 100644 index 00000000..240e90fc --- /dev/null +++ b/src/oomd/cfgen/test/cfgen_test_outputs/twshared_t20_zionex/50-change-propagator.conf @@ -0,0 +1,4 @@ +@generated SignedSource<<31b3f2f747768088bd5523d8e690bfac>> +@codegen-command arc cfgen update-outputs fb-oomd +[Service] +[Unit] diff --git a/src/oomd/cfgen/test/cfgen_test_outputs/twshared_t20_zionex/oomd2.json b/src/oomd/cfgen/test/cfgen_test_outputs/twshared_t20_zionex/oomd2.json new file mode 100644 index 00000000..d5379a34 --- /dev/null +++ b/src/oomd/cfgen/test/cfgen_test_outputs/twshared_t20_zionex/oomd2.json @@ -0,0 +1,202 @@ +@generated SignedSource<> +@codegen-command arc cfgen update-outputs fb-oomd +{ + "rulesets": [ + { + "name": "system overview", + "silence-logs": "engine", + "detectors": [ + [ + "records system stats", + { + "name": "dump_cgroup_overview", + "args": { + "cgroup": "workload.slice" + } + } + ] + ], + "actions": [ + { + "name": "continue", + "args": {} + } + ] + }, + { + "name": "restart smc_proxy.service on memory threshold", + "detectors": [ + [ + "memory usage above", + { + "name": "memory_above", + "args": { + "cgroup": "smc_proxy.service", + "threshold_anon": "15G", + "duration": "10" + } + } + ] + ], + "actions": [ + { + "name": "systemd_restart", + "args": { + "service": "smc_proxy.service", + "post_action_delay": "20", + "dry": "false" + } + } + ] + }, + { + "name": "protection against heavy workload thrashing", + "drop-in": { + "disable-on-drop-in": true, + "detectors": true, + "actions": true + }, + "detectors": [ + [ + "sustained high workload memory pressure", + { + "name": "exists", + "args": { + "cgroup": "workload.slice/workload-tw.slice/sigrid_online_trainer*,workload.slice/workload-tw.slice/*.reservation.slice/sigrid_online_trainer*,workload.slice/workload-tw.slice/*.allotment.slice/sigrid_online_trainer*,workload.slice/workload-tw.slice/analyzer*,workload.slice/workload-tw.slice/*.reservation.slice/analyzer*,workload.slice/workload-tw.slice/*.allotment.slice/analyzer*,workload.slice/workload-tw.slice/bumblebee.*,workload.slice/workload-tw.slice/*.reservation.slice/bumblebee.*,workload.slice/workload-tw.slice/*.allotment.slice/bumblebee.*", + "negate": true + } + }, + { + "name": "pressure_above", + "args": { + "cgroup": "workload.slice/workload-tw.slice", + "resource": "memory", + "threshold": "80", + "duration": "180" + } + }, + { + "name": "memory_reclaim", + "args": { + "cgroup": "workload.slice/workload-tw.slice", + "duration": "10" + } + } + ] + ], + "actions": [ + { + "name": "kill_by_pg_scan", + "args": { + "cgroup": "workload.slice/workload-tw.slice/*", + "recursive": "true" + } + } + ] + }, + { + "name": "protection against low swap", + "detectors": [ + [ + "free swap goes below 10 percent", + { + "name": "exists", + "args": { + "cgroup": "workload.slice/workload-tw.slice/sigrid_online_trainer*,workload.slice/workload-tw.slice/*.reservation.slice/sigrid_online_trainer*,workload.slice/workload-tw.slice/*.allotment.slice/sigrid_online_trainer*,workload.slice/workload-tw.slice/analyzer*,workload.slice/workload-tw.slice/*.reservation.slice/analyzer*,workload.slice/workload-tw.slice/*.allotment.slice/analyzer*,workload.slice/workload-tw.slice/bumblebee.*,workload.slice/workload-tw.slice/*.reservation.slice/bumblebee.*,workload.slice/workload-tw.slice/*.allotment.slice/bumblebee.*", + "negate": true + } + }, + { + "name": "swap_free", + "args": { + "threshold_pct": "10" + } + } + ] + ], + "actions": [ + { + "name": "kill_by_swap_usage", + "args": { + "cgroup": "system.slice/*,workload.slice/workload-wdb.slice/*,workload.slice/workload-tw.slice/*", + "biased_swap_kill": "true", + "recursive": "true" + } + } + ] + }, + { + "name": "senpai ruleset", + "silence-logs": "engine", + "detectors": [ + [ + "continue detector group", + { + "name": "continue", + "args": {} + } + ] + ], + "actions": [ + { + "name": "senpai_poking", + "args": { + "io_pressure_pct": "1.0", + "memory_high_timeout_ms": "20", + "scuba_logger_dataset": "perfpipe_senpai_events", + "limit_min_bytes": "104857600", + "cgroup": "system.slice,workload.slice/workload-wdb.slice,hostcritical.slice,workload.slice/workload-wdb.slice/*,hostcritical.slice/*" + } + } + ] + }, + { + "name": "senpai drop-in ruleset", + "silence-logs": "engine", + "drop-in": { + "actions": true, + "disable-on-drop-in": true + }, + "detectors": [ + [ + "continue detector group", + { + "name": "continue", + "args": {} + } + ] + ], + "actions": [ + { + "name": "continue", + "args": {} + } + ] + }, + { + "name": "tw_container drop-in ruleset", + "drop-in": { + "detectors": true, + "actions": true, + "disable-on-drop-in": true + }, + "detectors": [ + [ + "continue", + { + "name": "stop", + "args": {} + } + ] + ], + "actions": [ + { + "name": "continue", + "args": {} + } + ], + "prekill_hook_timeout": "45" + } + ], + "version": "1.0.0" +} \ No newline at end of file