diff --git a/src/oomd/cfgen/src/cfgen.rs b/src/oomd/cfgen/src/cfgen.rs index f8518f01..633806af 100644 --- a/src/oomd/cfgen/src/cfgen.rs +++ b/src/oomd/cfgen/src/cfgen.rs @@ -602,7 +602,7 @@ fn ruleset_user_session_protection(node: &Node, attrs: &ConfigParams) -> RuleSet } fn maybe_nr_dying_descendants_rule(node: &Node) -> DetectorElement { - if node.in_dynamic_smc_tier("devbig") { + if node.is_devserver() && node.hostname_prefix() == DEVBIG { // See https://fb.workplace.com/groups/linux.fbk/permalink/2924541514245339/ detector_rule!( name: "nr_dying_descendants", diff --git a/src/oomd/cfgen/test/cfgen_test_inputs/devbig.json b/src/oomd/cfgen/test/cfgen_test_inputs/devbig.json new file mode 100644 index 00000000..0edbbcb6 --- /dev/null +++ b/src/oomd/cfgen/test/cfgen_test_inputs/devbig.json @@ -0,0 +1,99 @@ +@generated SignedSource<<896ff729516b5e965bc9b110160cf100>> +@codegen-command arc cfgen update-inputs fb-oomd +{ + "fqdn": "devbig284.ash8.facebook.com", + "region": "east", + "clusterType": "SERVICE_GENERIC_NON_MEMCACHE", + "modelId": 336209, + "kernelRelease": "5.19.0-0_fbk21_hardened_12633_g4db063a1bcb5", + "serverType": "TYPE_VI_FEED", + "experiments": [], + "cpuArchitecture": "skylake", + "metalosRootfs": false, + "provisioningConfig": { + "ethtoolByInterface": { + "eth0": { + "maxChannelsCombined": 63 + } + }, + "cpuCoreCount": 40, + "parentModelId": 336939, + "recoveryEnvironment": false, + "deviceType": "SERVER", + "datacenter": "ash8", + "cluster": "08", + "memTotal": 270017355776, + "osVersion": { + "distribution_name": "CentOS Stream release", + "version": 9, + "is_in_ramdisk": false, + "is_metalos": false + }, + "pciByAddress": { + "0000:5e:00.0": { + "vendor_id": 5555, + "device_id": 4117, + "class_code": 131072, + "board_part_number": "MCX4411D-ACAN_FB", + "current_speed_mts": 8000, + "current_width": 8 + } + }, + "static_smc_tiers": [], + "machine": "x86_64" + }, + "bootConfig": { + "ethtoolByInterface": { + "eth0": { + "driver": "mlx5_core", + "driver_version": "5.19.0-0_fbk21_hardened_12633_g", + "firmware_version": "14.27.2606 (FB_0000000005)", + "bus_info": "0000:5e:00.0", + "speed_mbps": 25000 + } + } + }, + "runtimeConfig": { + "hasHighPrivCert": true, + "regionRoutableCluster": "ash9.02", + "block_devices": { + "block_devices": { + "nvme0n1": { + "size_bytes": 1920383410176, + "is_rotational": false, + "model": "KXD51LN11T92 TOSHIBA", + "serial": "798S10S0T7RQ", + "physical_block_size": 4096, + "logical_block_size": 4096, + "is_root": false + }, + "sda": { + "size_bytes": 2000398934016, + "is_rotational": true, + "model": "TOSHIBA MG04ACA200A", + "serial": "79REKGBZFKGA", + "physical_block_size": 4096, + "logical_block_size": 4096, + "is_root": true + } + } + }, + "dynamic_smc_tiers": [], + "cluster_state": "CLUSTER_IN_USE", + "installed_platforms": [ + "platform009", + "platform010", + "platform010-aarch64", + "platform010-compat" + ], + "device_nics_enum": [ + "ETH0", + "SVC0" + ], + "loaded_kernel_modules": [] + }, + "reservationConfig": { + "active_machine_materialization_id": "", + "current_reservation_host_profile_id": "NEWLY_PROVISIONED_PROFILE" + } +} diff --git a/src/oomd/cfgen/test/cfgen_test_manifest.yml b/src/oomd/cfgen/test/cfgen_test_manifest.yml index b6e6e712..5b9dc688 100644 --- a/src/oomd/cfgen/test/cfgen_test_manifest.yml +++ b/src/oomd/cfgen/test/cfgen_test_manifest.yml @@ -29,3 +29,6 @@ samples: synmon: # this host is a sample from the "wdb config monitor" scuba table production_host: synmon017.rva2.facebook.com + devbig: + # this host is a sample from the "wdb config monitor" scuba table + production_host: devbig284.ash8.facebook.com diff --git a/src/oomd/cfgen/test/cfgen_test_outputs/devbig/50-change-propagator.conf b/src/oomd/cfgen/test/cfgen_test_outputs/devbig/50-change-propagator.conf new file mode 100644 index 00000000..c44ba252 --- /dev/null +++ b/src/oomd/cfgen/test/cfgen_test_outputs/devbig/50-change-propagator.conf @@ -0,0 +1,6 @@ +@generated SignedSource<<3dd8c7637bb7afa680fc168e9c49060d>> +@codegen-command arc cfgen update-outputs fb-oomd +[Service] +Environment=OOMD_ARGS='--interval 1 --config /etc/oomd2.json --drop-in-dir /run/oomd/dropin' + +[Unit] diff --git a/src/oomd/cfgen/test/cfgen_test_outputs/devbig/oomd2.json b/src/oomd/cfgen/test/cfgen_test_outputs/devbig/oomd2.json new file mode 100644 index 00000000..dfc6595e --- /dev/null +++ b/src/oomd/cfgen/test/cfgen_test_outputs/devbig/oomd2.json @@ -0,0 +1,192 @@ +@generated SignedSource<> +@codegen-command arc cfgen update-outputs fb-oomd +{ + "rulesets": [ + { + "name": "system overview", + "silence-logs": "engine", + "detectors": [ + [ + "records system stats", + { + "name": "dump_cgroup_overview", + "args": { + "cgroup": "system.slice" + } + } + ] + ], + "actions": [ + { + "name": "continue", + "args": {} + } + ] + }, + { + "name": "user session protection", + "detectors": [ + [ + "user pressure above 40 for 300s", + { + "name": "pressure_above", + "args": { + "cgroup": "user.slice,workload.slice,www.slice", + "duration": "300", + "resource": "memory", + "threshold": "40" + } + }, + { + "name": "nr_dying_descendants", + "args": { + "cgroup": "/", + "lte": "true", + "count": "30000" + } + }, + { + "name": "memory_reclaim", + "args": { + "cgroup": "user.slice,workload.slice,www.slice", + "duration": "30" + } + } + ], + [ + "system pressure above 60 for 300s", + { + "name": "pressure_above", + "args": { + "cgroup": "system.slice", + "duration": "300", + "resource": "memory", + "threshold": "60" + } + }, + { + "name": "nr_dying_descendants", + "args": { + "cgroup": "/", + "lte": "true", + "count": "30000" + } + }, + { + "name": "memory_reclaim", + "args": { + "cgroup": "system.slice", + "duration": "30" + } + } + ] + ], + "actions": [ + { + "name": "kill_by_memory_size_or_growth", + "args": { + "cgroup": "user.slice/,system.slice/,workload.slice/,www.slice/", + "recursive": "true" + } + } + ] + }, + { + "name": "protection against low swap", + "detectors": [ + [ + "free swap goes below 5%", + { + "name": "swap_free", + "args": { + "threshold_pct": "5" + } + } + ] + ], + "actions": [ + { + "name": "kill_by_swap_usage", + "args": { + "cgroup": "user.slice/,system.slice/,workload.slice/,www.slice/", + "recursive": "true", + "threshold": "5" + } + } + ] + }, + { + "name": "senpai drop-in ruleset", + "silence-logs": "engine", + "drop-in": { + "disable-on-drop-in": true, + "actions": true + }, + "detectors": [ + [ + "continue detector group", + { + "name": "continue", + "args": {} + } + ] + ], + "actions": [ + { + "name": "continue", + "args": {} + } + ] + }, + { + "name": "restart smc_proxy.service on memory threshold", + "detectors": [ + [ + "memory usage above", + { + "name": "memory_above", + "args": { + "cgroup": "smc_proxy.service", + "duration": "10", + "threshold_anon": "15G" + } + } + ] + ], + "actions": [ + { + "name": "systemd_restart", + "args": { + "dry": "false", + "post_action_delay": "20", + "service": "smc_proxy.service" + } + } + ] + }, + { + "name": "tw_container drop-in ruleset", + "drop-in": { + "disable-on-drop-in": true, + "detectors": true, + "actions": true + }, + "detectors": [ + [ + "continue", + { + "name": "stop", + "args": {} + } + ] + ], + "actions": [ + { + "name": "continue", + "args": {} + } + ] + } + ], + "version": "1.0.0" +} \ No newline at end of file