Skip to content

Commit

Permalink
fix(k8smeta): introduce proc-scan to recover the initial host state
Browse files Browse the repository at this point in the history
Signed-off-by: Andrea Terzolo <[email protected]>
  • Loading branch information
Andreagit97 authored and poiana committed Jul 24, 2024
1 parent a00cc75 commit 4585f3b
Show file tree
Hide file tree
Showing 5 changed files with 215 additions and 35 deletions.
2 changes: 1 addition & 1 deletion plugins/k8smeta/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ plugins:
# name of the node on which the Falco instance is running. (required)
nodeName: kind-control-plane
# verbosity level for the plugin logger (optional)
verbosity: warn # (default: info)
verbosity: warning # (default: info)
# path to the PEM encoding of the server root certificates. (optional)
# Used to open an authenticated GRPC channel with the collector.
# If empty the connection will be insecure.
Expand Down
154 changes: 126 additions & 28 deletions plugins/k8smeta/src/plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ limitations under the License.
#include <sstream>
#include <re2/re2.h>
#include <fstream>
#include <filesystem>

#define ADD_MODIFY_TABLE_ENTRY(_resource_name, _resource_table) \
if(resource_kind.compare(_resource_name) == 0) \
Expand All @@ -51,6 +52,38 @@ limitations under the License.
// This is the regex needed to extract the pod_uid from the cgroup
static re2::RE2 pattern(RGX_POD, re2::RE2::POSIX);

// Extracts the pod uid from a single cgroup string by applying the `pattern`
// regex. When the regex does not match, the (empty) result string is returned
// as it is.
//
// `cgroup_first_line` can have 2 layouts:
//
// 1 - If it arrives from our driver -> `controller=cgroup_path`
//     Example:
//     `cpuset=/kubelet.slice/kubelet-kubepods.slice/kubelet-kubepods-pod05869489-8c7f-45dc-9abd-1b1620787bb1.slice/cri-containerd-2f92446a3fbfd0b7a73457b45e96c75a25c5e44e7b1bcec165712b906551c261.scope\0`
//
// 2 - If it arrives from the /proc scan ->
//     `hierarchy ID:controller:cgroup_path`
//
// todo!: check whether the cgroup version is relevant here; it is not clear
// if all controllers share the same format in cgroupv1.
std::string get_pod_uid_from_cgroup_string(const std::string& cgroup_first_line)
{
    std::string extracted;

    if(!re2::RE2::PartialMatch(cgroup_first_line, pattern, &extracted))
    {
        // No pod uid in this cgroup string.
        return extracted;
    }

    // On a match `extracted` has one of these 2 layouts:
    // - (driver cgroup)  pod05869489-8c7f-45dc-9abd-1b1620787bb1
    // - (driver systemd) pod05869489_8c7f_45dc_9abd_1b1620787bb1

    // Strip the 3-character "pod" prefix.
    extracted.erase(0, 3);

    // Normalize the `systemd` notation by turning every `_` into `-`, so the
    // final layout is always:
    // 05869489-8c7f-45dc-9abd-1b1620787bb1
    for(char& ch : extracted)
    {
        if(ch == '_')
        {
            ch = '-';
        }
    }

    return extracted;
}

//////////////////////////
// General plugin API
//////////////////////////
Expand Down Expand Up @@ -219,6 +252,75 @@ void my_plugin::parse_init_config(nlohmann::json& config_json)
}
}

// Walks `<m_host_proc>/proc` once and, for every all-digits directory, reads
// the first line of its `cgroup` file. When that line contains a pod uid, the
// pair (numeric directory name -> pod uid) is stored in
// `m_thread_id_pod_uid_map`.
//
// The scan is best-effort: every filesystem call uses the non-throwing
// `std::error_code` overloads so that a failure on a single entry (or while
// advancing the iterator) is logged/skipped instead of escaping from this
// method as a `std::filesystem::filesystem_error`.
//
// NOTE(review): per proc(5) a listing of `/proc` only returns thread-group
// leaders, so the keys collected here are presumably process ids (== tid of
// the main thread). Non-leader threads of already running processes would not
// be found in the map - confirm whether `/proc/<pid>/task` must be scanned too.
void my_plugin::do_initial_proc_scan()
{
    const std::string proc_root = m_host_proc + "/proc";
    SPDLOG_DEBUG("Start the /proc scan under: '{}'", proc_root);

    std::error_code iter_err;
    std::filesystem::directory_iterator dir_iter(proc_root, iter_err);
    if(iter_err)
    {
        SPDLOG_ERROR("cannot iter over '{}' for initial proc scan: {}",
                     proc_root, iter_err.message());
        return;
    }

    const std::filesystem::directory_iterator dir_end;
    // Reused across iterations to avoid reallocating the line buffer.
    std::string cgroup_line;

    for(; !iter_err && dir_iter != dir_end; dir_iter.increment(iter_err))
    {
        const auto& entry = *dir_iter;

        // Entries can disappear while we scan (processes exit), so a failing
        // status query just means "skip this entry".
        std::error_code entry_err;
        if(!entry.is_directory(entry_err) || entry_err)
        {
            continue;
        }

        // We are only interested in numeric directories like `1`, `2`, ...
        // Other directories or files (`bootconfig`, `vmstat`, ...) and names
        // that merely start with digits are excluded.
        const std::string dir_name = entry.path().filename().string();
        if(dir_name.empty() ||
           dir_name.find_first_not_of("0123456789") != std::string::npos)
        {
            continue;
        }

        const int64_t tid = strtol(dir_name.c_str(), nullptr, 10);
        if(tid == 0)
        {
            // `0` is not a valid id.
            continue;
        }

        // Example of the path: `/proc/1/cgroup`
        const std::string cgroup_path = proc_root + "/" + dir_name + "/cgroup";
        SPDLOG_TRACE("Try to scan under: '{}'", cgroup_path);

        // The stream is closed by its destructor at the end of the iteration.
        std::ifstream cgroup_file(cgroup_path);
        if(!cgroup_file.is_open())
        {
            SPDLOG_WARN("cannot open '{}'", cgroup_path);
            continue;
        }

        // Only the first line is inspected.
        // todo!: check the cgroupv1 layout
        if(!std::getline(cgroup_file, cgroup_line))
        {
            SPDLOG_WARN("cannot retrieve the cgroup first line for '{}'",
                        cgroup_path);
            continue;
        }

        const std::string pod_uid = get_pod_uid_from_cgroup_string(cgroup_line);
        if(!pod_uid.empty())
        {
            m_thread_id_pod_uid_map[tid] = pod_uid;
        }
    }

    if(iter_err)
    {
        // Advancing the iterator failed: keep what we collected so far.
        SPDLOG_ERROR("cannot iter over '{}' for initial proc scan: {}",
                     proc_root, iter_err.message());
    }
}

bool my_plugin::init(falcosecurity::init_input& in)
{
using st = falcosecurity::state_value_type;
Expand Down Expand Up @@ -268,6 +370,12 @@ bool my_plugin::init(falcosecurity::init_input& in)
SPDLOG_CRITICAL(m_lasterr);
return false;
}

// Here we do /proc scan to catch the pod_uid from already running
// processes.
// We cannot populate the sinsp thread table because when we call `init` it
// is still empty. The /proc scan in sinsp is done after the plugin init.
do_initial_proc_scan();
return true;
}

Expand Down Expand Up @@ -934,7 +1042,16 @@ bool my_plugin::extract(const falcosecurity::extract_fields_input& in)
// The process is not into a pod, stop here.
if(pod_uid.empty())
{
return false;
// We try to obtain the pod_uid from our internal cache populated during
// the initial /proc scan
auto it = m_thread_id_pod_uid_map.find(thread_id);
if(it == m_thread_id_pod_uid_map.end())
{
return false;
}
// The ideal thing would be to write it in the sinsp thread table but in
// the extraction phase we don't have a table writer.
pod_uid = it->second;
}

// Try to find the entry associated with the pod_uid
Expand Down Expand Up @@ -1204,19 +1321,17 @@ static inline sinsp_param get_syscall_evt_param(void* evt, uint32_t num_param)
dataoffset};
}

bool inline my_plugin::extract_pod_uid(
bool inline my_plugin::parse_process_events(
const falcosecurity::parse_event_input& in)
{
auto res_param = get_syscall_evt_param(in.get_event_reader().get_buf(),
EXECVE_CLONE_RES_PARAM_IDX);

// - For execve/execveat we exclude failed syscall events
// - For clone/fork/clone3 we exclude failed syscall events (ret<0) and
// caller events (ret>0).
// When the new thread is in a container in libsinsp we only parse the
// child exit event, so we can do the same thing here. In the child the
// return value is `0`.
if(*((uint64_t*)(res_param.param_pointer)) != 0)
// - For execve/execveat we exclude failed syscall events (ret<0)
// - For clone/fork/vfork/clone3 we exclude failed syscall events (ret<0)
int64_t ret = 0;
memcpy(&ret, res_param.param_pointer, sizeof(ret));
if(ret < 0)
{
return false;
}
Expand All @@ -1242,24 +1357,7 @@ bool inline my_plugin::extract_pod_uid(
// cpuset=/kubelet.slice/kubelet-kubepods.slice/kubelet-kubepods-pod05869489-8c7f-45dc-9abd-1b1620787bb1.slice/cri-containerd-2f92446a3fbfd0b7a73457b45e96c75a25c5e44e7b1bcec165712b906551c261.scope\0
// So we can put it in a string and apply our regex.
std::string cgroup_first_charbuf = (char*)cgroup_param.param_pointer;

// We set the pod uid to `""` if we are not able to extract it.
std::string pod_uid = "";

if(re2::RE2::PartialMatch(cgroup_first_charbuf, pattern, &pod_uid))
{
// Here `pod_uid` could have 2 possible layouts:
// - (driver cgroup) pod05869489-8c7f-45dc-9abd-1b1620787bb1
// - (driver systemd) pod05869489_8c7f_45dc_9abd_1b1620787bb1

// As a first thing we remove the "pod" prefix from `pod_uid`
pod_uid.erase(0, 3);

// Then we convert `_` into `-` if we are in `systemd` notation.
// The final `pod_uid` layout will be:
// 05869489-8c7f-45dc-9abd-1b1620787bb1
std::replace(pod_uid.begin(), pod_uid.end(), '_', '-');
}
std::string pod_uid = get_pod_uid_from_cgroup_string(cgroup_first_charbuf);

// retrieve thread entry associated with the event tid
auto& tr = in.get_table_reader();
Expand Down Expand Up @@ -1287,7 +1385,7 @@ bool my_plugin::parse_event(const falcosecurity::parse_event_input& in)
case PPME_SYSCALL_FORK_20_X:
case PPME_SYSCALL_VFORK_20_X:
case PPME_SYSCALL_CLONE3_X:
return extract_pod_uid(in);
return parse_process_events(in);
default:
SPDLOG_ERROR("received an unknown event type {}",
int32_t(evt.get_type()));
Expand Down
8 changes: 7 additions & 1 deletion plugins/k8smeta/src/plugin.h
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,8 @@ class my_plugin

bool init(falcosecurity::init_input& in);

void do_initial_proc_scan();

//////////////////////////
// Async capability
//////////////////////////
Expand Down Expand Up @@ -235,7 +237,8 @@ class my_plugin

bool inline parse_async_event(const falcosecurity::parse_event_input& in);

bool inline extract_pod_uid(const falcosecurity::parse_event_input& in);
bool inline parse_process_events(
const falcosecurity::parse_event_input& in);

bool parse_event(const falcosecurity::parse_event_input& in);

Expand All @@ -251,6 +254,8 @@ class my_plugin
std::string m_collector_port;
std::string m_node_name;
std::string m_ca_PEM_encoding;
// todo!: populate it when parsing the config.
std::string m_host_proc;

// State tables
std::unordered_map<std::string, resource_layout> m_pod_table;
Expand All @@ -261,6 +266,7 @@ class my_plugin
std::unordered_map<std::string, resource_layout>
m_replication_controller_table;
std::unordered_map<std::string, resource_layout> m_deamonset_table;
std::unordered_map<int64_t, std::string> m_thread_id_pod_uid_map;

// Last error of the plugin
std::string m_lasterr;
Expand Down
7 changes: 7 additions & 0 deletions plugins/k8smeta/test/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,10 @@ make build-tests
# run tests against the test server
make run-tests
```

To run only a subset of the tests, invoke the test binary directly with a gtest filter:

```bash
# from the `build` directory
sudo ./libs_tests/libsinsp/test/unit-test-libsinsp --gtest_filter='*plugin_k8s_PPME_SYSCALL_CLONE3_X_parse'
```
79 changes: 74 additions & 5 deletions plugins/k8smeta/test/src/parsing_pod.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,20 @@ limitations under the License.
int64_t p1_vpid = 1; \
\
std::string expected_pod_uid = "5eaeeca9-2277-460b-a4bf-5a0783f6d49f"; \
auto evt = generate_clone_x_event( \
\
/* We generate a clone exit event for the parent. */ \
/* This is parsed but the pod_uid is not extracted. */ \
auto evt = generate_clone_x_event(p1_tid, INIT_TID, INIT_PID, INIT_PTID, \
0, INIT_TID, INIT_PTID, "init", \
{"cpuset=/"}, event); \
ASSERT_EQ(evt->get_type(), event); \
auto init_thread_entry = thread_table->get_entry(INIT_TID); \
ASSERT_NE(init_thread_entry, nullptr); \
std::string pod_uid; \
init_thread_entry->get_dynamic_field(fieldacc, pod_uid); \
ASSERT_EQ(pod_uid, ""); \
\
evt = generate_clone_x_event( \
0, p1_tid, p1_pid, p1_ptid, PPM_CL_CHILD_IN_PIDNS, p1_vtid, \
p1_vpid, "bash", \
{"cpuset=/kubepods/besteffort/pod" + expected_pod_uid + \
Expand All @@ -55,10 +68,9 @@ limitations under the License.
event); \
ASSERT_EQ(evt->get_type(), event); \
\
auto init_thread_entry = thread_table->get_entry(p1_tid); \
ASSERT_NE(init_thread_entry, nullptr); \
std::string pod_uid; \
init_thread_entry->get_dynamic_field(fieldacc, pod_uid); \
auto p1_tid_entry = thread_table->get_entry(p1_tid); \
ASSERT_NE(p1_tid_entry, nullptr); \
p1_tid_entry->get_dynamic_field(fieldacc, pod_uid); \
ASSERT_EQ(pod_uid, expected_pod_uid);

#define EXECVE_EXECVEAT_TEST(event) \
Expand Down Expand Up @@ -334,3 +346,60 @@ TEST_F(sinsp_with_test_input, plugin_k8s_check_thread_entry_is_removed)
// Now we should have only one entry in the thread table
ASSERT_EQ(thread_table->entries_count(), 1);
}

// Verifies that the pod uid of a thread is written not only when its child
// clone exit event (first argument `0`) is parsed, but also when the thread
// later shows up as the caller in a parent clone exit event (first argument
// is the tid of the new child).
TEST_F(sinsp_with_test_input, plugin_k8s_parse_parent_clone)
{
    std::shared_ptr<sinsp_plugin> plugin_owner;
    filter_check_list pl_flist;
    ASSERT_PLUGIN_INITIALIZATION(plugin_owner, pl_flist)

    add_default_init_thread();
    open_inspector();

    // Accessor for the pod uid dynamic field of the thread table.
    auto &reg = m_inspector.get_table_registry();
    auto thread_table = reg->get_table<int64_t>(THREAD_TABLE_NAME);
    auto field =
            thread_table->dynamic_fields()->fields().find(POD_UID_FIELD_NAME);
    auto fieldacc = field->second.new_accessor<std::string>();

    // p1 is a child of init, p2 is the child that p1 will clone later.
    const int64_t p1_tid = 2;
    const int64_t p1_pid = 2;
    const int64_t p1_ptid = INIT_TID;
    const int64_t p1_vtid = 1;
    const int64_t p1_vpid = 1;
    const int64_t p2_tid = 3;

    // Both events carry the same cgroup, which embeds the expected pod uid.
    const std::string expected_pod_uid = "5eaeeca9-2277-460b-a4bf-5a0783f6d49f";
    const std::string p1_cgroup =
            "cpuset=/kubepods/besteffort/pod" + expected_pod_uid +
            "/691e0ffb65010b2b611f3a15b7f76c48466192e673e156f38"
            "bd2f8e25acd6bbc";

    // Step 1: child clone exit event, this creates p1 with its pod uid.
    generate_clone_x_event(0, p1_tid, p1_pid, p1_ptid, PPM_CL_CHILD_IN_PIDNS,
                           p1_vtid, p1_vpid, "bash", {p1_cgroup},
                           PPME_SYSCALL_CLONE_20_X);
    auto p1_entry = thread_table->get_entry(p1_tid);
    ASSERT_NE(p1_entry, nullptr);
    std::string observed_pod_uid = "";
    p1_entry->get_dynamic_field(fieldacc, observed_pod_uid);
    ASSERT_EQ(observed_pod_uid, expected_pod_uid);

    // Step 2: wipe the pod uid by hand, so that the only way to see it again
    // is that the next clone parent event populates it.
    std::string cleared_pod_uid = "";
    p1_entry->set_dynamic_field(fieldacc, cleared_pod_uid);
    p1_entry->get_dynamic_field(fieldacc, observed_pod_uid);
    ASSERT_EQ(observed_pod_uid, cleared_pod_uid);

    // Step 3: parent clone exit event for p1 (p1 is the caller, p2 the new
    // child).
    generate_clone_x_event(p2_tid, p1_tid, p1_pid, p1_ptid,
                           PPM_CL_CHILD_IN_PIDNS, p1_vtid, p1_vpid, "bash",
                           {p1_cgroup}, PPME_SYSCALL_CLONE_20_X);

    // The parent thread has its pod uid again.
    p1_entry->get_dynamic_field(fieldacc, observed_pod_uid);
    ASSERT_EQ(observed_pod_uid, expected_pod_uid);
}

0 comments on commit 4585f3b

Please sign in to comment.