Skip to content

Commit

Permalink
Merge pull request #354 from network-intelligence/dev
Browse files Browse the repository at this point in the history
Add STUN telemetry and a document on JSON output guidelines
  • Loading branch information
andrewchi authored and GitHub Enterprise committed Dec 20, 2024
2 parents 6e46935 + 460bf0c commit ead81d4
Show file tree
Hide file tree
Showing 6 changed files with 94 additions and 2 deletions.
25 changes: 25 additions & 0 deletions doc/guidelines.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
## Best Practices for JSON Output



This note provides guidance for developers of code that generates JSON output, with the goal of producing JSON that works well with Parquet and `jq`.

#### Principles

- All names and strings must be valid UTF-8 with JSON characters escaped.
- Data from packets is not trusted to be in the correct format.
- No spaces or dashes in names.
- Prefer lowercase.
- There should be no empty JSON objects.
- For compressibility, highly variable fields (e.g. IP.ID) should be at the tail end of a record, not the front.
- Avoid using network data as JSON keys, so that keys are consistent (and thus parquet-friendly) and follow the other guidelines.
- There should be no empty JSON arrays. If an empty array is semantically necessary, an exception can be made, provided that we pre-deploy the json2parquet schema.
- In an array of objects, the objects can have distinct schemas, as long as any name that appears in more than one object schema has the same type in all objects.
- Prefer flat schemas where possible; avoid arrays of objects unless necessary.

#### Resources

The class `utf8_safe_string`
(https://wwwin-github.cisco.com/network-intelligence/mercury-transition/blob/dev/src/libmerc/utf8.hpp#L931)
can be used to safely convert packet data into a string that can be
used as, e.g., a JSON array or object name.
7 changes: 7 additions & 0 deletions src/libmerc/buffer_stream.h
Original file line number Diff line number Diff line change
Expand Up @@ -1176,6 +1176,13 @@ class output_buffer : public buffer_stream {
return buffer;
}

std::pair<const uint8_t *, const uint8_t *> get_datum() const {
if (trunc) {
return { nullptr, nullptr };
}
return { (uint8_t *)buffer, (uint8_t *)buffer + doff };
}

};

#endif /* BUFFER_STREAM_H */
7 changes: 7 additions & 0 deletions src/libmerc/pkt_proc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,13 @@ struct do_observation {
analysis_.reset_user_agent();
}

// observation handler for STUN messages: builds an event string
// from the flow key, the analysis context, and the message, and
// pushes it onto the queue feeding the data/stats aggregator
//
void operator()(stun::message &m) {
    event_string stun_event{k_, analysis_, m};
    mq_->push(stun_event.construct_event_string());

    // the event has been queued, so reset the user agent held by
    // the analysis context
    //
    analysis_.reset_user_agent();
}

// catch-all overload: accepts an object of any type and does
// nothing; a non-template overload of operator() whose parameter
// type matches the argument is preferred over this template
//
template <typename T>
void operator()(T &) { }

Expand Down
39 changes: 37 additions & 2 deletions src/libmerc/stun.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
#include "utf8.hpp"
#include "match.h"
#include "fingerprint.h"
#include "result.h"
#include "util_obj.h"
#include <unordered_map>

namespace stun {
Expand Down Expand Up @@ -471,6 +473,8 @@ namespace stun {

uint16_t get_type() const { return type; }

// returns the `value` member, by value, as a datum
//
datum get_value() const { return value; }

};


Expand Down Expand Up @@ -772,6 +776,7 @@ namespace stun {
class message : public base_protocol {
header hdr;
datum body;
datum software;

public:

Expand Down Expand Up @@ -959,6 +964,13 @@ namespace stun {
} else {
; // by default, attribute information is not included in fingerprint
}

// remember SOFTWARE for later use in analysis
//
if (attr.value.get_type() == attr_type::SOFTWARE) {
software = attr.value.get_value();
}

} else {
break;
}
Expand All @@ -970,10 +982,33 @@ namespace stun {
// analyzes the dst_ip, dst_port, and SOFTWARE attribute
// value, using a classifier selected by the stun fingerprint
//
bool do_analysis(const struct key &, struct analysis_context &, classifier*) {
// request format: dst_addr, dst_port
// response format: src_addr, src_port

bool do_analysis(const struct key &flow_key, struct analysis_context &ac, classifier*) {

// create a json-friendly utf8 copy of the SOFTWARE atribute's value field
//
// TBD
utf8_safe_string<MAX_USER_AGENT_LEN> utf8_software{software};

// handle message classes appropriately: reverse the
// addresses and ports in the flow key for responses,
// leave the flow key untouched for requests, and ignore
// all other message classes
//
key k{flow_key};
if ((hdr.get_message_class() & 0b10) == 0b10) {
//
// success_resp and error_resp: swap addrs and ports
//
k.reverse();
}
ac.destination.init({nullptr,nullptr}, // domain name
utf8_software.get_datum(), // user agent
{nullptr,nullptr}, // alpn
k // flow key, used for dst_addr and dst_port
);

return false;
}

Expand Down
4 changes: 4 additions & 0 deletions src/libmerc/utf8.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -960,6 +960,10 @@ class utf8_safe_string {
return buf.get_buffer_start();
}

/// returns the result of the underlying buffer's `get_datum()`,
/// converted to a `datum`
///
/// NOTE(review): the datum presumably points into this object's
/// internal buffer and so should not outlive it - confirm
///
datum get_datum() const {
return buf.get_datum();
}

/// performs unit tests for \ref class utf8_safe_string and
/// returns `true` if they all pass, and `false` otherwise
///
Expand Down
14 changes: 14 additions & 0 deletions src/libmerc/util_obj.h
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,20 @@ struct key {
snprintf(src_port_string, MAX_PORT_STR_LEN, "%u", src_port);
}

/// swaps the source and destination ports in place and, when
/// `ip_vers` is 4 or 6, the source and destination addresses of
/// the matching address family; for any other `ip_vers` value,
/// only the ports are swapped
///
void reverse() {
    std::swap(src_port, dst_port);
    if (ip_vers == 4) {
        std::swap(addr.ipv4.src, addr.ipv4.dst);
    } else if (ip_vers == 6) {
        std::swap(addr.ipv6.src, addr.ipv6.dst);
    }
}

};

struct eth_addr : public datum {
Expand Down

0 comments on commit ead81d4

Please sign in to comment.