Skip to content

Commit

Permalink
[Llama] Dump RSS info for Linux
Browse files Browse the repository at this point in the history
Differential Revision: D62222512

Pull Request resolved: pytorch#5101
  • Loading branch information
digantdesai authored Sep 6, 2024
1 parent a25db2f commit 17103dc
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 2 deletions.
13 changes: 13 additions & 0 deletions examples/models/llama2/runner/runner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,11 @@ Error Runner::generate(
stats_.model_load_end_ms = util::time_in_ms();
}

ET_LOG(
Info,
"RSS after loading model: %f MiB (0 if unsupported)",
util::get_rss_bytes() / 1024.0 / 1024.0);

// Wrap the token_callback with print function
std::function<void(const std::string&)> wrapped_callback =
[token_callback](const std::string& piece) {
Expand Down Expand Up @@ -213,6 +218,10 @@ Error Runner::generate(

// print the first token from prefill. No prev_token so use cur_token for it.
wrapped_callback(ET_UNWRAP(tokenizer_->decode(cur_token, cur_token)));
ET_LOG(
Info,
"RSS after prompt prefill: %f MiB (0 if unsupported)",
util::get_rss_bytes() / 1024.0 / 1024.0);

// start the main loop
prompt_tokens.push_back(cur_token);
Expand All @@ -221,6 +230,10 @@ Error Runner::generate(

stats_.inference_end_ms = util::time_in_ms();
printf("\n");
ET_LOG(
Info,
"RSS after finishing text generation: %f MiB (0 if unsupported)",
util::get_rss_bytes() / 1024.0 / 1024.0);

if (num_prompt_tokens + num_generated_tokens == seq_len) {
ET_LOG(Info, "Sequence length (%i tokens) reached!", seq_len);
Expand Down
21 changes: 19 additions & 2 deletions examples/models/llava/runner/llava_runner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,11 @@ Error LlavaRunner::generate(
ET_CHECK_OK_OR_RETURN_ERROR(load());
}

ET_LOG(
Info,
"RSS after loading model: %f MiB (0 if unsupported)",
util::get_rss_bytes() / 1024.0 / 1024.0);

// Wrap the token_callback with print function
std::function<void(const std::string&)> wrapped_callback =
[token_callback](const std::string& piece) {
Expand All @@ -149,9 +154,21 @@ Error LlavaRunner::generate(
// prefill images
prefill_images(images, pos);

ET_LOG(
Info,
"RSS after prompt and image prefill: %f MiB (0 if unsupported)",
util::get_rss_bytes() / 1024.0 / 1024.0);

// Generate tokens
return generate_from_pos(
prompt, seq_len, pos, wrapped_callback, stats_callback);
Error err =
generate_from_pos(prompt, seq_len, pos, wrapped_callback, stats_callback);

ET_LOG(
Info,
"RSS after finishing text generation: %f MiB (0 if unsupported)",
util::get_rss_bytes() / 1024.0 / 1024.0);

return err;
}

} // namespace torch::executor
25 changes: 25 additions & 0 deletions extension/llm/runner/util.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@
#include <stdio.h>
#include <time.h>
#include <cctype>
#if defined(__linux__) || defined(__ANDROID__) || defined(__unix__)
#include <sys/resource.h>
#endif

namespace executorch {
namespace extension {
Expand Down Expand Up @@ -44,6 +47,27 @@ long inline time_in_ms() {
return time.tv_sec * 1000 + time.tv_nsec / 1000000;
}

// ----------------------------------------------------------------------------
// utilities: memory usage

// Returns the peak RSS of this process in bytes. Returns 0 if not supported.
// RSS: Resident Set Size, the amount of memory held in RAM for this process.
// The value comes from getrusage()'s `ru_maxrss`, which is the high-water mark
// (maximum RSS observed so far), not the instantaneous RSS, so it never
// decreases over the lifetime of the process. These values are approximate,
// and are only used for logging purposes.
size_t inline get_rss_bytes() {
#if defined(__linux__) || defined(__ANDROID__) || defined(__unix__)
  struct rusage r_usage;
  // Treat a non-positive ru_maxrss like an unsupported platform instead of
  // letting a negative value wrap around to a huge size_t.
  if (getrusage(RUSAGE_SELF, &r_usage) == 0 && r_usage.ru_maxrss > 0) {
    // ru_maxrss is a signed `long` expressed in kilobytes on these platforms.
    // Widen to size_t *before* scaling so the multiplication is unsigned and
    // cannot hit signed-overflow UB where `long` is 32 bits.
    return static_cast<size_t>(r_usage.ru_maxrss) * 1024;
  }
#endif // __linux__ || __ANDROID__ || __unix__
  // Unsupported platform like Windows, or getrusage() failed.
  // __APPLE__ and __MACH__ are not supported because r_usage.ru_maxrss does not
  // consistently return kbytes on macOS. On older versions of macOS, it
  // returns bytes, but on newer versions it returns kbytes. Need to figure out
  // when this changed.
  return 0;
}
} // namespace llm
} // namespace extension
} // namespace executorch
Expand All @@ -53,6 +77,7 @@ namespace executor {
namespace util {
// TODO(T197294990): Remove these deprecated aliases once all users have moved
// to the new `::executorch` namespaces.
using ::executorch::extension::llm::get_rss_bytes;
using ::executorch::extension::llm::safe_printf;
using ::executorch::extension::llm::time_in_ms;
} // namespace util
Expand Down

0 comments on commit 17103dc

Please sign in to comment.