Skip to content

Commit

Permalink
Merge bitcoin#31174: tinyformat: Add compile-time checking for litera…
Browse files Browse the repository at this point in the history
…l format strings

fe39acf tinyformat: Add compile-time checking for literal format strings (Ryan Ofsky)
184f34f util: Support dynamic width & precision in ConstevalFormatString (Ryan Ofsky)

Pull request description:

  Add compile-time checking for literal format strings passed to `strprintf` and `tfm::format` to make sure the right number of format arguments are passed.

  There is still no compile-time checking if non-literal `std::string` or `bilingual_str` format strings are passed, but this is improved in other PRs:

  - [bitcoin#31061](bitcoin#31061) implements compile-time checking for translated strings
  - [bitcoin#31072](bitcoin#31072) increases compile-time checking by using literal strings as format strings, instead of `std::string` and `bilingual_str`
  - [bitcoin#31149](bitcoin#31149) may drop the `std::string`  overload for `strprintf` to [require](bitcoin#31149 (comment)) compile-time checking

ACKs for top commit:
  maflcko:
    re-ACK fe39acf 🕐
  l0rinc:
    ACK fe39acf
  hodlinator:
    re-ACK fe39acf

Tree-SHA512: f1ddef0c96b9468c5ffe31b42dc19f1922583dd14f2e180b618d992c98614c5cc7db9f9cd917ef503f833bbc7dbec78e4489d0035416dce6840837e1d66d87cb
  • Loading branch information
fanquake committed Nov 14, 2024
2 parents 69c0313 + fe39acf commit f44e39c
Show file tree
Hide file tree
Showing 3 changed files with 99 additions and 62 deletions.
44 changes: 37 additions & 7 deletions src/test/util_string_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ inline void PassFmt(util::ConstevalFormatString<NumArgs> fmt)
decltype(fmt)::Detail_CheckNumFormatSpecifiers(fmt.fmt);
}
template <unsigned WrongNumArgs>
inline void FailFmtWithError(std::string_view wrong_fmt, std::string_view error)
inline void FailFmtWithError(const char* wrong_fmt, std::string_view error)
{
BOOST_CHECK_EXCEPTION(util::ConstevalFormatString<WrongNumArgs>::Detail_CheckNumFormatSpecifiers(wrong_fmt), const char*, HasReason(error));
}
Expand All @@ -44,6 +44,8 @@ BOOST_AUTO_TEST_CASE(ConstevalFormatString_NumSpec)
PassFmt<1>("%+2s");
PassFmt<1>("%.6i");
PassFmt<1>("%5.2f");
PassFmt<1>("%5.f");
PassFmt<1>("%.f");
PassFmt<1>("%#x");
PassFmt<1>("%1$5i");
PassFmt<1>("%1$-5i");
Expand All @@ -54,32 +56,60 @@ BOOST_AUTO_TEST_CASE(ConstevalFormatString_NumSpec)
PassFmt<1>("%_");
PassFmt<1>("%\n");

// The `*` specifier behavior is unsupported and can lead to runtime
// errors when used in a ConstevalFormatString. Please refer to the
// note in the ConstevalFormatString docs.
PassFmt<1>("%*c");
PassFmt<2>("%2$*3$d");
PassFmt<1>("%.*f");
PassFmt<2>("%*c");
PassFmt<2>("%+*c");
PassFmt<2>("%.*f");
PassFmt<3>("%*.*f");
PassFmt<3>("%2$*3$d");
PassFmt<3>("%2$*3$.9d");
PassFmt<3>("%2$.*3$d");
PassFmt<3>("%2$9.*3$d");
PassFmt<3>("%2$+9.*3$d");
PassFmt<4>("%3$*2$.*4$f");

// Make sure multiple flag characters "- 0+" are accepted
PassFmt<3>("'%- 0+*.*f'");
PassFmt<3>("'%1$- 0+*3$.*2$f'");

auto err_mix{"Format specifiers must be all positional or all non-positional!"};
FailFmtWithError<1>("%s%1$s", err_mix);
FailFmtWithError<2>("%2$*d", err_mix);
FailFmtWithError<2>("%*2$d", err_mix);
FailFmtWithError<2>("%.*3$d", err_mix);
FailFmtWithError<2>("%2$.*d", err_mix);

auto err_num{"Format specifier count must match the argument count!"};
FailFmtWithError<1>("", err_num);
FailFmtWithError<0>("%s", err_num);
FailFmtWithError<2>("%s", err_num);
FailFmtWithError<0>("%1$s", err_num);
FailFmtWithError<2>("%1$s", err_num);
FailFmtWithError<1>("%*c", err_num);

auto err_0_pos{"Positional format specifier must have position of at least 1"};
FailFmtWithError<1>("%$s", err_0_pos);
FailFmtWithError<1>("%$", err_0_pos);
FailFmtWithError<0>("%0$", err_0_pos);
FailFmtWithError<0>("%0$s", err_0_pos);
FailFmtWithError<2>("%2$*$d", err_0_pos);
FailFmtWithError<2>("%2$*0$d", err_0_pos);
FailFmtWithError<3>("%3$*2$.*$f", err_0_pos);
FailFmtWithError<3>("%3$*2$.*0$f", err_0_pos);

auto err_term{"Format specifier incorrectly terminated by end of string"};
FailFmtWithError<1>("%", err_term);
FailFmtWithError<1>("%9", err_term);
FailFmtWithError<1>("%9.", err_term);
FailFmtWithError<1>("%9.9", err_term);
FailFmtWithError<1>("%*", err_term);
FailFmtWithError<1>("%+*", err_term);
FailFmtWithError<1>("%.*", err_term);
FailFmtWithError<1>("%9.*", err_term);
FailFmtWithError<1>("%1$", err_term);
FailFmtWithError<1>("%1$9", err_term);
FailFmtWithError<2>("%1$*2$", err_term);
FailFmtWithError<2>("%1$.*2$", err_term);
FailFmtWithError<2>("%1$9.*2$", err_term);
}

BOOST_AUTO_TEST_SUITE_END()
30 changes: 17 additions & 13 deletions src/tinyformat.h
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ namespace tfm = tinyformat;
#include <iostream>
#include <sstream>
#include <stdexcept> // Added for Bitcoin Core
#include <util/string.h> // Added for Bitcoin Core

#ifndef TINYFORMAT_ASSERT
# include <cassert>
Expand Down Expand Up @@ -178,6 +179,18 @@ namespace tfm = tinyformat;

namespace tinyformat {

// Added for Bitcoin Core. Wrapper for checking format strings at compile time.
// Unlike ConstevalFormatString this supports std::string for runtime string
// formatting without compile time checks.
template <unsigned num_params>
struct FormatStringCheck {
consteval FormatStringCheck(const char* str) : fmt{util::ConstevalFormatString<num_params>{str}.fmt} {}
FormatStringCheck(const std::string& str) : fmt{str.c_str()} {}
FormatStringCheck(util::ConstevalFormatString<num_params> str) : fmt{str.fmt} {}
operator const char*() { return fmt; }
const char* fmt;
};

// Added for Bitcoin Core
class format_error: public std::runtime_error
{
Expand Down Expand Up @@ -1056,15 +1069,15 @@ inline void vformat(std::ostream& out, const char* fmt, FormatListRef list)

/// Format list of arguments to the stream according to given format string.
template<typename... Args>
void format(std::ostream& out, const char* fmt, const Args&... args)
void format(std::ostream& out, FormatStringCheck<sizeof...(Args)> fmt, const Args&... args)
{
vformat(out, fmt, makeFormatList(args...));
}

/// Format list of arguments according to the given format string and return
/// the result as a string.
template<typename... Args>
std::string format(const char* fmt, const Args&... args)
std::string format(FormatStringCheck<sizeof...(Args)> fmt, const Args&... args)
{
std::ostringstream oss;
format(oss, fmt, args...);
Expand All @@ -1073,13 +1086,13 @@ std::string format(const char* fmt, const Args&... args)

/// Format list of arguments to std::cout, according to the given format string
template<typename... Args>
void printf(const char* fmt, const Args&... args)
void printf(FormatStringCheck<sizeof...(Args)> fmt, const Args&... args)
{
format(std::cout, fmt, args...);
}

template<typename... Args>
void printfln(const char* fmt, const Args&... args)
void printfln(FormatStringCheck<sizeof...(Args)> fmt, const Args&... args)
{
format(std::cout, fmt, args...);
std::cout << '\n';
Expand Down Expand Up @@ -1145,15 +1158,6 @@ TINYFORMAT_FOREACH_ARGNUM(TINYFORMAT_MAKE_FORMAT_FUNCS)

#endif

// Added for Bitcoin Core
template<typename... Args>
std::string format(const std::string &fmt, const Args&... args)
{
std::ostringstream oss;
format(oss, fmt.c_str(), args...);
return oss.str();
}

} // namespace tinyformat

// Added for Bitcoin Core:
Expand Down
87 changes: 45 additions & 42 deletions src/util/string.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
#define BITCOIN_UTIL_STRING_H

#include <span.h>
#include <tinyformat.h>

#include <array>
#include <cstdint>
Expand All @@ -25,53 +24,65 @@ namespace util {
* strings, to reduce the likelihood of tinyformat throwing exceptions at
* run-time. Validation is partial to try and prevent the most common errors
* while avoiding re-implementing the entire parsing logic.
*
* @note Counting of `*` dynamic width and precision fields (such as `%*c`,
* `%2$*3$d`, `%.*f`) is not implemented to minimize code complexity as long as
* they are not used in the codebase. Usage of these fields is not counted and
* can lead to run-time exceptions. Code wanting to use the `*` specifier can
* side-step this struct and call tinyformat directly.
*/
template <unsigned num_params>
struct ConstevalFormatString {
const char* const fmt;
consteval ConstevalFormatString(const char* str) : fmt{str} { Detail_CheckNumFormatSpecifiers(fmt); }
constexpr static void Detail_CheckNumFormatSpecifiers(std::string_view str)
constexpr static void Detail_CheckNumFormatSpecifiers(const char* str)
{
unsigned count_normal{0}; // Number of "normal" specifiers, like %s
unsigned count_pos{0}; // Max number in positional specifier, like %8$s
for (auto it{str.begin()}; it < str.end();) {
if (*it != '%') {
++it;
continue;
}
for (auto it{str}; *it != '\0'; ++it) {
if (*it != '%' || *++it == '%') continue; // Skip escaped %%

auto add_arg = [&] {
unsigned maybe_num{0};
while ('0' <= *it && *it <= '9') {
maybe_num *= 10;
maybe_num += *it - '0';
++it;
}

if (*it == '$') {
++it;
// Positional specifier, like %8$s
if (maybe_num == 0) throw "Positional format specifier must have position of at least 1";
count_pos = std::max(count_pos, maybe_num);
} else {
// Non-positional specifier, like %s
++count_normal;
}
};

if (++it >= str.end()) throw "Format specifier incorrectly terminated by end of string";
if (*it == '%') {
// Percent escape: %%
++it;
continue;
}
// Increase argument count and consume positional specifier, if present.
add_arg();

unsigned maybe_num{0};
while ('0' <= *it && *it <= '9') {
maybe_num *= 10;
maybe_num += *it - '0';
++it;
// Consume flags.
while (*it == '#' || *it == '0' || *it == '-' || *it == ' ' || *it == '+') ++it;

auto parse_size = [&] {
if (*it == '*') {
++it;
add_arg();
} else {
while ('0' <= *it && *it <= '9') ++it;
}
};

if (*it == '$') {
// Positional specifier, like %8$s
if (maybe_num == 0) throw "Positional format specifier must have position of at least 1";
count_pos = std::max(count_pos, maybe_num);
if (++it >= str.end()) throw "Format specifier incorrectly terminated by end of string";
} else {
// Non-positional specifier, like %s
++count_normal;
// Consume dynamic or static width value.
parse_size();

// Consume dynamic or static precision value.
if (*it == '.') {
++it;
parse_size();
}
// The remainder "[flags][width][.precision][length]type" of the
// specifier is not checked. Parsing continues with the next '%'.

if (*it == '\0') throw "Format specifier incorrectly terminated by end of string";

// Length and type in "[flags][width][.precision][length]type"
// is not checked. Parsing continues with the next '%'.
}
if (count_normal && count_pos) throw "Format specifiers must be all positional or all non-positional!";
unsigned count{count_normal | count_pos};
Expand Down Expand Up @@ -235,12 +246,4 @@ template <typename T1, size_t PREFIX_LEN>
}
} // namespace util

namespace tinyformat {
template <typename... Args>
std::string format(util::ConstevalFormatString<sizeof...(Args)> fmt, const Args&... args)
{
return format(fmt.fmt, args...);
}
} // namespace tinyformat

#endif // BITCOIN_UTIL_STRING_H

0 comments on commit f44e39c

Please sign in to comment.