Skip to content

Commit

Permalink
Preprocessor: Add debug dump options
Browse files Browse the repository at this point in the history
Adds support for GCC's -dD, -dM, -dN flags which enable dumping macro
names and/or definitions along with or instead of preprocessor output.
  • Loading branch information
ehaas authored Aug 8, 2024
1 parent b35370d commit 1d24511
Show file tree
Hide file tree
Showing 9 changed files with 205 additions and 21 deletions.
32 changes: 30 additions & 2 deletions src/aro/Driver.zig
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,20 @@ color: ?bool = null,
nobuiltininc: bool = false,
nostdinc: bool = false,
nostdlibinc: bool = false,
/// Records which of the `-dD`, `-dM` and `-dN` debug dump flags were seen
/// while parsing the command line.
debug_dump_letters: packed struct(u3) {
    /// Set by `-dD`.
    d: bool = false,
    /// Set by `-dM`.
    m: bool = false,
    /// Set by `-dN`.
    n: bool = false,

    /// Collapse the recorded letters into a single preprocessor dump mode.
    ///
    /// GCC leaves combinations of conflicting letters undefined. Like clang,
    /// `-dM` always wins over `-dD`; `-dD` in turn wins over `-dN`. With no
    /// letter set, plain preprocessor output is selected.
    pub fn getPreprocessorDumpMode(self: @This()) Preprocessor.DumpMode {
        return if (self.m)
            .macros_only
        else if (self.d)
            .macros_and_result
        else if (self.n)
            .macro_names_and_result
        else
            .result_only;
    }
} = .{},

/// Full path to the aro executable
aro_name: []const u8 = "",
Expand Down Expand Up @@ -92,6 +106,9 @@ pub const usage =
\\
\\Compile options:
\\ -c, --compile Only run preprocess, compile, and assemble steps
\\ -dM Output #define directives for all the macros defined during the execution of the preprocessor
\\ -dD Like -dM except that it outputs both the #define directives and the result of preprocessing
\\ -dN Like -dD, but emit only the macro names, not their expansions.
\\ -D <macro>=<value> Define <macro> to <value> (defaults to 1)
\\ -E Only run the preprocessor
\\ -fchar8_t Enable char8_t (enabled by default in C23 and later)
Expand Down Expand Up @@ -234,6 +251,12 @@ pub fn parseArgs(
d.system_defines = .no_system_defines;
} else if (mem.eql(u8, arg, "-c") or mem.eql(u8, arg, "--compile")) {
d.only_compile = true;
} else if (mem.eql(u8, arg, "-dD")) {
d.debug_dump_letters.d = true;
} else if (mem.eql(u8, arg, "-dM")) {
d.debug_dump_letters.m = true;
} else if (mem.eql(u8, arg, "-dN")) {
d.debug_dump_letters.n = true;
} else if (mem.eql(u8, arg, "-E")) {
d.only_preprocess = true;
} else if (mem.eql(u8, arg, "-P") or mem.eql(u8, arg, "--no-line-commands")) {
Expand Down Expand Up @@ -636,13 +659,17 @@ fn processSource(
if (d.comp.langopts.ms_extensions) {
d.comp.ms_cwd_source_id = source.id;
}

const dump_mode = d.debug_dump_letters.getPreprocessorDumpMode();
if (d.verbose_pp) pp.verbose = true;
if (d.only_preprocess) {
pp.preserve_whitespace = true;
if (d.line_commands) {
pp.linemarkers = if (d.use_line_directives) .line_directives else .numeric_directives;
}
switch (dump_mode) {
.macros_and_result, .macro_names_and_result => pp.store_macro_tokens = true,
.result_only, .macros_only => {},
}
}

try pp.preprocessSources(&.{ source, builtin, user_macros });
Expand All @@ -663,7 +690,8 @@ fn processSource(
defer if (d.output_name != null) file.close();

var buf_w = std.io.bufferedWriter(file.writer());
pp.prettyPrintTokens(buf_w.writer()) catch |er|

pp.prettyPrintTokens(buf_w.writer(), dump_mode) catch |er|
return d.fatal("unable to write result: {s}", .{errorDescription(er)});

buf_w.flush() catch |er|
Expand Down
112 changes: 102 additions & 10 deletions src/aro/Preprocessor.zig
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,11 @@ poisoned_identifiers: std.StringHashMap(void),
/// Map from Source.Id to macro name in the `#ifndef` condition which guards the source, if any
include_guards: std.AutoHashMapUnmanaged(Source.Id, []const u8) = .{},

/// Store `keyword_define` and `keyword_undef` tokens.
/// Used to implement preprocessor debug dump options
/// Must be false unless in -E mode (parser does not handle those token types)
store_macro_tokens: bool = false,

/// Memory is retained to avoid allocation on every single token.
top_expansion_buf: ExpandBuf,

Expand Down Expand Up @@ -622,9 +627,12 @@ fn preprocessExtra(pp: *Preprocessor, source: Source) MacroError!TokenWithExpans
}
if_level -= 1;
},
.keyword_define => try pp.define(&tokenizer),
.keyword_define => try pp.define(&tokenizer, directive),
.keyword_undef => {
const macro_name = (try pp.expectMacroName(&tokenizer)) orelse continue;
if (pp.store_macro_tokens) {
try pp.addToken(tokFromRaw(directive));
}

_ = pp.defines.remove(macro_name);
try pp.expectNl(&tokenizer);
Expand Down Expand Up @@ -2508,7 +2516,7 @@ fn makeGeneratedToken(pp: *Preprocessor, start: usize, id: Token.Id, source: Tok
}

/// Defines a new macro and warns if it is a duplicate
fn defineMacro(pp: *Preprocessor, name_tok: RawToken, macro: Macro) Error!void {
fn defineMacro(pp: *Preprocessor, define_tok: RawToken, name_tok: RawToken, macro: Macro) Error!void {
const name_str = pp.tokSlice(name_tok);
const gop = try pp.defines.getOrPut(pp.gpa, name_str);
if (gop.found_existing and !gop.value_ptr.eql(macro, pp)) {
Expand All @@ -2529,11 +2537,14 @@ fn defineMacro(pp: *Preprocessor, name_tok: RawToken, macro: Macro) Error!void {
if (pp.verbose) {
pp.verboseLog(name_tok, "macro {s} defined", .{name_str});
}
if (pp.store_macro_tokens) {
try pp.addToken(tokFromRaw(define_tok));
}
gop.value_ptr.* = macro;
}

/// Handle a #define directive.
fn define(pp: *Preprocessor, tokenizer: *Tokenizer) Error!void {
fn define(pp: *Preprocessor, tokenizer: *Tokenizer, define_tok: RawToken) Error!void {
// Get macro name and validate it.
const macro_name = tokenizer.nextNoWS();
if (macro_name.id == .keyword_defined) {
Expand All @@ -2556,15 +2567,15 @@ fn define(pp: *Preprocessor, tokenizer: *Tokenizer) Error!void {
// Check for function macros and empty defines.
var first = tokenizer.next();
switch (first.id) {
.nl, .eof => return pp.defineMacro(macro_name, .{
.nl, .eof => return pp.defineMacro(define_tok, macro_name, .{
.params = &.{},
.tokens = &.{},
.var_args = false,
.loc = tokFromRaw(macro_name).loc,
.is_func = false,
}),
.whitespace => first = tokenizer.next(),
.l_paren => return pp.defineFn(tokenizer, macro_name, first),
.l_paren => return pp.defineFn(tokenizer, define_tok, macro_name, first),
else => try pp.err(first, .whitespace_after_macro_name),
}
if (first.id == .hash_hash) {
Expand Down Expand Up @@ -2623,7 +2634,7 @@ fn define(pp: *Preprocessor, tokenizer: *Tokenizer) Error!void {
}

const list = try pp.arena.allocator().dupe(RawToken, pp.token_buf.items);
try pp.defineMacro(macro_name, .{
try pp.defineMacro(define_tok, macro_name, .{
.loc = tokFromRaw(macro_name).loc,
.tokens = list,
.params = undefined,
Expand All @@ -2633,7 +2644,7 @@ fn define(pp: *Preprocessor, tokenizer: *Tokenizer) Error!void {
}

/// Handle a function like #define directive.
fn defineFn(pp: *Preprocessor, tokenizer: *Tokenizer, macro_name: RawToken, l_paren: RawToken) Error!void {
fn defineFn(pp: *Preprocessor, tokenizer: *Tokenizer, define_tok: RawToken, macro_name: RawToken, l_paren: RawToken) Error!void {
assert(macro_name.id.isMacroIdentifier());
var params = std.ArrayList([]const u8).init(pp.gpa);
defer params.deinit();
Expand Down Expand Up @@ -2810,7 +2821,7 @@ fn defineFn(pp: *Preprocessor, tokenizer: *Tokenizer, macro_name: RawToken, l_pa

const param_list = try pp.arena.allocator().dupe([]const u8, params.items);
const token_list = try pp.arena.allocator().dupe(RawToken, pp.token_buf.items);
try pp.defineMacro(macro_name, .{
try pp.defineMacro(define_tok, macro_name, .{
.is_func = true,
.params = param_list,
.var_args = var_args or gnu_var_args.len != 0,
Expand Down Expand Up @@ -3273,8 +3284,78 @@ fn printLinemarker(
// After how many empty lines are needed to replace them with linemarkers.
const collapse_newlines = 8;

/// Selects what `prettyPrintTokens` writes: the preprocessed result, macro
/// directives, or both.
pub const DumpMode = enum {
/// Standard preprocessor output only; no macro directives are printed.
result_only,
/// Output only `#define` directives for the macros defined during the execution of the
/// preprocessor; the preprocessed result itself is not printed.
/// Only macros which are still defined at the end of preprocessing are printed.
/// Only the most recent definition of each macro is printed.
/// Builtin macros are skipped.
/// Defines are printed in arbitrary order.
macros_only,
/// Standard preprocessor output, but additionally output `#define`s and `#undef`s for
/// macros as they are encountered.
macros_and_result,
/// Same as `macros_and_result`, except only the macro name is printed for `#define`s.
macro_names_and_result,
};

/// Write the text of a macro `#define` / `#undef` that begins at `loc` to `w`.
///
/// The source line is tokenized again from `loc` rather than printed from
/// `pp.defines`, because the macro being printed may share its name with an
/// entry in `pp.defines` while having a different definition (it may have been
/// #undef'ed and redefined later).
///
/// Output stops at the first newline or end of file. With `parts == .name_only`
/// it stops right after the first identifier token has been written.
/// Consecutive whitespace tokens are written as a single space, and comments
/// that appear before the name are dropped.
///
/// NOTE(review): only `.identifier` / `.extended_identifier` tokens are treated
/// as the macro name. If the tokenizer reports a keyword-spelled macro name with
/// a keyword id, `.name_only` would not stop at it — confirm against Tokenizer.
fn prettyPrintMacro(pp: *Preprocessor, w: anytype, loc: Source.Location, parts: enum { name_only, name_and_body }) !void {
    const source = pp.comp.getSource(loc.id);
    var tokenizer: Tokenizer = .{
        .buf = source.buf,
        .langopts = pp.comp.langopts,
        .source = source.id,
        .index = loc.byte_offset,
    };

    // True while the most recently written output was a collapsed space.
    var last_was_space = false;
    // Comments are only echoed once the macro name has been written.
    var name_seen = false;

    while (true) {
        const tok = tokenizer.next();
        switch (tok.id) {
            // A directive ends with its line.
            .nl, .eof => return,
            .whitespace => {
                if (last_was_space) continue;
                try w.writeByte(' ');
                last_was_space = true;
            },
            .comment => {
                if (!name_seen) continue;
                last_was_space = false;
                try w.writeAll(pp.tokSlice(tok));
            },
            .identifier, .extended_identifier => {
                last_was_space = false;
                try w.writeAll(pp.tokSlice(tok));
                if (parts == .name_only) return;
                name_seen = true;
            },
            else => {
                last_was_space = false;
                try w.writeAll(pp.tokSlice(tok));
            },
        }
    }
}

/// Write one `#define` line to `w` for every non-builtin macro currently stored
/// in `pp.defines`.
///
/// Each line is produced by re-tokenizing the source at the macro's recorded
/// location, so it contains the macro name followed by its body. Lines appear
/// in the iteration order of `pp.defines`.
fn prettyPrintMacrosOnly(pp: *Preprocessor, w: anytype) !void {
    var macros = pp.defines.valueIterator();
    while (macros.next()) |entry| {
        if (!entry.is_builtin) {
            try w.writeAll("#define ");
            try pp.prettyPrintMacro(w, entry.loc, .name_and_body);
            try w.writeByte('\n');
        }
    }
}

/// Pretty print tokens and try to preserve whitespace.
pub fn prettyPrintTokens(pp: *Preprocessor, w: anytype) !void {
pub fn prettyPrintTokens(pp: *Preprocessor, w: anytype, macro_dump_mode: DumpMode) !void {
if (macro_dump_mode == .macros_only) {
return pp.prettyPrintMacrosOnly(w);
}

const tok_ids = pp.tokens.items(.id);

var i: u32 = 0;
Expand Down Expand Up @@ -3366,6 +3447,17 @@ pub fn prettyPrintTokens(pp: *Preprocessor, w: anytype) !void {
try pp.printLinemarker(w, line_col.line_no, source, .@"resume");
last_nl = true;
},
.keyword_define, .keyword_undef => {
switch (macro_dump_mode) {
.macros_and_result, .macro_names_and_result => {
try w.writeByte('#');
try pp.prettyPrintMacro(w, cur.loc, if (macro_dump_mode == .macros_and_result) .name_and_body else .name_only);
last_nl = false;
},
.result_only => unreachable, // `pp.store_macro_tokens` should be false for standard preprocessor output
.macros_only => unreachable, // handled by prettyPrintMacrosOnly
}
},
else => {
const slice = pp.expandedSlice(cur);
try w.writeAll(slice);
Expand Down Expand Up @@ -3396,7 +3488,7 @@ test "Preserve pragma tokens sometimes" {
const test_runner_macros = try comp.addSourceFromBuffer("<test_runner>", source_text);
const eof = try pp.preprocess(test_runner_macros);
try pp.addToken(eof);
try pp.prettyPrintTokens(buf.writer());
try pp.prettyPrintTokens(buf.writer(), .result_only);
return allocator.dupe(u8, buf.items);
}

Expand Down
9 changes: 9 additions & 0 deletions test/cases/debug dump macro names.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
//aro-args -E -dN

#define CHECK_PARTIAL_MATCH

#define FOO 42
#define BAR FOO
int x = BAR;
#undef FOO
#define FOO 43
9 changes: 9 additions & 0 deletions test/cases/debug dump macros and results.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
//aro-args -E -dD

#define CHECK_PARTIAL_MATCH

#define FOO 42
#define BAR FOO
int x = BAR;
#undef FOO
#define FOO 43
9 changes: 9 additions & 0 deletions test/cases/debug dump macros.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
//aro-args -E -dM

#define CHECK_PARTIAL_MATCH

#define FOO 42
#define BAR FOO
int x = FOO;
#undef BAR
#define BAR 43
5 changes: 5 additions & 0 deletions test/cases/expanded/debug dump macro names.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#define FOO
#define BAR
int x = 42;
#undef FOO
#define FOO
5 changes: 5 additions & 0 deletions test/cases/expanded/debug dump macros and results.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#define FOO 42
#define BAR FOO
int x = 42;
#undef FOO
#define FOO 43
1 change: 1 addition & 0 deletions test/cases/expanded/debug dump macros.c
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
#define BAR 43
Loading

0 comments on commit 1d24511

Please sign in to comment.