From 267dc5946b42b18babda48987557f3db60ce7a79 Mon Sep 17 00:00:00 2001 From: Evan Haas Date: Mon, 5 Aug 2024 10:37:10 -0700 Subject: [PATCH 1/2] Preprocessor: Add debug dump options Adds support for GCC's -dD, -dM, -dN flags which enable dumping macro names and/or definitions along with or instead of preprocessor output. --- src/aro/Driver.zig | 31 ++++- src/aro/Preprocessor.zig | 112 ++++++++++++++++-- test/cases/debug dump macro names.c | 9 ++ test/cases/debug dump macros and results.c | 9 ++ test/cases/debug dump macros.c | 9 ++ test/cases/expanded/debug dump macro names.c | 5 + .../expanded/debug dump macros and results.c | 5 + test/cases/expanded/debug dump macros.c | 1 + test/runner.zig | 44 +++++-- 9 files changed, 204 insertions(+), 21 deletions(-) create mode 100644 test/cases/debug dump macro names.c create mode 100644 test/cases/debug dump macros and results.c create mode 100644 test/cases/debug dump macros.c create mode 100644 test/cases/expanded/debug dump macro names.c create mode 100644 test/cases/expanded/debug dump macros and results.c create mode 100644 test/cases/expanded/debug dump macros.c diff --git a/src/aro/Driver.zig b/src/aro/Driver.zig index db0b1155..8fda79a1 100644 --- a/src/aro/Driver.zig +++ b/src/aro/Driver.zig @@ -47,6 +47,19 @@ color: ?bool = null, nobuiltininc: bool = false, nostdinc: bool = false, nostdlibinc: bool = false, +debug_dump_letters: packed struct(u3) { + d: bool = false, + m: bool = false, + n: bool = false, + + /// Specifying letters whose behavior conflicts is undefined. 
+ pub fn getPreprocessorDumpMode(self: @This()) Preprocessor.DumpMode { + if (self.d) return .macros_and_result; + if (self.m) return .macros_only; + if (self.n) return .macro_names_and_result; + return .result_only; + } +} = .{}, /// Full path to the aro executable aro_name: []const u8 = "", @@ -92,6 +105,9 @@ pub const usage = \\ \\Compile options: \\ -c, --compile Only run preprocess, compile, and assemble steps + \\ -dM Output #define directives for all the macros defined during the execution of the preprocessor + \\ -dD Like -dM except that it outputs both the #define directives and the result of preprocessing + \\ -dN Like -dD, but emit only the macro names, not their expansions. \\ -D = Define to (defaults to 1) \\ -E Only run the preprocessor \\ -fchar8_t Enable char8_t (enabled by default in C23 and later) @@ -234,6 +250,12 @@ pub fn parseArgs( d.system_defines = .no_system_defines; } else if (mem.eql(u8, arg, "-c") or mem.eql(u8, arg, "--compile")) { d.only_compile = true; + } else if (mem.eql(u8, arg, "-dD")) { + d.debug_dump_letters.d = true; + } else if (mem.eql(u8, arg, "-dM")) { + d.debug_dump_letters.m = true; + } else if (mem.eql(u8, arg, "-dN")) { + d.debug_dump_letters.n = true; } else if (mem.eql(u8, arg, "-E")) { d.only_preprocess = true; } else if (mem.eql(u8, arg, "-P") or mem.eql(u8, arg, "--no-line-commands")) { @@ -636,13 +658,17 @@ fn processSource( if (d.comp.langopts.ms_extensions) { d.comp.ms_cwd_source_id = source.id; } - + const dump_mode = d.debug_dump_letters.getPreprocessorDumpMode(); if (d.verbose_pp) pp.verbose = true; if (d.only_preprocess) { pp.preserve_whitespace = true; if (d.line_commands) { pp.linemarkers = if (d.use_line_directives) .line_directives else .numeric_directives; } + switch (dump_mode) { + .macros_and_result, .macro_names_and_result => pp.store_macro_tokens = true, + .result_only, .macros_only => {}, + } } try pp.preprocessSources(&.{ source, builtin, user_macros }); @@ -663,7 +689,8 @@ fn processSource( defer 
if (d.output_name != null) file.close(); var buf_w = std.io.bufferedWriter(file.writer()); - pp.prettyPrintTokens(buf_w.writer()) catch |er| + + pp.prettyPrintTokens(buf_w.writer(), dump_mode) catch |er| return d.fatal("unable to write result: {s}", .{errorDescription(er)}); buf_w.flush() catch |er| diff --git a/src/aro/Preprocessor.zig b/src/aro/Preprocessor.zig index 072e75aa..16be92b4 100644 --- a/src/aro/Preprocessor.zig +++ b/src/aro/Preprocessor.zig @@ -97,6 +97,11 @@ poisoned_identifiers: std.StringHashMap(void), /// Map from Source.Id to macro name in the `#ifndef` condition which guards the source, if any include_guards: std.AutoHashMapUnmanaged(Source.Id, []const u8) = .{}, +/// Store `keyword_define` and `keyword_undef` tokens. +/// Used to implement preprocessor debug dump options +/// Must be false unless in -E mode (parser does not handle those token types) +store_macro_tokens: bool = false, + /// Memory is retained to avoid allocation on every single token. top_expansion_buf: ExpandBuf, @@ -622,9 +627,12 @@ fn preprocessExtra(pp: *Preprocessor, source: Source) MacroError!TokenWithExpans } if_level -= 1; }, - .keyword_define => try pp.define(&tokenizer), + .keyword_define => try pp.define(&tokenizer, directive), .keyword_undef => { const macro_name = (try pp.expectMacroName(&tokenizer)) orelse continue; + if (pp.store_macro_tokens) { + try pp.addToken(tokFromRaw(directive)); + } _ = pp.defines.remove(macro_name); try pp.expectNl(&tokenizer); @@ -2508,7 +2516,7 @@ fn makeGeneratedToken(pp: *Preprocessor, start: usize, id: Token.Id, source: Tok } /// Defines a new macro and warns if it is a duplicate -fn defineMacro(pp: *Preprocessor, name_tok: RawToken, macro: Macro) Error!void { +fn defineMacro(pp: *Preprocessor, define_tok: RawToken, name_tok: RawToken, macro: Macro) Error!void { const name_str = pp.tokSlice(name_tok); const gop = try pp.defines.getOrPut(pp.gpa, name_str); if (gop.found_existing and !gop.value_ptr.eql(macro, pp)) { @@ -2529,11 
+2537,14 @@ fn defineMacro(pp: *Preprocessor, name_tok: RawToken, macro: Macro) Error!void { if (pp.verbose) { pp.verboseLog(name_tok, "macro {s} defined", .{name_str}); } + if (pp.store_macro_tokens) { + try pp.addToken(tokFromRaw(define_tok)); + } gop.value_ptr.* = macro; } /// Handle a #define directive. -fn define(pp: *Preprocessor, tokenizer: *Tokenizer) Error!void { +fn define(pp: *Preprocessor, tokenizer: *Tokenizer, define_tok: RawToken) Error!void { // Get macro name and validate it. const macro_name = tokenizer.nextNoWS(); if (macro_name.id == .keyword_defined) { @@ -2556,7 +2567,7 @@ fn define(pp: *Preprocessor, tokenizer: *Tokenizer) Error!void { // Check for function macros and empty defines. var first = tokenizer.next(); switch (first.id) { - .nl, .eof => return pp.defineMacro(macro_name, .{ + .nl, .eof => return pp.defineMacro(define_tok, macro_name, .{ .params = &.{}, .tokens = &.{}, .var_args = false, @@ -2564,7 +2575,7 @@ fn define(pp: *Preprocessor, tokenizer: *Tokenizer) Error!void { .is_func = false, }), .whitespace => first = tokenizer.next(), - .l_paren => return pp.defineFn(tokenizer, macro_name, first), + .l_paren => return pp.defineFn(tokenizer, define_tok, macro_name, first), else => try pp.err(first, .whitespace_after_macro_name), } if (first.id == .hash_hash) { @@ -2623,7 +2634,7 @@ fn define(pp: *Preprocessor, tokenizer: *Tokenizer) Error!void { } const list = try pp.arena.allocator().dupe(RawToken, pp.token_buf.items); - try pp.defineMacro(macro_name, .{ + try pp.defineMacro(define_tok, macro_name, .{ .loc = tokFromRaw(macro_name).loc, .tokens = list, .params = undefined, @@ -2633,7 +2644,7 @@ fn define(pp: *Preprocessor, tokenizer: *Tokenizer) Error!void { } /// Handle a function like #define directive. 
-fn defineFn(pp: *Preprocessor, tokenizer: *Tokenizer, macro_name: RawToken, l_paren: RawToken) Error!void { +fn defineFn(pp: *Preprocessor, tokenizer: *Tokenizer, define_tok: RawToken, macro_name: RawToken, l_paren: RawToken) Error!void { assert(macro_name.id.isMacroIdentifier()); var params = std.ArrayList([]const u8).init(pp.gpa); defer params.deinit(); @@ -2810,7 +2821,7 @@ fn defineFn(pp: *Preprocessor, tokenizer: *Tokenizer, macro_name: RawToken, l_pa const param_list = try pp.arena.allocator().dupe([]const u8, params.items); const token_list = try pp.arena.allocator().dupe(RawToken, pp.token_buf.items); - try pp.defineMacro(macro_name, .{ + try pp.defineMacro(define_tok, macro_name, .{ .is_func = true, .params = param_list, .var_args = var_args or gnu_var_args.len != 0, @@ -3273,8 +3284,78 @@ fn printLinemarker( // After how many empty lines are needed to replace them with linemarkers. const collapse_newlines = 8; +pub const DumpMode = enum { + /// Standard preprocessor output; no macros + result_only, + /// Output only #define directives for all the macros defined during the execution of the preprocessor + /// Only macros which are still defined at the end of preprocessing are printed. + /// Only the most recent definition is printed + /// Defines are printed in arbitrary order + macros_only, + /// Standard preprocessor output; but additionally output #define's and #undef's for macros as they are encountered + macros_and_result, + /// Same as macros_and_result, except only the macro name is printed for #define's + macro_names_and_result, +}; + +/// Pretty-print the macro define or undef at location `loc`. 
+/// We re-tokenize the directive because we are printing a macro that may have the same name as one in +/// `pp.defines` but a different definition (due to being #undef'ed and then redefined) +fn prettyPrintMacro(pp: *Preprocessor, w: anytype, loc: Source.Location, parts: enum { name_only, name_and_body }) !void { + const source = pp.comp.getSource(loc.id); + var tokenizer: Tokenizer = .{ + .buf = source.buf, + .langopts = pp.comp.langopts, + .source = source.id, + .index = loc.byte_offset, + }; + var prev_ws = false; // avoid printing multiple whitespace if /* */ comments are within the macro def + var saw_name = false; // do not print comments before the name token is seen. + while (true) { + const tok = tokenizer.next(); + switch (tok.id) { + .comment => { + if (saw_name) { + prev_ws = false; + try w.print("{s}", .{pp.tokSlice(tok)}); + } + }, + .nl, .eof => break, + .whitespace => { + if (!prev_ws) { + try w.writeByte(' '); + prev_ws = true; + } + }, + else => { + prev_ws = false; + try w.print("{s}", .{pp.tokSlice(tok)}); + }, + } + if (tok.id == .identifier or tok.id == .extended_identifier) { + if (parts == .name_only) break; + saw_name = true; + } + } +} + +fn prettyPrintMacrosOnly(pp: *Preprocessor, w: anytype) !void { + var it = pp.defines.valueIterator(); + while (it.next()) |macro| { + if (macro.is_builtin) continue; + + try w.writeAll("#define "); + try pp.prettyPrintMacro(w, macro.loc, .name_and_body); + try w.writeByte('\n'); + } +} + /// Pretty print tokens and try to preserve whitespace. 
-pub fn prettyPrintTokens(pp: *Preprocessor, w: anytype) !void { +pub fn prettyPrintTokens(pp: *Preprocessor, w: anytype, macro_dump_mode: DumpMode) !void { + if (macro_dump_mode == .macros_only) { + return pp.prettyPrintMacrosOnly(w); + } + const tok_ids = pp.tokens.items(.id); var i: u32 = 0; @@ -3366,6 +3447,17 @@ pub fn prettyPrintTokens(pp: *Preprocessor, w: anytype) !void { try pp.printLinemarker(w, line_col.line_no, source, .@"resume"); last_nl = true; }, + .keyword_define, .keyword_undef => { + switch (macro_dump_mode) { + .macros_and_result, .macro_names_and_result => { + try w.writeByte('#'); + try pp.prettyPrintMacro(w, cur.loc, if (macro_dump_mode == .macros_and_result) .name_and_body else .name_only); + last_nl = false; + }, + .result_only => unreachable, // `pp.store_macro_tokens` should be false for standard preprocessor output + .macros_only => unreachable, // handled by prettyPrintMacrosOnly + } + }, else => { const slice = pp.expandedSlice(cur); try w.writeAll(slice); @@ -3396,7 +3488,7 @@ test "Preserve pragma tokens sometimes" { const test_runner_macros = try comp.addSourceFromBuffer("", source_text); const eof = try pp.preprocess(test_runner_macros); try pp.addToken(eof); - try pp.prettyPrintTokens(buf.writer()); + try pp.prettyPrintTokens(buf.writer(), .result_only); return allocator.dupe(u8, buf.items); } diff --git a/test/cases/debug dump macro names.c b/test/cases/debug dump macro names.c new file mode 100644 index 00000000..58123472 --- /dev/null +++ b/test/cases/debug dump macro names.c @@ -0,0 +1,9 @@ +//aro-args -E -dN + +#define CHECK_PARTIAL_MATCH + +#define FOO 42 +#define BAR FOO +int x = BAR; +#undef FOO +#define FOO 43 diff --git a/test/cases/debug dump macros and results.c b/test/cases/debug dump macros and results.c new file mode 100644 index 00000000..b87cbfe4 --- /dev/null +++ b/test/cases/debug dump macros and results.c @@ -0,0 +1,9 @@ +//aro-args -E -dD + +#define CHECK_PARTIAL_MATCH + +#define FOO 42 +#define BAR FOO +int x 
= BAR; +#undef FOO +#define FOO 43 diff --git a/test/cases/debug dump macros.c b/test/cases/debug dump macros.c new file mode 100644 index 00000000..dcf837c0 --- /dev/null +++ b/test/cases/debug dump macros.c @@ -0,0 +1,9 @@ +//aro-args -E -dM + +#define CHECK_PARTIAL_MATCH + +#define FOO 42 +#define BAR FOO +int x = FOO; +#undef BAR +#define BAR 43 diff --git a/test/cases/expanded/debug dump macro names.c b/test/cases/expanded/debug dump macro names.c new file mode 100644 index 00000000..4984a839 --- /dev/null +++ b/test/cases/expanded/debug dump macro names.c @@ -0,0 +1,5 @@ +#define FOO +#define BAR +int x = 42; +#undef FOO +#define FOO \ No newline at end of file diff --git a/test/cases/expanded/debug dump macros and results.c b/test/cases/expanded/debug dump macros and results.c new file mode 100644 index 00000000..f0d18298 --- /dev/null +++ b/test/cases/expanded/debug dump macros and results.c @@ -0,0 +1,5 @@ +#define FOO 42 +#define BAR FOO +int x = 42; +#undef FOO +#define FOO 43 \ No newline at end of file diff --git a/test/cases/expanded/debug dump macros.c b/test/cases/expanded/debug dump macros.c new file mode 100644 index 00000000..8c82f1a1 --- /dev/null +++ b/test/cases/expanded/debug dump macros.c @@ -0,0 +1 @@ +#define BAR 43 \ No newline at end of file diff --git a/test/runner.zig b/test/runner.zig index 83283d15..a6677598 100644 --- a/test/runner.zig +++ b/test/runner.zig @@ -10,11 +10,19 @@ const AllocatorError = std.mem.Allocator.Error; var general_purpose_allocator = std.heap.GeneralPurposeAllocator(.{}){}; +const AddCommandLineArgsResult = struct { + bool, + aro.Preprocessor.Linemarkers, + aro.Compilation.SystemDefinesMode, + aro.Preprocessor.DumpMode, +}; + /// Returns only_preprocess and line_markers settings if saw -E -fn addCommandLineArgs(comp: *aro.Compilation, file: aro.Source, macro_buf: anytype) !struct { bool, aro.Preprocessor.Linemarkers, aro.Compilation.SystemDefinesMode } { +fn addCommandLineArgs(comp: *aro.Compilation, file: 
aro.Source, macro_buf: anytype) !AddCommandLineArgsResult { var only_preprocess = false; var line_markers: aro.Preprocessor.Linemarkers = .none; var system_defines: aro.Compilation.SystemDefinesMode = .include_system_defines; + var dump_mode: aro.Preprocessor.DumpMode = .result_only; comp.langopts.gnuc_version = 40201; // Set to clang default value since we do not call parseArgs if there are no args if (std.mem.startsWith(u8, file.buf, "//aro-args")) { var test_args = std.ArrayList([]const u8).init(comp.gpa); @@ -28,6 +36,7 @@ fn addCommandLineArgs(comp: *aro.Compilation, file: aro.Source, macro_buf: anyty _ = try driver.parseArgs(std.io.null_writer, macro_buf, test_args.items); only_preprocess = driver.only_preprocess; system_defines = driver.system_defines; + dump_mode = driver.debug_dump_letters.getPreprocessorDumpMode(); if (only_preprocess) { if (driver.line_commands) { line_markers = if (driver.use_line_directives) .line_directives else .numeric_directives; @@ -50,7 +59,7 @@ fn addCommandLineArgs(comp: *aro.Compilation, file: aro.Source, macro_buf: anyty } } - return .{ only_preprocess, line_markers, system_defines }; + return .{ only_preprocess, line_markers, system_defines, dump_mode }; } fn testOne(allocator: std.mem.Allocator, path: []const u8, test_dir: []const u8) !void { @@ -64,7 +73,7 @@ fn testOne(allocator: std.mem.Allocator, path: []const u8, test_dir: []const u8) var macro_buf = std.ArrayList(u8).init(comp.gpa); defer macro_buf.deinit(); - _, _, const system_defines = try addCommandLineArgs(&comp, file, macro_buf.writer()); + _, _, const system_defines, _ = try addCommandLineArgs(&comp, file, macro_buf.writer()); const user_macros = try comp.addSourceFromBuffer("", macro_buf.items); const builtin_macros = try comp.generateBuiltinMacros(system_defines); @@ -211,7 +220,7 @@ pub fn main() !void { var macro_buf = std.ArrayList(u8).init(comp.gpa); defer macro_buf.deinit(); - const only_preprocess, const linemarkers, const system_defines = try 
addCommandLineArgs(&comp, file, macro_buf.writer()); + const only_preprocess, const linemarkers, const system_defines, const dump_mode = try addCommandLineArgs(&comp, file, macro_buf.writer()); const user_macros = try comp.addSourceFromBuffer("", macro_buf.items); const builtin_macros = try comp.generateBuiltinMacros(system_defines); @@ -222,6 +231,9 @@ pub fn main() !void { if (only_preprocess) { pp.preserve_whitespace = true; pp.linemarkers = linemarkers; + if (dump_mode != .result_only) { + pp.store_macro_tokens = true; + } } try pp.addBuiltinMacros(); @@ -279,12 +291,26 @@ pub fn main() !void { var output = std.ArrayList(u8).init(gpa); defer output.deinit(); - try pp.prettyPrintTokens(output.writer()); + try pp.prettyPrintTokens(output.writer(), dump_mode); - if (std.testing.expectEqualStrings(expected_output, output.items)) - ok_count += 1 - else |_| - fail_count += 1; + if (pp.defines.contains("CHECK_PARTIAL_MATCH")) { + const index = std.mem.indexOf(u8, output.items, expected_output); + if (index != null) { + ok_count += 1; + } else { + fail_count += 1; + std.debug.print("\n====== expected to find: =========\n", .{}); + std.debug.print("{s}", .{expected_output}); + std.debug.print("\n======== but did not find it in this: =========\n", .{}); + std.debug.print("{s}", .{output.items}); + std.debug.print("\n======================================\n", .{}); + } + } else { + if (std.testing.expectEqualStrings(expected_output, output.items)) + ok_count += 1 + else |_| + fail_count += 1; + } continue; } From 43b7a809a23eee39858fee0355be592afced3e0f Mon Sep 17 00:00:00 2001 From: Evan Haas Date: Tue, 6 Aug 2024 12:33:40 -0700 Subject: [PATCH 2/2] Driver: update preprocessor debug letter precedence --- src/aro/Driver.zig | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/aro/Driver.zig b/src/aro/Driver.zig index 8fda79a1..16daf774 100644 --- a/src/aro/Driver.zig +++ b/src/aro/Driver.zig @@ -52,10 +52,11 @@ debug_dump_letters: packed struct(u3) { 
m: bool = false, n: bool = false, - /// Specifying letters whose behavior conflicts is undefined. + /// According to GCC, specifying letters whose behavior conflicts is undefined. + /// We follow clang in that `-dM` always takes precedence over `-dD`; in this implementation `-dD` in turn wins over `-dN`. pub fn getPreprocessorDumpMode(self: @This()) Preprocessor.DumpMode { - if (self.d) return .macros_and_result; if (self.m) return .macros_only; + if (self.d) return .macros_and_result; if (self.n) return .macro_names_and_result; return .result_only; }