Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Preprocessor: Add debug dump options #745

Merged
merged 2 commits into from
Aug 8, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 29 additions & 2 deletions src/aro/Driver.zig
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,19 @@ color: ?bool = null,
nobuiltininc: bool = false,
nostdinc: bool = false,
nostdlibinc: bool = false,
debug_dump_letters: packed struct(u3) {
d: bool = false,
m: bool = false,
n: bool = false,

/// Specifying letters whose behavior conflicts is undefined.
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This could specify that we match clang's behavior.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does it actually match clang as it is now? I just chose the current order because it was alphabetical.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok just did a quick check - clang doesn't support -dN; and -dM wins over -dD (it always does -dM regardless of order if -dD and -dM are provided). I'll update it to match that.

/// Collapse the `-dD`, `-dM` and `-dN` flags into a single preprocessor dump mode.
/// Conflicting letters are resolved the way clang does: `-dM` always takes precedence
/// over `-dD`, regardless of the order in which they were passed. `-dN` is only
/// consulted when neither of those is set; with no letter set the plain preprocessor
/// result is requested.
pub fn getPreprocessorDumpMode(self: @This()) Preprocessor.DumpMode {
    // `-dM` must be tested first so that it wins over `-dD` when both are given.
    if (self.m) return .macros_only;
    if (self.d) return .macros_and_result;
    if (self.n) return .macro_names_and_result;
    return .result_only;
}
} = .{},

/// Full path to the aro executable
aro_name: []const u8 = "",
Expand Down Expand Up @@ -92,6 +105,9 @@ pub const usage =
\\
\\Compile options:
\\ -c, --compile Only run preprocess, compile, and assemble steps
\\ -dM Output #define directives for all the macros defined during the execution of the preprocessor
\\ -dD Like -dM except that it outputs both the #define directives and the result of preprocessing
\\ -dN Like -dD, but emit only the macro names, not their expansions.
\\ -D <macro>=<value> Define <macro> to <value> (defaults to 1)
\\ -E Only run the preprocessor
\\ -fchar8_t Enable char8_t (enabled by default in C23 and later)
Expand Down Expand Up @@ -234,6 +250,12 @@ pub fn parseArgs(
d.system_defines = .no_system_defines;
} else if (mem.eql(u8, arg, "-c") or mem.eql(u8, arg, "--compile")) {
d.only_compile = true;
} else if (mem.eql(u8, arg, "-dD")) {
d.debug_dump_letters.d = true;
} else if (mem.eql(u8, arg, "-dM")) {
d.debug_dump_letters.m = true;
} else if (mem.eql(u8, arg, "-dN")) {
d.debug_dump_letters.n = true;
} else if (mem.eql(u8, arg, "-E")) {
d.only_preprocess = true;
} else if (mem.eql(u8, arg, "-P") or mem.eql(u8, arg, "--no-line-commands")) {
Expand Down Expand Up @@ -636,13 +658,17 @@ fn processSource(
if (d.comp.langopts.ms_extensions) {
d.comp.ms_cwd_source_id = source.id;
}

const dump_mode = d.debug_dump_letters.getPreprocessorDumpMode();
if (d.verbose_pp) pp.verbose = true;
if (d.only_preprocess) {
pp.preserve_whitespace = true;
if (d.line_commands) {
pp.linemarkers = if (d.use_line_directives) .line_directives else .numeric_directives;
}
switch (dump_mode) {
.macros_and_result, .macro_names_and_result => pp.store_macro_tokens = true,
.result_only, .macros_only => {},
}
}

try pp.preprocessSources(&.{ source, builtin, user_macros });
Expand All @@ -663,7 +689,8 @@ fn processSource(
defer if (d.output_name != null) file.close();

var buf_w = std.io.bufferedWriter(file.writer());
pp.prettyPrintTokens(buf_w.writer()) catch |er|

pp.prettyPrintTokens(buf_w.writer(), dump_mode) catch |er|
return d.fatal("unable to write result: {s}", .{errorDescription(er)});

buf_w.flush() catch |er|
Expand Down
112 changes: 102 additions & 10 deletions src/aro/Preprocessor.zig
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,11 @@ poisoned_identifiers: std.StringHashMap(void),
/// Map from Source.Id to macro name in the `#ifndef` condition which guards the source, if any
include_guards: std.AutoHashMapUnmanaged(Source.Id, []const u8) = .{},

/// When true, `keyword_define` and `keyword_undef` tokens are added to the token list
/// as `#define` / `#undef` directives are processed.
/// Used to implement the preprocessor debug dump options.
/// Must be false unless in -E mode (the parser does not handle those token types).
store_macro_tokens: bool = false,

/// Memory is retained to avoid allocation on every single token.
top_expansion_buf: ExpandBuf,

Expand Down Expand Up @@ -622,9 +627,12 @@ fn preprocessExtra(pp: *Preprocessor, source: Source) MacroError!TokenWithExpans
}
if_level -= 1;
},
.keyword_define => try pp.define(&tokenizer),
.keyword_define => try pp.define(&tokenizer, directive),
.keyword_undef => {
const macro_name = (try pp.expectMacroName(&tokenizer)) orelse continue;
if (pp.store_macro_tokens) {
try pp.addToken(tokFromRaw(directive));
}

_ = pp.defines.remove(macro_name);
try pp.expectNl(&tokenizer);
Expand Down Expand Up @@ -2508,7 +2516,7 @@ fn makeGeneratedToken(pp: *Preprocessor, start: usize, id: Token.Id, source: Tok
}

/// Defines a new macro and warns if it is a duplicate
fn defineMacro(pp: *Preprocessor, name_tok: RawToken, macro: Macro) Error!void {
fn defineMacro(pp: *Preprocessor, define_tok: RawToken, name_tok: RawToken, macro: Macro) Error!void {
const name_str = pp.tokSlice(name_tok);
const gop = try pp.defines.getOrPut(pp.gpa, name_str);
if (gop.found_existing and !gop.value_ptr.eql(macro, pp)) {
Expand All @@ -2529,11 +2537,14 @@ fn defineMacro(pp: *Preprocessor, name_tok: RawToken, macro: Macro) Error!void {
if (pp.verbose) {
pp.verboseLog(name_tok, "macro {s} defined", .{name_str});
}
if (pp.store_macro_tokens) {
try pp.addToken(tokFromRaw(define_tok));
}
gop.value_ptr.* = macro;
}

/// Handle a #define directive.
fn define(pp: *Preprocessor, tokenizer: *Tokenizer) Error!void {
fn define(pp: *Preprocessor, tokenizer: *Tokenizer, define_tok: RawToken) Error!void {
// Get macro name and validate it.
const macro_name = tokenizer.nextNoWS();
if (macro_name.id == .keyword_defined) {
Expand All @@ -2556,15 +2567,15 @@ fn define(pp: *Preprocessor, tokenizer: *Tokenizer) Error!void {
// Check for function macros and empty defines.
var first = tokenizer.next();
switch (first.id) {
.nl, .eof => return pp.defineMacro(macro_name, .{
.nl, .eof => return pp.defineMacro(define_tok, macro_name, .{
.params = &.{},
.tokens = &.{},
.var_args = false,
.loc = tokFromRaw(macro_name).loc,
.is_func = false,
}),
.whitespace => first = tokenizer.next(),
.l_paren => return pp.defineFn(tokenizer, macro_name, first),
.l_paren => return pp.defineFn(tokenizer, define_tok, macro_name, first),
else => try pp.err(first, .whitespace_after_macro_name),
}
if (first.id == .hash_hash) {
Expand Down Expand Up @@ -2623,7 +2634,7 @@ fn define(pp: *Preprocessor, tokenizer: *Tokenizer) Error!void {
}

const list = try pp.arena.allocator().dupe(RawToken, pp.token_buf.items);
try pp.defineMacro(macro_name, .{
try pp.defineMacro(define_tok, macro_name, .{
.loc = tokFromRaw(macro_name).loc,
.tokens = list,
.params = undefined,
Expand All @@ -2633,7 +2644,7 @@ fn define(pp: *Preprocessor, tokenizer: *Tokenizer) Error!void {
}

/// Handle a function like #define directive.
fn defineFn(pp: *Preprocessor, tokenizer: *Tokenizer, macro_name: RawToken, l_paren: RawToken) Error!void {
fn defineFn(pp: *Preprocessor, tokenizer: *Tokenizer, define_tok: RawToken, macro_name: RawToken, l_paren: RawToken) Error!void {
assert(macro_name.id.isMacroIdentifier());
var params = std.ArrayList([]const u8).init(pp.gpa);
defer params.deinit();
Expand Down Expand Up @@ -2810,7 +2821,7 @@ fn defineFn(pp: *Preprocessor, tokenizer: *Tokenizer, macro_name: RawToken, l_pa

const param_list = try pp.arena.allocator().dupe([]const u8, params.items);
const token_list = try pp.arena.allocator().dupe(RawToken, pp.token_buf.items);
try pp.defineMacro(macro_name, .{
try pp.defineMacro(define_tok, macro_name, .{
.is_func = true,
.params = param_list,
.var_args = var_args or gnu_var_args.len != 0,
Expand Down Expand Up @@ -3273,8 +3284,78 @@ fn printLinemarker(
// After how many empty lines are needed to replace them with linemarkers.
const collapse_newlines = 8;

/// Selects what `prettyPrintTokens` writes with respect to macro definitions.
pub const DumpMode = enum {
/// Standard preprocessor output; no macros.
result_only,
/// Output only #define directives for all the macros defined during the execution of the preprocessor.
/// Only macros which are still defined at the end of preprocessing are printed.
/// Only the most recent definition of each macro is printed.
/// Defines are printed in arbitrary order.
macros_only,
/// Standard preprocessor output, but additionally output #define's and #undef's for macros as they are encountered.
macros_and_result,
/// Same as `macros_and_result`, except only the macro name is printed for #define's.
macro_names_and_result,
};

/// Write the text of the macro define or undef that starts at `loc` to `w`.
/// The text is re-tokenized from the source buffer instead of being taken from `pp.defines`,
/// because the macro being printed may share its name with an entry in `pp.defines` while
/// having a different definition (it may have been #undef'ed and then redefined).
/// With `.name_only`, printing stops right after the first identifier token; with
/// `.name_and_body`, it continues until the end of the line or of the file.
fn prettyPrintMacro(pp: *Preprocessor, w: anytype, loc: Source.Location, parts: enum { name_only, name_and_body }) !void {
    const source = pp.comp.getSource(loc.id);
    var tokenizer: Tokenizer = .{
        .buf = source.buf,
        .langopts = pp.comp.langopts,
        .source = source.id,
        .index = loc.byte_offset,
    };
    // Runs of whitespace tokens (e.g. around /* */ comments) collapse into one space.
    var last_was_space = false;
    // Comments located before the name token are not printed.
    var name_seen = false;
    while (true) {
        const tok = tokenizer.next();
        switch (tok.id) {
            .nl, .eof => return,
            .whitespace => {
                if (last_was_space) continue;
                try w.writeByte(' ');
                last_was_space = true;
            },
            .comment => {
                if (!name_seen) continue;
                last_was_space = false;
                try w.writeAll(pp.tokSlice(tok));
            },
            .identifier, .extended_identifier => {
                last_was_space = false;
                try w.writeAll(pp.tokSlice(tok));
                if (parts == .name_only) return;
                name_seen = true;
            },
            else => {
                last_was_space = false;
                try w.writeAll(pp.tokSlice(tok));
            },
        }
    }
}

/// Write one `#define` line to `w` for every entry of `pp.defines` that is not a builtin macro.
/// Entries are visited in the iteration order of the `pp.defines` map.
fn prettyPrintMacrosOnly(pp: *Preprocessor, w: anytype) !void {
    var defines = pp.defines.valueIterator();
    while (defines.next()) |entry| {
        if (!entry.is_builtin) {
            try w.writeAll("#define ");
            try pp.prettyPrintMacro(w, entry.loc, .name_and_body);
            try w.writeByte('\n');
        }
    }
}

/// Pretty print tokens and try to preserve whitespace.
pub fn prettyPrintTokens(pp: *Preprocessor, w: anytype) !void {
pub fn prettyPrintTokens(pp: *Preprocessor, w: anytype, macro_dump_mode: DumpMode) !void {
if (macro_dump_mode == .macros_only) {
return pp.prettyPrintMacrosOnly(w);
}

const tok_ids = pp.tokens.items(.id);

var i: u32 = 0;
Expand Down Expand Up @@ -3366,6 +3447,17 @@ pub fn prettyPrintTokens(pp: *Preprocessor, w: anytype) !void {
try pp.printLinemarker(w, line_col.line_no, source, .@"resume");
last_nl = true;
},
.keyword_define, .keyword_undef => {
switch (macro_dump_mode) {
.macros_and_result, .macro_names_and_result => {
try w.writeByte('#');
try pp.prettyPrintMacro(w, cur.loc, if (macro_dump_mode == .macros_and_result) .name_and_body else .name_only);
last_nl = false;
},
.result_only => unreachable, // `pp.store_macro_tokens` should be false for standard preprocessor output
.macros_only => unreachable, // handled by prettyPrintMacrosOnly
}
},
else => {
const slice = pp.expandedSlice(cur);
try w.writeAll(slice);
Expand Down Expand Up @@ -3396,7 +3488,7 @@ test "Preserve pragma tokens sometimes" {
const test_runner_macros = try comp.addSourceFromBuffer("<test_runner>", source_text);
const eof = try pp.preprocess(test_runner_macros);
try pp.addToken(eof);
try pp.prettyPrintTokens(buf.writer());
try pp.prettyPrintTokens(buf.writer(), .result_only);
return allocator.dupe(u8, buf.items);
}

Expand Down
9 changes: 9 additions & 0 deletions test/cases/debug dump macro names.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
//aro-args -E -dN

#define CHECK_PARTIAL_MATCH

#define FOO 42
#define BAR FOO
int x = BAR;
#undef FOO
#define FOO 43
9 changes: 9 additions & 0 deletions test/cases/debug dump macros and results.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
//aro-args -E -dD

#define CHECK_PARTIAL_MATCH

#define FOO 42
#define BAR FOO
int x = BAR;
#undef FOO
#define FOO 43
9 changes: 9 additions & 0 deletions test/cases/debug dump macros.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
//aro-args -E -dM

#define CHECK_PARTIAL_MATCH

#define FOO 42
#define BAR FOO
int x = FOO;
#undef BAR
#define BAR 43
5 changes: 5 additions & 0 deletions test/cases/expanded/debug dump macro names.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#define FOO
#define BAR
int x = 42;
#undef FOO
#define FOO
5 changes: 5 additions & 0 deletions test/cases/expanded/debug dump macros and results.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#define FOO 42
#define BAR FOO
int x = 42;
#undef FOO
#define FOO 43
1 change: 1 addition & 0 deletions test/cases/expanded/debug dump macros.c
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
#define BAR 43
Loading
Loading