diff --git a/src/Codegen_legacy.zig b/src/Codegen_legacy.zig index 527b4220..b2bf3fe3 100644 --- a/src/Codegen_legacy.zig +++ b/src/Codegen_legacy.zig @@ -78,6 +78,9 @@ pub fn generateTree(comp: *Compilation, tree: Tree) Compilation.Error!*Object { error.CodegenFailed => continue, }, + // TODO + .file_scope_asm => {}, + else => unreachable, } } diff --git a/src/Diagnostics.zig b/src/Diagnostics.zig index 36807828..33dd5047 100644 --- a/src/Diagnostics.zig +++ b/src/Diagnostics.zig @@ -166,6 +166,7 @@ pub const Options = packed struct { @"gnu-union-cast": Kind = .default, @"pointer-sign": Kind = .default, @"fuse-ld-path": Kind = .default, + @"language-extension-token": Kind = .default, }; const messages = struct { @@ -2376,6 +2377,16 @@ const messages = struct { const msg = "--rtlib=libgcc requires --unwindlib=libgcc"; const kind = .@"error"; }; + pub const gnu_asm_disabled = struct { + const msg = "GNU-style inline assembly is disabled"; + const kind = .@"error"; + }; + pub const extension_token_used = struct { + const msg = "extension used"; + const kind = .off; + const pedantic = true; + const opt = "language-extension-token"; + }; }; list: std.ArrayListUnmanaged(Message) = .{}, diff --git a/src/Driver.zig b/src/Driver.zig index b1abaac6..e2f68628 100644 --- a/src/Driver.zig +++ b/src/Driver.zig @@ -91,6 +91,8 @@ pub const usage = \\ -fno-declspec Disable support for __declspec attributes \\ -ffp-eval-method=[source|double|extended] \\ Evaluation method to use for floating-point arithmetic + \\ -fgnu-inline-asm Enable GNU style inline asm (default: enabled) + \\ -fno-gnu-inline-asm Disable GNU style inline asm \\ -fms-extensions Enable support for Microsoft extensions \\ -fno-ms-extensions Disable support for Microsoft extensions \\ -fdollars-in-identifiers @@ -225,6 +227,10 @@ pub fn parseArgs( d.comp.langopts.dollars_in_identifiers = false; } else if (mem.eql(u8, arg, "-fdigraphs")) { d.comp.langopts.digraphs = true; + } else if (mem.eql(u8, arg, 
"-fgnu-inline-asm")) { + d.comp.langopts.gnu_asm = true; + } else if (mem.eql(u8, arg, "-fno-gnu-inline-asm")) { + d.comp.langopts.gnu_asm = false; } else if (mem.eql(u8, arg, "-fno-digraphs")) { d.comp.langopts.digraphs = false; } else if (option(arg, "-fmacro-backtrace-limit=")) |limit_str| { diff --git a/src/LangOpts.zig b/src/LangOpts.zig index 81647fd2..4cdfd3de 100644 --- a/src/LangOpts.zig +++ b/src/LangOpts.zig @@ -107,6 +107,9 @@ char_signedness_override: ?std.builtin.Signedness = null, /// If set, override the default availability of char8_t (by default, enabled in C2X and later; disabled otherwise) has_char8_t_override: ?bool = null, +/// Whether to allow GNU-style inline assembly +gnu_asm: bool = true, + pub fn setStandard(self: *LangOpts, name: []const u8) error{InvalidStandard}!void { self.standard = Standard.NameMap.get(name) orelse return error.InvalidStandard; } diff --git a/src/Parser.zig b/src/Parser.zig index 384424ba..70fad3c8 100644 --- a/src/Parser.zig +++ b/src/Parser.zig @@ -663,7 +663,10 @@ pub fn parse(pp: *Preprocessor) Compilation.Error!Tree { continue; }, else => |e| return e, - }) |_| continue; + }) |node| { + try p.decl_buf.append(node); + continue; + } if (p.eatToken(.semicolon)) |tok| { try p.errTok(.extra_semi, tok); continue; @@ -3635,50 +3638,242 @@ fn convertInitList(p: *Parser, il: InitList, init_ty: Type) Error!NodeIndex { } } -/// assembly : keyword_asm asmQual* '(' asmStr ')' +fn msvcAsmStmt(p: *Parser) Error!?NodeIndex { + return p.todo("MSVC assembly statements"); +} + +/// asmOperand : ('[' IDENTIFIER ']')? 
asmStr '(' expr ')' +fn asmOperand(p: *Parser, names: *std.ArrayList(?TokenIndex), constraints: *NodeList, exprs: *NodeList) Error!void { + if (p.eatToken(.l_bracket)) |l_bracket| { + const ident = (try p.eatIdentifier()) orelse { + try p.err(.expected_identifier); + return error.ParsingFailed; + }; + try names.append(ident); + try p.expectClosing(l_bracket, .r_bracket); + } else { + try names.append(null); + } + const constraint = try p.asmStr(); + try constraints.append(constraint.node); + + const l_paren = p.eatToken(.l_paren) orelse { + try p.errExtra(.expected_token, p.tok_i, .{ .tok_id = .{ .actual = p.tok_ids[p.tok_i], .expected = .l_paren } }); + return error.ParsingFailed; + }; + const res = try p.expr(); + try p.expectClosing(l_paren, .r_paren); + try res.expect(p); + try exprs.append(res.node); +} + +/// gnuAsmStmt +/// : asmStr +/// | asmStr ':' asmOperand* +/// | asmStr ':' asmOperand* ':' asmOperand* +/// | asmStr ':' asmOperand* ':' asmOperand* : asmStr? (',' asmStr)* +/// | asmStr ':' asmOperand* ':' asmOperand* : asmStr? 
(',' asmStr)* : IDENTIFIER (',' IDENTIFIER)* +fn gnuAsmStmt(p: *Parser, quals: Tree.GNUAssemblyQualifiers, l_paren: TokenIndex) Error!NodeIndex { + const asm_str = try p.asmStr(); + try p.checkAsmStr(asm_str.val, l_paren); + + if (p.tok_ids[p.tok_i] == .r_paren) { + return p.addNode(.{ + .tag = .gnu_asm_simple, + .ty = .{ .specifier = .void }, + .data = .{ .un = asm_str.node }, + }); + } + + const expected_items = 8; // arbitrarily chosen, most assembly will have fewer than 8 inputs/outputs/constraints/names + const bytes_needed = expected_items * @sizeOf(?TokenIndex) + expected_items * 3 * @sizeOf(NodeIndex); + + var stack_fallback = std.heap.stackFallback(bytes_needed, p.comp.gpa); + const allocator = stack_fallback.get(); + + // TODO: Consider using a TokenIndex of 0 instead of null if we need to store the names in the tree + var names = std.ArrayList(?TokenIndex).initCapacity(allocator, expected_items) catch unreachable; // stack allocation already succeeded + defer names.deinit(); + var constraints = NodeList.initCapacity(allocator, expected_items) catch unreachable; // stack allocation already succeeded + defer constraints.deinit(); + var exprs = NodeList.initCapacity(allocator, expected_items) catch unreachable; //stack allocation already succeeded + defer exprs.deinit(); + var clobbers = NodeList.initCapacity(allocator, expected_items) catch unreachable; //stack allocation already succeeded + defer clobbers.deinit(); + + // Outputs + var ate_extra_colon = false; + if (p.eatToken(.colon) orelse p.eatToken(.colon_colon)) |tok_i| { + ate_extra_colon = p.tok_ids[tok_i] == .colon_colon; + if (!ate_extra_colon) { + if (p.tok_ids[p.tok_i].isStringLiteral() or p.tok_ids[p.tok_i] == .l_bracket) { + while (true) { + try p.asmOperand(&names, &constraints, &exprs); + if (p.eatToken(.comma) == null) break; + } + } + } + } + + const num_outputs = names.items.len; + + // Inputs + if (ate_extra_colon or p.tok_ids[p.tok_i] == .colon or p.tok_ids[p.tok_i] == .colon_colon) { 
+ if (ate_extra_colon) { + ate_extra_colon = false; + } else { + ate_extra_colon = p.tok_ids[p.tok_i] == .colon_colon; + p.tok_i += 1; + } + if (!ate_extra_colon) { + if (p.tok_ids[p.tok_i].isStringLiteral() or p.tok_ids[p.tok_i] == .l_bracket) { + while (true) { + try p.asmOperand(&names, &constraints, &exprs); + if (p.eatToken(.comma) == null) break; + } + } + } + } + std.debug.assert(names.items.len == constraints.items.len and constraints.items.len == exprs.items.len); + const num_inputs = names.items.len - num_outputs; + _ = num_inputs; + + // Clobbers + if (ate_extra_colon or p.tok_ids[p.tok_i] == .colon or p.tok_ids[p.tok_i] == .colon_colon) { + if (ate_extra_colon) { + ate_extra_colon = false; + } else { + ate_extra_colon = p.tok_ids[p.tok_i] == .colon_colon; + p.tok_i += 1; + } + if (!ate_extra_colon and p.tok_ids[p.tok_i].isStringLiteral()) { + while (true) { + const clobber = try p.asmStr(); + try clobbers.append(clobber.node); + if (p.eatToken(.comma) == null) break; + } + } + } + + if (!quals.goto and (p.tok_ids[p.tok_i] != .r_paren or ate_extra_colon)) { + try p.errExtra(.expected_token, p.tok_i, .{ .tok_id = .{ .actual = p.tok_ids[p.tok_i], .expected = .r_paren } }); + return error.ParsingFailed; + } + + // Goto labels + var num_labels: u32 = 0; + if (ate_extra_colon or p.tok_ids[p.tok_i] == .colon) { + if (!ate_extra_colon) { + p.tok_i += 1; + } + while (true) { + const ident = (try p.eatIdentifier()) orelse { + try p.err(.expected_identifier); + return error.ParsingFailed; + }; + const ident_str = p.tokSlice(ident); + const label = p.findLabel(ident_str) orelse blk: { + try p.labels.append(.{ .unresolved_goto = ident }); + break :blk ident; + }; + try names.append(ident); + + const elem_ty = try p.arena.create(Type); + elem_ty.* = .{ .specifier = .void }; + const result_ty = Type{ .specifier = .pointer, .data = .{ .sub_type = elem_ty } }; + + const label_addr_node = try p.addNode(.{ + .tag = .addr_of_label, + .data = .{ .decl_ref = label }, + .ty = 
result_ty, + }); + try exprs.append(label_addr_node); + + num_labels += 1; + if (p.eatToken(.comma) == null) break; + } + } else if (quals.goto) { + try p.errExtra(.expected_token, p.tok_i, .{ .tok_id = .{ .actual = p.tok_ids[p.tok_i], .expected = .colon } }); + return error.ParsingFailed; + } + + // TODO: validate and insert into AST + return .none; +} + +fn checkAsmStr(p: *Parser, asm_str: Value, tok: TokenIndex) !void { + if (!p.comp.langopts.gnu_asm) { + const str = asm_str.data.bytes; + if (str.len > 1) { + // Empty string (just a NUL byte) is ok because it does not emit any assembly + try p.errTok(.gnu_asm_disabled, tok); + } + } +} + +/// assembly +/// : keyword_asm asmQual* '(' asmStr ')' +/// | keyword_asm asmQual* '(' gnuAsmStmt ')' +/// | keyword_asm msvcAsmStmt fn assembly(p: *Parser, kind: enum { global, decl_label, stmt }) Error!?NodeIndex { const asm_tok = p.tok_i; switch (p.tok_ids[p.tok_i]) { - .keyword_asm, .keyword_asm1, .keyword_asm2 => p.tok_i += 1, + .keyword_asm => { + try p.err(.extension_token_used); + p.tok_i += 1; + }, + .keyword_asm1, .keyword_asm2 => p.tok_i += 1, else => return null, } - var @"volatile" = false; - var @"inline" = false; - var goto = false; + if (!p.tok_ids[p.tok_i].canOpenGCCAsmStmt()) { + return p.msvcAsmStmt(); + } + + var quals: Tree.GNUAssemblyQualifiers = .{}; while (true) : (p.tok_i += 1) switch (p.tok_ids[p.tok_i]) { .keyword_volatile, .keyword_volatile1, .keyword_volatile2 => { if (kind != .stmt) try p.errStr(.meaningless_asm_qual, p.tok_i, "volatile"); - if (@"volatile") try p.errStr(.duplicate_asm_qual, p.tok_i, "volatile"); - @"volatile" = true; + if (quals.@"volatile") try p.errStr(.duplicate_asm_qual, p.tok_i, "volatile"); + quals.@"volatile" = true; }, .keyword_inline, .keyword_inline1, .keyword_inline2 => { if (kind != .stmt) try p.errStr(.meaningless_asm_qual, p.tok_i, "inline"); - if (@"inline") try p.errStr(.duplicate_asm_qual, p.tok_i, "inline"); - @"inline" = true; + if (quals.@"inline") try 
p.errStr(.duplicate_asm_qual, p.tok_i, "inline"); + quals.@"inline" = true; }, .keyword_goto => { if (kind != .stmt) try p.errStr(.meaningless_asm_qual, p.tok_i, "goto"); - if (goto) try p.errStr(.duplicate_asm_qual, p.tok_i, "goto"); - goto = true; + if (quals.goto) try p.errStr(.duplicate_asm_qual, p.tok_i, "goto"); + quals.goto = true; }, else => break, }; const l_paren = try p.expectToken(.l_paren); + var result_node: NodeIndex = .none; switch (kind) { .decl_label => { - const str = (try p.asmStr()).val.data.bytes; + const asm_str = try p.asmStr(); + const str = asm_str.val.data.bytes; const attr = Attribute{ .tag = .asm_label, .args = .{ .asm_label = .{ .name = str[0 .. str.len - 1] } }, .syntax = .keyword }; try p.attr_buf.append(p.gpa, .{ .attr = attr, .tok = asm_tok }); }, - .global => _ = try p.asmStr(), - .stmt => return p.todo("assembly statements"), + .global => { + const asm_str = try p.asmStr(); + try p.checkAsmStr(asm_str.val, l_paren); + result_node = try p.addNode(.{ + .tag = .file_scope_asm, + .ty = .{ .specifier = .void }, + .data = .{ .decl = .{ .name = asm_tok, .node = asm_str.node } }, + }); + }, + .stmt => result_node = try p.gnuAsmStmt(quals, l_paren), } try p.expectClosing(l_paren, .r_paren); if (kind != .decl_label) _ = try p.expectToken(.semicolon); - return .none; + return result_node; } /// Same as stringLiteral but errors on unicode and wide string literals diff --git a/src/Tokenizer.zig b/src/Tokenizer.zig index b0dfce8c..28de3c2c 100644 --- a/src/Tokenizer.zig +++ b/src/Tokenizer.zig @@ -759,6 +759,20 @@ pub const Token = struct { else => false, }; } + + pub fn canOpenGCCAsmStmt(id: Id) bool { + return switch (id) { + .keyword_volatile, .keyword_volatile1, .keyword_volatile2, .keyword_inline, .keyword_inline1, .keyword_inline2, .keyword_goto, .l_paren => true, + else => false, + }; + } + + pub fn isStringLiteral(id: Id) bool { + return switch (id) { + .string_literal, .string_literal_utf_16, .string_literal_utf_8, 
.string_literal_utf_32, .string_literal_wide => true, + else => false, + }; + } }; /// double underscore and underscore + capital letter identifiers diff --git a/src/Tree.zig b/src/Tree.zig index 3295daa5..717653ff 100644 --- a/src/Tree.zig +++ b/src/Tree.zig @@ -104,6 +104,12 @@ pub fn deinit(tree: *Tree) void { tree.value_map.deinit(); } +pub const GNUAssemblyQualifiers = struct { + @"volatile": bool = false, + @"inline": bool = false, + goto: bool = false, +}; + pub const Node = struct { tag: Tag, ty: Type = .{ .specifier = .void }, @@ -273,6 +279,9 @@ pub const Tag = enum(u8) { threadlocal_extern_var, threadlocal_static_var, + /// __asm__("...") at file scope + file_scope_asm, + // typedef declaration typedef, @@ -345,6 +354,8 @@ pub const Tag = enum(u8) { null_stmt, /// return first; first may be null return_stmt, + /// Assembly statement of the form __asm__("string literal") + gnu_asm_simple, // ====== Expr ====== @@ -764,6 +775,14 @@ fn dumpNode(tree: Tree, node: NodeIndex, level: u32, mapper: StringInterner.Type switch (tag) { .invalid => unreachable, + .file_scope_asm => { + try w.writeByteNTimes(' ', level + 1); + try tree.dumpNode(data.decl.node, level + delta, mapper, color, w); + }, + .gnu_asm_simple => { + try w.writeByteNTimes(' ', level); + try tree.dumpNode(data.un, level, mapper, color, w); + }, .static_assert => { try w.writeByteNTimes(' ', level + 1); try w.writeAll("condition:\n"); diff --git a/test/cases/gnu inline assembly statements.c b/test/cases/gnu inline assembly statements.c new file mode 100644 index 00000000..337ff5f0 --- /dev/null +++ b/test/cases/gnu inline assembly statements.c @@ -0,0 +1,133 @@ +//aro-args --target=x86-linux-gnu -pedantic + +// Note: examples taken from here: https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html +// Some examples do not use real instructions or register names, which may cause failures once we do +// validation of the assembly + +#include <stdbool.h> + +int add(void) { + int src = 1; + int dst; + + __asm__("mov 
%1, %0\n\t" + "add $1, %0" + : "=r" (dst) + : "r" (src)); + return dst; +} + +void names(void) { + unsigned Mask = 1234; + unsigned Index; + + __asm__ ("bsfl %[aMask], %[aIndex]" + : [aIndex] "=r" (Index) + : [aMask] "r" (Mask) + : "cc"); +} + +int with_goto(int p1, int p2) { + __asm__ goto( + "btl %1, %0\n\t" + "jc %l2" + : /* No outputs. */ + : "r" (p1), "r" (p2) + : "cc" + : carry); + + return 0; + + carry: + return 1; +} + +bool constraint_expr(int *Base, unsigned Offset) { + bool old; + + __asm__ ("btsl %2,%1\n\t" // Turn on zero-based bit #Offset in Base. + "sbb %0,%0" // Use the CF to calculate old. + : "=r" (old), "+rm" (*Base) + : "Ir" (Offset) + : "cc"); + + return old; +} + +void foo(void) { + unsigned c = 1; + unsigned d; + unsigned *e = &c; + + __asm__ ("mov %[e], %[d]" + : [d] "=rm" (d) + : [e] "rm" (*e)); +} + +void no_outputs(unsigned Offset) { + __asm__ ("some instructions" :: "r" (Offset / 8)); +} + +int frob(int x) { + int y; + __asm__ goto ("frob %%r5, %1; jc %l[error]; mov (%2), %%r5" + : /* No outputs. */ + : "r"(x), "r"(&y) + : "r5", "memory" + : error); + return y; +error: + return -1; +} + +int bad_goto_label(int p1, int p2) { + __asm__ goto( + "btl %1, %0\n\t" + "jc %l2" + : /* No outputs. */ + : "r" (p1), "r" (p2) + : "cc" + : carry_1); + + return 0; + carry: + return 1; +} + +int missing_goto_kw(int p1, int p2) { + __asm__( + "btl %1, %0\n\t" + "jc %l2" + : /* No outputs. */ + : "r" (p1), "r" (p2) + : "cc" + : carry); + + return 0; + carry: + return 1; +} + +int missing_goto_label(int p1, int p2) { + __asm__ goto( + "btl %1, %0\n\t" + "jc %l2" + : /* No outputs. 
*/ + : "r" (p1), "r" (p2) + : "cc" + ); + + return 0; + carry: + return 1; +} + +void extension_token(unsigned Offset) { + asm volatile("some instructions" :: ); +} + +#define EXPECTED_ERRORS "gnu inline assembly statements.c:90:11: error: use of undeclared label 'carry_1'" \ + "gnu inline assembly statements.c:104:9: error: expected ')', found ':'" \ + "gnu inline assembly statements.c:118:10: error: expected ':', found ')'" \ + "gnu inline assembly statements.c:126:5: warning: extension used [-Wlanguage-extension-token]" \ + diff --git a/test/cases/no inline asm.c b/test/cases/no inline asm.c new file mode 100644 index 00000000..d957d712 --- /dev/null +++ b/test/cases/no inline asm.c @@ -0,0 +1,7 @@ +//aro-args -fno-gnu-inline-asm + +__asm__("foo"); +__asm__(""); + +#define EXPECTED_ERRORS "no inline asm.c:3:8: error: GNU-style inline assembly is disabled" \ +