From 36b8cf3dbd2cb397eca8ee3fe87a50fefd2047c7 Mon Sep 17 00:00:00 2001 From: Akuli Date: Fri, 10 Jan 2025 15:02:51 +0200 Subject: [PATCH 1/9] refactor cfg and introduce back freeing --- .../{build_cfg.jou => build_cf_graph.jou} | 1 + compiler/cf_graph.jou | 319 ++++++++++++++++++ compiler/codegen.jou | 1 + compiler/free.jou | 30 -- compiler/main.jou | 6 +- compiler/print.jou | 185 ---------- compiler/structs.jou | 109 ------ 7 files changed, 324 insertions(+), 327 deletions(-) rename compiler/{build_cfg.jou => build_cf_graph.jou} (99%) create mode 100644 compiler/cf_graph.jou diff --git a/compiler/build_cfg.jou b/compiler/build_cf_graph.jou similarity index 99% rename from compiler/build_cfg.jou rename to compiler/build_cf_graph.jou index b450a56f..90b684e0 100644 --- a/compiler/build_cfg.jou +++ b/compiler/build_cf_graph.jou @@ -1,6 +1,7 @@ import "stdlib/str.jou" import "stdlib/mem.jou" +import "./cf_graph.jou" import "./structs.jou" import "./evaluate.jou" import "./types.jou" diff --git a/compiler/cf_graph.jou b/compiler/cf_graph.jou new file mode 100644 index 00000000..384bffb7 --- /dev/null +++ b/compiler/cf_graph.jou @@ -0,0 +1,319 @@ +import "stdlib/mem.jou" +import "stdlib/io.jou" +import "stdlib/str.jou" + +import "./errors_and_warnings.jou" +import "./structs.jou" +import "./print.jou" +import "./free.jou" +import "./types.jou" + + +class CfStringArray: + str: byte* + len: int + +enum CfInstructionKind: + Constant + SpecialConstant # e.g. "WINDOWS", unlike CF_Constant this doesn't trigger "this code will never run" warnings + StringArray + Call # function or method call, depending on whether self_type is NULL (see below) + AddressOfLocalVar + AddressOfGlobalVar + SizeOf + PtrMemsetToZero # takes one operand, a pointer: memset(ptr, 0, sizeof(*ptr)) + PtrStore # *op1 = op2 (does not use destvar, takes 2 operands) + PtrLoad # aka dereference + PtrToInt64 + Int64ToPtr + PtrClassField # takes 1 operand (pointer), sets destvar to &op->fieldname + PtrCast + PtrAddInt + # Left and right side of number operations must be of the same type (except CfInstructionKind::NumCast). + NumAdd + NumSub + NumMul + NumDiv + NumMod + NumEq + NumLt + NumCast + EnumToInt32 + Int32ToEnum + BoolNegate # TODO: get rid of this? + VarCpy # similar to assignment statements: var1 = var2 + +# Control Flow Graph. +# Struct names not prefixed with Cfg because it looks too much like "config" to me +class CfInstruction: + location: Location + kind: CfInstructionKind + union: + constant: Constant # CfInstructionKind::Constant + strarray: CfStringArray # CfInstructionKind::StringArray + signature: Signature # CfInstructionKind::Call + fieldname: byte[100] # CfInstructionKind::PtrClassField + globalname: byte[100] # CfInstructionKind::AddressOfGlobalVar + scname: byte[100] # CfInstructionKind::SpecialConstant + type: Type* # CfInstructionKind::SizeOf + + operands: LocalVariable** # e.g. numbers to add, function arguments + noperands: int + destvar: LocalVariable* # NULL when it doesn't make sense, e.g. functions that return void + hide_unreachable_warning: bool # usually false, can be set to true to avoid unreachable warning false positives + + # operands should be NULL-terminated array, or NULL for empty + # TODO: does it ever need to be NULL? + # TODO: do we need this method at all? + def set_operands(self, operands: LocalVariable**) -> None: + self->noperands = 0 + while operands != NULL and operands[self->noperands] != NULL: + self->noperands++ + + nbytes = sizeof(self->operands[0]) * self->noperands + self->operands = malloc(nbytes) + assert self->operands != NULL + memcpy(self->operands, operands, nbytes) + + def free(self) -> None: + if self->kind == CfInstructionKind::Constant: + free_constant(&self->constant) + if self->kind == CfInstructionKind::StringArray: + free(self->strarray.str) + if self->kind == CfInstructionKind::Call: + free_signature(&self->signature) + free(self->operands) + + +class CfBlock: + instructions: CfInstruction* + ninstructions: int + branchvar: LocalVariable* # boolean value used to decide where to jump next + + # iftrue and iffalse are NULL for special end block and after calling a noreturn function. + # When iftrue and iffalse are the same, the branchvar is not used and may be NULL. + iftrue: CfBlock* + iffalse: CfBlock* + + def free(self) -> None: + for ins = self->instructions; ins < &self->instructions[self->ninstructions]; ins++: + ins->free() + free(self->instructions) + + +class CfGraph: + signature: Signature + start_block: CfBlock # First block + end_block: CfBlock # Always empty. Return statement jumps here. + all_blocks: CfBlock** + n_all_blocks: int + locals: LocalVariable** # First n variables are the function arguments + nlocals: int + + def free(self) -> None: + free_signature(&self->signature) + for b = self->all_blocks; b < &self->all_blocks[self->n_all_blocks]; b++: + (*b)->free() + if *b != &self->start_block and *b != &self->end_block: + free(*b) + for v = self->locals; v < &self->locals[self->nlocals]; v++: + free(*v) + free(self->all_blocks) + free(self->locals) + + +class CfGraphFile: + filename: byte* + graphs: CfGraph** # only for defined functions + ngraphs: int + + def free(self) -> None: + for cfg = self->graphs; cfg < &self->graphs[self->ngraphs]; cfg++: + (*cfg)->free() + free(*cfg) + free(self->graphs) + + +def very_short_number_type_description(t: Type*) -> byte*: + if t->kind == TypeKind::FloatingPoint: + return "floating" + if t->kind == TypeKind::SignedInteger: + return "signed" + if t->kind == TypeKind::UnsignedInteger: + return "unsigned" + assert False + + + + +global printed_varnames: byte[10][10] +global printed_varnames_idx: int + + +def varname_for_printing(var: LocalVariable*) -> byte*: + if var->name[0] != '\0': + # it is named, not a dummy + return var->name + + # Cycle through enough space for a few variables, so that you + # can call this several times inside the same printf(). + s: byte* = printed_varnames[printed_varnames_idx++] + printed_varnames_idx %= (sizeof(printed_varnames) / sizeof(printed_varnames[0])) as int + + sprintf(s, "$%d", var->id) + return s + + +def print_cf_instruction(ins: CfInstruction*) -> None: + printf(" line %-4d ", ins->location.lineno) + + if ins->destvar != NULL: + printf("%s = ", varname_for_printing(ins->destvar)) + + if ins->kind == CfInstructionKind::AddressOfLocalVar: + printf("address of %s (local variable)", varname_for_printing(ins->operands[0])) + elif ins->kind == CfInstructionKind::AddressOfGlobalVar: + printf("address of %s (global variable)", ins->globalname) + elif ins->kind == CfInstructionKind::SizeOf: + printf("sizeof %s", ins->type->name) + elif ins->kind == CfInstructionKind::BoolNegate: + printf("boolean negation of %s", varname_for_printing(ins->operands[0])) + elif ins->kind == CfInstructionKind::Call: + if get_self_class(&ins->signature) != NULL: + printf("call method %s.", get_self_class(&ins->signature)->name) + else: + printf("call function ") + printf("%s(", ins->signature.name) + for i = 0; i < ins->noperands; i++: + if i != 0: + printf(", ") + printf("%s", varname_for_printing(ins->operands[i])) + printf(")") + elif ins->kind == CfInstructionKind::NumCast: + printf( + "number cast %s (%d-bit %s --> %d-bit %s)", + varname_for_printing(ins->operands[0]), + ins->operands[0]->type->size_in_bits, + very_short_number_type_description(ins->operands[0]->type), + ins->destvar->type->size_in_bits, + very_short_number_type_description(ins->destvar->type)) + elif ins->kind == CfInstructionKind::EnumToInt32: + printf("cast %s from enum to 32-bit signed int", varname_for_printing(ins->operands[0])) + elif ins->kind == CfInstructionKind::Int32ToEnum: + printf("cast %s from 32-bit signed int to enum", varname_for_printing(ins->operands[0])) + elif ins->kind == CfInstructionKind::PtrToInt64: + printf("cast %s to 64-bit integer", varname_for_printing(ins->operands[0])) + elif ins->kind == CfInstructionKind::Int64ToPtr: + printf("cast %s from 64-bit integer to pointer", varname_for_printing(ins->operands[0])) + elif ins->kind == CfInstructionKind::Constant: + print_constant(&ins->constant) + elif ins->kind == CfInstructionKind::SpecialConstant: + printf("special constant \"%s\"", ins->scname) + elif ins->kind == CfInstructionKind::StringArray: + printf("string array ") + print_string(ins->strarray.str, ins->strarray.len) + elif ( + ins->kind == CfInstructionKind::NumAdd + or ins->kind == CfInstructionKind::NumSub + or ins->kind == CfInstructionKind::NumMul + or ins->kind == CfInstructionKind::NumDiv + or ins->kind == CfInstructionKind::NumMod + or ins->kind == CfInstructionKind::NumEq + or ins->kind == CfInstructionKind::NumLt + ): + if ins->kind == CfInstructionKind::NumAdd: + printf("num add ") + elif ins->kind == CfInstructionKind::NumSub: + printf("num sub ") + elif ins->kind == CfInstructionKind::NumMul: + printf("num mul ") + elif ins->kind == CfInstructionKind::NumDiv: + printf("num div ") + elif ins->kind == CfInstructionKind::NumMod: + printf("num mod ") + elif ins->kind == CfInstructionKind::NumEq: + printf("num eq ") + elif ins->kind == CfInstructionKind::NumLt: + printf("num lt ") + else: + assert False + printf("%s, %s", varname_for_printing(ins->operands[0]), varname_for_printing(ins->operands[1])) + elif ins->kind == CfInstructionKind::PtrLoad: + # Extra parentheses to make these stand out a bit. + printf("*(%s)", varname_for_printing(ins->operands[0])) + elif ins->kind == CfInstructionKind::PtrStore: + printf("*(%s) = %s", varname_for_printing(ins->operands[0]), varname_for_printing(ins->operands[1])) + elif ins->kind == CfInstructionKind::PtrAddInt: + printf("ptr %s + integer %s", varname_for_printing(ins->operands[0]), varname_for_printing(ins->operands[1])) + elif ins->kind == CfInstructionKind::PtrClassField: + printf("%s + offset of field \"%s\"", varname_for_printing(ins->operands[0]), ins->fieldname) + elif ins->kind == CfInstructionKind::PtrCast: + printf("pointer cast %s", varname_for_printing(ins->operands[0])) + elif ins->kind == CfInstructionKind::PtrMemsetToZero: + printf("set value of pointer %s to zero bytes", varname_for_printing(ins->operands[0])) + elif ins->kind == CfInstructionKind::VarCpy: + printf("%s", varname_for_printing(ins->operands[0])) + else: + assert False + printf("\n") + + +def print_control_flow_graph(cfg: CfGraph*) -> None: + printed_varnames_idx = 0 + + sigstr: byte* = signature_to_string(&cfg->signature, True, True) + printf("Function %s\n", sigstr) + free(sigstr) + + printf(" Variables:\n") + for var = cfg->locals; var < &cfg->locals[cfg->nlocals]; var++: + printf(" %-20s %s\n", varname_for_printing(*var), (*var)->type->name) + + for blockidx = 0; blockidx < cfg->n_all_blocks; blockidx++: + b = cfg->all_blocks[blockidx] + + printf(" Block %d", blockidx) + #printf(" at %p", b) + + if b == &cfg->start_block: + printf(" (start block)") + if b == &cfg->end_block: + assert b->ninstructions == 0 + printf(" is the end block.\n") + continue + + printf(":\n") + + for ins = b->instructions; ins < &b->instructions[b->ninstructions]; ins++: + print_cf_instruction(ins) + + if b == &cfg->end_block: + assert b->iftrue == NULL + assert b->iffalse == NULL + elif b->iftrue == NULL and b->iffalse == NULL: + printf(" Execution stops here. We have called a noreturn function.\n") + else: + trueidx = -1 + falseidx = -1 + for i = 0; i < cfg->n_all_blocks; i++: + if cfg->all_blocks[i] == b->iftrue: + trueidx = i + if cfg->all_blocks[i]==b->iffalse: + falseidx = i + assert trueidx != -1 + assert falseidx != -1 + if trueidx == falseidx: + printf(" Jump to block %d.\n", trueidx) + else: + assert b->branchvar != NULL + printf(" If %s is True jump to block %d, otherwise block %d.\n", + varname_for_printing(b->branchvar), trueidx, falseidx) + + printf("\n") + + +def print_control_flow_graphs(cfgfile: CfGraphFile*) -> None: + printf("===== Control Flow Graphs for file \"%s\" =====\n", cfgfile->filename) + for cfg = cfgfile->graphs; cfg < &cfgfile->graphs[cfgfile->ngraphs]; cfg++: + print_control_flow_graph(*cfg) + diff --git a/compiler/codegen.jou b/compiler/codegen.jou index bec9f6e7..e080da87 100644 --- a/compiler/codegen.jou +++ b/compiler/codegen.jou @@ -3,6 +3,7 @@ import "stdlib/mem.jou" import "stdlib/str.jou" import "./evaluate.jou" +import "./cf_graph.jou" import "./llvm.jou" import "./target.jou" import "./types.jou" diff --git a/compiler/free.jou b/compiler/free.jou index 61635d27..84cc6e62 100644 --- a/compiler/free.jou +++ b/compiler/free.jou @@ -39,33 +39,3 @@ def free_file_types(ft: FileTypes*) -> None: free(ft->owned_types) free(ft->functions) free(ft->fomtypes) - -def free_control_flow_graph_block(cfg: CfGraph*, b: CfBlock*) -> None: - for ins = b->instructions; ins < &b->instructions[b->ninstructions]; ins++: - if ins->kind == CfInstructionKind::Constant: - free_constant(&ins->constant) - if ins->kind == CfInstructionKind::StringArray: - free(ins->strarray.str) - if ins->kind == CfInstructionKind::Call: - free_signature(&ins->signature) - free(ins->operands) - free(b->instructions) - if b != &cfg->start_block and b != &cfg->end_block: - free(b) - -def free_cfg(cfg: CfGraph*) -> None: - free_signature(&cfg->signature) - - for b = cfg->all_blocks; b < &cfg->all_blocks[cfg->n_all_blocks]; b++: - free_control_flow_graph_block(cfg, *b) - for v = cfg->locals; v < &cfg->locals[cfg->nlocals]; v++: - free(*v) - - free(cfg->all_blocks) - free(cfg->locals) - free(cfg) - -def free_control_flow_graphs(cfgfile: CfGraphFile*) -> None: - for cfg = cfgfile->graphs; cfg < &cfgfile->graphs[cfgfile->ngraphs]; cfg++: - free_cfg(*cfg) - free(cfgfile->graphs) diff --git a/compiler/main.jou b/compiler/main.jou index 9cbaa8ed..ab974682 100644 --- a/compiler/main.jou +++ b/compiler/main.jou @@ -4,7 +4,8 @@ import "stdlib/mem.jou" import "stdlib/errno.jou" import "stdlib/str.jou" -import "./build_cfg.jou" +import "./cf_graph.jou" +import "./build_cf_graph.jou" import "./evaluate.jou" import "./run.jou" import "./codegen.jou" @@ -482,8 +483,7 @@ def main(argc: int, argv: byte**) -> int: cf_graphs = compst.files[i].build_cf_graphs() llvm_ir = compst.files[i].build_llvm_ir(&cf_graphs) - # TODO: free the control flow graphs, this crashes for some reason - #free_control_flow_graphs(&cf_graphs) + cf_graphs.free() objpaths[i] = compile_to_object_file(llvm_ir) LLVMDisposeModule(llvm_ir) diff --git a/compiler/print.jou b/compiler/print.jou index 6b9d44e4..097d2daf 100644 --- a/compiler/print.jou +++ b/compiler/print.jou @@ -1,10 +1,7 @@ import "stdlib/io.jou" -import "stdlib/str.jou" -import "stdlib/mem.jou" import "./llvm.jou" import "./structs.jou" -import "./types.jou" def print_string(s: byte*, len: int) -> None: @@ -51,188 +48,6 @@ def print_constant(c: Constant*) -> None: assert False -global printed_varnames: byte[10][10] -global printed_varnames_idx: int - - - -def varname_for_printing(var: LocalVariable*) -> byte*: - if var->name[0] != '\0': - # it is named, not a dummy - return var->name - - # Cycle through enough space for a few variables, so that you - # can call this several times inside the same printf(). - s: byte* = printed_varnames[printed_varnames_idx++] - printed_varnames_idx %= (sizeof(printed_varnames) / sizeof(printed_varnames[0])) as int - - sprintf(s, "$%d", var->id) - return s - - -def very_short_number_type_description(t: Type*) -> byte*: - if t->kind == TypeKind::FloatingPoint: - return "floating" - if t->kind == TypeKind::SignedInteger: - return "signed" - if t->kind == TypeKind::UnsignedInteger: - return "unsigned" - assert False - - -def print_cf_instruction(ins: CfInstruction*) -> None: - printf(" line %-4d ", ins->location.lineno) - - if ins->destvar != NULL: - printf("%s = ", varname_for_printing(ins->destvar)) - - if ins->kind == CfInstructionKind::AddressOfLocalVar: - printf("address of %s (local variable)", varname_for_printing(ins->operands[0])) - elif ins->kind == CfInstructionKind::AddressOfGlobalVar: - printf("address of %s (global variable)", ins->globalname) - elif ins->kind == CfInstructionKind::SizeOf: - printf("sizeof %s", ins->type->name) - elif ins->kind == CfInstructionKind::BoolNegate: - printf("boolean negation of %s", varname_for_printing(ins->operands[0])) - elif ins->kind == CfInstructionKind::Call: - if get_self_class(&ins->signature) != NULL: - printf("call method %s.", get_self_class(&ins->signature)->name) - else: - printf("call function ") - printf("%s(", ins->signature.name) - for i = 0; i < ins->noperands; i++: - if i != 0: - printf(", ") - printf("%s", varname_for_printing(ins->operands[i])) - printf(")") - elif ins->kind == CfInstructionKind::NumCast: - printf( - "number cast %s (%d-bit %s --> %d-bit %s)", - varname_for_printing(ins->operands[0]), - ins->operands[0]->type->size_in_bits, - very_short_number_type_description(ins->operands[0]->type), - ins->destvar->type->size_in_bits, - very_short_number_type_description(ins->destvar->type)) - elif ins->kind == CfInstructionKind::EnumToInt32: - printf("cast %s from enum to 32-bit signed int", varname_for_printing(ins->operands[0])) - elif ins->kind == CfInstructionKind::Int32ToEnum: - printf("cast %s from 32-bit signed int to enum", varname_for_printing(ins->operands[0])) - elif ins->kind == CfInstructionKind::PtrToInt64: - printf("cast %s to 64-bit integer", varname_for_printing(ins->operands[0])) - elif ins->kind == CfInstructionKind::Int64ToPtr: - printf("cast %s from 64-bit integer to pointer", varname_for_printing(ins->operands[0])) - elif ins->kind == CfInstructionKind::Constant: - print_constant(&ins->constant) - elif ins->kind == CfInstructionKind::SpecialConstant: - printf("special constant \"%s\"", ins->scname) - elif ins->kind == CfInstructionKind::StringArray: - printf("string array ") - print_string(ins->strarray.str, ins->strarray.len) - elif ( - ins->kind == CfInstructionKind::NumAdd - or ins->kind == CfInstructionKind::NumSub - or ins->kind == CfInstructionKind::NumMul - or ins->kind == CfInstructionKind::NumDiv - or ins->kind == CfInstructionKind::NumMod - or ins->kind == CfInstructionKind::NumEq - or ins->kind == CfInstructionKind::NumLt - ): - if ins->kind == CfInstructionKind::NumAdd: - printf("num add ") - elif ins->kind == CfInstructionKind::NumSub: - printf("num sub ") - elif ins->kind == CfInstructionKind::NumMul: - printf("num mul ") - elif ins->kind == CfInstructionKind::NumDiv: - printf("num div ") - elif ins->kind == CfInstructionKind::NumMod: - printf("num mod ") - elif ins->kind == CfInstructionKind::NumEq: - printf("num eq ") - elif ins->kind == CfInstructionKind::NumLt: - printf("num lt ") - else: - assert False - printf("%s, %s", varname_for_printing(ins->operands[0]), varname_for_printing(ins->operands[1])) - elif ins->kind == CfInstructionKind::PtrLoad: - # Extra parentheses to make these stand out a bit. - printf("*(%s)", varname_for_printing(ins->operands[0])) - elif ins->kind == CfInstructionKind::PtrStore: - printf("*(%s) = %s", varname_for_printing(ins->operands[0]), varname_for_printing(ins->operands[1])) - elif ins->kind == CfInstructionKind::PtrAddInt: - printf("ptr %s + integer %s", varname_for_printing(ins->operands[0]), varname_for_printing(ins->operands[1])) - elif ins->kind == CfInstructionKind::PtrClassField: - printf("%s + offset of field \"%s\"", varname_for_printing(ins->operands[0]), ins->fieldname) - elif ins->kind == CfInstructionKind::PtrCast: - printf("pointer cast %s", varname_for_printing(ins->operands[0])) - elif ins->kind == CfInstructionKind::PtrMemsetToZero: - printf("set value of pointer %s to zero bytes", varname_for_printing(ins->operands[0])) - elif ins->kind == CfInstructionKind::VarCpy: - printf("%s", varname_for_printing(ins->operands[0])) - else: - assert False - printf("\n") - - -def print_control_flow_graph(cfg: CfGraph*) -> None: - printed_varnames_idx = 0 - - sigstr: byte* = signature_to_string(&cfg->signature, True, True) - printf("Function %s\n", sigstr) - free(sigstr) - - printf(" Variables:\n") - for var = cfg->locals; var < &cfg->locals[cfg->nlocals]; var++: - printf(" %-20s %s\n", varname_for_printing(*var), (*var)->type->name) - - for blockidx = 0; blockidx < cfg->n_all_blocks; blockidx++: - b = cfg->all_blocks[blockidx] - - printf(" Block %d", blockidx) - #printf(" at %p", b) - - if b == &cfg->start_block: - printf(" (start block)") - if b == &cfg->end_block: - assert b->ninstructions == 0 - printf(" is the end block.\n") - continue - - printf(":\n") - - for ins = b->instructions; ins < &b->instructions[b->ninstructions]; ins++: - print_cf_instruction(ins) - - if b == &cfg->end_block: - assert b->iftrue == NULL - assert b->iffalse == NULL - elif b->iftrue == NULL and b->iffalse == NULL: - printf(" Execution stops here. We have called a noreturn function.\n") - else: - trueidx = -1 - falseidx = -1 - for i = 0; i < cfg->n_all_blocks; i++: - if cfg->all_blocks[i] == b->iftrue: - trueidx = i - if cfg->all_blocks[i]==b->iffalse: - falseidx = i - assert trueidx != -1 - assert falseidx != -1 - if trueidx == falseidx: - printf(" Jump to block %d.\n", trueidx) - else: - assert b->branchvar != NULL - printf(" If %s is True jump to block %d, otherwise block %d.\n", - varname_for_printing(b->branchvar), trueidx, falseidx) - - printf("\n") - - -def print_control_flow_graphs(cfgfile: CfGraphFile*) -> None: - printf("===== Control Flow Graphs for file \"%s\" =====\n", cfgfile->filename) - for cfg = cfgfile->graphs; cfg < &cfgfile->graphs[cfgfile->ngraphs]; cfg++: - print_control_flow_graph(*cfg) - def print_llvm_ir(module: LLVMModule*, is_optimized: bool) -> None: if is_optimized: opt_or_unopt = "Optimized" diff --git a/compiler/structs.jou b/compiler/structs.jou index e11ad2ef..b8e43929 100644 --- a/compiler/structs.jou +++ b/compiler/structs.jou @@ -1,9 +1,7 @@ # TODO: delete this file, merge into others import "stdlib/str.jou" -import "stdlib/mem.jou" -import "./llvm.jou" import "./ast.jou" import "./types.jou" import "./errors_and_warnings.jou" @@ -143,110 +141,3 @@ class FileTypes: ntypes: int functions: SignatureAndUsedPtr* nfunctions: int - - -class CfStringArray: - str: byte* - len: int - -enum CfInstructionKind: - Constant - SpecialConstant # e.g. "WINDOWS", unlike CF_Constant this doesn't trigger "this code will never run" warnings - StringArray - Call # function or method call, depending on whether self_type is NULL (see below) - AddressOfLocalVar - AddressOfGlobalVar - SizeOf - PtrMemsetToZero # takes one operand, a pointer: memset(ptr, 0, sizeof(*ptr)) - PtrStore # *op1 = op2 (does not use destvar, takes 2 operands) - PtrLoad # aka dereference - PtrToInt64 - Int64ToPtr - PtrClassField # takes 1 operand (pointer), sets destvar to &op->fieldname - PtrCast - PtrAddInt - # Left and right side of number operations must be of the same type (except CfInstructionKind::NumCast). - NumAdd - NumSub - NumMul - NumDiv - NumMod - NumEq - NumLt - NumCast - EnumToInt32 - Int32ToEnum - BoolNegate # TODO: get rid of this? - VarCpy # similar to assignment statements: var1 = var2 - -# Control Flow Graph. -# Struct names not prefixed with Cfg because it looks too much like "config" to me -class CfInstruction: - location: Location - kind: CfInstructionKind - union: - constant: Constant # CfInstructionKind::Constant - strarray: CfStringArray # CfInstructionKind::StringArray - signature: Signature # CfInstructionKind::Call - fieldname: byte[100] # CfInstructionKind::PtrClassField - globalname: byte[100] # CfInstructionKind::AddressOfGlobalVar - scname: byte[100] # CfInstructionKind::SpecialConstant - type: Type* # CfInstructionKind::SizeOf - - operands: LocalVariable** # e.g. numbers to add, function arguments - noperands: int - destvar: LocalVariable* # NULL when it doesn't make sense, e.g. functions that return void - hide_unreachable_warning: bool # usually false, can be set to true to avoid unreachable warning false positives - - # operands should be NULL-terminated array, or NULL for empty - # TODO: does it ever need to be NULL? - # TODO: do we need this method at all? - def set_operands(self, operands: LocalVariable**) -> None: - self->noperands = 0 - while operands != NULL and operands[self->noperands] != NULL: - self->noperands++ - - nbytes = sizeof(self->operands[0]) * self->noperands - self->operands = malloc(nbytes) - assert self->operands != NULL - memcpy(self->operands, operands, nbytes) - - -class CfBlock: - instructions: CfInstruction* - ninstructions: int - branchvar: LocalVariable* # boolean value used to decide where to jump next - - # iftrue and iffalse are NULL for special end block and after calling a noreturn function. - # When iftrue and iffalse are the same, the branchvar is not used and may be NULL. - iftrue: CfBlock* - iffalse: CfBlock* - -class CfGraph: - signature: Signature - start_block: CfBlock # First block - end_block: CfBlock # Always empty. Return statement jumps here. - all_blocks: CfBlock** - n_all_blocks: int - locals: LocalVariable** # First n variables are the function arguments - nlocals: int - -class CfGraphFile: - filename: byte* - graphs: CfGraph** # only for defined functions - ngraphs: int - - -# LLVM makes a mess of how to define what kind of computer will run the -# compiled programs. Sometimes it wants a target triple, sometimes a -# data layout. Sometimes it wants a string, sometimes an object -# representing the thing. -# -# This struct aims to provide everything you may ever need. Hopefully it -# will make the mess slightly less miserable to you. -class Target: - triple: byte[100] - data_layout: byte[500] - target_ref: LLVMTarget* - target_machine_ref: LLVMTargetMachine* - target_data_ref: LLVMTargetData* From af22065840d71f3a5051f427ae0440feb4d57373 Mon Sep 17 00:00:00 2001 From: Akuli Date: Fri, 10 Jan 2025 15:23:39 +0200 Subject: [PATCH 2/9] moar cleanup --- compiler/cf_graph.jou | 100 ++++++++++++++++++++++++------------------ compiler/structs.jou | 20 ++++++++- 2 files changed, 76 insertions(+), 44 deletions(-) diff --git a/compiler/cf_graph.jou b/compiler/cf_graph.jou index 384bffb7..376b0954 100644 --- a/compiler/cf_graph.jou +++ b/compiler/cf_graph.jou @@ -1,6 +1,5 @@ import "stdlib/mem.jou" import "stdlib/io.jou" -import "stdlib/str.jou" import "./errors_and_warnings.jou" import "./structs.jou" @@ -144,40 +143,24 @@ def very_short_number_type_description(t: Type*) -> byte*: assert False - - -global printed_varnames: byte[10][10] -global printed_varnames_idx: int - - -def varname_for_printing(var: LocalVariable*) -> byte*: - if var->name[0] != '\0': - # it is named, not a dummy - return var->name - - # Cycle through enough space for a few variables, so that you - # can call this several times inside the same printf(). - s: byte* = printed_varnames[printed_varnames_idx++] - printed_varnames_idx %= (sizeof(printed_varnames) / sizeof(printed_varnames[0])) as int - - sprintf(s, "$%d", var->id) - return s - - def print_cf_instruction(ins: CfInstruction*) -> None: printf(" line %-4d ", ins->location.lineno) if ins->destvar != NULL: - printf("%s = ", varname_for_printing(ins->destvar)) + ins->destvar->print() + printf(" = ") if ins->kind == CfInstructionKind::AddressOfLocalVar: - printf("address of %s (local variable)", varname_for_printing(ins->operands[0])) + printf("address of ") + ins->operands[0]->print() + printf(" (local variable)") elif ins->kind == CfInstructionKind::AddressOfGlobalVar: printf("address of %s (global variable)", ins->globalname) elif ins->kind == CfInstructionKind::SizeOf: printf("sizeof %s", ins->type->name) elif ins->kind == CfInstructionKind::BoolNegate: - printf("boolean negation of %s", varname_for_printing(ins->operands[0])) + printf("boolean negation of ") + ins->operands[0]->print() elif ins->kind == CfInstructionKind::Call: if get_self_class(&ins->signature) != NULL: printf("call method %s.", get_self_class(&ins->signature)->name) @@ -187,24 +170,34 @@ def print_cf_instruction(ins: CfInstruction*) -> None: for i = 0; i < ins->noperands; i++: if i != 0: printf(", ") - printf("%s", varname_for_printing(ins->operands[i])) + ins->operands[i]->print() printf(")") elif ins->kind == CfInstructionKind::NumCast: + printf("number cast ") + ins->operands[0]->print() printf( - "number cast %s (%d-bit %s --> %d-bit %s)", - varname_for_printing(ins->operands[0]), + " (%d-bit %s --> %d-bit %s)", ins->operands[0]->type->size_in_bits, very_short_number_type_description(ins->operands[0]->type), ins->destvar->type->size_in_bits, - very_short_number_type_description(ins->destvar->type)) + very_short_number_type_description(ins->destvar->type), + ) elif ins->kind == CfInstructionKind::EnumToInt32: - printf("cast %s from enum to 32-bit signed int", varname_for_printing(ins->operands[0])) + printf("cast ") + ins->operands[0]->print() + printf(" from enum to 32-bit signed int") elif ins->kind == CfInstructionKind::Int32ToEnum: - printf("cast %s from 32-bit signed int to enum", varname_for_printing(ins->operands[0])) + printf("cast ") + ins->operands[0]->print() + printf(" from 32-bit signed int to enum") elif ins->kind == CfInstructionKind::PtrToInt64: - printf("cast %s to 64-bit integer", varname_for_printing(ins->operands[0])) + printf("cast ") + ins->operands[0]->print() + printf(" to 64-bit integer") elif ins->kind == CfInstructionKind::Int64ToPtr: - printf("cast %s from 64-bit integer to pointer", varname_for_printing(ins->operands[0])) + printf("cast ") + ins->operands[0]->print() + printf(" from 64-bit integer to pointer") elif ins->kind == CfInstructionKind::Constant: print_constant(&ins->constant) elif ins->kind == CfInstructionKind::SpecialConstant: @@ -237,22 +230,36 @@ def print_cf_instruction(ins: CfInstruction*) -> None: printf("num lt ") else: assert False - printf("%s, %s", varname_for_printing(ins->operands[0]), varname_for_printing(ins->operands[1])) + ins->operands[0]->print() + printf(", ") + ins->operands[1]->print() elif ins->kind == CfInstructionKind::PtrLoad: # Extra parentheses to make these stand out a bit. - printf("*(%s)", varname_for_printing(ins->operands[0])) + printf("*(") + ins->operands[0]->print() + printf(")") elif ins->kind == CfInstructionKind::PtrStore: - printf("*(%s) = %s", varname_for_printing(ins->operands[0]), varname_for_printing(ins->operands[1])) + printf("*(") + ins->operands[0]->print() + printf(") = ") + ins->operands[1]->print() elif ins->kind == CfInstructionKind::PtrAddInt: - printf("ptr %s + integer %s", varname_for_printing(ins->operands[0]), varname_for_printing(ins->operands[1])) + printf("ptr ") + ins->operands[0]->print() + printf(" + integer ") + ins->operands[1]->print() elif ins->kind == CfInstructionKind::PtrClassField: - printf("%s + offset of field \"%s\"", varname_for_printing(ins->operands[0]), ins->fieldname) + ins->operands[0]->print() + printf(" + offset of field \"%s\"", ins->fieldname) elif ins->kind == CfInstructionKind::PtrCast: - printf("pointer cast %s", varname_for_printing(ins->operands[0])) + printf("pointer cast ") + ins->operands[0]->print() elif ins->kind == CfInstructionKind::PtrMemsetToZero: - printf("set value of pointer %s to zero bytes", varname_for_printing(ins->operands[0])) + printf("set value of pointer ") + ins->operands[0]->print() + printf(" to zero bytes") elif ins->kind == CfInstructionKind::VarCpy: - printf("%s", varname_for_printing(ins->operands[0])) + ins->operands[0]->print() else: assert False printf("\n") @@ -267,7 +274,13 @@ def print_control_flow_graph(cfg: CfGraph*) -> None: printf(" Variables:\n") for var = cfg->locals; var < &cfg->locals[cfg->nlocals]; var++: - printf(" %-20s %s\n", varname_for_printing(*var), (*var)->type->name) + printf(" ") + (*var)->print() + # Pad variable names with spaces to align them. + for i = (*var)->print_width(); i < 20; i++: + putchar(' ') + # If variable name is very long, put two spaces even in that case. + printf(" %s\n", (*var)->type->name) for blockidx = 0; blockidx < cfg->n_all_blocks; blockidx++: b = cfg->all_blocks[blockidx] @@ -306,8 +319,9 @@ def print_control_flow_graph(cfg: CfGraph*) -> None: printf(" Jump to block %d.\n", trueidx) else: assert b->branchvar != NULL - printf(" If %s is True jump to block %d, otherwise block %d.\n", - varname_for_printing(b->branchvar), trueidx, falseidx) + printf(" If ") + b->branchvar->print() + printf(" is True jump to block %d, otherwise block %d.\n", trueidx, falseidx) printf("\n") diff --git a/compiler/structs.jou b/compiler/structs.jou index b8e43929..80c97f4e 100644 --- a/compiler/structs.jou +++ b/compiler/structs.jou @@ -1,6 +1,7 @@ # TODO: delete this file, merge into others import "stdlib/str.jou" +import "stdlib/io.jou" import "./ast.jou" import "./types.jou" @@ -85,12 +86,29 @@ class GlobalVariable: defined_in_current_file: bool # not declare-only (e.g. stdout) or imported usedptr: bool* # If non-NULL, set to true when the variable is used. This is how we detect unused imports. + class LocalVariable: - id: int # Unique, but you can also compare pointers to Variable. + id: int # Unique, but you can also compare pointers to LocalVariable. name: byte[100] # Same name as in user's code, empty for temporary variables created by compiler type: Type* is_argument: bool # First n variables are always the arguments + def print(self) -> int: + if self->name[0] != '\0': + printf("%s", self->name) + else: + # Anonymous temporary variable created by compiler. + printf("$%d", self->id) + + # Return how many characters print() outputs. + def print_width(self) -> int: + if self->name[0] != '\0': + return strlen(self->name) as int + temp: byte[100] + sprintf(temp, "%d", self->id) + return 1 + (strlen(temp) as int) + + class ExpressionTypes: expr: AstExpression* # not owned type: Type* From a7eb75068a712bf9a1f0d98f7e5b299a2da82ee5 Mon Sep 17 00:00:00 2001 From: Akuli Date: Fri, 10 Jan 2025 15:30:54 +0200 Subject: [PATCH 3/9] refactor to methods --- compiler/cf_graph.jou | 419 +++++++++++++++++++++--------------------- compiler/main.jou | 4 +- 2 files changed, 210 insertions(+), 213 deletions(-) diff --git a/compiler/cf_graph.jou b/compiler/cf_graph.jou index 376b0954..8ab803b4 100644 --- a/compiler/cf_graph.jou +++ b/compiler/cf_graph.jou @@ -1,3 +1,6 @@ +# Control Flow Graph. +# Struct names not prefixed with Cfg because it looks too much like "config" to me + import "stdlib/mem.jou" import "stdlib/io.jou" @@ -8,6 +11,16 @@ import "./free.jou" import "./types.jou" +def very_short_number_type_description(t: Type*) -> byte*: + if t->kind == TypeKind::FloatingPoint: + return "floating" + if t->kind == TypeKind::SignedInteger: + return "signed" + if t->kind == TypeKind::UnsignedInteger: + return "unsigned" + assert False + + class CfStringArray: str: byte* len: int @@ -42,8 +55,6 @@ enum CfInstructionKind: BoolNegate # TODO: get rid of this? VarCpy # similar to assignment statements: var1 = var2 -# Control Flow Graph. -# Struct names not prefixed with Cfg because it looks too much like "config" to me class CfInstruction: location: Location kind: CfInstructionKind @@ -61,6 +72,136 @@ class CfInstruction: destvar: LocalVariable* # NULL when it doesn't make sense, e.g. functions that return void hide_unreachable_warning: bool # usually false, can be set to true to avoid unreachable warning false positives + def print(self) -> None: + printf(" line %-4d ", self->location.lineno) + + if self->destvar != NULL: + self->destvar->print() + printf(" = ") + + if self->kind == CfInstructionKind::AddressOfLocalVar: + printf("address of ") + self->operands[0]->print() + printf(" (local variable)") + elif self->kind == CfInstructionKind::AddressOfGlobalVar: + printf("address of %s (global variable)", self->globalname) + elif self->kind == CfInstructionKind::SizeOf: + printf("sizeof %s", self->type->name) + elif self->kind == CfInstructionKind::BoolNegate: + printf("boolean negation of ") + self->operands[0]->print() + elif self->kind == CfInstructionKind::Call: + if get_self_class(&self->signature) != NULL: + printf("call method %s.", get_self_class(&self->signature)->name) + else: + printf("call function ") + printf("%s(", self->signature.name) + for i = 0; i < self->noperands; i++: + if i != 0: + printf(", ") + self->operands[i]->print() + printf(")") + elif self->kind == CfInstructionKind::NumCast: + printf("number cast ") + self->operands[0]->print() + printf( + " (%d-bit %s --> %d-bit %s)", + self->operands[0]->type->size_in_bits, + very_short_number_type_description(self->operands[0]->type), + self->destvar->type->size_in_bits, + very_short_number_type_description(self->destvar->type), + ) + elif self->kind == CfInstructionKind::EnumToInt32: + printf("cast ") + self->operands[0]->print() + printf(" from enum to 32-bit signed int") + elif self->kind == CfInstructionKind::Int32ToEnum: + printf("cast ") + self->operands[0]->print() + printf(" from 32-bit signed int to enum") + elif self->kind == CfInstructionKind::PtrToInt64: + printf("cast ") + self->operands[0]->print() + printf(" to 64-bit integer") + elif self->kind == CfInstructionKind::Int64ToPtr: + printf("cast ") + self->operands[0]->print() + printf(" from 64-bit integer to pointer") + elif self->kind == CfInstructionKind::Constant: + print_constant(&self->constant) + elif self->kind == CfInstructionKind::SpecialConstant: + printf("special constant \"%s\"", self->scname) + elif self->kind == CfInstructionKind::StringArray: + printf("string array ") + print_string(self->strarray.str, self->strarray.len) + elif ( + self->kind == CfInstructionKind::NumAdd + or self->kind == CfInstructionKind::NumSub + or self->kind == CfInstructionKind::NumMul + or self->kind == CfInstructionKind::NumDiv + or self->kind == CfInstructionKind::NumMod + or self->kind == CfInstructionKind::NumEq + or self->kind == CfInstructionKind::NumLt + ): + if self->kind == CfInstructionKind::NumAdd: + printf("num add ") + elif self->kind == CfInstructionKind::NumSub: + printf("num sub ") + elif self->kind == CfInstructionKind::NumMul: + printf("num mul ") + elif self->kind == CfInstructionKind::NumDiv: + printf("num div ") + elif self->kind == CfInstructionKind::NumMod: + printf("num mod ") + elif self->kind == CfInstructionKind::NumEq: + printf("num eq ") + elif self->kind == CfInstructionKind::NumLt: + printf("num lt ") + else: + assert False + self->operands[0]->print() + printf(", ") + self->operands[1]->print() + elif self->kind == CfInstructionKind::PtrLoad: + # Extra parentheses to make these stand out a bit. + printf("*(") + self->operands[0]->print() + printf(")") + elif self->kind == CfInstructionKind::PtrStore: + printf("*(") + self->operands[0]->print() + printf(") = ") + self->operands[1]->print() + elif self->kind == CfInstructionKind::PtrAddInt: + printf("ptr ") + self->operands[0]->print() + printf(" + integer ") + self->operands[1]->print() + elif self->kind == CfInstructionKind::PtrClassField: + self->operands[0]->print() + printf(" + offset of field \"%s\"", self->fieldname) + elif self->kind == CfInstructionKind::PtrCast: + printf("pointer cast ") + self->operands[0]->print() + elif self->kind == CfInstructionKind::PtrMemsetToZero: + printf("set value of pointer ") + self->operands[0]->print() + printf(" to zero bytes") + elif self->kind == CfInstructionKind::VarCpy: + self->operands[0]->print() + else: + assert False + printf("\n") + + def free(self) -> None: + if self->kind == CfInstructionKind::Constant: + free_constant(&self->constant) + if self->kind == CfInstructionKind::StringArray: + free(self->strarray.str) + if self->kind == CfInstructionKind::Call: + free_signature(&self->signature) + free(self->operands) + # operands should be NULL-terminated array, or NULL for empty # TODO: does it ever need to be NULL? # TODO: do we need this method at all? @@ -74,15 +215,6 @@ class CfInstruction: assert self->operands != NULL memcpy(self->operands, operands, nbytes) - def free(self) -> None: - if self->kind == CfInstructionKind::Constant: - free_constant(&self->constant) - if self->kind == CfInstructionKind::StringArray: - free(self->strarray.str) - if self->kind == CfInstructionKind::Call: - free_signature(&self->signature) - free(self->operands) - class CfBlock: instructions: CfInstruction* @@ -109,6 +241,66 @@ class CfGraph: locals: LocalVariable** # First n variables are the function arguments nlocals: int + def print(self) -> None: + printed_varnames_idx = 0 + + sigstr: byte* = signature_to_string(&self->signature, True, True) + printf("Function %s\n", sigstr) + free(sigstr) + + printf(" Variables:\n") + for var = self->locals; var < &self->locals[self->nlocals]; var++: + printf(" ") + (*var)->print() + # Pad variable names with spaces to align them. + for i = (*var)->print_width(); i < 20; i++: + putchar(' ') + # If variable name is very long, put two spaces even in that case. + printf(" %s\n", (*var)->type->name) + + for blockidx = 0; blockidx < self->n_all_blocks; blockidx++: + b = self->all_blocks[blockidx] + + printf(" Block %d", blockidx) + #printf(" at %p", b) + + if b == &self->start_block: + printf(" (start block)") + if b == &self->end_block: + assert b->ninstructions == 0 + printf(" is the end block.\n") + continue + + printf(":\n") + + for ins = b->instructions; ins < &b->instructions[b->ninstructions]; ins++: + ins->print() + + if b == &self->end_block: + assert b->iftrue == NULL + assert b->iffalse == NULL + elif b->iftrue == NULL and b->iffalse == NULL: + printf(" Execution stops here. We have called a noreturn function.\n") + else: + trueidx = -1 + falseidx = -1 + for i = 0; i < self->n_all_blocks; i++: + if self->all_blocks[i] == b->iftrue: + trueidx = i + if self->all_blocks[i]==b->iffalse: + falseidx = i + assert trueidx != -1 + assert falseidx != -1 + if trueidx == falseidx: + printf(" Jump to block %d.\n", trueidx) + else: + assert b->branchvar != NULL + printf(" If ") + b->branchvar->print() + printf(" is True jump to block %d, otherwise block %d.\n", trueidx, falseidx) + + printf("\n") + def free(self) -> None: free_signature(&self->signature) for b = self->all_blocks; b < &self->all_blocks[self->n_all_blocks]; b++: @@ -126,208 +318,13 @@ class CfGraphFile: graphs: CfGraph** # only for defined functions ngraphs: int + def print(self) -> None: + printf("===== Control Flow Graphs for file \"%s\" =====\n", self->filename) + for cfg = self->graphs; cfg < &self->graphs[self->ngraphs]; cfg++: + (*cfg)->print() + def free(self) -> None: for cfg = self->graphs; cfg < &self->graphs[self->ngraphs]; cfg++: (*cfg)->free() free(*cfg) free(self->graphs) - - -def very_short_number_type_description(t: Type*) -> byte*: - if t->kind == TypeKind::FloatingPoint: - return "floating" - if t->kind == TypeKind::SignedInteger: - return "signed" - if t->kind == TypeKind::UnsignedInteger: - return "unsigned" - assert False - - -def print_cf_instruction(ins: CfInstruction*) -> None: - printf(" line %-4d ", ins->location.lineno) - - if ins->destvar != NULL: - ins->destvar->print() - printf(" = ") - - if ins->kind == CfInstructionKind::AddressOfLocalVar: - printf("address of ") - ins->operands[0]->print() - printf(" (local variable)") - elif ins->kind == CfInstructionKind::AddressOfGlobalVar: - printf("address of %s (global variable)", ins->globalname) - elif ins->kind == CfInstructionKind::SizeOf: - printf("sizeof %s", ins->type->name) - elif ins->kind == CfInstructionKind::BoolNegate: - printf("boolean negation of ") - ins->operands[0]->print() - elif ins->kind == CfInstructionKind::Call: - if get_self_class(&ins->signature) != NULL: - printf("call method %s.", get_self_class(&ins->signature)->name) - else: - printf("call function ") - printf("%s(", ins->signature.name) - for i = 0; i < ins->noperands; i++: - if i != 0: - printf(", ") - ins->operands[i]->print() - printf(")") - elif ins->kind == CfInstructionKind::NumCast: - printf("number cast ") - ins->operands[0]->print() - printf( - " (%d-bit %s --> %d-bit %s)", - ins->operands[0]->type->size_in_bits, - very_short_number_type_description(ins->operands[0]->type), - ins->destvar->type->size_in_bits, - very_short_number_type_description(ins->destvar->type), - ) - elif ins->kind == CfInstructionKind::EnumToInt32: - printf("cast ") - ins->operands[0]->print() - printf(" from enum to 32-bit signed int") - elif ins->kind == CfInstructionKind::Int32ToEnum: - printf("cast ") - ins->operands[0]->print() - printf(" from 32-bit signed int to enum") - elif ins->kind == CfInstructionKind::PtrToInt64: - printf("cast ") - ins->operands[0]->print() - printf(" to 64-bit integer") - elif ins->kind == CfInstructionKind::Int64ToPtr: - printf("cast ") - ins->operands[0]->print() - printf(" from 64-bit integer to pointer") - elif ins->kind == CfInstructionKind::Constant: - print_constant(&ins->constant) - elif ins->kind == CfInstructionKind::SpecialConstant: - printf("special constant \"%s\"", ins->scname) - elif ins->kind == CfInstructionKind::StringArray: - printf("string array ") - print_string(ins->strarray.str, ins->strarray.len) - elif ( - ins->kind == CfInstructionKind::NumAdd - or ins->kind == CfInstructionKind::NumSub - or ins->kind == CfInstructionKind::NumMul - or ins->kind == CfInstructionKind::NumDiv - or ins->kind == CfInstructionKind::NumMod - or ins->kind == CfInstructionKind::NumEq - or ins->kind == CfInstructionKind::NumLt - ): - if ins->kind == CfInstructionKind::NumAdd: - printf("num add ") - elif ins->kind == CfInstructionKind::NumSub: - printf("num sub ") - elif ins->kind == CfInstructionKind::NumMul: - printf("num mul ") - elif ins->kind == CfInstructionKind::NumDiv: - printf("num div ") - elif ins->kind == CfInstructionKind::NumMod: - printf("num mod ") - elif ins->kind == CfInstructionKind::NumEq: - printf("num eq ") - elif ins->kind == CfInstructionKind::NumLt: - printf("num lt ") - else: - assert False - ins->operands[0]->print() - printf(", ") - ins->operands[1]->print() - elif ins->kind == CfInstructionKind::PtrLoad: - # Extra parentheses to make these stand out a bit. - printf("*(") - ins->operands[0]->print() - printf(")") - elif ins->kind == CfInstructionKind::PtrStore: - printf("*(") - ins->operands[0]->print() - printf(") = ") - ins->operands[1]->print() - elif ins->kind == CfInstructionKind::PtrAddInt: - printf("ptr ") - ins->operands[0]->print() - printf(" + integer ") - ins->operands[1]->print() - elif ins->kind == CfInstructionKind::PtrClassField: - ins->operands[0]->print() - printf(" + offset of field \"%s\"", ins->fieldname) - elif ins->kind == CfInstructionKind::PtrCast: - printf("pointer cast ") - ins->operands[0]->print() - elif ins->kind == CfInstructionKind::PtrMemsetToZero: - printf("set value of pointer ") - ins->operands[0]->print() - printf(" to zero bytes") - elif ins->kind == CfInstructionKind::VarCpy: - ins->operands[0]->print() - else: - assert False - printf("\n") - - -def print_control_flow_graph(cfg: CfGraph*) -> None: - printed_varnames_idx = 0 - - sigstr: byte* = signature_to_string(&cfg->signature, True, True) - printf("Function %s\n", sigstr) - free(sigstr) - - printf(" Variables:\n") - for var = cfg->locals; var < &cfg->locals[cfg->nlocals]; var++: - printf(" ") - (*var)->print() - # Pad variable names with spaces to align them. - for i = (*var)->print_width(); i < 20; i++: - putchar(' ') - # If variable name is very long, put two spaces even in that case. - printf(" %s\n", (*var)->type->name) - - for blockidx = 0; blockidx < cfg->n_all_blocks; blockidx++: - b = cfg->all_blocks[blockidx] - - printf(" Block %d", blockidx) - #printf(" at %p", b) - - if b == &cfg->start_block: - printf(" (start block)") - if b == &cfg->end_block: - assert b->ninstructions == 0 - printf(" is the end block.\n") - continue - - printf(":\n") - - for ins = b->instructions; ins < &b->instructions[b->ninstructions]; ins++: - print_cf_instruction(ins) - - if b == &cfg->end_block: - assert b->iftrue == NULL - assert b->iffalse == NULL - elif b->iftrue == NULL and b->iffalse == NULL: - printf(" Execution stops here. We have called a noreturn function.\n") - else: - trueidx = -1 - falseidx = -1 - for i = 0; i < cfg->n_all_blocks; i++: - if cfg->all_blocks[i] == b->iftrue: - trueidx = i - if cfg->all_blocks[i]==b->iffalse: - falseidx = i - assert trueidx != -1 - assert falseidx != -1 - if trueidx == falseidx: - printf(" Jump to block %d.\n", trueidx) - else: - assert b->branchvar != NULL - printf(" If ") - b->branchvar->print() - printf(" is True jump to block %d, otherwise block %d.\n", trueidx, falseidx) - - printf("\n") - - -def print_control_flow_graphs(cfgfile: CfGraphFile*) -> None: - printf("===== Control Flow Graphs for file \"%s\" =====\n", cfgfile->filename) - for cfg = cfgfile->graphs; cfg < &cfgfile->graphs[cfgfile->ngraphs]; cfg++: - print_control_flow_graph(*cfg) - diff --git a/compiler/main.jou b/compiler/main.jou index ab974682..8061744b 100644 --- a/compiler/main.jou +++ b/compiler/main.jou @@ -248,14 +248,14 @@ class FileState: self->warn_about_unused_imports() if command_line_args.verbosity >= 2: - print_control_flow_graphs(&cf_graphs) + cf_graphs.print() # TODO: implement this #if command_line_args.verbosity >= 1: # printf("Analyzing CFGs: %s\n", self->path) #simplify_control_flow_graphs(&cf_graphs) #if command_line_args.verbosity >= 2: - # print_control_flow_graphs(&cf_graphs) + # cf_graphs.print() return cf_graphs From 91580b2b9ea67f6ec15214b8c3c3579e6cc2fb15 Mon Sep 17 00:00:00 2001 From: Akuli Date: Fri, 10 Jan 2025 15:41:06 +0200 Subject: [PATCH 4/9] print_to_width --- compiler/cf_graph.jou | 6 +----- compiler/structs.jou | 17 ++++++----------- 2 files changed, 7 insertions(+), 16 deletions(-) diff --git a/compiler/cf_graph.jou b/compiler/cf_graph.jou index 8ab803b4..674dca3e 100644 --- a/compiler/cf_graph.jou +++ b/compiler/cf_graph.jou @@ -251,11 +251,7 @@ class CfGraph: printf(" Variables:\n") for var = self->locals; var < &self->locals[self->nlocals]; var++: printf(" ") - (*var)->print() - # Pad variable names with spaces to align them. - for i = (*var)->print_width(); i < 20; i++: - putchar(' ') - # If variable name is very long, put two spaces even in that case. + (*var)->print_to_width(20) printf(" %s\n", (*var)->type->name) for blockidx = 0; blockidx < self->n_all_blocks; blockidx++: diff --git a/compiler/structs.jou b/compiler/structs.jou index 80c97f4e..e0d7890c 100644 --- a/compiler/structs.jou +++ b/compiler/structs.jou @@ -1,6 +1,7 @@ # TODO: delete this file, merge into others import "stdlib/str.jou" +import "stdlib/math.jou" import "stdlib/io.jou" import "./ast.jou" @@ -93,20 +94,14 @@ class LocalVariable: type: Type* is_argument: bool # First n variables are always the arguments - def print(self) -> int: + def print_to_width(self, width: int) -> None: if self->name[0] != '\0': - printf("%s", self->name) + printf("%-*s", width, self->name) else: - # Anonymous temporary variable created by compiler. - printf("$%d", self->id) + printf("$%-*d", max(width-1, 0), self->id) - # Return how many characters print() outputs. - def print_width(self) -> int: - if self->name[0] != '\0': - return strlen(self->name) as int - temp: byte[100] - sprintf(temp, "%d", self->id) - return 1 + (strlen(temp) as int) + def print(self) -> None: + self->print_to_width(0) class ExpressionTypes: From 91bc5974751698209c6453cb362711d33c738918 Mon Sep 17 00:00:00 2001 From: Akuli Date: Fri, 10 Jan 2025 15:50:49 +0200 Subject: [PATCH 5/9] Delete set_operands --- compiler/build_cf_graph.jou | 40 +++++++++++++++++-------------------- compiler/cf_graph.jou | 22 ++++++++++---------- 2 files changed, 29 insertions(+), 33 deletions(-) diff --git a/compiler/build_cf_graph.jou b/compiler/build_cf_graph.jou index 05ad82c4..671b1a2c 100644 --- a/compiler/build_cf_graph.jou +++ b/compiler/build_cf_graph.jou @@ -114,8 +114,7 @@ def add_unary_op( target: LocalVariable*, ) -> None: ins = CfInstruction{location = location, kind = op, destvar = target} - operands = [arg, NULL] - ins.set_operands(operands) + ins.set_1_operand(arg) add_instruction(st, ins) @@ -129,8 +128,7 @@ def add_binary_op( target: LocalVariable*, ) -> None: ins = CfInstruction{location = location, kind = op, destvar = target} - operands = [lhs, rhs, NULL] - ins.set_operands(operands) + ins.set_2_operands(lhs, rhs) add_instruction(st, ins) @@ -327,13 +325,11 @@ def build_class_field_pointer( kind = CfInstructionKind::PtrClassField, destvar = result, } + ins.set_1_operand(instance) assert sizeof(ins.fieldname) == sizeof(f->name) strcpy(ins.fieldname, f->name) - operands = [instance, NULL] - ins.set_operands(operands) - add_instruction(st, ins) return result @@ -555,12 +551,12 @@ def build_function_or_method_call( assert sig != NULL - args: LocalVariable** = calloc(call->nargs + 2, sizeof(args[0])) - k = 0 + args: LocalVariable** = malloc(sizeof(args[0]) * (call->nargs + 2)) # +1 for self, another because why not + nargs = 0 if call->method_call_self != NULL: if is_pointer_type(sig->argtypes[0]) and not call->uses_arrow_operator: - args[k++] = build_address_of_expression(st, call->method_call_self) + args[nargs++] = build_address_of_expression(st, call->method_call_self) elif (not is_pointer_type(sig->argtypes[0])) and call->uses_arrow_operator: self_ptr = build_expression(st, call->method_call_self) assert self_ptr->type->kind == TypeKind::Pointer @@ -568,12 +564,12 @@ def build_function_or_method_call( # dereference the pointer val = add_local_var(st, self_ptr->type->value_type) add_unary_op(st, call->method_call_self->location, CfInstructionKind::PtrLoad, self_ptr, val) - args[k++] = val + args[nargs++] = val else: - args[k++] = build_expression(st, call->method_call_self) + args[nargs++] = build_expression(st, call->method_call_self) for i = 0; i < call->nargs; i++: - args[k++] = build_expression(st, &call->args[i]) + args[nargs++] = build_expression(st, &call->args[i]) if sig->returntype != NULL: return_value = add_local_var(st, sig->returntype) @@ -585,15 +581,15 @@ def build_function_or_method_call( kind = CfInstructionKind::Call, signature = copy_signature(sig), destvar = return_value, + operands = args, + noperands = nargs, } - ins.set_operands(args) add_instruction(st, ins) if sig->is_noreturn: # Place the remaining code into an unreachable block, so you will get a warning if there is any add_jump(st, NULL, NULL, NULL, NULL) - free(args) return return_value @@ -913,12 +909,11 @@ def build_assert(st: State*, assert_location: Location, assertion: AstAssertion* argtypes[1] = get_pointer_type(byteType) argtypes[2] = intType - args = [ - add_local_var(st, argtypes[0]), - add_local_var(st, argtypes[1]), - add_local_var(st, argtypes[2]), - NULL, - ] + args: LocalVariable** = malloc(sizeof(args[0]) * 3) + assert args != NULL + args[0] = add_local_var(st, argtypes[0]) + args[1] = add_local_var(st, argtypes[1]) + args[2] = add_local_var(st, argtypes[2]) add_constant(st, assert_location, Constant{kind = ConstantKind::String, str = assertion->condition_str}, args[0]) tmp = strdup(assertion->condition.location.path) @@ -938,8 +933,9 @@ def build_assert(st: State*, assert_location: Location, assertion: AstAssertion* is_noreturn = True, returntype_location = assert_location, }, + operands = args, + noperands = 3, } - ins.set_operands(args) add_instruction(st, ins) st->current_block = trueblock diff --git a/compiler/cf_graph.jou b/compiler/cf_graph.jou index 674dca3e..36031183 100644 --- a/compiler/cf_graph.jou +++ b/compiler/cf_graph.jou @@ -202,18 +202,18 @@ class CfInstruction: free_signature(&self->signature) free(self->operands) - # operands should be NULL-terminated array, or NULL for empty - # TODO: does it ever need to be NULL? - # TODO: do we need this method at all? - def set_operands(self, operands: LocalVariable**) -> None: - self->noperands = 0 - while operands != NULL and operands[self->noperands] != NULL: - self->noperands++ - - nbytes = sizeof(self->operands[0]) * self->noperands - self->operands = malloc(nbytes) + def set_1_operand(self, operand: LocalVariable*) -> None: + self->noperands = 1 + self->operands = malloc(sizeof(self->operands[0])) assert self->operands != NULL - memcpy(self->operands, operands, nbytes) + self->operands[0] = operand + + def set_2_operands(self, operand1: LocalVariable*, operand2: LocalVariable*) -> None: + self->noperands = 2 + self->operands = malloc(2 * sizeof(self->operands[0])) + assert self->operands != NULL + self->operands[0] = operand1 + self->operands[1] = operand2 class CfBlock: From 329e6cf5ea15adae208aef5a93dec6bdb9721b59 Mon Sep 17 00:00:00 2001 From: Akuli Date: Fri, 10 Jan 2025 15:52:29 +0200 Subject: [PATCH 6/9] simplify even more --- compiler/build_cf_graph.jou | 7 ++++--- compiler/cf_graph.jou | 14 +++----------- 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/compiler/build_cf_graph.jou b/compiler/build_cf_graph.jou index 671b1a2c..abb9da5e 100644 --- a/compiler/build_cf_graph.jou +++ b/compiler/build_cf_graph.jou @@ -114,7 +114,7 @@ def add_unary_op( target: LocalVariable*, ) -> None: ins = CfInstruction{location = location, kind = op, destvar = target} - ins.set_1_operand(arg) + ins.add_operand(arg) add_instruction(st, ins) @@ -128,7 +128,8 @@ def add_binary_op( target: LocalVariable*, ) -> None: ins = CfInstruction{location = location, kind = op, destvar = target} - ins.set_2_operands(lhs, rhs) + ins.add_operand(lhs) + ins.add_operand(rhs) add_instruction(st, ins) @@ -325,7 +326,7 @@ def build_class_field_pointer( kind = CfInstructionKind::PtrClassField, destvar = result, } - ins.set_1_operand(instance) + ins.add_operand(instance) assert sizeof(ins.fieldname) == sizeof(f->name) strcpy(ins.fieldname, f->name) diff --git a/compiler/cf_graph.jou b/compiler/cf_graph.jou index 36031183..fed7cc6e 100644 --- a/compiler/cf_graph.jou +++ b/compiler/cf_graph.jou @@ -202,18 +202,10 @@ class CfInstruction: free_signature(&self->signature) free(self->operands) - def set_1_operand(self, operand: LocalVariable*) -> None: - self->noperands = 1 - self->operands = malloc(sizeof(self->operands[0])) + def add_operand(self, operand: LocalVariable*) -> None: + self->operands = realloc(self->operands, sizeof(self->operands[0]) * (self->noperands + 1)) assert self->operands != NULL - self->operands[0] = operand - - def set_2_operands(self, operand1: LocalVariable*, operand2: LocalVariable*) -> None: - self->noperands = 2 - self->operands = malloc(2 * sizeof(self->operands[0])) - assert self->operands != NULL - self->operands[0] = operand1 - self->operands[1] = operand2 + self->operands[self->noperands++] = operand class CfBlock: From cf3b52e90922a54033a4ffa79f97da9491ab9031 Mon Sep 17 00:00:00 2001 From: Akuli Date: Fri, 10 Jan 2025 16:05:44 +0200 Subject: [PATCH 7/9] Yes, we need these because they're used many times. (I tried removing them.) --- compiler/build_cf_graph.jou | 3 --- 1 file changed, 3 deletions(-) diff --git a/compiler/build_cf_graph.jou b/compiler/build_cf_graph.jou index abb9da5e..d4b9f7a7 100644 --- a/compiler/build_cf_graph.jou +++ b/compiler/build_cf_graph.jou @@ -105,7 +105,6 @@ def add_instruction( return &st->current_block->instructions[st->current_block->ninstructions - 1] -# TODO: do we need this? def add_unary_op( st: State*, location: Location, @@ -118,7 +117,6 @@ def add_unary_op( add_instruction(st, ins) -# TODO: do we need this? def add_binary_op( st: State*, location: Location, @@ -133,7 +131,6 @@ def add_binary_op( add_instruction(st, ins) -# TODO: do we need this? def add_constant(st: State*, location: Location, c: Constant, target: LocalVariable*) -> CfInstruction*: ins = CfInstruction{location = location, kind = CfInstructionKind::Constant, constant = copy_constant(c), destvar = target} return add_instruction(st, ins) From d09ea009aa7b6c520153830b4da746103ec5792a Mon Sep 17 00:00:00 2001 From: Akuli Date: Fri, 10 Jan 2025 16:13:40 +0200 Subject: [PATCH 8/9] use add_operand --- compiler/build_cf_graph.jou | 57 ++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 33 deletions(-) diff --git a/compiler/build_cf_graph.jou b/compiler/build_cf_graph.jou index d4b9f7a7..1b6d6236 100644 --- a/compiler/build_cf_graph.jou +++ b/compiler/build_cf_graph.jou @@ -549,12 +549,15 @@ def build_function_or_method_call( assert sig != NULL - args: LocalVariable** = malloc(sizeof(args[0]) * (call->nargs + 2)) # +1 for self, another because why not - nargs = 0 + ins = CfInstruction{ + location = location, + kind = CfInstructionKind::Call, + signature = copy_signature(sig), + } if call->method_call_self != NULL: if is_pointer_type(sig->argtypes[0]) and not call->uses_arrow_operator: - args[nargs++] = build_address_of_expression(st, call->method_call_self) + ins.add_operand(build_address_of_expression(st, call->method_call_self)) elif (not is_pointer_type(sig->argtypes[0])) and call->uses_arrow_operator: self_ptr = build_expression(st, call->method_call_self) assert self_ptr->type->kind == TypeKind::Pointer @@ -562,33 +565,23 @@ def build_function_or_method_call( # dereference the pointer val = add_local_var(st, self_ptr->type->value_type) add_unary_op(st, call->method_call_self->location, CfInstructionKind::PtrLoad, self_ptr, val) - args[nargs++] = val + ins.add_operand(val) else: - args[nargs++] = build_expression(st, call->method_call_self) + ins.add_operand(build_expression(st, call->method_call_self)) for i = 0; i < call->nargs; i++: - args[nargs++] = build_expression(st, &call->args[i]) + ins.add_operand(build_expression(st, &call->args[i])) if sig->returntype != NULL: - return_value = add_local_var(st, sig->returntype) - else: - return_value = NULL + ins.destvar = add_local_var(st, sig->returntype) - ins = CfInstruction{ - location = location, - kind = CfInstructionKind::Call, - signature = copy_signature(sig), - destvar = return_value, - operands = args, - noperands = nargs, - } add_instruction(st, ins) if sig->is_noreturn: # Place the remaining code into an unreachable block, so you will get a warning if there is any add_jump(st, NULL, NULL, NULL, NULL) - return return_value + return ins.destvar def build_instantiation(st: State*, type: Type*, inst: AstInstantiation*, location: Location) -> LocalVariable*: @@ -907,18 +900,6 @@ def build_assert(st: State*, assert_location: Location, assertion: AstAssertion* argtypes[1] = get_pointer_type(byteType) argtypes[2] = intType - args: LocalVariable** = malloc(sizeof(args[0]) * 3) - assert args != NULL - args[0] = add_local_var(st, argtypes[0]) - args[1] = add_local_var(st, argtypes[1]) - args[2] = add_local_var(st, argtypes[2]) - - add_constant(st, assert_location, Constant{kind = ConstantKind::String, str = assertion->condition_str}, args[0]) - tmp = strdup(assertion->condition.location.path) - add_constant(st, assert_location, Constant{kind = ConstantKind::String, str = tmp}, args[1]) - free(tmp) - add_constant(st, assert_location, int_constant(intType, assert_location.lineno), args[2]) - ins = CfInstruction{ location = assert_location, kind = CfInstructionKind::Call, @@ -930,10 +911,20 @@ def build_assert(st: State*, assert_location: Location, assertion: AstAssertion* takes_varargs = False, is_noreturn = True, returntype_location = assert_location, - }, - operands = args, - noperands = 3, + } } + + arg1 = add_local_var(st, argtypes[0]) + arg2 = add_local_var(st, argtypes[1]) + arg3 = add_local_var(st, argtypes[1]) + + add_constant(st, assert_location, Constant{kind = ConstantKind::String, str = assertion->condition_str}, arg1) + add_constant(st, assert_location, Constant{kind = ConstantKind::String, str = assertion->condition.location.path}, arg2) + add_constant(st, assert_location, int_constant(intType, assert_location.lineno), arg3) + + ins.add_operand(arg1) + ins.add_operand(arg2) + ins.add_operand(arg3) add_instruction(st, ins) st->current_block = trueblock From 2d2ab1921972f0ea01d33737b6590e491bda31ed Mon Sep 17 00:00:00 2001 From: Akuli Date: Fri, 10 Jan 2025 16:15:47 +0200 Subject: [PATCH 9/9] fix --- compiler/build_cf_graph.jou | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler/build_cf_graph.jou b/compiler/build_cf_graph.jou index 1b6d6236..40231f43 100644 --- a/compiler/build_cf_graph.jou +++ b/compiler/build_cf_graph.jou @@ -916,7 +916,7 @@ def build_assert(st: State*, assert_location: Location, assertion: AstAssertion* arg1 = add_local_var(st, argtypes[0]) arg2 = add_local_var(st, argtypes[1]) - arg3 = add_local_var(st, argtypes[1]) + arg3 = add_local_var(st, argtypes[2]) add_constant(st, assert_location, Constant{kind = ConstantKind::String, str = assertion->condition_str}, arg1) add_constant(st, assert_location, Constant{kind = ConstantKind::String, str = assertion->condition.location.path}, arg2)