diff --git a/self_hosted/ast.jou b/self_hosted/ast.jou index 6b320661..7dbf5fe5 100644 --- a/self_hosted/ast.jou +++ b/self_hosted/ast.jou @@ -787,9 +787,7 @@ class AstClassMember: elif self->kind == AstClassMemberKind::Method: printf(" method ") self->method.signature.print() - tp = TreePrinter{} - strcpy(tp.prefix, " ") - self->method.body.print(tp) + self->method.body.print(TreePrinter{prefix = " "}) else: assert False diff --git a/self_hosted/create_llvm_ir.jou b/self_hosted/create_llvm_ir.jou index abcb8b0f..7a561796 100644 --- a/self_hosted/create_llvm_ir.jou +++ b/self_hosted/create_llvm_ir.jou @@ -166,14 +166,23 @@ class AstToIR: block = LLVMAppendBasicBlock(self->llvm_function, name_hint) LLVMPositionBuilderAtEnd(self->builder, block) - def make_a_string_constant(self, s: byte*) -> LLVMValue*: - array = LLVMConstString(s, strlen(s) as int, False) - global_var = LLVMAddGlobal(self->module, LLVMTypeOf(array), "string_literal") - LLVMSetLinkage(global_var, LLVMLinkage::Private) # This makes it a static global variable - LLVMSetInitializer(global_var, array) - - string_type = LLVMPointerType(LLVMInt8Type(), 0) - return LLVMBuildBitCast(self->builder, global_var, string_type, "string_ptr") + # If array_len is -1, returns a pointer to the start of a static global string. + # Otherwise returns an array value. + def make_a_string_constant(self, s: byte*, array_len: int) -> LLVMValue*: + if array_len == -1: + array = LLVMConstString(s, strlen(s) as int, False) + global_var = LLVMAddGlobal(self->module, LLVMTypeOf(array), "string_literal") + LLVMSetLinkage(global_var, LLVMLinkage::Private) # This makes it a static global variable + LLVMSetInitializer(global_var, array) + string_type = LLVMPointerType(LLVMInt8Type(), 0) + return LLVMBuildBitCast(self->builder, global_var, string_type, "string_ptr") + else: + assert strlen(s) < array_len + padded = calloc(1, array_len) + strcpy(padded, s) + array = LLVMConstString(padded, array_len, True) + free(padded) + return array def do_cast(self, obj: LLVMValue*, from: Type*, to: Type*) -> LLVMValue*: # Treat enums as just integers @@ -319,8 +328,8 @@ class AstToIR: assert assert_fail_func != NULL args = [ - self->make_a_string_constant("foo"), - self->make_a_string_constant("bar"), + self->make_a_string_constant("foo", -1), + self->make_a_string_constant("bar", -1), LLVMConstInt(LLVMInt32Type(), 123, False), ] @@ -424,10 +433,8 @@ class AstToIR: for i = 0; i < call->nargs; i++: args[k++] = self->do_expression(&call->args[i]) - name_hint: byte[100] - if signature->return_type == NULL: - strcpy(name_hint, "") - else: + name_hint: byte[100] = "" + if signature->return_type != NULL: sprintf(name_hint, "%.20s_return_value", signature->name) result = LLVMBuildCall2(self->builder, function_type, function, args, k, name_hint) @@ -448,7 +455,11 @@ class AstToIR: ) if ast->kind == AstExpressionKind::String: - result = self->make_a_string_constant(ast->string) + if types->implicit_string_to_array_cast: + array_len = types->implicit_cast_type->array.length + else: + array_len = -1 + result = self->make_a_string_constant(ast->string, array_len) elif ast->kind == AstExpressionKind::Bool: result = LLVMConstInt(LLVMInt1Type(), ast->bool_value as long, False) elif ast->kind == AstExpressionKind::Byte: @@ -568,7 +579,7 @@ class AstToIR: assert False types = self->function_or_method_types->get_expression_types(ast) - if types->implicit_cast_type == NULL: + if types->implicit_cast_type == NULL or types->implicit_string_to_array_cast: return result return self->do_cast(result, types->original_type, types->implicit_cast_type) diff --git a/self_hosted/parser.jou b/self_hosted/parser.jou index b55c09a8..3f4adf89 100644 --- a/self_hosted/parser.jou +++ b/self_hosted/parser.jou @@ -107,11 +107,9 @@ def parse_function_or_method_signature(tokens: Token**, is_method: bool) -> AstS if not is_method: fail((*tokens)->location, "'self' cannot be used here") - the_self: byte[100] - strcpy(the_self, "self") result.args = realloc(result.args, sizeof result.args[0] * (result.nargs+1)) result.args[result.nargs++] = AstNameTypeValue{ - name = the_self, + name = "self", name_location = (*tokens)->location, } ++*tokens diff --git a/self_hosted/target.jou b/self_hosted/target.jou index f84e5dec..e4bbb872 100644 --- a/self_hosted/target.jou +++ b/self_hosted/target.jou @@ -36,7 +36,7 @@ def init_target() -> void: if is_windows(): # LLVM's default is x86_64-pc-windows-msvc - strcpy(target.triple, "x86_64-pc-windows-gnu") + target.triple = "x86_64-pc-windows-gnu" else: triple = LLVMGetDefaultTargetTriple() assert strlen(triple) < sizeof target.triple diff --git a/self_hosted/token.jou b/self_hosted/token.jou index 3b4f6c88..e769ae75 100644 --- a/self_hosted/token.jou +++ b/self_hosted/token.jou @@ -104,33 +104,33 @@ class Token: def fail_expected_got(self, what_was_expected_instead: byte*) -> void: got: byte[100] if self->kind == TokenKind::Short: - strcpy(got, "a short") + got = "a short" elif self->kind == TokenKind::Int: - strcpy(got, "an integer") + got = "an integer" elif self->kind == TokenKind::Long: - strcpy(got, "a long integer") + got = "a long integer" elif self->kind == TokenKind::Float: - strcpy(got, "a float constant") + got = "a float constant" elif self->kind == TokenKind::Double: - strcpy(got, "a double constant") + got = "a double constant" elif self->kind == TokenKind::Byte: - strcpy(got, "a byte literal") + got = "a byte literal" elif self->kind == TokenKind::String: - strcpy(got, "a string") + got = "a string" elif self->kind == TokenKind::Name: snprintf(got, sizeof got, "a variable name '%s'", self->short_string) elif self->kind == TokenKind::Keyword: snprintf(got, sizeof got, "the '%s' keyword", self->short_string) elif self->kind == TokenKind::Newline: - strcpy(got, "end of line") + got = "end of line" elif self->kind == TokenKind::Indent: - strcpy(got, "more indentation") + got = "more indentation" elif self->kind == TokenKind::Dedent: - strcpy(got, "less indentation") + got = "less indentation" elif self->kind == TokenKind::Operator: snprintf(got, sizeof got, "'%s'", self->short_string) elif self->kind == TokenKind::EndOfFile: - strcpy(got, "end of file") + got = "end of file" else: assert False diff --git a/self_hosted/typecheck.jou b/self_hosted/typecheck.jou index 2c3a7274..a1b13d8b 100644 --- a/self_hosted/typecheck.jou +++ b/self_hosted/typecheck.jou @@ -101,26 +101,47 @@ class ExpressionTypes: expression: AstExpression* original_type: Type* implicit_cast_type: Type* # NULL if no implicit casting is needed - implicit_array_to_pointer_cast: bool # Whether a special kind of implicit cast happened next: ExpressionTypes* # TODO: switch to more efficient structure than linked list? + # Flags to indicate whether special kinds of implicit casts happened + implicit_array_to_pointer_cast: bool # Foo[N] to Foo* + implicit_string_to_array_cast: bool # "..." to byte[N] + def get_type_after_implicit_cast(self) -> Type*: assert self->original_type != NULL if self->implicit_cast_type == NULL: return self->original_type return self->implicit_cast_type + # TODO: error_location is probably unnecessary, can get location from self->expression def do_implicit_cast(self, to: Type*, error_location: Location, error_template: byte*) -> void: # This cannot be called multiple times assert self->implicit_cast_type == NULL assert not self->implicit_array_to_pointer_cast + assert not self->implicit_string_to_array_cast from = self->original_type if from == to: return + if ( + self->expression->kind == AstExpressionKind::String + and from == byte_type->get_pointer_type() + and to->kind == TypeKind::Array + and to->array.item_type == byte_type + ): + string_size = strlen(self->expression->string) + 1 + if to->array.length < string_size: + message: byte[100] + snprintf( + message, sizeof message, + "a string of %d bytes (including '\\0') does not fit into %s", + string_size, to->name, + ) + fail(error_location, message) + self->implicit_string_to_array_cast = True # Passing in NULL for error_template can be used to force a cast to happen. - if error_template != NULL and not can_cast_implicitly(from, to): + elif error_template != NULL and not can_cast_implicitly(from, to): fail_with_implicit_cast_error(error_location, error_template, from, to) self->implicit_cast_type = to @@ -437,7 +458,7 @@ def short_expression_description(expr: AstExpression*) -> byte[200]: or expr->kind == AstExpressionKind::Bool or expr->kind == AstExpressionKind::Null ): - strcpy(result, "a constant") + return "a constant" elif ( expr->kind == AstExpressionKind::Negate or expr->kind == AstExpressionKind::Add @@ -446,7 +467,7 @@ def short_expression_description(expr: AstExpression*) -> byte[200]: or expr->kind == AstExpressionKind::Divide or expr->kind == AstExpressionKind::Modulo ): - strcpy(result, "the result of a calculation") + return "the result of a calculation" elif ( expr->kind == AstExpressionKind::Eq or expr->kind == AstExpressionKind::Ne @@ -455,44 +476,45 @@ def short_expression_description(expr: AstExpression*) -> byte[200]: or expr->kind == AstExpressionKind::Lt or expr->kind == AstExpressionKind::Le ): - strcpy(result, "the result of a comparison") + return "the result of a comparison" elif expr->kind == AstExpressionKind::Call: sprintf(result, "a %s call", expr->call.function_or_method()) + return result elif expr->kind == AstExpressionKind::Instantiate: - strcpy(result, "a newly created instance") + return "a newly created instance" elif expr->kind == AstExpressionKind::GetVariable: - strcpy(result, "a variable") + return "a variable" elif expr->kind == AstExpressionKind::GetEnumMember: - strcpy(result, "an enum member") + return "an enum member" elif expr->kind == AstExpressionKind::GetClassField: snprintf(result, sizeof result, "field '%s'", expr->class_field.field_name) + return result elif expr->kind == AstExpressionKind::As: - strcpy(result, "the result of a cast") + return "the result of a cast" elif expr->kind == AstExpressionKind::SizeOf: - strcpy(result, "a sizeof expression") + return "a sizeof expression" elif expr->kind == AstExpressionKind::AddressOf: subresult = short_expression_description(expr->operands) snprintf(result, sizeof result, "address of %s", subresult) + return result elif expr->kind == AstExpressionKind::Dereference: - strcpy(result, "the value of a pointer") + return "the value of a pointer" elif expr->kind == AstExpressionKind::And: - strcpy(result, "the result of 'and'") + return "the result of 'and'" elif expr->kind == AstExpressionKind::Or: - strcpy(result, "the result of 'or'") + return "the result of 'or'" elif expr->kind == AstExpressionKind::Not: - strcpy(result, "the result of 'not'") + return "the result of 'not'" elif expr->kind == AstExpressionKind::PreIncr or expr->kind == AstExpressionKind::PostIncr: - strcpy(result, "the result of incrementing a value") + return "the result of incrementing a value" elif expr->kind == AstExpressionKind::PreDecr or expr->kind == AstExpressionKind::PostDecr: - strcpy(result, "the result of decrementing a value") + return "the result of decrementing a value" elif expr->kind == AstExpressionKind::Indexing: - strcpy(result, "an indexed value") + return "an indexed value" else: printf("*** %d\n", expr->kind) assert False - return result - # The & operator can't go in front of most expressions. # You can't do &(1 + 2), for example. # @@ -1111,10 +1133,7 @@ class Stage3TypeChecker: # This is a common error, so try to produce a helpful error message. error_template: byte[500] if target_expr->kind == AstExpressionKind::Dereference: - strcpy( - error_template, - "cannot place a value of type into a pointer of type *", - ) + error_template = "cannot place a value of type into a pointer of type *" else: target_description: byte[200] = short_expression_description(target_expr) snprintf( diff --git a/self_hosted/types.jou b/self_hosted/types.jou index 1a8deae0..51447563 100644 --- a/self_hosted/types.jou +++ b/self_hosted/types.jou @@ -134,18 +134,10 @@ global int_type: Type* global long_type: Type* def init_types() -> void: - strcpy(void_ptr_type.name, "void*") - void_ptr_type.kind = TypeKind::VoidPointer - - strcpy(bool_type.name, "bool") - bool_type.kind = TypeKind::Bool - - strcpy(float_type.name, "float") - strcpy(double_type.name, "double") - float_type.size_in_bits = 32 - double_type.size_in_bits = 64 - float_type.kind = TypeKind::FloatingPoint - double_type.kind = TypeKind::FloatingPoint + void_ptr_type = Type{name = "void*", kind = TypeKind::VoidPointer} + bool_type = Type{name = "bool", kind = TypeKind::Bool} + float_type = Type{name = "float", size_in_bits = 32, kind = TypeKind::FloatingPoint} + double_type = Type{name = "double", size_in_bits = 64, kind = TypeKind::FloatingPoint} for size = 8; size <= 64; size *= 2: sprintf(signed_integers[size].name, "<%d-bit signed integer>", size) @@ -160,10 +152,10 @@ def init_types() -> void: int_type = &signed_integers[32] long_type = &signed_integers[64] - strcpy(byte_type->name, "byte") - strcpy(short_type->name, "short") - strcpy(int_type->name, "int") - strcpy(long_type->name, "long") + byte_type->name = "byte" + short_type->name = "short" + int_type->name = "int" + long_type->name = "long" def create_opaque_class(name: byte*) -> Type*: result: Type* = malloc(sizeof *result) diff --git a/src/build_cfg.c b/src/build_cfg.c index 732ebdcd..d66da9ff 100644 --- a/src/build_cfg.c +++ b/src/build_cfg.c @@ -511,6 +511,24 @@ static const LocalVariable *build_expression(struct State *st, const AstExpressi return memberptr; } + if (types && types->implicit_string_to_array_cast) { + assert(types->implicit_cast_type); + assert(types->implicit_cast_type->kind == TYPE_ARRAY); + assert(expr->kind == AST_EXPR_CONSTANT); + assert(expr->data.constant.kind == CONSTANT_STRING); + + char *padded = calloc(1, types->implicit_cast_type->data.array.len); + strcpy(padded, expr->data.constant.data.str); + + const LocalVariable *result = add_local_var(st, types->implicit_cast_type); + union CfInstructionData data = { .strarray = { + .len = types->implicit_cast_type->data.array.len, + .str = padded, + }}; + add_instruction(st, expr->location, CF_STRING_ARRAY, &data, NULL, result); + return result; + } + const LocalVariable *result, *temp; switch(expr->kind) { @@ -586,9 +604,11 @@ static const LocalVariable *build_expression(struct State *st, const AstExpressi result = build_address_of_expression(st, &expr->data.operands[0]); break; case AST_EXPR_SIZEOF: - result = add_local_var(st, longType); - union CfInstructionData data = { .type = get_expr_types(st, &expr->data.operands[0])->type }; - add_instruction(st, expr->location, CF_SIZEOF, &data, NULL, result); + { + result = add_local_var(st, longType); + union CfInstructionData data = { .type = get_expr_types(st, &expr->data.operands[0])->type }; + add_instruction(st, expr->location, CF_SIZEOF, &data, NULL, result); + } break; case AST_EXPR_DEREFERENCE: temp = build_expression(st, &expr->data.operands[0]); diff --git a/src/codegen.c b/src/codegen.c index 6ec60bec..6d685724 100644 --- a/src/codegen.c +++ b/src/codegen.c @@ -320,6 +320,7 @@ static void codegen_instruction(const struct State *st, const CfInstruction *ins } break; case CF_CONSTANT: setdest(codegen_constant(st, &ins->data.constant)); break; + case CF_STRING_ARRAY: setdest(LLVMConstString(ins->data.strarray.str, ins->data.strarray.len, true)); break; case CF_SIZEOF: setdest(LLVMSizeOf(codegen_type(ins->data.type))); break; case CF_ADDRESS_OF_LOCAL_VAR: setdest(get_pointer_to_local_var(st, ins->operands[0])); break; case CF_ADDRESS_OF_GLOBAL_VAR: setdest(LLVMGetNamedGlobal(st->module, ins->data.globalname)); break; diff --git a/src/free.c b/src/free.c index 430ee26e..590d5140 100644 --- a/src/free.c +++ b/src/free.c @@ -277,6 +277,8 @@ void free_control_flow_graph_block(const CfGraph *cfg, CfBlock *b) for (const CfInstruction *ins = b->instructions.ptr; ins < End(b->instructions); ins++) { if (ins->kind == CF_CONSTANT) free_constant(&ins->data.constant); + if (ins->kind == CF_STRING_ARRAY) + free(ins->data.strarray.str); if (ins->kind == CF_CALL) free_signature(&ins->data.signature); free(ins->operands); diff --git a/src/jou_compiler.h b/src/jou_compiler.h index a4e48c89..988392b9 100644 --- a/src/jou_compiler.h +++ b/src/jou_compiler.h @@ -464,7 +464,10 @@ struct ExpressionTypes { const AstExpression *expr; const Type *type; const Type *implicit_cast_type; // NULL for no implicit cast - bool implicit_array_to_pointer_cast; // if true, the implicit cast is Foo[N] to Foo* + + // Flags to indicate whether special kinds of implicit casts happened + bool implicit_array_to_pointer_cast; // Foo[N] to Foo* + bool implicit_string_to_array_cast; // "..." to byte[N] }; struct ExportSymbol { @@ -524,6 +527,7 @@ struct CfInstruction { Location location; enum CfInstructionKind { CF_CONSTANT, + CF_STRING_ARRAY, CF_CALL, // function or method call, depending on whether self_type is NULL (see below) CF_ADDRESS_OF_LOCAL_VAR, CF_ADDRESS_OF_GLOBAL_VAR, @@ -551,6 +555,7 @@ struct CfInstruction { } kind; union CfInstructionData { Constant constant; // CF_CONSTANT + struct { char *str; int len; } strarray; // CF_STRING_ARRAY Signature signature; // CF_CALL char fieldname[100]; // CF_PTR_CLASS_FIELD char globalname[100]; // CF_ADDRESS_OF_GLOBAL_VAR diff --git a/src/print.c b/src/print.c index 19ea61d5..81ec258c 100644 --- a/src/print.c +++ b/src/print.c @@ -10,10 +10,10 @@ static void print_byte(char b) printf(" '%c'", b); } -static void print_string(const char *s) +static void print_string(const char *s, int len) { putchar('"'); - for (int i = 0; s[i]; i++) { + for (int i = 0; idata.str); + print_string(c->data.str, -1); break; } } @@ -80,7 +80,7 @@ void print_token(const Token *token) break; case TOKEN_STRING: printf("string "); - print_string(token->data.string_value); + print_string(token->data.string_value, -1); printf("\n"); break; case TOKEN_NAME: @@ -550,6 +550,10 @@ static void print_cf_instruction(const CfInstruction *ins) case CF_CONSTANT: print_constant(&ins->data.constant); break; + case CF_STRING_ARRAY: + printf("string array "); + print_string(ins->data.strarray.str, ins->data.strarray.len); + break; case CF_NUM_ADD: case CF_NUM_SUB: diff --git a/src/typecheck.c b/src/typecheck.c index e6f4f769..6eec0423 100644 --- a/src/typecheck.c +++ b/src/typecheck.c @@ -511,8 +511,22 @@ static void do_implicit_cast( if (from == to) return; + if ( + types->expr->kind == AST_EXPR_CONSTANT + && types->expr->data.constant.kind == CONSTANT_STRING + && from == get_pointer_type(byteType) + && to->kind == TYPE_ARRAY + && to->data.array.membertype == byteType + ) + { + int string_size = strlen(types->expr->data.constant.data.str) + 1; + if (to->data.array.len < string_size) { + fail_with_error(location, "a string of %d bytes (including '\\0') does not fit into %s", string_size, to->name); + } + types->implicit_string_to_array_cast = true; + } // Passing in NULL for errormsg_template can be used to "force" a cast to happen. - if (errormsg_template != NULL && !can_cast_implicitly(from, to)) + else if (errormsg_template != NULL && !can_cast_implicitly(from, to)) fail_with_implicit_cast_error(location, errormsg_template, from, to); types->implicit_cast_type = to; diff --git a/tests/other_errors/string_doesnt_fit_to_array.jou b/tests/other_errors/string_doesnt_fit_to_array.jou new file mode 100644 index 00000000..a9b43acf --- /dev/null +++ b/tests/other_errors/string_doesnt_fit_to_array.jou @@ -0,0 +1,2 @@ +def foo() -> void: + x: byte[20] = "this is a long string" # Error: a string of 22 bytes (including '\0') does not fit into byte[20] diff --git a/tests/should_succeed/string_initialize_byte_array.jou b/tests/should_succeed/string_initialize_byte_array.jou new file mode 100644 index 00000000..dfceee8f --- /dev/null +++ b/tests/should_succeed/string_initialize_byte_array.jou @@ -0,0 +1,12 @@ +import "stdlib/io.jou" + +def main() -> int: + foo: byte[50] = "hi" + # Output: hi000000000000000000000000000000000000000000000000 + for i = 0; i < 50; i++: + if foo[i] == '\0': + putchar('0') + else: + putchar(foo[i]) + putchar('\n') + return 0