From b9af320137c8f0b8e7c7b1173889589e0f05c424 Mon Sep 17 00:00:00 2001
From: Joshua James Venter
Date: Sat, 13 Jul 2024 11:12:47 +0200
Subject: [PATCH] StringRef to StringSlice

Switch `_atol` and its helpers (`_trim_and_handle_sign`,
`_handle_base_prefix`, `_str_to_base_error`, `_identify_base`) from
`StringRef` to `StringSlice` parameters. The helpers now read bytes
through `unsafe_ptr()` and compare them against `ord(...)` values
instead of indexing the string.

Callers are updated accordingly: `atol` and `StringLiteral.__int__`
pass `as_string_slice()`, and `StringRef.__int__` goes through `atol`.

Because the helpers index a raw pointer, also guard the two places
where `buff[start]` was read with `start` equal to the slice length
(whitespace-only or sign-only input). Such input is still rejected by
`_atol` with the same error as before; it just no longer reads a byte
past the end of the slice on the way there.

Signed-off-by: Joshua James Venter
---
 stdlib/src/builtin/string.mojo         | 77 ++++++++++++++------------
 stdlib/src/builtin/string_literal.mojo |  2 +-
 stdlib/src/utils/stringref.mojo        |  2 +-
 3 files changed, 45 insertions(+), 36 deletions(-)

diff --git a/stdlib/src/builtin/string.mojo b/stdlib/src/builtin/string.mojo
index ef27cc85fdf..93127e339e3 100644
--- a/stdlib/src/builtin/string.mojo
+++ b/stdlib/src/builtin/string.mojo
@@ -212,15 +212,15 @@ fn ascii(value: String) -> String:
 # ===----------------------------------------------------------------------=== #
 
 
-fn _atol(str_ref: StringRef, base: Int = 10) raises -> Int:
-    """Implementation of `atol` for StringRef inputs.
+fn _atol(str_slice: StringSlice, base: Int = 10) raises -> Int:
+    """Implementation of `atol` for StringSlice inputs.
 
     Please see its docstring for details.
     """
     if (base != 0) and (base < 2 or base > 36):
         raise Error("Base must be >= 2 and <= 36, or 0.")
-    if not str_ref:
-        raise Error(_str_to_base_error(base, str_ref))
+    if not str_slice:
+        raise Error(_str_to_base_error(base, str_slice))
 
     var real_base: Int
     var ord_num_max: Int
@@ -229,11 +229,11 @@ fn _atol(str_ref: StringRef, base: Int = 10) raises -> Int:
     var result = 0
     var is_negative: Bool = False
     var start: Int = 0
-    var str_len = len(str_ref)
+    var str_len = len(str_slice)
 
-    start, is_negative = _trim_and_handle_sign(str_ref, str_len)
+    start, is_negative = _trim_and_handle_sign(str_slice, str_len)
 
-    start = _handle_base_prefix(start, str_ref, str_len, base)
+    start = _handle_base_prefix(start, str_slice, str_len, base)
 
     alias ord_0 = ord("0")
     # FIXME:
@@ -242,11 +242,11 @@ fn _atol(str_ref: StringRef, base: Int = 10) raises -> Int:
     alias ord_underscore = ord("_")
 
     if base == 0:
-        var real_base_new_start = _identify_base(str_ref, start)
+        var real_base_new_start = _identify_base(str_slice, start)
         real_base = real_base_new_start[0]
         start = real_base_new_start[1]
         if real_base == -1:
-            raise Error(_str_to_base_error(base, str_ref))
+            raise Error(_str_to_base_error(base, str_slice))
     else:
         real_base = base
 
@@ -259,7 +259,7 @@ fn _atol(str_ref: StringRef, base: Int = 10) raises -> Int:
             ord("A") + (real_base - 11),
         )
 
-    var buff = str_ref.unsafe_ptr()
+    var buff = str_slice.unsafe_ptr()
     var found_valid_chars_after_start = False
     var has_space_after_number = False
     # single underscores are only allowed between digits
@@ -270,7 +270,7 @@ fn _atol(str_ref: StringRef, base: Int = 10) raises -> Int:
         var ord_current = int(buff[pos])
         if ord_current == ord_underscore:
             if was_last_digit_underscore:
-                raise Error(_str_to_base_error(base, str_ref))
+                raise Error(_str_to_base_error(base, str_slice))
             else:
                 was_last_digit_underscore = True
                 continue
@@ -290,34 +290,34 @@ fn _atol(str_ref: StringRef, base: Int = 10) raises -> Int:
             start = pos + 1
             break
         else:
-            raise Error(_str_to_base_error(base, str_ref))
+            raise Error(_str_to_base_error(base, str_slice))
         if pos + 1 < str_len and not _isspace(buff[pos + 1]):
             var nextresult = result * real_base
             if nextresult < result:
                 raise Error(
-                    _str_to_base_error(base, str_ref)
+                    _str_to_base_error(base, str_slice)
                     + " String expresses an integer too large to store in Int."
                 )
             result = nextresult
 
     if was_last_digit_underscore or (not found_valid_chars_after_start):
-        raise Error(_str_to_base_error(base, str_ref))
+        raise Error(_str_to_base_error(base, str_slice))
 
     if has_space_after_number:
         for pos in range(start, str_len):
             if not _isspace(buff[pos]):
-                raise Error(_str_to_base_error(base, str_ref))
+                raise Error(_str_to_base_error(base, str_slice))
     if is_negative:
         result = -result
     return result
 
 
 @always_inline
-fn _trim_and_handle_sign(str_ref: StringRef, str_len: Int) -> (Int, Bool):
+fn _trim_and_handle_sign(str_slice: StringSlice, str_len: Int) -> (Int, Bool):
     """Trims leading whitespace, handles the sign of the number in the string.
 
     Args:
-        str_ref: A StringRef containing the number to parse.
+        str_slice: A StringSlice containing the number to parse.
         str_len: The length of the string.
 
     Returns:
@@ -325,18 +325,22 @@ fn _trim_and_handle_sign(str_ref: StringRef, str_len: Int) -> (Int, Bool):
         - The starting index of the number after whitespace and sign.
         - A boolean indicating whether the number is negative.
     """
-    var buff = str_ref.unsafe_ptr()
+    var buff = str_slice.unsafe_ptr()
     var start: Int = 0
     while start < str_len and _isspace(buff[start]):
         start += 1
-    var p = str_ref[start] == "+"
-    var n = str_ref[start] == "-"
+    # Whitespace-only input: there is no sign byte, and `buff[start]` would
+    # be one past the bytes covered by the slice.
+    if start >= str_len:
+        return start, False
+    var p: Bool = buff[start] == ord("+")
+    var n: Bool = buff[start] == ord("-")
     return start + (p or n), n
 
 
 @always_inline
 fn _handle_base_prefix(
-    pos: Int, str_ref: StringRef, str_len: Int, base: Int
+    pos: Int, str_slice: StringSlice, str_len: Int, base: Int
 ) -> Int:
     """Adjusts the starting position if a valid base prefix is present.
 
@@ -345,7 +349,7 @@ fn _handle_base_prefix(
 
     Args:
         pos: Current position in the string.
-        str_ref: The input string.
+        str_slice: The input StringSlice.
         str_len: Length of the input string.
         base: The specified base.
 
@@ -353,9 +357,10 @@ fn _handle_base_prefix(
         Updated position after the prefix, if applicable.
     """
     var start = pos
+    var buff = str_slice.unsafe_ptr()
     if start + 1 < str_len:
-        var prefix_char = str_ref[start + 1]
-        if str_ref[start] == "0" and (
+        var prefix_char = chr(int(buff[start + 1]))
+        if buff[start] == ord("0") and (
             (base == 2 and (prefix_char == "b" or prefix_char == "B"))
             or (base == 8 and (prefix_char == "o" or prefix_char == "O"))
             or (base == 16 and (prefix_char == "x" or prefix_char == "X"))
@@ -364,23 +369,27 @@ fn _handle_base_prefix(
     return start
 
 
-fn _str_to_base_error(base: Int, str_ref: StringRef) -> String:
+fn _str_to_base_error(base: Int, str_slice: StringSlice) -> String:
     return (
         "String is not convertible to integer with base "
         + str(base)
         + ": '"
-        + str(str_ref)
+        + str(str_slice)
         + "'"
     )
 
 
-fn _identify_base(str_ref: StringRef, start: Int) -> Tuple[Int, Int]:
-    var length = len(str_ref)
+fn _identify_base(str_slice: StringSlice, start: Int) -> Tuple[Int, Int]:
+    var length = len(str_slice)
+    var buff = str_slice.unsafe_ptr()
+    # Nothing left after whitespace/sign: `buff[start]` would be out of range.
+    if start >= length:
+        return -1, -1
     # just 1 digit, assume base 10
     if start == (length - 1):
         return 10, start
-    if str_ref[start] == "0":
-        var second_digit = str_ref[start + 1]
+    if buff[start] == ord("0"):
+        var second_digit = chr(int(buff[start + 1]))
         if second_digit == "b" or second_digit == "B":
             return 2, start + 2
         if second_digit == "o" or second_digit == "O":
@@ -390,7 +399,7 @@ fn _identify_base(str_ref: StringRef, start: Int) -> Tuple[Int, Int]:
         # checking for special case of all "0", "_" are also allowed
         var was_last_character_underscore = False
         for i in range(start + 1, length):
-            if str_ref[i] == "_":
+            if buff[i] == ord("_"):
                 if was_last_character_underscore:
                     return -1, -1
                 else:
@@ -398,9 +407,9 @@ fn _identify_base(str_ref: StringRef, start: Int) -> Tuple[Int, Int]:
                     continue
             else:
                 was_last_character_underscore = False
-            if str_ref[i] != "0":
+            if buff[i] != ord("0"):
                 return -1, -1
-    elif ord("1") <= ord(str_ref[start]) <= ord("9"):
+    elif ord("1") <= int(buff[start]) <= ord("9"):
         return 10, start
     else:
         return -1, -1
@@ -443,7 +452,7 @@ fn atol(str: String, base: Int = 10) raises -> Int:
     This follows [Python's integer literals](
     https://docs.python.org/3/reference/lexical_analysis.html#integers).
     """
-    return _atol(str._strref_dangerous(), base)
+    return _atol(str.as_string_slice(), base)
 
 
 fn _atof_error(str_ref: StringRef) -> Error:
diff --git a/stdlib/src/builtin/string_literal.mojo b/stdlib/src/builtin/string_literal.mojo
index 3931b0a08a5..7888b248f16 100644
--- a/stdlib/src/builtin/string_literal.mojo
+++ b/stdlib/src/builtin/string_literal.mojo
@@ -212,7 +212,7 @@ struct StringLiteral(
         Returns:
             An integer value that represents the string, or otherwise raises.
         """
-        return _atol(self)
+        return _atol(self.as_string_slice())
 
     @no_inline
     fn __str__(self) -> String:
diff --git a/stdlib/src/utils/stringref.mojo b/stdlib/src/utils/stringref.mojo
index dfa26408cc2..b09ea5422d4 100644
--- a/stdlib/src/utils/stringref.mojo
+++ b/stdlib/src/utils/stringref.mojo
@@ -398,7 +398,7 @@ struct StringRef(
         Returns:
             An integer value that represents the string, or otherwise raises.
         """
-        return _atol(self)
+        return atol(self)
 
     @always_inline
     fn __len__(self) -> Int: