From c52155a5288c87f89ea6e4920be6db9d380d3e2d Mon Sep 17 00:00:00 2001 From: mertcandav Date: Sun, 28 Jan 2024 16:41:35 +0300 Subject: [PATCH] jule: add underscore support for numeric literals --- std/conv/atob.jule | 8 ++- std/conv/atof.jule | 16 ++++-- std/conv/atoi.jule | 25 +++++---- std/conv/ftoa.jule | 49 +++++++---------- std/conv/itoa.jule | 8 ++- std/conv/quote.jule | 4 +- std/jule/lex/lex.jule | 113 ++++++++++++++++++++++++++++------------ std/jule/sema/eval.jule | 26 +++++---- 8 files changed, 157 insertions(+), 92 deletions(-) diff --git a/std/conv/atob.jule b/std/conv/atob.jule index 57d60c433..b500212c6 100644 --- a/std/conv/atob.jule +++ b/std/conv/atob.jule @@ -44,14 +44,12 @@ pub fn conv_bool(s: str): (bool, ConvError) { ret true, ConvError.Ok | "0" | "f" | "F" | "false" | "FALSE" | "False": ret false, ConvError.Ok + |: + ret false, ConvError.InvalidSyntax } - ret false, ConvError.InvalidSyntax } // Returns "true" or "false" according to the value of b. pub fn fmt_bool(b: bool): str { - if b { - ret "true" - } - ret "false" + ret if b { "true" } else { "false" } } diff --git a/std/conv/atof.jule b/std/conv/atof.jule index 783a78d02..758689948 100644 --- a/std/conv/atof.jule +++ b/std/conv/atof.jule @@ -132,7 +132,9 @@ fn read_float(s: str): (mantissa: u64, exp: int, neg: bool, let mut dp = 0 for i < s.len; i++ { let c = s[i] - match true { + match { + | c == '_': + continue | c == '.': if sawdot { goto loop_end @@ -203,7 +205,10 @@ loop_end: ret } let mut e = 0 - for i < s.len && ('0' <= s[i] && s[i] <= '9'); i++ { + for i < s.len && ('0' <= s[i] && s[i] <= '9' || s[i] == '_'); i++ { + if s[i] == '_' { + continue + } if e < 10000 { e = e*10 + int(s[i]) - '0' } @@ -248,6 +253,8 @@ impl Decimal { let mut sawdigits = false for i < s.len; i++ { match { + | s[i] == '_': + continue | s[i] == '.': if sawdot { ret @@ -299,7 +306,10 @@ impl Decimal { ret } let mut e = 0 - for i < s.len && ('0' <= s[i] && s[i] <= '9'); i++ { + for i < s.len && ('0' <= s[i] && s[i] <= '9' || s[i] == '_'); i++ { + if s[i] == '_' { + continue + } if e < 10000 { e = e*10 + int(s[i]) - '0' } diff --git a/std/conv/atoi.jule b/std/conv/atoi.jule index f920e05a4..4a6f4718a 100644 --- a/std/conv/atoi.jule +++ b/std/conv/atoi.jule @@ -40,13 +40,13 @@ const __INT_SIZE = 32 << (^uint(0) >> 63) // Is the size in bits of an int or uint value. pub const INT_SIZE = __INT_SIZE -const MAX_UINT64 = u64.MAX // 1<<64 - 1 - // Is a lower-case letter if and only if // c is either that lower-case letter or the equivalent upper-case letter. // Instead of writing c == 'x' || c == 'X' one can write lower(c) == 'x'. // Note that lower of non-letters can produce other non-letters. -fn lower(c: byte): byte { ret c | ('x' - 'X') } +fn lower(c: byte): byte { + ret c | ('x' - 'X') +} // Is like parse_int but for unsigned numbers. // @@ -88,19 +88,24 @@ pub fn parse_uint(mut s: str, mut base: int, mut bit_size: int): (u64, ConvError ret 0, ConvError.InvalidBitSize } - // Cutoff is the smallest number such that cutoff*base > MAX_UINT64. + // Cutoff is the smallest number such that cutoff*base > u64.MAX. // Use compile-time constants for common cases. let mut cutoff: u64 = 0 match base { - | 10: cutoff = MAX_UINT64/10 + 1 - | 16: cutoff = MAX_UINT64/16 + 1 - |: cutoff = MAX_UINT64/u64(base) + 1 + | 10: + cutoff = u64.MAX/10 + 1 + | 16: + cutoff = u64.MAX/16 + 1 + |: + cutoff = u64.MAX/u64(base) + 1 } let mut max_val: u64 = 0 match bit_size { - | 32: max_val = u32.MAX - | 64: max_val = u64.MAX + | 32: + max_val = u32.MAX + | 64: + max_val = u64.MAX } /* let mut prec = 1 @@ -117,6 +122,8 @@ pub fn parse_uint(mut s: str, mut base: int, mut bit_size: int): (u64, ConvError d = c - '0' | 'a' <= lower(c) && lower(c) <= 'z': d = lower(c) - 'a' + 10 + | c == '_': + continue |: ret 0, ConvError.InvalidSyntax } diff --git a/std/conv/ftoa.jule b/std/conv/ftoa.jule index dc8862c2c..975f46d2e 100644 --- a/std/conv/ftoa.jule +++ b/std/conv/ftoa.jule @@ -76,29 +76,29 @@ struct DecimalSlice { // The special precision -1 uses the smallest number of digits // necessary such that parse_float will return f exactly. pub fn fmt_float(f: f64, fmt: byte, prec: int, bit_size: int): str { - ret str(generic_ftoa(nil, f, fmt, prec, bit_size)) + ret str(generic_ftoa(make([]byte, 0, max(prec+4, 24)), f, fmt, prec, bit_size)) } fn generic_ftoa(mut dst: []byte, val: f64, fmt: byte, mut prec: int, bit_size: int): []byte { let mut bits: u64 = 0 - let mut flt: *FloatInfo = nil + let mut flt: &FloatInfo = nil match bit_size { | 32: bits = u64(math::f32_bits(f32(val))) - flt = &F32_INFO + flt = unsafe { (&FloatInfo)(&F32_INFO) } | 64: bits = math::f64_bits(val) - flt = &F64_INFO + flt = unsafe { (&FloatInfo)(&F64_INFO) } |: panic("std::conv: illegal bit_size") } - let neg = unsafe { bits>>(flt.expbits+flt.mantbits) } != 0 - let mut exp = unsafe { int(bits>>flt.mantbits) & int(1<>(flt.expbits+flt.mantbits) != 0 + let mut exp = int(bits>>flt.mantbits) & int(1< b { - ret a - } - ret b + ret if a > b { a } else { b } } diff --git a/std/conv/itoa.jule b/std/conv/itoa.jule index d212532db..52dc890c9 100644 --- a/std/conv/itoa.jule +++ b/std/conv/itoa.jule @@ -79,7 +79,9 @@ pub fn fmt_int(i: i64, base: int): str { } // Is equivalent to fmt_int(i64(i), 10). -pub fn itoa(i: int): str { ret fmt_int(i64(i), 10) } +pub fn itoa(i: int): str { + ret fmt_int(i64(i), 10) +} // Returns the string for an i with 0 <= i < nSmalls. fn small(i: int): str { @@ -89,7 +91,9 @@ fn small(i: int): str { ret SMALLS_STR[i*2 : i*2+2] } -fn is_power_of_two(x: int): bool { ret x&(x-1) == 0 } +fn is_power_of_two(x: int): bool { + ret x&(x-1) == 0 +} // Computes the string representation of u in the given base. // If neg is set, u is treated as negative int64 value. If append_ is diff --git a/std/conv/quote.jule b/std/conv/quote.jule index 8b8a69d21..e549ca052 100644 --- a/std/conv/quote.jule +++ b/std/conv/quote.jule @@ -35,5 +35,5 @@ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // ==================================================== -const lowerhex = "0123456789abcdef" -const upperhex = "0123456789ABCDEF" +const LOWERHEX = "0123456789abcdef" +const UPPERHEX = "0123456789ABCDEF" diff --git a/std/jule/lex/lex.jule b/std/jule/lex/lex.jule index d48bfa1c2..9efebb8d3 100644 --- a/std/jule/lex/lex.jule +++ b/std/jule/lex/lex.jule @@ -131,6 +131,9 @@ fn float_fmt_e(&txt: []byte, mut i: int): (lit: str) { } let mut b = txt[i] + if b == '_' { + ret + } if b == '+' || b == '-' { i++ // Skip operator if i >= txt.len { @@ -141,7 +144,7 @@ fn float_fmt_e(&txt: []byte, mut i: int): (lit: str) { let first = i for i < txt.len; i++ { b = txt[i] - if !is_decimal(b) { + if b != '_' && !is_decimal(b) { break } } @@ -152,7 +155,9 @@ fn float_fmt_e(&txt: []byte, mut i: int): (lit: str) { ret str(txt[:i]) } -fn float_fmt_p(&txt: []byte, i: int): str { ret float_fmt_e(txt, i) } +fn float_fmt_p(&txt: []byte, i: int): str { + ret float_fmt_e(txt, i) +} fn float_fmt_dotnp(&txt: []byte, mut i: int): str { if txt[i] != '.' { @@ -164,9 +169,14 @@ loop: for i < txt.len; i++ { let b = txt[i] match { - | is_decimal(b): continue - | is_float_fmt_p(b, i): ret float_fmt_p(txt, i) - |: break loop + | b == '_' | is_decimal(b): + continue + + | is_float_fmt_p(b, i): + ret float_fmt_p(txt, i) + + |: + break loop } } ret "" @@ -193,7 +203,7 @@ fn float_num(&txt: []byte, mut i: int): (lit: str) { if i > 1 && (b == 'e' || b == 'E') { ret float_fmt_e(txt, i) } - if !is_decimal(b) { + if b != '_' && !is_decimal(b) { break } } @@ -210,9 +220,14 @@ loop: for i < txt.len; i++ { let b = txt[i] match { - | b == '.': ret float_num(txt, i) - | is_float_fmt_e(b, i): ret float_fmt_e(txt, i) - | !is_decimal(b): break loop + | b == '.': + ret float_num(txt, i) + | b == '_': + continue + | is_float_fmt_e(b, i): + ret float_fmt_e(txt, i) + | !is_decimal(b): + break loop } } @@ -233,7 +248,7 @@ fn binary_num(&txt: []byte): (lit: str) { const BINARY_START = 2 let mut i = BINARY_START for i < txt.len; i++ { - if !is_binary(txt[i]) { + if txt[i] != '_' && !is_binary(txt[i]) { break } } @@ -244,8 +259,13 @@ fn binary_num(&txt: []byte): (lit: str) { ret str(txt[:i]) } -fn is_float_fmt_e(b: byte, i: int): bool { ret i > 0 && (b == 'e' || b == 'E') } -fn is_float_fmt_p(b: byte, i: int): bool { ret i > 0 && (b == 'p' || b == 'P') } +fn is_float_fmt_e(b: byte, i: int): bool { + ret i > 0 && (b == 'e' || b == 'E') +} + +fn is_float_fmt_p(b: byte, i: int): bool { + ret i > 0 && (b == 'p' || b == 'P') +} fn is_float_fmt_dotnp(&txt: []byte, mut i: int): bool { if txt[i] != '.' { @@ -257,9 +277,12 @@ loop: for i < txt.len; i++ { let b = txt[i] match { - | is_decimal(b): continue - | is_float_fmt_p(b, i): ret true - |: break loop + | b == '_' | is_decimal(b): + continue + | is_float_fmt_p(b, i): + ret true + |: + break loop } } @@ -268,20 +291,29 @@ loop: fn is_float_fmt_dotp(&txt: []byte, i: int): bool { match { - | txt.len < 3: fall - | txt[i] != '.': fall - | txt[i+1] != 'p' && txt[i+1] != 'P': ret false - |: ret true + | txt.len < 3: + fall + | txt[i] != '.': + fall + | txt[i+1] != 'p' && txt[i+1] != 'P': + ret false + |: + ret true } } fn is_float_fmt_dotfp(&txt: []byte, i: int): bool { match { - | txt.len < 4: fall - | txt[i] != '.': fall - | txt[i+1] != 'f' && txt[i+1] != 'F': fall - | txt[i+2] != 'p' && txt[i+1] != 'P': ret false - |: ret true + | txt.len < 4: + fall + | txt[i] != '.': + fall + | txt[i+1] != 'f' && txt[i+1] != 'F': + fall + | txt[i+2] != 'p' && txt[i+1] != 'P': + ret false + |: + ret true } } @@ -295,20 +327,28 @@ fn octal_num(&txt: []byte): (lit: str) { let mut octal_start = 1 + let mut o = false if txt[1] == 'o' { if txt.len < 3 { ret } octal_start++ + o = true } let mut i = octal_start for i < txt.len; i++ { let b = txt[i] + if b == '.' { + if o { + ret "" + } + ret float_num(txt, i) + } if is_float_fmt_e(b, i) { ret float_fmt_e(txt, i) } - if !is_octal(b) { + if b != '_' && !is_octal(b) { break } } @@ -335,17 +375,13 @@ loop: match { | is_float_fmt_dotp(txt, i): ret float_fmt_dotp(txt, i) - | is_float_fmt_dotfp(txt, i): ret float_fmt_dotfp(txt, i) - | is_float_fmt_p(b, i): ret float_fmt_p(txt, i) - | is_float_fmt_dotnp(txt, i): ret float_fmt_dotnp(txt, i) - - | !is_hex(b): + | b != '_' && !is_hex(b): break loop } } @@ -374,11 +410,19 @@ fn hex_escape(&txt: []byte, n: int): (seq: str) { } // Pattern (RegEx): ^\\U.{8} -fn big_unicode_point_escape(&txt: []byte): str { ret hex_escape(txt, 10) } +fn big_unicode_point_escape(&txt: []byte): str { + ret hex_escape(txt, 10) +} + // Pattern (RegEx): ^\\u.{4} -fn little_unicode_point_escape(&txt: []byte): str { ret hex_escape(txt, 6) } +fn little_unicode_point_escape(&txt: []byte): str { + ret hex_escape(txt, 6) +} + // Pattern (RegEx): ^\\x.. -fn hex_byte_escape(&txt: []byte): str { ret hex_escape(txt, 4) } +fn hex_byte_escape(&txt: []byte): str { + ret hex_escape(txt, 4) +} // Patter (RegEx): ^\\[0-7]{3} fn byte_escape(&txt: []byte): (seq: str) { @@ -522,6 +566,9 @@ impl Lex { // Returns literal if next token is numeric, returns empty string if not. fn num(mut self, &txt: []byte): (lit: str) { + if txt[0] == '_' { + ret "" + } lit = hex_num(txt) if lit != "" { goto end diff --git a/std/jule/sema/eval.jule b/std/jule/sema/eval.jule index b599c61a2..9b5b425da 100644 --- a/std/jule/sema/eval.jule +++ b/std/jule/sema/eval.jule @@ -33,7 +33,7 @@ use std::jule::ast::{ use std::jule::build::{LogMsg, Directive, PATH_STDLIB, logf} use std::jule::constant::{Const} use lit for std::jule::constant::lit -use std::jule::lex::{ +use lex for std::jule::lex::{ self, Token, TokenId, @@ -550,19 +550,27 @@ impl Eval { fn lit_num(mut self, &l: &LitExpr): &Data { match { - | std::jule::lex::is_float(l.value): ret self.lit_float(l) - |: ret self.lit_int(l) + | lex::is_float(l.value): + ret self.lit_float(l) + |: + ret self.lit_int(l) } } fn eval_lit(mut self, lit: &LitExpr): &Data { match { - | lit.is_nil(): ret self.lit_nil() - | is_str(lit.value): ret self.lit_str(lit) - | is_bool(lit.value): ret self.lit_bool(lit) - | is_rune(lit.value): ret self.lit_rune(lit) - | std::jule::lex::is_num(lit.value): ret self.lit_num(lit) - |: ret nil + | lit.is_nil(): + ret self.lit_nil() + | is_str(lit.value): + ret self.lit_str(lit) + | is_bool(lit.value): + ret self.lit_bool(lit) + | is_rune(lit.value): + ret self.lit_rune(lit) + | lex::is_num(lit.value): + ret self.lit_num(lit) + |: + ret nil } }