From e748979a9794db307c26cf4dc8458a1ac4c0993f Mon Sep 17 00:00:00 2001 From: Martin Elsman Date: Wed, 4 Oct 2023 05:48:49 +0200 Subject: [PATCH] Real improvements (#133) * IEEEReal support * Extended Real support * Fixes and cleanup * Extended real testing --- basis/ByteTable.sml | 8 +- basis/IEEEReal.sml | 427 ++++++++++++++++++++++++++++++++++++++ basis/IEEE_REAL.sig | 126 +++++++++++ basis/Initial.sml | 41 +++- basis/IntInf.sml | 1 - basis/OS_PROCESS.sml | 14 +- basis/REAL.sig | 379 +++++++++++++++++++++++++++------ basis/Real.sml | 171 ++++++++++++--- basis/Socket.sml | 23 +- basis/basis.mlb | 26 ++- basis/wordtables.sml | 4 +- doc/license/MLKit-LICENSE | 8 +- src/Runtime/Math.c | 119 ++++++++++- src/Runtime/Math.h | 3 + test/real.sml | 147 ++++++++++++- test/real.sml.out.ok | 36 ++++ 16 files changed, 1376 insertions(+), 157 deletions(-) create mode 100644 basis/IEEEReal.sml create mode 100644 basis/IEEE_REAL.sig diff --git a/basis/ByteTable.sml b/basis/ByteTable.sml index 66ba03130..42f95be05 100644 --- a/basis/ByteTable.sml +++ b/basis/ByteTable.sml @@ -30,13 +30,13 @@ functor ByteTable(eqtype table fun sub_vector_unsafe (v:vector,i:int) : elem = prim ("__bytetable_sub", (v,i)) fun alloc_table_unsafe (i:int) : table = prim("allocStringML", i) fun alloc_vector_unsafe (i:int) : vector = prim("allocStringML", i) - fun update_unsafe(t:table,i:int,c:elem) : unit = prim("__bytetable_update", (t, i, c)) - fun update_vector_unsafe(v:vector,i:int,c:elem) : unit = prim("__bytetable_update", (v, i, c)) + fun update_unsafe (t:table,i:int,c:elem) : unit = prim("__bytetable_update", (t, i, c)) + fun update_vector_unsafe (v:vector,i:int,c:elem) : unit = prim("__bytetable_update", (v, i, c)) fun fromList (es : elem list) : table = prim ("implodeCharsML", es) fun concat (vs : table list) : table = prim ("implodeStringML", vs) fun length (t:table): int = prim ("__bytetable_size", t) fun length_vector (v:vector): int = prim ("__bytetable_size", v) - fun null() : elem = prim("id",0:int) + fun null () : elem = prim("id",0:int) (* Body *) fun explode (t:table) : elem list = @@ -45,7 +45,7 @@ functor ByteTable(eqtype table in h (length t - 1, nil) end - val maxLen = 4 * 1024 * 1024 * 1024 (* 4Gb *) + val maxLen = Initial.bytetable_maxlen fun sub (t : table, i) : elem = if i < 0 orelse i >= length t then raise Subscript diff --git a/basis/IEEEReal.sml b/basis/IEEEReal.sml new file mode 100644 index 000000000..ed240dc25 --- /dev/null +++ b/basis/IEEEReal.sml @@ -0,0 +1,427 @@ +(* Much of this implementation is taken from the MLton implementation +of the basis library. *) + +structure IEEEReal : IEEE_REAL = +struct + +exception Unordered + +datatype real_order = LESS | EQUAL | GREATER | UNORDERED + +datatype float_class + = NAN + | INF + | ZERO + | NORMAL + | SUBNORMAL + +datatype rounding_mode + = TO_NEAREST + | TO_NEGINF + | TO_POSINF + | TO_ZERO + +fun int_of_rm rm = + case rm of + TO_NEAREST => 0 + | TO_NEGINF => 1 + | TO_POSINF => 2 + | TO_ZERO => 3 + +fun rm_of_int i = + case i of + 0 => TO_NEAREST + | 1 => TO_NEGINF + | 2 => TO_POSINF + | 3 => TO_ZERO + | _ => raise Fail ("IEEEReal.rm_of_int: " ^ Int.toString i) + +fun setRoundingMode (rm : rounding_mode) : unit = + prim("floatSetRoundingMode", int_of_rm rm) + +fun getRoundingMode () : rounding_mode = + rm_of_int (prim("floatGetRoundingMode", ())) + +type decimal_approx = { + class : float_class, + sign : bool, + digits : int list, + exp : int +} + +structure DecimalApprox = + struct + type t = {class: float_class, + digits: int list, + exp: int, + sign: bool} + + val inf: t = {class = INF, + digits = [], + exp = 0, + sign = false} + + val zero: t = {class = ZERO, + digits = [], + exp = 0, + sign = false} + end + +type decimal_approx = DecimalApprox.t + +fun 'a scan reader (state: 'a) = + let + val state = StringCvt.skipWS reader state + fun readc (c, state, f) = + case reader state of + NONE => NONE + | SOME (c', state') => + if c = Char.toLower c' + then f state' + else NONE + fun readString (s, state, failure, success) = + let + val n = String.size s + fun loop (i, state) = + if i = n + then success state + else + case reader state of + NONE => failure () + | SOME (c, state) => + if Char.toLower c = String.sub (s, i) + then loop (i + 1, state) + else failure () + in + loop (0, state) + end + fun charToDigit c = Char.ord c - Char.ord #"0" + fun digitStar (ds: int list, state) = + let + fun done () = (rev ds, state) + in + case reader state of + NONE => done () + | SOME (c, state) => + if Char.isDigit c + then digitStar (charToDigit c :: ds, state) + else done () + end + fun digitPlus (state, failure, success) = + case reader state of + NONE => failure () + | SOME (c, state) => + if Char.isDigit c + then success (digitStar ([charToDigit c], state)) + else failure () + (* [+~-]?[0-9]+ *) + type exp = {digits: int list, negate: bool} + fun 'b afterE (state: 'a, + failure: unit -> 'b, + success: exp * 'a -> 'b) : 'b = + case reader state of + NONE => failure () + | SOME (c, state) => + let + fun neg () = + digitPlus (state, failure, + fn (ds, state) => + success ({digits = ds, negate = true}, + state)) + in + case c of + #"+" => digitPlus (state, failure, + fn (ds, state) => + success ({digits = ds, + negate = false}, + state)) + | #"~" => neg () + | #"-" => neg () + | _ => + if Char.isDigit c + then + let + val (ds, state) = + digitStar ([charToDigit c], state) + in + success ({digits = ds, negate = false}, + state) + end + else failure () + end + (* e[+~-]?[0-9]+)? *) + fun exp (state: 'a, failure, success) = + case reader state of + NONE => failure () + | SOME (c, state) => + case Char.toLower c of + #"e" => afterE (state, failure, success) + | _ => failure () + (* (\.[0-9]+)(e[+~-]?[0-9]+)? *) + fun 'b afterDot (state: 'a, + failure: unit -> 'b, + success: int list * exp * 'a -> 'b) = + digitPlus (state, failure, + fn (frac, state) => + exp (state, + fn () => success (frac, + {digits = [], negate = false}, + state), + fn (e, state) => success (frac, e, state))) + fun stripLeadingZeros (ds: int list): int * int list = + let + fun loop (i, ds) = + case ds of + [] => (i, []) + | d :: ds' => + if d = 0 + then loop (i + 1, ds') + else (i, ds) + in + loop (0, ds) + end + fun stripTrailingZeros ds = + case ds of + [] => [] + | _ => + case List.last ds of + 0 => rev (#2 (stripLeadingZeros (rev ds))) + | _ => ds + fun done (whole: int list, + frac: int list, + {digits: int list, negate: bool}, + state: 'a) = + let + val (_, il) = stripLeadingZeros whole + val fl = stripTrailingZeros frac + datatype exp = + Int of int + | Overflow of DecimalApprox.t + val exp = + case (SOME (let + val i = + List.foldl (fn (d, n) => n * 10 + d) + 0 digits + in + if negate then Int.~ i else i + end) + handle General.Overflow => NONE) of + NONE => Overflow (if negate + then DecimalApprox.zero + else DecimalApprox.inf) + | SOME i => Int i + val da = + case il of + [] => + (case fl of + [] => DecimalApprox.zero + | _ => + case exp of + Int e => + let + val (m, fl) = stripLeadingZeros fl + in + {class = NORMAL, + digits = fl, + exp = e - m, + sign = false} + end + | Overflow da => da) + | _ => + case exp of + Int e => + {class = NORMAL, + digits = stripTrailingZeros (il @ fl), + exp = e + length il, + sign = false} + | Overflow da => da + in + SOME (da, state) + end + fun normal' (c, state) = + case Char.toLower c of + #"i" => readc (#"n", state, fn state => + readc (#"f", state, fn state => + let + fun res state = + SOME ({class = INF, + digits = [], + exp = 0, + sign = false}, + state) + in + readString ("inity", state, + fn () => res state, + res) + end)) + | #"n" => readc (#"a", state, fn state => + readc (#"n", state, fn state => + SOME ({class = NAN, + digits = [], + exp = 0, + sign = false}, + state))) + (* (([0-9]+(\.[0-9]+)?)|(\.[0-9]+))(e[+~-]?[0-9]+)? *) + | #"." => afterDot (state, + fn () => NONE, + fn (frac, exp, state) => + done ([], frac, exp, state)) + | _ => + if Char.isDigit c + then + (* ([0-9]+(\.[0-9]+)?)(e[+~-]?[0-9]+)? *) + let + val (whole, state) = + digitStar ([charToDigit c], state) + fun no () = done (whole, [], + {digits = [], negate = false}, + state) + in + case reader state of + NONE => no () + | SOME (c, state) => + case Char.toLower c of + #"." => + afterDot (state, no, + fn (frac, e, state) => + done (whole, frac, e, state)) + | #"e" => + afterE (state, no, + fn (e, state) => + done (whole, [], e, state)) + | _ => no () + end + else NONE + fun normal state = + case reader state of + NONE => NONE + | SOME z => normal' z + fun negate state = + case normal state of + NONE => NONE + | SOME ({class, digits, exp, ...}, state) => + SOME ({class = class, + digits = digits, + exp = exp, + sign = true}, + state) + in + case reader state of + NONE => NONE + | SOME (c, state) => + case c of + #"~" => negate state + | #"-" => negate state + | #"+" => normal state + | _ => normal' (c, state) + end + +fun fromString s = StringCvt.scanString scan s + +fun digitToChar (n: int): char = String.sub ("0123456789ABCDEF", n) + +fun toString {class, sign, digits, exp}: string = + let + fun digitStr () = implode (map digitToChar digits) + fun norm () = + let val num = "0." ^ digitStr() + in if exp = 0 + then num + else concat [num, "E", Int.toString exp] + end + val num = + case class of + ZERO => "0.0" + | NORMAL => norm () + | SUBNORMAL => norm () + | INF => "inf" + | NAN => "nan" + in if sign + then "~" ^ num + else num + end + +end + +(** + +[exception Unordered] Exception that may be raised by Real.compare. + +[setRoundingMode m] +[getRoundingMode()] + +These set and get the rounding mode of the underlying hardware. The +IEEE standard requires TO_NEAREST as the default rounding mode. + + Implementation note: + + Some platforms do not support all of the rounding modes. An SML + implementation built on these platforms will necessarily be + non-conforming with, presumably, setRoundingMode raising an + exception for the unsupported modes. + +[type decimal_approx] This type provides a structured decimal +representation of a real. The class field indicates the real class. If +sign is true, the number is negative. The integers in the digits list +must be digits, i.e., between 0 and 9. When class is NORMAL or +SUBNORMAL, a value of type decimal_approx with digits = [d(1), d(2), +..., d(n)] corresponds to the real number s * 0.d(1)d(2)...d(n) +10(exp), where s is -1 if sign is true and 1 otherwise. When class is +ZERO or INF, the value corresponds to zero or infinity, respectively, +with its sign determined by sign. When class is NAN, the value +corresponds to an unspecified NaN value. + +[toString d] returns a string representation of d. Assuming digits = +[d(1), d(2), ..., d(n)] and ignoring the sign and exp fields, toString +generates the following strings depending on the class field: + + ZERO "0.0" + NORMAL "0.d(1)d(2)...d(n)" + SUBNORMAL "0.d(1)d(2)...d(n)" + INF "inf" + NAN "nan" + +If the sign field is true, a #"~" is prepended. If the exp field is +non-zero and the class is NORMAL or SUBNORMAL, the string +"E"^(Integer.toString exp) is appended. The composition toString o +REAL.toDecimal is equivalent to REAL.fmt StringCvt.EXACT. + +[scan getc strm] +[fromString s] + +These functions scan a decimal approximation from a prefix of a +character source. Initial whitespace is ignored. The first reads from +the character stream src using the character input function getc. It +returns SOME(d, rest) if the decimal approximation d can be parsed; +rest is the remainder of the character stream. NONE is returned +otherwise. The second form uses the string s as input. It returns the +decimal approximation on success and NONE otherwise. The fromString +function is equivalent to StringCvt.scanString scan. + +The functions accept real numbers with the following format: + + [+~-]?([0-9]+.[0-9]+? | .[0-9]+)(e | E)[+~-]?[0-9]+? + +The optional sign determines the value of the sign field, with a +default of false. Initial zeros are stripped from the integer part and +trailing zeros are stripped from the fractional part, yielding two +lists il and fl, respectively, of digits. If il is non-empty, then +class is set to NORMAL, digits is set to il@fl with any trailing zeros +removed and exp is set to the length of il plus the value of the +scanned exponent, if any. If il is empty and so is fl, then class is +set to ZERO, digits = [] and exp = 0. Finally, if il is empty but fl +is not, let m be the number of leading zeros in fl and let fl' be fl +after the leading zeros are removed. Then, class is set to NORMAL, +digits is set to fl' and exp is set to -m plus the value of the +scanned exponent, if any. They also accept the following string +representations of non-finite values: + + [+~-]?(inf | infinity | nan) + +where the alphabetic characters are case-insensitive. The optional +sign determines the value of the sign field, with a default of +false. In the first and second cases, d will have class set to INF. In +the third case, class is set to NAN. In all these cases, d will have +digits = [] and exp = 0. + +*) diff --git a/basis/IEEE_REAL.sig b/basis/IEEE_REAL.sig new file mode 100644 index 000000000..3c715bb2e --- /dev/null +++ b/basis/IEEE_REAL.sig @@ -0,0 +1,126 @@ +(** Types and operations related to an IEEE implementation of reals. + +The IEEEReal structure defines types associated with an IEEE +implementation of floating-point numbers. In addition, it provides +control for the floating-point hardware's rounding mode. Refer to the +IEEE standard 754-1985 and the ANSI/IEEE standard 854-1987 for +additional information. + +*) + +signature IEEE_REAL = + sig + exception Unordered + + datatype real_order = LESS | EQUAL | GREATER | UNORDERED + + datatype float_class + = NAN + | INF + | ZERO + | NORMAL + | SUBNORMAL + + datatype rounding_mode + = TO_NEAREST + | TO_NEGINF + | TO_POSINF + | TO_ZERO + + val setRoundingMode : rounding_mode -> unit + val getRoundingMode : unit -> rounding_mode + + type decimal_approx = { + class : float_class, + sign : bool, + digits : int list, + exp : int + } + + val toString : decimal_approx -> string + val scan : (char, 'a) StringCvt.reader -> (decimal_approx, 'a) StringCvt.reader + val fromString : string -> decimal_approx option + end + +(** + +[exception Unordered] + +[setRoundingMode m] +[getRoundingMode()] + +These set and get the rounding mode of the underlying hardware. The +IEEE standard requires TO_NEAREST as the default rounding mode. + + Implementation note: + + Some platforms do not support all of the rounding modes. An SML + implementation built on these platforms will necessarily be + non-conforming with, presumably, setRoundingMode raising an + exception for the unsupported modes. + +[type decimal_approx] This type provides a structured decimal +representation of a real. The class field indicates the real class. If +sign is true, the number is negative. The integers in the digits list +must be digits, i.e., between 0 and 9. When class is NORMAL or +SUBNORMAL, a value of type decimal_approx with digits = [d(1), d(2), +..., d(n)] corresponds to the real number s * 0.d(1)d(2)...d(n) +10(exp), where s is -1 if sign is true and 1 otherwise. When class is +ZERO or INF, the value corresponds to zero or infinity, respectively, +with its sign determined by sign. When class is NAN, the value +corresponds to an unspecified NaN value. + +[toString d] returns a string representation of d. Assuming digits = +[d(1), d(2), ..., d(n)] and ignoring the sign and exp fields, toString +generates the following strings depending on the class field: + + ZERO "0.0" + NORMAL "0.d(1)d(2)...d(n)" + SUBNORMAL "0.d(1)d(2)...d(n)" + INF "inf" + NAN "nan" + +If the sign field is true, a #"~" is prepended. If the exp field is +non-zero and the class is NORMAL or SUBNORMAL, the string +"E"^(Integer.toString exp) is appended. The composition toString o +REAL.toDecimal is equivalent to REAL.fmt StringCvt.EXACT. + +[scan getc strm] +[fromString s] + +These functions scan a decimal approximation from a prefix of a +character source. Initial whitespace is ignored. The first reads from +the character stream src using the character input function getc. It +returns SOME(d, rest) if the decimal approximation d can be parsed; +rest is the remainder of the character stream. NONE is returned +otherwise. The second form uses the string s as input. It returns the +decimal approximation on success and NONE otherwise. The fromString +function is equivalent to StringCvt.scanString scan. + +The functions accept real numbers with the following format: + + [+~-]?([0-9]+.[0-9]+? | .[0-9]+)(e | E)[+~-]?[0-9]+? + +The optional sign determines the value of the sign field, with a +default of false. Initial zeros are stripped from the integer part and +trailing zeros are stripped from the fractional part, yielding two +lists il and fl, respectively, of digits. If il is non-empty, then +class is set to NORMAL, digits is set to il@fl with any trailing zeros +removed and exp is set to the length of il plus the value of the +scanned exponent, if any. If il is empty and so is fl, then class is +set to ZERO, digits = [] and exp = 0. Finally, if il is empty but fl +is not, let m be the number of leading zeros in fl and let fl' be fl +after the leading zeros are removed. Then, class is set to NORMAL, +digits is set to fl' and exp is set to -m plus the value of the +scanned exponent, if any. They also accept the following string +representations of non-finite values: + + [+~-]?(inf | infinity | nan) + +where the alphabetic characters are case-insensitive. The optional +sign determines the value of the sign field, with a default of +false. In the first and second cases, d will have class set to INF. In +the third case, class is set to NAN. In all these cases, d will have +digits = [] and exp = 0. + +*) diff --git a/basis/Initial.sml b/basis/Initial.sml index 8c0429ef4..a7a481edb 100644 --- a/basis/Initial.sml +++ b/basis/Initial.sml @@ -39,9 +39,13 @@ structure Initial = local fun get_posInf () : real = prim ("posInfFloat", ()) fun get_negInf () : real = prim ("negInfFloat", ()) + fun get_maxFinite () : real = prim("maxFiniteFloat", ()) in val posInf = get_posInf() val negInf = get_negInf() + val minPos = 0.5E~323 + val maxFinite : real = get_maxFinite() + val minNormalPos = 0.22250738585072014E~307 end (* Math structure *) @@ -53,12 +57,16 @@ structure Initial = val NaN = sqrt ~1.0 end + (* ByteTable and WordTable functors *) + val bytetable_maxlen : int = 4 * 1024 * 1024 * 1024 (* 4Gb *) + val wordtable_maxlen : int = 123456789*100 (* arbitrary chosen. *) + (* Int structure. Integers are untagged (or tagged if GC is enabled), * and there is a limit to the size of immediate integers that the Kit * accepts. We should change the lexer such that it does not convert a * string representation of an integer constant into an internal * integer, as this makes the the kit dependent on the precision of - * the compiler (SML/NJ) that we use to compile the Kit. *) + * the compiler that we use to compile the Kit. *) type int0 = int @@ -81,11 +89,6 @@ structure Initial = if precisionInt0 = 63 then (fromI63 minInt63, fromI63 maxInt63) else (fromI64 minInt64, fromI64 maxInt64) -(* - val maxInt0 : int = prim("max_fixed_int", 0) - val minInt0 : int = prim("min_fixed_int", 0) -*) - (* TextIO *) val stdIn_stream : int = prim ("stdInStream", 0) val stdOut_stream : int = prim ("stdOutStream", 0) @@ -338,4 +341,30 @@ structure Initial = end end + structure SocketDefs = struct + + val { AF_INET : int + , AF_UNIX : int + , INADDR_ANY : int + , SHUT_RD : int + , SHUT_RDWR : int + , SHUT_WR : int + , SOCK_DGRAM : int + , SOCK_RAW : int + , SOCK_STREAM : int + , SO_BROADCAST : int + , SO_DEBUG : int + , SO_DONTROUTE : int + , SO_ERROR : int + , SO_KEEPALIVE : int + , SO_LINGER : int + , SO_OOBINLINE : int + , SO_RCVBUF : int + , SO_REUSEADDR : int + , SO_SNDBUF : int + , SO_TYPE : int + } = prim("sml_sock_getDefines",()) + + end + end diff --git a/basis/IntInf.sml b/basis/IntInf.sml index 4fcdc5f91..44a0827b9 100644 --- a/basis/IntInf.sml +++ b/basis/IntInf.sml @@ -344,7 +344,6 @@ structure IntInf : INT_INF = val nbase : int31 = ~0x40000000 (* = ~2^lgBase *) fun maxDigit() : int31 = Int31.~(nbase + 1) -(* fun realBase() = (real (maxDigit())) + 1.0 *) fun lgHBase() : int31 = Int31.quot (lgBase, 2) (* half digits *) fun hbase() : word31 = Word31.<<(0w1, itow' (lgHBase())) diff --git a/basis/OS_PROCESS.sml b/basis/OS_PROCESS.sml index a753111c9..f32928494 100644 --- a/basis/OS_PROCESS.sml +++ b/basis/OS_PROCESS.sml @@ -3,15 +3,15 @@ signature OS_PROCESS = sig type status - val success : status - val failure : status + val success : status + val failure : status val isSuccess : status -> bool - val system : string -> status - val atExit : (unit -> unit) -> unit - val exit : status -> 'a + val system : string -> status + val atExit : (unit -> unit) -> unit + val exit : status -> 'a val terminate : status -> 'a - val getEnv : string -> string option - val sleep : Time.time -> unit + val getEnv : string -> string option + val sleep : Time.time -> unit end (** diff --git a/basis/REAL.sig b/basis/REAL.sig index 47001b18c..dc9425a94 100644 --- a/basis/REAL.sig +++ b/basis/REAL.sig @@ -1,3 +1,8 @@ +structure LargeReal = +struct +type real = real +end + (** Operations on floating point values. The REAL signature specifies structures that implement floating-point @@ -21,68 +26,172 @@ signature REAL = sig structure Math : MATH - val ~ : real -> real - val + : real * real -> real - val - : real * real -> real - val * : real * real -> real - val / : real * real -> real - val rem : real * real -> real - val abs : real -> real - val min : real * real -> real - val max : real * real -> real - val sign : real -> int - val compare : real * real -> order - - val sameSign : real * real -> bool - val toDefault : real -> real - val fromDefault : real -> real - val fromInt : int -> real - - val realFloor : real -> real - val realCeil : real -> real - val realTrunc : real -> real - val realRound : real -> real - - val floor : real -> int - val ceil : real -> int - val trunc : real -> int - val round : real -> int - - val isNan : real -> bool - val isFinite : real -> bool - val posInf : real - val negInf : real - - val > : real * real -> bool - val >= : real * real -> bool - val < : real * real -> bool - val <= : real * real -> bool - val == : real * real -> bool - val != : real * real -> bool - - val toString : real -> string - val fromString : string -> real option - val scan : (char, 'a) StringCvt.reader -> (real, 'a) StringCvt.reader - val fmt : StringCvt.realfmt -> real -> string + val radix : int + val precision : int + val maxFinite : real + val minPos : real + val minNormalPos : real + val posInf : real + val negInf : real + + val + : real * real -> real + val - : real * real -> real + val * : real * real -> real + val / : real * real -> real + val rem : real * real -> real + val *+ : real * real * real -> real + val *- : real * real * real -> real + val ~ : real -> real + val abs : real -> real + + val min : real * real -> real + val max : real * real -> real + + val sign : real -> int + val signBit : real -> bool + + val sameSign : real * real -> bool + val copySign : real * real -> real + + val compare : real * real -> order + val compareReal : real * real -> IEEEReal.real_order + + val < : real * real -> bool + val <= : real * real -> bool + val > : real * real -> bool + val >= : real * real -> bool + val == : real * real -> bool + val != : real * real -> bool + + val ?= : real * real -> bool + val unordered : real * real -> bool + + val isFinite : real -> bool + val isNan : real -> bool + val isNormal : real -> bool + val class : real -> IEEEReal.float_class + + val toManExp : real -> {man : real, exp : int} + val fromManExp : {man : real, exp : int} -> real + val split : real -> {whole : real, frac : real} + val realMod : real -> real + + val nextAfter : real * real -> real + val checkFloat : real -> real + + val realFloor : real -> real + val realCeil : real -> real + val realTrunc : real -> real + val realRound : real -> real + val floor : real -> int + val ceil : real -> int + val trunc : real -> int + val round : real -> int + + val toInt : IEEEReal.rounding_mode -> real -> int + val toLargeInt : IEEEReal.rounding_mode -> real -> LargeInt.int + val fromInt : int -> real + val fromLargeInt : LargeInt.int -> real + + val toLarge : real -> LargeReal.real + val fromLarge : IEEEReal.rounding_mode -> LargeReal.real -> real + + val fmt : StringCvt.realfmt -> real -> string + val toString : real -> string + val scan : (char, 'a) StringCvt.reader -> (real, 'a) StringCvt.reader + val fromString : string -> real option + +(* + val toDecimal : real -> IEEEReal.decimal_approx + val fromDecimal : IEEEReal.decimal_approx -> real option +*) end (** -[~ x] returns the negation of x. +[type real] The type of reals. Notice that real is not an equality +type. -[x * y] returns the product of x and y. +[structure Math] Mathematical operations on real values. -[x / y] returns the division of x by y. +[radix] The base of the representation, e.g., 2 or 10 for IEEE +floating point. + +[precision] The number of digits, each between 0 and radix-1, in the +mantissa. Note that the precision includes the implicit (or hidden) +bit used in the IEEE representation (e.g., the value of +Real64.precision is 53). + +[maxFinite] + +[minPos] + +[minNormalPos] + +The maximum finite number, the minimum non-zero positive number, and +the minimum non-zero normalized number, respectively. + +[posInf] + +[negInf] + +Positive and negative infinity values. [x + y] returns the sum of x and y. [x - y] returns the difference between x and y. -[x > y] returns true if x is strictly larger than y. Returns false -otherwise. +[x * y] returns the product of x and y. -[x >= y] returns true if x is larger than or equal to y. Returns false -otherwise. +[x / y] returns the division of x by y. + +[rem (x, y)] returns the remainder x - n*y, where n = trunc (x / +y). The result has the same sign as x and has absolute value less than +the absolute value of y. If x is an infinity or y is 0, rem returns +NaN. If y is an infinity, rem returns x. + +[*+ (a, b, c)] +[*- (a, b, c)] + +These return a*b + c and a*b - c, respectively. Their behaviors on +infinities follow from the behaviors derived from addition, +subtraction, and multiplication. The precise semantics of these +operations depend on the language implementation and the underlying +hardware. Specifically, certain architectures provide these operations +as a single instruction, possibly using a single rounding +operation. Thus, the use of these operations may be faster than +performing the individual arithmetic operations sequentially, but may +also cause different rounding behavior. + +[~ x] returns the negation of x. + +[abs r] returns the absolute value |r| of r. + +[min(x, y)] is the smaller of x and y. + +[max(x, y)] is the larger of x and y. + +[sign x] is ~1, 0, or 1, according to whether x is negative, zero, or +positive. An infinity returns its sign; a zero returns 0 regardless of +its sign. It raises Domain on NaN. + +[signBit r] returns true if and only if the sign of r (infinities, +zeros, and NaN, included) is negative. + +[sameSign (x, y)] is true iff sign x = sign y. + +[copySign (x, y)] returns x with the sign of y, even if y is NaN. + +[compare (x, y)] + +[compareReal (x, y)] + +The function compare returns LESS, EQUAL, or GREATER according to +whether its first argument is less than, equal to, or greater than the +second. It raises IEEEReal.Unordered on unordered arguments. The +function compareReal behaves similarly except that the values it +returns have the extended type IEEEReal.real_order and it returns +IEEEReal.UNORDERED on unordered arguments. [x < y] returns true if x is strictly smaller than y. Returns false otherwise. @@ -90,25 +199,97 @@ otherwise. [x <= y] returns true if x is smaller than or equal to y. Returns false otherwise. -[~ x] returns the absolute value of x. +[x > y] returns true if x is strictly larger than y. Returns false +otherwise. -[min(x, y)] is the smaller of x and y. +[x >= y] returns true if x is larger than or equal to y. Returns false +otherwise. -[max(x, y)] is the larger of x and y. +Note that these operators return false on unordered arguments, i.e., +if either argument is NaN, so that the usual reversal of comparison +under negation does not hold, e.g., a < b is not the same as not (a >= +b). -[sign x] is ~1, 0, or 1, according to whether x is negative, zero, or -positive. +[== (x, y)] +[!= (x, y)] + +The first returns true if and only if neither y nor x is NaN, and y +and x are equal, ignoring signs on zeros. This is equivalent to the +IEEE = operator. The second function != is equivalent to not o op == +and the IEEE ?<> operator. + +[?= (x, y)] returns true if either argument is NaN or if the arguments +are bitwise equal, ignoring signs on zeros. It is equivalent to the +IEEE ?= operator. + +[unordered (x, y)] returns true if x and y are unordered, i.e., at +least one of x and y is NaN. + +[isFinite x] returns true if x is neither NaN nor an infinity. + +[isNan x] returns true if x is NaN. + +[isNormal x] returns true if x is normal, i.e., neither zero, +subnormal, infinite nor NaN. + +[class x] returns the IEEEReal.float_class to which x belongs. + +[toManExp r] returns {man, exp}, where man and exp are the mantissa +and exponent of r, respectively. Specifically, we have the relation + + r = man * radix^(exp) -[compare(x, y)] returns LESS, EQUAL, or GREATER, according as x is -less than, equal to, or greater than y. +where 1.0 <= man * radix < radix. This function is comparable to frexp +in the C library. If r is +-0, man is +-0 and exp is +0. If r is ++-infinity, man is +-infinity and exp is unspecified. If r is NaN, man +is NaN and exp is unspecified. -[sameSign(x, y)] is true iff sign x = sign y. +[fromManExp {man, exp}] returns man * radix^(exp). This function is +comparable to ldexp in the C library. Note that, even if man is a +non-zero, finite real value, the result of fromManExp can be zero or +infinity because of underflows and overflows. If man is +-0, the +result is +-0. If man is +-infinity, the result is +-infinity. If man +is NaN, the result is NaN. -[toDefault x] is x. +[split r] -[fromDefault x] is x. +[realMod r] -[fromInt i] is the floating-point number representing integer i. +The former returns {whole, frac}, where frac and whole are the +fractional and integral parts of r, respectively. Specifically, whole +is integral, |frac| < 1.0, whole and frac have the same sign as r, and +r = whole + frac. This function is comparable to modf in the C +library. If r is +-infinity, whole is +-infinity and frac is +-0. If +r is NaN, both whole and frac are NaN. The realMod function is +equivalent to #frac o split. + +[nextAfter (r, t)] returns the next representable real after r in the +direction of t. Thus, if t is less than r, nextAfter returns the +largest representable floating-point number less than r. If r = t then +it returns r. If either argument is NaN, this returns NaN. If r is ++-infinity, it returns +-infinity. + +[checkFloat x] raises Overflow if x is an infinity, and raises Div if +x is NaN. Otherwise, it returns its argument. This can be used to +synthesize trapping arithmetic from the non-trapping operations given +here. Note, however, that infinities can be converted to NaNs by some +operations, so that if accurate exceptions are required, checks must +be done after each operation. + +[realFloor r] + +[realCeil r] + +[realTrunc r] + +[realRound r] + +These functions convert real values to integer-valued reals. realFloor +produces floor(r), the largest integer not larger than r. realCeil +produces ceil(r), the smallest integer not less than r. realTrunc +rounds r towards zero, and realRound rounds to the integer-values real +value that is nearest to r. If r is NaN or an infinity, these +functions return r. [floor r] is the largest integer <= r (rounds towards minus infinity). May raise Overflow. @@ -123,6 +304,33 @@ towards zero). May raise Overflow. mode. NOTE: This isn't the required behaviour: it should round to nearest even integer in case of a tie. May raise Overflow. +[toInt mode x] + +[toLargeInt mode x] + +These functions convert the argument x to an integral type using the +specified rounding mode. They raise Overflow if the result is not +representable, in particular, if x is an infinity. They raise Domain +if the input real is NaN. + +[fromInt i] + +[fromLargeInt i] + +These functions convert the integer i to a real value. If the absolute +value of i is larger than maxFinite, then the appropriate infinity is +returned. If i cannot be exactly represented as a real value, then the +current rounding mode is used to determine the resulting value. The +top-level function real is an alias for Real.fromInt. + +[toLarge r] + +[fromLarge r] + +These convert between values of type real and type LargeReal.real. If +r is too small or too large to be represented as a real, fromLarge +will convert it to a zero or an infinity. + [fmt spec r] returns a string representing r, in the format specified by spec. @@ -139,13 +347,6 @@ by spec. format according to the magnitude of r. Equivalent to (fmt (GEN NONE) r). -[fromString s] returns SOME(r) if a floating-point numeral can be -scanned from a prefix of string s, ignoring any initial whitespace; -returns NONE otherwise. The valid forms of floating-point numerals -are described by - - [+~-]?(([0-9]+(\.[0-9]+)?)|(\.[0-9]+))([eE][+~-]?[0-9]+)? - [scan getc charsrc] attempts to scan a floating-point number from the character source charsrc, using the accessor getc, and ignoring any initial whitespace. If successful, it returns SOME(r, rest) where r @@ -154,4 +355,46 @@ source. The valid forms of floating-point numerals are described by [+~-]?(([0-9]+(\.[0-9]+)?)|(\.[0-9]+))([eE][+~-]?[0-9]+)? +[fromString s] returns SOME(r) if a floating-point numeral can be +scanned from a prefix of string s, ignoring any initial whitespace; +returns NONE otherwise. The valid forms of floating-point numerals +are described by + + [+~-]?(([0-9]+(\.[0-9]+)?)|(\.[0-9]+))([eE][+~-]?[0-9]+)? + +[toDecimal r] + +[fromDecimal d] + +These convert between real values and decimal approximations. Decimal +approximations are to be converted using the IEEEReal.TO_NEAREST +rounding mode. toDecimal should produce only as many digits as are +necessary for fromDecimal to convert back to the same number. In +particular, for any normal or subnormal real value r, we have the +bit-wise equality: fromDecimal (toDecimal r) = r. For toDecimal, when +the r is not normal or subnormal, then the exp field is set to 0 and +the digits field is the empty list. In all cases, the sign and class +field capture the sign and class of r. + +For fromDecimal, if class is ZERO or INF, the resulting real is the +appropriate signed zero or infinity. If class is NAN, a signed NaN is +generated. If class is NORMAL or SUBNORMAL, the sign, digits and exp +fields are used to produce a real number whose value is + + s * 0.d(1)d(2)...d(n) 10^(exp) + +where digits = [d(1), d(2), ..., d(n)] and where s is -1 if sign is +true and 1 otherwise. Note that the conversion itself should ignore +the class field, so that the resulting value might have class NORMAL, +SUBNORMAL, ZERO, or INF. For example, if digits is empty or a list of +all 0's, the result should be a signed zero. More generally, very +large or small magnitudes are converted to infinities or zeros. If +the argument to fromDecimal does not have a valid format, i.e., if the +digits field contains integers outside the range [0,9], it returns +NONE. + + Implementation note: Algorithms for accurately and efficiently + converting between binary and decimal real representations are + readily available, e.g., see the technical report by Gay. + *) diff --git a/basis/Real.sml b/basis/Real.sml index d833c0fd0..83955ff1d 100644 --- a/basis/Real.sml +++ b/basis/Real.sml @@ -4,29 +4,48 @@ structure Real : REAL = (* Primitives *) - fun real (x : int) : real = prim ("realInt", x) + val radix = 2 + val precision = 53 + + fun real (x:int) : real = prim ("realInt", x) fun getCtx () : foreignptr = prim("__get_ctx",()) - fun floor (x : real) : int = prim ("floorFloat", (getCtx(),x)) (* may raise Overflow *) - fun ceil (x : real) : int = prim ("ceilFloat", (getCtx(),x)) (* may raise Overflow *) - fun trunc (x : real) : int = prim ("truncFloat", (getCtx(),x)) (* may raise Overflow *) + fun floor (x:real) : int = prim ("floorFloat", (getCtx(),x)) (* may raise Overflow *) + fun ceil (x:real) : int = prim ("ceilFloat", (getCtx(),x)) (* may raise Overflow *) + fun trunc (x:real) : int = prim ("truncFloat", (getCtx(),x)) (* may raise Overflow *) - fun realFloor (x: real) : real = prim ("realFloor", x) - fun realCeil (x: real) : real = prim ("realCeil", x) - fun realTrunc (x: real) : real = prim ("realTrunc", x) - fun realRound (x: real) : real = prim ("realRound", x) + fun realFloor (x:real) : real = prim ("realFloor", x) + fun realCeil (x:real) : real = prim ("realCeil", x) + fun realTrunc (x:real) : real = prim ("realTrunc", x) + fun realRound (x:real) : real = prim ("realRound", x) - fun (x: real) / (y: real): real = prim ("divFloat", (x, y)) - fun rem (x: real, y: real): real = prim ("remFloat", (x, y)) + fun (x:real) / (y:real) : real = prim ("divFloat", (x, y)) + fun rem (x:real, y:real) : real = prim ("remFloat", (x, y)) fun to_string_gen (s : string) (x : real) : string = - prim ("generalStringOfFloat", (s,x)) - fun toString (x : real) : string = prim ("stringOfFloat", x) - fun sub_unsafe (s:string,i:int) : char = prim ("__bytetable_sub", (s,i)) - fun isNan (x : real) : bool = prim ("isnanFloat", x) + prim ("generalStringOfFloat", (s,x)) + fun toString (x:real) : string = prim ("stringOfFloat", x) + fun sub_unsafe (s:string, i:int) : char = prim ("__bytetable_sub", (s,i)) + fun isNan (x:real) : bool = prim ("isnanFloat", x) + + fun max (x:real, y:real) : real = prim ("__max_real", (x, y)) + fun min (x:real, y:real) : real = prim ("__min_real", (x, y)) + + fun copySign (x:real, y:real) : real = prim("copysignFloat", (x, y)) + fun signBit (x:real) : bool = prim("signbitFloat", x) + fun isNormal (x:real) : bool = prim("isnormalFloat", x) + + fun ldexp (x:real, e:int) : real = prim("ldexpFloat", (x, e)) + fun frexp (x:real) : real * int = prim("frexpFloat", x) + + fun nextAfter (r:real, d:real) : real = prim("nextafterFloat", (r, d)) - fun max (x: real, y: real) : real = prim ("__max_real", (x, y)) - fun min (x: real, y: real) : real = prim ("__min_real", (x, y)) + fun split (r:real) : {whole:real, frac:real} = + let val (w,f) = prim("splitFloat", r) + in {whole=w,frac=f} + end + + val realMod : real -> real = #frac o split type real = real @@ -34,9 +53,30 @@ structure Real : REAL = val posInf = Initial.posInf val negInf = Initial.negInf + val minPos = Initial.minPos + val maxFinite = Initial.maxFinite + val minNormalPos = Initial.minNormalPos val fromInt = real + fun fromLargeInt i = + let val N_i = 1073741824 (* pow2 30 *) + val N = IntInf.fromInt N_i + val N_r = real N_i + val op < = IntInf.< + fun fromLargePos i = + if N < i then + let val factor = IntInf.div(i, N) + val rem = IntInf.-(i, IntInf.*(factor, N)) + val factor_r = fromLargePos factor + val rem_r = fromLargePos rem + in N_r * factor_r + rem_r + end + else real (Int.fromLarge i) + in if i < 0 then ~ (fromLargePos (IntInf.~ i)) + else fromLargePos i + end + (* The following should be replaced by numerically better conversion functions; see @@ -174,25 +214,74 @@ structure Real : REAL = val op < : real * real -> bool = op < val op <= : real * real -> bool = op <= val abs : real -> real = abs - fun sign i = if i > 0.0 then 1 else if i < 0.0 then ~1 else 0 + + fun *+ (a,b,c) = a * b + c + fun *- (a,b,c) = a * b - c + + fun unordered (x:real, y:real) : bool = isNan x orelse isNan y + + fun compareReal (x:real, y:real) : IEEEReal.real_order = + let open IEEEReal + in if unordered(x,y) then UNORDERED + else if x < y then LESS + else if y < x then GREATER + else EQUAL + end + + fun sign i = + if i > 0.0 then 1 + else if i < 0.0 then ~1 + else if isNan i then raise Domain + else 0 fun compare (x, y: real) = - if xy then GREATER else EQUAL + if unordered (x,y) then raise IEEEReal.Unordered + else if x < y then LESS + else if y < x then GREATER + else EQUAL + + fun op == (x, y) = + case compareReal (x,y) of + IEEEReal.EQUAL => true + | _ => false + + fun op != (x,y) = + case compareReal (x,y) of + IEEEReal.EQUAL => false + | _ => true - fun op == (x, y) = case compare (x,y) - of EQUAL => true - | _ => false - fun op != (x,y) = case compare (x,y) - of EQUAL => false - | _ => true + fun op ?= (a,b) = + isNan a orelse isNan b orelse op == (a, b) infix != == fun isFinite r = if isNan r then false else r != posInf andalso r != negInf + fun checkFloat (r:real) = + if r == posInf orelse r == negInf then raise Overflow + else if isNan r then raise Div + else r + fun sameSign (i, j) = sign i = sign j + fun class (r:real) : IEEEReal.float_class = + let open IEEEReal + in if isNan r then NAN + else if r == posInf orelse r == negInf then INF + else if r == 0.0 then ZERO + else if isNormal r then NORMAL + else SUBNORMAL + end + + fun fromManExp {man,exp} : real = + ldexp(man,exp) + + fun toManExp (r:real) : {man:real, exp:int} = + let val (m,e) = frexp r + in {man=m,exp=e} + end + fun round (x : real) : int = let (* val _ = print "**R1**\n" *) val t0 = x+0.5 @@ -211,8 +300,38 @@ structure Real : REAL = else floor_t0 end - fun toDefault i = i - fun fromDefault i = i + fun toInt (rm:IEEEReal.rounding_mode) (r:real) : int = + case rm of + IEEEReal.TO_NEAREST => round r + | IEEEReal.TO_NEGINF => floor r + | IEEEReal.TO_POSINF => ceil r + | IEEEReal.TO_ZERO => trunc r + + fun toLargeInt rm (r:real) = + let val N_i = 1073741824 (* pow2 30 *) + val N = IntInf.fromInt N_i + val N_r = real N_i + fun whole r = #whole(split r) + fun toLargePos r = + if N_r < r then + let val factor_r = whole(r / N_r) + val rem_r = r - factor_r * N_r + val factor = toLargePos factor_r + val rem = toLargePos rem_r + in IntInf.+(IntInf.*(N, factor), rem) + end + else Int.toLarge (toInt rm r) + in if isNan r then raise Domain + else if r == negInf orelse r == posInf then raise Overflow + else if r < 0.0 then IntInf.~ (toLargePos (~r)) + else toLargePos r + end + + fun toLarge r = r + fun fromLarge _ r = r + + fun toDefault i = i + fun fromDefault i = i end (*structure Real*) diff --git a/basis/Socket.sml b/basis/Socket.sml index dd28c1415..3ed5eb7d0 100644 --- a/basis/Socket.sml +++ b/basis/Socket.sml @@ -32,28 +32,7 @@ local end = struct (* see socket.c *) - - val { AF_INET : int - , AF_UNIX : int - , INADDR_ANY : int - , SHUT_RD : int - , SHUT_RDWR : int - , SHUT_WR : int - , SOCK_DGRAM : int - , SOCK_RAW : int - , SOCK_STREAM : int - , SO_BROADCAST : int - , SO_DEBUG : int - , SO_DONTROUTE : int - , SO_ERROR : int - , SO_KEEPALIVE : int - , SO_LINGER : int - , SO_OOBINLINE : int - , SO_RCVBUF : int - , SO_REUSEADDR : int - , SO_SNDBUF : int - , SO_TYPE : int - } = prim("sml_sock_getDefines",()) + open Initial.SocketDefs datatype af = Inet_af | Unix_af diff --git a/basis/basis.mlb b/basis/basis.mlb index 21133f392..e3203cba5 100644 --- a/basis/basis.mlb +++ b/basis/basis.mlb @@ -91,9 +91,24 @@ local in bas INTEGER.sml Int.sml Int32.sml Int31.sml Int63.sml Int64.sml end end + basis IEEEReal = + let open General String Int List + in bas IEEE_REAL.sig IEEEReal.sml end + end + + basis IntInf = + let open General List ArrayVector String Word Int + in bas ann safeLinkTimeElimination + in INT_INF.sml IntInf.sml + end + end + end + basis Real = let open General String Int - in bas MATH.sig Math.sml REAL.sig Real.sml + in bas + MATH.sig Math.sml + local open IEEEReal IntInf in REAL.sig Real.sml end local open Byte ArrayVector in PACK_REAL.sml PackRealLittle.sml PackRealBig.sml end @@ -102,14 +117,7 @@ local RealArray2.sml in RealArrayVector.sml end - end - end - - basis IntInf = - let open General List ArrayVector String Word Int Real - in bas ann safeLinkTimeElimination - in INT_INF.sml IntInf.sml - end + open IEEEReal end end diff --git a/basis/wordtables.sml b/basis/wordtables.sml index 8761f19cf..e407d598e 100644 --- a/basis/wordtables.sml +++ b/basis/wordtables.sml @@ -33,8 +33,8 @@ struct fun array0 (n : int, x:'a) : 'a array = prim ("word_table_init", (n, x)) - (* 26 bits are reserved for the length in the tag field; maxLen = 2^26 *) - val maxLen = 123456789*100 (* arbitrary chosen. *) + (* quite a few bits are available for the length in the tag field! *) + val maxLen = Initial.wordtable_maxlen fun check_index (n, i) = if 0 <= i andalso i < n then () diff --git a/doc/license/MLKit-LICENSE b/doc/license/MLKit-LICENSE index 63525f4f3..ac66759ce 100644 --- a/doc/license/MLKit-LICENSE +++ b/doc/license/MLKit-LICENSE @@ -41,16 +41,16 @@ SML/NJ SMLNJ-LICENSE (BSD-style) Ported some libraries for SML/NJ Lib SMLNJ-LIB-LICENSE (BSD-style) Ported some libraries for kitlib.mlb (in basis/); see individual files for - details. + details. copyright.att (BSD-style) 1993 versions of Polyhash.sml, POLYHASH.sml - (in basis/ and src/Pickle/) + (in basis/ and src/Pickle/). MLton MLton-LICENSE (BSD-style) Ported some libraries for basis.mlb: PrimIO, StreamIO, - TextIO, ImperativeIO, BinIO - (in basis/). + TextIO, ImperativeIO, BinIO, + IEEEReal (in basis/). MLton MLton-HPND-LICENSE (HPND-style) Made use of some functionality for basis.mlb: NetHostDb scan function, e.g. diff --git a/src/Runtime/Math.c b/src/Runtime/Math.c index 74558eb9b..2465ba292 100644 --- a/src/Runtime/Math.c +++ b/src/Runtime/Math.c @@ -5,6 +5,8 @@ #include #include #include +#include +#include #include "Math.h" #include "Tagging.h" #include "Exception.h" @@ -708,6 +710,54 @@ isnanFloat(ssize_t s) return mlFALSE; } +ssize_t +signbitFloat(ssize_t s) +{ + if (signbit(get_d(s))) + { + return mlTRUE; + } + return mlFALSE; +} + +ssize_t +isnormalFloat(ssize_t s) +{ + if (isnormal(get_d(s))) + { + return mlTRUE; + } + return mlFALSE; +} + +ssize_t +copysignFloat (ssize_t d, ssize_t y, ssize_t x) +{ + get_d (d) = copysign (get_d (y), get_d (x)); + set_dtag(d); + return d; +} + +ssize_t +ldexpFloat (ssize_t d, ssize_t x, ssize_t e) +{ + long int e2 = convertIntToC((long int)e); + get_d (d) = ldexp (get_d (x), e2); + set_dtag(d); + return d; +} + +ssize_t +frexpFloat (ssize_t p, ssize_t d, ssize_t x) +{ + int e = 0; + get_d (d) = frexp (get_d (x), &e); + set_dtag(d); + first(p) = d; + second(p) = convertIntToML((long int)e); + return p; +} + ssize_t posInfFloat(ssize_t d) { @@ -724,6 +774,73 @@ negInfFloat(ssize_t d) return d; } +ssize_t +maxFiniteFloat(ssize_t d) +{ + get_d(d) = DBL_MAX; + set_dtag(d); + return d; +} + +ssize_t +nextafterFloat (ssize_t d, ssize_t x, ssize_t y) +{ + get_d (d) = nextafter (get_d (x), get_d (y)); + set_dtag(d); + return d; +} + +ssize_t +splitFloat (ssize_t p, ssize_t w, ssize_t f, ssize_t r) +{ + double whole; + get_d (f) = modf (get_d (r), &whole); + get_d (w) = whole; + set_dtag(w); + set_dtag(f); + first(p) = w; + second(p) = f; + return p; +} + +// IEEE rounding modes +// 0:TONEAREST, 1: DOWNWARD, 2: UPWARD, 3: ZERO +void +floatSetRoundingMode (ssize_t m) { + long int rm = convertIntToC((long int)m); + if ( rm == 0 ) { + fesetround(FE_TONEAREST); + } else if ( rm == 1 ) { + fesetround(FE_DOWNWARD); + } else if ( rm == 2 ) { + fesetround(FE_UPWARD); + } else if ( rm == 3 ) { + fesetround(FE_TOWARDZERO); + } else { + printf("ERROR floatSetRoundingMode: %ld\n", rm); + exit(1); + } +} + +ssize_t +floatGetRoundingMode(void) { + long int m = 0; + int rm = fegetround(); + if ( rm == FE_TONEAREST ) { + m = 0; + } else if ( rm == FE_DOWNWARD) { + m = 1; + } else if ( rm == FE_UPWARD) { + m = 2; + } else if ( rm == FE_TOWARDZERO) { + m = 3; + } else { + printf("ERROR floatGetRoundingMode: %d\n", rm); + exit(1); + } + return convertIntToML(m); +} + // countChar: count the number of times the character `c' // occurs in the string `s'. static ssize_t countChar(ssize_t c, char * s) { @@ -761,7 +878,7 @@ REG_POLY_FUN_HDR(stringOfFloat, Region rAddr, size_t arg) char buf[64]; sprintf(buf, "%.12g", get_d(arg)); mkSMLMinus(buf); - if( countChar('.', buf) == 0 && countChar('E', buf) == 0 ) + if( countChar('.', buf) == 0 && countChar('E', buf) == 0 && countChar('n', buf) == 0) // protect for nan and inf { strcat(buf, ".0"); } diff --git a/src/Runtime/Math.h b/src/Runtime/Math.h index 57edd8bb2..eb5d0c6a6 100644 --- a/src/Runtime/Math.h +++ b/src/Runtime/Math.h @@ -104,6 +104,9 @@ ssize_t isnanFloat(ssize_t s); ssize_t posInfFloat(ssize_t d); ssize_t negInfFloat(ssize_t d); +void floatSetRoundingMode(ssize_t m); // 0:TONEAREST, 1: DOWNWARD, 2: UPWARD, 3: ZERO +ssize_t floatGetRoundingMode(void); + // #ifdef PROFILING // String stringOfFloatProf(Region rAddr, long f, uintptr_t pPoint); // String generalStringOfFloatProf(Region rAddr, String str, long f, long pPoint); diff --git a/test/real.sml b/test/real.sml index c19527011..1b9e51572 100644 --- a/test/real.sml +++ b/test/real.sml @@ -369,11 +369,144 @@ val test13c = (1.6, "2.0", "1.6", "1.6", "1.6"), (1.45, "1.0", "1.4", "1.45", "1.45"), (3.141592653589, "3.0", "3.1", "3.14159", "3.14159265359"), - (91827364509182.0, "9E13", "9.2E13", "9.18274E13", - "9.18273645092E13")]); -end + (91827364509182.0, "9E13", "9.2E13", "9.18274E13", "9.18273645092E13")]) -(* -fun f r n = Real.fmt (StringCvt.GEN (SOME n)) r; -fun ff r = map (f r) [1,2,6,12]; - *) +val nan = Math.sqrt ~1.0 + +val test14 = + tst "test14" (radix = 2 andalso precision = 53) + +val test15 = + tst "test15" (maxFinite > 100000.0 andalso maxFinite < posInf) + +val test16 = + tst "test16" (minPos > 0.0 andalso minPos < 0.5E~322) + +val test17 = + tst "test17" (minNormalPos > 0.0 andalso minNormalPos < 0.5E~300) + +val test18 = + tst "test18" (toString posInf = "inf") + +val test19 = + tst "test19" (toString negInf = "~inf") + +val test20 = + tst "test20" (toString nan = "nan") + +val test21 = + tst "test21" ( *+(34.0,~23.4,12.2) == 34.0 * ~23.4 + 12.2 ) + +val test22 = + tst "test22" ( *-(34.0,~23.4,12.2) == 34.0 * ~23.4 - 12.2 ) + +val test23 = + tst "test23" (not(signBit posInf) andalso signBit negInf andalso signBit ~2.1 andalso not (signBit 2.0)) + +val test24 = + tst "test24" (copySign(1.2,~300.0) == ~1.2 andalso copySign(~1.2,300.0) == 1.2) + +val test25 = + tst "test25" (compareReal (nan,2.0) = IEEEReal.UNORDERED andalso + compareReal (~2.0,nan) = IEEEReal.UNORDERED andalso + compareReal (nan,nan) = IEEEReal.UNORDERED andalso + compareReal (2.0,posInf) = IEEEReal.LESS) + +val test26a = + tst "test26a" ((compare(nan,2.0); false) handle IEEEReal.Unordered => true) + +val test26b = + tst "test26b" ((compare(nan,nan); false) handle IEEEReal.Unordered => true) + +val test26c = + tst "test26c" ((compare(~2.0,nan); false) handle IEEEReal.Unordered => true) + +val test27 = + tst "test27" (unordered(nan,2.0)) + +val test28 = + tst "test28" (isNan nan andalso not(isNan posInf) andalso not(isNan 2.0)) + +val test29 = + tst "test29" (isFinite 2.0 andalso not (isFinite posInf) andalso not (isFinite negInf)) + +fun isoManExp r = fromManExp(toManExp r) + +val test30 = + tst "test30" (2.3 == isoManExp 2.3 andalso posInf == isoManExp posInf andalso isNan(isoManExp nan)) + +fun whole r = #whole(split r) + +val test31 = + tst "test31" (whole 2.3 == 2.0 andalso whole posInf == posInf andalso isNan(whole nan) andalso whole ~2.4 == ~2.0) + +val test32 = + tst "test32" (realMod 2.5 == 0.5) + +val test33 = + tst "test33" (realMod posInf == 0.0) + +val test34 = + tst "test34" (isNan(realMod nan)) + +val test35 = + tst "test35" (nextAfter (2.0, 3.0) > 2.0 andalso nextAfter (2.0, 3.0) < 2.0001) + +val test36 = + tst "test36" (nextAfter (2.0, ~3.0) < 2.0 andalso nextAfter (2.0, ~3.0) > 1.9999) + +val test37 = + tst "test37" (nextAfter (2.0, 2.0) == 2.0) + +val test38 = + tst "test38" (isNan(nextAfter (nan, 2.0)) andalso isNan(nextAfter (2.0,nan))) + +val test39 = + tst "test39" (nextAfter (negInf, 0.0) > negInf andalso nextAfter (posInf, 0.0) < posInf) + +val test40 = + tst "test40" ((checkFloat nan; false) handle General.Div => true) + +val test41 = + tst "test41" ((checkFloat posInf; false) handle General.Overflow => true) + +val test42 = + tst "test42" ((checkFloat 3.4; true) handle _ => false) + +fun pr r = Real.fmt (StringCvt.FIX(SOME 0)) r + +fun try t s expected = + let val res = pr (fromLargeInt (valOf (LargeInt.fromString s))) + in tst t (res = expected) + end + +val test43 = try "test43" "123456789123456789123456789" "123456789123456791337762816" + +fun try2 t rm r expected = + let val res = IntInf.toString (toLargeInt rm r) + in tst t (res = expected) + end + +val test44 = + try2 "test44" IEEEReal.TO_POSINF 123456789123456789123456789123456789123456789123456789.0E200 + "12345678912345679076198071096244527781560870137019946199603053023437034345861166751654846484345015072896161860739481140984032537425287193418804458844483168788367356521095598566212295492892610317287470596434332346080596003132423633511547282430231173922816" + +val test45 = + try2 "test45" IEEEReal.TO_ZERO 12345678.6 + "12345678" + +fun withRM rm f = + let val rm' = IEEEReal.getRoundingMode() + val () = IEEEReal.setRoundingMode rm + val res = f() + in IEEEReal.setRoundingMode rm' + ; res + end + +val test46a = + tst "test46a" (toInt IEEEReal.TO_NEAREST 2.8 = 3) + +val test46b = + tst "test46b" (toInt IEEEReal.TO_ZERO 2.8 = 2) + +end diff --git a/test/real.sml.out.ok b/test/real.sml.out.ok index b63ce0890..6b655c65a 100644 --- a/test/real.sml.out.ok +++ b/test/real.sml.out.ok @@ -172,3 +172,39 @@ test12c OK test13a OK test13b OK test13c OK +test14 OK +test15 OK +test16 OK +test17 OK +test18 OK +test19 OK +test20 OK +test21 OK +test22 OK +test23 OK +test24 OK +test25 OK +test26a OK +test26b OK +test26c OK +test27 OK +test28 OK +test29 OK +test30 OK +test31 OK +test32 OK +test33 OK +test34 OK +test35 OK +test36 OK +test37 OK +test38 OK +test39 OK +test40 OK +test41 OK +test42 OK +test43 OK +test44 OK +test45 OK +test46a OK +test46b OK