From 7369794328cae55e251ff805cddd47d91cee55f4 Mon Sep 17 00:00:00 2001
From: rocky
Date: Fri, 22 Nov 2024 19:43:53 -0500
Subject: [PATCH] More operator tables we can use in Mathics-core

---
 mathics_scanner/data/operators.yml            | 154 +++++++++---------
 .../generate/build_operator_tables.py         |  65 +++++++-
 2 files changed, 138 insertions(+), 81 deletions(-)

diff --git a/mathics_scanner/data/operators.yml b/mathics_scanner/data/operators.yml
index c423a61..634a354 100644
--- a/mathics_scanner/data/operators.yml
+++ b/mathics_scanner/data/operators.yml
@@ -33,9 +33,12 @@
 # |-> to get treated as one unit and not split into two operators like
 # | and ->. So the precedence of |-> has to be higher than |.
 #
+# Note: When there was a mismatch between Jacobson's table and the old Mathics code,
+# we've used the old Mathics code.
+#
 #
 # arity (https://en.wikipedia.org/wiki/Arity)
-# -----
+# -------------------------------------------
 #
 # A fancy word for how many operands (arguments) the operator takes;
 # it is some sort of positive integer. Acceptable values found in our table
@@ -76,12 +79,14 @@
 # FullForm: when "usage" exists, the FullForm translation of the example

 # associativity: when two or more of the same operator is used, which group to
-#                evaluate first. One of:
+#                evaluate first. This value is used in the Mathics3 parser. The
+#                value should be one of:
 #   - None
-#   - Non
-#   - Right
-#   - Left
-#   - Missing["Unknown"]
+#   - left
+#   - non-associative
+#   - right
+#
+#
 #
 # meaningful: "true" if WMA defines a meaning for the operator and "false" if not.
 #             See "Operators without Built-in Meanings"
@@ -97,7 +102,7 @@ AddTo:
   # N-tokens: {}
   # L-tokens: {"+="}
   # O-tokens: {}
-  # usage: "expr1 += expr2"
+  usage: "expr1 += expr2"
   # parse: {"AddTo", "[", "expr1", ",", "expr2", "]"}
   FullForm: AddTo[expr1, expr2]
   arity: Binary
@@ -116,18 +121,21 @@ Alternatives:
   # N-tokens: {}
   # L-tokens: {"|"}
   # O-tokens: {}
-  # usage: "p1 | p2"
+  usage: "p1 | p2"
   # parse: expr2
   FullForm: {"Alternatives", "[", "expr1", ",", "expr2", "]"}
   arity: Alternatives[expr1, expr2]
   affix: Binary
-  associativity: infix
+  associativity: null
   meaningful: none
   # comments: True

 And:
   actual-precedence: 290
-  Precedence-Function: 215
+  # Note from Mathics3 code:
+  # HACK: although the should be 215 for all boolean_ops we adjust slightly
+  # to get the subprecedences correct
+  Precedence-Function: 225
   precedence: 220
   WolframLanguageData: 55
   WolframLanguageData-corrected: 55
@@ -136,7 +144,7 @@ And:
   # N-tokens: {}
   # L-tokens: {"&&", "∧"}
   # O-tokens: {}
-  # usage: "expr1 && expr2; expr1 ∧ expr2"
+  usage: "expr1 && expr2; expr1 ∧ expr2"
   # parse: {"And", "[", "expr1", ",", "expr2", "]"}
   FullForm: And[expr1, expr2]
   arity: Binary
@@ -156,7 +164,7 @@ AngleBracket:
   # N-tokens: {"〈"}
   # L-tokens: {}
   # O-tokens: {"〉"}
-  # usage: "〈expr〉"
+  usage: "〈expr〉"
   # parse: {"AngleBracket", "[", "expr", ",", "…", "]"}
   FullForm: AngleBracket[expr, \[Ellipsis]]
   arity: n-ary
@@ -176,7 +184,7 @@ Apply:
   # N-tokens: {}
   # L-tokens: {"@@"}
   # O-tokens: {}
-  # usage: "expr1 @@ expr2"
+  usage: "expr1 @@ expr2"
   # parse: {"Apply", "[", "expr1", ",", "expr2", "]"}
   FullForm: Apply[expr1, expr2]
   arity: Binary
@@ -195,7 +203,7 @@ ApplyTo:
   # N-tokens: None
   # L-tokens: None
   # O-tokens: None
-  # # usage: "None"
+  # usage: "None"
   # parse: None
   FullForm: None
   arity: Binary
@@ -215,7 +223,7 @@ Association:
   # N-tokens: {"<|", ""}
   # L-tokens: e
   # O-tokens: {"|>", ""}
-  # usage: "<|expr|>; expr "
+  usage: "<|expr|>; expr "
   # parse: {"Association", "[", "expr", ",", "…", "]"}
   FullForm: Association[expr, \[Ellipsis]]
   arity: n-ary
@@ -235,7 +243,7 @@ AutoMatch:
   # N-tokens: {""}
   # L-tokens: {}
   # O-tokens: {""}
-  # usage: " expr "
+  usage: " expr "
   # parse: {"AutoMatch","[","expr","]"}
   FullForm: AutoMatch[expr]
   arity: Unary
@@ -254,7 +262,7 @@ Backslash:
   # N-tokens: {}
   # L-tokens: {"∖"}
   # O-tokens: {}
-  # usage: "expr1 \ expr2"
+  usage: "expr1 \ expr2"
   # parse: {"Backslash", "[", "expr1", ",", "expr2", "]"}
   FullForm: Backslash[expr1, expr2]
   arity: Binary
@@ -273,7 +281,7 @@ Because:
   # N-tokens: {}
   # L-tokens: {"∵"}
   # O-tokens: {}
-  # usage: "expr1 ∵ expr2"
+  usage: "expr1 ∵ expr2"
   # parse: {"Because", "[", "expr1", ",", "expr2", "]"}
   FullForm: Because[expr1, expr2]
   arity: Binary
@@ -293,7 +301,7 @@ BlackLenticularBracket:
   # N-tokens: {"【"}
   # L-tokens: {}
   # O-tokens: {"】"}
-  # usage: "【expr】"
+  usage: "【expr】"
   # parse:
   FullForm:
   arity: Unary
@@ -312,7 +320,7 @@ Blank:
   # N-tokens: {"_"}
   # L-tokens: {}
   # O-tokens: {}
-  # usage: "_"
+  usage: "_"
   # parse: {"Blank", "[", "]"}
   FullForm: Blank[]
   arity: Nullary
@@ -331,7 +339,7 @@ BlankHead:
   # N-tokens: {"_"}
   # L-tokens: {}
   # O-tokens: {}
-  # usage: "_expr"
+  usage: "_expr"
   # parse: {"Blank", "[", "expr", "]"}
   FullForm: Blank[expr]
   arity: Unary
@@ -350,7 +358,7 @@ BlankNullSequence:
   # N-tokens: {"___"}
   # L-tokens: {}
   # O-tokens: {}
-  # usage: "___"
+  usage: "___"
   # parse: {"BlankNullSequence", "[", "]"}
   FullForm: BlankNullSequence[]
   arity: Nullary
@@ -369,7 +377,7 @@ BlankNullSequenceHead:
   # N-tokens: {"___"}
   # L-tokens: {}
   # O-tokens: {}
-  # usage: "___expr"
+  usage: "___expr"
   # parse: {"BlankNullSequence", "[", "expr", "]"}
   FullForm: BlankNullSequence[expr]
   arity: Unary
@@ -388,7 +396,7 @@ BlankOptional:
   # N-tokens: {"_."}
   # L-tokens: {}
   # O-tokens: {}
-  # usage: "_."
+  usage: "_."
   # parse: {"Optional", "[", "Blank", "]"}
   FullForm: Optional[Blank[]]
   arity: Nullary
@@ -407,7 +415,7 @@ BlankSequence:
   # N-tokens: {"__"}
   # L-tokens: {}
   # O-tokens: {}
-  # usage: "__"
+  usage: "__"
   # parse: {"BlankSequence", "[", "]"}
   FullForm: BlankSequence[]
   arity: Nullary
@@ -426,7 +434,7 @@ BlankSequenceHead:
   # N-tokens: {"__"}
   # L-tokens: {}
   # O-tokens: {}
-  # usage: "__expr"
+  usage: "__expr"
   # parse: {"BlankSequence", "[", "expr", "]"}
   FullForm: BlankSequence[expr]
   arity: Unary
@@ -446,7 +454,7 @@ BoxGroup:
   # N-tokens: {"\("}
   # L-tokens: {}
   # O-tokens: {"\)"}
-  # usage: "\\(expr\\)"
+  usage: "\\(expr\\)"
   # parse: {“LeftRowBox”, expr, “RightRowBox”]
   FullForm: "\\(expr\\)"
   arity: Unary
@@ -466,7 +474,7 @@ BracketingBar:
   # N-tokens: {""}
   # L-tokens: {}
   # O-tokens: {""}
-  # usage: "expr"
+  usage: "expr"
   # parse: {"BracketingBar", "[", "expr", ",", "…", "]"}
   FullForm: BracketingBar[expr, \[Ellipsis]]
   arity: n-ary
@@ -485,7 +493,7 @@ Cap:
   # N-tokens: {}
   # L-tokens: {"⌢"}
   # O-tokens: {}
-  # usage: "expr1 ⌢ expr2"
+  usage: "expr1 ⌢ expr2"
   # parse: {"Cap", "[", "expr1", ",", "expr2", "]"}
   FullForm: Cap[expr1, expr2]
   arity: Binary
@@ -505,7 +513,7 @@ CapitalDifferentialD:
   # N-tokens: {""}
   # L-tokens: {}
   # O-tokens: {}
-  # usage: "x"
+  usage: "x"
   # parse: {"CapitalDifferentialD", "[", "x", "]"}
   FullForm: CapitalDifferentialD[x]
   arity: Unary
@@ -525,7 +533,7 @@ Ceiling:
   # N-tokens: {"⌈"}
   # L-tokens: {}
   # O-tokens: {"⌉"}
-  # usage: "⌈expr⌉"
+  usage: "⌈expr⌉"
   # parse: {"Ceiling", "[", "expr", "]"}
   FullForm: Ceiling[expr]
   arity: Unary
@@ -544,7 +552,7 @@ CenterDot:
   # N-tokens: {}
   # L-tokens: {"·"}
   # O-tokens: {}
-  # usage: "x · y"
+  usage: "x · y"
   # parse: {"CenterDot", "[", "x", ",", "y", "]"}
   FullForm: CenterDot[x, y]
   arity: Binary
@@ -563,7 +571,7 @@ CircleDot:
   # N-tokens: {}
   # L-tokens: {"⊙"}
   # O-tokens: {}
-  # usage: "expr1 ⊙ expr2"
+  usage: "expr1 ⊙ expr2"
   # parse: {"CircleDot", "[", "expr1", ",", "expr2", "]"}
   FullForm: CircleDot[expr1, expr2]
   arity: Binary
@@ -582,7 +590,7 @@ CircleMinus:
   # N-tokens: {}
   # L-tokens: {"⊖"}
   # O-tokens: {}
-  # usage: "expr1 ⊖ expr2"
+  usage: "expr1 ⊖ expr2"
   # parse: {"CircleMinus", "[", "expr1", ",", "expr2", "]"}
   FullForm: CircleMinus[expr1, expr2]
   arity: Binary
@@ -601,7 +609,7 @@ CirclePlus:
   # N-tokens: {}
   # L-tokens: {"⊕"}
   # O-tokens: {}
-  # usage: "expr1 ⊕ expr1"
+  usage: "expr1 ⊕ expr2"
   # parse: {"CirclePlus", "[", "expr1", ",", "expr2", "]"}
   FullForm: CirclePlus[expr1, expr2]
   arity: Binary
@@ -620,7 +628,7 @@ CircleTimes:
   # N-tokens: {}
   # L-tokens: {"⊗"}
   # O-tokens: {}
-  # usage: "expr1 ⊗ expr2"
+  usage: "expr1 ⊗ expr2"
   # parse: {"CircleTimes", "[", "expr1", ",", "expr2", "]"}
   FullForm: CircleTimes[expr1, expr2]
   arity: Binary
@@ -640,7 +648,7 @@ ClockwiseContourIntegral:
   # N-tokens: {"∲"}
   # L-tokens: {}
   # O-tokens: {""}
-  # usage: "∲ f(x) x"
+  usage: "∲ f(x) x"
   # parse: {"ClockwiseContourIntegral", "[", "expr1", ",", "expr2", "]"}
   FullForm: Integrate[expr1, expr2]
   arity: Binary
@@ -659,7 +667,7 @@ Colon:
   # N-tokens: {}
   # L-tokens: {"∶"}
   # O-tokens: {}
-  # usage: "expr1 ∶ expr2"
+  usage: "expr1 ∶ expr2"
   # parse: {"Colon", "[", "expr1", ",", "expr2", "]"}
   FullForm: Colon[expr1, expr2]
   arity: Binary
@@ -679,7 +687,7 @@ Composition:
   # N-tokens: {}
   # L-tokens: {"@*"}
   # O-tokens: {}
-  # usage: "expr1 @* expr2"
+  usage: "expr1 @* expr2"
   # parse: {"Composition", "[", "expr1", ",", "expr2", "]"}
   FullForm: Composition[expr1, expr2]
   arity: Binary
@@ -696,9 +704,9 @@ CompoundExpression:
   UnicodeCharacters.tr:
   UnicodeCharacters-corrected.tr: 170
   # N-tokens: {}
-  # L-tokens: {";"}
+  # L-tokens: {","}
   # O-tokens: {}
-  # usage: "expr1, expr2"
+  usage: "expr1, expr2"
   # parse: {"CompoundExpression", "[", "expr1", ",", "expr2", "]"}
   FullForm: CompoundExpression[expr1, expr2]
   arity: Binary
@@ -736,7 +744,7 @@ Condition:
   # N-tokens: {}
   # L-tokens: {"/;"}
   # O-tokens: {}
-  # usage: "expr1 /; expr2"
+  usage: "expr1 /; expr2"
   # parse: {"Condition", "[", "expr1", ",", "expr2", "]"}
   FullForm: Condition[expr1, expr2]
   arity: Binary
@@ -755,7 +763,7 @@ Conditioned:
   # N-tokens: {}
   # L-tokens: {""}
   # O-tokens: {}
-  # usage: "expr  cond"
+  usage: "expr  cond"
   # parse: {"Conditioned", "[", "expr", ",", "cond", "]"}
   FullForm: Conditioned[expr, cond]
   arity: Binary
@@ -774,7 +782,7 @@ Congruent:
   # N-tokens: {}
   # L-tokens: {"≡"}
   # O-tokens: {}
-  # usage: "x ≡ y"
+  usage: "x ≡ y"
   # parse: {"Congruent", "[", "x", ",", "y", "]"}
   FullForm: Congruent[x, y]
   arity: Binary
@@ -794,7 +802,7 @@ Conjugate:
   # N-tokens: {}
   # L-tokens: {""}
   # O-tokens: {}
-  # usage: "z"
+  usage: "z"
   # parse: {"Conjugate", "[", "z", "]"}
   FullForm: Conjugate[z]
   arity: Unary
@@ -814,7 +822,7 @@ ConjugateTranspose:
   # N-tokens: {}
   # L-tokens: {"", ""}
   # O-tokens: {}
-  # usage: "m; m"
+  usage: "m; m"
   # parse: {"ConjugateTranspose", "[", "m", "]"}
   FullForm: ConjugateTranspose[m]
   arity: Unary
@@ -834,7 +842,7 @@ ContextPathSeparator:
   # N-tokens: {}
   # L-tokens: {"`"}
   # O-tokens: {}
-  # usage: "symb1`symb2"
+  usage: "symb1`symb2"
   # parse: {"symb1", "`", "symb2"}
   FullForm: symb1`symb2
   arity:
@@ -854,7 +862,7 @@ ContourIntegral:
   # N-tokens: {"∮"}
   # L-tokens: {}
   # O-tokens: {""}
-  # usage: "∮ f(x) x"
+  usage: "∮ f(x) x"
   # parse: {"ContourIntegral", "[", "expr1", ",", "expr2", "]"}
   FullForm: Integrate[expr1, expr2]
   arity: Binary
@@ -873,7 +881,7 @@ Coproduct:
   # N-tokens: {}
   # L-tokens: {"∐"}
   # O-tokens: {}
-  # usage: "expr1 ∐ expr2"
+  usage: "expr1 ∐ expr2"
   # parse: {"Coproduct", "[", "expr1", ",", "expr2", "]"}
   FullForm: Coproduct[expr1, expr2]
   arity: Binary
@@ -893,7 +901,7 @@ CornerBracket:
   # N-tokens: {"「"}
   # L-tokens: {}
   # O-tokens: {"」"}
-  # usage: "「expr」"
+  usage: "「expr」"
   # parse:
   FullForm:
   arity: Unary
@@ -913,7 +921,7 @@ CounterClockwiseContourIntegral:
   # N-tokens: {"∳"}
   # L-tokens: {}
   # O-tokens: {""}
-  # usage: "∳ f(x) x"
+  usage: "∳ f(x) x"
   # parse: {"CounterClockwiseContourIntegral", "[", "expr1", ",", "expr2", "]"}
   FullForm: Integrate[expr1, expr2]
   arity: Binary
@@ -932,7 +940,7 @@ Cross:
   # N-tokens: {}
   # L-tokens: {""}
   # O-tokens: {}
-  # usage: "expr1  expr2"
+  usage: "expr1  expr2"
   # parse: {"Cross", "[", "expr1", ",", "expr2", "]"}
   FullForm: Cross[expr1, expr2]
   arity: Binary
@@ -951,7 +959,7 @@ Cup:
   # N-tokens: {}
   # L-tokens: {"⌣"}
   # O-tokens: {}
-  # usage: "expr1 ⌣ expr2"
+  usage: "expr1 ⌣ expr2"
   # parse: {"Cup", "[", "expr1", ",", "expr2", "]"}
   FullForm: Cup[expr1, expr2]
   arity: Binary
@@ -970,7 +978,7 @@ CupCap:
   # N-tokens: {}
   # L-tokens: {"≍"}
   # O-tokens: {}
-  # usage: "x ≍ y"
+  usage: "x ≍ y"
   # parse: {"CupCap", "[", "x", ",", "y", "]"}
   FullForm: CupCap[x, y]
   arity: Binary
@@ -990,7 +998,7 @@ Curl:
   # N-tokens: {""}
   # L-tokens: {}
   # O-tokens: {}
-  # usage: "expr"
+  usage: "expr"
   # parse: {"Curl", "[", "expr", "]"}
   FullForm:
   arity: Unary
@@ -1010,7 +1018,7 @@ CurlyDoubleQuote:
   # N-tokens: {"“"}
   # L-tokens: {}
   # O-tokens: {"”"}
-  # usage: "“expr”"
+  usage: "“expr”"
   # parse: {"CurlyDoubleQuote","[","expr","]"}
   FullForm: CurlyDoubleQuote[expr]
   arity: Unary
@@ -1050,7 +1058,7 @@ Decrement:
   # N-tokens: {}
   # L-tokens: {"--"}
   # O-tokens: {}
-  # usage: "expr--"
+  usage: "expr--"
   # parse: {"Decrement", "[", "expr", "]"}
   FullForm: Decrement[expr]
   arity: Unary
@@ -1069,7 +1077,7 @@ Del:
   # N-tokens: {"∇"}
   # L-tokens: {}
   # O-tokens: {}
-  # usage: "∇x"
+  usage: "∇x"
   # parse: {"Del", "[", "expr", "]"}
   FullForm: Del[expr]
   arity: Unary
@@ -1107,7 +1115,7 @@ Diamond:
   # N-tokens: {}
   # L-tokens: {"⋄"}
   # O-tokens: {}
-  # usage: "expr1 ⋄ expr2"
+  usage: "expr1 ⋄ expr2"
   # parse: {"Diamond", "[", "expr1", ",", "expr2", "]"}
   FullForm: Diamond[expr1, expr2]
   arity: Binary
@@ -1126,7 +1134,7 @@ DifferenceDelta:
   # N-tokens: {""}
   # L-tokens: {}
   # O-tokens: {}
-  # usage: "expr"
+  usage: "expr"
   # parse: {"DifferenceDelta", "[", "expr", "]"}
   FullForm:
   arity: Unary
@@ -1146,7 +1154,7 @@ DifferentialD:
   # N-tokens: {""}
   # L-tokens: {}
   # O-tokens: {}
-  # usage: "x"
+  usage: "x"
   # parse: {"DifferentialD", "[", "x", "]"}
   FullForm: DifferentialD[x]
   arity: Unary
@@ -1157,20 +1165,20 @@ DifferentialD:

 DirectedEdge:
   actual-precedence: 370
-  precedence: 295
+  precedence: 128
   WolframLanguageData:
   WolframLanguageData-corrected: 49
   UnicodeCharacters.tr: 395
   UnicodeCharacters-corrected.tr: 395
   # N-tokens: {}
-  # L-tokens: {""}
+  # L-tokens: {"→"}
   # O-tokens: {}
-  # usage: "u  v"
+  usage: "u → v"
   # parse: {"DirectedEdge", "[", "u", ",", "v", "]"}
   FullForm: DirectedEdge[expr1, expr2]
   arity: Binary
   affix: Infix
-  associativity: null
+  associativity: non-associative
   meaningful: true
   # comments:

@@ -1184,7 +1192,7 @@ DiscreteRatio:
   # N-tokens: {""}
   # L-tokens: {}
   # O-tokens: {}
-  # usage: "f  i"
+  usage: "f  i"
   # parse: {"DiscreteRatio", "[", "f", "i", "]"}
   FullForm:
   arity: Unary
@@ -5181,7 +5189,7 @@ PatternTest:
   FullForm: PatternTest[expr1, expr2]
   arity: Binary
   affix: Infix
-  associativity: missing["unknown"]
+  associativity: non-associative
   meaningful: true
   # comments:

@@ -7380,20 +7388,20 @@ UnderscriptBox:

 UndirectedEdge:
   actual-precedence: 370
-  precedence: 295
+  precedence: 120
   WolframLanguageData:
   WolframLanguageData-corrected: 49
   UnicodeCharacters.tr: 395
   UnicodeCharacters-corrected.tr: 395
   # N-tokens: {}
-  # L-tokens: {""}
+  # L-tokens: {"↔"}
   # O-tokens: {}
-  # usage: "expr1  expr2"
-  # parse: {"UndirectedEdge", "[", "expr1", ",", "expr2", "]"}
-  FullForm: UndirectedEdge[expr1, expr2]
+  usage: "u ↔ v"
+  # parse: {"UndirectedEdge", "[", "u", ",", "v", "]"}
+  FullForm: UndirectedEdge[u, v]
   arity: Binary
   affix: Infix
-  associativity: null
+  associativity: non-associative
   meaningful: true
   # comments:

diff --git a/mathics_scanner/generate/build_operator_tables.py b/mathics_scanner/generate/build_operator_tables.py
index 190402e..bd70b01 100755
--- a/mathics_scanner/generate/build_operator_tables.py
+++ b/mathics_scanner/generate/build_operator_tables.py
@@ -54,34 +54,83 @@ def compile_tables(
     for k, v in operator_data.items():
         operator_precedence[k] = v["precedence"]

+    flat_binary_operators = {}
+    left_binary_operators = {}
     no_meaning_infix_operators = {}
-    no_meaning_prefix_operators = {}
     no_meaning_postfix_operators = {}
+    no_meaning_prefix_operators = {}
+    nonassoc_binary_operators = {}
+    postfix_operators = {}
+    prefix_operators = {}
+    right_binary_operators = {}
+    ternary_operators = {}

     for operator_name, operator_info in operator_data.items():
+        character_info = character_data.get(operator_name)
+
+        if character_info is None:
+            continue
+
+        precedence = operator_info["precedence"]
+        unicode_char = character_info.get("unicode-equivalent")
+
+        affix = operator_info["affix"]
+        operator_dict = None
+
+        if affix in ("Infix", "Binary"):
+            associativity = operator_info["associativity"]
+            if associativity is None:
+                operator_dict = flat_binary_operators
+            elif associativity == "left":
+                operator_dict = left_binary_operators
+            elif associativity == "right":
+                operator_dict = right_binary_operators
+            elif associativity == "non-associative":
+                operator_dict = nonassoc_binary_operators
+            else:
+                print(
+                    f"FIXME: associativity {associativity} not handled in {operator_name}"
+                )
+
+        elif affix == "Prefix":
+            operator_dict = prefix_operators
+        elif affix == "Postfix":
+            operator_dict = postfix_operators
+        elif affix == "Ternary":
+            operator_dict = ternary_operators
+
+        if operator_dict is not None:
+            operator_dict[operator_name] = unicode_char, precedence
+
         if operator_info.get("meaningful", True) is False and (
-            character_info := character_data.get(operator_name)
+            character_data.get(operator_name)
         ):
-            if (unicode_char := character_info.get("unicode-equivalent")) is None:
+            if unicode_char is None:
                 if (unicode_char := character_info.get("wl-unicode")) is None:
                     print(f"FIXME: no unicode or WMA equivalent for {operator_name}")
                     continue

             affix = operator_info["affix"]
-            precedence = operator_info["precedence"]
             if affix == "Infix":
-                no_meaning_infix_operators[operator_name] = unicode_char, precedence
+                no_meaning_infix_operators[operator_name] = unicode_char
             elif affix == "Postfix":
-                no_meaning_postfix_operators[operator_name] = unicode_char, precedence
+                no_meaning_postfix_operators[operator_name] = unicode_char
             elif affix == "Prefix":
-                no_meaning_prefix_operators[operator_name] = unicode_char, precedence
+                no_meaning_prefix_operators[operator_name] = unicode_char
             else:
                 print(f"FIXME: affix {affix} of {operator_name} not handled")
+
     return {
         "operator-precedence": operator_precedence,
         "no-meaning-infix-operators": no_meaning_infix_operators,
         "no-meaning-postfix-operators": no_meaning_postfix_operators,
-        "no-meaning-prefix-operators": no_meaning_prefix_operators,
+        "non-associative-binary-operators": nonassoc_binary_operators,
+        "flat-binary-operators": flat_binary_operators,
+        "right-binary-operators": right_binary_operators,
+        "left-binary-operators": left_binary_operators,
+        "prefix-operators": prefix_operators,
+        "postfix-operators": postfix_operators,
+        "ternary-operators": ternary_operators,
     }
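
A minimal sketch of how a consumer such as Mathics-core might read the new tables once the dict returned by compile_tables() has been written out as JSON. The file name operators.json and its location next to operators.yml are illustrative assumptions, not something this patch establishes; the (unicode_char, precedence) tuple layout follows the operator_dict[operator_name] = unicode_char, precedence assignment added above.

# Sketch only: assumes the compiled tables are dumped to a JSON file named
# "operators.json" under mathics_scanner/data/ (an assumption for illustration).
import json
import os.path as osp

import mathics_scanner

data_dir = osp.join(osp.dirname(mathics_scanner.__file__), "data")
with open(osp.join(data_dir, "operators.json"), "r", encoding="utf-8") as json_file:
    tables = json.load(json_file)

# Binary operators with associativity null ("flat") map to [unicode_char, precedence].
for name, (unicode_char, precedence) in sorted(tables["flat-binary-operators"].items())[:5]:
    print(f"{name}: token {unicode_char!r}, precedence {precedence}")

# The no-meaning tables now carry only the Unicode token; precedence for these
# operators comes from the "operator-precedence" table instead.
print(len(tables["no-meaning-infix-operators"]), "infix operators without built-in meaning")

The usage above is only illustrative; the point of the new tables is that each affix/associativity class now exposes both the operator's Unicode token and its precedence to Mathics-core in one lookup.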