diff --git a/mathics_scanner/data/named-characters.yml b/mathics_scanner/data/named-characters.yml index 73654fd..59fec86 100644 --- a/mathics_scanner/data/named-characters.yml +++ b/mathics_scanner/data/named-characters.yml @@ -14,7 +14,6 @@ # 5. Unicode symbols cannot be overloaded, i.e. should not be used for more than one underlying function. # For example, ≫ (U+226B, "Much Greater-Than") is already used for GreaterGreater and therefore should not be an alias for >> for Put. # Likewise, ≪ (U+226A, "Much Less-Than") for Get, ∷ (U+2237, "Proportion") for MessageName, etc. - # # Field definitions # ================= diff --git a/mathics_scanner/data/operators-additional.yml b/mathics_scanner/data/operators-additional.yml deleted file mode 100644 index 00db270..0000000 --- a/mathics_scanner/data/operators-additional.yml +++ /dev/null @@ -1,121 +0,0 @@ -# -# Additional information not in CSV or -# Note: we keep the misspelling of "meaningfull" -# and the uncoverted types like None and True - -ApplyTo: - actual-precedence: 75 - Precedence: 75 # CSV has 604 which is wrong - Precedence-corrected: 75 - Precedence-Function: 75 - WolframLanguageData: 76 - WolframLanguageData-corrected: 75 - UnicodeCharacters.tr: - UnicodeCharacters-corrected.tr: 75 - arity: Binary - affix: Infix - associativity: left - meaningfull: "true" - # comments: - -Derivative: - actual-precedence: 770 - Precedence: 670 # CSV has 604 which is wrong - Precedence-corrected: 670 - Precedence-Function: 670 - WolframLanguageData: 19 - WolframLanguageData-corrected: 19 - UnicodeCharacters.tr: - UnicodeCharacters-corrected.tr: 680 - # N-tokens: {} - # L-tokens: {"''"} - # O-tokens: {} - # usage: {{"expr", "''"}} - # parse: {"Derivative", "[", "n", "]", "[", "expr", "]"} - # FullForm: Derivative[n][expr] - arity: Unary - affix: Postfix - associativity: left - meaningfull: "true" - # comments: - -Information: - actual-precedence: 670 - Precedence: 670 - Precedence-corrected: 670 - WolframLanguageData: null - 
WolframLanguageData-corrected: - UnicodeCharacters.tr: - UnicodeCharacters-corrected.tr: - # N-tokens: - # L-tokens: - # O-tokens: - # usage: "?? AddTo" - # parse: {"Information", "[", "AddTo", "]"} - # FullForm: Information[AddTo] - arity: Binary - affix: Infix - associativity: None - meaningfull: "true" - # comments: - -# This operator is a little sketchy -InterpretedBox: - actual-precedence: 670 - Precedence: 670 - Precedence-corrected: 670 - WolframLanguageData: null - WolframLanguageData-corrected: - UnicodeCharacters.tr: - UnicodeCharacters-corrected.tr: - # N-tokens: - # L-tokens: - # O-tokens: - # usage: "\! \(2+2\)" - # parse: {"FullForm", "[", "expr1", "]"} - # FullForm: FullForm[expr] - arity: Binary - affix: Infix - associativity: None - meaningfull: "true" - # comments: - -Postfix: - actual-precedence: 640 - Precedence: 640 - Precedence-corrected: 640 - WolframLanguageData: null - WolframLanguageData-corrected: - UnicodeCharacters.tr: - UnicodeCharacters-corrected.tr: - # N-tokens: - # L-tokens: - # O-tokens: - usage: "expr // FormName" - # parse: {"FullForm", "[", "expr1", "]"} - # FullForm: FullForm[expr] - arity: Binary - affix: Infix - associativity: None - meaningfull: "true" - # comments: - -Prefix: - actual-precedence: 640 - Precedence: 640 - Precedence-corrected: 640 - WolframLanguageData: null - WolframLanguageData-corrected: - UnicodeCharacters.tr: - UnicodeCharacters-corrected.tr: - # N-tokens: - # L-tokens: - # O-tokens: - usage: "expr1 @ expr2" - # parse: {"expr1", "[", "expr2", "]"} - # FullForm: expr1[expr2] - arity: Binary - affix: Infix - associativity: None - meaningfull: "true" - # comments: diff --git a/mathics_scanner/data/operators.yml b/mathics_scanner/data/operators.yml index 7211a22..a3a0427 100644 --- a/mathics_scanner/data/operators.yml +++ b/mathics_scanner/data/operators.yml @@ -111,6 +111,19 @@ # "non-associative-operators", "unknown" under the key "miscellaneous-operators", # and None as "flat_binary_operators. 
# +# box-operator +# ------------ +# +# This field exists and is set true if the operator is used in boxing. Boxing +# operators are enclosed in \( \) pairs. +# +# operator +# -------- +# +# This field exists for box-operators. In the future, we may expand to other operators. +# (Non-box operators are listed in fields of named-characters.) +# It is the string value for the operator. + # meaningful # --------- # This field "true" if WMA defines a meaning for the operator and "false" if not. @@ -1911,13 +1924,13 @@ FormBox: WolframLanguageData-corrected: 78 UnicodeCharacters.tr: UnicodeCharacters-corrected.tr: 160 - # N-tokens: {} - # L-tokens: {"\\`"} - # O-tokens: {} - usage: "expr1 \\ expr2" + # FIXME: how do I YAML encode the below: + operator: "\\`" + usage: ["\\(`input\\)", "\\(form\\`input\\)"] arity: Binary affix: Infix associativity: "unknown" + box-operator: true meaningful: true # comments: @@ -1928,13 +1941,12 @@ FractionBox: WolframLanguageData-corrected: 31 UnicodeCharacters.tr: UnicodeCharacters-corrected.tr: 590 - # N-tokens: {} - # L-tokens: {"\/"} - # O-tokens: {} - usage: "\\( expr1 \/ expr2 \\)" + operator: "\/" + usage: "\\(x\/y\\)" arity: Binary affix: Infix associativity: "unknown" + box-operator: true meaningful: true # comments: @@ -2445,14 +2457,13 @@ InterpretedBox: WolframLanguageData-corrected: None UnicodeCharacters.tr: None UnicodeCharacters-corrected.tr: None - # N-tokens: None - # L-tokens: None - # O-tokens: None - # usage: "None" + operator: "\\!" 
+ # usage: "\\!\(...\)" FullForm: None - arity: Binary - affix: Infix + arity: Unary + affix: Prefix associativity: null + box-operator: true meaningful: true # comments: None @@ -4511,13 +4522,12 @@ OverscriptBox: WolframLanguageData-corrected: 7 UnicodeCharacters.tr: UnicodeCharacters-corrected.tr: 790 - # N-tokens: {} - # L-tokens: {"\&"} - # O-tokens: {} - # usage: "expr1 \& expr2" + operator: "\\&" + usage: "\\(x\\&y\\)" arity: Binary affix: Infix associativity: "unknown" + box-operator: true meaningful: true # comments: @@ -4528,12 +4538,11 @@ OverunderscriptBox: WolframLanguageData-corrected: 7.5 UnicodeCharacters.tr: UnicodeCharacters-corrected.tr: 785 - # N-tokens: {} - # L-tokens: {"\&"} - # O-tokens: {"\%"} - # usage: "expr1", "\&", "expr2 \% expr3" + operator: ["\\+", "\\%"] + usage: "\\(x\\+y\\%z\\)" arity: Ternary affix: Infix + box-operator: true associativity: "unknown" meaningful: true # comments: @@ -5079,13 +5088,12 @@ RadicalBox: WolframLanguageData-corrected: 22 UnicodeCharacters.tr: UnicodeCharacters-corrected.tr: 640 - # N-tokens: {"\@"} - # L-tokens: {} - # O-tokens: {"\%"} - # usage: "\@", "expr1 \% expr2" + operator: ["\\@", "\\%"] + usage: "\\(\\@x\\%n\\)" FullForm: - arity: Binary + arity: Ternary affix: Prefix + box-operator: true associativity: right meaningful: true # comments: @@ -5868,14 +5876,13 @@ SqrtBox: WolframLanguageData-corrected: 22 UnicodeCharacters.tr: 650 UnicodeCharacters-corrected.tr: 640 - # N-tokens: {"\@"} - # L-tokens: {} - # O-tokens: {} - # usage: "{{"\@", "expr"}}" + operator: "\\@" + usage: "\\(\\@x\\)" FullForm: arity: Unary affix: Prefix associativity: "unknown" + box-operator: true meaningful: true # comments: @@ -6274,20 +6281,19 @@ SuperDagger: meaningful: true # comments: -SuperscriptBox: +SubSuperscriptBox: precedence: 590 WolframLanguageData: 21 WolframLanguageData-corrected: 21 UnicodeCharacters.tr: UnicodeCharacters-corrected.tr: 660 - # N-tokens: {} - # L-tokens: {"\^"} - # O-tokens: {} - # usage: 
"expr1 \^ expr2" + operator: ["\_", "\\%"] + usage: "\\(x\_y\\%z\\)" FullForm: - arity: Binary + arity: Ternary affix: Infix associativity: "unknown" + box-operator: true meaningful: true # comments: @@ -6325,21 +6331,20 @@ SupersetEqual: meaningful: false # comments: -SupersubscriptBox: +SupercriptBox: Precedence-Function: 690 precedence: 590 WolframLanguageData: 21 WolframLanguageData-corrected: 21 UnicodeCharacters.tr: UnicodeCharacters-corrected.tr: 660 - # N-tokens: {} - # L-tokens: {"\^"} - # O-tokens: {"\%"} - # usage: "expr1", "\^", "expr2 \% expr3" + operator: "\\^" + usage: "\\(x\\^y\\)" FullForm: - arity: Ternary + arity: Binary affix: Infix associativity: right + box-operator: true meaningful: true # comments: @@ -6666,14 +6671,13 @@ UnderoverscriptBox: WolframLanguageData-corrected: 7.5 UnicodeCharacters.tr: UnicodeCharacters-corrected.tr: 785 - # N-tokens: {} - # L-tokens: {"\+"} - # O-tokens: {"\%"} - # usage: "expr1", "\+", "expr2 \% expr3" + operator: ["\\+", "\\%"] + usage: "\\(x\\+y\\%z\\)" FullForm: arity: Ternary affix: Infix associativity: "unknown" + box-operator: true meaningful: true # comments: @@ -6684,13 +6688,12 @@ UnderscriptBox: WolframLanguageData-corrected: 7 UnicodeCharacters.tr: UnicodeCharacters-corrected.tr: 790 - # N-tokens: {} - # L-tokens: {"\+"} - # O-tokens: {} - usage: "expr1 \\+ expr2" + operator: "\\+" + usage: "\\(x\\+y\\)" FullForm: arity: Binary affix: Infix + box-operator: true associativity: "unknown" meaningful: true # comments: diff --git a/mathics_scanner/generate/build_operator_tables.py b/mathics_scanner/generate/build_operator_tables.py index fedafa4..94e979e 100755 --- a/mathics_scanner/generate/build_operator_tables.py +++ b/mathics_scanner/generate/build_operator_tables.py @@ -5,6 +5,7 @@ import json import os.path as osp import sys +from collections import defaultdict from pathlib import Path from typing import Dict @@ -53,6 +54,7 @@ def compile_tables( for k, v in operator_data.items(): 
operator_precedence[k] = v["precedence"] + box_operators = {} flat_binary_operators = {} left_binary_operators = {} miscellaneous_operators = {} @@ -60,6 +62,7 @@ def compile_tables( no_meaning_postfix_operators = {} no_meaning_prefix_operators = {} nonassoc_binary_operators = {} + operator2string = defaultdict(list) postfix_operators = {} prefix_operators = {} right_binary_operators = {} @@ -96,6 +99,9 @@ def compile_tables( elif affix == "Postfix": operator_dict = postfix_operators + if operator_info.get("box-operator", False): + box_operators[operator_name] = operator_info["operator"] + # operator_dict tables are tied into the Mathics3 # parser. Extend this table, for example to # include the operator unicode, requires @@ -108,6 +114,12 @@ def compile_tables( continue unicode_char = character_info.get("unicode-equivalent", "no-unicode") + ascii_chars = character_info.get("ascii", "no-ascii") + + if unicode_char != "no-unicode": + operator2string[operator_name].append(unicode_char) + if ascii_chars != "no-ascii": + operator2string[operator_name].append(ascii_chars) if operator_info.get("meaningful", True) is False and ( character_data.get(operator_name) @@ -128,6 +140,7 @@ def compile_tables( print(f"FIXME: affix {affix} of {operator_name} not handled") return { + "box-operators": box_operators, "flat-binary-operators": flat_binary_operators, "left-binary-operators": left_binary_operators, "miscellaneous-operators": miscellaneous_operators, @@ -135,6 +148,7 @@ def compile_tables( "no-meaning-postfix-operators": no_meaning_postfix_operators, "no-meaning-prefix-operators": no_meaning_prefix_operators, "non-associative-binary-operators": nonassoc_binary_operators, + "operator-to_string": operator2string, "operator-precedence": operator_precedence, "postfix-operators": postfix_operators, "prefix-operators": prefix_operators,