core/optimizer: repair unused variable and function removal

- core: new 'Tokens' field for 'Config' structure - core/run: support for parsing '-tokens' cli flag and printing the tokens upon getting this flag - docs: roadmap for optimizer - core/optimizer: new postFix function for appending a random string to a given string - core/optimizer: reorganised Optimiser and Node - core/optimizer: new 'Optimiser.builder' field - core/optimizer: encountering a new variable or function of the same name stores the previously stored expr.Node with a new key generated with postFix in the counter map - core/optimizer: upon removing a node from the ast the optimizer removes the node from the counter map
xNaCly · Nov 20, 2023 · 1bba4f1 · 1bba4f1
1 parent dfba2f5
commit 1bba4f1
Show file tree

Hide file tree

Showing 6 changed files with 105 additions and 89 deletions.
diff --git a/core/conf.go b/core/conf.go
@@ -25,6 +25,7 @@ type Config struct {
 	AllErrors bool
 	Target    string // target to compile sophia to
 	Ast       bool   // print ast
+	Tokens    bool   // print lexed tokens
 }
 
 var CONF = Config{

diff --git a/core/optimizer/opt.go b/core/optimizer/opt.go
@@ -5,16 +5,23 @@
 package optimizer
 
 import (
+	"math/rand"
 	"sophia/core/debug"
 	"sophia/core/expr"
+	"strings"
 )
 
+var alphabet = []rune("0123456789ABCDEF")
+var alphabetlen = len(alphabet)
+
 // TODO: Replace variable names with integers -> should reduce time spent in
 // runtime.mapassign_faststr and aeshashbody (watch out for error handling,
 // etc)
 
 // TODO: precompute constants
 
+// TODO: dead code elim, empty if, match, put, for, fun and all references to them
+
 // Optimisations
 //   - Dead code elimination
 //   - Precomputed constants, less load on the evaluation stage especially in
@@ -34,21 +41,23 @@ import (
 //   - All statements referencing empty functions are removed, such as
 //     variables or expressions calling these functions
 type Optimiser struct {
-	nodes       []NodeTuple // stores variables and functions that are possible defined but not usnot used
-	emptyNodes  []NodeTuple // stores expressions that are possible empty
+	variables   map[string]Node // counter for keeping track of variables
+	functions   map[string]Node // counter for keeping track of functions
+	builder     strings.Builder
 	didOptimise bool
 }
 
-type NodeTuple struct {
-	Name   string
+type Node struct {
+	Used   bool
 	Parent expr.Node
 	Child  expr.Node
 }
 
 func New() *Optimiser {
 	return &Optimiser{
-		nodes:      []NodeTuple{},
-		emptyNodes: []NodeTuple{},
+		variables: map[string]Node{},
+		functions: map[string]Node{},
+		builder:   strings.Builder{},
 	}
 }
 
@@ -63,44 +72,50 @@ func (o *Optimiser) Start(ast []expr.Node) []expr.Node {
 		o.walkAst(astHolder, node)
 	}
 
-	// unused variables and functions
-	for i := 0; i < len(o.nodes); i++ {
-		tuple := o.nodes[i]
-		if tuple.Parent == nil {
+	// unused variables
+	for k, v := range o.variables {
+		if v.Used {
 			continue
 		}
-		ch := tuple.Parent.GetChildren()
+		if v.Parent == nil {
+			continue
+		}
+		ch := v.Parent.GetChildren()
 		if ch == nil {
 			continue
 		}
 		for i, c := range ch {
-			if c == tuple.Child {
-				ch[i] = ch[len(ch)-1]
-				ch = ch[:len(ch)-1]
-				tuple.Parent.SetChildren(ch)
-				debug.Logf("removed: %T(%s) [%d:%d]\n", tuple.Child, tuple.Name, tuple.Child.GetToken().Line+1, tuple.Child.GetToken().LinePos)
+			if c == v.Child {
+				ch = append(ch[:i], ch[i+1:]...)
+				v.Parent.SetChildren(ch)
+				t := v.Child.GetToken()
+				debug.Logf("removed: %T(%s) [%d:%d]\n", v.Child, k, t.Line+1, t.LinePos)
+				delete(o.variables, k)
 				o.didOptimise = true
 				break
 			}
 		}
 	}
 
-	// dead code removal
-	for i := 0; i < len(o.emptyNodes); i++ {
-		tuple := o.emptyNodes[i]
-		if tuple.Parent == nil {
+	// unused functions
+	for k, v := range o.functions {
+		if v.Used {
+			continue
+		}
+		if v.Parent == nil {
 			continue
 		}
-		ch := tuple.Parent.GetChildren()
+		ch := v.Parent.GetChildren()
 		if ch == nil {
 			continue
 		}
 		for i, c := range ch {
-			if c == tuple.Child {
-				ch[i] = ch[len(ch)-1]
-				ch = ch[:len(ch)-1]
-				tuple.Parent.SetChildren(ch)
-				debug.Logf("removed: %T(%s) [%d:%d]\n", tuple.Child, tuple.Name, tuple.Child.GetToken().Line+1, tuple.Child.GetToken().LinePos)
+			if c == v.Child {
+				ch = append(ch[:i], ch[i+1:]...)
+				v.Parent.SetChildren(ch)
+				t := v.Child.GetToken()
+				debug.Logf("removed: %T(%s) [%d:%d]\n", v.Child, k, t.Line+1, t.LinePos)
+				delete(o.functions, k)
 				o.didOptimise = true
 				break
 			}
@@ -115,32 +130,15 @@ func (o *Optimiser) Start(ast []expr.Node) []expr.Node {
 	return astHolder.Children
 }
 
-func (o *Optimiser) removeNodeByName(nodes []NodeTuple, name string) []NodeTuple {
-	for i, k := range nodes {
-		if k.Name == name {
-			nodes[i] = nodes[len(nodes)-1]
-			return nodes[:len(nodes)-1]
-		}
-	}
-	return nodes
-}
-
-func (o *Optimiser) containsNode(nodes []NodeTuple, name string) bool {
-	for _, k := range nodes {
-		if k.Name == name {
-			return true
-		}
-	}
-	return false
-}
-
-func (o *Optimiser) isEmpty(node expr.Node) bool {
-	if node == nil {
-		return false
-	} else if len(node.GetChildren()) == 0 {
-		return true
+// postFix appends '#' followed by a random id of 5 hexadecimal characters to val and returns the result
+func (o *Optimiser) postFix(val string) string {
+	o.builder.WriteString(val)
+	o.builder.WriteRune('#')
+	for i := 0; i < 5; i++ {
+		o.builder.WriteRune(alphabet[rand.Intn(alphabetlen)])
 	}
-	return false
+	defer o.builder.Reset()
+	return o.builder.String()
 }
 
 func (o *Optimiser) walkAst(parent, node expr.Node) {
@@ -149,42 +147,36 @@ func (o *Optimiser) walkAst(parent, node expr.Node) {
 	}
 
 	switch v := node.(type) {
-	case *expr.If, *expr.Match, *expr.For, *expr.Put:
-		// empty expressions are subject to removal
-		if o.isEmpty(v) {
-			o.emptyNodes = append(o.emptyNodes, NodeTuple{Parent: parent, Child: v})
-		}
 	case *expr.Func:
-		// detect a function definition
-		o.nodes = append(o.nodes, NodeTuple{Name: v.Name.GetToken().Raw, Parent: parent, Child: v})
-
-		// empty node are subject to removal
-		if o.isEmpty(v) {
-			o.emptyNodes = append(o.emptyNodes, NodeTuple{Name: v.Name.GetToken().Raw, Parent: parent, Child: v})
+		name := v.Name.GetToken().Raw
+		if fun, ok := o.functions[name]; ok {
+			o.functions[o.postFix(name)] = fun
 		}
+		o.functions[name] = Node{Used: false, Parent: parent, Child: v}
 	case *expr.Call:
-		// detects a function usage, removes the item from the unused functions tracker
-		o.removeNodeByName(o.nodes, v.Token.Raw)
-
-		// if a function with a matching name is subject to removal we want to remove the call as well
-		if o.containsNode(o.emptyNodes, v.Token.Raw) {
-			o.emptyNodes = append(o.emptyNodes, NodeTuple{Name: v.Token.Raw, Parent: parent, Child: v})
+		name := v.Token.Raw
+		// detects a function usage, updates counter
+		if val, ok := o.functions[name]; ok && !val.Used {
+			c := val
+			c.Used = true
+			o.functions[name] = c
 		}
 	case *expr.Var:
 		// detect a variable definition
-		o.nodes = append(o.nodes, NodeTuple{Name: v.Ident.GetToken().Raw, Parent: parent, Child: v})
-
-		if o.isEmpty(v) {
-			o.emptyNodes = append(o.emptyNodes, NodeTuple{Parent: parent, Child: v})
+		name := v.Ident.GetToken().Raw
+		if variable, ok := o.variables[name]; ok {
+			o.variables[o.postFix(name)] = variable
 		}
+		o.variables[name] = Node{Used: false, Parent: parent, Child: v}
 	case *expr.Ident:
-		// if a variable with a matching name is subject to removal we want to remove its uses as well
-		if o.containsNode(o.emptyNodes, v.Name) || o.containsNode(o.nodes, v.Name) {
-			o.emptyNodes = append(o.emptyNodes, NodeTuple{Name: v.Name, Parent: parent, Child: v})
+		// detects a variable usage, updates counter
+		name := v.Name
+		if val, ok := o.variables[name]; ok && !val.Used {
+			c := val
+			c.Used = true
+			o.variables[name] = c
+			// !ok impossible codepath
 		}
-
-		// detects a variable usage, removes the item from the tracker
-		o.removeNodeByName(o.nodes, v.Name)
 	}
 
 	children := node.GetChildren()

diff --git a/core/optimizer/opt_test.go b/core/optimizer/opt_test.go
@@ -10,12 +10,15 @@ func TestOptimizer(t *testing.T) {
 	tests := []string{
 		"(fun square (_n) (*n n))",
 		"(let b 5)",
-		"(if true)",
-		"(match)",
-		"(for (_ i) 20)",
-		"(fun dummy (_))(put (dummy))",
-		"(fun dummy (_))(let b (dummy))(put b)",
-		"(let b 12)(fun dummy (_))(let b (dummy))(put b)",
+		"(let b 5)(let b 5)",
+		"(let r 5)(let r 12)",
+		"(fun square (_n) (*n n))(fun square (_n))(fun square (_n))",
+		// "(if true)",
+		// "(match)",
+		// "(for (_ i) 20)",
+		// "(fun dummy (_))(put (dummy))",
+		// "(fun dummy (_))(let b (dummy))(put b)",
+		// "(let b 12)(fun dummy (_))(let b (dummy))(put b)",
 	}
 	for _, test := range tests {
 		tokens := lexer.New(test).Lex()

diff --git a/core/run/run.go b/core/run/run.go
@@ -36,7 +36,7 @@ func run(input string, filename string) (s []string, e error) {
 	}
 	debug.Log("lexed", len(tokens), "token")
 
-	if core.CONF.Debug {
+	if core.CONF.Tokens {
 		debug.Log(debug.Token(tokens))
 	}
 
@@ -51,7 +51,7 @@ func run(input string, filename string) (s []string, e error) {
 
 	if core.CONF.Ast {
 		out, _ := json.MarshalIndent(ast, "", "  ")
-		debug.Log(string(out))
+		fmt.Println("ast:", string(out))
 	}
 
 	if filename != "repl" {
@@ -60,7 +60,7 @@ func run(input string, filename string) (s []string, e error) {
 		ast = opt.Start(ast)
 		if core.CONF.Ast {
 			out, _ := json.MarshalIndent(ast, "", "  ")
-			debug.Log(string(out))
+			fmt.Println("optimized ast:", string(out))
 		}
 	} else {
 		debug.Log("done parsing - starting eval")

diff --git a/core/run/start.go b/core/run/start.go
@@ -16,12 +16,14 @@ func Start() {
 	dbg := flag.Bool("dbg", false, "enable debug logs")
 	allErrors := flag.Bool("all-errors", false, "display all found errors")
 	ast := flag.Bool("ast", false, "display the ast")
+	toks := flag.Bool("tokens", false, "display lexed tokens")
 	flag.Parse()
 	core.CONF = core.Config{
 		Debug:     *dbg,
 		Target:    *target,
 		AllErrors: *allErrors,
 		Ast:       *ast,
+		Tokens:    *toks,
 	}
 
 	if *dbg {

diff --git a/docs/Readme.md b/docs/Readme.md
@@ -94,5 +94,23 @@ currently using the visitor pattern for evaluation, which means the interpreter
 is not fast. I am thinking about a bytecode interpreter rewrite, but I am
 nowhere near experienced enough for that yet. The lexer and parser itself
 aren't doing much work and are pretty fast - the evaluation is the slowest part
-of the interpreter. If anyone reading this has ideas for improving the
-performance either create an issue or a pr please :^).
+of the interpreter.
+
+I recently improved the interpreter performance by doing a lot less in hot
+paths, such as computing float and boolean values at parse time, see
+[daf3767](https://github.com/xNaCly/Sophia/commit/daf3767245389060b4706064bb6027be8d72e0ad).
+This improved the performance for the specific test script by 5.67 times. I
+also added an optimizer for several optimisations that are performed by
+modifying the abstract syntax tree:
+
+- [x] stripping unused variables
+- [x] stripping unused functions
+- [ ] computing constant expressions
+- [ ] replacing variable and function names with integers for faster hash map
+      access
+- [ ] removing empty constructs:
+  - [ ] empty functions
+  - [ ] empty if
+  - [ ] empty for
+  - [ ] empty put
+  - [ ] empty match