Skip to content

Commit

Permalink
core/optimizer: repair unused variable and function removal
Browse files Browse the repository at this point in the history
- core: new 'Tokens' field for 'Config' structure
- core/run: support for parsing '-tokens' cli flag and printing the
  tokens upon getting this flag
- docs: roadmap for optimizer
- core/optimizer: new postFix function for appending a random string to
  a given string
- core/optimizer: reorganised Optimiser and Node
- core/optimizer: new 'Optimiser.builder' field
- core/optimizer: encountering a new variable or function of the same
  name stores the previously stored expr.Node with a new key generated
  with postFix in the counter map
- core/optimizer: upon removing a node from the ast the optimizer removes the
  node from the counter map
  • Loading branch information
xNaCly committed Nov 20, 2023
1 parent dfba2f5 commit 1bba4f1
Show file tree
Hide file tree
Showing 6 changed files with 105 additions and 89 deletions.
1 change: 1 addition & 0 deletions core/conf.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ type Config struct {
AllErrors bool
Target string // target to compile sophia to
Ast bool // print ast
Tokens bool // print lexed tokens
}

var CONF = Config{
Expand Down
148 changes: 70 additions & 78 deletions core/optimizer/opt.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,23 @@
package optimizer

import (
"math/rand"
"sophia/core/debug"
"sophia/core/expr"
"strings"
)

// Characters postFix draws from when generating its random suffix, together
// with their count.
var (
	alphabet    = []rune("0123456789ABCDEF")
	alphabetlen = len(alphabet)
)

// TODO: Replace variable names with integers -> should reduce time spent in
// runtime.mapassign_faststr and aeshashbody (watch out for error handling,
// etc)

// TODO: precompute constants

// TODO: dead code elim, empty if, match, put, for, fun and all references to them

// Optimisations
// - Dead code elimination
// - Precomputed constants, less load on the evaluation stage especially in
Expand All @@ -34,21 +41,23 @@ import (
// - All statements referencing empty functions are removed, such as
// variables or expressions calling these functions
type Optimiser struct {
nodes []NodeTuple // stores variables and functions that are possibly defined but not used
emptyNodes []NodeTuple // stores expressions that are possibly empty
variables map[string]Node // counter for keeping track of variables, keyed by variable name
functions map[string]Node // counter for keeping track of functions, keyed by function name
builder strings.Builder // scratch buffer postFix uses to assemble keys
didOptimise bool // set to true when Start removes a node from the ast
}

type NodeTuple struct {
Name string
type Node struct {
Used bool // set to true once walkAst sees a usage of the tracked name
Parent expr.Node // node whose children are searched for Child on removal
Child expr.Node // the tracked definition node itself
}

// New returns a pointer to an Optimiser whose node slices and counter maps
// are initialised empty and whose builder is a zero-value strings.Builder.
func New() *Optimiser {
return &Optimiser{
nodes: []NodeTuple{},
emptyNodes: []NodeTuple{},
variables: map[string]Node{},
functions: map[string]Node{},
builder: strings.Builder{},
}
}

Expand All @@ -63,44 +72,50 @@ func (o *Optimiser) Start(ast []expr.Node) []expr.Node {
o.walkAst(astHolder, node)
}

// unused variables and functions
for i := 0; i < len(o.nodes); i++ {
tuple := o.nodes[i]
if tuple.Parent == nil {
// unused variables
for k, v := range o.variables {
if v.Used {
continue
}
ch := tuple.Parent.GetChildren()
if v.Parent == nil {
continue
}
ch := v.Parent.GetChildren()
if ch == nil {
continue
}
for i, c := range ch {
if c == tuple.Child {
ch[i] = ch[len(ch)-1]
ch = ch[:len(ch)-1]
tuple.Parent.SetChildren(ch)
debug.Logf("removed: %T(%s) [%d:%d]\n", tuple.Child, tuple.Name, tuple.Child.GetToken().Line+1, tuple.Child.GetToken().LinePos)
if c == v.Child {
ch = append(ch[:i], ch[i+1:]...)
v.Parent.SetChildren(ch)
t := v.Child.GetToken()
debug.Logf("removed: %T(%s) [%d:%d]\n", v.Child, k, t.Line+1, t.LinePos)
delete(o.variables, k)
o.didOptimise = true
break
}
}
}

// dead code removal
for i := 0; i < len(o.emptyNodes); i++ {
tuple := o.emptyNodes[i]
if tuple.Parent == nil {
// unused functions
for k, v := range o.functions {
if v.Used {
continue
}
if v.Parent == nil {
continue
}
ch := tuple.Parent.GetChildren()
ch := v.Parent.GetChildren()
if ch == nil {
continue
}
for i, c := range ch {
if c == tuple.Child {
ch[i] = ch[len(ch)-1]
ch = ch[:len(ch)-1]
tuple.Parent.SetChildren(ch)
debug.Logf("removed: %T(%s) [%d:%d]\n", tuple.Child, tuple.Name, tuple.Child.GetToken().Line+1, tuple.Child.GetToken().LinePos)
if c == v.Child {
ch = append(ch[:i], ch[i+1:]...)
v.Parent.SetChildren(ch)
t := v.Child.GetToken()
debug.Logf("removed: %T(%s) [%d:%d]\n", v.Child, k, t.Line+1, t.LinePos)
delete(o.functions, k)
o.didOptimise = true
break
}
Expand All @@ -115,32 +130,15 @@ func (o *Optimiser) Start(ast []expr.Node) []expr.Node {
return astHolder.Children
}

// removeNodeByName drops the first entry of nodes whose Name equals name.
//
// The matching slot is overwritten with the last element and the slice is
// shortened by one, so element order is not preserved and the backing array
// of nodes is modified in place. The shortened slice is only visible through
// the return value; when nothing matches, nodes is returned as is.
func (o *Optimiser) removeNodeByName(nodes []NodeTuple, name string) []NodeTuple {
	last := len(nodes) - 1
	for idx := 0; idx <= last; idx++ {
		if nodes[idx].Name != name {
			continue
		}
		nodes[idx] = nodes[last]
		return nodes[:last]
	}
	return nodes
}

// containsNode reports whether any entry of nodes has a Name equal to name.
func (o *Optimiser) containsNode(nodes []NodeTuple, name string) bool {
	for idx := range nodes {
		if nodes[idx].Name == name {
			return true
		}
	}
	return false
}

func (o *Optimiser) isEmpty(node expr.Node) bool {
if node == nil {
return false
} else if len(node.GetChildren()) == 0 {
return true
// postFix returns val followed by '#' and a random id of five characters drawn from alphabet
func (o *Optimiser) postFix(val string) string {
o.builder.WriteString(val)
o.builder.WriteRune('#')
for i := 0; i < 5; i++ {
o.builder.WriteRune(alphabet[rand.Intn(alphabetlen)])
}
return false
defer o.builder.Reset()
return o.builder.String()
}

func (o *Optimiser) walkAst(parent, node expr.Node) {
Expand All @@ -149,42 +147,36 @@ func (o *Optimiser) walkAst(parent, node expr.Node) {
}

switch v := node.(type) {
case *expr.If, *expr.Match, *expr.For, *expr.Put:
// empty expressions are subject to removal
if o.isEmpty(v) {
o.emptyNodes = append(o.emptyNodes, NodeTuple{Parent: parent, Child: v})
}
case *expr.Func:
// detect a function definition
o.nodes = append(o.nodes, NodeTuple{Name: v.Name.GetToken().Raw, Parent: parent, Child: v})

// empty node are subject to removal
if o.isEmpty(v) {
o.emptyNodes = append(o.emptyNodes, NodeTuple{Name: v.Name.GetToken().Raw, Parent: parent, Child: v})
name := v.Name.GetToken().Raw
if fun, ok := o.functions[name]; ok {
o.functions[o.postFix(name)] = fun
}
o.functions[name] = Node{Used: false, Parent: parent, Child: v}
case *expr.Call:
// detects a function usage, removes the item from the unused functions tracker
o.removeNodeByName(o.nodes, v.Token.Raw)

// if a function with a matching name is subject to removal we want to remove the call as well
if o.containsNode(o.emptyNodes, v.Token.Raw) {
o.emptyNodes = append(o.emptyNodes, NodeTuple{Name: v.Token.Raw, Parent: parent, Child: v})
name := v.Token.Raw
// detects a function usage, updates counter
if val, ok := o.functions[name]; ok && !val.Used {
c := val
c.Used = true
o.functions[name] = c
}
case *expr.Var:
// detect a variable definition
o.nodes = append(o.nodes, NodeTuple{Name: v.Ident.GetToken().Raw, Parent: parent, Child: v})

if o.isEmpty(v) {
o.emptyNodes = append(o.emptyNodes, NodeTuple{Parent: parent, Child: v})
name := v.Ident.GetToken().Raw
if variable, ok := o.variables[name]; ok {
o.variables[o.postFix(name)] = variable
}
o.variables[name] = Node{Used: false, Parent: parent, Child: v}
case *expr.Ident:
// if a variable with a matching name is subject to removal we want to remove its uses as well
if o.containsNode(o.emptyNodes, v.Name) || o.containsNode(o.nodes, v.Name) {
o.emptyNodes = append(o.emptyNodes, NodeTuple{Name: v.Name, Parent: parent, Child: v})
// detects a variable usage, updates counter
name := v.Name
if val, ok := o.variables[name]; ok && !val.Used {
c := val
c.Used = true
o.variables[name] = c
// !ok impossible codepath
}

// detects a variable usage, removes the item from the tracker
o.removeNodeByName(o.nodes, v.Name)
}

children := node.GetChildren()
Expand Down
15 changes: 9 additions & 6 deletions core/optimizer/opt_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,15 @@ func TestOptimizer(t *testing.T) {
tests := []string{
"(fun square (_n) (*n n))",
"(let b 5)",
"(if true)",
"(match)",
"(for (_ i) 20)",
"(fun dummy (_))(put (dummy))",
"(fun dummy (_))(let b (dummy))(put b)",
"(let b 12)(fun dummy (_))(let b (dummy))(put b)",
"(let b 5)(let b 5)",
"(let r 5)(let r 12)",
"(fun square (_n) (*n n))(fun square (_n))(fun square (_n))",
// "(if true)",
// "(match)",
// "(for (_ i) 20)",
// "(fun dummy (_))(put (dummy))",
// "(fun dummy (_))(let b (dummy))(put b)",
// "(let b 12)(fun dummy (_))(let b (dummy))(put b)",
}
for _, test := range tests {
tokens := lexer.New(test).Lex()
Expand Down
6 changes: 3 additions & 3 deletions core/run/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ func run(input string, filename string) (s []string, e error) {
}
debug.Log("lexed", len(tokens), "token")

if core.CONF.Debug {
if core.CONF.Tokens {
debug.Log(debug.Token(tokens))
}

Expand All @@ -51,7 +51,7 @@ func run(input string, filename string) (s []string, e error) {

if core.CONF.Ast {
out, _ := json.MarshalIndent(ast, "", " ")
debug.Log(string(out))
fmt.Println("ast:", string(out))
}

if filename != "repl" {
Expand All @@ -60,7 +60,7 @@ func run(input string, filename string) (s []string, e error) {
ast = opt.Start(ast)
if core.CONF.Ast {
out, _ := json.MarshalIndent(ast, "", " ")
debug.Log(string(out))
fmt.Println("optimized ast:", string(out))
}
} else {
debug.Log("done parsing - starting eval")
Expand Down
2 changes: 2 additions & 0 deletions core/run/start.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,14 @@ func Start() {
dbg := flag.Bool("dbg", false, "enable debug logs")
allErrors := flag.Bool("all-errors", false, "display all found errors")
ast := flag.Bool("ast", false, "display the ast")
toks := flag.Bool("tokens", false, "display lexed tokens")
flag.Parse()
core.CONF = core.Config{
Debug: *dbg,
Target: *target,
AllErrors: *allErrors,
Ast: *ast,
Tokens: *toks,
}

if *dbg {
Expand Down
22 changes: 20 additions & 2 deletions docs/Readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -94,5 +94,23 @@ currently using the visitor pattern for evaluation, which means the interpreter
is not fast. I am thinking about a bytecode interpreter rewrite, but I am
nowhere near experienced enough for that yet. The lexer and parser itself
aren't doing much work and are pretty fast - the evaluation is the slowest part
of the interpreter. If anyone reading this has ideas for improving the
performance either create an issue or a pr please :^).
of the interpreter.

I recently improved the interpreter performance by doing a lot less in hot
paths, such as computing float and boolean values at parse time, see
[daf3767](https://github.com/xNaCly/Sophia/commit/daf3767245389060b4706064bb6027be8d72e0ad).
This improved the performance for the specific test script by 5.67 times. I
also added an optimizer for several optimisations that are performed by
modifying the abstract syntax tree:

- [x] stripping unused variables
- [x] stripping unused functions
- [ ] computing constant expressions
- [ ] replacing variable and function names with integers for faster hash map
access
- [ ] removing empty constructs:
  - [ ] empty functions
  - [ ] empty if
  - [ ] empty for
  - [ ] empty put
  - [ ] empty match

0 comments on commit 1bba4f1

Please sign in to comment.