This repository has been archived by the owner on Aug 3, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathnode.go
90 lines (76 loc) · 1.69 KB
/
node.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
// Copyright 2021 go-mcts. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package mcts
import (
"math"
"math/rand"
)
// node is one vertex of the tree built by this package. It records the move
// and player information captured at construction time, the statistics
// accumulated through update, and the links to its parent and children.
type node struct {
	// move is the move handed to newNode when this node was created.
	move Move
	// parent is the node this one was attached to; newNode stores whatever
	// parent pointer it is given.
	parent *node
	// playerToMove is the value of state.PlayerToMove() at creation time.
	playerToMove int

	// wins is the running sum of the results passed to update.
	wins float64
	// visits counts how many times update has been called.
	visits int

	// moves holds the moves that have not been turned into children yet.
	// It starts as state.GetMoves() and shrinks as addChild consumes entries.
	moves []Move
	// children are the nodes appended by addChild, in insertion order.
	children []*node
}
// newNode allocates a node for the position described by state.
//
// move and parent are stored as given. The player to move and the list of
// untried moves are read from state (PlayerToMove first, then GetMoves).
// The statistics start at zero and the node has no children.
func newNode(state State, move Move, parent *node) *node {
	n := new(node)
	n.move = move
	n.parent = parent
	n.playerToMove = state.PlayerToMove()
	n.moves = state.GetMoves()
	return n
}
// hasUntriedMoves reports whether p.moves still contains at least one move.
func (p *node) hasUntriedMoves() bool {
	return len(p.moves) != 0
}
// hasChildren reports whether at least one child has been attached to p.
func (p *node) hasChildren() bool {
	return len(p.children) != 0
}
// getUntriedMove returns one entry of p.moves, picked at the index drawn
// from rd.Intn(len(p.moves)). The move is not removed from p.moves.
//
// It panics when p.moves is empty.
func (p *node) getUntriedMove(rd *rand.Rand) Move {
	if n := len(p.moves); n > 0 {
		return p.moves[rd.Intn(n)]
	}
	panic("untried moves is empty")
}
// selectChildUCT returns the child of p with the highest UCT score
//
//	wins/visits + sqrt(2*ln(p.visits)/visits)
//
// where wins and visits are the child's own statistics.
//
// A child that has never been visited has no defined score (the formula
// would divide by zero and yield NaN, which never compares greater), so the
// first such child in p.children is returned immediately. When p itself has
// no visits the exploration term is taken as zero instead of the NaN that
// ln(0) would produce, leaving the win ratio as the score.
//
// Ties are resolved in favour of the child that appears first in
// p.children. It panics when p has no children.
func (p *node) selectChildUCT() *node {
	if len(p.children) == 0 {
		panic("children is empty")
	}

	// The numerator of the exploration term does not depend on the child,
	// so compute it once.
	explore := 0.0
	if p.visits > 0 {
		explore = 2.0 * math.Log(float64(p.visits))
	}

	var best *node
	var bestScore float64
	for _, c := range p.children {
		if c.visits <= 0 {
			// Unvisited children take priority over every scored child.
			return c
		}
		n := float64(c.visits)
		score := c.wins/n + math.Sqrt(explore/n)
		// Strict comparison keeps the earliest child on equal scores.
		if best == nil || score > bestScore {
			best, bestScore = c, score
		}
	}
	return best
}
// addChild creates a child of p for the given move, appends it to
// p.children and removes the move from p.moves.
//
// state is forwarded to newNode to initialise the child. The first entry of
// p.moves equal to move is overwritten with the last entry and the slice is
// shortened by one, so the order of the remaining moves is not preserved.
// When move is not present in p.moves (including when p.moves is empty) the
// list is left untouched; no unrelated move is discarded.
//
// It returns the newly created child.
func (p *node) addChild(move Move, state State) *node {
	child := newNode(state, move, p)
	p.children = append(p.children, child)

	last := len(p.moves) - 1
	for i, m := range p.moves {
		if m == move {
			// Swap-remove: order of untried moves is irrelevant.
			p.moves[i] = p.moves[last]
			p.moves = p.moves[:last]
			break
		}
	}
	return child
}
// update records the outcome of one more visit: result is added to p.wins
// and p.visits is incremented by one.
func (p *node) update(result float64) {
	p.wins += result
	p.visits++
}