Skip to content

Commit

Permalink
Merge pull request #44 from shogo82148/support-nbest
Browse files Browse the repository at this point in the history
support N-Best parsing
  • Loading branch information
shogo82148 authored Apr 2, 2024
2 parents 1c03088 + 8f99fec commit 7fe5483
Show file tree
Hide file tree
Showing 4 changed files with 143 additions and 8 deletions.
54 changes: 54 additions & 0 deletions example_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,60 @@ func ExampleMeCab_ParseLattice() {
// EOS
}

// ExampleMeCab_ParseLattice_nBest shows how to walk N-best results:
// request N-best mode on the lattice, parse once, then print the current
// result and advance with Next until five results were printed or no
// further result is available.
func ExampleMeCab_ParseLattice_nBest() {
	// Use the rc file named by MECABRC_PATH when the variable is set.
	options := make(map[string]string)
	if path := os.Getenv("MECABRC_PATH"); path != "" {
		options["rcfile"] = path
	}

	tagger, err := mecab.New(options)
	if err != nil {
		panic(err)
	}
	defer tagger.Destroy()

	lattice, err := mecab.NewLattice()
	if err != nil {
		panic(err)
	}

	lattice.SetSentence("こんにちは世界")
	lattice.AddRequestType(mecab.RequestTypeNBest)
	if err = tagger.ParseLattice(lattice); err != nil {
		panic(err)
	}

	// Print at most maxResults candidates; Next reports whether another
	// candidate could be loaded into the lattice.
	const maxResults = 5
	for printed := 0; printed < maxResults; printed++ {
		fmt.Println(lattice.String())
		if more := lattice.Next(); !more {
			break
		}
	}
	// Output:
	// こんにちは 感動詞,*,*,*,*,*,こんにちは,コンニチハ,コンニチワ
	// 世界 名詞,一般,*,*,*,*,世界,セカイ,セカイ
	// EOS
	//
	// こんにちは 感動詞,*,*,*,*,*,こんにちは,コンニチハ,コンニチワ
	// 世界 名詞,一般,*,*,*,*,世界,セカイ,セカイ
	// EOS
	//
	// こんにちは 感動詞,*,*,*,*,*,こんにちは,コンニチハ,コンニチワ
	// 世 名詞,一般,*,*,*,*,世,ヨ,ヨ
	// 界 名詞,接尾,一般,*,*,*,界,カイ,カイ
	// EOS
	//
	// こんにちは 感動詞,*,*,*,*,*,こんにちは,コンニチハ,コンニチワ
	// 世 名詞,一般,*,*,*,*,世,ヨ,ヨ
	// 界 名詞,固有名詞,地域,一般,*,*,界,サカイ,サカイ
	// EOS
	//
	// こんにちは 感動詞,*,*,*,*,*,こんにちは,コンニチハ,コンニチワ
	// 世 名詞,接尾,助数詞,*,*,*,世,セイ,セイ
	// 界 名詞,接尾,一般,*,*,*,界,カイ,カイ
	// EOS
}

func ExampleMeCab_ParseToNode() {
options := map[string]string{}
if path := os.Getenv("MECABRC_PATH"); path != "" {
Expand Down
78 changes: 76 additions & 2 deletions lattice.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,41 @@ import (
"unsafe"
)

// RequestType is a set of bit flags describing how a lattice should be
// parsed. Each constant below occupies its own bit, so the numeric values
// are 1, 2, 4, ... 64 in declaration order.
type RequestType int

const (
	// RequestTypeOneBest asks for the single best result (value 1).
	RequestTypeOneBest RequestType = 1 << iota

	// RequestTypeNBest asks for N-best results (value 2).
	RequestTypeNBest

	// RequestTypePartial turns on partial parsing mode (value 4).
	// With this flag the input sentence has to be written in the
	// partial parsing format.
	RequestTypePartial

	// RequestTypeMarginalProb asks for marginal probabilities (value 8).
	// The marginal probability is stored in [Node.Prob].
	// Parsing becomes 3-5 times slower than in the default mode.
	RequestTypeMarginalProb

	// RequestTypeAlternative asks for alternative results (value 16).
	// Not implemented.
	RequestTypeAlternative

	// RequestTypeAllMorphs asks for all morphs (value 32).
	RequestTypeAllMorphs

	// RequestTypeAllocateSentence makes the tagger copy the body of the
	// passed sentence into an internal buffer (value 64).
	RequestTypeAllocateSentence
)

var errLatticeNotAvailable = errors.New("mecab: lattice is not available")

type lattice struct {
Expand All @@ -26,7 +61,8 @@ func newLattice(l *C.mecab_lattice_t) *lattice {
// It is a marker that a lattice must not be copied after the first use.
// See https://github.com/golang/go/issues/8005#issuecomment-190753527
// for details.
func (*lattice) Lock() {}
func (*lattice) Lock() {}
// Unlock does nothing. It exists only as the counterpart of the marker
// method Lock, so that *lattice has the Lock/Unlock method pair.
// NOTE(review): presumably required for `go vet`'s copylocks check to
// recognise the type as non-copyable — confirm against the vet docs.
func (*lattice) Unlock() {}

func finalizeLattice(l *lattice) {
if l.lattice != nil {
Expand Down Expand Up @@ -123,7 +159,7 @@ func (l Lattice) SetSentence(s string) {
input := C.CString(s)
defer C.free(unsafe.Pointer(input))

C.mecab_lattice_add_request_type(l.l.lattice, 64) // MECAB_ALLOCATE_SENTENCE = 64
C.mecab_lattice_add_request_type(l.l.lattice, C.int(RequestTypeAllocateSentence)) // MECAB_ALLOCATE_SENTENCE = 64
C.mecab_lattice_set_sentence2(l.l.lattice, input, length)
runtime.KeepAlive(l.l)
}
Expand All @@ -136,3 +172,41 @@ func (l Lattice) String() string {
runtime.KeepAlive(l.l)
return s
}

// Next advances the lattice to the next-best result, updating the internal
// linked list structure in place.
// [RequestTypeNBest] should be set before parsing.
// It reports false when there are no more results or when the current
// [RequestType] is invalid. It panics if the lattice is not available.
func (l Lattice) Next() bool {
	handle := l.l.lattice
	if handle == nil {
		panic(errLatticeNotAvailable)
	}
	rc := C.mecab_lattice_next(handle)
	runtime.KeepAlive(l)
	return rc != 0
}

// RequestType returns the request type flags currently set on the lattice.
// It panics with errLatticeNotAvailable if the lattice is not available.
func (l Lattice) RequestType() RequestType {
	if l.l.lattice == nil {
		panic(errLatticeNotAvailable)
	}
	t := RequestType(C.mecab_lattice_get_request_type(l.l.lattice))
	// Keep the Go-side wrapper reachable until the C call has returned, as
	// the other Lattice methods do, so that it cannot be collected (and the
	// underlying C lattice released) while C code is still reading it.
	runtime.KeepAlive(l.l)
	return t
}

// SetRequestType replaces the request type flags of the lattice with t.
// Unlike AddRequestType, flags that were set before and are not part of t
// are cleared; this includes RequestTypeAllocateSentence, which SetSentence
// adds before it hands the sentence to MeCab.
// It panics with errLatticeNotAvailable if the lattice is not available.
func (l Lattice) SetRequestType(t RequestType) {
	if l.l.lattice == nil {
		panic(errLatticeNotAvailable)
	}
	// Use the "set" entry point of the C API; the "add" one would OR t into
	// the existing flags and make this method identical to AddRequestType.
	C.mecab_lattice_set_request_type(l.l.lattice, C.int(t))
	runtime.KeepAlive(l)
}

// AddRequestType adds t to the request type of the lattice by passing it to
// mecab_lattice_add_request_type.
// It panics with errLatticeNotAvailable if the lattice is not available.
func (l Lattice) AddRequestType(t RequestType) {
	handle := l.l.lattice
	if handle == nil {
		panic(errLatticeNotAvailable)
	}
	flag := C.int(t)
	C.mecab_lattice_add_request_type(handle, flag)
	runtime.KeepAlive(l)
}
15 changes: 11 additions & 4 deletions mecab.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ func newMeCab(m *C.mecab_t) *mecab {
// It is a marker that a mecab must not be copied after the first use.
// See https://github.com/golang/go/issues/8005#issuecomment-190753527
// for details.
func (*mecab) Lock() {}
func (*mecab) Lock() {}
// Unlock does nothing. It exists only as the counterpart of the marker
// method Lock, so that *mecab has the Lock/Unlock method pair.
// NOTE(review): presumably required for `go vet`'s copylocks check to
// recognise the type as non-copyable — confirm against the vet docs.
func (*mecab) Unlock() {}

func finalizeMeCab(m *mecab) {
if m.mecab != nil {
Expand Down Expand Up @@ -90,7 +91,8 @@ func (m MeCab) Destroy() {
m.m.mecab = nil
}

// Parse parses the string and returns the result as string
// Parse parses the string and returns the result as string.
// Parse is not safe for concurrent use by multiple goroutines.
func (m MeCab) Parse(s string) (string, error) {
if m.m.mecab == nil {
panic(errMeCabNotAvailable)
Expand All @@ -108,7 +110,8 @@ func (m MeCab) Parse(s string) (string, error) {
return C.GoString(result), nil
}

// ParseToString is alias of Parse
// ParseToString is an alias of [MeCab.Parse].
// ParseToString is not safe for concurrent use by multiple goroutines.
func (m MeCab) ParseToString(s string) (string, error) {
if m.m.mecab == nil {
panic(errMeCabNotAvailable)
Expand All @@ -117,6 +120,8 @@ func (m MeCab) ParseToString(s string) (string, error) {
}

// ParseLattice parses the sentence held by the lattice and stores the result in the lattice.
// ParseLattice is safe for concurrent use by multiple goroutines.
// Create a lattice for each goroutine.
func (m MeCab) ParseLattice(lattice Lattice) error {
if m.m.mecab == nil {
panic(errMeCabNotAvailable)
Expand All @@ -128,7 +133,8 @@ func (m MeCab) ParseLattice(lattice Lattice) error {
return nil
}

// ParseToNode parses the string and returns the result as Node
// ParseToNode parses the string and returns the result as [Node].
// ParseToNode is not safe for concurrent use by multiple goroutines.
func (m MeCab) ParseToNode(s string) (Node, error) {
if m.m.mecab == nil {
panic(errMeCabNotAvailable)
Expand All @@ -148,6 +154,7 @@ func (m MeCab) ParseToNode(s string) (Node, error) {
}, nil
}

// Error returns the error of MeCab.
func (m MeCab) Error() error {
if m.m.mecab == nil {
panic(errMeCabNotAvailable)
Expand Down
4 changes: 2 additions & 2 deletions node.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ func (node Node) Next() Node {
return Node{node: (*C.mecab_node_t)(node.node.next)}
}

// ENext resturns a node which ends same position
// ENext returns a node which ends at the same position.
func (node Node) ENext() Node {
return Node{
node: (*C.mecab_node_t)(node.node.enext),
Expand All @@ -99,7 +99,7 @@ func (node Node) ENext() Node {
}
}

// BNext resturns a node which begins same position
// BNext returns a node which begins at the same position.
func (node Node) BNext() Node {
return Node{
node: (*C.mecab_node_t)(node.node.bnext),
Expand Down

0 comments on commit 7fe5483

Please sign in to comment.