Skip to content

Commit

Permalink
cuda: update wrappers
Browse files Browse the repository at this point in the history
  • Loading branch information
godsic committed Sep 28, 2018
1 parent 41bcd2e commit b21e9e6
Show file tree
Hide file tree
Showing 47 changed files with 4,286 additions and 2,982 deletions.
133 changes: 67 additions & 66 deletions cuda/copypadmul2_wrapper.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,102 +5,103 @@ package cuda
EDITING IS FUTILE.
*/

import (
import(
"unsafe"
"github.com/mumax/3/cuda/cu"
"github.com/mumax/3/timer"
"sync"
"unsafe"
)

// CUDA handle for copypadmul2 kernel.
// It is declared without an initializer; k_copypadmul2_async compares it
// against 0 and, if it is still 0, fills it in with
// fatbinLoad(copypadmul2_map, "copypadmul2") before launching.
var copypadmul2_code cu.Function

// Stores the arguments for copypadmul2 kernel invocation.
//
// There is one arg_* field per kernel parameter, in the same order as the
// parameters of k_copypadmul2_async. argptr has one slot per arg_* field
// (11 in total); init points each slot at the matching field. The embedded
// sync.Mutex is locked by k_copypadmul2_async while it writes the fields
// and launches the kernel.
//
// NOTE(review): this span comes from a rendered diff. The pre-commit and
// post-commit versions of the struct header and of every field line appear
// back to back; the two versions list the same fields and differ only in
// formatting. sync.Mutex and the closing brace appear once.
type copypadmul2_args_t struct {
arg_dst unsafe.Pointer
arg_Dx int
arg_Dy int
arg_Dz int
arg_src unsafe.Pointer
arg_Sx int
arg_Sy int
arg_Sz int
arg_Ms_ unsafe.Pointer
arg_Ms_mul float32
arg_vol unsafe.Pointer
argptr [11]unsafe.Pointer
type copypadmul2_args_t struct{
arg_dst unsafe.Pointer
arg_Dx int
arg_Dy int
arg_Dz int
arg_src unsafe.Pointer
arg_Sx int
arg_Sy int
arg_Sz int
arg_Ms_ unsafe.Pointer
arg_Ms_mul float32
arg_vol unsafe.Pointer
argptr [11]unsafe.Pointer
sync.Mutex
}

// Stores the arguments for copypadmul2 kernel invocation.
// This is the single package-level instance: init records the addresses of
// its fields in its argptr array, and k_copypadmul2_async overwrites the
// fields on every call while holding the embedded mutex.
var copypadmul2_args copypadmul2_args_t

// init fills copypadmul2_args.argptr so that slot i holds the address of the
// i-th arg_* field of copypadmul2_args. The addresses are taken once here;
// later calls only change the field values, not the pointer table.
//
// NOTE(review): this span comes from a rendered diff. The function header,
// the eleven assignments and the closing brace each appear twice (pre-commit
// version first, post-commit version second); both versions perform the same
// assignments.
func init() {
func init(){
// CUDA driver kernel call wants pointers to arguments, set them up once.
copypadmul2_args.argptr[0] = unsafe.Pointer(&copypadmul2_args.arg_dst)
copypadmul2_args.argptr[1] = unsafe.Pointer(&copypadmul2_args.arg_Dx)
copypadmul2_args.argptr[2] = unsafe.Pointer(&copypadmul2_args.arg_Dy)
copypadmul2_args.argptr[3] = unsafe.Pointer(&copypadmul2_args.arg_Dz)
copypadmul2_args.argptr[4] = unsafe.Pointer(&copypadmul2_args.arg_src)
copypadmul2_args.argptr[5] = unsafe.Pointer(&copypadmul2_args.arg_Sx)
copypadmul2_args.argptr[6] = unsafe.Pointer(&copypadmul2_args.arg_Sy)
copypadmul2_args.argptr[7] = unsafe.Pointer(&copypadmul2_args.arg_Sz)
copypadmul2_args.argptr[8] = unsafe.Pointer(&copypadmul2_args.arg_Ms_)
copypadmul2_args.argptr[9] = unsafe.Pointer(&copypadmul2_args.arg_Ms_mul)
copypadmul2_args.argptr[10] = unsafe.Pointer(&copypadmul2_args.arg_vol)
}
copypadmul2_args.argptr[0] = unsafe.Pointer(&copypadmul2_args.arg_dst)
copypadmul2_args.argptr[1] = unsafe.Pointer(&copypadmul2_args.arg_Dx)
copypadmul2_args.argptr[2] = unsafe.Pointer(&copypadmul2_args.arg_Dy)
copypadmul2_args.argptr[3] = unsafe.Pointer(&copypadmul2_args.arg_Dz)
copypadmul2_args.argptr[4] = unsafe.Pointer(&copypadmul2_args.arg_src)
copypadmul2_args.argptr[5] = unsafe.Pointer(&copypadmul2_args.arg_Sx)
copypadmul2_args.argptr[6] = unsafe.Pointer(&copypadmul2_args.arg_Sy)
copypadmul2_args.argptr[7] = unsafe.Pointer(&copypadmul2_args.arg_Sz)
copypadmul2_args.argptr[8] = unsafe.Pointer(&copypadmul2_args.arg_Ms_)
copypadmul2_args.argptr[9] = unsafe.Pointer(&copypadmul2_args.arg_Ms_mul)
copypadmul2_args.argptr[10] = unsafe.Pointer(&copypadmul2_args.arg_vol)
 }

// Wrapper for copypadmul2 CUDA kernel, asynchronous.
//
// The eleven kernel arguments (dst ... vol) are copied unchanged into the
// shared copypadmul2_args struct and handed to cu.LaunchKernel through its
// argptr table. cfg supplies the grid and block dimensions for the launch.
// NOTE(review): judging by the names, Dx/Dy/Dz and Sx/Sy/Sz are presumably
// the destination and source sizes and Ms_/Ms_mul/vol scaling inputs --
// confirm against the kernel source, which is not visible here.
//
// When the package-level Synchronous flag is set, the call is bracketed by
// Sync() and timed under the name "copypadmul2".
//
// NOTE(review): this span comes from a rendered diff. Lines changed by the
// commit appear twice (pre-commit version first, post-commit version
// second); the paired lines differ only in formatting.
func k_copypadmul2_async(dst unsafe.Pointer, Dx int, Dy int, Dz int, src unsafe.Pointer, Sx int, Sy int, Sz int, Ms_ unsafe.Pointer, Ms_mul float32, vol unsafe.Pointer, cfg *config) {
if Synchronous { // debug
func k_copypadmul2_async ( dst unsafe.Pointer, Dx int, Dy int, Dz int, src unsafe.Pointer, Sx int, Sy int, Sz int, Ms_ unsafe.Pointer, Ms_mul float32, vol unsafe.Pointer, cfg *config) {
if Synchronous{ // debug
Sync()
timer.Start("copypadmul2")
}

// The argument struct is shared by all callers, so hold its mutex from here
// until the function returns (the deferred Unlock runs after the optional
// Sync/timer.Stop at the end).
copypadmul2_args.Lock()
defer copypadmul2_args.Unlock()

// Load the kernel on first use; a zero handle means it has not been loaded yet.
if copypadmul2_code == 0 {
if copypadmul2_code == 0{
copypadmul2_code = fatbinLoad(copypadmul2_map, "copypadmul2")
}

// Stage this call's values in the fields that argptr already points at.
copypadmul2_args.arg_dst = dst
copypadmul2_args.arg_Dx = Dx
copypadmul2_args.arg_Dy = Dy
copypadmul2_args.arg_Dz = Dz
copypadmul2_args.arg_src = src
copypadmul2_args.arg_Sx = Sx
copypadmul2_args.arg_Sy = Sy
copypadmul2_args.arg_Sz = Sz
copypadmul2_args.arg_Ms_ = Ms_
copypadmul2_args.arg_Ms_mul = Ms_mul
copypadmul2_args.arg_vol = vol
copypadmul2_args.arg_dst = dst
copypadmul2_args.arg_Dx = Dx
copypadmul2_args.arg_Dy = Dy
copypadmul2_args.arg_Dz = Dz
copypadmul2_args.arg_src = src
copypadmul2_args.arg_Sx = Sx
copypadmul2_args.arg_Sy = Sy
copypadmul2_args.arg_Sz = Sz
copypadmul2_args.arg_Ms_ = Ms_
copypadmul2_args.arg_Ms_mul = Ms_mul
copypadmul2_args.arg_vol = vol


// Launch on stream0 with the grid/block sizes from cfg.
// NOTE(review): the literal 0 before stream0 is presumably the dynamic
// shared-memory size -- confirm against cu.LaunchKernel.
args := copypadmul2_args.argptr[:]
cu.LaunchKernel(copypadmul2_code, cfg.Grid.X, cfg.Grid.Y, cfg.Grid.Z, cfg.Block.X, cfg.Block.Y, cfg.Block.Z, 0, stream0, args)

if Synchronous { // debug
if Synchronous{ // debug
Sync()
timer.Stop("copypadmul2")
}
}

// maps compute capability on PTX code for copypadmul2 kernel.
// Each non-zero key NN selects the constant copypadmul2_ptx_NN, whose PTX
// text declares ".target sm_NN". Key 0 maps to the empty string.
// NOTE(review): how fatbinLoad treats key 0 is not visible here -- confirm
// before relying on it.
//
// NOTE(review): this span comes from a rendered diff; every entry appears
// twice (pre-commit version first, post-commit version second) with the
// same key and value.
var copypadmul2_map = map[int]string{0: "",
30: copypadmul2_ptx_30,
35: copypadmul2_ptx_35,
37: copypadmul2_ptx_37,
50: copypadmul2_ptx_50,
52: copypadmul2_ptx_52,
53: copypadmul2_ptx_53,
60: copypadmul2_ptx_60,
61: copypadmul2_ptx_61,
70: copypadmul2_ptx_70,
75: copypadmul2_ptx_75}
var copypadmul2_map = map[int]string{ 0: "" ,
30: copypadmul2_ptx_30 ,
35: copypadmul2_ptx_35 ,
37: copypadmul2_ptx_37 ,
50: copypadmul2_ptx_50 ,
52: copypadmul2_ptx_52 ,
53: copypadmul2_ptx_53 ,
60: copypadmul2_ptx_60 ,
61: copypadmul2_ptx_61 ,
70: copypadmul2_ptx_70 ,
75: copypadmul2_ptx_75 }

// copypadmul2 PTX code for various compute capabilities.
const (
copypadmul2_ptx_30 = `
const(
copypadmul2_ptx_30 = `
.version 6.3
.target sm_30
.address_size 64
Expand Down Expand Up @@ -203,7 +204,7 @@ BB0_6:
`
copypadmul2_ptx_35 = `
copypadmul2_ptx_35 = `
.version 6.3
.target sm_35
.address_size 64
Expand Down Expand Up @@ -306,7 +307,7 @@ BB0_6:
`
copypadmul2_ptx_37 = `
copypadmul2_ptx_37 = `
.version 6.3
.target sm_37
.address_size 64
Expand Down Expand Up @@ -409,7 +410,7 @@ BB0_6:
`
copypadmul2_ptx_50 = `
copypadmul2_ptx_50 = `
.version 6.3
.target sm_50
.address_size 64
Expand Down Expand Up @@ -512,7 +513,7 @@ BB0_6:
`
copypadmul2_ptx_52 = `
copypadmul2_ptx_52 = `
.version 6.3
.target sm_52
.address_size 64
Expand Down Expand Up @@ -615,7 +616,7 @@ BB0_6:
`
copypadmul2_ptx_53 = `
copypadmul2_ptx_53 = `
.version 6.3
.target sm_53
.address_size 64
Expand Down Expand Up @@ -718,7 +719,7 @@ BB0_6:
`
copypadmul2_ptx_60 = `
copypadmul2_ptx_60 = `
.version 6.3
.target sm_60
.address_size 64
Expand Down Expand Up @@ -821,7 +822,7 @@ BB0_6:
`
copypadmul2_ptx_61 = `
copypadmul2_ptx_61 = `
.version 6.3
.target sm_61
.address_size 64
Expand Down Expand Up @@ -924,7 +925,7 @@ BB0_6:
`
copypadmul2_ptx_70 = `
copypadmul2_ptx_70 = `
.version 6.3
.target sm_70
.address_size 64
Expand Down Expand Up @@ -1027,7 +1028,7 @@ BB0_6:
`
copypadmul2_ptx_75 = `
copypadmul2_ptx_75 = `
.version 6.3
.target sm_75
.address_size 64
Expand Down Expand Up @@ -1130,4 +1131,4 @@ BB0_6:
`
)
)
Loading

0 comments on commit b21e9e6

Please sign in to comment.