Skip to content

Commit

Permalink
Fix ISPC codegen.
Browse files Browse the repository at this point in the history
  • Loading branch information
athas committed Aug 3, 2023
1 parent f448f53 commit 15c0eec
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 79 deletions.
54 changes: 0 additions & 54 deletions rts/c/ispc_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -399,58 +399,4 @@ static uniform int memblock_set (uniform struct futhark_context * uniform ctx,
return err;
}

// AOS <-> SOA memcpy functions.
//
// memmove_sized(dim) defines a family of overloaded memmove_<dim>
// functions for several combinations of uniform/varying qualifiers on
// the destination and source pointers.  Each overload copies in units
// of uint<dim>; the loop runs n / (dim / 8) times, so n appears to be a
// byte count, and any remainder left by that division is not copied.
//
// The overloads that take a varying pointer use foreach_unique to visit
// each distinct pointer value and forward the call to another overload.
// When both pointers are varying, the source pointers are iterated if
// reduce_equal reports that all destination pointers are equal;
// otherwise the destination pointers are iterated.
//
// The macro is instantiated below for 8, 16, 32 and 64 bits.
#define memmove_sized(dim) \
static inline void memmove_##dim(varying uint8 * uniform dst, uniform uint8 * varying src, uniform int64_t n) { \
uniform uint##dim * varying srcp = (uniform uint##dim * varying) src; \
varying uint##dim * uniform dstp = (varying uint##dim * uniform) dst; \
for (uniform int64_t i = 0; i < n / (dim / 8); i++) { \
dstp[i] = srcp[i]; \
} \
} \
static inline void memmove_##dim(uniform uint8 * varying dst, varying uint8 * uniform src, uniform int64_t n) { \
varying uint##dim * uniform srcp = (varying uint##dim * uniform) src; \
uniform uint##dim * varying dstp = (uniform uint##dim * varying) dst; \
for (uniform int64_t i = 0; i < n / (dim / 8); i++) { \
dstp[i] = srcp[i]; \
} \
} \
static inline void memmove_##dim(varying uint8 * uniform dst, varying uint8 * uniform src, uniform int64_t n) { \
varying uint##dim * uniform srcp = (varying uint##dim * uniform) src; \
varying uint##dim * uniform dstp = (varying uint##dim * uniform) dst; \
for (uniform int64_t i = 0; i < n / (dim / 8); i++) { \
dstp[i] = srcp[i]; \
} \
} \
static inline void memmove_##dim(varying uint8 * varying dst, uniform uint8 * varying src, uniform int64_t n) { \
foreach_unique (ptr in dst) { \
memmove_##dim(ptr, src, n); \
} \
} \
static inline void memmove_##dim(uniform uint8 * varying dst, varying uint8 * varying src, uniform int64_t n) { \
foreach_unique (ptr in src) { \
memmove_##dim(dst, ptr, n); \
} \
} \
static inline void memmove_##dim(varying uint8 * varying dst, varying uint8 * uniform src, uniform int64_t n) { \
foreach_unique (ptr in dst) { \
memmove_##dim(ptr, src, n); \
} \
} \
static inline void memmove_##dim(varying uint8 * varying dst, varying uint8 * varying src, uniform int64_t n) { \
if (reduce_equal((varying int64_t)dst)) { \
foreach_unique (ptr in src) { \
memmove_##dim(dst, ptr, n); \
} \
} else { \
foreach_unique (ptr in dst) { \
memmove_##dim(ptr, src, n); \
} \
} \
}
memmove_sized(8)
memmove_sized(16)
memmove_sized(32)
memmove_sized(64)

// End of ispc_util.h.
68 changes: 43 additions & 25 deletions src/Futhark/CodeGen/Backends/GenericC/Code.hs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ module Futhark.CodeGen.Backends.GenericC.Code
compileDest,
compileArg,
compileLMADCopy,
compileLMADCopyWith,
errorMsgString,
linearCode,
)
Expand Down Expand Up @@ -405,6 +406,42 @@ compileCode (Call dests fname args) = do
<*> mapM compileArg args
stms $ mconcat unpack_dest

-- | Generate sequential nested loops implementing an 'LMADCopy'.
-- One loop is emitted per dimension of the shape.  How a single
-- element is read from the source and written to the destination is
-- delegated to the two caller-supplied actions, which receive the
-- compiled flat index expressions.
compileLMADCopyWith ::
  [Count Elements (TExp Int64)] ->
  (C.Exp -> C.Exp -> CompilerM op s ()) ->
  ( Count Elements (TExp Int64),
    [Count Elements (TExp Int64)]
  ) ->
  (C.Exp -> CompilerM op s C.Exp) ->
  ( Count Elements (TExp Int64),
    [Count Elements (TExp Int64)]
  ) ->
  CompilerM op s ()
compileLMADCopyWith shape doWrite dst_lmad doRead src_lmad = do
  let (dstoffset, dststrides) = dst_lmad
      (srcoffset, srcstrides) = src_lmad
  bounds <- mapM (compileExp . untyped . unCount) shape
  innermost <- collect $ do
    -- Both index expressions are compiled inside 'collect', so any
    -- code they emit becomes part of the innermost loop body.
    dst_i <- flatIndex dstoffset dststrides
    src_i <- flatIndex srcoffset srcstrides
    v <- doRead src_i
    doWrite dst_i v
  items $ foldr wrapLoop innermost (zip ivs bounds)
  where
    -- One loop variable per dimension of the shape.
    ivs = [VName "i" k | k <- [0 .. length shape - 1]]
    ivs' :: [Count Elements (TExp Int64)]
    ivs' = map (elements . le64) ivs
    -- Offset plus the stride-weighted sum of the loop variables.
    flatIndex offset strides =
      compileExp . untyped . unCount $
        offset + sum (zipWith (*) ivs' strides)
    -- Wrap a body in a single counting loop over @[0, n)@.
    wrapLoop (i, n) body =
      [C.citems|for (typename int64_t $id:i = 0; $id:i < $exp:n; $id:i++)
                  { $items:body }|]

-- | Compile an 'LMADCopy' using sequential nested loops and
-- 'Read'/'Write' of individual scalars. This always works, but can
-- be pretty slow if those reads and writes are costly.
Expand All @@ -420,28 +457,9 @@ compileLMADCopy ::
[Count Elements (TExp Int64)]
) ->
CompilerM op s ()
compileLMADCopy
t
shape
(dst, dstspace)
(dstoffset, dststrides)
(src, srcspace)
(srcoffset, srcstrides) = do
shape' <- mapM (compileExp . untyped . unCount) shape
dst_i <- compileExp $ untyped $ unCount $ dstoffset + sum (zipWith (*) is' dststrides)
src_i <- compileExp $ untyped $ unCount $ srcoffset + sum (zipWith (*) is' srcstrides)
body <- collect $ do
src' <- rawMem src
dst' <- rawMem dst
generateWrite dst' dst_i t dstspace Nonvolatile
=<< generateRead src' src_i t srcspace Nonvolatile
items $ loops (zip is shape') body
where
r = length shape
is = map (VName "i") [0 .. r - 1]
is' :: [Count Elements (TExp Int64)]
is' = map (elements . le64) is
loops [] body = body
loops ((i, n) : ins) body =
[C.citems|for (typename int64_t $id:i = 0; $id:i < $exp:n; $id:i++)
{ $items:(loops ins body) }|]
compileLMADCopy t shape (dst, dstspace) dst_lmad (src, srcspace) src_lmad = do
src' <- rawMem src
dst' <- rawMem dst
let doWrite dst_i = generateWrite dst' dst_i t dstspace Nonvolatile
doRead src_i = generateRead src' src_i t srcspace Nonvolatile
compileLMADCopyWith shape doWrite dst_lmad doRead src_lmad
13 changes: 13 additions & 0 deletions src/Futhark/CodeGen/Backends/MulticoreISPC.hs
Original file line number Diff line number Diff line change
Expand Up @@ -562,6 +562,19 @@ compileCode (Read x src (Count iexp) restype DefaultSpace _) = do
<$> compileExp (untyped iexp)
<*> getMemType src restype
GC.stm [C.cstm|$id:x = $exp:e;|]
-- An 'LMADCopy' where both memories are in 'DefaultSpace' is compiled
-- to a sequential loop nest via 'GC.compileLMADCopyWith', with
-- ISPC-specific element reads and writes.
compileCode (LMADCopy t shape (dst, DefaultSpace) dst_lmad (src, DefaultSpace) src_lmad) = do
  dst' <- GC.rawMem dst
  src' <- GC.rawMem src
  let doWrite dst_i ve = do
        dst_ty <- getMemType dst t
        -- The destination index is explicitly cast to a varying
        -- int64_t before dereferencing; the source index is not.
        let dst_i' = [C.cexp|($tyquals:([varying]) typename int64_t)$exp:dst_i|]
            deref = GC.derefPointer dst' dst_i' dst_ty
        GC.stm [C.cstm|$exp:deref = $exp:(toStorage t ve);|]
      doRead src_i = do
        src_ty <- getMemType src t
        pure $ fromStorage t $ GC.derefPointer src' src_i src_ty
  GC.compileLMADCopyWith shape doWrite dst_lmad doRead src_lmad
compileCode (Free name space) = do
cached <- isJust <$> GC.cacheMem name
unless cached $ unRefMem name space
Expand Down

0 comments on commit 15c0eec

Please sign in to comment.