Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add simple processing element for the switch demo #719

Draft
wants to merge 4 commits into
base: staging
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/synthesis/debug.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[
]
2 changes: 2 additions & 0 deletions bittide/bittide.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@ library
Bittide.ScatterGather
Bittide.SharedTypes
Bittide.Switch
Bittide.SwitchDemoProcessingElement
Bittide.Transceiver
Bittide.Transceiver.Cdc
Bittide.Transceiver.Comma
Expand Down Expand Up @@ -221,6 +222,7 @@ test-suite unittests
Tests.Shared
Tests.StabilityChecker
Tests.Switch
Tests.SwitchDemoProcessingElement
Tests.Transceiver
Tests.Transceiver.Prbs
Tests.Transceiver.WordAlign
Expand Down
136 changes: 136 additions & 0 deletions bittide/src/Bittide/SwitchDemoProcessingElement.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
-- SPDX-FileCopyrightText: 2025 Google LLC
--
-- SPDX-License-Identifier: Apache-2.0
{-# LANGUAGE NumericUnderscores #-}

module Bittide.SwitchDemoProcessingElement where

import Clash.Prelude

import Data.Maybe (fromMaybe)

{- | Multiplying by 3 should always fit, though if n~1, the output type is `Index 3`
which doesn't fit the 3 we're multiplying by hence yielding an undefined. This
function works around that.
-}
zeroExtendTimesThree :: forall n. (1 <= n, KnownNat n) => Index n -> Index (n * 3)
zeroExtendTimesThree = truncateB . mul (3 :: Index 4)

-- | Simple processing element used for the Bittide switch demo.
switchDemoPe ::
forall bufferSize dom.
( HiddenClockResetEnable dom
, 1 <= bufferSize
) =>
-- | Size of buffer in number of "tri-cycles". That is, we always store 3 64-bit words:
-- DNA (32 msbs), DNA (32 lsbs), local clock cycle counter.
SNat bufferSize ->
-- | Local clock cycle counter
Signal dom (Unsigned 64) ->
-- | Incoming crossbar link
Signal dom (BitVector 64) ->
-- | Device DNA
Signal dom (Maybe (BitVector 96)) ->
-- | When to read from the crossbar link
Signal dom (Unsigned 64) ->
-- | How many tri-cycles to read from the crossbar link
Signal dom (Index bufferSize) ->
-- | When to write to the crossbar link
Signal dom (Unsigned 64) ->
-- | How many tri-cycles to write to the crossbar link. Includes writing \"own\" data.
Signal dom (Index (bufferSize + 1)) ->
( -- \| Outgoing crossbar link
Signal dom (BitVector 64)
, -- \| Buffer output
Signal dom (Vec (bufferSize * 3) (BitVector 64))
)
switchDemoPe SNat localCounter linkIn maybeDna readStart readCycles writeStart writeCycles =
(linkOut, buffer)
where
readCyclesExtended = zeroExtendTimesThree <$> readCycles
writeCyclesExtended = zeroExtendTimesThree <$> writeCycles

localData :: Signal dom (Vec 3 (BitVector 64))
localData = bundle ((pack <$> localCounter) :> unbundle dnaVec)
where
dnaVec :: Signal dom (Vec 2 (BitVector 64))
dnaVec = bitCoerce . zeroExtend <$> dnaLocked
dnaLocked = fromMaybe 0xBAAB_BAAB_BAAB_BAAB_BAAB_BAAB <$> maybeDna

linkOut = stateToLinkOutput <$> peState <*> buffer <*> localData

stateToLinkOutput ::
SimplePeState bufferSize ->
Vec (bufferSize * 3) (BitVector 64) ->
Vec 3 (BitVector 64) ->
BitVector 64
stateToLinkOutput state buf locData =
case state of
Write i
| i <= 2 -> locData !! i
| otherwise -> buf !! (i - 3)
_ -> 0xAAAA_BBBB_AAAA_BBBB

-- \| The buffer stores all the incoming bittide data. For the Bittide Switch demo,
-- each FPGA sends its DNA and local clock cycle counter, along with all received data.
-- The last FPGA will therefore receive all DNAs and local clock cycle counters.
-- Order of data packet: DNA 32 msbs, DNA 64 lsbs, local counter.
-- Order of packets: Current FPGA first, followed by the one before. Comparing all
-- buffers would look like a moving window.
buffer :: Signal dom (Vec (bufferSize * 3) (BitVector 64))
buffer = bundle $ regEn initVal <$> enableVec <*> linkInVec
where
initVal = 0xABBA_ABBA_ABBA_ABBA
linkInVec = repeat linkIn

enableVec :: Vec (bufferSize * 3) (Signal dom Bool)
enableVec = unbundle $ go <$> peState
where
go :: SimplePeState bufferSize -> Vec (bufferSize * 3) Bool
go (Read x) = (== x) <$> indicesI
go _ = repeat False

prevPeState = register Idle peState

peState =
update
<$> localCounter
<*> readStart
<*> readCyclesExtended
<*> writeStart
<*> writeCyclesExtended
<*> prevPeState
where
update ::
-- \| Local clock cycle counter
Unsigned 64 ->
-- \| When to read from the crossbar link
Unsigned 64 ->
-- \| How many cycles to read from crossbar link
Index (bufferSize * 3) ->
-- \| When to write to the crossbar link
Unsigned 64 ->
-- \| How many cycles to read from crossbar lin
Index ((bufferSize + 1) * 3) ->
SimplePeState bufferSize ->
SimplePeState bufferSize
update cntr rs rc ws wc state =
case state of
Idle -> nextState
Read x
| x >= rc - 1 -> nextState
| otherwise -> Read (satSucc SatBound x)
Write x
| x >= wc - 1 -> nextState
| otherwise -> Write (satSucc SatBound x)
where
nextState
| cntr == ws && wc > 0 = Write 0
| cntr == rs && rc > 0 = Read 0
| otherwise = Idle

data SimplePeState bufferSize
= Idle
| Read (Index (bufferSize * 3))
| Write (Index ((bufferSize + 1) * 3))
deriving (Generic, NFDataX, Eq, Show)
184 changes: 184 additions & 0 deletions bittide/tests/Tests/SwitchDemoProcessingElement.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
-- SPDX-FileCopyrightText: 2025 Google LLC
--
-- SPDX-License-Identifier: Apache-2.0
{-# LANGUAGE NumericUnderscores #-}
{-# LANGUAGE OverloadedStrings #-}

module Tests.SwitchDemoProcessingElement where

import Clash.Prelude hiding (someNatVal, withSomeSNat)

import Test.Tasty
import Test.Tasty.HUnit
import Test.Tasty.Hedgehog

import Control.Monad (forM_)
import Data.Proxy (Proxy (..))
import GHC.TypeNats (someNatVal)

import Bittide.SwitchDemoProcessingElement

import qualified Hedgehog as H
import qualified Hedgehog.Gen as Gen
import qualified Hedgehog.Range as Range

import qualified Clash.Explicit.Prelude as E
import qualified Data.List as L

import Clash.Explicit.Reset (noReset)
import Clash.Hedgehog.Sized.BitVector (genDefinedBitVector)
import Clash.Hedgehog.Sized.Index (genIndex)
import Clash.Hedgehog.Sized.Unsigned (genUnsigned)
import Hedgehog ((===))

main :: IO ()
main = defaultMain tests

tests :: TestTree
tests =
testGroup
"SwitchDemoProcessingElement"
[ testPropertyNamed "prop_readThenWrite" "prop_readThenWrite" prop_readThenWrite
, testCase "case_zeroExtendTimesThree" case_zeroExtendTimesThree
]

withSomeSNat :: Natural -> (forall (n :: Nat). SNat n -> r) -> r
withSomeSNat n f = case someNatVal n of
SomeNat (_ :: Proxy n) -> f (SNat @n)

linearLength :: (Integral a) => a -> a -> Range.Range a
linearLength start len = Range.linear start (start + len)

singletonInt :: (Integral a) => a -> Range.Range Int
singletonInt = Range.singleton . fromIntegral

-- | Exhaustive test for 'zeroExtendTimesThree' for n ~ [1..64]
case_zeroExtendTimesThree :: Assertion
case_zeroExtendTimesThree =
forM_ [0 .. 63] $ \nMinusOne ->
withSomeSNat nMinusOne $ \(succSNat -> (SNat :: SNat n)) -> do
forM_ [(0 :: Index n) ..] $ \i -> do
let
actual = fromIntegral (zeroExtendTimesThree @n i)
expected = fromIntegral @_ @Integer i * 3
actual @?= expected

prop_readThenWrite :: H.Property
prop_readThenWrite = H.property $ do
bufferSizeMinusOne <- H.forAll $ Gen.integral (Range.linear 0 10)
withSomeSNat bufferSizeMinusOne $ \(succSNat -> bufferSizeSNat@(SNat :: SNat bufferSize)) -> do
nReadTriCycles <-
H.forAll
$ Gen.frequency
[ (30, Gen.constant 0)
, (70, genIndex Range.linearBounded)
]
nWriteTriCycles <-
if nReadTriCycles == 0
then H.forAll $ genIndex (Range.linear 1 maxBound)
else H.forAll $ genIndex Range.linearBounded

let
cyclesPerReadWrite = 3 :: Unsigned 64
nReadCycles = cyclesPerReadWrite * fromIntegral nReadTriCycles
nWriteCycles = cyclesPerReadWrite * fromIntegral nWriteTriCycles
maxIdle1 = 100
maxIdle2 = 100

-- Notice that the PE needs a single clock cycle in its idle state to function
-- correctly. Hence, we always start reading a minimum at clockStart+1.
readData <- H.forAll $ Gen.list (singletonInt nReadCycles) genDefinedBitVector
clockStart <- H.forAll $ genUnsigned @_ @64 (Range.linear 0 100)
readStart <-
H.forAll
$ Gen.frequency
[ (30, Gen.constant clockStart)
, (70, genUnsigned @_ @64 (linearLength clockStart maxIdle1))
]
let readEnd = readStart + fromIntegral nReadCycles
writeStart <-
H.forAll
$ Gen.frequency
[ (30, Gen.constant readEnd)
, (70, genUnsigned @_ @64 (linearLength readEnd maxIdle2))
]
deviceDna <- H.forAll genDefinedBitVector

let immediateRead = nReadCycles > 0 && readStart == clockStart
H.cover 5 "Read in the very first cycle we're allowed to" immediateRead

let immediateWrite = nWriteCycles > 0 && writeStart == clockStart
H.cover 5 "Write in the very first cycle we're allowed to" immediateWrite

let isBackToBack = nReadCycles > 0 && nWriteCycles > 0 && readEnd == writeStart
H.cover 5 "Back-to-back read/write" isBackToBack

let
idle1length = readStart - clockStart
idle2length = writeStart - readEnd
idle1in = L.replicate (fromIntegral idle1length) 0
crossBarIn = fromList (idle1in <> readData <> L.repeat 0)

out =
E.sample
$ bundle
$ withClockResetEnable @System clockGen noReset enableGen
$ switchDemoPe
bufferSizeSNat
(fromList [clockStart ..])
crossBarIn
(pure (Just deviceDna))
(pure readStart)
(pure nReadTriCycles)
(pure writeStart)
(pure nWriteTriCycles)

(idle1out, rest0) = L.splitAt (fromIntegral idle1length) out
(readOut, rest1) = L.splitAt (fromIntegral nReadCycles) rest0
(idle2out, rest2) = L.splitAt (fromIntegral idle2length) rest1
(writeOut, rest3) = L.splitAt (fromIntegral nWriteCycles) rest2

(idle1outs, _idle1buffers) = L.unzip idle1out
(readOuts, _readBuffers) = L.unzip readOut
(idle2outs, _idle2buffers) = L.unzip idle2out
(writeOuts, _writeBuffers) = L.unzip writeOut
(_restOuts, restBuffers) = L.unzip rest3

H.footnote $ "idle1in: " <> show idle1in
H.footnote $ "idle1length: " <> show idle1length
H.footnote $ "idle2length: " <> show idle2length
H.footnote $ "nReadCycles: " <> show nReadCycles
H.footnote $ "nWriteCycles: " <> show nWriteCycles
H.footnote $ "readData: " <> show readData
H.footnote $ "readEnd: " <> show readEnd
H.footnote $ "readStart: " <> show readStart
H.footnote $ "writeStart: " <> show writeStart
H.footnote $ "clockStart: " <> show clockStart
H.footnote $ "idle2outs: " <> showX idle2outs
H.footnote $ "bufferSizeSNat: " <> show bufferSizeSNat

-- Check that at the end of the simulation the buffer is what we expect it
-- to be. The buffer should be equal to the data we send to the PE. We don't
-- care about data we don't write, hence we truncate (L.take) the buffer to
-- match the number of read cycles.
case restBuffers of
[] -> error "Unexpected end of output"
(buffer : _) -> do
H.footnote $ "buffer: " <> show buffer
L.take (fromIntegral nReadCycles) (toList buffer) === readData

-- Check that idle value is written at correct times
let idleValue = 0xAAAA_BBBB_AAAA_BBBB
idle1outs === L.replicate (L.length idle1outs) idleValue
readOuts === L.replicate (L.length readOuts) idleValue
idle2outs === L.replicate (L.length idle2outs) idleValue

-- Check that the right data is written the crossbar link at the time we
-- expect it to.
let
-- Note we can always write one tri-cycle more than we read, since internal
-- data comes first.
relevantOutCycles = fromIntegral (min nWriteCycles (nReadCycles + cyclesPerReadWrite))
deviceDnaVec = bitCoerce @_ @(Vec 2 (BitVector 64)) (zeroExtend deviceDna)
expectedOutData = toList (pack writeStart :> deviceDnaVec) <> readData
L.take relevantOutCycles writeOuts === L.take relevantOutCycles expectedOutData
2 changes: 2 additions & 0 deletions bittide/tests/UnitTests.hs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import qualified Tests.ProcessingElement.ReadElf
import qualified Tests.ScatterGather
import qualified Tests.StabilityChecker
import qualified Tests.Switch
import qualified Tests.SwitchDemoProcessingElement
import qualified Tests.Transceiver
import qualified Tests.Transceiver.Prbs
import qualified Tests.Transceiver.WordAlign
Expand All @@ -36,6 +37,7 @@ tests =
, Tests.DoubleBufferedRam.tests
, Tests.ElasticBuffer.tests
, Tests.ProcessingElement.ReadElf.tests
, Tests.SwitchDemoProcessingElement.tests
, Tests.ScatterGather.tests
, Tests.StabilityChecker.tests
, Tests.Switch.tests
Expand Down
1 change: 1 addition & 0 deletions nix/bin/format
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#
# SPDX-License-Identifier: Apache-2.0
ROOT=$(git rev-parse --show-toplevel)
cd "${ROOT}" || exit 1

echo "Formatting Cabal files.."
"${ROOT}"/.github/scripts/cabal-gild.sh
Expand Down
Loading