diff --git a/bittide-instances/bittide-instances.cabal b/bittide-instances/bittide-instances.cabal index f4debd0e5..82288139d 100644 --- a/bittide-instances/bittide-instances.cabal +++ b/bittide-instances/bittide-instances.cabal @@ -212,6 +212,7 @@ test-suite unittests Wishbone.CaptureUgn Wishbone.DnaPortE2 Wishbone.ScatterGather + Wishbone.SwitchDemoProcessingElement Wishbone.Time Wishbone.Watchdog diff --git a/bittide-instances/src/Bittide/Instances/Pnr/Ethernet.hs b/bittide-instances/src/Bittide/Instances/Pnr/Ethernet.hs index 03dd12aa2..81ac92223 100644 --- a/bittide-instances/src/Bittide/Instances/Pnr/Ethernet.hs +++ b/bittide-instances/src/Bittide/Instances/Pnr/Ethernet.hs @@ -101,7 +101,7 @@ vexRiscGmii SNat sysClk sysRst rxClk rxRst txClk txRst fwd = [uartBus, timeBus, wbAxiRx, wbAxiTx, dnaWb, gpioWb, macWb] <- pe -< jtag (uartRx, _uartStatus) <- uart -< (uartBus, uartTx) time -< timeBus - dna -< dnaWb + _dna <- dnaC -< dnaWb macStatIf -< (macWb, macStatus) gpioDf <- idleSource -< () gpioOut <- gpio -< (gpioWb, gpioDf) @@ -116,7 +116,7 @@ vexRiscGmii SNat sysClk sysRst rxClk rxRst txClk txRst fwd = (fwd, (pure (), pure (), pure ())) where time = wcre timeWb - dna = wcre readDnaPortE2Wb simDna2 + dnaC = wcre readDnaPortE2Wb simDna2 mac = ethMac1GFifoC (SNat @1500) diff --git a/bittide-instances/tests/Wishbone/DnaPortE2.hs b/bittide-instances/tests/Wishbone/DnaPortE2.hs index 7c50e178e..ecab1bdca 100644 --- a/bittide-instances/tests/Wishbone/DnaPortE2.hs +++ b/bittide-instances/tests/Wishbone/DnaPortE2.hs @@ -63,7 +63,7 @@ dut = circuit $ \_unit -> do (uartRx, jtag) <- idleSource -< () [uartBus, dnaWb] <- processingElement @dom NoDumpVcd peConfig -< jtag (uartTx, _uartStatus) <- uartInterfaceWb d2 d2 uartSim -< (uartBus, uartRx) - readDnaPortE2Wb simDna2 -< dnaWb + _dna <- readDnaPortE2Wb simDna2 -< dnaWb idC -< uartTx where (iMem, dMem) = diff --git a/bittide-instances/tests/Wishbone/SwitchDemoProcessingElement.hs 
b/bittide-instances/tests/Wishbone/SwitchDemoProcessingElement.hs new file mode 100644 index 000000000..0f2b2ee49 --- /dev/null +++ b/bittide-instances/tests/Wishbone/SwitchDemoProcessingElement.hs @@ -0,0 +1,123 @@ +-- SPDX-FileCopyrightText: 2025 Google LLC +-- +-- SPDX-License-Identifier: Apache-2.0 +{-# LANGUAGE NumericUnderscores #-} +{-# OPTIONS_GHC -fplugin=Protocols.Plugin #-} + +module Wishbone.SwitchDemoProcessingElement where + +import Clash.Explicit.Prelude +import Clash.Prelude (HiddenClockResetEnable, withClockResetEnable) + +import Data.Char (chr) +import Data.List (isPrefixOf) +import Data.Maybe (mapMaybe) +import Project.FilePath +import Protocols +import Protocols.Idle +import System.FilePath (()) +import System.IO.Unsafe (unsafePerformIO) +import Test.Tasty +import Test.Tasty.HUnit +import Test.Tasty.TH +import VexRiscv (DumpVcd (NoDumpVcd)) + +import Bittide.DoubleBufferedRam +import Bittide.ProcessingElement +import Bittide.ProcessingElement.Util +import Bittide.SharedTypes +import Bittide.SwitchDemoProcessingElement +import Bittide.Wishbone + +import qualified Protocols.Df as Df + +takeWhileInclusive :: (a -> Bool) -> [a] -> [a] +takeWhileInclusive _ [] = [] +takeWhileInclusive p (x : xs) = x : if p x then takeWhileInclusive p xs else [] + +sim :: IO () +sim = putStr simResult + +simResult :: String +simResult = unlines . takeWhileInclusive (/= "Finished") . lines $ uartString + where + uartString = chr . 
fromIntegral <$> mapMaybe Df.dataToMaybe uartStream + uartStream = + sampleC def{timeoutAfter = 200_000} + $ withClockResetEnable clk reset enable + $ dut @System localCounter dnaA dnaB + + clk = clockGen + reset = resetGen + enable = enableGen + localCounter = register clk reset enable 0 (localCounter + 1) + dnaA = pure 0xAAAA_0123_4567_89AB_CDEF_0001 + dnaB = pure 0xBBBB_0123_4567_89AB_CDEF_0001 + +case_switch_demo_pe_test :: Assertion +case_switch_demo_pe_test = assertBool msg (receivedString == expectedString) + where + msg = + "Received string " + <> receivedString + <> " not equal to expected string " + <> expectedString + -- Filter the 'debugging' prints, which are prefixed with 'INFO' + receivedString = unlines . filter (not . isPrefixOf "INFO") . lines $ simResult + expectedString = + unlines + [ "Buffer A: [(0x10100, 0xBBBB0123456789ABCDEF0001), (0x10000, 0xAAAA0123456789ABCDEF0001)]" + , "Buffer B: [(0x10000, 0xAAAA0123456789ABCDEF0001), (0xABBAABBAABBA0003, 0xABBA0005ABBAABBAABBA0004)]" + , "Finished" + ] + +{- | A simulation-only design containing two `switchDemoPeWb`s connected to a single +VexRiscV. The VexRiscV runs the `switch_demo_pe_test` binary from `firmware-binaries`. +-} +dut :: + forall dom. 
+ ( HiddenClockResetEnable dom + , 1 <= DomainPeriod dom + ) => + -- | Local clock cycle counter + Signal dom (Unsigned 64) -> + -- | Fake DNA (used to identify the different PEs) + Signal dom (BitVector 96) -> + -- | Fake DNA (used to identify the different PEs) + Signal dom (BitVector 96) -> + Circuit () (Df dom (BitVector 8)) +dut localCounter dnaA dnaB = circuit $ do + (uartRx, jtagIdle) <- idleSource -< () + [uartBus, timeBus, peBusA, peBusB] <- processingElement NoDumpVcd peConfig -< jtagIdle + (uartTx, _uartStatus) <- uartInterfaceWb d16 d2 uartSim -< (uartBus, uartRx) + timeWb -< timeBus + linkAB <- switchDemoPeWb d2 localCounter -< (peBusA, dnaAC, linkBA) + linkBA <- switchDemoPeWb d2 localCounter -< (peBusB, dnaBC, linkAB) + dnaAC <- signalToCSignal dnaA -< () + dnaBC <- signalToCSignal dnaB -< () + idC -< uartTx + where + signalToCSignal :: Signal dom a -> Circuit () (CSignal dom a) + signalToCSignal = Circuit . const . ((),) + + memMap = 0b000 :> 0b001 :> 0b010 :> 0b011 :> 0b100 :> 0b101 :> Nil + peConfig = unsafePerformIO $ do + root <- findParentContaining "cabal.project" + let + elfDir = root firmwareBinariesDir "riscv32imc" Release + elfPath = elfDir "switch_demo_pe_test" + (iMem, dMem) <- vecsFromElf @DMemWords @IMemWords BigEndian elfPath Nothing + pure + PeConfig + { memMapConfig = memMap + , initI = Reloadable (Vec iMem) + , initD = Reloadable (Vec dMem) + , iBusTimeout = d0 -- No timeouts on the instruction bus + , dBusTimeout = d0 -- No timeouts on the data bus + } + +type DMemWords = DivRU (32 * 1024) 4 +type IMemWords = DivRU (32 * 1024) 4 + +tests :: TestTree +tests = $(testGroupGenerator) diff --git a/bittide-instances/tests/unittests.hs b/bittide-instances/tests/unittests.hs index e2e8065b4..d14beb531 100644 --- a/bittide-instances/tests/unittests.hs +++ b/bittide-instances/tests/unittests.hs @@ -14,6 +14,7 @@ import qualified Wishbone.Axi as Axi import qualified Wishbone.CaptureUgn as CaptureUgn import qualified Wishbone.DnaPortE2 as 
DnaPortE2 import qualified Wishbone.ScatterGather as ScatterGather +import qualified Wishbone.SwitchDemoProcessingElement as SwitchDemoProcessingElement import qualified Wishbone.Time as Time import qualified Wishbone.Watchdog as Watchdog @@ -24,6 +25,7 @@ tests = [ CaptureUgn.tests , ClockControlWb.tests , ScatterGather.tests + , SwitchDemoProcessingElement.tests , DnaPortE2.tests , Ord.tests , Time.tests diff --git a/bittide/bittide.cabal b/bittide/bittide.cabal index 707989e22..45b2883fc 100644 --- a/bittide/bittide.cabal +++ b/bittide/bittide.cabal @@ -150,6 +150,7 @@ library Bittide.ScatterGather Bittide.SharedTypes Bittide.Switch + Bittide.SwitchDemoProcessingElement Bittide.Transceiver Bittide.Transceiver.Cdc Bittide.Transceiver.Comma @@ -221,6 +222,7 @@ test-suite unittests Tests.Shared Tests.StabilityChecker Tests.Switch + Tests.SwitchDemoProcessingElement Tests.Transceiver Tests.Transceiver.Prbs Tests.Transceiver.WordAlign diff --git a/bittide/src/Bittide/SwitchDemoProcessingElement.hs b/bittide/src/Bittide/SwitchDemoProcessingElement.hs new file mode 100644 index 000000000..5022072b4 --- /dev/null +++ b/bittide/src/Bittide/SwitchDemoProcessingElement.hs @@ -0,0 +1,219 @@ +-- SPDX-FileCopyrightText: 2025 Google LLC +-- +-- SPDX-License-Identifier: Apache-2.0 +{-# LANGUAGE NumericUnderscores #-} + +module Bittide.SwitchDemoProcessingElement where + +import Clash.Prelude + +import Data.Tuple (swap) +import GHC.Stack (HasCallStack) + +import Protocols +import Protocols.Wishbone + +import Bittide.SharedTypes (Bytes) +import Bittide.Wishbone (wbToVec) +import Clash.Sized.Vector.ToTuple (vecToTuple) + +{- | Multiplying by 3 should always fit, though if n~1, the output type is `Index 3` +which doesn't fit the 3 we're multiplying by hence yielding an undefined. This +function works around that. +-} +zeroExtendTimesThree :: forall n. (1 <= n, KnownNat n) => Index n -> Index (n * 3) +zeroExtendTimesThree = truncateB . 
mul (3 :: Index 4) + +-- | Simple processing element used for the Bittide switch demo. +switchDemoPe :: + forall bufferSize dom. + ( HasCallStack + , HiddenClockResetEnable dom + , 1 <= bufferSize + ) => + -- | Size of buffer in number of "tri-cycles". That is, we always store 3 64-bit words: + -- local clock cycle counter, DNA (64 lsbs), DNA (32 msbs, zero-extended). + SNat bufferSize -> + -- | Local clock cycle counter + Signal dom (Unsigned 64) -> + -- | Incoming crossbar link + Signal dom (BitVector 64) -> + -- | Device DNA + Signal dom (BitVector 96) -> + -- | When to read from the crossbar link + Signal dom (Unsigned 64) -> + -- | How many tri-cycles to read from the crossbar link + Signal dom (Index (bufferSize + 1)) -> + -- | When to write to the crossbar link + Signal dom (Unsigned 64) -> + -- | How many tri-cycles to write to the crossbar link. Includes writing \"own\" data. + Signal dom (Index (bufferSize + 1)) -> + ( -- \| Outgoing crossbar link + Signal dom (BitVector 64) + , -- \| Buffer output + Signal dom (Vec (bufferSize * 3) (BitVector 64)) + ) +switchDemoPe SNat localCounter linkIn dna readStart readCycles writeStart writeCycles = + (linkOut, buffer) + where + readCyclesExtended = checkedResize . zeroExtendTimesThree <$> readCycles + writeCyclesExtended = zeroExtendTimesThree <$> writeCycles + + localData :: Signal dom (Vec 3 (BitVector 64)) + localData = bundle ((pack <$> localCounter) :> unbundle dnaVec) + where + dnaVec :: Signal dom (Vec 2 (BitVector 64)) + dnaVec = reverse . bitCoerce . zeroExtend <$> dna + + linkOut = stateToLinkOutput <$> peState <*> buffer <*> localData + + stateToLinkOutput :: + SimplePeState bufferSize -> + Vec (bufferSize * 3) (BitVector 64) -> + Vec 3 (BitVector 64) -> + BitVector 64 + stateToLinkOutput state buf locData = + case state of + Write i + | i <= 2 -> locData !! i + | otherwise -> buf !! (i - 3) + _ -> 0xAAAA_BBBB_AAAA_BBBB + + -- \| The buffer stores all the incoming bittide data. 
For the Bittide Switch demo, + -- each FPGA sends its DNA and local clock cycle counter, along with all received data. + -- The last FPGA will therefore receive all DNAs and local clock cycle counters. + buffer :: (HasCallStack) => Signal dom (Vec (bufferSize * 3) (BitVector 64)) + buffer = bundle $ regEn <$> initVec <*> enableVec <*> linkInVec + where + initVec = iterateI (+ 1) 0xABBA_ABBA_ABBA_0000 + linkInVec = repeat linkIn + + enableVec :: (HasCallStack) => Vec (bufferSize * 3) (Signal dom Bool) + enableVec = unbundle $ go <$> peState + where + go :: (HasCallStack) => SimplePeState bufferSize -> Vec (bufferSize * 3) Bool + go (Read x) = (== checkedResize x) <$> indicesI + go _ = repeat False + + prevPeState = register Idle peState + + peState = + update + <$> localCounter + <*> readStart + <*> readCyclesExtended + <*> writeStart + <*> writeCyclesExtended + <*> prevPeState + where + update :: + -- \| Local clock cycle counter + Unsigned 64 -> + -- \| When to read from the crossbar link + Unsigned 64 -> + -- \| How many cycles to read from the crossbar link + Index (bufferSize * 3 + 1) -> + -- \| When to write to the crossbar link + Unsigned 64 -> + -- \| How many cycles to write to the crossbar link + Index ((bufferSize + 1) * 3) -> + SimplePeState bufferSize -> + SimplePeState bufferSize + update cntr rs rc ws wc state = + case state of + Idle -> nextState + Read x + | x >= rc - 1 -> nextState + | otherwise -> Read (satSucc SatBound x) + Write x + | x >= wc - 1 -> nextState + | otherwise -> Write (satSucc SatBound x) + where + nextState + | cntr == ws && wc > 0 = Write 0 + | cntr == rs && rc > 0 = Read 0 + | otherwise = Idle + +data SimplePeState bufferSize + = Idle + | Read (Index (bufferSize * 3 + 1)) + | Write (Index ((bufferSize + 1) * 3)) + deriving (Generic, NFDataX, Eq, Show) + +{- | Wishbone circuit wrapper for `switchDemoPe`. + +Buffer uses 64-bit words internally, but WB interface is 32-bit. 
+ +The register layout is as follows (lsbs in first 32-bit word, msbs in second): +- Address 0 - 1: read start +- Address 2 - 3: read cycles +- Address 4 - 5: write start +- Address 6 - 7: write cycles +- Address 8 - 9: local clock cycle counter +- Address 10 - .: buffer (bufferSize*3*2) +-} +switchDemoPeWb :: + forall bufferSize dom addrW. + ( HiddenClockResetEnable dom + , KnownNat addrW + , 1 <= bufferSize + ) => + SNat bufferSize -> + -- | Local clock cycle counter + Signal dom (Unsigned 64) -> + Circuit + ( Wishbone dom 'Standard addrW (Bytes 4) + , -- \| Device DNA + CSignal dom (BitVector 96) + , -- \| Incoming crossbar link + CSignal dom (BitVector 64) + ) + -- \| Outgoing crossbar link + (CSignal dom (BitVector 64)) +switchDemoPeWb SNat localCounter = Circuit go + where + go ((wbM2S, dna, linkIn), _) = ((wbS2M, pure (), pure ()), linkOut) + where + readVec :: Vec (8 + bufferSize * 3 * 2 + 2) (Signal dom (BitVector 32)) + readVec = + dflipflop + <$> ( unbundle (bitCoerce . map swapWords <$> writableRegs) + ++ unbundle (bitCoerce . map swapWords . bitCoerce <$> localCounter) + ++ unbundle (bitCoerce . map swapWords <$> buffer) + ) + + (linkOut, buffer) = + switchDemoPe + (SNat @bufferSize) + localCounter + linkIn + dna + readStart + readCycles + writeStart + writeCycles + + readStart = unpack <$> rs + readCycles = checkedResize . bvToIndex <$> rc + writeStart = unpack <$> ws + writeCycles = checkedResize . bvToIndex <$> wc + + -- \| Unpack a BitVector to an Index of the same size + bvToIndex :: (KnownNat n) => BitVector n -> Index (2 ^ n) + bvToIndex = unpack + + -- \| Swap the two words of a 64-bit Bitvector to match the word order of + -- the Vexriscv. This allows the CPU to read the two words as one 64-bit value. + swapWords :: BitVector 64 -> BitVector 64 + swapWords = bitCoerce . (swap @(BitVector 32) @(BitVector 32)) . 
bitCoerce + + rs, rc, ws, wc :: Signal dom (BitVector 64) + (rs, rc, ws, wc) = unbundle $ vecToTuple <$> writableRegs + + writableRegs :: Signal dom (Vec 4 (BitVector 64)) + writableRegs = + (fmap (map swapWords . bitCoerce) . bundle . map (regMaybe maxBound) . unbundle) + $ take d8 + <$> writeVec + + (writeVec, wbS2M) = unbundle $ wbToVec <$> bundle readVec <*> wbM2S diff --git a/bittide/src/Bittide/Wishbone.hs b/bittide/src/Bittide/Wishbone.hs index 7bc20df34..d5d197b4e 100644 --- a/bittide/src/Bittide/Wishbone.hs +++ b/bittide/src/Bittide/Wishbone.hs @@ -629,11 +629,11 @@ readDnaPortE2Wb :: ) => -- | Simulation DNA value BitVector 96 -> - Circuit (Wishbone dom 'Standard addrW (Bytes nBytes)) () + Circuit (Wishbone dom 'Standard addrW (Bytes nBytes)) (CSignal dom (BitVector 96)) readDnaPortE2Wb simDna = circuit $ \wb -> do dnaDf <- dnaCircuit -< () - _dna <- reg -< (wb, dnaDf) - idC -< () + dna <- reg -< (wb, dnaDf) + idC -< dna where maybeDna = readDnaPortE2 hasClock hasReset hasEnable simDna regRst = diff --git a/bittide/tests/Tests/SwitchDemoProcessingElement.hs b/bittide/tests/Tests/SwitchDemoProcessingElement.hs new file mode 100644 index 000000000..1e095852f --- /dev/null +++ b/bittide/tests/Tests/SwitchDemoProcessingElement.hs @@ -0,0 +1,184 @@ +-- SPDX-FileCopyrightText: 2025 Google LLC +-- +-- SPDX-License-Identifier: Apache-2.0 +{-# LANGUAGE NumericUnderscores #-} +{-# LANGUAGE OverloadedStrings #-} + +module Tests.SwitchDemoProcessingElement where + +import Clash.Prelude hiding (someNatVal, withSomeSNat) + +import Test.Tasty +import Test.Tasty.HUnit +import Test.Tasty.Hedgehog + +import Control.Monad (forM_) +import Data.Proxy (Proxy (..)) +import GHC.TypeNats (someNatVal) + +import Bittide.SwitchDemoProcessingElement + +import qualified Hedgehog as H +import qualified Hedgehog.Gen as Gen +import qualified Hedgehog.Range as Range + +import qualified Clash.Explicit.Prelude as E +import qualified Data.List as L + +import Clash.Explicit.Reset (noReset) 
+import Clash.Hedgehog.Sized.BitVector (genDefinedBitVector) +import Clash.Hedgehog.Sized.Index (genIndex) +import Clash.Hedgehog.Sized.Unsigned (genUnsigned) +import Hedgehog ((===)) + +main :: IO () +main = defaultMain tests + +tests :: TestTree +tests = + testGroup + "SwitchDemoProcessingElement" + [ testPropertyNamed "prop_readThenWrite" "prop_readThenWrite" prop_readThenWrite + , testCase "case_zeroExtendTimesThree" case_zeroExtendTimesThree + ] + +withSomeSNat :: Natural -> (forall (n :: Nat). SNat n -> r) -> r +withSomeSNat n f = case someNatVal n of + SomeNat (_ :: Proxy n) -> f (SNat @n) + +linearLength :: (Integral a) => a -> a -> Range.Range a +linearLength start len = Range.linear start (start + len) + +singletonInt :: (Integral a) => a -> Range.Range Int +singletonInt = Range.singleton . fromIntegral + +-- | Exhaustive test for 'zeroExtendTimesThree' for n ~ [1..64] +case_zeroExtendTimesThree :: Assertion +case_zeroExtendTimesThree = + forM_ [0 .. 63] $ \nMinusOne -> + withSomeSNat nMinusOne $ \(succSNat -> (SNat :: SNat n)) -> do + forM_ [(0 :: Index n) ..] 
$ \i -> do + let + actual = fromIntegral (zeroExtendTimesThree @n i) + expected = fromIntegral @_ @Integer i * 3 + actual @?= expected + +prop_readThenWrite :: H.Property +prop_readThenWrite = H.property $ do + bufferSizeMinusOne <- H.forAll $ Gen.integral (Range.linear 0 10) + withSomeSNat bufferSizeMinusOne $ \(succSNat -> bufferSizeSNat@(SNat :: SNat bufferSize)) -> do + nReadTriCycles <- + H.forAll + $ Gen.frequency + [ (30, Gen.constant 0) + , (70, genIndex Range.linearBounded) + ] + nWriteTriCycles <- + if nReadTriCycles == 0 + then H.forAll $ genIndex (Range.linear 1 maxBound) + else H.forAll $ genIndex Range.linearBounded + + let + cyclesPerReadWrite = 3 :: Unsigned 64 + nReadCycles = cyclesPerReadWrite * fromIntegral nReadTriCycles + nWriteCycles = cyclesPerReadWrite * fromIntegral nWriteTriCycles + maxIdle1 = 100 + maxIdle2 = 100 + + -- Notice that the PE needs a single clock cycle in its idle state to function + -- correctly. Hence, we always start reading a minimum at clockStart+1. 
+ readData <- H.forAll $ Gen.list (singletonInt nReadCycles) genDefinedBitVector + clockStart <- H.forAll $ genUnsigned @_ @64 (Range.linear 0 100) + readStart <- + H.forAll + $ Gen.frequency + [ (30, Gen.constant clockStart) + , (70, genUnsigned @_ @64 (linearLength clockStart maxIdle1)) + ] + let readEnd = readStart + fromIntegral nReadCycles + writeStart <- + H.forAll + $ Gen.frequency + [ (30, Gen.constant readEnd) + , (70, genUnsigned @_ @64 (linearLength readEnd maxIdle2)) + ] + deviceDna <- H.forAll genDefinedBitVector + + let immediateRead = nReadCycles > 0 && readStart == clockStart + H.cover 5 "Read in the very first cycle we're allowed to" immediateRead + + let immediateWrite = nWriteCycles > 0 && writeStart == clockStart + H.cover 5 "Write in the very first cycle we're allowed to" immediateWrite + + let isBackToBack = nReadCycles > 0 && nWriteCycles > 0 && readEnd == writeStart + H.cover 5 "Back-to-back read/write" isBackToBack + + let + idle1length = readStart - clockStart + idle2length = writeStart - readEnd + idle1in = L.replicate (fromIntegral idle1length) 0 + crossBarIn = fromList (idle1in <> readData <> L.repeat 0) + + out = + E.sample + $ bundle + $ withClockResetEnable @System clockGen noReset enableGen + $ switchDemoPe + bufferSizeSNat + (fromList [clockStart ..]) + crossBarIn + (pure deviceDna) + (pure readStart) + (pure nReadTriCycles) + (pure writeStart) + (pure nWriteTriCycles) + + (idle1out, rest0) = L.splitAt (fromIntegral idle1length) out + (readOut, rest1) = L.splitAt (fromIntegral nReadCycles) rest0 + (idle2out, rest2) = L.splitAt (fromIntegral idle2length) rest1 + (writeOut, rest3) = L.splitAt (fromIntegral nWriteCycles) rest2 + + (idle1outs, _idle1buffers) = L.unzip idle1out + (readOuts, _readBuffers) = L.unzip readOut + (idle2outs, _idle2buffers) = L.unzip idle2out + (writeOuts, _writeBuffers) = L.unzip writeOut + (_restOuts, restBuffers) = L.unzip rest3 + + H.footnote $ "idle1in: " <> show idle1in + H.footnote $ "idle1length: " <> 
show idle1length + H.footnote $ "idle2length: " <> show idle2length + H.footnote $ "nReadCycles: " <> show nReadCycles + H.footnote $ "nWriteCycles: " <> show nWriteCycles + H.footnote $ "readData: " <> show readData + H.footnote $ "readEnd: " <> show readEnd + H.footnote $ "readStart: " <> show readStart + H.footnote $ "writeStart: " <> show writeStart + H.footnote $ "clockStart: " <> show clockStart + H.footnote $ "idle2outs: " <> showX idle2outs + H.footnote $ "bufferSizeSNat: " <> show bufferSizeSNat + + -- Check that at the end of the simulation the buffer is what we expect it + -- to be. The buffer should be equal to the data we send to the PE. We don't + -- care about data we don't write, hence we truncate (L.take) the buffer to + -- match the number of read cycles. + case restBuffers of + [] -> error "Unexpected end of output" + (buffer : _) -> do + H.footnote $ "buffer: " <> show buffer + L.take (fromIntegral nReadCycles) (toList buffer) === readData + + -- Check that idle value is written at correct times + let idleValue = 0xAAAA_BBBB_AAAA_BBBB + idle1outs === L.replicate (L.length idle1outs) idleValue + readOuts === L.replicate (L.length readOuts) idleValue + idle2outs === L.replicate (L.length idle2outs) idleValue + + -- Check that the right data is written to the crossbar link at the time we + -- expect it to be. + let + -- Note we can always write one tri-cycle more than we read, since internal + -- data comes first. 
+ relevantOutCycles = fromIntegral (min nWriteCycles (nReadCycles + cyclesPerReadWrite)) + deviceDnaVec = reverse $ bitCoerce @_ @(Vec 2 (BitVector 64)) (zeroExtend deviceDna) + expectedOutData = toList (pack writeStart :> deviceDnaVec) <> readData + L.take relevantOutCycles writeOuts === L.take relevantOutCycles expectedOutData diff --git a/bittide/tests/UnitTests.hs b/bittide/tests/UnitTests.hs index e4fa57950..40f7587b7 100644 --- a/bittide/tests/UnitTests.hs +++ b/bittide/tests/UnitTests.hs @@ -21,6 +21,7 @@ import qualified Tests.ProcessingElement.ReadElf import qualified Tests.ScatterGather import qualified Tests.StabilityChecker import qualified Tests.Switch +import qualified Tests.SwitchDemoProcessingElement import qualified Tests.Transceiver import qualified Tests.Transceiver.Prbs import qualified Tests.Transceiver.WordAlign @@ -36,6 +37,7 @@ tests = , Tests.DoubleBufferedRam.tests , Tests.ElasticBuffer.tests , Tests.ProcessingElement.ReadElf.tests + , Tests.SwitchDemoProcessingElement.tests , Tests.ScatterGather.tests , Tests.StabilityChecker.tests , Tests.Switch.tests diff --git a/firmware-binaries/Cargo.lock b/firmware-binaries/Cargo.lock index c177fad9e..dec71424f 100644 --- a/firmware-binaries/Cargo.lock +++ b/firmware-binaries/Cargo.lock @@ -481,6 +481,16 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" +[[package]] +name = "switch_demo_pe_test" +version = "0.1.0" +dependencies = [ + "bittide-sys", + "log", + "riscv-rt", + "ufmt", +] + [[package]] name = "syn" version = "1.0.109" diff --git a/firmware-binaries/Cargo.toml b/firmware-binaries/Cargo.toml index 88a2af2ab..dab63f5e1 100644 --- a/firmware-binaries/Cargo.toml +++ b/firmware-binaries/Cargo.toml @@ -21,6 +21,7 @@ members = [ "test-cases/clock-control-wb", "test-cases/dna_port_e2_test", "test-cases/scatter_gather_test", + "test-cases/switch_demo_pe_test", 
"test-cases/time_self_test", "test-cases/watchdog_test", diff --git a/firmware-binaries/examples/smoltcp_client/Cargo.toml b/firmware-binaries/examples/smoltcp_client/Cargo.toml index d658ed34c..a4f881e38 100644 --- a/firmware-binaries/examples/smoltcp_client/Cargo.toml +++ b/firmware-binaries/examples/smoltcp_client/Cargo.toml @@ -28,7 +28,7 @@ default-features = false [dependencies.log] version = "0.4.21" -features = ["max_level_off", "release_max_level_info"] +features = ["max_level_trace", "release_max_level_info"] [dependencies.smoltcp] git = "https://github.com/smoltcp-rs/smoltcp.git" diff --git a/firmware-binaries/test-cases/switch_demo_pe_test/Cargo.toml b/firmware-binaries/test-cases/switch_demo_pe_test/Cargo.toml new file mode 100644 index 000000000..6ae3da80c --- /dev/null +++ b/firmware-binaries/test-cases/switch_demo_pe_test/Cargo.toml @@ -0,0 +1,21 @@ +# SPDX-FileCopyrightText: 2024 Google LLC +# +# SPDX-License-Identifier: CC0-1.0 + +[package] +name = "switch_demo_pe_test" +version = "0.1.0" +edition = "2021" +license = "Apache-2.0" +authors = ["Google LLC"] + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +bittide-sys = { path = "../../../firmware-support/bittide-sys" } +riscv-rt = "0.11.0" +ufmt = "0.2.0" + +[dependencies.log] +version = "0.4.21" +features = ["max_level_trace", "release_max_level_info"] diff --git a/firmware-binaries/test-cases/switch_demo_pe_test/build.rs b/firmware-binaries/test-cases/switch_demo_pe_test/build.rs new file mode 100644 index 000000000..cd062a257 --- /dev/null +++ b/firmware-binaries/test-cases/switch_demo_pe_test/build.rs @@ -0,0 +1,23 @@ +// SPDX-FileCopyrightText: 2024 Google LLC +// +// SPDX-License-Identifier: Apache-2.0 + +use std::env; +use std::fs; +use std::path::Path; + +/// Put the linker script somewhere the linker can find it. 
+fn main() { + let out_dir = env::var("OUT_DIR").expect("No out dir"); + let dest_path = Path::new(&out_dir).join("memory.x"); + fs::write(dest_path, include_bytes!("memory.x")).expect("Could not write file"); + + if env::var("CARGO_CFG_TARGET_ARCH").unwrap() == "riscv32" { + println!("cargo:rustc-link-arg=-Tmemory.x"); + println!("cargo:rustc-link-arg=-Tlink.x"); // linker script from riscv-rt + } + println!("cargo:rustc-link-search={out_dir}"); + + println!("cargo:rerun-if-changed=memory.x"); + println!("cargo:rerun-if-changed=build.rs"); +} diff --git a/firmware-binaries/test-cases/switch_demo_pe_test/memory.x b/firmware-binaries/test-cases/switch_demo_pe_test/memory.x new file mode 100644 index 000000000..34a0597f8 --- /dev/null +++ b/firmware-binaries/test-cases/switch_demo_pe_test/memory.x @@ -0,0 +1,18 @@ +/* +SPDX-FileCopyrightText: 2024 Google LLC + +SPDX-License-Identifier: CC0-1.0 +*/ + +MEMORY +{ + IMEM : ORIGIN = 0x80000000, LENGTH = 32K + DMEM : ORIGIN = 0x20000000, LENGTH = 32K +} + +REGION_ALIAS("REGION_TEXT", IMEM); +REGION_ALIAS("REGION_RODATA", DMEM); +REGION_ALIAS("REGION_DATA", DMEM); +REGION_ALIAS("REGION_BSS", DMEM); +REGION_ALIAS("REGION_HEAP", DMEM); +REGION_ALIAS("REGION_STACK", DMEM); diff --git a/firmware-binaries/test-cases/switch_demo_pe_test/src/main.rs b/firmware-binaries/test-cases/switch_demo_pe_test/src/main.rs new file mode 100644 index 000000000..60faf33e3 --- /dev/null +++ b/firmware-binaries/test-cases/switch_demo_pe_test/src/main.rs @@ -0,0 +1,120 @@ +// SPDX-FileCopyrightText: 2025 Google LLC +// +// SPDX-License-Identifier: Apache-2.0 +#![no_std] +#![cfg_attr(not(test), no_main)] + +use bittide_sys::dna_port_e2::dna_to_u128; +use bittide_sys::switch_demo_pe::SwitchDemoProcessingElement; +use bittide_sys::time::{Clock, Duration}; +use bittide_sys::uart::log::LOGGER; +use bittide_sys::uart::Uart; + +use core::fmt::Write; +use log::{info, LevelFilter}; + +#[cfg(not(test))] +use riscv_rt::entry; + +const UART_ADDR: *const () = 
(0b010 << 29) as *const (); +const CLOCK_ADDR: *const () = (0b011 << 29) as *const (); +const SWITCH_PE_A: *const () = (0b100 << 29) as *const (); +const SWITCH_PE_B: *const () = (0b101 << 29) as *const (); + +// Size of buffer in number of "tri-cycles". That is, we always store 3 64-bit words: +// local clock cycle counter, DNA (64 lsbs), DNA (32 msbs, zero-extended). +// Should match `bufferSize` of the associated `switchDemoPeWb` device. +const BUFFER_SIZE: usize = 2; + +// See https://github.com/bittide/bittide-hardware/issues/681 +#[allow(static_mut_refs)] +#[cfg_attr(not(test), entry)] +fn main() -> ! { + // Initialize peripherals. + let mut uart = unsafe { Uart::new(UART_ADDR) }; + let mut clock = unsafe { Clock::new(CLOCK_ADDR) }; + let switch_pe_a: SwitchDemoProcessingElement = + unsafe { SwitchDemoProcessingElement::new(SWITCH_PE_A) }; + let switch_pe_b: SwitchDemoProcessingElement = + unsafe { SwitchDemoProcessingElement::new(SWITCH_PE_B) }; + + unsafe { + LOGGER.set_logger(uart.clone()); + LOGGER.set_clock(clock.clone()); + LOGGER.display_source = LevelFilter::Info; + log::set_logger_racy(&LOGGER).ok(); + // The 'max_level' is actually the current debug level. Note that the + // unittest uses a release build, which has 'max_level_info', which sets + // the actual maximum level. 
+ log::set_max_level_racy(LevelFilter::Info); + } + + info!("Local counter: 0x{:X}", switch_pe_a.get_counter()); + + let first_transfer_start = 0x10000; + let second_transfer_start = 0x10100; + + // A only writes its own data + switch_pe_a.set_write(first_transfer_start, 1); + // B reads data from A + switch_pe_b.set_read(first_transfer_start, 1); + // B writes its own data and data received from A + switch_pe_b.set_write(second_transfer_start, 2); + // A reads all data from B + switch_pe_a.set_read(second_transfer_start, 2); + + clock.wait(Duration::from_micros(600)); + + let (rs_a, rc_a) = switch_pe_a.get_read(); + let (rs_b, rc_b) = switch_pe_b.get_read(); + info!("A: readStart: 0x{:X}, readCycles: 0x{:X}", rs_a, rc_a); + info!("B: readStart: 0x{:X}, readCycles: 0x{:X}", rs_b, rc_b); + info!("Local counter: 0x{:X}", switch_pe_a.get_counter()); + + // Write the buffer of A over UART + write!(uart, "Buffer A: [").unwrap(); + switch_pe_a.buffer().enumerate().for_each(|(i, nd)| { + let sep = if i + 1 < BUFFER_SIZE { ", " } else { "" }; + write!( + uart, + "(0x{:X}, 0x{:X}){sep}", + nd.local_counter, + dna_to_u128(nd.dna) + ) + .unwrap(); + }); + writeln!(uart, "]").unwrap(); + + info!("Local counter: 0x{:X}", switch_pe_a.get_counter()); + + // Write the buffer of B over UART + write!(uart, "Buffer B: [").unwrap(); + switch_pe_b.buffer().enumerate().for_each(|(i, nd)| { + let sep = if i + 1 < BUFFER_SIZE { ", " } else { "" }; + write!( + uart, + "(0x{:X}, 0x{:X}){sep}", + nd.local_counter, + dna_to_u128(nd.dna) + ) + .unwrap(); + }); + writeln!(uart, "]").unwrap(); + + info!("Local counter: 0x{:X}", switch_pe_a.get_counter()); + + writeln!(uart, "Finished").unwrap(); + + loop { + continue; + } +} + +#[panic_handler] +fn panic_handler(info: &core::panic::PanicInfo) -> ! { + let mut uart = unsafe { Uart::new(UART_ADDR) }; + writeln!(uart, "Panicked! 
#{info}").unwrap(); + loop { + continue; + } +} diff --git a/firmware-support/bittide-sys/src/lib.rs b/firmware-support/bittide-sys/src/lib.rs index 8bdc6db2c..35bf93cce 100644 --- a/firmware-support/bittide-sys/src/lib.rs +++ b/firmware-support/bittide-sys/src/lib.rs @@ -14,6 +14,7 @@ pub mod mac; pub mod program_stream; pub mod scatter_unit; pub mod smoltcp; +pub mod switch_demo_pe; pub mod time; pub mod uart; pub mod ugn; diff --git a/firmware-support/bittide-sys/src/switch_demo_pe.rs b/firmware-support/bittide-sys/src/switch_demo_pe.rs new file mode 100644 index 000000000..f32dfad32 --- /dev/null +++ b/firmware-support/bittide-sys/src/switch_demo_pe.rs @@ -0,0 +1,105 @@ +// SPDX-FileCopyrightText: 2025 Google LLC +// +// SPDX-License-Identifier: Apache-2.0 + +use ufmt::derive::uDebug; + +use crate::dna_port_e2::DnaValue; + +#[repr(C)] +#[derive(uDebug, PartialEq, Eq, Copy, Clone)] +pub struct NodeData { + pub local_counter: u64, + pub dna: DnaValue, +} + +pub struct SwitchDemoProcessingElement { + base_addr: *const u64, +} + +impl SwitchDemoProcessingElement { + const READ_START: usize = 0; + const READ_CYCLES: usize = 1; + const WRITE_START: usize = 2; + const WRITE_CYCLES: usize = 3; + const COUNTER: usize = 4; + const BUFFER: usize = 5; + + /// Create a new [`SwitchDemoProcessingElement`] instance given a base + /// address. The `BUFFER_SIZE` is the number of [`NodeData`] elements in its + /// internal buffer. + /// + /// # Safety + /// + /// The `base_addr` pointer must be a valid pointer that is backed by + /// a memory mapped switch demo processing element. The `BUFFER_SIZE` should + /// match the `bufferSize` of the associated `switchDemoPeWb` device. 
+ pub unsafe fn new(base_addr: *const ()) -> Self { + let addr = base_addr as *const u64; + Self { base_addr: addr } + } + + pub fn set_read(&self, read_start: u64, read_cycles: u64) { + // SAFETY: This is safe since this function can only be called + // after construction, which is only valid with valid addresses. + unsafe { + self.base_addr + .add(Self::READ_START) + .cast_mut() + .write_volatile(read_start); + self.base_addr + .add(Self::READ_CYCLES) + .cast_mut() + .write_volatile(read_cycles); + } + } + + pub fn set_write(&self, write_start: u64, write_cycles: u64) { + // SAFETY: This is safe since this function can only be called + // after construction, which is only valid with valid addresses. + unsafe { + self.base_addr + .add(Self::WRITE_START) + .cast_mut() + .write_volatile(write_start); + self.base_addr + .add(Self::WRITE_CYCLES) + .cast_mut() + .write_volatile(write_cycles); + } + } + + pub fn get_read(&self) -> (u64, u64) { + // SAFETY: This is safe since this function can only be called + // after construction, which is only valid with valid addresses. + unsafe { + let read_start = self.base_addr.add(Self::READ_START).read_volatile(); + let read_cycles = self.base_addr.add(Self::READ_CYCLES).read_volatile(); + (read_start, read_cycles) + } + } + + pub fn get_write(&self) -> (u64, u64) { + // SAFETY: This is safe since this function can only be called + // after construction, which is only valid with valid addresses. + unsafe { + let write_start = self.base_addr.add(Self::WRITE_START).read_volatile(); + let write_cycles = self.base_addr.add(Self::WRITE_CYCLES).read_volatile(); + (write_start, write_cycles) + } + } + + pub fn get_counter(&self) -> u64 { + // SAFETY: This is safe since this function can only be called + // after construction, which is only valid with valid addresses. 
+ unsafe { self.base_addr.add(Self::COUNTER).read_volatile() } + } + + pub fn buffer(&self) -> impl Iterator + '_ { + // SAFETY: This is safe since this function can only be called + // after construction, which is only valid with valid addresses. + (Self::BUFFER..Self::BUFFER + BUFFER_SIZE * 3) + .step_by(3) + .map(|i| unsafe { self.base_addr.add(i).cast::().read_volatile() }) + } +} diff --git a/nix/bin/format b/nix/bin/format index 2f79e5586..c8d8eec4d 100755 --- a/nix/bin/format +++ b/nix/bin/format @@ -3,6 +3,7 @@ # # SPDX-License-Identifier: Apache-2.0 ROOT=$(git rev-parse --show-toplevel) +cd "${ROOT}" || exit 1 echo "Formatting Cabal files.." "${ROOT}"/.github/scripts/cabal-gild.sh