Skip to content

Commit

Permalink
Benchmark run error json decoding
Browse files Browse the repository at this point in the history
  • Loading branch information
tbidne committed Feb 26, 2025
1 parent 80a3fff commit c40902c
Show file tree
Hide file tree
Showing 8 changed files with 187 additions and 94 deletions.
5 changes: 3 additions & 2 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ on: # yamllint disable-line rule:truthy rule:comments
push:
branches:
- main
- bench3

pull_request:
branches:
Expand Down Expand Up @@ -82,8 +83,8 @@ jobs:
--csv bench/app/${{ matrix.os }}_${{ matrix.ghc.vers }}_ci.csv
--svg bench/app/${{ matrix.os }}_${{ matrix.ghc.vers }}_ci.svg
--baseline bench/app/baseline_${{ matrix.os }}_${{ matrix.ghc.vers }}_ci.csv
--fail-if-slower 20
--fail-if-faster 20'
--fail-if-slower 1
--fail-if-faster 1'
- uses: actions/upload-artifact@v4
name: Bench upload
Expand Down
12 changes: 11 additions & 1 deletion bench/app/Main.hs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ import Prelude (IO)
main :: IO ()
main =
defaultMain
[ benchMkSomeRuns
[ benchMkSomeRuns,
benchMkSomeRunsError
]

benchMkSomeRuns :: Benchmark
Expand All @@ -35,6 +36,15 @@ benchMkSomeRuns =
runs_10_000_bs = Utils.genRunsJson 10_000
}

benchMkSomeRunsError :: Benchmark
benchMkSomeRunsError =
bgroup
"decode_runs_error"
[ bench "100" $ nf Utils.decodeErrorRuns (Utils.genOverlappedRunsJson 100),
bench "1_000" $ nf Utils.decodeErrorRuns (Utils.genOverlappedRunsJson 1_000),
bench "10_000" $ nf Utils.decodeErrorRuns (Utils.genOverlappedRunsJson 10_000)
]

data TestParams = MkTestParams
{ runs_100_bs :: ByteString,
runs_1_000_bs :: ByteString,
Expand Down
25 changes: 21 additions & 4 deletions bench/fit/Main.hs
Original file line number Diff line number Diff line change
Expand Up @@ -14,20 +14,37 @@ main =
defaultMain
$ testGroup
"Complexity tests"
[testComplexity]
[ testRunsDecodeComplexity,
testRunsDecodeOverlappedComplexity
]

testComplexity :: TestTree
testComplexity = testCase "Decode runs is linear" $ do
testRunsDecodeComplexity :: TestTree
testRunsDecodeComplexity = testCase "Decode runs is linear" $ do
c <- measureGenDecode
-- This is too flaky to have the assertion be an actual test, so we
-- just run it and print the result if it's different. We keep it as
-- a test, however, so that CI verifies it at least runs.
unless (Fit.isLinear c) $ do
IO.putStrLn $ "Assumed linear but guessed: " ++ show c
IO.putStrLn $ "Predicted linear but guessed: " ++ show c

testRunsDecodeOverlappedComplexity :: TestTree
testRunsDecodeOverlappedComplexity = testCase desc $ do
c <- measureGenDecodeOverlapped
unless (Fit.isLinear c) $ do
IO.putStrLn $ "Predicted linear but guessed: " ++ show c
where
desc = "Decode overlapped runs is linear"

measureGenDecode :: IO Complexity
measureGenDecode =
Fit.fit
$ Fit.mkFitConfig
(Utils.genAndDecodeRuns)
(10, 10_000)

measureGenDecodeOverlapped :: IO Complexity
measureGenDecodeOverlapped =
Fit.fit
$ Fit.mkFitConfig
(Utils.genAndDecodeOverlappedRuns)
(10, 10_000)
164 changes: 99 additions & 65 deletions bench/utils/Bench/Pacer/Utils.hs
Original file line number Diff line number Diff line change
@@ -1,9 +1,20 @@
{-# LANGUAGE QuasiQuotes #-}

module Bench.Pacer.Utils
( genAndDecodeRuns,
( -- * High level
genAndDecodeRuns,
genAndDecodeOverlappedRuns,

-- * Low level
genRunsJson,
genOverlappedRunsJson,
decodeRuns,
decodeErrorRuns,

-- * Low level
concatRunsJson,
mkRunJsonList,
tsStrToRunJson,
)
where

Expand All @@ -23,22 +34,42 @@ import Pacer.Command.Chart.Data.Time
),
)
import Pacer.Command.Chart.Data.Time qualified as Time
import Pacer.Data.Result (onErr)
import Pacer.Data.Result (onErr, onOk)
import Pacer.Prelude hiding (Double)
import Pacer.Utils (AesonE)
import Pacer.Utils qualified as Utils
import Prelude (Double)

-- | 'genRunsJson' and 'decodeRuns'.
genAndDecodeRuns :: Word -> SomeRuns Double
genAndDecodeRuns = decodeRuns . genRunsJson

-- | 'genOverlappedRunsJson' and 'decodeErrorRuns'.
genAndDecodeOverlappedRuns :: Word -> AesonE
genAndDecodeOverlappedRuns = decodeErrorRuns . genOverlappedRunsJson

-- | Decodes runs json w/ expected error.
decodeErrorRuns :: ByteString -> AesonE
decodeErrorRuns = onOk (error "") . Utils.decodeJson @(SomeRuns Double)

-- | Decodes runs json.
decodeRuns :: ByteString -> SomeRuns Double
decodeRuns = onErr (error . displayException) . Utils.decodeJson

-- | Generate runs json with immediate overlap.
genOverlappedRunsJson :: Word -> ByteString
genOverlappedRunsJson n =
concatRunsJson
$ tsStrToRunJson "1980-04-08"
: tsStrToRunJson "1980-04-08"
: mkRunJsonList n

-- | Generate runs json sized by the parameter.
genRunsJson :: Word -> ByteString
genRunsJson maxAll = mkBS allRuns
genRunsJson = concatRunsJson . mkRunJsonList

mkRunJsonList :: Word -> List ByteString
mkRunJsonList maxAll = allRuns
where
-- NOTE: [Avoiding overlaps]
--
Expand All @@ -62,65 +93,68 @@ genRunsJson maxAll = mkBS allRuns
-- zipWith3 so that we can interleave the different times, just so the
-- sorting is non-trivial.
$ L.zipWith3
(\d t z -> mkRun d : mkRun t : [mkRun z])
dates
times
zoneds

mkRun :: Timestamp -> ByteString
mkRun t =
-- intercalate over unlines so we don't have a trailing newline
BS.intercalate
"\n"
[ " {",
" \"datetime\": \"" <> encodeTimestamp t <> "\",",
" \"distance\": \"marathon\",",
" \"duration\": \"3h20m\"",
" }"
]

mkBS :: List ByteString -> ByteString
mkBS rs =
C8.unlines
[ "{",
" \"runs\": [",
BS.intercalate ",\n" rs,
" ]",
"}"
]

-- See NOTE: [Avoiding overlaps] for choosing good start dates for each
-- 3 types.
startDate :: Day
startDate = unsafeParse "1990-04-08"

dates :: List Timestamp
dates = (TimestampDate . incDate startDate) <$> [0 .. maxEach]

startTime :: LocalTime
startTime = unsafeParse "2000-04-08T12:00:00"

times :: List Timestamp
times = (TimestampTime . incTime startTime) <$> [0 .. maxEach]

startZoned :: ZonedTime
startZoned = unsafeParse "2010-04-08T12:00:00-0800"

zoneds :: List Timestamp
zoneds = (TimestampZoned . incZoned startZoned) <$> [0 .. maxEach]

encodeTimestamp :: Timestamp -> ByteString
encodeTimestamp = encodeUtf8 . Time.fmtTimestamp

incDate :: Day -> Word -> Day
incDate d i = Cal.addDays (fromIntegral @Word @Integer i) d

incTime :: LocalTime -> Word -> LocalTime
incTime (LocalTime d t) i = LocalTime (incDate d i) t

incZoned :: ZonedTime -> Word -> ZonedTime
incZoned (ZonedTime (LocalTime d t) z) i =
ZonedTime (LocalTime (incDate d i) t) z

unsafeParse :: (Parser a) => Text -> a
unsafeParse = errorErr . P.parseAll
(\d t z -> [tsToRunJson d, tsToRunJson t, tsToRunJson z])
(dates maxEach)
(times maxEach)
(zoneds maxEach)

tsStrToRunJson :: Text -> ByteString
tsStrToRunJson = tsToRunJson . unsafeParse

tsToRunJson :: Timestamp -> ByteString
tsToRunJson t =
-- intercalate over unlines so we don't have a trailing newline
BS.intercalate
"\n"
[ " {",
" \"datetime\": \"" <> encodeTimestamp t <> "\",",
" \"distance\": \"marathon\",",
" \"duration\": \"3h20m\"",
" }"
]

concatRunsJson :: List ByteString -> ByteString
concatRunsJson rs =
C8.unlines
[ "{",
" \"runs\": [",
BS.intercalate ",\n" rs,
" ]",
"}"
]

-- See NOTE: [Avoiding overlaps] for choosing good start dates for each
-- 3 types.
startDate :: Day
startDate = unsafeParse "1990-04-08"

dates :: Word -> List Timestamp
dates mx = (TimestampDate . incDate startDate) <$> [0 .. mx]

startTime :: LocalTime
startTime = unsafeParse "2000-04-08T12:00:00"

times :: Word -> List Timestamp
times mx = (TimestampTime . incTime startTime) <$> [0 .. mx]

startZoned :: ZonedTime
startZoned = unsafeParse "2010-04-08T12:00:00-0800"

zoneds :: Word -> List Timestamp
zoneds mx = (TimestampZoned . incZoned startZoned) <$> [0 .. mx]

encodeTimestamp :: Timestamp -> ByteString
encodeTimestamp = encodeUtf8 . Time.fmtTimestamp

incDate :: Day -> Word -> Day
incDate d i = Cal.addDays (fromIntegral @Word @Integer i) d

incTime :: LocalTime -> Word -> LocalTime
incTime (LocalTime d t) i = LocalTime (incDate d i) t

incZoned :: ZonedTime -> Word -> ZonedTime
incZoned (ZonedTime (LocalTime d t) z) i =
ZonedTime (LocalTime (incDate d i) t) z

unsafeParse :: (Parser a) => Text -> a
unsafeParse = errorErr . P.parseAll
18 changes: 18 additions & 0 deletions src/Pacer/Command/Chart/Data/Run.hs
Original file line number Diff line number Diff line change
Expand Up @@ -443,6 +443,24 @@ mkSomeRuns (y@(MkSomeRun _ r) :| ys) =
initOverlapData :: HashMap Timestamp OverlapData
initOverlapData = runToOverlapMap r

-- NOTE: [SomeRuns error short-circuit]
--
-- Notice anything unfortunate about this function? It is not lazy in its
-- right argument, despite being called with foldr! This is a shame as it
-- means it will not short-circuit in the event of an error. While we need
-- the accumulating map to actually check for errors (hence it is unclear
-- how to achieve short-cicuiting w/ foldr), surely we could rewrite this
-- to short-circuit with ordinary recursion?
--
-- Alas, there is a complication. At this point, we have already decoded
-- the bytestring into @List (SomeRun a)@, so we are stuck with linear
-- time anyway. What we'd want is a way to stream the decoding and check
-- for errors right from the get go.
--
-- You might think that at least switching to ordinary recurion would
-- improve things -- even though it is still linear -- but the benchmarks
-- seem to get worse. So there is probably no reason to try until we can
-- actually stream properly.
go :: SomeRun a -> SomeRunsAcc a -> SomeRunsAcc a
go _ (Err overlap) = Err overlap
go someRun@(MkSomeRun _ q) (Ok (acc, foundKeys)) =
Expand Down
2 changes: 1 addition & 1 deletion src/Pacer/Driver.hs
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ displayInnerMatchKnown e =

knownExceptions :: List ExceptionProxy
knownExceptions =
[ MkExceptionProxy @Utils.AesonPathE,
[ MkExceptionProxy @Utils.AesonE,
MkExceptionProxy @PacerEx.ChartFileMissingE,
MkExceptionProxy @PacerEx.CommandConvertE,
MkExceptionProxy @PacerEx.CommandDeriveE,
Expand Down
42 changes: 26 additions & 16 deletions src/Pacer/Utils.hs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ module Pacer.Utils
(.:?:),
failUnknownFields,
decodeJson,
AesonPathE (..),
AesonE (..),
readDecodeJson,

-- * Show
Expand All @@ -36,7 +36,7 @@ module Pacer.Utils
)
where

import Data.Aeson (AesonException (AesonException), Key, (<?>))
import Data.Aeson (Key, (<?>))
import Data.Aeson qualified as Asn
import Data.Aeson.Key qualified as Key
import Data.Aeson.KeyMap (KeyMap)
Expand Down Expand Up @@ -98,17 +98,27 @@ seqGroupBy p = go
where
(ys, zs) = Seq.spanl (p x) xs

data AesonPathE = MkAesonPathE OsPath String
deriving stock (Show)

instance Exception AesonPathE where
displayException (MkAesonPathE p s) =
mconcat
[ "Error decoding json path '",
decodeLenient p,
"': ",
s
]
-- | We use this rather than aeson's AesonException for two reasons:
--
-- 1. AesonException does not have an NFData instance, which we require for
-- the benchmarks.
--
-- 2. Optional path improves the error message.
data AesonE = MkAesonE (Maybe OsPath) String
deriving stock (Generic, Show)
deriving anyclass (NFData)

instance Exception AesonE where
displayException (MkAesonE mPath s) =
case mPath of
Just p ->
mconcat
[ "Error decoding json path '",
decodeLenient p,
"': ",
s
]
Nothing -> "Error decode json: " ++ s

-- | Decodes a json(c) file.
readDecodeJson ::
Expand All @@ -126,7 +136,7 @@ readDecodeJson path = do
$ decodeJson @a contents
where
osPath = pathToOsPath path
toAesonPathE (AesonException s) = MkAesonPathE osPath s
toAesonPathE (MkAesonE _ s) = MkAesonE (Just osPath) s

-- | Fails if there are any unknown fields in the object.
failUnknownFields ::
Expand Down Expand Up @@ -159,9 +169,9 @@ failUnknownFields name knownKeys kmap = do
showKeys = show . L.sort . fmap Key.toString

-- | Decodes json(c).
decodeJson :: (FromJSON a) => ByteString -> Result AesonException a
decodeJson :: (FromJSON a) => ByteString -> Result AesonE a
decodeJson =
first AesonException
first (MkAesonE Nothing)
<<< (review #eitherIso)
. Asn.eitherDecodeStrict
<=< P.stripComments
Expand Down
Loading

0 comments on commit c40902c

Please sign in to comment.