Skip to content

Commit

Permalink
Estimate batch sizes more accurately
Browse files Browse the repository at this point in the history
Summary: Hopefully keep the write queue closer to the size it's supposed to be.

Reviewed By: donsbot

Differential Revision: D51471709

fbshipit-source-id: d08ee54bfff3f4c2b61761b12d68b554b545540c
  • Loading branch information
Simon Marlow authored and facebook-github-bot committed Dec 3, 2024
1 parent 3b2f97f commit 6eda668
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 10 deletions.
15 changes: 15 additions & 0 deletions glean/db/Glean/Database/Writes.hs
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,12 @@ import Data.Default
import Data.Either
import Data.HashMap.Strict (HashMap)
import qualified Data.HashMap.Strict as HashMap
import qualified Data.Monoid as Monoid
import Data.Text as Text (Text)
import qualified Data.Text.Encoding as Text
import qualified Data.UUID as UUID
import qualified Data.UUID.V4 as UUID
import qualified Data.Vector.Storable as VS
import System.Clock
import System.Timeout

Expand Down Expand Up @@ -289,6 +291,7 @@ enqueueBatch env ComputedBatch{..} ownership = do
-- server restarts/crashes
handle <- UUID.toText <$> UUID.nextRandom

let size = batchSize computedBatch_batch
r <- try $ enqueueWrite env computedBatch_repo size $ pure $
(writeContentFromBatch computedBatch_batch) {
writeOwnership= ownership
Expand All @@ -301,6 +304,18 @@ enqueueBatch env ComputedBatch{..} ownership = do
when computedBatch_remember $ rememberWrite env handle write
return $ Thrift.SendResponse_handle handle

-- | Estimate the size of a 'Thrift.Batch'.
--
-- The estimate is the sum of three parts:
--
--   * the length in bytes of the serialized facts (@batch_facts@);
--   * the number of ids in every vector stored in @batch_owned@;
--   * the number of ids in the two vectors of every 'FactDependencies'
--     entry stored in @batch_dependencies@.
--
-- NOTE(review): the first part is measured in bytes, whereas the other two
-- are element counts rather than byte counts. Confirm that mixing the two
-- units is intended for this estimate.
batchSize :: Thrift.Batch -> Int
batchSize Thrift.Batch{..} = factBytes + ownedIds + dependencyIds
  where
    -- Serialized fact data, in bytes.
    factBytes = ByteString.length batch_facts

    -- Total number of ids across all ownership vectors.
    ownedIds =
      Monoid.getSum $ foldMap (Monoid.Sum . idCount) batch_owned

    -- Total number of ids across all dependency entries.
    dependencyIds =
      Monoid.getSum $ foldMap (Monoid.Sum . entryIds) batch_dependencies

    -- Number of elements held by a storable vector.
    idCount ids = VS.length ids

    -- Ids mentioned by one list of dependency entries: the facts plus
    -- their dependencies.
    entryIds entries = sum
      [ idCount factIds + idCount depIds
      | FactDependencies factIds depIds <- entries
      ]

enqueueJsonBatch
:: Env
-> Repo
Expand Down
17 changes: 7 additions & 10 deletions glean/if/glean.thrift
Original file line number Diff line number Diff line change
Expand Up @@ -198,8 +198,8 @@ struct Fact {
}

struct FactDependencies {
1: list_Id_1857 facts;
2: list_Id_1857 dependencies;
1: listOfIds facts;
2: listOfIds dependencies;
}

// A collection of facts which can be written to a database.
Expand Down Expand Up @@ -228,7 +228,7 @@ struct Batch {
// - all elements are unique
// - ids are reasonably dense (writing the batch to the db will use a
// data structure of size O(max id - firstId))
4: optional list_i64_7948 ids;
4: optional list_i64 ids;

// (optional for now)
//
Expand All @@ -242,7 +242,7 @@ struct Batch {
//
// Units do not need to be declared beforehand; a Unit exists if
// it is the owner of at least one fact.
5: map_UnitName_listOfIds_7119 owned;
5: map_UnitName_listOfIds owned;

// Specifies explicit dependencies of derived facts per predicate.
//
Expand All @@ -257,7 +257,7 @@ struct Batch {

struct Subst {
1: Id firstId;
2: list_i64_7948 ids;
2: list_i64 ids;
}

struct Error {
Expand Down Expand Up @@ -1294,13 +1294,10 @@ struct PredicateAnnotation {

// The following were automatically generated and may benefit from renaming.
typedef list<Fact> (hs.type = "Vector") list_Fact_2137
typedef list<Id> (hs.type = "VectorStorable") list_Id_1857
typedef list<Id> (hs.type = "Vector") list_Id_2029
typedef list<ParcelState> (hs.type = "Vector") list_ParcelState_7430
typedef list<i64> (hs.type = "VectorStorable") list_i64_7948
typedef list<i64> (hs.type = "VectorStorable") list_i64
typedef map<Id, list<FactDependencies>> (
hs.type = "HashMap",
) map_Id_list_FactDependencies_964
typedef map<UnitName, listOfIds> (
hs.type = "HashMap",
) map_UnitName_listOfIds_7119
typedef map<UnitName, listOfIds> (hs.type = "HashMap") map_UnitName_listOfIds

0 comments on commit 6eda668

Please sign in to comment.