From d0382adfa7cd30217dc812653631e58da1eaf2d2 Mon Sep 17 00:00:00 2001 From: Josef Svenningsson Date: Thu, 28 Nov 2024 10:41:40 -0800 Subject: [PATCH] Fix the encoding of fact IDs and bytes for sets Summary: Fixes the representation of sets of bytes. I had accidentally used a packed representation, but in order for the representation to agree with arrays it needs to use a fixed representation. Sadly we need a new syscall for this. Reviewed By: simonmar Differential Revision: D66097241 fbshipit-source-id: a6a7784c4a21c808f2a3373dd7843ca9f46e5c37 --- glean/bytecode/Glean/Bytecode/SysCalls.hs | 13 +++++++++---- .../def/Glean/Bytecode/Generate/Instruction.hs | 4 ++-- glean/db/Glean/Query/Codegen.hs | 9 ++++++++- glean/hs/Glean/RTS/Typecheck.hs | 12 ++++++++++-- glean/rts/inventory.h | 1 + glean/rts/query.cpp | 1 + glean/rts/set.cpp | 8 ++++++++ glean/rts/set.h | 2 ++ 8 files changed, 41 insertions(+), 9 deletions(-) diff --git a/glean/bytecode/Glean/Bytecode/SysCalls.hs b/glean/bytecode/Glean/Bytecode/SysCalls.hs index c8c54996d..d06a7815e 100644 --- a/glean/bytecode/Glean/Bytecode/SysCalls.hs +++ b/glean/bytecode/Glean/Bytecode/SysCalls.hs @@ -17,11 +17,16 @@ userQuerySysCalls :: [String] userQuerySysCalls = ["seek","seekWithinSection","currentSeek", "endSeek" ,"next", "lookupKeyValue", "result", "resultWithPid" - , "newDerivedFact", "firstFreeId" - , "newSet", "insertOutputSet", "setToArray", "freeSet" - , "newWordSet", "insertWordSet", "wordSetToArray", "freeWordSet"] + ,"newDerivedFact", "firstFreeId" + ,"newSet", "insertOutputSet", "setToArray", "freeSet" + ,"newWordSet", "insertWordSet", "wordSetToArray", "byteSetToArray" + ,"freeWordSet"] -- This list has to sync up with the list in Glean.RTS.Typecheck or -- the wrong name will be printed typecheckSysCalls :: [String] -typecheckSysCalls = ["rename"] +typecheckSysCalls = + ["rename", "newSet", "insertOutputSet", "setToArray", "freeSet" + ,"newWordSet_", "insertBytesWordSet_", "wordSetToArray_" + ,"byteSetToByteArray_", "freeWordSet_" + ] diff --git a/glean/bytecode/def/Glean/Bytecode/Generate/Instruction.hs b/glean/bytecode/def/Glean/Bytecode/Generate/Instruction.hs index c46eb502b..a621ea3d5 100644 --- a/glean/bytecode/def/Glean/Bytecode/Generate/Instruction.hs +++ b/glean/bytecode/def/Glean/Bytecode/Generate/Instruction.hs @@ -85,7 +85,7 @@ data Usage -- -- BUMP THIS WHENEVER YOU CHANGE THE BYTECODE EVEN IF YOU JUST ADD INSTRUCTIONS version :: Int -version = 12 +version = 13 -- | Lowest bytecode version supported by the current engine. -- @@ -93,7 +93,7 @@ version = 12 -- TO THE END OF THE LIST (in which case the new engine can still execute -- old bytecode) lowestSupportedVersion :: Int -lowestSupportedVersion = 12 +lowestSupportedVersion = 13 -- | Definitions of all bytecode instructions instructions :: [Insn] diff --git a/glean/db/Glean/Query/Codegen.hs b/glean/db/Glean/Query/Codegen.hs index 5a29ba887..77dd25e25 100644 --- a/glean/db/Glean/Query/Codegen.hs +++ b/glean/db/Glean/Query/Codegen.hs @@ -1702,6 +1702,11 @@ data QueryRegs = QueryRegs -> Register 'BinaryOutputPtr -- (output) array -> Code () + , byteSetToByteArray + :: Register 'Word -- set token + -> Register 'BinaryOutputPtr -- (output) array + -> Code () + , freeWordSet :: Register 'Word -- set token (invalid after this call) -> Code () @@ -1723,7 +1728,7 @@ generateQueryCode f = generate Optimised $ \ seek_ seekWithinSection_ currentSeek_ endSeek_ next_ lookupKey_ result_ resultWithPid_ newDerivedFact_ firstFreeId_ newSet_ insertOutputSet_ setToArray_ freeSet_ - newWordSet_ insertWordSet_ wordSetToArray_ freeWordSet_ + newWordSet_ insertWordSet_ wordSetToArray_ byteSetToByteArray_ freeWordSet_ saveState maxResults maxBytes -> let seek typ ptr end tok = @@ -1789,6 +1794,8 @@ generateQueryCode f = generate Optimised $ wordSetToArray setToken outputPtr = callFun_1_1 wordSetToArray_ setToken (castRegister outputPtr) + byteSetToByteArray setToken outputPtr = + callFun_1_1 byteSetToByteArray_ setToken (castRegister outputPtr) freeWordSet setToken = callFun_1_0 freeWordSet_ setToken diff --git a/glean/hs/Glean/RTS/Typecheck.hs b/glean/hs/Glean/RTS/Typecheck.hs index 77239228a..61d43e48a 100644 --- a/glean/hs/Glean/RTS/Typecheck.hs +++ b/glean/hs/Glean/RTS/Typecheck.hs @@ -84,7 +84,7 @@ typecheck syscalls@SysCalls{..} input inputend out = tc move input ptr inputBytes input inputend size insertBytesWordSet set ptr input - wordSetToArray set out + byteSetToByteArray set out freeWordSet set return () tc (SetTy elty) = local $ \size -> do @@ -172,6 +172,11 @@ data SysCalls = SysCalls { -> Register 'BinaryOutputPtr -- (output) array -> Code () + , byteSetToByteArray + :: Register 'Word -- set token + -> Register 'BinaryOutputPtr -- (output) array + -> Code () + , freeWordSet :: Register 'Word -- set token (invalid after this call) -> Code () @@ -204,7 +209,8 @@ checkSignature key_ty val_ty = fmap snd $ generate Optimised $ \rename_ newSet_ insertOutputSet_ setToArray_ freeSet_ - newWordSet_ insertBytesWordSet_ wordSetToArray_ freeWordSet_ + newWordSet_ insertBytesWordSet_ wordSetToArray_ + byteSetToByteArray_ freeWordSet_ clause_begin key_end clause_end -> output $ \out -> do let syscalls = SysCalls { rename = \id pid reg -> @@ -221,6 +227,8 @@ checkSignature key_ty val_ty = insertBytesWordSet_ set (castRegister start) (castRegister end) , wordSetToArray = \set arr -> callFun_1_1 wordSetToArray_ set (castRegister arr) + , byteSetToByteArray = \set arr -> + callFun_1_1 byteSetToByteArray_ set (castRegister arr) , freeWordSet = callFun_1_0 freeWordSet_ } -- We return the key size in the first local register diff --git a/glean/rts/inventory.h b/glean/rts/inventory.h index bc599a871..4c6c039de 100644 --- a/glean/rts/inventory.h +++ b/glean/rts/inventory.h @@ -106,6 +106,7 @@ struct Predicate { &Rename::newWordSet, &Rename::insertBytesWordSet, &Rename::wordSetToArray, + &Rename::byteSetToByteArray, &Rename::freeWordSet>(rename); Subroutine::Activation::with( diff --git a/glean/rts/query.cpp b/glean/rts/query.cpp index 1affcece2..a14d244f1 100644 --- a/glean/rts/query.cpp +++ b/glean/rts/query.cpp @@ -571,6 +571,7 @@ std::unique_ptr executeQuery( &QueryExecutor::newWordSet, &QueryExecutor::insertWordSet, &QueryExecutor::wordSetToArray, + &QueryExecutor::byteSetToByteArray, &QueryExecutor::freeWordSet>(q); folly::Optional cont; diff --git a/glean/rts/set.cpp b/glean/rts/set.cpp index 44e255824..ac1574f62 100644 --- a/glean/rts/set.cpp +++ b/glean/rts/set.cpp @@ -66,6 +66,14 @@ void SetOps::wordSetToArray(SetOps::SetToken token, binary::Output* out) { } } +void SetOps::byteSetToByteArray(SetOps::SetToken token, binary::Output* out) { + auto& s = wordsets[token]; + out->packed(s.size()); + for (const auto& v : s) { + out->fixed(v); + } +} + void SetOps::freeWordSet(SetOps::SetToken token) { wordsets.erase(wordsets.begin() + token); } diff --git a/glean/rts/set.h b/glean/rts/set.h index da0fd7109..ec33b0cb8 100644 --- a/glean/rts/set.h +++ b/glean/rts/set.h @@ -51,6 +51,8 @@ struct SetOps { void wordSetToArray(SetToken token, binary::Output* out); + void byteSetToByteArray(SetToken token, binary::Output* out); + void freeWordSet(SetToken token); std::vector sets = {};