From 8ca05361505499d5c09993b06ccdc44442ad02f9 Mon Sep 17 00:00:00 2001 From: Julia Molin Date: Fri, 4 Oct 2024 09:36:21 -0700 Subject: [PATCH] Allow compressed (.zst) files for `glean write` Summary: Accept compressed (zstd, .zst) files to ```glean write``` Goal: allow compressed files to be passed through workflows https://www.internalfb.com/code/fbsource/[b5249951f60baf828a88b7089556ee62ae3f099b]/tools/skycastle/lib2/glean/glean.sky?lines=657 Reviewed By: malanka Differential Revision: D63698716 fbshipit-source-id: 6e1236e75717f477b0edc1743aee335351f17ec4 --- glean.cabal.in | 3 ++- glean/client/hs/Glean/Write.hs | 10 +++++++++- glean/tools/gleancli/GleanCLI/Common.hs | 3 ++- glean/tools/gleancli/GleanCLI/Write.hs | 2 +- glean/website/docs/cli.md | 8 ++++---- 5 files changed, 18 insertions(+), 8 deletions(-) diff --git a/glean.cabal.in b/glean.cabal.in index 89db38f7b..45c1beb33 100644 --- a/glean.cabal.in +++ b/glean.cabal.in @@ -582,7 +582,8 @@ library client-hs glean:haxl-datasource, glean:stubs, prettyprinter-ansi-terminal, - thrift-haxl + thrift-haxl, + process-extras library client-hs-local import: fb-haskell, fb-cpp, deps diff --git a/glean/client/hs/Glean/Write.hs b/glean/client/hs/Glean/Write.hs index faaf53a45..88bd013c8 100644 --- a/glean/client/hs/Glean/Write.hs +++ b/glean/client/hs/Glean/Write.hs @@ -34,6 +34,9 @@ import Util.String.Quasi import Glean.Types hiding (Value) import Glean.Schema.Util +import System.FilePath (takeExtension) +import qualified System.Process.ByteString as BS +import System.Exit (ExitCode(ExitSuccess)) $(mangle [s| @@ -53,7 +56,12 @@ instance FromJSON ParseJsonFactBatchForWriteServer where fileToBatches :: FilePath -> IO [JsonFactBatch] fileToBatches file = do - bs <- B.readFile file + bs <- if takeExtension file == ".zst" then do + (exit, bs, err) <- BS.readProcessWithExitCode "zstd" [file,"-d","-c"] "" + when (exit /= ExitSuccess) $ + throwIO $ ErrorCall $ file ++ ": " ++ show err + return bs + else B.readFile file r <- 
Foreign.CPP.Dynamic.callJSONParserFFI c_parseJsonFacts bs case r of Right val -> case Aeson.parse parseJSON val of diff --git a/glean/tools/gleancli/GleanCLI/Common.hs b/glean/tools/gleancli/GleanCLI/Common.hs index 2417e3398..535268f5c 100644 --- a/glean/tools/gleancli/GleanCLI/Common.hs +++ b/glean/tools/gleancli/GleanCLI/Common.hs @@ -112,7 +112,8 @@ fileFormatOpt defaultFormat = option (eitherReader parseFileFormat) <> value defaultFormat <> showDefault <> metavar "(json|binary)" - <> help "Format of the files with facts (see FILE for more details)" + <> help ("Format of the files with facts (see FILE for more details). " + <> "json also accepts zstd compressed json") ) where parseFileFormat :: String -> Either String FileFormat diff --git a/glean/tools/gleancli/GleanCLI/Write.hs b/glean/tools/gleancli/GleanCLI/Write.hs index d8654300c..4dbdbb0bc 100644 --- a/glean/tools/gleancli/GleanCLI/Write.hs +++ b/glean/tools/gleancli/GleanCLI/Write.hs @@ -67,7 +67,7 @@ fileArg :: Parser [FilePath] fileArg = many $ strArgument ( metavar "FILE..." <> help ("File(s) of facts to add to the DB. " - <> "You can specify the format of the file with --file-format") + <> "You can specify the format of the file with --file-format. ") ) repoTimeOpt :: Parser UTCTime diff --git a/glean/website/docs/cli.md b/glean/website/docs/cli.md index 3bfc404e9..8b6473825 100644 --- a/glean/website/docs/cli.md +++ b/glean/website/docs/cli.md @@ -47,8 +47,8 @@ using this option, creation will fail if the current schema has a different definition for any predicate in the base DB schema; therefore predicates may only be added or removed relative to the base DB. * `FILE..`
-File(s) of facts to write into the database (JSON). See [Writing data -to Glean](./write.md). +File(s) of facts to write into the database. Accepts JSON or zstd-compressed JSON (files ending in `.zst`). +See [Writing data to Glean](./write.md). The schema for the new DB is given by: @@ -74,8 +74,8 @@ Write facts to a database. * `--db NAME/INSTANCE` or `--db-name NAME --db-instance INSTANCE`
Specifies the name and instance of the database * `FILE..`
-File(s) of facts to write into the database (JSON). See [Writing data -to Glean](./write.md). +File(s) of facts to write into the database. Accepts JSON or zstd-compressed JSON (files ending in `.zst`). +See [Writing data to Glean](./write.md). * `--finish`
Also mark the DB as complete