From 3d41e2093ac2a6193ec4c43edc1308815a72d282 Mon Sep 17 00:00:00 2001 From: Tristan Cacqueray Date: Wed, 20 Dec 2023 16:31:19 +0000 Subject: [PATCH 01/28] index: add error document to the index This change enables storing crawler errors in the index. --- src/Monocle/Backend/Documents.hs | 17 +++++++++++++++++ src/Monocle/Backend/Index.hs | 18 ++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/src/Monocle/Backend/Documents.hs b/src/Monocle/Backend/Documents.hs index 4b36563df..35cdcca55 100644 --- a/src/Monocle/Backend/Documents.hs +++ b/src/Monocle/Backend/Documents.hs @@ -195,6 +195,20 @@ instance From ETaskData SearchPB.TaskData where taskDataPrefix = from $ fromMaybe "" $ tdPrefix td in SearchPB.TaskData {..} +data EError = EError + { erCrawlerName :: Text + , erEntity :: Text + , erMessage :: Text + , erBody :: Text + } + deriving (Show, Eq, Generic) + +instance ToJSON EError where + toJSON = genericToJSON $ aesonPrefix snakeCase + +instance FromJSON EError where + parseJSON = genericParseJSON $ aesonPrefix snakeCase + data EChangeState = EChangeOpen | EChangeMerged @@ -240,6 +254,7 @@ data EDocType | EIssueDoc | EOrphanTaskData | ECachedAuthor + | EErrorDoc deriving (Eq, Show, Enum, Bounded) allEventTypes :: [EDocType] @@ -262,6 +277,7 @@ instance From EDocType Text where EIssueDoc -> "Issue" EOrphanTaskData -> "OrphanTaskData" ECachedAuthor -> "CachedAuthor" + EErrorDoc -> "Error" instance From EDocType LText where from = via @Text @@ -300,6 +316,7 @@ instance FromJSON EDocType where "Issue" -> pure EIssueDoc "OrphanTaskData" -> pure EOrphanTaskData "CachedAuthor" -> pure ECachedAuthor + "Error" -> pure EErrorDoc anyOtherValue -> fail $ "Unknown Monocle Elastic doc type: " <> from anyOtherValue ) diff --git a/src/Monocle/Backend/Index.hs b/src/Monocle/Backend/Index.hs index 947f7135c..8cf126bb2 100644 --- a/src/Monocle/Backend/Index.hs +++ b/src/Monocle/Backend/Index.hs @@ -58,6 +58,14 @@ instance ToJSON AuthorMapping where , "groups" .= object ["type" .= ("keyword" :: Text)] ] +data BlobMapping = BlobMapping deriving (Eq, Show) + +instance ToJSON BlobMapping where + toJSON BlobMapping = + object + [ "type" .= ("binary" :: Text) + ] + instance ToJSON AuthorIndexMapping where toJSON AuthorIndexMapping = object ["properties" .= AuthorMapping] @@ -220,6 +228,16 @@ instance ToJSON ChangesIndexMapping where , "_adopted" .= BoolMapping ] ] + , "error_data" + .= object + [ "properties" + .= object + [ "crawler_name" .= KWMapping + , "entity" .= TextAndKWMapping + , "message" .= TextAndKWMapping + , "body" .= BlobMapping + ] + ] ] <> cachedAuthorField <> mergedCommitField From 0cd8e46e6c970fbd3c85859ca18595d307d1baba Mon Sep 17 00:00:00 2001 From: Tristan Cacqueray Date: Wed, 20 Dec 2023 17:05:28 +0000 Subject: [PATCH 02/28] api: add error document type to the add doc endpoint --- codegen/Monocle/Protob/Crawler.hs | 152 ++++++++++++++++++++++++++- doc/openapi.yaml | 10 ++ schemas/monocle/protob/crawler.proto | 6 ++ src/Macroscope/Worker.hs | 5 + src/Monocle/Api/Server.hs | 1 + 5 files changed, 172 insertions(+), 2 deletions(-) diff --git a/codegen/Monocle/Protob/Crawler.hs b/codegen/Monocle/Protob/Crawler.hs index b991fe0bf..c9e7549dd 100644 --- a/codegen/Monocle/Protob/Crawler.hs +++ b/codegen/Monocle/Protob/Crawler.hs @@ -341,6 +341,112 @@ instance HsJSONPB.FromJSON EntityType where instance HsProtobuf.Finite EntityType +data CrawlerError = CrawlerError + { crawlerErrorMessage :: Hs.Text + , crawlerErrorBody :: Hs.Text + } + deriving (Hs.Show, Hs.Eq, Hs.Ord, Hs.Generic) + 
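+-- The generated type above mirrors the CrawlerError message that this patch
+-- adds to schemas/monocle/protob/crawler.proto:
+--
+--   message CrawlerError {
+--     string message = 1;
+--     string body = 2;
+--   }
+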
+instance Hs.NFData CrawlerError + +instance HsProtobuf.Named CrawlerError where + nameOf _ = (Hs.fromString "CrawlerError") + +instance HsProtobuf.HasDefault CrawlerError + +instance HsProtobuf.Message CrawlerError where + encodeMessage + _ + CrawlerError + { crawlerErrorMessage = crawlerErrorMessage + , crawlerErrorBody = crawlerErrorBody + } = + ( Hs.mconcat + [ ( HsProtobuf.encodeMessageField + (HsProtobuf.FieldNumber 1) + ( Hs.coerce @(Hs.Text) @(HsProtobuf.String Hs.Text) + (crawlerErrorMessage) + ) + ) + , ( HsProtobuf.encodeMessageField + (HsProtobuf.FieldNumber 2) + ( Hs.coerce @(Hs.Text) @(HsProtobuf.String Hs.Text) + (crawlerErrorBody) + ) + ) + ] + ) + decodeMessage _ = + (Hs.pure CrawlerError) + <*> ( HsProtobuf.coerceOver @(HsProtobuf.String Hs.Text) @(Hs.Text) + ( HsProtobuf.at + HsProtobuf.decodeMessageField + (HsProtobuf.FieldNumber 1) + ) + ) + <*> ( HsProtobuf.coerceOver @(HsProtobuf.String Hs.Text) @(Hs.Text) + ( HsProtobuf.at + HsProtobuf.decodeMessageField + (HsProtobuf.FieldNumber 2) + ) + ) + dotProto _ = + [ ( HsProtobufAST.DotProtoField + (HsProtobuf.FieldNumber 1) + (HsProtobufAST.Prim HsProtobufAST.String) + (HsProtobufAST.Single "message") + [] + "" + ) + , ( HsProtobufAST.DotProtoField + (HsProtobuf.FieldNumber 2) + (HsProtobufAST.Prim HsProtobufAST.String) + (HsProtobufAST.Single "body") + [] + "" + ) + ] + +instance HsJSONPB.ToJSONPB CrawlerError where + toJSONPB (CrawlerError f1 f2) = + ( HsJSONPB.object + [ "message" + .= (Hs.coerce @(Hs.Text) @(HsProtobuf.String Hs.Text) (f1)) + , "body" + .= (Hs.coerce @(Hs.Text) @(HsProtobuf.String Hs.Text) (f2)) + ] + ) + toEncodingPB (CrawlerError f1 f2) = + ( HsJSONPB.pairs + [ "message" + .= (Hs.coerce @(Hs.Text) @(HsProtobuf.String Hs.Text) (f1)) + , "body" + .= (Hs.coerce @(Hs.Text) @(HsProtobuf.String Hs.Text) (f2)) + ] + ) + +instance HsJSONPB.FromJSONPB CrawlerError where + parseJSONPB = + ( HsJSONPB.withObject + "CrawlerError" + ( \obj -> + (Hs.pure CrawlerError) + <*> ( HsProtobuf.coerceOver @(HsProtobuf.String Hs.Text) @(Hs.Text) + (obj .: "message") + ) + <*> ( HsProtobuf.coerceOver @(HsProtobuf.String Hs.Text) @(Hs.Text) + (obj .: "body") + ) + ) + ) + +instance HsJSONPB.ToJSON CrawlerError where + toJSON = HsJSONPB.toAesonValue + toEncoding = HsJSONPB.toAesonEncoding + +instance HsJSONPB.FromJSON CrawlerError where + parseJSON = HsJSONPB.parseJSONPB + data AddDocRequest = AddDocRequest { addDocRequestIndex :: Hs.Text , addDocRequestCrawler :: Hs.Text @@ -356,6 +462,8 @@ data AddDocRequest = AddDocRequest , addDocRequestIssues :: Hs.Vector Monocle.Protob.Issue.Issue , addDocRequestIssueEvents :: Hs.Vector Monocle.Protob.Issue.IssueEvent + , addDocRequestErrors :: + Hs.Vector Monocle.Protob.Crawler.CrawlerError } deriving (Hs.Show, Hs.Eq, Hs.Ord, Hs.Generic) @@ -380,6 +488,7 @@ instance HsProtobuf.Message AddDocRequest where , addDocRequestTaskDatas = addDocRequestTaskDatas , addDocRequestIssues = addDocRequestIssues , addDocRequestIssueEvents = addDocRequestIssueEvents + , addDocRequestErrors = addDocRequestErrors } = ( Hs.mconcat [ ( HsProtobuf.encodeMessageField @@ -449,6 +558,13 @@ instance HsProtobuf.Message AddDocRequest where (addDocRequestIssueEvents) ) ) + , ( HsProtobuf.encodeMessageField + (HsProtobuf.FieldNumber 12) + ( Hs.coerce @(Hs.Vector Monocle.Protob.Crawler.CrawlerError) + @(HsProtobuf.NestedVec Monocle.Protob.Crawler.CrawlerError) + (addDocRequestErrors) + ) + ) ] ) decodeMessage _ = @@ -527,6 +643,14 @@ instance HsProtobuf.Message AddDocRequest where (HsProtobuf.FieldNumber 11) ) ) + 
<*> ( HsProtobuf.coerceOver + @(HsProtobuf.NestedVec Monocle.Protob.Crawler.CrawlerError) + @(Hs.Vector Monocle.Protob.Crawler.CrawlerError) + ( HsProtobuf.at + HsProtobuf.decodeMessageField + (HsProtobuf.FieldNumber 12) + ) + ) dotProto _ = [ ( HsProtobufAST.DotProtoField (HsProtobuf.FieldNumber 1) @@ -632,10 +756,19 @@ instance HsProtobuf.Message AddDocRequest where [] "" ) + , ( HsProtobufAST.DotProtoField + (HsProtobuf.FieldNumber 12) + ( HsProtobufAST.Repeated + (HsProtobufAST.Named (HsProtobufAST.Single "CrawlerError")) + ) + (HsProtobufAST.Single "errors") + [] + "" + ) ] instance HsJSONPB.ToJSONPB AddDocRequest where - toJSONPB (AddDocRequest f1 f2 f3 f4 f5 f6 f7 f8 f10 f11) = + toJSONPB (AddDocRequest f1 f2 f3 f4 f5 f6 f7 f8 f10 f11 f12) = ( HsJSONPB.object [ "index" .= (Hs.coerce @(Hs.Text) @(HsProtobuf.String Hs.Text) (f1)) @@ -678,9 +811,14 @@ instance HsJSONPB.ToJSONPB AddDocRequest where @(HsProtobuf.NestedVec Monocle.Protob.Issue.IssueEvent) (f11) ) + , "errors" + .= ( Hs.coerce @(Hs.Vector Monocle.Protob.Crawler.CrawlerError) + @(HsProtobuf.NestedVec Monocle.Protob.Crawler.CrawlerError) + (f12) + ) ] ) - toEncodingPB (AddDocRequest f1 f2 f3 f4 f5 f6 f7 f8 f10 f11) = + toEncodingPB (AddDocRequest f1 f2 f3 f4 f5 f6 f7 f8 f10 f11 f12) = ( HsJSONPB.pairs [ "index" .= (Hs.coerce @(Hs.Text) @(HsProtobuf.String Hs.Text) (f1)) @@ -723,6 +861,11 @@ instance HsJSONPB.ToJSONPB AddDocRequest where @(HsProtobuf.NestedVec Monocle.Protob.Issue.IssueEvent) (f11) ) + , "errors" + .= ( Hs.coerce @(Hs.Vector Monocle.Protob.Crawler.CrawlerError) + @(HsProtobuf.NestedVec Monocle.Protob.Crawler.CrawlerError) + (f12) + ) ] ) @@ -776,6 +919,11 @@ instance HsJSONPB.FromJSONPB AddDocRequest where @(Hs.Vector Monocle.Protob.Issue.IssueEvent) (obj .: "issue_events") ) + <*> ( HsProtobuf.coerceOver + @(HsProtobuf.NestedVec Monocle.Protob.Crawler.CrawlerError) + @(Hs.Vector Monocle.Protob.Crawler.CrawlerError) + (obj .: "errors") + ) ) ) diff --git a/doc/openapi.yaml b/doc/openapi.yaml index b70704ae2..19767d731 100644 --- a/doc/openapi.yaml +++ b/doc/openapi.yaml @@ -676,6 +676,10 @@ components: items: $ref: '#/components/schemas/monocle_issue_IssueEvent' description: issue_events are added when Entity is project_issue_name + errors: + type: array + items: + $ref: '#/components/schemas/monocle_crawler_CrawlerError' monocle_crawler_AddDocResponse: properties: error: @@ -723,6 +727,12 @@ components: timestamp: type: string format: RFC3339 + monocle_crawler_CrawlerError: + properties: + message: + type: string + body: + type: string monocle_crawler_Entity: properties: organization_name: diff --git a/schemas/monocle/protob/crawler.proto b/schemas/monocle/protob/crawler.proto index 60e147c8e..9821f1cbd 100644 --- a/schemas/monocle/protob/crawler.proto +++ b/schemas/monocle/protob/crawler.proto @@ -28,6 +28,11 @@ enum EntityType { ENTITY_TYPE_USER = 3; } +message CrawlerError { + string message = 1; + string body = 2; +} + message AddDocRequest { string index = 1; string crawler = 2; @@ -45,6 +50,7 @@ message AddDocRequest { repeated monocle_issue.Issue issues = 10; // issue_events are added when Entity is project_issue_name repeated monocle_issue.IssueEvent issue_events = 11; + repeated CrawlerError errors = 12; } enum AddDocError { diff --git a/src/Macroscope/Worker.hs b/src/Macroscope/Worker.hs index 7826feac0..351dfd96c 100644 --- a/src/Macroscope/Worker.hs +++ b/src/Macroscope/Worker.hs @@ -78,6 +78,7 @@ data DocumentType | DTChanges (Change, [ChangeEvent]) | DTTaskData TaskData | DTIssues (Issue, 
[IssueEvent]) + | DTError CrawlerError deriving (Generic, ToJSON) data ProcessError es @@ -246,6 +247,7 @@ runStreamError startTime apiKey indexName (CrawlerName crawlerName) documentStre addDocRequestTaskDatas = V.fromList $ mapMaybe getTD xs addDocRequestIssues = V.fromList $ mapMaybe getIssue xs addDocRequestIssueEvents = V.fromList $ concat $ mapMaybe getIssueEvent xs + addDocRequestErrors = V.fromList $ mapMaybe getError xs in AddDocRequest {..} where getIssue = \case @@ -266,6 +268,9 @@ runStreamError startTime apiKey indexName (CrawlerName crawlerName) documentStre getTD = \case DTTaskData td -> Just td _ -> Nothing + getError = \case + DTError e -> Just e + _ -> Nothing -- 'commitTimestamp' post the commit date. commitTimestamp entity = do diff --git a/src/Monocle/Api/Server.hs b/src/Monocle/Api/Server.hs index ca2867a3f..0cd49eb8a 100644 --- a/src/Monocle/Api/Server.hs +++ b/src/Monocle/Api/Server.hs @@ -293,6 +293,7 @@ crawlerAddDoc _auth request = do taskDatas issues issuesEvents + _errors ) = request let requestE = do From b389c624b0d51af3df239cfa68b5405e30512156 Mon Sep 17 00:00:00 2001 From: Tristan Cacqueray Date: Wed, 20 Dec 2023 20:09:31 +0000 Subject: [PATCH 03/28] crawler: emit crawler error when processing stream --- src/Lentille.hs | 6 ++++++ src/Macroscope/Worker.hs | 25 ++++--------------------- 2 files changed, 10 insertions(+), 21 deletions(-) diff --git a/src/Lentille.hs b/src/Lentille.hs index 9e4254a9e..86dbfdeac 100644 --- a/src/Lentille.hs +++ b/src/Lentille.hs @@ -45,6 +45,7 @@ import Monocle.Protob.Change ( Change_ChangeState (Change_ChangeStateClosed, Change_ChangeStateMerged), Ident (..), ) +import Monocle.Protob.Crawler (CrawlerError (..)) import Network.HTTP.Client qualified as HTTP import Proto3.Suite (Enumerated (Enumerated)) import Streaming.Prelude qualified as S @@ -96,6 +97,11 @@ data LentilleError instance Exception LentilleError +instance From LentilleError CrawlerError where + from = \case + DecodeError xs -> CrawlerError "decode error" (decodeUtf8 $ encode xs) + GraphError x -> CrawlerError "graph error" (decodeUtf8 $ encode x) + type LentilleStream es a = Stream (Of (Either LentilleError a)) (Eff es) () ------------------------------------------------------------------------------- diff --git a/src/Macroscope/Worker.hs b/src/Macroscope/Worker.hs index 351dfd96c..cb5fac62a 100644 --- a/src/Macroscope/Worker.hs +++ b/src/Macroscope/Worker.hs @@ -55,11 +55,6 @@ streamName = \case TaskDatas _ -> "TaskDatas" UserChanges _ -> "UserChanges" -isTDStream :: DocumentStream m -> Bool -isTDStream = \case - TaskDatas _ -> True - _anyOtherStream -> False - ------------------------------------------------------------------------------- -- Adapter between protobuf api and crawler stream ------------------------------------------------------------------------------- @@ -84,7 +79,6 @@ data DocumentType data ProcessError es = CommitError Text | AddError Text - | StreamError (LentilleError, LentilleStream es DocumentType) -- | 'processStream' read the stream of document and post to the monocle API processStream :: @@ -106,12 +100,11 @@ processStream logFunc postFunc = go (0 :: Word) [] [] -- The end of the stream res <- processBatch acc pure $ reverse (res : results) - Right (Left err, rest) -> do - -- An error occured in the stream, abort now - let res = Just (StreamError (err, rest)) - pure $ reverse (res : results) - Right (Right doc, rest) -> do + Right (edoc, rest) -> do -- We got a new document + let doc = case edoc of + Right x -> x + Left err -> DTError $ 
from err let newAcc = doc : acc if count == 499 then do @@ -150,16 +143,6 @@ runStream apiKey indexName crawlerName documentStream = do forM_ errors \case AddError err -> logWarn "Could not add documents" ["err" .= err] CommitError err -> logWarn "Could not commit update date" ["err" .= err] - StreamError (err, rest) -> do - logWarn "Error occured when consuming the document stream" ["err" .= err] - S.toList_ rest >>= \case - [] -> pure () - items -> logWarn "Left over documents found after error" ["items" .= items] - - -- TODO: explains why TDStream don't support offset? - unless (isTDStream documentStream) do - -- Try the next entity by incrementing the offset - go startTime (offset + 1) -- | 'runStreamError' is the stream processor runStreamError :: From f9357eb5d2dfc67becde213dd535e0482269888a Mon Sep 17 00:00:00 2001 From: Tristan Cacqueray Date: Wed, 20 Dec 2023 20:26:45 +0000 Subject: [PATCH 04/28] api: add error indexing --- src/Monocle/Api/Server.hs | 30 +++++++++++++++++++++++------- src/Monocle/Backend/Index.hs | 11 +++++++++++ 2 files changed, 34 insertions(+), 7 deletions(-) diff --git a/src/Monocle/Api/Server.hs b/src/Monocle/Api/Server.hs index 0cd49eb8a..9b0306fa3 100644 --- a/src/Monocle/Api/Server.hs +++ b/src/Monocle/Api/Server.hs @@ -23,6 +23,7 @@ import Monocle.Api.Jwt ( import Monocle.Backend.Documents ( EChange (..), EChangeEvent (..), + EError (..), ) import Monocle.Backend.Index as I import Monocle.Backend.Queries qualified as Q @@ -69,6 +70,7 @@ import Effectful.Reader.Static (asks) import Monocle.Effects import Monocle.Backend.Queries (PeersStrengthMode (PSModeFilterOnAuthor)) +import Monocle.Protob.Crawler (CrawlerError) import Servant.API (Headers) import Servant.API.Header (Header) import Servant.Auth.Server.Internal.JWT (makeJWT) @@ -293,7 +295,7 @@ crawlerAddDoc _auth request = do taskDatas issues issuesEvents - _errors + errors ) = request let requestE = do @@ -312,14 +314,28 @@ crawlerAddDoc _auth request = do pure (index, crawler) case requestE of - Right (index, crawler) -> runEmptyQueryM index $ case toEntity entity of - Project _ -> addChanges crawlerName changes events - ProjectIssue _ -> addIssues crawlerName issues issuesEvents - Organization organizationName -> addProjects crawler organizationName projects - TaskDataEntity _ -> addTDs crawlerName taskDatas - User _ -> addChanges crawlerName changes events + Right (index, crawler) -> runEmptyQueryM index do + addErrors crawlerName (toEntity entity) errors + case toEntity entity of + Project _ -> addChanges crawlerName changes events + ProjectIssue _ -> addIssues crawlerName issues issuesEvents + Organization organizationName -> addProjects crawler organizationName projects + TaskDataEntity _ -> addTDs crawlerName taskDatas + User _ -> addChanges crawlerName changes events Left err -> pure $ toErrorResponse err where + addErrors crawlerName entity errors = do + logInfo "AddingErrors" ["crawler" .= crawlerName, "errors" .= length errors] + let toError :: CrawlerError -> EError + toError ce = + EError + { erCrawlerName = from crawlerName + , erEntity = from entity + , erMessage = from ce.crawlerErrorMessage + , erBody = from ce.crawlerErrorBody + } + I.indexErrors $ toList (toError <$> errors) + addTDs crawlerName taskDatas = do logInfo "AddingTaskData" ["crawler" .= crawlerName, "tds" .= length taskDatas] I.taskDataAdd (from crawlerName) $ toList taskDatas diff --git a/src/Monocle/Backend/Index.hs b/src/Monocle/Backend/Index.hs index 8cf126bb2..4a6a4dc40 100644 --- a/src/Monocle/Backend/Index.hs +++ 
b/src/Monocle/Backend/Index.hs @@ -649,6 +649,17 @@ indexChanges changes = indexDocs $ fmap (toDoc . ensureType) changes toDoc change = (toJSON change, getChangeDocId change) ensureType change = change {echangeType = EChangeDoc} +indexErrors :: MonoQuery :> es => IndexEffects es => [EError] -> Eff es () +indexErrors errors = indexDocs $ fmap toDoc errors + where + toDoc err = (getErrorDoc err, getErrorDocId err) + + getErrorDoc :: EError -> Value + getErrorDoc err = object ["type" .= EErrorDoc, "error_data" .= toJSON err] + + getErrorDocId :: EError -> BH.DocId + getErrorDocId = getBHDocID . erBody + indexIssues :: [EIssue] -> Eff es () indexIssues = error "todo" From 1d808a0c8fe189fa6dc18c5ad24d57a6b5e382ec Mon Sep 17 00:00:00 2001 From: Tristan Cacqueray Date: Thu, 21 Dec 2023 19:30:33 +0000 Subject: [PATCH 05/28] index: add error created_at attribute --- codegen/Monocle/Protob/Crawler.hs | 56 +++++++++++++++++++++++++--- doc/openapi.yaml | 3 ++ schemas/monocle/protob/crawler.proto | 1 + src/Lentille.hs | 10 ++--- src/Lentille/GraphQL.hs | 13 +++++-- src/Macroscope/Test.hs | 2 +- src/Monocle/Api/Server.hs | 16 ++++---- src/Monocle/Backend/Index.hs | 8 ++-- 8 files changed, 81 insertions(+), 28 deletions(-) diff --git a/codegen/Monocle/Protob/Crawler.hs b/codegen/Monocle/Protob/Crawler.hs index c9e7549dd..28d222144 100644 --- a/codegen/Monocle/Protob/Crawler.hs +++ b/codegen/Monocle/Protob/Crawler.hs @@ -344,6 +344,8 @@ instance HsProtobuf.Finite EntityType data CrawlerError = CrawlerError { crawlerErrorMessage :: Hs.Text , crawlerErrorBody :: Hs.Text + , crawlerErrorCreatedAt :: + Hs.Maybe Google.Protobuf.Timestamp.Timestamp } deriving (Hs.Show, Hs.Eq, Hs.Ord, Hs.Generic) @@ -360,6 +362,7 @@ instance HsProtobuf.Message CrawlerError where CrawlerError { crawlerErrorMessage = crawlerErrorMessage , crawlerErrorBody = crawlerErrorBody + , crawlerErrorCreatedAt = crawlerErrorCreatedAt } = ( Hs.mconcat [ ( HsProtobuf.encodeMessageField @@ -374,6 +377,13 @@ instance HsProtobuf.Message CrawlerError where (crawlerErrorBody) ) ) + , ( HsProtobuf.encodeMessageField + (HsProtobuf.FieldNumber 3) + ( Hs.coerce @(Hs.Maybe Google.Protobuf.Timestamp.Timestamp) + @(HsProtobuf.Nested Google.Protobuf.Timestamp.Timestamp) + (crawlerErrorCreatedAt) + ) + ) ] ) decodeMessage _ = @@ -390,6 +400,14 @@ instance HsProtobuf.Message CrawlerError where (HsProtobuf.FieldNumber 2) ) ) + <*> ( HsProtobuf.coerceOver + @(HsProtobuf.Nested Google.Protobuf.Timestamp.Timestamp) + @(Hs.Maybe Google.Protobuf.Timestamp.Timestamp) + ( HsProtobuf.at + HsProtobuf.decodeMessageField + (HsProtobuf.FieldNumber 3) + ) + ) dotProto _ = [ ( HsProtobufAST.DotProtoField (HsProtobuf.FieldNumber 1) @@ -405,23 +423,44 @@ instance HsProtobuf.Message CrawlerError where [] "" ) + , ( HsProtobufAST.DotProtoField + (HsProtobuf.FieldNumber 3) + ( HsProtobufAST.Prim + ( HsProtobufAST.Named + ( HsProtobufAST.Dots + (HsProtobufAST.Path ("google" Hs.:| ["protobuf", "Timestamp"])) + ) + ) + ) + (HsProtobufAST.Single "created_at") + [] + "" + ) ] instance HsJSONPB.ToJSONPB CrawlerError where - toJSONPB (CrawlerError f1 f2) = + toJSONPB (CrawlerError f1 f2 f3) = ( HsJSONPB.object [ "message" .= (Hs.coerce @(Hs.Text) @(HsProtobuf.String Hs.Text) (f1)) - , "body" - .= (Hs.coerce @(Hs.Text) @(HsProtobuf.String Hs.Text) (f2)) + , "body" .= (Hs.coerce @(Hs.Text) @(HsProtobuf.String Hs.Text) (f2)) + , "created_at" + .= ( Hs.coerce @(Hs.Maybe Google.Protobuf.Timestamp.Timestamp) + @(HsProtobuf.Nested Google.Protobuf.Timestamp.Timestamp) + (f3) + ) ] ) - 
toEncodingPB (CrawlerError f1 f2) = + toEncodingPB (CrawlerError f1 f2 f3) = ( HsJSONPB.pairs [ "message" .= (Hs.coerce @(Hs.Text) @(HsProtobuf.String Hs.Text) (f1)) - , "body" - .= (Hs.coerce @(Hs.Text) @(HsProtobuf.String Hs.Text) (f2)) + , "body" .= (Hs.coerce @(Hs.Text) @(HsProtobuf.String Hs.Text) (f2)) + , "created_at" + .= ( Hs.coerce @(Hs.Maybe Google.Protobuf.Timestamp.Timestamp) + @(HsProtobuf.Nested Google.Protobuf.Timestamp.Timestamp) + (f3) + ) ] ) @@ -437,6 +476,11 @@ instance HsJSONPB.FromJSONPB CrawlerError where <*> ( HsProtobuf.coerceOver @(HsProtobuf.String Hs.Text) @(Hs.Text) (obj .: "body") ) + <*> ( HsProtobuf.coerceOver + @(HsProtobuf.Nested Google.Protobuf.Timestamp.Timestamp) + @(Hs.Maybe Google.Protobuf.Timestamp.Timestamp) + (obj .: "created_at") + ) ) ) diff --git a/doc/openapi.yaml b/doc/openapi.yaml index 19767d731..b8c307407 100644 --- a/doc/openapi.yaml +++ b/doc/openapi.yaml @@ -733,6 +733,9 @@ components: type: string body: type: string + created_at: + type: string + format: RFC3339 monocle_crawler_Entity: properties: organization_name: diff --git a/schemas/monocle/protob/crawler.proto b/schemas/monocle/protob/crawler.proto index 9821f1cbd..28c5ce0cd 100644 --- a/schemas/monocle/protob/crawler.proto +++ b/schemas/monocle/protob/crawler.proto @@ -31,6 +31,7 @@ enum EntityType { message CrawlerError { string message = 1; string body = 2; + google.protobuf.Timestamp created_at = 3; } message AddDocRequest { diff --git a/src/Lentille.hs b/src/Lentille.hs index 86dbfdeac..300338358 100644 --- a/src/Lentille.hs +++ b/src/Lentille.hs @@ -91,16 +91,14 @@ data GraphQLError = GraphQLError deriving (Show, Generic, ToJSON) data LentilleError - = DecodeError [Text] - | GraphError GraphQLError + = DecodeError UTCTime [Text] + | GraphError UTCTime GraphQLError deriving (Show, Generic, ToJSON) -instance Exception LentilleError - instance From LentilleError CrawlerError where from = \case - DecodeError xs -> CrawlerError "decode error" (decodeUtf8 $ encode xs) - GraphError x -> CrawlerError "graph error" (decodeUtf8 $ encode x) + DecodeError ts xs -> CrawlerError "decode error" (decodeUtf8 $ encode xs) (Just $ from ts) + GraphError ts x -> CrawlerError "graph error" (decodeUtf8 $ encode x) (Just $ from ts) type LentilleStream es a = Stream (Of (Either LentilleError a)) (Eff es) () diff --git a/src/Lentille/GraphQL.hs b/src/Lentille/GraphQL.hs index 547e8c44d..368bfbb93 100644 --- a/src/Lentille/GraphQL.hs +++ b/src/Lentille/GraphQL.hs @@ -238,7 +238,9 @@ streamFetch client@GraphClient {..} mkArgs StreamFetchOptParams {..} transformRe Nothing -> pure Nothing case mErr of - Just err -> S.yield (Left $ GraphError err) + Just err -> do + now <- lift mGetCurrentTime + S.yield (Left $ GraphError now err) Nothing -> go Nothing 0 go pageInfoM totalFetched = do @@ -254,15 +256,18 @@ streamFetch client@GraphClient {..} mkArgs StreamFetchOptParams {..} transformRe -- Handle the response case respE of - Left e -> + Left e -> do -- Yield the error and stop the stream - S.yield (Left $ GraphError e) + now <- lift mGetCurrentTime + S.yield (Left $ GraphError now e) Right (pageInfo, rateLimitM, decodingErrors, xs) -> do -- Log crawling status logStep pageInfo rateLimitM xs totalFetched case decodingErrors of - _ : _ -> S.yield (Left $ DecodeError decodingErrors) + _ : _ -> do + now <- lift mGetCurrentTime + S.yield (Left $ DecodeError now decodingErrors) [] -> do -- Yield the results S.each (Right <$> xs) diff --git a/src/Macroscope/Test.hs b/src/Macroscope/Test.hs index 
17f9abd16..4657bf08a 100644 --- a/src/Macroscope/Test.hs +++ b/src/Macroscope/Test.hs @@ -64,7 +64,7 @@ testCrawlingPoint = do badStream date name | date == BT.fakeDateAlt && name == "opendev/neutron" = do Streaming.yield $ Right (fakeChangePB, []) - Streaming.yield $ Left (DecodeError ["Oops"]) + Streaming.yield $ Left (DecodeError BT.fakeDateAlt ["Oops"]) | otherwise = error "Bad crawling point" -- A document stream that yield a change diff --git a/src/Monocle/Api/Server.hs b/src/Monocle/Api/Server.hs index 9b0306fa3..790c75a40 100644 --- a/src/Monocle/Api/Server.hs +++ b/src/Monocle/Api/Server.hs @@ -326,14 +326,16 @@ crawlerAddDoc _auth request = do where addErrors crawlerName entity errors = do logInfo "AddingErrors" ["crawler" .= crawlerName, "errors" .= length errors] - let toError :: CrawlerError -> EError + let toError :: CrawlerError -> (UTCTime, EError) toError ce = - EError - { erCrawlerName = from crawlerName - , erEntity = from entity - , erMessage = from ce.crawlerErrorMessage - , erBody = from ce.crawlerErrorBody - } + ( from $ fromMaybe (error "missing timestamp") ce.crawlerErrorCreatedAt + , EError + { erCrawlerName = from crawlerName + , erEntity = from entity + , erMessage = from ce.crawlerErrorMessage + , erBody = from ce.crawlerErrorBody + } + ) I.indexErrors $ toList (toError <$> errors) addTDs crawlerName taskDatas = do diff --git a/src/Monocle/Backend/Index.hs b/src/Monocle/Backend/Index.hs index 4a6a4dc40..f3ff5927f 100644 --- a/src/Monocle/Backend/Index.hs +++ b/src/Monocle/Backend/Index.hs @@ -649,13 +649,13 @@ indexChanges changes = indexDocs $ fmap (toDoc . ensureType) changes toDoc change = (toJSON change, getChangeDocId change) ensureType change = change {echangeType = EChangeDoc} -indexErrors :: MonoQuery :> es => IndexEffects es => [EError] -> Eff es () +indexErrors :: MonoQuery :> es => IndexEffects es => [(UTCTime, EError)] -> Eff es () indexErrors errors = indexDocs $ fmap toDoc errors where - toDoc err = (getErrorDoc err, getErrorDocId err) + toDoc (ts, err) = (getErrorDoc ts err, getErrorDocId err) - getErrorDoc :: EError -> Value - getErrorDoc err = object ["type" .= EErrorDoc, "error_data" .= toJSON err] + getErrorDoc :: UTCTime -> EError -> Value + getErrorDoc ts err = object ["created_at" .= ts, "type" .= EErrorDoc, "error_data" .= toJSON err] getErrorDocId :: EError -> BH.DocId getErrorDocId = getBHDocID . erBody From 2475a4ec35af4a65b6722d4a13f6397c5d457ee8 Mon Sep 17 00:00:00 2001 From: Tristan Cacqueray Date: Thu, 21 Dec 2023 19:35:46 +0000 Subject: [PATCH 06/28] crawler: base64 encode json blob --- src/Lentille.hs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/Lentille.hs b/src/Lentille.hs index 300338358..29bae3be4 100644 --- a/src/Lentille.hs +++ b/src/Lentille.hs @@ -31,6 +31,7 @@ module Lentille ( module Monocle.Logging, ) where +import Data.ByteString.Base64.Lazy qualified as B64 import Data.Text qualified as T import Google.Protobuf.Timestamp qualified as T import Monocle.Class @@ -97,8 +98,11 @@ data LentilleError instance From LentilleError CrawlerError where from = \case - DecodeError ts xs -> CrawlerError "decode error" (decodeUtf8 $ encode xs) (Just $ from ts) - GraphError ts x -> CrawlerError "graph error" (decodeUtf8 $ encode x) (Just $ from ts) + DecodeError ts xs -> CrawlerError "decode error" (encodeBlob xs) (Just $ from ts) + GraphError ts x -> CrawlerError "graph error" (encodeBlob x) (Just $ from ts) + +encodeBlob :: ToJSON a => a -> LText +encodeBlob = decodeUtf8 . B64.encode . 
encode type LentilleStream es a = Stream (Of (Either LentilleError a)) (Eff es) () From 5cac8e3d47d44fa338a9de3fe80f25012b4f144e Mon Sep 17 00:00:00 2001 From: Tristan Cacqueray Date: Thu, 21 Dec 2023 19:47:46 +0000 Subject: [PATCH 07/28] test: fix the macroscope failure test --- src/Lentille.hs | 2 +- src/Macroscope/Test.hs | 10 ++++++---- src/Macroscope/Worker.hs | 4 ++-- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/Lentille.hs b/src/Lentille.hs index 29bae3be4..42f7232f9 100644 --- a/src/Lentille.hs +++ b/src/Lentille.hs @@ -151,7 +151,7 @@ type Changes = (Change, [ChangeEvent]) -- We don't care about the rest so we replace it with () -- See: https://hackage.haskell.org/package/streaming-0.2.4.0/docs/Streaming-Prelude.html#v:break -- --- >>> let stream = S.yield (Left (DecodeError ["oops"])) +-- >>> let stream = S.yield (Left (DecodeError [utctime|2021-05-31 00:00:00|] ["oops"])) -- >>> runEff $ S.length_ $ streamDropBefore [utctime|2021-05-31 00:00:00|] stream -- 1 streamDropBefore :: UTCTime -> LentilleStream es Changes -> LentilleStream es Changes diff --git a/src/Macroscope/Test.hs b/src/Macroscope/Test.hs index 4657bf08a..51211ded0 100644 --- a/src/Macroscope/Test.hs +++ b/src/Macroscope/Test.hs @@ -53,9 +53,11 @@ testCrawlingPoint = do Macroscope.runStream apiKey indexName (CrawlerName crawlerName) (Macroscope.Changes badStream) (currentOldestAge, _) <- getOldest - liftIO $ assertEqual "Commit date is not updated on failure" oldestAge currentOldestAge + liftIO $ assertBool "Commit date is updated on failure" (currentOldestAge > oldestAge) - Macroscope.runStream apiKey indexName (CrawlerName crawlerName) (Macroscope.Changes goodStream) + -- TODO: check that the errors got indexed + + Macroscope.runStream apiKey indexName (CrawlerName crawlerName) (Macroscope.Changes $ goodStream currentOldestAge) (newOldestAge, _) <- getOldest liftIO $ assertBool "Commit date updated" (newOldestAge > oldestAge) @@ -68,8 +70,8 @@ testCrawlingPoint = do | otherwise = error "Bad crawling point" -- A document stream that yield a change - goodStream date name - | date == BT.fakeDateAlt && name == "opendev/neutron" = do + goodStream expected date name + | date == expected && name == "opendev/neutron" = do Streaming.yield $ Right (fakeChangePB, []) | otherwise = error "Bad crawling point" diff --git a/src/Macroscope/Worker.hs b/src/Macroscope/Worker.hs index cb5fac62a..e900efdfe 100644 --- a/src/Macroscope/Worker.hs +++ b/src/Macroscope/Worker.hs @@ -103,8 +103,8 @@ processStream logFunc postFunc = go (0 :: Word) [] [] Right (edoc, rest) -> do -- We got a new document let doc = case edoc of - Right x -> x - Left err -> DTError $ from err + Right x -> x + Left err -> DTError $ from err let newAcc = doc : acc if count == 499 then do From 303dae29a4924419c6321055967d6743b35ff38b Mon Sep 17 00:00:00 2001 From: Tristan Cacqueray Date: Thu, 21 Dec 2023 20:21:03 +0000 Subject: [PATCH 08/28] api: add crawler/errors endpoint to fetch errors --- codegen/Monocle/Protob/Crawler.hs | 342 +++++++++++++++++++++++++++ doc/openapi.yaml | 35 +++ schemas/monocle/protob/crawler.proto | 16 ++ schemas/monocle/protob/http.proto | 8 + src/Monocle/Api/Server.hs | 19 ++ src/Monocle/Backend/Documents.hs | 9 + src/Monocle/Backend/Queries.hs | 22 +- src/Monocle/Client/Api.hs | 3 + src/Monocle/Servant/HTTP.hs | 6 +- 9 files changed, 456 insertions(+), 4 deletions(-) diff --git a/codegen/Monocle/Protob/Crawler.hs b/codegen/Monocle/Protob/Crawler.hs index 28d222144..41a1b7c9e 100644 --- 
a/codegen/Monocle/Protob/Crawler.hs +++ b/codegen/Monocle/Protob/Crawler.hs @@ -491,6 +491,348 @@ instance HsJSONPB.ToJSON CrawlerError where instance HsJSONPB.FromJSON CrawlerError where parseJSON = HsJSONPB.parseJSONPB +data ErrorsRequest = ErrorsRequest + { errorsRequestIndex :: Hs.Text + , errorsRequestQuery :: Hs.Text + } + deriving (Hs.Show, Hs.Eq, Hs.Ord, Hs.Generic) + +instance Hs.NFData ErrorsRequest + +instance HsProtobuf.Named ErrorsRequest where + nameOf _ = (Hs.fromString "ErrorsRequest") + +instance HsProtobuf.HasDefault ErrorsRequest + +instance HsProtobuf.Message ErrorsRequest where + encodeMessage + _ + ErrorsRequest + { errorsRequestIndex = errorsRequestIndex + , errorsRequestQuery = errorsRequestQuery + } = + ( Hs.mconcat + [ ( HsProtobuf.encodeMessageField + (HsProtobuf.FieldNumber 1) + ( Hs.coerce @(Hs.Text) @(HsProtobuf.String Hs.Text) + (errorsRequestIndex) + ) + ) + , ( HsProtobuf.encodeMessageField + (HsProtobuf.FieldNumber 2) + ( Hs.coerce @(Hs.Text) @(HsProtobuf.String Hs.Text) + (errorsRequestQuery) + ) + ) + ] + ) + decodeMessage _ = + (Hs.pure ErrorsRequest) + <*> ( HsProtobuf.coerceOver @(HsProtobuf.String Hs.Text) @(Hs.Text) + ( HsProtobuf.at + HsProtobuf.decodeMessageField + (HsProtobuf.FieldNumber 1) + ) + ) + <*> ( HsProtobuf.coerceOver @(HsProtobuf.String Hs.Text) @(Hs.Text) + ( HsProtobuf.at + HsProtobuf.decodeMessageField + (HsProtobuf.FieldNumber 2) + ) + ) + dotProto _ = + [ ( HsProtobufAST.DotProtoField + (HsProtobuf.FieldNumber 1) + (HsProtobufAST.Prim HsProtobufAST.String) + (HsProtobufAST.Single "index") + [] + "" + ) + , ( HsProtobufAST.DotProtoField + (HsProtobuf.FieldNumber 2) + (HsProtobufAST.Prim HsProtobufAST.String) + (HsProtobufAST.Single "query") + [] + "" + ) + ] + +instance HsJSONPB.ToJSONPB ErrorsRequest where + toJSONPB (ErrorsRequest f1 f2) = + ( HsJSONPB.object + [ "index" + .= (Hs.coerce @(Hs.Text) @(HsProtobuf.String Hs.Text) (f1)) + , "query" + .= (Hs.coerce @(Hs.Text) @(HsProtobuf.String Hs.Text) (f2)) + ] + ) + toEncodingPB (ErrorsRequest f1 f2) = + ( HsJSONPB.pairs + [ "index" + .= (Hs.coerce @(Hs.Text) @(HsProtobuf.String Hs.Text) (f1)) + , "query" + .= (Hs.coerce @(Hs.Text) @(HsProtobuf.String Hs.Text) (f2)) + ] + ) + +instance HsJSONPB.FromJSONPB ErrorsRequest where + parseJSONPB = + ( HsJSONPB.withObject + "ErrorsRequest" + ( \obj -> + (Hs.pure ErrorsRequest) + <*> ( HsProtobuf.coerceOver @(HsProtobuf.String Hs.Text) @(Hs.Text) + (obj .: "index") + ) + <*> ( HsProtobuf.coerceOver @(HsProtobuf.String Hs.Text) @(Hs.Text) + (obj .: "query") + ) + ) + ) + +instance HsJSONPB.ToJSON ErrorsRequest where + toJSON = HsJSONPB.toAesonValue + toEncoding = HsJSONPB.toAesonEncoding + +instance HsJSONPB.FromJSON ErrorsRequest where + parseJSON = HsJSONPB.parseJSONPB + +newtype ErrorsList = ErrorsList + { errorsListErrors :: + Hs.Vector Monocle.Protob.Crawler.CrawlerError + } + deriving (Hs.Show, Hs.Eq, Hs.Ord, Hs.Generic) + +instance Hs.NFData ErrorsList + +instance HsProtobuf.Named ErrorsList where + nameOf _ = (Hs.fromString "ErrorsList") + +instance HsProtobuf.HasDefault ErrorsList + +instance HsProtobuf.Message ErrorsList where + encodeMessage _ ErrorsList {errorsListErrors = errorsListErrors} = + ( Hs.mconcat + [ ( HsProtobuf.encodeMessageField + (HsProtobuf.FieldNumber 1) + ( Hs.coerce @(Hs.Vector Monocle.Protob.Crawler.CrawlerError) + @(HsProtobuf.NestedVec Monocle.Protob.Crawler.CrawlerError) + (errorsListErrors) + ) + ) + ] + ) + decodeMessage _ = + (Hs.pure ErrorsList) + <*> ( HsProtobuf.coerceOver + @(HsProtobuf.NestedVec 
Monocle.Protob.Crawler.CrawlerError) + @(Hs.Vector Monocle.Protob.Crawler.CrawlerError) + ( HsProtobuf.at + HsProtobuf.decodeMessageField + (HsProtobuf.FieldNumber 1) + ) + ) + dotProto _ = + [ ( HsProtobufAST.DotProtoField + (HsProtobuf.FieldNumber 1) + ( HsProtobufAST.Repeated + (HsProtobufAST.Named (HsProtobufAST.Single "CrawlerError")) + ) + (HsProtobufAST.Single "errors") + [] + "" + ) + ] + +instance HsJSONPB.ToJSONPB ErrorsList where + toJSONPB (ErrorsList f1) = + ( HsJSONPB.object + [ "errors" + .= ( Hs.coerce @(Hs.Vector Monocle.Protob.Crawler.CrawlerError) + @(HsProtobuf.NestedVec Monocle.Protob.Crawler.CrawlerError) + (f1) + ) + ] + ) + toEncodingPB (ErrorsList f1) = + ( HsJSONPB.pairs + [ "errors" + .= ( Hs.coerce @(Hs.Vector Monocle.Protob.Crawler.CrawlerError) + @(HsProtobuf.NestedVec Monocle.Protob.Crawler.CrawlerError) + (f1) + ) + ] + ) + +instance HsJSONPB.FromJSONPB ErrorsList where + parseJSONPB = + ( HsJSONPB.withObject + "ErrorsList" + ( \obj -> + (Hs.pure ErrorsList) + <*> ( HsProtobuf.coerceOver + @(HsProtobuf.NestedVec Monocle.Protob.Crawler.CrawlerError) + @(Hs.Vector Monocle.Protob.Crawler.CrawlerError) + (obj .: "errors") + ) + ) + ) + +instance HsJSONPB.ToJSON ErrorsList where + toJSON = HsJSONPB.toAesonValue + toEncoding = HsJSONPB.toAesonEncoding + +instance HsJSONPB.FromJSON ErrorsList where + parseJSON = HsJSONPB.parseJSONPB + +newtype ErrorsResponse = ErrorsResponse + { errorsResponseResult :: + Hs.Maybe ErrorsResponseResult + } + deriving (Hs.Show, Hs.Eq, Hs.Ord, Hs.Generic) + +instance Hs.NFData ErrorsResponse + +instance HsProtobuf.Named ErrorsResponse where + nameOf _ = (Hs.fromString "ErrorsResponse") + +instance HsProtobuf.HasDefault ErrorsResponse + +instance HsProtobuf.Message ErrorsResponse where + encodeMessage + _ + ErrorsResponse {errorsResponseResult = errorsResponseResult} = + ( Hs.mconcat + [ case errorsResponseResult of + Hs.Nothing -> Hs.mempty + Hs.Just x -> + case x of + ErrorsResponseResultSuccess y -> + ( HsProtobuf.encodeMessageField + (HsProtobuf.FieldNumber 1) + ( Hs.coerce @(Hs.Maybe Monocle.Protob.Crawler.ErrorsList) + @(HsProtobuf.Nested Monocle.Protob.Crawler.ErrorsList) + (Hs.Just y) + ) + ) + ErrorsResponseResultError y -> + ( HsProtobuf.encodeMessageField + (HsProtobuf.FieldNumber 2) + ( HsProtobuf.ForceEmit + (Hs.coerce @(Hs.Text) @(HsProtobuf.String Hs.Text) (y)) + ) + ) + ] + ) + decodeMessage _ = + (Hs.pure ErrorsResponse) + <*> ( HsProtobuf.oneof + Hs.Nothing + [ + ( (HsProtobuf.FieldNumber 1) + , (Hs.pure (Hs.fmap ErrorsResponseResultSuccess)) + <*> ( HsProtobuf.coerceOver + @(HsProtobuf.Nested Monocle.Protob.Crawler.ErrorsList) + @(Hs.Maybe Monocle.Protob.Crawler.ErrorsList) + (HsProtobuf.decodeMessageField) + ) + ) + , + ( (HsProtobuf.FieldNumber 2) + , (Hs.pure (Hs.Just Hs.. 
ErrorsResponseResultError)) + <*> ( HsProtobuf.coerceOver @(HsProtobuf.String Hs.Text) @(Hs.Text) + (HsProtobuf.decodeMessageField) + ) + ) + ] + ) + dotProto _ = [] + +instance HsJSONPB.ToJSONPB ErrorsResponse where + toJSONPB (ErrorsResponse f1_or_f2) = + ( HsJSONPB.object + [ ( let encodeResult = + ( case f1_or_f2 of + Hs.Just (ErrorsResponseResultSuccess f1) -> + (HsJSONPB.pair "success" f1) + Hs.Just (ErrorsResponseResultError f2) -> + ( HsJSONPB.pair + "error" + (Hs.coerce @(Hs.Text) @(HsProtobuf.String Hs.Text) (f2)) + ) + Hs.Nothing -> Hs.mempty + ) + in \options -> + if HsJSONPB.optEmitNamedOneof options + then + ("result" .= (HsJSONPB.objectOrNull [encodeResult] options)) + options + else encodeResult options + ) + ] + ) + toEncodingPB (ErrorsResponse f1_or_f2) = + ( HsJSONPB.pairs + [ ( let encodeResult = + ( case f1_or_f2 of + Hs.Just (ErrorsResponseResultSuccess f1) -> + (HsJSONPB.pair "success" f1) + Hs.Just (ErrorsResponseResultError f2) -> + ( HsJSONPB.pair + "error" + (Hs.coerce @(Hs.Text) @(HsProtobuf.String Hs.Text) (f2)) + ) + Hs.Nothing -> Hs.mempty + ) + in \options -> + if HsJSONPB.optEmitNamedOneof options + then ("result" .= (HsJSONPB.pairsOrNull [encodeResult] options)) options + else encodeResult options + ) + ] + ) + +instance HsJSONPB.FromJSONPB ErrorsResponse where + parseJSONPB = + ( HsJSONPB.withObject + "ErrorsResponse" + ( \obj -> + (Hs.pure ErrorsResponse) + <*> ( let parseResult parseObj = + Hs.msum + [ Hs.Just Hs.. ErrorsResponseResultSuccess + <$> (HsJSONPB.parseField parseObj "success") + , Hs.Just + Hs.. ErrorsResponseResultError + Hs.. Hs.coerce @(HsProtobuf.String Hs.Text) @(Hs.Text) + <$> (HsJSONPB.parseField parseObj "error") + , Hs.pure Hs.Nothing + ] + in ( (obj .: "result") + Hs.>>= (HsJSONPB.withObject "result" parseResult) + ) + <|> (parseResult obj) + ) + ) + ) + +instance HsJSONPB.ToJSON ErrorsResponse where + toJSON = HsJSONPB.toAesonValue + toEncoding = HsJSONPB.toAesonEncoding + +instance HsJSONPB.FromJSON ErrorsResponse where + parseJSON = HsJSONPB.parseJSONPB + +data ErrorsResponseResult + = ErrorsResponseResultSuccess Monocle.Protob.Crawler.ErrorsList + | ErrorsResponseResultError Hs.Text + deriving (Hs.Show, Hs.Eq, Hs.Ord, Hs.Generic) + +instance Hs.NFData ErrorsResponseResult + +instance HsProtobuf.Named ErrorsResponseResult where + nameOf _ = (Hs.fromString "ErrorsResponseResult") + data AddDocRequest = AddDocRequest { addDocRequestIndex :: Hs.Text , addDocRequestCrawler :: Hs.Text diff --git a/doc/openapi.yaml b/doc/openapi.yaml index b8c307407..5ec0a5678 100644 --- a/doc/openapi.yaml +++ b/doc/openapi.yaml @@ -90,6 +90,23 @@ paths: application/json: schema: $ref: '#/components/schemas/monocle_crawler_CommitResponse' + /api/2/crawler/errors: + post: + summary: Get crawler errors + operationId: Crawler_Errors + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/monocle_crawler_ErrorsRequest' + required: true + responses: + "200": + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/monocle_crawler_ErrorsResponse' /api/2/crawler/get_commit_info: post: summary: Get commit message @@ -749,6 +766,24 @@ components: user_name: type: string description: A descriptive name of the entities being added + monocle_crawler_ErrorsList: + properties: + errors: + type: array + items: + $ref: '#/components/schemas/monocle_crawler_CrawlerError' + monocle_crawler_ErrorsRequest: + properties: + index: + type: string + query: + type: string + monocle_crawler_ErrorsResponse: + 
properties: + success: + $ref: '#/components/schemas/monocle_crawler_ErrorsList' + error: + type: string monocle_crawler_Project: properties: full_path: diff --git a/schemas/monocle/protob/crawler.proto b/schemas/monocle/protob/crawler.proto index 28c5ce0cd..472fbeb65 100644 --- a/schemas/monocle/protob/crawler.proto +++ b/schemas/monocle/protob/crawler.proto @@ -34,6 +34,22 @@ message CrawlerError { google.protobuf.Timestamp created_at = 3; } +message ErrorsRequest { + string index = 1; + string query = 2; +} + +message ErrorsList { + repeated CrawlerError errors = 1; +} + +message ErrorsResponse { + oneof result { + ErrorsList success = 1; + string error = 2; + } +} + message AddDocRequest { string index = 1; string crawler = 2; diff --git a/schemas/monocle/protob/http.proto b/schemas/monocle/protob/http.proto index 37aeeb4a8..5317c011a 100644 --- a/schemas/monocle/protob/http.proto +++ b/schemas/monocle/protob/http.proto @@ -172,6 +172,14 @@ service Crawler { body: "*" }; } + // Get crawler errors + rpc Errors(monocle_crawler.ErrorsRequest) + returns (monocle_crawler.ErrorsResponse) { + option (google.api.http) = { + post: "/api/2/crawler/errors" + body: "*" + }; + } } // The monocle HTTP API diff --git a/src/Monocle/Api/Server.hs b/src/Monocle/Api/Server.hs index 790c75a40..03410f3aa 100644 --- a/src/Monocle/Api/Server.hs +++ b/src/Monocle/Api/Server.hs @@ -561,6 +561,25 @@ searchCheck auth request = checkAuth auth response SearchPB.CheckResponseResultError $ SearchPB.QueryError (from msg) (fromInteger . toInteger $ offset) +-- | /crawler/errors endpoint +crawlerErrors :: ApiEffects es => AuthResult AuthenticatedUser -> CrawlerPB.ErrorsRequest -> Eff es CrawlerPB.ErrorsResponse +crawlerErrors auth request = checkAuth auth response + where + response _authenticatedUserM = do + requestE <- validateSearchRequest request.errorsRequestIndex request.errorsRequestQuery "nobody" + + case requestE of + Right (tenant, query) -> runQueryM tenant (Q.ensureMinBound query) $ do + logInfo "ListingErrors" ["index" .= request.errorsRequestIndex] + errors <- fmap from <$> Q.crawlerErrors + pure $ CrawlerPB.ErrorsResponse $ Just $ CrawlerPB.ErrorsResponseResultSuccess $ CrawlerPB.ErrorsList $ fromList errors + Left (ParseError msg offset) -> + pure + $ CrawlerPB.ErrorsResponse + $ Just + $ CrawlerPB.ErrorsResponseResultError + $ (show offset <> ":" <> from msg) + -- | /search/query endpoint searchQuery :: ApiEffects es => AuthResult AuthenticatedUser -> SearchPB.QueryRequest -> Eff es SearchPB.QueryResponse searchQuery auth request = checkAuth auth response diff --git a/src/Monocle/Backend/Documents.hs b/src/Monocle/Backend/Documents.hs index 35cdcca55..960ae17b0 100644 --- a/src/Monocle/Backend/Documents.hs +++ b/src/Monocle/Backend/Documents.hs @@ -26,6 +26,7 @@ import Data.Vector qualified as V import Monocle.Entity import Monocle.Prelude import Monocle.Protob.Change qualified as ChangePB +import Monocle.Protob.Crawler (CrawlerError (..)) import Monocle.Protob.Crawler qualified as CrawlerPB import Monocle.Protob.Issue qualified as IssuePB import Monocle.Protob.Search qualified as SearchPB @@ -203,6 +204,14 @@ data EError = EError } deriving (Show, Eq, Generic) +instance From EError CrawlerError where + from eerror = + CrawlerError + { crawlerErrorBody = from eerror.erBody + , crawlerErrorMessage = from eerror.erMessage + , crawlerErrorCreatedAt = undefined + } + instance ToJSON EError where toJSON = genericToJSON $ aesonPrefix snakeCase diff --git a/src/Monocle/Backend/Queries.hs 
b/src/Monocle/Backend/Queries.hs index 9eaef257c..141cb91e6 100644 --- a/src/Monocle/Backend/Queries.hs +++ b/src/Monocle/Backend/Queries.hs @@ -17,16 +17,17 @@ import Database.Bloodhound qualified as BH import Database.Bloodhound.Raw (TermsCompositeAggBucket) import Database.Bloodhound.Raw qualified as BHR import Json.Extras qualified as Json -import Monocle.Backend.Documents (EChange (..), EChangeEvent (..), EChangeState (..), EDocType (..), allEventTypes) +import Monocle.Backend.Documents (EChange (..), EChangeEvent (..), EChangeState (..), EDocType (..), EError, allEventTypes) import Monocle.Config qualified as Config import Monocle.Prelude import Monocle.Protob.Metric qualified as MetricPB import Monocle.Protob.Search qualified as SearchPB -import Monocle.Search.Query (AuthorFlavor (..), QueryFlavor (..), RangeFlavor (..), rangeField) +import Monocle.Search.Query (AuthorFlavor (..), QueryFlavor (..), RangeFlavor (..), blankQuery, rangeField) import Monocle.Search.Query qualified as Q import Streaming.Prelude qualified as Streaming import Monocle.Effects +import Proto3.Suite (Enumerated (Enumerated)) -- Legacy wrappers simpleSearchLegacy :: (LoggerEffect :> es, ElasticEffect :> es, FromJSON a) => BH.IndexName -> BH.Search -> Eff es [BH.Hit a] @@ -232,6 +233,23 @@ doTermsCompositeAgg term = getPages Nothing ------------------------------------------------------------------------------- -- High level queries +orderDesc :: Enumerated SearchPB.Order_Direction +orderDesc = Enumerated $ Right SearchPB.Order_DirectionDESC + +crawlerErrors :: QEffects es => Eff es [EError] +crawlerErrors = do + (since, to) <- getQueryBound + -- keep only the time range of the user query + withQuery (blankQuery since to) do + withDocTypes [EErrorDoc] (QueryFlavor Author CreatedAt) do + doSearch (Just order) 500 + where + order = + SearchPB.Order + { orderField = "created_at" + , orderDirection = orderDesc + } + changes :: QEffects es => Maybe SearchPB.Order -> Word32 -> Eff es [EChange] changes orderM limit = withDocTypes [EChangeDoc] (QueryFlavor Author UpdatedAt) diff --git a/src/Monocle/Client/Api.hs b/src/Monocle/Client/Api.hs index a9aac52b5..4413d80f8 100644 --- a/src/Monocle/Client/Api.hs +++ b/src/Monocle/Client/Api.hs @@ -72,3 +72,6 @@ crawlerCommit = monocleReq "api/2/crawler/commit" crawlerCommitInfo :: MonadIO m => MonocleClient -> CommitInfoRequest -> m CommitInfoResponse crawlerCommitInfo = monocleReq "api/2/crawler/get_commit_info" + +crawlerErrors :: MonadIO m => MonocleClient -> ErrorsRequest -> m ErrorsResponse +crawlerErrors = monocleReq "api/2/crawler/errors" diff --git a/src/Monocle/Servant/HTTP.hs b/src/Monocle/Servant/HTTP.hs index 76811edc2..2a47635c4 100644 --- a/src/Monocle/Servant/HTTP.hs +++ b/src/Monocle/Servant/HTTP.hs @@ -11,11 +11,11 @@ import Effectful (Eff) import Effectful qualified as E import Effectful.Concurrent qualified as E import Monocle.Api.Jwt (AuthenticatedUser) -import Monocle.Api.Server (authGetMagicJwt, authWhoAmi, configGetAbout, configGetGroupMembers, configGetGroups, configGetProjects, configGetWorkspaces, crawlerAddDoc, crawlerCommit, crawlerCommitInfo, loginLoginValidation, metricGet, metricInfo, metricList, searchAuthor, searchCheck, searchFields, searchQuery, searchSuggestions) +import Monocle.Api.Server (authGetMagicJwt, authWhoAmi, configGetAbout, configGetGroupMembers, configGetGroups, configGetProjects, configGetWorkspaces, crawlerAddDoc, crawlerCommit, crawlerCommitInfo, crawlerErrors, loginLoginValidation, metricGet, metricInfo, metricList, 
searchAuthor, searchCheck, searchFields, searchQuery, searchSuggestions) import Monocle.Effects (ApiEffects) import Monocle.Protob.Auth (GetMagicJwtRequest, GetMagicJwtResponse, WhoAmiRequest, WhoAmiResponse) import Monocle.Protob.Config (GetAboutRequest, GetAboutResponse, GetGroupMembersRequest, GetGroupMembersResponse, GetGroupsRequest, GetGroupsResponse, GetProjectsRequest, GetProjectsResponse, GetWorkspacesRequest, GetWorkspacesResponse) -import Monocle.Protob.Crawler (AddDocRequest, AddDocResponse, CommitInfoRequest, CommitInfoResponse, CommitRequest, CommitResponse) +import Monocle.Protob.Crawler (AddDocRequest, AddDocResponse, CommitInfoRequest, CommitInfoResponse, CommitRequest, CommitResponse, ErrorsRequest, ErrorsResponse) import Monocle.Protob.Login (LoginValidationRequest, LoginValidationResponse) import Monocle.Protob.Metric (GetRequest, GetResponse, InfoRequest, InfoResponse, ListRequest, ListResponse) import Monocle.Protob.Search (AuthorRequest, AuthorResponse, CheckRequest, CheckResponse, FieldsRequest, FieldsResponse, QueryRequest, QueryResponse, SuggestionsRequest, SuggestionsResponse) @@ -43,6 +43,7 @@ type MonocleAPI = :<|> "crawler" :> "add" :> Auth '[JWT, Cookie] AuthenticatedUser :> ReqBody '[JSON] Monocle.Protob.Crawler.AddDocRequest :> Post '[PBJSON, JSON] Monocle.Protob.Crawler.AddDocResponse :<|> "crawler" :> "commit" :> Auth '[JWT, Cookie] AuthenticatedUser :> ReqBody '[JSON] Monocle.Protob.Crawler.CommitRequest :> Post '[PBJSON, JSON] Monocle.Protob.Crawler.CommitResponse :<|> "crawler" :> "get_commit_info" :> Auth '[JWT, Cookie] AuthenticatedUser :> ReqBody '[JSON] Monocle.Protob.Crawler.CommitInfoRequest :> Post '[PBJSON, JSON] Monocle.Protob.Crawler.CommitInfoResponse + :<|> "crawler" :> "errors" :> Auth '[JWT, Cookie] AuthenticatedUser :> ReqBody '[JSON] Monocle.Protob.Crawler.ErrorsRequest :> Post '[PBJSON, JSON] Monocle.Protob.Crawler.ErrorsResponse server :: ApiEffects es => E.Concurrent E.:> es => ServerT MonocleAPI (Eff es) server = loginLoginValidation @@ -64,3 +65,4 @@ server = :<|> crawlerAddDoc :<|> crawlerCommit :<|> crawlerCommitInfo + :<|> crawlerErrors From 5e660b097eb3cf24601db3f07778c781d4744124 Mon Sep 17 00:00:00 2001 From: Tristan Cacqueray Date: Thu, 21 Dec 2023 22:32:45 +0000 Subject: [PATCH 09/28] index: store the entity and timestamp in the errors_data structure --- codegen/Monocle/Protob/Crawler.hs | 45 ++++++++++++++++++- doc/openapi.yaml | 2 + schemas/monocle/protob/crawler.proto | 1 + src/Lentille.hs | 10 ----- src/Macroscope/Test.hs | 7 ++- src/Macroscope/Worker.hs | 7 ++- src/Monocle/Api/Server.hs | 23 +++++----- src/Monocle/Backend/Documents.hs | 67 +++++++++++++++++++--------- src/Monocle/Backend/Index.hs | 12 ++--- src/Monocle/Backend/Queries.hs | 16 ++++--- src/Monocle/Prelude.hs | 5 +++ src/Monocle/Search/Query.hs | 1 + 12 files changed, 140 insertions(+), 56 deletions(-) diff --git a/codegen/Monocle/Protob/Crawler.hs b/codegen/Monocle/Protob/Crawler.hs index 41a1b7c9e..f379d4ac2 100644 --- a/codegen/Monocle/Protob/Crawler.hs +++ b/codegen/Monocle/Protob/Crawler.hs @@ -346,6 +346,7 @@ data CrawlerError = CrawlerError , crawlerErrorBody :: Hs.Text , crawlerErrorCreatedAt :: Hs.Maybe Google.Protobuf.Timestamp.Timestamp + , crawlerErrorEntity :: Hs.Maybe Monocle.Protob.Crawler.Entity } deriving (Hs.Show, Hs.Eq, Hs.Ord, Hs.Generic) @@ -363,6 +364,7 @@ instance HsProtobuf.Message CrawlerError where { crawlerErrorMessage = crawlerErrorMessage , crawlerErrorBody = crawlerErrorBody , crawlerErrorCreatedAt = crawlerErrorCreatedAt + , 
crawlerErrorEntity = crawlerErrorEntity } = ( Hs.mconcat [ ( HsProtobuf.encodeMessageField @@ -384,6 +386,13 @@ instance HsProtobuf.Message CrawlerError where (crawlerErrorCreatedAt) ) ) + , ( HsProtobuf.encodeMessageField + (HsProtobuf.FieldNumber 4) + ( Hs.coerce @(Hs.Maybe Monocle.Protob.Crawler.Entity) + @(HsProtobuf.Nested Monocle.Protob.Crawler.Entity) + (crawlerErrorEntity) + ) + ) ] ) decodeMessage _ = @@ -408,6 +417,14 @@ instance HsProtobuf.Message CrawlerError where (HsProtobuf.FieldNumber 3) ) ) + <*> ( HsProtobuf.coerceOver + @(HsProtobuf.Nested Monocle.Protob.Crawler.Entity) + @(Hs.Maybe Monocle.Protob.Crawler.Entity) + ( HsProtobuf.at + HsProtobuf.decodeMessageField + (HsProtobuf.FieldNumber 4) + ) + ) dotProto _ = [ ( HsProtobufAST.DotProtoField (HsProtobuf.FieldNumber 1) @@ -436,10 +453,19 @@ instance HsProtobuf.Message CrawlerError where [] "" ) + , ( HsProtobufAST.DotProtoField + (HsProtobuf.FieldNumber 4) + ( HsProtobufAST.Prim + (HsProtobufAST.Named (HsProtobufAST.Single "Entity")) + ) + (HsProtobufAST.Single "entity") + [] + "" + ) ] instance HsJSONPB.ToJSONPB CrawlerError where - toJSONPB (CrawlerError f1 f2 f3) = + toJSONPB (CrawlerError f1 f2 f3 f4) = ( HsJSONPB.object [ "message" .= (Hs.coerce @(Hs.Text) @(HsProtobuf.String Hs.Text) (f1)) @@ -449,9 +475,14 @@ instance HsJSONPB.ToJSONPB CrawlerError where @(HsProtobuf.Nested Google.Protobuf.Timestamp.Timestamp) (f3) ) + , "entity" + .= ( Hs.coerce @(Hs.Maybe Monocle.Protob.Crawler.Entity) + @(HsProtobuf.Nested Monocle.Protob.Crawler.Entity) + (f4) + ) ] ) - toEncodingPB (CrawlerError f1 f2 f3) = + toEncodingPB (CrawlerError f1 f2 f3 f4) = ( HsJSONPB.pairs [ "message" .= (Hs.coerce @(Hs.Text) @(HsProtobuf.String Hs.Text) (f1)) @@ -461,6 +492,11 @@ instance HsJSONPB.ToJSONPB CrawlerError where @(HsProtobuf.Nested Google.Protobuf.Timestamp.Timestamp) (f3) ) + , "entity" + .= ( Hs.coerce @(Hs.Maybe Monocle.Protob.Crawler.Entity) + @(HsProtobuf.Nested Monocle.Protob.Crawler.Entity) + (f4) + ) ] ) @@ -481,6 +517,11 @@ instance HsJSONPB.FromJSONPB CrawlerError where @(Hs.Maybe Google.Protobuf.Timestamp.Timestamp) (obj .: "created_at") ) + <*> ( HsProtobuf.coerceOver + @(HsProtobuf.Nested Monocle.Protob.Crawler.Entity) + @(Hs.Maybe Monocle.Protob.Crawler.Entity) + (obj .: "entity") + ) ) ) diff --git a/doc/openapi.yaml b/doc/openapi.yaml index 5ec0a5678..370664ed5 100644 --- a/doc/openapi.yaml +++ b/doc/openapi.yaml @@ -753,6 +753,8 @@ components: created_at: type: string format: RFC3339 + entity: + $ref: '#/components/schemas/monocle_crawler_Entity' monocle_crawler_Entity: properties: organization_name: diff --git a/schemas/monocle/protob/crawler.proto b/schemas/monocle/protob/crawler.proto index 472fbeb65..ebcc47201 100644 --- a/schemas/monocle/protob/crawler.proto +++ b/schemas/monocle/protob/crawler.proto @@ -32,6 +32,7 @@ message CrawlerError { string message = 1; string body = 2; google.protobuf.Timestamp created_at = 3; + Entity entity = 4; } message ErrorsRequest { diff --git a/src/Lentille.hs b/src/Lentille.hs index 42f7232f9..ae3b97eb5 100644 --- a/src/Lentille.hs +++ b/src/Lentille.hs @@ -31,7 +31,6 @@ module Lentille ( module Monocle.Logging, ) where -import Data.ByteString.Base64.Lazy qualified as B64 import Data.Text qualified as T import Google.Protobuf.Timestamp qualified as T import Monocle.Class @@ -46,7 +45,6 @@ import Monocle.Protob.Change ( Change_ChangeState (Change_ChangeStateClosed, Change_ChangeStateMerged), Ident (..), ) -import Monocle.Protob.Crawler (CrawlerError (..)) import Network.HTTP.Client 
qualified as HTTP import Proto3.Suite (Enumerated (Enumerated)) import Streaming.Prelude qualified as S @@ -96,14 +94,6 @@ data LentilleError | GraphError UTCTime GraphQLError deriving (Show, Generic, ToJSON) -instance From LentilleError CrawlerError where - from = \case - DecodeError ts xs -> CrawlerError "decode error" (encodeBlob xs) (Just $ from ts) - GraphError ts x -> CrawlerError "graph error" (encodeBlob x) (Just $ from ts) - -encodeBlob :: ToJSON a => a -> LText -encodeBlob = decodeUtf8 . B64.encode . encode - type LentilleStream es a = Stream (Of (Either LentilleError a)) (Eff es) () ------------------------------------------------------------------------------- diff --git a/src/Macroscope/Test.hs b/src/Macroscope/Test.hs index 51211ded0..fee86d647 100644 --- a/src/Macroscope/Test.hs +++ b/src/Macroscope/Test.hs @@ -26,6 +26,7 @@ import Monocle.Protob.Crawler qualified as CrawlerPB import Streaming.Prelude qualified as Streaming import Test.Tasty import Test.Tasty.HUnit +import Monocle.Client.Api (crawlerErrors) runLentilleM :: MonocleClient -> Eff [E.Reader CrawlerEnv, MonoClientEffect, LoggerEffect, GerritEffect, BZEffect, TimeEffect, HttpEffect, PrometheusEffect, EnvEffect, Fail, Retry, Concurrent, IOE] a -> IO a runLentilleM client action = do @@ -55,7 +56,11 @@ testCrawlingPoint = do (currentOldestAge, _) <- getOldest liftIO $ assertBool "Commit date is updated on failure" (currentOldestAge > oldestAge) - -- TODO: check that the errors got indexed + errorResponse <- crawlerErrors client (CrawlerPB.ErrorsRequest (from indexName) "from:2020") + case errorResponse of + CrawlerPB.ErrorsResponse Nothing -> error "Bad response" + CrawlerPB.ErrorsResponse (Just (CrawlerPB.ErrorsResponseResultError err)) -> error $ from err + CrawlerPB.ErrorsResponse (Just (CrawlerPB.ErrorsResponseResultSuccess errors)) -> liftIO $ assertEqual "Error got indexed" (length errors.errorsListErrors) 1 Macroscope.runStream apiKey indexName (CrawlerName crawlerName) (Macroscope.Changes $ goodStream currentOldestAge) diff --git a/src/Macroscope/Worker.hs b/src/Macroscope/Worker.hs index e900efdfe..75d6af2c9 100644 --- a/src/Macroscope/Worker.hs +++ b/src/Macroscope/Worker.hs @@ -83,6 +83,7 @@ data ProcessError es -- | 'processStream' read the stream of document and post to the monocle API processStream :: forall es. + Entity -> -- | Funtion to log about the processing (Int -> Eff es ()) -> -- | Function to post on the Monocle API @@ -91,7 +92,7 @@ processStream :: LentilleStream es DocumentType -> -- | The processing results Eff es [Maybe (ProcessError es)] -processStream logFunc postFunc = go (0 :: Word) [] [] +processStream entity logFunc postFunc = go (0 :: Word) [] [] where go count acc results stream = do eDocument <- S.next stream @@ -104,7 +105,8 @@ processStream logFunc postFunc = go (0 :: Word) [] [] -- We got a new document let doc = case edoc of Right x -> x - Left err -> DTError $ from err + Left (DecodeError ts err) -> DTError $ CrawlerError "decode" (encodeBlob err) (Just $ from ts) (Just $ from entity) + Left (GraphError ts err) -> DTError $ CrawlerError "graph" (encodeBlob err) (Just $ from ts) (Just $ from entity) let newAcc = doc : acc if count == 499 then do @@ -179,6 +181,7 @@ runStreamError startTime apiKey indexName (CrawlerName crawlerName) documentStre -- Run the document stream for that entity postResult <- processStream + entity (\c -> logInfo "Posting documents" ["count" .= c]) (httpRetry "api/commit/add" . mCrawlerAddDoc . 
mkRequest entity) (getStream oldestAge entity) diff --git a/src/Monocle/Api/Server.hs b/src/Monocle/Api/Server.hs index 03410f3aa..380243eb2 100644 --- a/src/Monocle/Api/Server.hs +++ b/src/Monocle/Api/Server.hs @@ -315,7 +315,8 @@ crawlerAddDoc _auth request = do case requestE of Right (index, crawler) -> runEmptyQueryM index do - addErrors crawlerName (toEntity entity) errors + unless (V.null errors) do + addErrors crawlerName (toEntity entity) errors case toEntity entity of Project _ -> addChanges crawlerName changes events ProjectIssue _ -> addIssues crawlerName issues issuesEvents @@ -325,17 +326,17 @@ crawlerAddDoc _auth request = do Left err -> pure $ toErrorResponse err where addErrors crawlerName entity errors = do - logInfo "AddingErrors" ["crawler" .= crawlerName, "errors" .= length errors] - let toError :: CrawlerError -> (UTCTime, EError) + logInfo "AddingErrors" ["crawler" .= crawlerName, "entity" .= entity, "errors" .= length errors] + let toError :: CrawlerError -> EError toError ce = - ( from $ fromMaybe (error "missing timestamp") ce.crawlerErrorCreatedAt - , EError - { erCrawlerName = from crawlerName - , erEntity = from entity - , erMessage = from ce.crawlerErrorMessage - , erBody = from ce.crawlerErrorBody - } - ) + EError + { erCrawlerName = from crawlerName + , erEntity = from entity + , erMessage = from ce.crawlerErrorMessage + , erBody = from ce.crawlerErrorBody + , erCreatedAt = from $ fromMaybe (error "missing timestamp") ce.crawlerErrorCreatedAt + } + I.indexErrors $ toList (toError <$> errors) addTDs crawlerName taskDatas = do diff --git a/src/Monocle/Backend/Documents.hs b/src/Monocle/Backend/Documents.hs index 960ae17b0..6ad3f40b7 100644 --- a/src/Monocle/Backend/Documents.hs +++ b/src/Monocle/Backend/Documents.hs @@ -21,6 +21,7 @@ module Monocle.Backend.Documents where import Data.Aeson (Value (String), defaultOptions, genericParseJSON, genericToJSON, withObject, withText, (.:)) import Data.Aeson.Casing (aesonPrefix, snakeCase) +import Data.Aeson.Types qualified import Data.Time.Format (defaultTimeLocale, formatTime, parseTimeM) import Data.Vector qualified as V import Monocle.Entity @@ -198,7 +199,8 @@ instance From ETaskData SearchPB.TaskData where data EError = EError { erCrawlerName :: Text - , erEntity :: Text + , erEntity :: Entity + , erCreatedAt :: UTCTime , erMessage :: Text , erBody :: Text } @@ -209,14 +211,52 @@ instance From EError CrawlerError where CrawlerError { crawlerErrorBody = from eerror.erBody , crawlerErrorMessage = from eerror.erMessage - , crawlerErrorCreatedAt = undefined + , crawlerErrorCreatedAt = Just $ from eerror.erCreatedAt + , crawlerErrorEntity = Just $ from eerror.erEntity } +-- Custom encoder to manually serialize the entity type +-- This needs to match the "error_data" schema above instance ToJSON EError where - toJSON = genericToJSON $ aesonPrefix snakeCase + toJSON e = + object + [ ("crawler_name", toJSON e.erCrawlerName) + , ("created_at", toJSON e.erCreatedAt) + , ("entity_type", String (entityTypeName (from e.erEntity))) + , ("entity_value", String $ entityValue e.erEntity) + , ("message", String $ e.erMessage) + , ("body", String $ e.erBody) + ] instance FromJSON EError where - parseJSON = genericParseJSON $ aesonPrefix snakeCase + parseJSON = withObject "EError" $ \root -> do + v <- root .: "error_data" + erCrawlerName <- v .: "crawler_name" + erCreatedAt <- v .: "created_at" + evalue <- v .: "entity_value" + etype <- v .: "entity_type" + erEntity <- parseEntity evalue etype + erMessage <- v .: "message" + erBody 
<- v .: "body" + pure EError {..} + +-- | Helper to encode entity +-- WARNING: don't forget to update the parseEntity implementation below when changing the entity document encoding +entityValue :: Entity -> Text +entityValue = \case + Organization n -> n + Project n -> n + ProjectIssue n -> n + TaskDataEntity n -> n + User n -> n + +parseEntity :: Text -> Text -> Data.Aeson.Types.Parser Entity +parseEntity evalue = \case + "organization" -> pure $ Organization evalue + "project" -> pure $ Project evalue + "taskdata" -> pure $ TaskDataEntity evalue + "user" -> pure $ User evalue + etype -> fail $ "Unknown crawler entity type name: " <> from etype data EChangeState = EChangeOpen @@ -648,29 +688,16 @@ instance ToJSON ECrawlerMetadataObject where [ ("crawler_name", toJSON (ecmCrawlerName e)) , ("last_commit_at", toJSON (ecmLastCommitAt e)) , ("crawler_type", String (entityTypeName (from $ ecmCrawlerEntity e))) - , ("crawler_type_value", String entityValue) + , ("crawler_type_value", String $ entityValue $ e.ecmCrawlerEntity) ] - where - -- WARNING: don't forget to update the FromJSON implementation below when changing the entity document encoding - entityValue = case ecmCrawlerEntity e of - Organization n -> n - Project n -> n - ProjectIssue n -> n - TaskDataEntity n -> n - User n -> n instance FromJSON ECrawlerMetadataObject where parseJSON = withObject "CrawlerMetadataObject" $ \v -> do ecmCrawlerName <- v .: "crawler_name" ecmLastCommitAt <- v .: "last_commit_at" - (etype :: Text) <- v .: "crawler_type" + etype <- v .: "crawler_type" evalue <- v .: "crawler_type_value" - ecmCrawlerEntity <- case etype of - "organization" -> pure $ Organization evalue - "project" -> pure $ Project evalue - "taskdata" -> pure $ TaskDataEntity evalue - "user" -> pure $ User evalue - _ -> fail $ "Unknown crawler entity type name: " <> from etype + ecmCrawlerEntity <- parseEntity evalue etype pure ECrawlerMetadataObject {..} newtype ECrawlerMetadata = ECrawlerMetadata diff --git a/src/Monocle/Backend/Index.hs b/src/Monocle/Backend/Index.hs index f3ff5927f..7f2d13aee 100644 --- a/src/Monocle/Backend/Index.hs +++ b/src/Monocle/Backend/Index.hs @@ -233,9 +233,11 @@ instance ToJSON ChangesIndexMapping where [ "properties" .= object [ "crawler_name" .= KWMapping - , "entity" .= TextAndKWMapping + , "entity_type" .= KWMapping + , "entity_value" .= KWMapping , "message" .= TextAndKWMapping , "body" .= BlobMapping + , "created_at" .= DateIndexMapping ] ] ] @@ -649,13 +651,13 @@ indexChanges changes = indexDocs $ fmap (toDoc . ensureType) changes toDoc change = (toJSON change, getChangeDocId change) ensureType change = change {echangeType = EChangeDoc} -indexErrors :: MonoQuery :> es => IndexEffects es => [(UTCTime, EError)] -> Eff es () +indexErrors :: MonoQuery :> es => IndexEffects es => [EError] -> Eff es () indexErrors errors = indexDocs $ fmap toDoc errors where - toDoc (ts, err) = (getErrorDoc ts err, getErrorDocId err) + toDoc err = (getErrorDoc err, getErrorDocId err) - getErrorDoc :: UTCTime -> EError -> Value - getErrorDoc ts err = object ["created_at" .= ts, "type" .= EErrorDoc, "error_data" .= toJSON err] + getErrorDoc :: EError -> Value + getErrorDoc err = object ["type" .= EErrorDoc, "error_data" .= toJSON err] getErrorDocId :: EError -> BH.DocId getErrorDocId = getBHDocID . 
erBody diff --git a/src/Monocle/Backend/Queries.hs b/src/Monocle/Backend/Queries.hs index 141cb91e6..4a1584c60 100644 --- a/src/Monocle/Backend/Queries.hs +++ b/src/Monocle/Backend/Queries.hs @@ -22,7 +22,7 @@ import Monocle.Config qualified as Config import Monocle.Prelude import Monocle.Protob.Metric qualified as MetricPB import Monocle.Protob.Search qualified as SearchPB -import Monocle.Search.Query (AuthorFlavor (..), QueryFlavor (..), RangeFlavor (..), blankQuery, rangeField) +import Monocle.Search.Query (AuthorFlavor (..), QueryFlavor (..), RangeFlavor (..), rangeField) import Monocle.Search.Query qualified as Q import Streaming.Prelude qualified as Streaming @@ -239,14 +239,20 @@ orderDesc = Enumerated $ Right SearchPB.Order_DirectionDESC crawlerErrors :: QEffects es => Eff es [EError] crawlerErrors = do (since, to) <- getQueryBound + let queryFilter = + [ BH.QueryRangeQuery + $ BH.mkRangeQuery (BH.FieldName "error_data.created_at") + $ BH.RangeDateGteLte (coerce since) (coerce to) + ] -- keep only the time range of the user query - withQuery (blankQuery since to) do - withDocTypes [EErrorDoc] (QueryFlavor Author CreatedAt) do - doSearch (Just order) 500 + dropQuery do + withFilter queryFilter do + withDocTypes [EErrorDoc] (QueryFlavor Author CreatedAt) do + doSearch (Just order) 500 where order = SearchPB.Order - { orderField = "created_at" + { orderField = "error_data.created_at" , orderDirection = orderDesc } diff --git a/src/Monocle/Prelude.hs b/src/Monocle/Prelude.hs index 06afd783e..4911287d9 100644 --- a/src/Monocle/Prelude.hs +++ b/src/Monocle/Prelude.hs @@ -16,6 +16,7 @@ module Monocle.Prelude ( setEnv, headMaybe, (:::), + encodeBlob, -- * secret Secret, @@ -222,6 +223,7 @@ import Data.Aeson.Encode.Pretty qualified as Aeson import Data.Aeson.Key qualified as AesonKey import Data.Aeson.Lens (_Integer, _Object) import Data.Aeson.Types (Pair) +import Data.ByteString.Base64.Lazy qualified as B64 import Data.Fixed (Deci, Fixed (..), HasResolution (resolution), Pico) import Data.Map qualified as Map import Data.Tagged @@ -604,3 +606,6 @@ streamingFromListT = S.unfoldr go go listT = do res <- ListT.uncons listT pure $ res `orDie` () + +encodeBlob :: ToJSON a => a -> LText +encodeBlob = decodeUtf8 . B64.encode . 
encode diff --git a/src/Monocle/Search/Query.hs b/src/Monocle/Search/Query.hs index e42780707..d7d532beb 100644 --- a/src/Monocle/Search/Query.hs +++ b/src/Monocle/Search/Query.hs @@ -157,6 +157,7 @@ fields = ] queryFieldToDocument :: Field -> Maybe Field +queryFieldToDocument "error_data.created_at" = Just "error_data.created_at" queryFieldToDocument name = do (_, field, _) <- lookup name fields pure field From 37656c9e7233df0ba69a24c0869f9cd3a4f3521f Mon Sep 17 00:00:00 2001 From: Tristan Cacqueray Date: Fri, 22 Dec 2023 15:56:17 +0000 Subject: [PATCH 10/28] test: verify the indexed error content --- src/Macroscope/Test.hs | 12 ++++++++---- src/Monocle/Backend/Documents.hs | 14 ++++++++++++-- src/Monocle/Backend/Queries.hs | 8 ++++++-- 3 files changed, 26 insertions(+), 8 deletions(-) diff --git a/src/Macroscope/Test.hs b/src/Macroscope/Test.hs index fee86d647..dac14bb19 100644 --- a/src/Macroscope/Test.hs +++ b/src/Macroscope/Test.hs @@ -1,6 +1,7 @@ -- | Tests for the macroscope process module Macroscope.Test where +import Data.ByteString.Base64.Lazy qualified as B64 import Effectful.Env import Effectful.Prometheus import Effectful.Reader.Static qualified as E @@ -17,6 +18,7 @@ import Monocle.Backend.Queries qualified as Q import Monocle.Backend.Test (fakeChangePB, withTenantConfig) import Monocle.Backend.Test qualified as BT (fakeChange, fakeDate, fakeDateAlt) import Monocle.Client +import Monocle.Client.Api (crawlerErrors) import Monocle.Config qualified as Config import Monocle.Effects import Monocle.Entity (CrawlerName (..), Entity (Project)) @@ -26,7 +28,6 @@ import Monocle.Protob.Crawler qualified as CrawlerPB import Streaming.Prelude qualified as Streaming import Test.Tasty import Test.Tasty.HUnit -import Monocle.Client.Api (crawlerErrors) runLentilleM :: MonocleClient -> Eff [E.Reader CrawlerEnv, MonoClientEffect, LoggerEffect, GerritEffect, BZEffect, TimeEffect, HttpEffect, PrometheusEffect, EnvEffect, Fail, Retry, Concurrent, IOE] a -> IO a runLentilleM client action = do @@ -56,11 +57,14 @@ testCrawlingPoint = do (currentOldestAge, _) <- getOldest liftIO $ assertBool "Commit date is updated on failure" (currentOldestAge > oldestAge) + -- Check that the error got indexed errorResponse <- crawlerErrors client (CrawlerPB.ErrorsRequest (from indexName) "from:2020") case errorResponse of - CrawlerPB.ErrorsResponse Nothing -> error "Bad response" - CrawlerPB.ErrorsResponse (Just (CrawlerPB.ErrorsResponseResultError err)) -> error $ from err - CrawlerPB.ErrorsResponse (Just (CrawlerPB.ErrorsResponseResultSuccess errors)) -> liftIO $ assertEqual "Error got indexed" (length errors.errorsListErrors) 1 + CrawlerPB.ErrorsResponse (Just (CrawlerPB.ErrorsResponseResultSuccess (CrawlerPB.ErrorsList (toList -> [e])))) -> liftIO do + e.crawlerErrorMessage @?= "decode" + (B64.decode . 
encodeUtf8 $ e.crawlerErrorBody) @?= Right "[\"Oops\"]" + (from <$> e.crawlerErrorEntity) @?= Just (Project "opendev/neutron") + _ -> error $ "Expected one error, got: " <> show errorResponse Macroscope.runStream apiKey indexName (CrawlerName crawlerName) (Macroscope.Changes $ goodStream currentOldestAge) diff --git a/src/Monocle/Backend/Documents.hs b/src/Monocle/Backend/Documents.hs index 6ad3f40b7..88b1e977a 100644 --- a/src/Monocle/Backend/Documents.hs +++ b/src/Monocle/Backend/Documents.hs @@ -197,6 +197,17 @@ instance From ETaskData SearchPB.TaskData where taskDataPrefix = from $ fromMaybe "" $ tdPrefix td in SearchPB.TaskData {..} +newtype EErrorData = EErrorData + { eeErrorData :: EError + } + deriving (Show, Eq, Generic) + +instance ToJSON EErrorData where + toJSON = genericToJSON $ aesonPrefix snakeCase + +instance FromJSON EErrorData where + parseJSON = genericParseJSON $ aesonPrefix snakeCase + data EError = EError { erCrawlerName :: Text , erEntity :: Entity @@ -229,8 +240,7 @@ instance ToJSON EError where ] instance FromJSON EError where - parseJSON = withObject "EError" $ \root -> do - v <- root .: "error_data" + parseJSON = withObject "EError" $ \v -> do erCrawlerName <- v .: "crawler_name" erCreatedAt <- v .: "created_at" evalue <- v .: "entity_value" diff --git a/src/Monocle/Backend/Queries.hs b/src/Monocle/Backend/Queries.hs index 4a1584c60..8d9038f4f 100644 --- a/src/Monocle/Backend/Queries.hs +++ b/src/Monocle/Backend/Queries.hs @@ -17,7 +17,7 @@ import Database.Bloodhound qualified as BH import Database.Bloodhound.Raw (TermsCompositeAggBucket) import Database.Bloodhound.Raw qualified as BHR import Json.Extras qualified as Json -import Monocle.Backend.Documents (EChange (..), EChangeEvent (..), EChangeState (..), EDocType (..), EError, allEventTypes) +import Monocle.Backend.Documents (EChange (..), EChangeEvent (..), EChangeState (..), EDocType (..), EError, EErrorData, allEventTypes, eeErrorData) import Monocle.Config qualified as Config import Monocle.Prelude import Monocle.Protob.Metric qualified as MetricPB @@ -248,8 +248,12 @@ crawlerErrors = do dropQuery do withFilter queryFilter do withDocTypes [EErrorDoc] (QueryFlavor Author CreatedAt) do - doSearch (Just order) 500 + fmap toError <$> doSearch (Just order) 500 where + -- it is necessary to request the EErrorData so that the source fields are correctly set in BHR.search + toError :: EErrorData -> EError + toError = eeErrorData + order = SearchPB.Order { orderField = "error_data.created_at" From ee8a8699c974acba3216392f2c576899e9ea813c Mon Sep 17 00:00:00 2001 From: Tristan Cacqueray Date: Fri, 22 Dec 2023 16:06:53 +0000 Subject: [PATCH 11/28] crawler: continue processing even when there are decoding errors --- src/Lentille/GraphQL.hs | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/Lentille/GraphQL.hs b/src/Lentille/GraphQL.hs index 368bfbb93..b2e0cabf0 100644 --- a/src/Lentille/GraphQL.hs +++ b/src/Lentille/GraphQL.hs @@ -264,13 +264,12 @@ streamFetch client@GraphClient {..} mkArgs StreamFetchOptParams {..} transformRe -- Log crawling status logStep pageInfo rateLimitM xs totalFetched - case decodingErrors of - _ : _ -> do + unless (null decodingErrors) do now <- lift mGetCurrentTime S.yield (Left $ DecodeError now decodingErrors) - [] -> do - -- Yield the results - S.each (Right <$> xs) - -- Call recursively when response has a next page - when (hasNextPage pageInfo) $ go (Just pageInfo) (totalFetched + length xs) + -- Yield the results + S.each (Right <$> xs) + + -- Call 
recursively when response has a next page
+      when (hasNextPage pageInfo) $ go (Just pageInfo) (totalFetched + length xs)

From 38119ec15f614cbb8a2a9acbea15c516643c36d3 Mon Sep 17 00:00:00 2001
From: Tristan Cacqueray
Date: Fri, 22 Dec 2023 16:14:08 +0000
Subject: [PATCH 12/28] chore: perform monocle-reformat-run

---
 src/Lentille/GraphQL.hs          | 4 ++--
 src/Monocle/Api/Server.hs        | 3 +--
 src/Monocle/Backend/Documents.hs | 6 +++---
 3 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/src/Lentille/GraphQL.hs b/src/Lentille/GraphQL.hs
index b2e0cabf0..90b22df29 100644
--- a/src/Lentille/GraphQL.hs
+++ b/src/Lentille/GraphQL.hs
@@ -265,8 +265,8 @@ streamFetch client@GraphClient {..} mkArgs StreamFetchOptParams {..} transformRe
     logStep pageInfo rateLimitM xs totalFetched
 
     unless (null decodingErrors) do
-        now <- lift mGetCurrentTime
-        S.yield (Left $ DecodeError now decodingErrors)
+      now <- lift mGetCurrentTime
+      S.yield (Left $ DecodeError now decodingErrors)
 
     -- Yield the results
     S.each (Right <$> xs)
diff --git a/src/Monocle/Api/Server.hs b/src/Monocle/Api/Server.hs
index 380243eb2..92117a8cf 100644
--- a/src/Monocle/Api/Server.hs
+++ b/src/Monocle/Api/Server.hs
@@ -578,8 +578,7 @@ crawlerErrors auth request = checkAuth auth response
       pure
         $ CrawlerPB.ErrorsResponse
         $ Just
-        $ CrawlerPB.ErrorsResponseResultError
-        $ (show offset <> ":" <> from msg)
+        $ CrawlerPB.ErrorsResponseResultError (show offset <> ":" <> from msg)
 
 -- | /search/query endpoint
 searchQuery :: ApiEffects es => AuthResult AuthenticatedUser -> SearchPB.QueryRequest -> Eff es SearchPB.QueryResponse
diff --git a/src/Monocle/Backend/Documents.hs b/src/Monocle/Backend/Documents.hs
index 88b1e977a..c172fc792 100644
--- a/src/Monocle/Backend/Documents.hs
+++ b/src/Monocle/Backend/Documents.hs
@@ -235,8 +235,8 @@ instance ToJSON EError where
       , ("created_at", toJSON e.erCreatedAt)
       , ("entity_type", String (entityTypeName (from e.erEntity)))
       , ("entity_value", String $ entityValue e.erEntity)
-      , ("message", String $ e.erMessage)
-      , ("body", String $ e.erBody)
+      , ("message", String e.erMessage)
+      , ("body", String e.erBody)
       ]
 
 instance FromJSON EError where
@@ -698,7 +698,7 @@ instance ToJSON ECrawlerMetadataObject where
       [ ("crawler_name", toJSON (ecmCrawlerName e))
       , ("last_commit_at", toJSON (ecmLastCommitAt e))
       , ("crawler_type", String (entityTypeName (from $ ecmCrawlerEntity e)))
-      , ("crawler_type_value", String $ entityValue $ e.ecmCrawlerEntity)
+      , ("crawler_type_value", String $ entityValue e.ecmCrawlerEntity)
       ]
 
 instance FromJSON ECrawlerMetadataObject where

From f179448ac9a3f3a64cf30585d169b2e35bce213a Mon Sep 17 00:00:00 2001
From: Tristan Cacqueray
Date: Fri, 22 Dec 2023 20:45:58 +0000
Subject: [PATCH 13/28] api: update dropTime to keep the current hour

This change ensures that the bounded query includes errors added during the
current day. The Elasticsearch filter for errors changed from
lte: 2023-12-22T00:00:00Z to lte: 2023-12-22T20:45:45Z, resulting in the
desired behavior.
---
 src/Monocle/Prelude.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Monocle/Prelude.hs b/src/Monocle/Prelude.hs
index 4911287d9..8a0b30b61 100644
--- a/src/Monocle/Prelude.hs
+++ b/src/Monocle/Prelude.hs
@@ -378,7 +378,7 @@ utctime = qqLiteral eitherParseUTCTime 'eitherParseUTCTime
 
 -- | dropTime ensures the encoded date does not have millisecond.
 -- This actually discard hour differences
 dropTime :: UTCTime -> UTCTime
-dropTime (UTCTime day _sec) = UTCTime day 0
+dropTime (UTCTime day sec) = UTCTime day (fromInteger $ ceiling sec)
 
 -- | A newtype for UTCTime which doesn't have milli second and tolerates a missing trailing 'Z' when decoding from JSON
 newtype MonocleTime = MonocleTime UTCTime

From 4084bff8ff6afa565206a2734e5ec00b32a79a2d Mon Sep 17 00:00:00 2001
From: Tristan Cacqueray
Date: Fri, 22 Dec 2023 19:32:23 +0000
Subject: [PATCH 14/28] doc: add example to run a single test

---
 CONTRIBUTING.md              | 26 ++++++++++++++++++++++++++
 src/Monocle/Backend/Index.hs |  2 ++
 2 files changed, 28 insertions(+)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 3f685e815..6aa726142 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -52,6 +52,12 @@ nix develop --command monocle-repl
 λ> run $ defaultApiConfig 8080 "http://localhost:19200" "etc/config.yaml"
 ```
 
+… or by running the executable:
+
+```ShellSession
+CRAWLERS_API_KEY=secret MONOCLE_CONFIG=./etc/config.yaml nix develop --command cabal run -O0 monocle -- api
+```
+
 The Monocle UI should be accessible:
 
 ```ShellSession
@@ -145,6 +151,12 @@ Run the full test suite with:
 nix develop --command monocle-ci-run
 ```
 
+Run a single test:
+
+```ShellSession
+cabal test --test-options='-p "Change stream"'
+```
+
 ## Start the web development server
 
 Start the web dev server (hot-reload):
@@ -239,3 +251,17 @@ Test the containers:
 podman run --network host -v prom-data:/var/lib/prometheus:Z -e API_TARGET=localhost:8080 --rm quay.io/change-metrics/monocle-prometheus:latest
 podman run -it --rm --network host quay.io/change-metrics/monocle-grafana:latest
 ```
+
+## Example queries
+
+Add a crawler error:
+
+```ShellSession
+curl -X POST -d '{"index": "monocle", "crawler": "demo", "apikey": "secret", "entity": {"project_name": "neutron"}, "errors": [{"created_at": "2023-12-22T10:11:12Z"}]}' -H "Content-type: application/json" localhost:8080/api/2/crawler/add
+```
+
+Get crawler errors:
+
+```ShellSession
+curl -X POST -d '{"index": "monocle"}' -H "Content-type: application/json" localhost:8080/api/2/crawler/errors
+```
diff --git a/src/Monocle/Backend/Index.hs b/src/Monocle/Backend/Index.hs
index 7f2d13aee..90600dd66 100644
--- a/src/Monocle/Backend/Index.hs
+++ b/src/Monocle/Backend/Index.hs
@@ -777,6 +777,7 @@ getOrphanTaskDataAndDeclareAdoption urls = do
     )
 
 updateChangesAndEventsFromOrphanTaskData :: MonoQuery :> es => IndexEffects es => [EChange] -> [EChangeEvent] -> Eff es ()
+updateChangesAndEventsFromOrphanTaskData [] [] = pure ()
 updateChangesAndEventsFromOrphanTaskData changes events = do
   let mapping = uMapping Map.empty getFlatMapping
   adoptedTDs <- getOrphanTaskDataAndDeclareAdoption $ from <$> Map.keys mapping
@@ -1047,6 +1048,7 @@ populateAuthorCache = do
 
 -- | This function extacts authors from events and adds them to the author cache
 addCachedAuthors :: MonoQuery :> es => IndexEffects es => [EChangeEvent] -> Eff es ()
+addCachedAuthors [] = pure ()
 addCachedAuthors events = do
   indexName <- getIndexName
   let muids = from . authorMuid <$> mapMaybe echangeeventAuthor events

From 9f040a75666601c762f2fde20fadddc8027aa06a Mon Sep 17 00:00:00 2001
From: Tristan Cacqueray
Date: Fri, 22 Dec 2023 17:56:44 +0000
Subject: [PATCH 15/28] web: add crawler api codegen

This change enables the web client to query the crawler API to collect any
errors.
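
For reference, a minimal usage sketch of the generated client. The request
record fields (index, query) are assumed from the ErrorsRequest definition,
and the logging callback is illustrative only:

  // Query the errors endpoint and log the decoded errors_response.
  WebApi.Crawler.errors({index: "monocle", query: "from:2020"})
  |> Js.Promise.then_(resp => resp.data->Js.log->Js.Promise.resolve)
  |> ignore
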
--- Makefile | 2 +- codegen/MonocleCodegen.hs | 1 - codegen/rename_bs_module.py | 71 +- web/src/components/WebApi.res | 37 + web/src/messages/ChangeBs.ml | 1109 +++++++++++++++++++++++++++++ web/src/messages/ChangeBs.mli | 109 +++ web/src/messages/ChangeTypes.ml | 310 ++++++++ web/src/messages/ChangeTypes.mli | 266 +++++++ web/src/messages/CrawlerBs.ml | 788 ++++++++++++++++++++ web/src/messages/CrawlerBs.mli | 109 +++ web/src/messages/CrawlerTypes.ml | 212 ++++++ web/src/messages/CrawlerTypes.mli | 205 ++++++ web/src/messages/IssueBs.ml | 376 ++++++++++ web/src/messages/IssueBs.mli | 43 ++ web/src/messages/IssueTypes.ml | 117 +++ web/src/messages/IssueTypes.mli | 104 +++ 16 files changed, 3839 insertions(+), 20 deletions(-) create mode 100644 web/src/messages/ChangeBs.ml create mode 100644 web/src/messages/ChangeBs.mli create mode 100644 web/src/messages/ChangeTypes.ml create mode 100644 web/src/messages/ChangeTypes.mli create mode 100644 web/src/messages/CrawlerBs.ml create mode 100644 web/src/messages/CrawlerBs.mli create mode 100644 web/src/messages/CrawlerTypes.ml create mode 100644 web/src/messages/CrawlerTypes.mli create mode 100644 web/src/messages/IssueBs.ml create mode 100644 web/src/messages/IssueBs.mli create mode 100644 web/src/messages/IssueTypes.ml create mode 100644 web/src/messages/IssueTypes.mli diff --git a/Makefile b/Makefile index 1b45948ba..27cec1b46 100644 --- a/Makefile +++ b/Makefile @@ -35,7 +35,7 @@ codegen-haskell: codegen-javascript: rm -f web/src/messages/* - sh -c 'for pb in $(MESSAGES); do ocaml-protoc $(PINCLUDE) -bs -ml_out web/src/messages/ schemas/$${pb}; done' + sh -c 'for pb in $(MESSAGES) $(CRAWLER); do ocaml-protoc $(PINCLUDE) -bs -ml_out web/src/messages/ schemas/$${pb}; done' python3 ./codegen/rename_bs_module.py ./web/src/messages/ codegen-openapi: diff --git a/codegen/MonocleCodegen.hs b/codegen/MonocleCodegen.hs index ca9fdee36..5f4eef369 100644 --- a/codegen/MonocleCodegen.hs +++ b/codegen/MonocleCodegen.hs @@ -120,7 +120,6 @@ protoToReScript = fromProto headers mkService msgName moduleName msg = moduleName <> "Types." <> snake (attrName msg) mkMethod moduleName (name, input, output, path) - | "/crawler/" `Text.isInfixOf` path = [] | otherwise = [ "@module(\"axios\")" , "external " <> camel name <> "Raw: (string, 'a) => axios<'b> = \"post\"" diff --git a/codegen/rename_bs_module.py b/codegen/rename_bs_module.py index 09932983c..7fbc1363d 100644 --- a/codegen/rename_bs_module.py +++ b/codegen/rename_bs_module.py @@ -35,38 +35,73 @@ def fix_field_name(content): ) .replace("Task_data_types", "TaskDataTypes") .replace("Task_data_bs", "TaskDataBs") - #.replace("Ratio", "_ratio") + .replace("Change_types", "ChangeTypes") + .replace("Change_bs", "ChangeBs") + .replace("Search_types", "SearchTypes") + .replace("Search_bs", "SearchBs") + .replace("Issue_types", "IssueTypes") + .replace("Issue_bs", "IssueBs") .replace("_ofChanges", "_of_changes") .replace("_withTests", "_with_tests") .replace("_perChange", "_per_change") # on_createdAt -> on_created_at .replace("edAt", "ed_at") + .replace("commitAt", "commit_at") .replace("Count", "_count") ) def fix_timestamp(content): # Fix timestamp message encoding which is a rfc3339 string, not an object + return ( + functools.reduce( + lambda acc, field: acc.replace( + field + '" (Js.Json.object_', field + '" (Js.Json.string' + ), + # TODO: add new timestamp field to this list, e.g. 
when this error happens:
+            #   Js.Dict.set json "updated_at" (Js.Json.object_ json');
+            #   This has type: string, Somewhere wanted: Js.Json.t Js.Dict.t
+            [
+                "timestamp",
+                "updated_at",
+                "closed_at",
+                "created_at",
+                "changed_at",
+                "authored_at",
+                "committed_at",
+                "merged_at",
+                "commit_at",
+            ],
+            content,
+        )
+        .replace(
+            "TimestampBs.decode_timestamp (Pbrt_bs.object_",
+            "TimestampBs.decode_timestamp (Pbrt_bs.string",
+        )
+        .replace(
+            # The codegen believes that TimestampTypes.default_timestamp is a function but it is a term
+            "TimestampTypes.default_timestamp ()",
+            "TimestampTypes.default_timestamp",
+        )
+    )
+
+
+def fix_enum(content):
+    # Fix the following error:
+    # This variant constructor, Change_commit_pushed, expects 0 arguments; here, we've found 1.
+    return functools.reduce(
+        lambda acc, field: acc.replace("| " + field + " v ->", "| " + field + " ->"),
+        [
+            "Change_created",
+            "Change_commented",
+            "Change_abandoned",
+            "Change_commit_force_pushed",
+            "Change_commit_pushed",
+            "Change_merged",
+            "Issue_created",
+            "Issue_closed",
+        ],
+        content,
+    )
@@ -83,7 +118,7 @@ def fix_module(filepath):
     typeName = pascalCase(filepath.name.split("_bs")[0] + "_types")
     newTypeName = pascalCases(typeName)
     content = content.replace(typeName, newTypeName)
-    newFile.write_text(fix_timestamp(fix_field_name(content)))
+    newFile.write_text(fix_enum(fix_timestamp(fix_field_name(content))))
 
 
 def fixable_file(filename):
diff --git a/web/src/components/WebApi.res b/web/src/components/WebApi.res
index d2c3c198d..ed371730c 100644
--- a/web/src/components/WebApi.res
+++ b/web/src/components/WebApi.res
@@ -181,5 +181,42 @@ module Metric = {
 }
 
 module Crawler = {
+  @module("axios")
+  external addDocRaw: (string, 'a) => axios<'b> = "post"
+
+  let addDoc = (request: CrawlerTypes.add_doc_request): axios<CrawlerTypes.add_doc_response> =>
+    request->CrawlerBs.encode_add_doc_request
+    |> addDocRaw(serverUrl ++ "/api/2/crawler/add")
+    |> Js.Promise.then_(resp =>
+      {data: resp.data->CrawlerBs.decode_add_doc_response}->Js.Promise.resolve
+    )
+  @module("axios")
+  external commitRaw: (string, 'a) => axios<'b> = "post"
+
+  let commit = (request: CrawlerTypes.commit_request): axios<CrawlerTypes.commit_response> =>
+    request->CrawlerBs.encode_commit_request
+    |> commitRaw(serverUrl ++ "/api/2/crawler/commit")
+    |> Js.Promise.then_(resp =>
+      {data: resp.data->CrawlerBs.decode_commit_response}->Js.Promise.resolve
+    )
+  @module("axios")
+  external commitInfoRaw: (string, 'a) => axios<'b> = "post"
+
+  let commitInfo = (request: CrawlerTypes.commit_info_request): axios<
+    CrawlerTypes.commit_info_response,
+  > =>
+    request->CrawlerBs.encode_commit_info_request
+    |> commitInfoRaw(serverUrl ++ "/api/2/crawler/get_commit_info")
+    |> Js.Promise.then_(resp =>
+      {data: resp.data->CrawlerBs.decode_commit_info_response}->Js.Promise.resolve
+    )
+  @module("axios")
+  external errorsRaw: (string, 'a) => axios<'b> = "post"
+  let errors = (request: CrawlerTypes.errors_request): axios<CrawlerTypes.errors_response> =>
+    request->CrawlerBs.encode_errors_request
+    |> errorsRaw(serverUrl ++ "/api/2/crawler/errors")
|> Js.Promise.then_(resp => + {data: resp.data->CrawlerBs.decode_errors_response}->Js.Promise.resolve + ) } diff --git a/web/src/messages/ChangeBs.ml b/web/src/messages/ChangeBs.ml new file mode 100644 index 000000000..005f19aa0 --- /dev/null +++ b/web/src/messages/ChangeBs.ml @@ -0,0 +1,1109 @@ +[@@@ocaml.warning "-27-30-39"] + +type ident_mutable = { + mutable uid : string; + mutable muid : string; + mutable groups : string list; +} + +let default_ident_mutable () : ident_mutable = { + uid = ""; + muid = ""; + groups = []; +} + +type changed_file_mutable = { + mutable additions : int32; + mutable deletions : int32; + mutable path : string; +} + +let default_changed_file_mutable () : changed_file_mutable = { + additions = 0l; + deletions = 0l; + path = ""; +} + +type changed_file_path_mutable = { + mutable path : string; +} + +let default_changed_file_path_mutable () : changed_file_path_mutable = { + path = ""; +} + +type commit_mutable = { + mutable sha : string; + mutable author : ChangeTypes.ident option; + mutable committer : ChangeTypes.ident option; + mutable authored_at : TimestampTypes.timestamp option; + mutable committed_at : TimestampTypes.timestamp option; + mutable additions : int32; + mutable deletions : int32; + mutable title : string; +} + +let default_commit_mutable () : commit_mutable = { + sha = ""; + author = None; + committer = None; + authored_at = None; + committed_at = None; + additions = 0l; + deletions = 0l; + title = ""; +} + +type change_mutable = { + mutable id : string; + mutable number : int32; + mutable change_id : string; + mutable title : string; + mutable text : string; + mutable url : string; + mutable commit_count : int32; + mutable additions : int32; + mutable deletions : int32; + mutable changed_files_count : int32; + mutable changed_files : ChangeTypes.changed_file list; + mutable commits : ChangeTypes.commit list; + mutable repository_prefix : string; + mutable repository_fullname : string; + mutable repository_shortname : string; + mutable author : ChangeTypes.ident option; + mutable optional_merged_by : ChangeTypes.change_optional_merged_by; + mutable branch : string; + mutable target_branch : string; + mutable created_at : TimestampTypes.timestamp option; + mutable optional_merged_at : ChangeTypes.change_optional_merged_at; + mutable updated_at : TimestampTypes.timestamp option; + mutable optional_closed_at : ChangeTypes.change_optional_closed_at; + mutable state : ChangeTypes.change_change_state; + mutable optional_duration : ChangeTypes.change_optional_duration; + mutable mergeable : string; + mutable labels : string list; + mutable assignees : ChangeTypes.ident list; + mutable approvals : string list; + mutable draft : bool; + mutable optional_self_merged : ChangeTypes.change_optional_self_merged; + mutable optional_merged_commit_sha : ChangeTypes.change_optional_merged_commit_sha; +} + +let default_change_mutable () : change_mutable = { + id = ""; + number = 0l; + change_id = ""; + title = ""; + text = ""; + url = ""; + commit_count = 0l; + additions = 0l; + deletions = 0l; + changed_files_count = 0l; + changed_files = []; + commits = []; + repository_prefix = ""; + repository_fullname = ""; + repository_shortname = ""; + author = None; + optional_merged_by = ChangeTypes.Merged_by (ChangeTypes.default_ident ()); + branch = ""; + target_branch = ""; + created_at = None; + optional_merged_at = ChangeTypes.Merged_at (TimestampTypes.default_timestamp); + updated_at = None; + optional_closed_at = ChangeTypes.Closed_at 
(TimestampTypes.default_timestamp); + state = ChangeTypes.default_change_change_state (); + optional_duration = ChangeTypes.Duration (0l); + mergeable = ""; + labels = []; + assignees = []; + approvals = []; + draft = false; + optional_self_merged = ChangeTypes.Self_merged (false); + optional_merged_commit_sha = ChangeTypes.Merged_commit_sha (""); +} + +type change_reviewed_event_mutable = { + mutable approvals : string list; +} + +let default_change_reviewed_event_mutable () : change_reviewed_event_mutable = { + approvals = []; +} + +type change_event_mutable = { + mutable id : string; + mutable created_at : TimestampTypes.timestamp option; + mutable author : ChangeTypes.ident option; + mutable repository_prefix : string; + mutable repository_fullname : string; + mutable repository_shortname : string; + mutable branch : string; + mutable target_branch : string; + mutable number : int32; + mutable change_id : string; + mutable url : string; + mutable on_author : ChangeTypes.ident option; + mutable on_created_at : TimestampTypes.timestamp option; + mutable changed_files : ChangeTypes.changed_file_path list; + mutable type_ : ChangeTypes.change_event_type; + mutable labels : string list; + mutable optional_duration : ChangeTypes.change_event_optional_duration; + mutable draft : bool; + mutable optional_merged_commit_sha : ChangeTypes.change_event_optional_merged_commit_sha; +} + +let default_change_event_mutable () : change_event_mutable = { + id = ""; + created_at = None; + author = None; + repository_prefix = ""; + repository_fullname = ""; + repository_shortname = ""; + branch = ""; + target_branch = ""; + number = 0l; + change_id = ""; + url = ""; + on_author = None; + on_created_at = None; + changed_files = []; + type_ = ChangeTypes.Change_created; + labels = []; + optional_duration = ChangeTypes.Duration (0l); + draft = false; + optional_merged_commit_sha = ChangeTypes.Merged_commit_sha (""); +} + + +let rec decode_ident json = + let v = default_ident_mutable () in + let keys = Js.Dict.keys json in + let last_key_index = Array.length keys - 1 in + for i = 0 to last_key_index do + match Array.unsafe_get keys i with + | "uid" -> + let json = Js.Dict.unsafeGet json "uid" in + v.uid <- Pbrt_bs.string json "ident" "uid" + | "muid" -> + let json = Js.Dict.unsafeGet json "muid" in + v.muid <- Pbrt_bs.string json "ident" "muid" + | "groups" -> begin + let a = + let a = Js.Dict.unsafeGet json "groups" in + Pbrt_bs.array_ a "ident" "groups" + in + v.groups <- Array.map (fun json -> + Pbrt_bs.string json "ident" "groups" + ) a |> Array.to_list; + end + + | _ -> () (*Unknown fields are ignored*) + done; + ({ + ChangeTypes.uid = v.uid; + ChangeTypes.muid = v.muid; + ChangeTypes.groups = v.groups; + } : ChangeTypes.ident) + +let rec decode_changed_file json = + let v = default_changed_file_mutable () in + let keys = Js.Dict.keys json in + let last_key_index = Array.length keys - 1 in + for i = 0 to last_key_index do + match Array.unsafe_get keys i with + | "additions" -> + let json = Js.Dict.unsafeGet json "additions" in + v.additions <- Pbrt_bs.int32 json "changed_file" "additions" + | "deletions" -> + let json = Js.Dict.unsafeGet json "deletions" in + v.deletions <- Pbrt_bs.int32 json "changed_file" "deletions" + | "path" -> + let json = Js.Dict.unsafeGet json "path" in + v.path <- Pbrt_bs.string json "changed_file" "path" + + | _ -> () (*Unknown fields are ignored*) + done; + ({ + ChangeTypes.additions = v.additions; + ChangeTypes.deletions = v.deletions; + ChangeTypes.path = v.path; + } : 
ChangeTypes.changed_file) + +let rec decode_changed_file_path json = + let v = default_changed_file_path_mutable () in + let keys = Js.Dict.keys json in + let last_key_index = Array.length keys - 1 in + for i = 0 to last_key_index do + match Array.unsafe_get keys i with + | "path" -> + let json = Js.Dict.unsafeGet json "path" in + v.path <- Pbrt_bs.string json "changed_file_path" "path" + + | _ -> () (*Unknown fields are ignored*) + done; + ({ + ChangeTypes.path = v.path; + } : ChangeTypes.changed_file_path) + +let rec decode_commit json = + let v = default_commit_mutable () in + let keys = Js.Dict.keys json in + let last_key_index = Array.length keys - 1 in + for i = 0 to last_key_index do + match Array.unsafe_get keys i with + | "sha" -> + let json = Js.Dict.unsafeGet json "sha" in + v.sha <- Pbrt_bs.string json "commit" "sha" + | "author" -> + let json = Js.Dict.unsafeGet json "author" in + v.author <- Some ((decode_ident (Pbrt_bs.object_ json "commit" "author"))) + | "committer" -> + let json = Js.Dict.unsafeGet json "committer" in + v.committer <- Some ((decode_ident (Pbrt_bs.object_ json "commit" "committer"))) + | "authored_at" -> + let json = Js.Dict.unsafeGet json "authored_at" in + v.authored_at <- Some ((TimestampBs.decode_timestamp (Pbrt_bs.string json "commit" "authored_at"))) + | "committed_at" -> + let json = Js.Dict.unsafeGet json "committed_at" in + v.committed_at <- Some ((TimestampBs.decode_timestamp (Pbrt_bs.string json "commit" "committed_at"))) + | "additions" -> + let json = Js.Dict.unsafeGet json "additions" in + v.additions <- Pbrt_bs.int32 json "commit" "additions" + | "deletions" -> + let json = Js.Dict.unsafeGet json "deletions" in + v.deletions <- Pbrt_bs.int32 json "commit" "deletions" + | "title" -> + let json = Js.Dict.unsafeGet json "title" in + v.title <- Pbrt_bs.string json "commit" "title" + + | _ -> () (*Unknown fields are ignored*) + done; + ({ + ChangeTypes.sha = v.sha; + ChangeTypes.author = v.author; + ChangeTypes.committer = v.committer; + ChangeTypes.authored_at = v.authored_at; + ChangeTypes.committed_at = v.committed_at; + ChangeTypes.additions = v.additions; + ChangeTypes.deletions = v.deletions; + ChangeTypes.title = v.title; + } : ChangeTypes.commit) + +let rec decode_change_change_state (json:Js.Json.t) = + match Pbrt_bs.string json "change_change_state" "value" with + | "Open" -> (ChangeTypes.Open : ChangeTypes.change_change_state) + | "Merged" -> (ChangeTypes.Merged : ChangeTypes.change_change_state) + | "Closed" -> (ChangeTypes.Closed : ChangeTypes.change_change_state) + | "" -> ChangeTypes.Open + | _ -> Pbrt_bs.E.malformed_variant "change_change_state" + +let rec decode_change_optional_merged_by json = + let keys = Js.Dict.keys json in + let rec loop = function + | -1 -> Pbrt_bs.E.malformed_variant "change_optional_merged_by" + | i -> + begin match Array.unsafe_get keys i with + | "merged_by" -> + let json = Js.Dict.unsafeGet json "merged_by" in + (ChangeTypes.Merged_by ((decode_ident (Pbrt_bs.object_ json "change_optional_merged_by" "Merged_by"))) : ChangeTypes.change_optional_merged_by) + + | _ -> loop (i - 1) + end + in + loop (Array.length keys - 1) + +and decode_change_optional_merged_at json = + let keys = Js.Dict.keys json in + let rec loop = function + | -1 -> Pbrt_bs.E.malformed_variant "change_optional_merged_at" + | i -> + begin match Array.unsafe_get keys i with + | "merged_at" -> + let json = Js.Dict.unsafeGet json "merged_at" in + (ChangeTypes.Merged_at ((TimestampBs.decode_timestamp (Pbrt_bs.string json 
"change_optional_merged_at" "Merged_at"))) : ChangeTypes.change_optional_merged_at) + + | _ -> loop (i - 1) + end + in + loop (Array.length keys - 1) + +and decode_change_optional_closed_at json = + let keys = Js.Dict.keys json in + let rec loop = function + | -1 -> Pbrt_bs.E.malformed_variant "change_optional_closed_at" + | i -> + begin match Array.unsafe_get keys i with + | "closed_at" -> + let json = Js.Dict.unsafeGet json "closed_at" in + (ChangeTypes.Closed_at ((TimestampBs.decode_timestamp (Pbrt_bs.string json "change_optional_closed_at" "Closed_at"))) : ChangeTypes.change_optional_closed_at) + + | _ -> loop (i - 1) + end + in + loop (Array.length keys - 1) + +and decode_change_optional_duration json = + let keys = Js.Dict.keys json in + let rec loop = function + | -1 -> Pbrt_bs.E.malformed_variant "change_optional_duration" + | i -> + begin match Array.unsafe_get keys i with + | "duration" -> + let json = Js.Dict.unsafeGet json "duration" in + (ChangeTypes.Duration (Pbrt_bs.int32 json "change_optional_duration" "Duration") : ChangeTypes.change_optional_duration) + + | _ -> loop (i - 1) + end + in + loop (Array.length keys - 1) + +and decode_change_optional_self_merged json = + let keys = Js.Dict.keys json in + let rec loop = function + | -1 -> Pbrt_bs.E.malformed_variant "change_optional_self_merged" + | i -> + begin match Array.unsafe_get keys i with + | "self_merged" -> + let json = Js.Dict.unsafeGet json "self_merged" in + (ChangeTypes.Self_merged (Pbrt_bs.bool json "change_optional_self_merged" "Self_merged") : ChangeTypes.change_optional_self_merged) + + | _ -> loop (i - 1) + end + in + loop (Array.length keys - 1) + +and decode_change_optional_merged_commit_sha json = + let keys = Js.Dict.keys json in + let rec loop = function + | -1 -> Pbrt_bs.E.malformed_variant "change_optional_merged_commit_sha" + | i -> + begin match Array.unsafe_get keys i with + | "merged_commitSha" -> + let json = Js.Dict.unsafeGet json "merged_commitSha" in + (ChangeTypes.Merged_commit_sha (Pbrt_bs.string json "change_optional_merged_commit_sha" "Merged_commit_sha") : ChangeTypes.change_optional_merged_commit_sha) + + | _ -> loop (i - 1) + end + in + loop (Array.length keys - 1) + +and decode_change json = + let v = default_change_mutable () in + let keys = Js.Dict.keys json in + let last_key_index = Array.length keys - 1 in + for i = 0 to last_key_index do + match Array.unsafe_get keys i with + | "id" -> + let json = Js.Dict.unsafeGet json "id" in + v.id <- Pbrt_bs.string json "change" "id" + | "number" -> + let json = Js.Dict.unsafeGet json "number" in + v.number <- Pbrt_bs.int32 json "change" "number" + | "change_id" -> + let json = Js.Dict.unsafeGet json "change_id" in + v.change_id <- Pbrt_bs.string json "change" "change_id" + | "title" -> + let json = Js.Dict.unsafeGet json "title" in + v.title <- Pbrt_bs.string json "change" "title" + | "text" -> + let json = Js.Dict.unsafeGet json "text" in + v.text <- Pbrt_bs.string json "change" "text" + | "url" -> + let json = Js.Dict.unsafeGet json "url" in + v.url <- Pbrt_bs.string json "change" "url" + | "commit_count" -> + let json = Js.Dict.unsafeGet json "commit_count" in + v.commit_count <- Pbrt_bs.int32 json "change" "commit_count" + | "additions" -> + let json = Js.Dict.unsafeGet json "additions" in + v.additions <- Pbrt_bs.int32 json "change" "additions" + | "deletions" -> + let json = Js.Dict.unsafeGet json "deletions" in + v.deletions <- Pbrt_bs.int32 json "change" "deletions" + | "changed_files_count" -> + let json = Js.Dict.unsafeGet json 
"changed_files_count" in + v.changed_files_count <- Pbrt_bs.int32 json "change" "changed_files_count" + | "changed_files" -> begin + let a = + let a = Js.Dict.unsafeGet json "changed_files" in + Pbrt_bs.array_ a "change" "changed_files" + in + v.changed_files <- Array.map (fun json -> + (decode_changed_file (Pbrt_bs.object_ json "change" "changed_files")) + ) a |> Array.to_list; + end + | "commits" -> begin + let a = + let a = Js.Dict.unsafeGet json "commits" in + Pbrt_bs.array_ a "change" "commits" + in + v.commits <- Array.map (fun json -> + (decode_commit (Pbrt_bs.object_ json "change" "commits")) + ) a |> Array.to_list; + end + | "repository_prefix" -> + let json = Js.Dict.unsafeGet json "repository_prefix" in + v.repository_prefix <- Pbrt_bs.string json "change" "repository_prefix" + | "repository_fullname" -> + let json = Js.Dict.unsafeGet json "repository_fullname" in + v.repository_fullname <- Pbrt_bs.string json "change" "repository_fullname" + | "repository_shortname" -> + let json = Js.Dict.unsafeGet json "repository_shortname" in + v.repository_shortname <- Pbrt_bs.string json "change" "repository_shortname" + | "author" -> + let json = Js.Dict.unsafeGet json "author" in + v.author <- Some ((decode_ident (Pbrt_bs.object_ json "change" "author"))) + | "merged_by" -> + let json = Js.Dict.unsafeGet json "merged_by" in + v.optional_merged_by <- Merged_by ((decode_ident (Pbrt_bs.object_ json "change" "optional_merged_by"))) + | "branch" -> + let json = Js.Dict.unsafeGet json "branch" in + v.branch <- Pbrt_bs.string json "change" "branch" + | "target_branch" -> + let json = Js.Dict.unsafeGet json "target_branch" in + v.target_branch <- Pbrt_bs.string json "change" "target_branch" + | "created_at" -> + let json = Js.Dict.unsafeGet json "created_at" in + v.created_at <- Some ((TimestampBs.decode_timestamp (Pbrt_bs.string json "change" "created_at"))) + | "merged_at" -> + let json = Js.Dict.unsafeGet json "merged_at" in + v.optional_merged_at <- Merged_at ((TimestampBs.decode_timestamp (Pbrt_bs.string json "change" "optional_merged_at"))) + | "updated_at" -> + let json = Js.Dict.unsafeGet json "updated_at" in + v.updated_at <- Some ((TimestampBs.decode_timestamp (Pbrt_bs.string json "change" "updated_at"))) + | "closed_at" -> + let json = Js.Dict.unsafeGet json "closed_at" in + v.optional_closed_at <- Closed_at ((TimestampBs.decode_timestamp (Pbrt_bs.string json "change" "optional_closed_at"))) + | "state" -> + let json = Js.Dict.unsafeGet json "state" in + v.state <- (decode_change_change_state json) + | "duration" -> + let json = Js.Dict.unsafeGet json "duration" in + v.optional_duration <- Duration (Pbrt_bs.int32 json "change" "optional_duration") + | "mergeable" -> + let json = Js.Dict.unsafeGet json "mergeable" in + v.mergeable <- Pbrt_bs.string json "change" "mergeable" + | "labels" -> begin + let a = + let a = Js.Dict.unsafeGet json "labels" in + Pbrt_bs.array_ a "change" "labels" + in + v.labels <- Array.map (fun json -> + Pbrt_bs.string json "change" "labels" + ) a |> Array.to_list; + end + | "assignees" -> begin + let a = + let a = Js.Dict.unsafeGet json "assignees" in + Pbrt_bs.array_ a "change" "assignees" + in + v.assignees <- Array.map (fun json -> + (decode_ident (Pbrt_bs.object_ json "change" "assignees")) + ) a |> Array.to_list; + end + | "approvals" -> begin + let a = + let a = Js.Dict.unsafeGet json "approvals" in + Pbrt_bs.array_ a "change" "approvals" + in + v.approvals <- Array.map (fun json -> + Pbrt_bs.string json "change" "approvals" + ) a |> Array.to_list; + 
end + | "draft" -> + let json = Js.Dict.unsafeGet json "draft" in + v.draft <- Pbrt_bs.bool json "change" "draft" + | "self_merged" -> + let json = Js.Dict.unsafeGet json "self_merged" in + v.optional_self_merged <- Self_merged (Pbrt_bs.bool json "change" "optional_self_merged") + | "merged_commitSha" -> + let json = Js.Dict.unsafeGet json "merged_commitSha" in + v.optional_merged_commit_sha <- Merged_commit_sha (Pbrt_bs.string json "change" "optional_merged_commit_sha") + + | _ -> () (*Unknown fields are ignored*) + done; + ({ + ChangeTypes.id = v.id; + ChangeTypes.number = v.number; + ChangeTypes.change_id = v.change_id; + ChangeTypes.title = v.title; + ChangeTypes.text = v.text; + ChangeTypes.url = v.url; + ChangeTypes.commit_count = v.commit_count; + ChangeTypes.additions = v.additions; + ChangeTypes.deletions = v.deletions; + ChangeTypes.changed_files_count = v.changed_files_count; + ChangeTypes.changed_files = v.changed_files; + ChangeTypes.commits = v.commits; + ChangeTypes.repository_prefix = v.repository_prefix; + ChangeTypes.repository_fullname = v.repository_fullname; + ChangeTypes.repository_shortname = v.repository_shortname; + ChangeTypes.author = v.author; + ChangeTypes.optional_merged_by = v.optional_merged_by; + ChangeTypes.branch = v.branch; + ChangeTypes.target_branch = v.target_branch; + ChangeTypes.created_at = v.created_at; + ChangeTypes.optional_merged_at = v.optional_merged_at; + ChangeTypes.updated_at = v.updated_at; + ChangeTypes.optional_closed_at = v.optional_closed_at; + ChangeTypes.state = v.state; + ChangeTypes.optional_duration = v.optional_duration; + ChangeTypes.mergeable = v.mergeable; + ChangeTypes.labels = v.labels; + ChangeTypes.assignees = v.assignees; + ChangeTypes.approvals = v.approvals; + ChangeTypes.draft = v.draft; + ChangeTypes.optional_self_merged = v.optional_self_merged; + ChangeTypes.optional_merged_commit_sha = v.optional_merged_commit_sha; + } : ChangeTypes.change) + +let rec decode_change_reviewed_event json = + let v = default_change_reviewed_event_mutable () in + let keys = Js.Dict.keys json in + let last_key_index = Array.length keys - 1 in + for i = 0 to last_key_index do + match Array.unsafe_get keys i with + | "approvals" -> begin + let a = + let a = Js.Dict.unsafeGet json "approvals" in + Pbrt_bs.array_ a "change_reviewed_event" "approvals" + in + v.approvals <- Array.map (fun json -> + Pbrt_bs.string json "change_reviewed_event" "approvals" + ) a |> Array.to_list; + end + + | _ -> () (*Unknown fields are ignored*) + done; + ({ + ChangeTypes.approvals = v.approvals; + } : ChangeTypes.change_reviewed_event) + +let rec decode_change_event_type json = + let keys = Js.Dict.keys json in + let rec loop = function + | -1 -> Pbrt_bs.E.malformed_variant "change_event_type" + | i -> + begin match Array.unsafe_get keys i with + | "change_created" -> (ChangeTypes.Change_created : ChangeTypes.change_event_type) + | "change_commented" -> (ChangeTypes.Change_commented : ChangeTypes.change_event_type) + | "change_abandoned" -> (ChangeTypes.Change_abandoned : ChangeTypes.change_event_type) + | "change_reviewed" -> + let json = Js.Dict.unsafeGet json "change_reviewed" in + (ChangeTypes.Change_reviewed ((decode_change_reviewed_event (Pbrt_bs.object_ json "change_event_type" "Change_reviewed"))) : ChangeTypes.change_event_type) + | "change_commitForcePushed" -> (ChangeTypes.Change_commit_force_pushed : ChangeTypes.change_event_type) + | "change_commitPushed" -> (ChangeTypes.Change_commit_pushed : ChangeTypes.change_event_type) + | "change_merged" -> 
(ChangeTypes.Change_merged : ChangeTypes.change_event_type) + + | _ -> loop (i - 1) + end + in + loop (Array.length keys - 1) + +and decode_change_event_optional_duration json = + let keys = Js.Dict.keys json in + let rec loop = function + | -1 -> Pbrt_bs.E.malformed_variant "change_event_optional_duration" + | i -> + begin match Array.unsafe_get keys i with + | "duration" -> + let json = Js.Dict.unsafeGet json "duration" in + (ChangeTypes.Duration (Pbrt_bs.int32 json "change_event_optional_duration" "Duration") : ChangeTypes.change_event_optional_duration) + + | _ -> loop (i - 1) + end + in + loop (Array.length keys - 1) + +and decode_change_event_optional_merged_commit_sha json = + let keys = Js.Dict.keys json in + let rec loop = function + | -1 -> Pbrt_bs.E.malformed_variant "change_event_optional_merged_commit_sha" + | i -> + begin match Array.unsafe_get keys i with + | "merged_commitSha" -> + let json = Js.Dict.unsafeGet json "merged_commitSha" in + (ChangeTypes.Merged_commit_sha (Pbrt_bs.string json "change_event_optional_merged_commit_sha" "Merged_commit_sha") : ChangeTypes.change_event_optional_merged_commit_sha) + + | _ -> loop (i - 1) + end + in + loop (Array.length keys - 1) + +and decode_change_event json = + let v = default_change_event_mutable () in + let keys = Js.Dict.keys json in + let last_key_index = Array.length keys - 1 in + for i = 0 to last_key_index do + match Array.unsafe_get keys i with + | "id" -> + let json = Js.Dict.unsafeGet json "id" in + v.id <- Pbrt_bs.string json "change_event" "id" + | "created_at" -> + let json = Js.Dict.unsafeGet json "created_at" in + v.created_at <- Some ((TimestampBs.decode_timestamp (Pbrt_bs.string json "change_event" "created_at"))) + | "author" -> + let json = Js.Dict.unsafeGet json "author" in + v.author <- Some ((decode_ident (Pbrt_bs.object_ json "change_event" "author"))) + | "repository_prefix" -> + let json = Js.Dict.unsafeGet json "repository_prefix" in + v.repository_prefix <- Pbrt_bs.string json "change_event" "repository_prefix" + | "repository_fullname" -> + let json = Js.Dict.unsafeGet json "repository_fullname" in + v.repository_fullname <- Pbrt_bs.string json "change_event" "repository_fullname" + | "repository_shortname" -> + let json = Js.Dict.unsafeGet json "repository_shortname" in + v.repository_shortname <- Pbrt_bs.string json "change_event" "repository_shortname" + | "branch" -> + let json = Js.Dict.unsafeGet json "branch" in + v.branch <- Pbrt_bs.string json "change_event" "branch" + | "target_branch" -> + let json = Js.Dict.unsafeGet json "target_branch" in + v.target_branch <- Pbrt_bs.string json "change_event" "target_branch" + | "number" -> + let json = Js.Dict.unsafeGet json "number" in + v.number <- Pbrt_bs.int32 json "change_event" "number" + | "change_id" -> + let json = Js.Dict.unsafeGet json "change_id" in + v.change_id <- Pbrt_bs.string json "change_event" "change_id" + | "url" -> + let json = Js.Dict.unsafeGet json "url" in + v.url <- Pbrt_bs.string json "change_event" "url" + | "on_author" -> + let json = Js.Dict.unsafeGet json "on_author" in + v.on_author <- Some ((decode_ident (Pbrt_bs.object_ json "change_event" "on_author"))) + | "on_created_at" -> + let json = Js.Dict.unsafeGet json "on_created_at" in + v.on_created_at <- Some ((TimestampBs.decode_timestamp (Pbrt_bs.string json "change_event" "on_created_at"))) + | "changed_files" -> begin + let a = + let a = Js.Dict.unsafeGet json "changed_files" in + Pbrt_bs.array_ a "change_event" "changed_files" + in + v.changed_files <- Array.map (fun 
json -> + (decode_changed_file_path (Pbrt_bs.object_ json "change_event" "changed_files")) + ) a |> Array.to_list; + end + | "change_created" -> v.type_ <- Change_created + | "change_commented" -> v.type_ <- Change_commented + | "change_abandoned" -> v.type_ <- Change_abandoned + | "change_reviewed" -> + let json = Js.Dict.unsafeGet json "change_reviewed" in + v.type_ <- Change_reviewed ((decode_change_reviewed_event (Pbrt_bs.object_ json "change_event" "type_"))) + | "change_commitForcePushed" -> v.type_ <- Change_commit_force_pushed + | "change_commitPushed" -> v.type_ <- Change_commit_pushed + | "change_merged" -> v.type_ <- Change_merged + | "labels" -> begin + let a = + let a = Js.Dict.unsafeGet json "labels" in + Pbrt_bs.array_ a "change_event" "labels" + in + v.labels <- Array.map (fun json -> + Pbrt_bs.string json "change_event" "labels" + ) a |> Array.to_list; + end + | "duration" -> + let json = Js.Dict.unsafeGet json "duration" in + v.optional_duration <- Duration (Pbrt_bs.int32 json "change_event" "optional_duration") + | "draft" -> + let json = Js.Dict.unsafeGet json "draft" in + v.draft <- Pbrt_bs.bool json "change_event" "draft" + | "merged_commitSha" -> + let json = Js.Dict.unsafeGet json "merged_commitSha" in + v.optional_merged_commit_sha <- Merged_commit_sha (Pbrt_bs.string json "change_event" "optional_merged_commit_sha") + + | _ -> () (*Unknown fields are ignored*) + done; + ({ + ChangeTypes.id = v.id; + ChangeTypes.created_at = v.created_at; + ChangeTypes.author = v.author; + ChangeTypes.repository_prefix = v.repository_prefix; + ChangeTypes.repository_fullname = v.repository_fullname; + ChangeTypes.repository_shortname = v.repository_shortname; + ChangeTypes.branch = v.branch; + ChangeTypes.target_branch = v.target_branch; + ChangeTypes.number = v.number; + ChangeTypes.change_id = v.change_id; + ChangeTypes.url = v.url; + ChangeTypes.on_author = v.on_author; + ChangeTypes.on_created_at = v.on_created_at; + ChangeTypes.changed_files = v.changed_files; + ChangeTypes.type_ = v.type_; + ChangeTypes.labels = v.labels; + ChangeTypes.optional_duration = v.optional_duration; + ChangeTypes.draft = v.draft; + ChangeTypes.optional_merged_commit_sha = v.optional_merged_commit_sha; + } : ChangeTypes.change_event) + +let rec encode_ident (v:ChangeTypes.ident) = + let json = Js.Dict.empty () in + Js.Dict.set json "uid" (Js.Json.string v.ChangeTypes.uid); + Js.Dict.set json "muid" (Js.Json.string v.ChangeTypes.muid); + let a = v.ChangeTypes.groups |> Array.of_list |> Array.map Js.Json.string in + Js.Dict.set json "groups" (Js.Json.array a); + json + +let rec encode_changed_file (v:ChangeTypes.changed_file) = + let json = Js.Dict.empty () in + Js.Dict.set json "additions" (Js.Json.number (Int32.to_float v.ChangeTypes.additions)); + Js.Dict.set json "deletions" (Js.Json.number (Int32.to_float v.ChangeTypes.deletions)); + Js.Dict.set json "path" (Js.Json.string v.ChangeTypes.path); + json + +let rec encode_changed_file_path (v:ChangeTypes.changed_file_path) = + let json = Js.Dict.empty () in + Js.Dict.set json "path" (Js.Json.string v.ChangeTypes.path); + json + +let rec encode_commit (v:ChangeTypes.commit) = + let json = Js.Dict.empty () in + Js.Dict.set json "sha" (Js.Json.string v.ChangeTypes.sha); + begin match v.ChangeTypes.author with + | None -> () + | Some v -> + begin (* author field *) + let json' = encode_ident v in + Js.Dict.set json "author" (Js.Json.object_ json'); + end; + end; + begin match v.ChangeTypes.committer with + | None -> () + | Some v -> + begin (* committer 
field *) + let json' = encode_ident v in + Js.Dict.set json "committer" (Js.Json.object_ json'); + end; + end; + begin match v.ChangeTypes.authored_at with + | None -> () + | Some v -> + begin (* authored_at field *) + let json' = TimestampBs.encode_timestamp v in + Js.Dict.set json "authored_at" (Js.Json.string json'); + end; + end; + begin match v.ChangeTypes.committed_at with + | None -> () + | Some v -> + begin (* committed_at field *) + let json' = TimestampBs.encode_timestamp v in + Js.Dict.set json "committed_at" (Js.Json.string json'); + end; + end; + Js.Dict.set json "additions" (Js.Json.number (Int32.to_float v.ChangeTypes.additions)); + Js.Dict.set json "deletions" (Js.Json.number (Int32.to_float v.ChangeTypes.deletions)); + Js.Dict.set json "title" (Js.Json.string v.ChangeTypes.title); + json + +let rec encode_change_change_state (v:ChangeTypes.change_change_state) : string = + match v with + | ChangeTypes.Open -> "Open" + | ChangeTypes.Merged -> "Merged" + | ChangeTypes.Closed -> "Closed" + +let rec encode_change_optional_merged_by (v:ChangeTypes.change_optional_merged_by) = + let json = Js.Dict.empty () in + begin match v with + | ChangeTypes.Merged_by v -> + begin (* mergedBy field *) + let json' = encode_ident v in + Js.Dict.set json "merged_by" (Js.Json.object_ json'); + end; + end; + json + +and encode_change_optional_merged_at (v:ChangeTypes.change_optional_merged_at) = + let json = Js.Dict.empty () in + begin match v with + | ChangeTypes.Merged_at v -> + begin (* merged_at field *) + let json' = TimestampBs.encode_timestamp v in + Js.Dict.set json "merged_at" (Js.Json.string json'); + end; + end; + json + +and encode_change_optional_closed_at (v:ChangeTypes.change_optional_closed_at) = + let json = Js.Dict.empty () in + begin match v with + | ChangeTypes.Closed_at v -> + begin (* closed_at field *) + let json' = TimestampBs.encode_timestamp v in + Js.Dict.set json "closed_at" (Js.Json.string json'); + end; + end; + json + +and encode_change_optional_duration (v:ChangeTypes.change_optional_duration) = + let json = Js.Dict.empty () in + begin match v with + | ChangeTypes.Duration v -> + Js.Dict.set json "duration" (Js.Json.number (Int32.to_float v)); + end; + json + +and encode_change_optional_self_merged (v:ChangeTypes.change_optional_self_merged) = + let json = Js.Dict.empty () in + begin match v with + | ChangeTypes.Self_merged v -> + Js.Dict.set json "self_merged" (Js.Json.boolean v); + end; + json + +and encode_change_optional_merged_commit_sha (v:ChangeTypes.change_optional_merged_commit_sha) = + let json = Js.Dict.empty () in + begin match v with + | ChangeTypes.Merged_commit_sha v -> + Js.Dict.set json "merged_commitSha" (Js.Json.string v); + end; + json + +and encode_change (v:ChangeTypes.change) = + let json = Js.Dict.empty () in + Js.Dict.set json "id" (Js.Json.string v.ChangeTypes.id); + Js.Dict.set json "number" (Js.Json.number (Int32.to_float v.ChangeTypes.number)); + Js.Dict.set json "change_id" (Js.Json.string v.ChangeTypes.change_id); + Js.Dict.set json "title" (Js.Json.string v.ChangeTypes.title); + Js.Dict.set json "text" (Js.Json.string v.ChangeTypes.text); + Js.Dict.set json "url" (Js.Json.string v.ChangeTypes.url); + Js.Dict.set json "commit_count" (Js.Json.number (Int32.to_float v.ChangeTypes.commit_count)); + Js.Dict.set json "additions" (Js.Json.number (Int32.to_float v.ChangeTypes.additions)); + Js.Dict.set json "deletions" (Js.Json.number (Int32.to_float v.ChangeTypes.deletions)); + Js.Dict.set json "changed_files_count" (Js.Json.number 
(Int32.to_float v.ChangeTypes.changed_files_count)); + begin (* changedFiles field *) + let (changed_files':Js.Json.t) = + v.ChangeTypes.changed_files + |> Array.of_list + |> Array.map (fun v -> + v |> encode_changed_file |> Js.Json.object_ + ) + |> Js.Json.array + in + Js.Dict.set json "changed_files" changed_files'; + end; + begin (* commits field *) + let (commits':Js.Json.t) = + v.ChangeTypes.commits + |> Array.of_list + |> Array.map (fun v -> + v |> encode_commit |> Js.Json.object_ + ) + |> Js.Json.array + in + Js.Dict.set json "commits" commits'; + end; + Js.Dict.set json "repository_prefix" (Js.Json.string v.ChangeTypes.repository_prefix); + Js.Dict.set json "repository_fullname" (Js.Json.string v.ChangeTypes.repository_fullname); + Js.Dict.set json "repository_shortname" (Js.Json.string v.ChangeTypes.repository_shortname); + begin match v.ChangeTypes.author with + | None -> () + | Some v -> + begin (* author field *) + let json' = encode_ident v in + Js.Dict.set json "author" (Js.Json.object_ json'); + end; + end; + begin match v.ChangeTypes.optional_merged_by with + | Merged_by v -> + begin (* mergedBy field *) + let json' = encode_ident v in + Js.Dict.set json "merged_by" (Js.Json.object_ json'); + end; + end; (* match v.optional_merged_by *) + Js.Dict.set json "branch" (Js.Json.string v.ChangeTypes.branch); + Js.Dict.set json "target_branch" (Js.Json.string v.ChangeTypes.target_branch); + begin match v.ChangeTypes.created_at with + | None -> () + | Some v -> + begin (* created_at field *) + let json' = TimestampBs.encode_timestamp v in + Js.Dict.set json "created_at" (Js.Json.string json'); + end; + end; + begin match v.ChangeTypes.optional_merged_at with + | Merged_at v -> + begin (* merged_at field *) + let json' = TimestampBs.encode_timestamp v in + Js.Dict.set json "merged_at" (Js.Json.string json'); + end; + end; (* match v.optional_merged_at *) + begin match v.ChangeTypes.updated_at with + | None -> () + | Some v -> + begin (* updated_at field *) + let json' = TimestampBs.encode_timestamp v in + Js.Dict.set json "updated_at" (Js.Json.string json'); + end; + end; + begin match v.ChangeTypes.optional_closed_at with + | Closed_at v -> + begin (* closed_at field *) + let json' = TimestampBs.encode_timestamp v in + Js.Dict.set json "closed_at" (Js.Json.string json'); + end; + end; (* match v.optional_closed_at *) + Js.Dict.set json "state" (Js.Json.string (encode_change_change_state v.ChangeTypes.state)); + begin match v.ChangeTypes.optional_duration with + | Duration v -> + Js.Dict.set json "duration" (Js.Json.number (Int32.to_float v)); + end; (* match v.optional_duration *) + Js.Dict.set json "mergeable" (Js.Json.string v.ChangeTypes.mergeable); + let a = v.ChangeTypes.labels |> Array.of_list |> Array.map Js.Json.string in + Js.Dict.set json "labels" (Js.Json.array a); + begin (* assignees field *) + let (assignees':Js.Json.t) = + v.ChangeTypes.assignees + |> Array.of_list + |> Array.map (fun v -> + v |> encode_ident |> Js.Json.object_ + ) + |> Js.Json.array + in + Js.Dict.set json "assignees" assignees'; + end; + let a = v.ChangeTypes.approvals |> Array.of_list |> Array.map Js.Json.string in + Js.Dict.set json "approvals" (Js.Json.array a); + Js.Dict.set json "draft" (Js.Json.boolean v.ChangeTypes.draft); + begin match v.ChangeTypes.optional_self_merged with + | Self_merged v -> + Js.Dict.set json "self_merged" (Js.Json.boolean v); + end; (* match v.optional_self_merged *) + begin match v.ChangeTypes.optional_merged_commit_sha with + | Merged_commit_sha v -> + Js.Dict.set 
json "merged_commitSha" (Js.Json.string v); + end; (* match v.optional_merged_commit_sha *) + json + +let rec encode_change_reviewed_event (v:ChangeTypes.change_reviewed_event) = + let json = Js.Dict.empty () in + let a = v.ChangeTypes.approvals |> Array.of_list |> Array.map Js.Json.string in + Js.Dict.set json "approvals" (Js.Json.array a); + json + +let rec encode_change_event_type (v:ChangeTypes.change_event_type) = + let json = Js.Dict.empty () in + begin match v with + | ChangeTypes.Change_created -> + Js.Dict.set json "change_created" Js.Json.null + | ChangeTypes.Change_commented -> + Js.Dict.set json "change_commented" Js.Json.null + | ChangeTypes.Change_abandoned -> + Js.Dict.set json "change_abandoned" Js.Json.null + | ChangeTypes.Change_reviewed v -> + begin (* changeReviewed field *) + let json' = encode_change_reviewed_event v in + Js.Dict.set json "change_reviewed" (Js.Json.object_ json'); + end; + | ChangeTypes.Change_commit_force_pushed -> + Js.Dict.set json "change_commitForcePushed" Js.Json.null + | ChangeTypes.Change_commit_pushed -> + Js.Dict.set json "change_commitPushed" Js.Json.null + | ChangeTypes.Change_merged -> + Js.Dict.set json "change_merged" Js.Json.null + end; + json + +and encode_change_event_optional_duration (v:ChangeTypes.change_event_optional_duration) = + let json = Js.Dict.empty () in + begin match v with + | ChangeTypes.Duration v -> + Js.Dict.set json "duration" (Js.Json.number (Int32.to_float v)); + end; + json + +and encode_change_event_optional_merged_commit_sha (v:ChangeTypes.change_event_optional_merged_commit_sha) = + let json = Js.Dict.empty () in + begin match v with + | ChangeTypes.Merged_commit_sha v -> + Js.Dict.set json "merged_commitSha" (Js.Json.string v); + end; + json + +and encode_change_event (v:ChangeTypes.change_event) = + let json = Js.Dict.empty () in + Js.Dict.set json "id" (Js.Json.string v.ChangeTypes.id); + begin match v.ChangeTypes.created_at with + | None -> () + | Some v -> + begin (* created_at field *) + let json' = TimestampBs.encode_timestamp v in + Js.Dict.set json "created_at" (Js.Json.string json'); + end; + end; + begin match v.ChangeTypes.author with + | None -> () + | Some v -> + begin (* author field *) + let json' = encode_ident v in + Js.Dict.set json "author" (Js.Json.object_ json'); + end; + end; + Js.Dict.set json "repository_prefix" (Js.Json.string v.ChangeTypes.repository_prefix); + Js.Dict.set json "repository_fullname" (Js.Json.string v.ChangeTypes.repository_fullname); + Js.Dict.set json "repository_shortname" (Js.Json.string v.ChangeTypes.repository_shortname); + Js.Dict.set json "branch" (Js.Json.string v.ChangeTypes.branch); + Js.Dict.set json "target_branch" (Js.Json.string v.ChangeTypes.target_branch); + Js.Dict.set json "number" (Js.Json.number (Int32.to_float v.ChangeTypes.number)); + Js.Dict.set json "change_id" (Js.Json.string v.ChangeTypes.change_id); + Js.Dict.set json "url" (Js.Json.string v.ChangeTypes.url); + begin match v.ChangeTypes.on_author with + | None -> () + | Some v -> + begin (* onAuthor field *) + let json' = encode_ident v in + Js.Dict.set json "on_author" (Js.Json.object_ json'); + end; + end; + begin match v.ChangeTypes.on_created_at with + | None -> () + | Some v -> + begin (* onCreated_at field *) + let json' = TimestampBs.encode_timestamp v in + Js.Dict.set json "on_created_at" (Js.Json.string json'); + end; + end; + begin (* changedFiles field *) + let (changed_files':Js.Json.t) = + v.ChangeTypes.changed_files + |> Array.of_list + |> Array.map (fun v -> + v |> 
encode_changed_file_path |> Js.Json.object_
+      )
+      |> Js.Json.array
+    in
+    Js.Dict.set json "changed_files" changed_files';
+  end;
+  begin match v.ChangeTypes.type_ with
+  | Change_created ->
+    Js.Dict.set json "change_created" Js.Json.null
+  | Change_commented ->
+    Js.Dict.set json "change_commented" Js.Json.null
+  | Change_abandoned ->
+    Js.Dict.set json "change_abandoned" Js.Json.null
+  | Change_reviewed v ->
+    begin (* changeReviewed field *)
+      let json' = encode_change_reviewed_event v in
+      Js.Dict.set json "change_reviewed" (Js.Json.object_ json');
+    end;
+  | Change_commit_force_pushed ->
+    Js.Dict.set json "change_commitForcePushed" Js.Json.null
+  | Change_commit_pushed ->
+    Js.Dict.set json "change_commitPushed" Js.Json.null
+  | Change_merged ->
+    Js.Dict.set json "change_merged" Js.Json.null
+  end; (* match v.type_ *)
+  let a = v.ChangeTypes.labels |> Array.of_list |> Array.map Js.Json.string in
+  Js.Dict.set json "labels" (Js.Json.array a);
+  begin match v.ChangeTypes.optional_duration with
+  | Duration v ->
+    Js.Dict.set json "duration" (Js.Json.number (Int32.to_float v));
+  end; (* match v.optional_duration *)
+  Js.Dict.set json "draft" (Js.Json.boolean v.ChangeTypes.draft);
+  begin match v.ChangeTypes.optional_merged_commit_sha with
+  | Merged_commit_sha v ->
+    Js.Dict.set json "merged_commitSha" (Js.Json.string v);
+  end; (* match v.optional_merged_commit_sha *)
+  json
diff --git a/web/src/messages/ChangeBs.mli b/web/src/messages/ChangeBs.mli
new file mode 100644
index 000000000..d03646441
--- /dev/null
+++ b/web/src/messages/ChangeBs.mli
@@ -0,0 +1,109 @@
+(** change.proto BuckleScript Encoding *)
+
+
+(** {2 Protobuf JSON Encoding} *)
+
+val encode_ident : ChangeTypes.ident -> Js.Json.t Js.Dict.t
+(** [encode_ident v dict] encodes [v] in the given JSON [dict] *)
+
+val encode_changed_file : ChangeTypes.changed_file -> Js.Json.t Js.Dict.t
+(** [encode_changed_file v dict] encodes [v] in the given JSON [dict] *)
+
+val encode_changed_file_path : ChangeTypes.changed_file_path -> Js.Json.t Js.Dict.t
+(** [encode_changed_file_path v dict] encodes [v] in the given JSON [dict] *)
+
+val encode_commit : ChangeTypes.commit -> Js.Json.t Js.Dict.t
+(** [encode_commit v dict] encodes [v] in the given JSON [dict] *)
+
+val encode_change_change_state : ChangeTypes.change_change_state -> string
+(** [encode_change_change_state v] returns a JSON string *)
+
+val encode_change_optional_merged_by : ChangeTypes.change_optional_merged_by -> Js.Json.t Js.Dict.t
+(** [encode_change_optional_merged_by v dict] encodes [v] in the given JSON [dict] *)
+
+val encode_change_optional_merged_at : ChangeTypes.change_optional_merged_at -> Js.Json.t Js.Dict.t
+(** [encode_change_optional_merged_at v dict] encodes [v] in the given JSON [dict] *)
+
+val encode_change_optional_closed_at : ChangeTypes.change_optional_closed_at -> Js.Json.t Js.Dict.t
+(** [encode_change_optional_closed_at v dict] encodes [v] in the given JSON [dict] *)
+
+val encode_change_optional_duration : ChangeTypes.change_optional_duration -> Js.Json.t Js.Dict.t
+(** [encode_change_optional_duration v dict] encodes [v] in the given JSON [dict] *)
+
+val encode_change_optional_self_merged : ChangeTypes.change_optional_self_merged -> Js.Json.t Js.Dict.t
+(** [encode_change_optional_self_merged v dict] encodes [v] in the given JSON [dict] *)
+
+val encode_change_optional_merged_commit_sha : ChangeTypes.change_optional_merged_commit_sha -> Js.Json.t Js.Dict.t
+(** [encode_change_optional_merged_commit_sha v dict] encodes [v] in the given JSON [dict] *)
+
+val encode_change : ChangeTypes.change -> Js.Json.t Js.Dict.t
+(** [encode_change v dict] encodes [v] in the given JSON [dict] *)
+
+val encode_change_reviewed_event : ChangeTypes.change_reviewed_event -> Js.Json.t Js.Dict.t
+(** [encode_change_reviewed_event v dict] encodes [v] in the given JSON [dict] *)
+
+val encode_change_event_type : ChangeTypes.change_event_type -> Js.Json.t Js.Dict.t
+(** [encode_change_event_type v dict] encodes [v] in the given JSON [dict] *)
+
+val encode_change_event_optional_duration : ChangeTypes.change_event_optional_duration -> Js.Json.t Js.Dict.t
+(** [encode_change_event_optional_duration v dict] encodes [v] in the given JSON [dict] *)
+
+val encode_change_event_optional_merged_commit_sha : ChangeTypes.change_event_optional_merged_commit_sha -> Js.Json.t Js.Dict.t
+(** [encode_change_event_optional_merged_commit_sha v dict] encodes [v] in the given JSON [dict] *)
+
+val encode_change_event : ChangeTypes.change_event -> Js.Json.t Js.Dict.t
+(** [encode_change_event v dict] encodes [v] in the given JSON [dict] *)
+
+
+(** {2 BS Decoding} *)
+
+val decode_ident : Js.Json.t Js.Dict.t -> ChangeTypes.ident
+(** [decode_ident decoder] decodes an [ident] value from [decoder] *)
+
+val decode_changed_file : Js.Json.t Js.Dict.t -> ChangeTypes.changed_file
+(** [decode_changed_file decoder] decodes a [changed_file] value from [decoder] *)
+
+val decode_changed_file_path : Js.Json.t Js.Dict.t -> ChangeTypes.changed_file_path
+(** [decode_changed_file_path decoder] decodes a [changed_file_path] value from [decoder] *)
+
+val decode_commit : Js.Json.t Js.Dict.t -> ChangeTypes.commit
+(** [decode_commit decoder] decodes a [commit] value from [decoder] *)
+
+val decode_change_change_state : Js.Json.t -> ChangeTypes.change_change_state
+(** [decode_change_change_state value] decodes a [change_change_state] from a JSON value *)
+
+val decode_change_optional_merged_by : Js.Json.t Js.Dict.t -> ChangeTypes.change_optional_merged_by
+(** [decode_change_optional_merged_by decoder] decodes a [change_optional_merged_by] value from [decoder] *)
+
+val decode_change_optional_merged_at : Js.Json.t Js.Dict.t -> ChangeTypes.change_optional_merged_at
+(** [decode_change_optional_merged_at decoder] decodes a [change_optional_merged_at] value from [decoder] *)
+
+val decode_change_optional_closed_at : Js.Json.t Js.Dict.t -> ChangeTypes.change_optional_closed_at
+(** [decode_change_optional_closed_at decoder] decodes a [change_optional_closed_at] value from [decoder] *)
+
+val decode_change_optional_duration : Js.Json.t Js.Dict.t -> ChangeTypes.change_optional_duration
+(** [decode_change_optional_duration decoder] decodes a [change_optional_duration] value from [decoder] *)
+
+val decode_change_optional_self_merged : Js.Json.t Js.Dict.t -> ChangeTypes.change_optional_self_merged
+(** [decode_change_optional_self_merged decoder] decodes a [change_optional_self_merged] value from [decoder] *)
+
+val decode_change_optional_merged_commit_sha : Js.Json.t Js.Dict.t -> ChangeTypes.change_optional_merged_commit_sha
+(** [decode_change_optional_merged_commit_sha decoder] decodes a [change_optional_merged_commit_sha] value from [decoder] *)
+
+val decode_change : Js.Json.t Js.Dict.t -> ChangeTypes.change
+(** [decode_change decoder] decodes a [change] value from [decoder] *)
+
+val decode_change_reviewed_event : Js.Json.t Js.Dict.t -> ChangeTypes.change_reviewed_event
+(** [decode_change_reviewed_event decoder] decodes a [change_reviewed_event] value from [decoder] *)
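+
+(* Example (illustrative, not part of the generated interface): encoding a
+   change event to its protobuf-JSON dict and decoding it back. The [ev]
+   value is hypothetical and relies on the [ChangeTypes] defaults introduced
+   by this patch.
+
+     let ev = ChangeTypes.default_change_event ~id:"change-1" ~draft:true ()
+     let dict = ChangeBs.encode_change_event ev
+     let ev' = ChangeBs.decode_change_event dict
+
+   Decoding the freshly encoded dict restores [ev]: absent optional fields
+   are omitted from the JSON and fall back to their defaults on decode. *)
+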
+val decode_change_event_type : Js.Json.t Js.Dict.t -> ChangeTypes.change_event_type +(** [decode_change_event_type decoder] decodes a [change_event_type] value from [decoder] *) + +val decode_change_event_optional_duration : Js.Json.t Js.Dict.t -> ChangeTypes.change_event_optional_duration +(** [decode_change_event_optional_duration decoder] decodes a [change_event_optional_duration] value from [decoder] *) + +val decode_change_event_optional_merged_commit_sha : Js.Json.t Js.Dict.t -> ChangeTypes.change_event_optional_merged_commit_sha +(** [decode_change_event_optional_merged_commit_sha decoder] decodes a [change_event_optional_merged_commit_sha] value from [decoder] *) + +val decode_change_event : Js.Json.t Js.Dict.t -> ChangeTypes.change_event +(** [decode_change_event decoder] decodes a [change_event] value from [decoder] *) diff --git a/web/src/messages/ChangeTypes.ml b/web/src/messages/ChangeTypes.ml new file mode 100644 index 000000000..fe10e1467 --- /dev/null +++ b/web/src/messages/ChangeTypes.ml @@ -0,0 +1,310 @@ +[@@@ocaml.warning "-27-30-39"] + + +type ident = { + uid : string; + muid : string; + groups : string list; +} + +type changed_file = { + additions : int32; + deletions : int32; + path : string; +} + +type changed_file_path = { + path : string; +} + +type commit = { + sha : string; + author : ident option; + committer : ident option; + authored_at : TimestampTypes.timestamp option; + committed_at : TimestampTypes.timestamp option; + additions : int32; + deletions : int32; + title : string; +} + +type change_change_state = + | Open + | Merged + | Closed + +type change_optional_merged_by = + | Merged_by of ident + +and change_optional_merged_at = + | Merged_at of TimestampTypes.timestamp + +and change_optional_closed_at = + | Closed_at of TimestampTypes.timestamp + +and change_optional_duration = + | Duration of int32 + +and change_optional_self_merged = + | Self_merged of bool + +and change_optional_merged_commit_sha = + | Merged_commit_sha of string + +and change = { + id : string; + number : int32; + change_id : string; + title : string; + text : string; + url : string; + commit_count : int32; + additions : int32; + deletions : int32; + changed_files_count : int32; + changed_files : changed_file list; + commits : commit list; + repository_prefix : string; + repository_fullname : string; + repository_shortname : string; + author : ident option; + optional_merged_by : change_optional_merged_by; + branch : string; + target_branch : string; + created_at : TimestampTypes.timestamp option; + optional_merged_at : change_optional_merged_at; + updated_at : TimestampTypes.timestamp option; + optional_closed_at : change_optional_closed_at; + state : change_change_state; + optional_duration : change_optional_duration; + mergeable : string; + labels : string list; + assignees : ident list; + approvals : string list; + draft : bool; + optional_self_merged : change_optional_self_merged; + optional_merged_commit_sha : change_optional_merged_commit_sha; +} + +type change_reviewed_event = { + approvals : string list; +} + +type change_event_type = + | Change_created + | Change_commented + | Change_abandoned + | Change_reviewed of change_reviewed_event + | Change_commit_force_pushed + | Change_commit_pushed + | Change_merged + +and change_event_optional_duration = + | Duration of int32 + +and change_event_optional_merged_commit_sha = + | Merged_commit_sha of string + +and change_event = { + id : string; + created_at : TimestampTypes.timestamp option; + author : ident option; + 
repository_prefix : string; + repository_fullname : string; + repository_shortname : string; + branch : string; + target_branch : string; + number : int32; + change_id : string; + url : string; + on_author : ident option; + on_created_at : TimestampTypes.timestamp option; + changed_files : changed_file_path list; + type_ : change_event_type; + labels : string list; + optional_duration : change_event_optional_duration; + draft : bool; + optional_merged_commit_sha : change_event_optional_merged_commit_sha; +} + +let rec default_ident + ?uid:((uid:string) = "") + ?muid:((muid:string) = "") + ?groups:((groups:string list) = []) + () : ident = { + uid; + muid; + groups; +} + +let rec default_changed_file + ?additions:((additions:int32) = 0l) + ?deletions:((deletions:int32) = 0l) + ?path:((path:string) = "") + () : changed_file = { + additions; + deletions; + path; +} + +let rec default_changed_file_path + ?path:((path:string) = "") + () : changed_file_path = { + path; +} + +let rec default_commit + ?sha:((sha:string) = "") + ?author:((author:ident option) = None) + ?committer:((committer:ident option) = None) + ?authored_at:((authored_at:TimestampTypes.timestamp option) = None) + ?committed_at:((committed_at:TimestampTypes.timestamp option) = None) + ?additions:((additions:int32) = 0l) + ?deletions:((deletions:int32) = 0l) + ?title:((title:string) = "") + () : commit = { + sha; + author; + committer; + authored_at; + committed_at; + additions; + deletions; + title; +} + +let rec default_change_change_state () = (Open:change_change_state) + +let rec default_change_optional_merged_by () : change_optional_merged_by = Merged_by (default_ident ()) + +and default_change_optional_merged_at () : change_optional_merged_at = Merged_at (TimestampTypes.default_timestamp) + +and default_change_optional_closed_at () : change_optional_closed_at = Closed_at (TimestampTypes.default_timestamp) + +and default_change_optional_duration () : change_optional_duration = Duration (0l) + +and default_change_optional_self_merged () : change_optional_self_merged = Self_merged (false) + +and default_change_optional_merged_commit_sha () : change_optional_merged_commit_sha = Merged_commit_sha ("") + +and default_change + ?id:((id:string) = "") + ?number:((number:int32) = 0l) + ?change_id:((change_id:string) = "") + ?title:((title:string) = "") + ?text:((text:string) = "") + ?url:((url:string) = "") + ?commit_count:((commit_count:int32) = 0l) + ?additions:((additions:int32) = 0l) + ?deletions:((deletions:int32) = 0l) + ?changed_files_count:((changed_files_count:int32) = 0l) + ?changed_files:((changed_files:changed_file list) = []) + ?commits:((commits:commit list) = []) + ?repository_prefix:((repository_prefix:string) = "") + ?repository_fullname:((repository_fullname:string) = "") + ?repository_shortname:((repository_shortname:string) = "") + ?author:((author:ident option) = None) + ?optional_merged_by:((optional_merged_by:change_optional_merged_by) = Merged_by (default_ident ())) + ?branch:((branch:string) = "") + ?target_branch:((target_branch:string) = "") + ?created_at:((created_at:TimestampTypes.timestamp option) = None) + ?optional_merged_at:((optional_merged_at:change_optional_merged_at) = Merged_at (TimestampTypes.default_timestamp)) + ?updated_at:((updated_at:TimestampTypes.timestamp option) = None) + ?optional_closed_at:((optional_closed_at:change_optional_closed_at) = Closed_at (TimestampTypes.default_timestamp)) + ?state:((state:change_change_state) = default_change_change_state ()) + 
?optional_duration:((optional_duration:change_optional_duration) = Duration (0l)) + ?mergeable:((mergeable:string) = "") + ?labels:((labels:string list) = []) + ?assignees:((assignees:ident list) = []) + ?approvals:((approvals:string list) = []) + ?draft:((draft:bool) = false) + ?optional_self_merged:((optional_self_merged:change_optional_self_merged) = Self_merged (false)) + ?optional_merged_commit_sha:((optional_merged_commit_sha:change_optional_merged_commit_sha) = Merged_commit_sha ("")) + () : change = { + id; + number; + change_id; + title; + text; + url; + commit_count; + additions; + deletions; + changed_files_count; + changed_files; + commits; + repository_prefix; + repository_fullname; + repository_shortname; + author; + optional_merged_by; + branch; + target_branch; + created_at; + optional_merged_at; + updated_at; + optional_closed_at; + state; + optional_duration; + mergeable; + labels; + assignees; + approvals; + draft; + optional_self_merged; + optional_merged_commit_sha; +} + +let rec default_change_reviewed_event + ?approvals:((approvals:string list) = []) + () : change_reviewed_event = { + approvals; +} + +let rec default_change_event_type (): change_event_type = Change_created + +and default_change_event_optional_duration () : change_event_optional_duration = Duration (0l) + +and default_change_event_optional_merged_commit_sha () : change_event_optional_merged_commit_sha = Merged_commit_sha ("") + +and default_change_event + ?id:((id:string) = "") + ?created_at:((created_at:TimestampTypes.timestamp option) = None) + ?author:((author:ident option) = None) + ?repository_prefix:((repository_prefix:string) = "") + ?repository_fullname:((repository_fullname:string) = "") + ?repository_shortname:((repository_shortname:string) = "") + ?branch:((branch:string) = "") + ?target_branch:((target_branch:string) = "") + ?number:((number:int32) = 0l) + ?change_id:((change_id:string) = "") + ?url:((url:string) = "") + ?on_author:((on_author:ident option) = None) + ?on_created_at:((on_created_at:TimestampTypes.timestamp option) = None) + ?changed_files:((changed_files:changed_file_path list) = []) + ?type_:((type_:change_event_type) = Change_created) + ?labels:((labels:string list) = []) + ?optional_duration:((optional_duration:change_event_optional_duration) = Duration (0l)) + ?draft:((draft:bool) = false) + ?optional_merged_commit_sha:((optional_merged_commit_sha:change_event_optional_merged_commit_sha) = Merged_commit_sha ("")) + () : change_event = { + id; + created_at; + author; + repository_prefix; + repository_fullname; + repository_shortname; + branch; + target_branch; + number; + change_id; + url; + on_author; + on_created_at; + changed_files; + type_; + labels; + optional_duration; + draft; + optional_merged_commit_sha; +} diff --git a/web/src/messages/ChangeTypes.mli b/web/src/messages/ChangeTypes.mli new file mode 100644 index 000000000..578239a4c --- /dev/null +++ b/web/src/messages/ChangeTypes.mli @@ -0,0 +1,266 @@ +(** change.proto Types *) + + + +(** {2 Types} *) + +type ident = { + uid : string; + muid : string; + groups : string list; +} + +type changed_file = { + additions : int32; + deletions : int32; + path : string; +} + +type changed_file_path = { + path : string; +} + +type commit = { + sha : string; + author : ident option; + committer : ident option; + authored_at : TimestampTypes.timestamp option; + committed_at : TimestampTypes.timestamp option; + additions : int32; + deletions : int32; + title : string; +} + +type change_change_state = + | Open + | Merged + | 
Closed + +type change_optional_merged_by = + | Merged_by of ident + +and change_optional_merged_at = + | Merged_at of TimestampTypes.timestamp + +and change_optional_closed_at = + | Closed_at of TimestampTypes.timestamp + +and change_optional_duration = + | Duration of int32 + +and change_optional_self_merged = + | Self_merged of bool + +and change_optional_merged_commit_sha = + | Merged_commit_sha of string + +and change = { + id : string; + number : int32; + change_id : string; + title : string; + text : string; + url : string; + commit_count : int32; + additions : int32; + deletions : int32; + changed_files_count : int32; + changed_files : changed_file list; + commits : commit list; + repository_prefix : string; + repository_fullname : string; + repository_shortname : string; + author : ident option; + optional_merged_by : change_optional_merged_by; + branch : string; + target_branch : string; + created_at : TimestampTypes.timestamp option; + optional_merged_at : change_optional_merged_at; + updated_at : TimestampTypes.timestamp option; + optional_closed_at : change_optional_closed_at; + state : change_change_state; + optional_duration : change_optional_duration; + mergeable : string; + labels : string list; + assignees : ident list; + approvals : string list; + draft : bool; + optional_self_merged : change_optional_self_merged; + optional_merged_commit_sha : change_optional_merged_commit_sha; +} + +type change_reviewed_event = { + approvals : string list; +} + +type change_event_type = + | Change_created + | Change_commented + | Change_abandoned + | Change_reviewed of change_reviewed_event + | Change_commit_force_pushed + | Change_commit_pushed + | Change_merged + +and change_event_optional_duration = + | Duration of int32 + +and change_event_optional_merged_commit_sha = + | Merged_commit_sha of string + +and change_event = { + id : string; + created_at : TimestampTypes.timestamp option; + author : ident option; + repository_prefix : string; + repository_fullname : string; + repository_shortname : string; + branch : string; + target_branch : string; + number : int32; + change_id : string; + url : string; + on_author : ident option; + on_created_at : TimestampTypes.timestamp option; + changed_files : changed_file_path list; + type_ : change_event_type; + labels : string list; + optional_duration : change_event_optional_duration; + draft : bool; + optional_merged_commit_sha : change_event_optional_merged_commit_sha; +} + + +(** {2 Default values} *) + +val default_ident : + ?uid:string -> + ?muid:string -> + ?groups:string list -> + unit -> + ident +(** [default_ident ()] is the default value for type [ident] *) + +val default_changed_file : + ?additions:int32 -> + ?deletions:int32 -> + ?path:string -> + unit -> + changed_file +(** [default_changed_file ()] is the default value for type [changed_file] *) + +val default_changed_file_path : + ?path:string -> + unit -> + changed_file_path +(** [default_changed_file_path ()] is the default value for type [changed_file_path] *) + +val default_commit : + ?sha:string -> + ?author:ident option -> + ?committer:ident option -> + ?authored_at:TimestampTypes.timestamp option -> + ?committed_at:TimestampTypes.timestamp option -> + ?additions:int32 -> + ?deletions:int32 -> + ?title:string -> + unit -> + commit +(** [default_commit ()] is the default value for type [commit] *) + +val default_change_change_state : unit -> change_change_state +(** [default_change_change_state ()] is the default value for type [change_change_state] *) + +val 
default_change_optional_merged_by : unit -> change_optional_merged_by +(** [default_change_optional_merged_by ()] is the default value for type [change_optional_merged_by] *) + +val default_change_optional_merged_at : unit -> change_optional_merged_at +(** [default_change_optional_merged_at ()] is the default value for type [change_optional_merged_at] *) + +val default_change_optional_closed_at : unit -> change_optional_closed_at +(** [default_change_optional_closed_at ()] is the default value for type [change_optional_closed_at] *) + +val default_change_optional_duration : unit -> change_optional_duration +(** [default_change_optional_duration ()] is the default value for type [change_optional_duration] *) + +val default_change_optional_self_merged : unit -> change_optional_self_merged +(** [default_change_optional_self_merged ()] is the default value for type [change_optional_self_merged] *) + +val default_change_optional_merged_commit_sha : unit -> change_optional_merged_commit_sha +(** [default_change_optional_merged_commit_sha ()] is the default value for type [change_optional_merged_commit_sha] *) + +val default_change : + ?id:string -> + ?number:int32 -> + ?change_id:string -> + ?title:string -> + ?text:string -> + ?url:string -> + ?commit_count:int32 -> + ?additions:int32 -> + ?deletions:int32 -> + ?changed_files_count:int32 -> + ?changed_files:changed_file list -> + ?commits:commit list -> + ?repository_prefix:string -> + ?repository_fullname:string -> + ?repository_shortname:string -> + ?author:ident option -> + ?optional_merged_by:change_optional_merged_by -> + ?branch:string -> + ?target_branch:string -> + ?created_at:TimestampTypes.timestamp option -> + ?optional_merged_at:change_optional_merged_at -> + ?updated_at:TimestampTypes.timestamp option -> + ?optional_closed_at:change_optional_closed_at -> + ?state:change_change_state -> + ?optional_duration:change_optional_duration -> + ?mergeable:string -> + ?labels:string list -> + ?assignees:ident list -> + ?approvals:string list -> + ?draft:bool -> + ?optional_self_merged:change_optional_self_merged -> + ?optional_merged_commit_sha:change_optional_merged_commit_sha -> + unit -> + change +(** [default_change ()] is the default value for type [change] *) + +val default_change_reviewed_event : + ?approvals:string list -> + unit -> + change_reviewed_event +(** [default_change_reviewed_event ()] is the default value for type [change_reviewed_event] *) + +val default_change_event_type : unit -> change_event_type +(** [default_change_event_type ()] is the default value for type [change_event_type] *) + +val default_change_event_optional_duration : unit -> change_event_optional_duration +(** [default_change_event_optional_duration ()] is the default value for type [change_event_optional_duration] *) + +val default_change_event_optional_merged_commit_sha : unit -> change_event_optional_merged_commit_sha +(** [default_change_event_optional_merged_commit_sha ()] is the default value for type [change_event_optional_merged_commit_sha] *) + +val default_change_event : + ?id:string -> + ?created_at:TimestampTypes.timestamp option -> + ?author:ident option -> + ?repository_prefix:string -> + ?repository_fullname:string -> + ?repository_shortname:string -> + ?branch:string -> + ?target_branch:string -> + ?number:int32 -> + ?change_id:string -> + ?url:string -> + ?on_author:ident option -> + ?on_created_at:TimestampTypes.timestamp option -> + ?changed_files:changed_file_path list -> + ?type_:change_event_type -> + ?labels:string list -> + 
?optional_duration:change_event_optional_duration -> + ?draft:bool -> + ?optional_merged_commit_sha:change_event_optional_merged_commit_sha -> + unit -> + change_event +(** [default_change_event ()] is the default value for type [change_event] *) diff --git a/web/src/messages/CrawlerBs.ml b/web/src/messages/CrawlerBs.ml new file mode 100644 index 000000000..8b05db5c4 --- /dev/null +++ b/web/src/messages/CrawlerBs.ml @@ -0,0 +1,788 @@ +[@@@ocaml.warning "-27-30-39"] + +type crawler_error_mutable = { + mutable message : string; + mutable body : string; + mutable created_at : TimestampTypes.timestamp option; + mutable entity : CrawlerTypes.entity option; +} + +let default_crawler_error_mutable () : crawler_error_mutable = { + message = ""; + body = ""; + created_at = None; + entity = None; +} + +type errors_request_mutable = { + mutable index : string; + mutable query : string; +} + +let default_errors_request_mutable () : errors_request_mutable = { + index = ""; + query = ""; +} + +type errors_list_mutable = { + mutable errors : CrawlerTypes.crawler_error list; +} + +let default_errors_list_mutable () : errors_list_mutable = { + errors = []; +} + +type project_mutable = { + mutable full_path : string; +} + +let default_project_mutable () : project_mutable = { + full_path = ""; +} + +type add_doc_request_mutable = { + mutable index : string; + mutable crawler : string; + mutable apikey : string; + mutable entity : CrawlerTypes.entity option; + mutable changes : ChangeTypes.change list; + mutable events : ChangeTypes.change_event list; + mutable projects : CrawlerTypes.project list; + mutable task_datas : SearchTypes.task_data list; + mutable issues : IssueTypes.issue list; + mutable issue_events : IssueTypes.issue_event list; + mutable errors : CrawlerTypes.crawler_error list; +} + +let default_add_doc_request_mutable () : add_doc_request_mutable = { + index = ""; + crawler = ""; + apikey = ""; + entity = None; + changes = []; + events = []; + projects = []; + task_datas = []; + issues = []; + issue_events = []; + errors = []; +} + +type commit_request_mutable = { + mutable index : string; + mutable crawler : string; + mutable apikey : string; + mutable entity : CrawlerTypes.entity option; + mutable timestamp : TimestampTypes.timestamp option; +} + +let default_commit_request_mutable () : commit_request_mutable = { + index = ""; + crawler = ""; + apikey = ""; + entity = None; + timestamp = None; +} + +type commit_info_request_mutable = { + mutable index : string; + mutable crawler : string; + mutable entity : CrawlerTypes.entity_type; + mutable offset : int32; +} + +let default_commit_info_request_mutable () : commit_info_request_mutable = { + index = ""; + crawler = ""; + entity = CrawlerTypes.default_entity_type (); + offset = 0l; +} + +type commit_info_response_oldest_entity_mutable = { + mutable entity : CrawlerTypes.entity option; + mutable last_commit_at : TimestampTypes.timestamp option; +} + +let default_commit_info_response_oldest_entity_mutable () : commit_info_response_oldest_entity_mutable = { + entity = None; + last_commit_at = None; +} + + +let rec decode_entity json = + let keys = Js.Dict.keys json in + let rec loop = function + | -1 -> Pbrt_bs.E.malformed_variant "entity" + | i -> + begin match Array.unsafe_get keys i with + | "organization_name" -> + let json = Js.Dict.unsafeGet json "organization_name" in + (CrawlerTypes.Organization_name (Pbrt_bs.string json "entity" "Organization_name") : CrawlerTypes.entity) + | "project_name" -> + let json = Js.Dict.unsafeGet json 
"project_name" in + (CrawlerTypes.Project_name (Pbrt_bs.string json "entity" "Project_name") : CrawlerTypes.entity) + | "project_issueName" -> + let json = Js.Dict.unsafeGet json "project_issueName" in + (CrawlerTypes.Project_issue_name (Pbrt_bs.string json "entity" "Project_issue_name") : CrawlerTypes.entity) + | "td_name" -> + let json = Js.Dict.unsafeGet json "td_name" in + (CrawlerTypes.Td_name (Pbrt_bs.string json "entity" "Td_name") : CrawlerTypes.entity) + | "user_name" -> + let json = Js.Dict.unsafeGet json "user_name" in + (CrawlerTypes.User_name (Pbrt_bs.string json "entity" "User_name") : CrawlerTypes.entity) + + | _ -> loop (i - 1) + end + in + loop (Array.length keys - 1) + +let rec decode_entity_type (json:Js.Json.t) = + match Pbrt_bs.string json "entity_type" "value" with + | "ENTITY_TYPE_ORGANIZATION" -> (CrawlerTypes.Entity_type_organization : CrawlerTypes.entity_type) + | "ENTITY_TYPE_PROJECT" -> (CrawlerTypes.Entity_type_project : CrawlerTypes.entity_type) + | "ENTITY_TYPE_TASK_DATA" -> (CrawlerTypes.Entity_type_task_data : CrawlerTypes.entity_type) + | "ENTITY_TYPE_USER" -> (CrawlerTypes.Entity_type_user : CrawlerTypes.entity_type) + | "" -> CrawlerTypes.Entity_type_organization + | _ -> Pbrt_bs.E.malformed_variant "entity_type" + +let rec decode_crawler_error json = + let v = default_crawler_error_mutable () in + let keys = Js.Dict.keys json in + let last_key_index = Array.length keys - 1 in + for i = 0 to last_key_index do + match Array.unsafe_get keys i with + | "message" -> + let json = Js.Dict.unsafeGet json "message" in + v.message <- Pbrt_bs.string json "crawler_error" "message" + | "body" -> + let json = Js.Dict.unsafeGet json "body" in + v.body <- Pbrt_bs.string json "crawler_error" "body" + | "created_at" -> + let json = Js.Dict.unsafeGet json "created_at" in + v.created_at <- Some ((TimestampBs.decode_timestamp (Pbrt_bs.string json "crawler_error" "created_at"))) + | "entity" -> + let json = Js.Dict.unsafeGet json "entity" in + v.entity <- Some ((decode_entity (Pbrt_bs.object_ json "crawler_error" "entity"))) + + | _ -> () (*Unknown fields are ignored*) + done; + ({ + CrawlerTypes.message = v.message; + CrawlerTypes.body = v.body; + CrawlerTypes.created_at = v.created_at; + CrawlerTypes.entity = v.entity; + } : CrawlerTypes.crawler_error) + +let rec decode_errors_request json = + let v = default_errors_request_mutable () in + let keys = Js.Dict.keys json in + let last_key_index = Array.length keys - 1 in + for i = 0 to last_key_index do + match Array.unsafe_get keys i with + | "index" -> + let json = Js.Dict.unsafeGet json "index" in + v.index <- Pbrt_bs.string json "errors_request" "index" + | "query" -> + let json = Js.Dict.unsafeGet json "query" in + v.query <- Pbrt_bs.string json "errors_request" "query" + + | _ -> () (*Unknown fields are ignored*) + done; + ({ + CrawlerTypes.index = v.index; + CrawlerTypes.query = v.query; + } : CrawlerTypes.errors_request) + +let rec decode_errors_list json = + let v = default_errors_list_mutable () in + let keys = Js.Dict.keys json in + let last_key_index = Array.length keys - 1 in + for i = 0 to last_key_index do + match Array.unsafe_get keys i with + | "errors" -> begin + let a = + let a = Js.Dict.unsafeGet json "errors" in + Pbrt_bs.array_ a "errors_list" "errors" + in + v.errors <- Array.map (fun json -> + (decode_crawler_error (Pbrt_bs.object_ json "errors_list" "errors")) + ) a |> Array.to_list; + end + + | _ -> () (*Unknown fields are ignored*) + done; + ({ + CrawlerTypes.errors = v.errors; + } : 
CrawlerTypes.errors_list) + +let rec decode_errors_response json = + let keys = Js.Dict.keys json in + let rec loop = function + | -1 -> Pbrt_bs.E.malformed_variant "errors_response" + | i -> + begin match Array.unsafe_get keys i with + | "success" -> + let json = Js.Dict.unsafeGet json "success" in + (CrawlerTypes.Success ((decode_errors_list (Pbrt_bs.object_ json "errors_response" "Success"))) : CrawlerTypes.errors_response) + | "error" -> + let json = Js.Dict.unsafeGet json "error" in + (CrawlerTypes.Error (Pbrt_bs.string json "errors_response" "Error") : CrawlerTypes.errors_response) + + | _ -> loop (i - 1) + end + in + loop (Array.length keys - 1) + +let rec decode_project json = + let v = default_project_mutable () in + let keys = Js.Dict.keys json in + let last_key_index = Array.length keys - 1 in + for i = 0 to last_key_index do + match Array.unsafe_get keys i with + | "full_path" -> + let json = Js.Dict.unsafeGet json "full_path" in + v.full_path <- Pbrt_bs.string json "project" "full_path" + + | _ -> () (*Unknown fields are ignored*) + done; + ({ + CrawlerTypes.full_path = v.full_path; + } : CrawlerTypes.project) + +let rec decode_add_doc_request json = + let v = default_add_doc_request_mutable () in + let keys = Js.Dict.keys json in + let last_key_index = Array.length keys - 1 in + for i = 0 to last_key_index do + match Array.unsafe_get keys i with + | "index" -> + let json = Js.Dict.unsafeGet json "index" in + v.index <- Pbrt_bs.string json "add_doc_request" "index" + | "crawler" -> + let json = Js.Dict.unsafeGet json "crawler" in + v.crawler <- Pbrt_bs.string json "add_doc_request" "crawler" + | "apikey" -> + let json = Js.Dict.unsafeGet json "apikey" in + v.apikey <- Pbrt_bs.string json "add_doc_request" "apikey" + | "entity" -> + let json = Js.Dict.unsafeGet json "entity" in + v.entity <- Some ((decode_entity (Pbrt_bs.object_ json "add_doc_request" "entity"))) + | "changes" -> begin + let a = + let a = Js.Dict.unsafeGet json "changes" in + Pbrt_bs.array_ a "add_doc_request" "changes" + in + v.changes <- Array.map (fun json -> + (ChangeBs.decode_change (Pbrt_bs.object_ json "add_doc_request" "changes")) + ) a |> Array.to_list; + end + | "events" -> begin + let a = + let a = Js.Dict.unsafeGet json "events" in + Pbrt_bs.array_ a "add_doc_request" "events" + in + v.events <- Array.map (fun json -> + (ChangeBs.decode_change_event (Pbrt_bs.object_ json "add_doc_request" "events")) + ) a |> Array.to_list; + end + | "projects" -> begin + let a = + let a = Js.Dict.unsafeGet json "projects" in + Pbrt_bs.array_ a "add_doc_request" "projects" + in + v.projects <- Array.map (fun json -> + (decode_project (Pbrt_bs.object_ json "add_doc_request" "projects")) + ) a |> Array.to_list; + end + | "task_datas" -> begin + let a = + let a = Js.Dict.unsafeGet json "task_datas" in + Pbrt_bs.array_ a "add_doc_request" "task_datas" + in + v.task_datas <- Array.map (fun json -> + (SearchBs.decode_task_data (Pbrt_bs.object_ json "add_doc_request" "task_datas")) + ) a |> Array.to_list; + end + | "issues" -> begin + let a = + let a = Js.Dict.unsafeGet json "issues" in + Pbrt_bs.array_ a "add_doc_request" "issues" + in + v.issues <- Array.map (fun json -> + (IssueBs.decode_issue (Pbrt_bs.object_ json "add_doc_request" "issues")) + ) a |> Array.to_list; + end + | "issue_events" -> begin + let a = + let a = Js.Dict.unsafeGet json "issue_events" in + Pbrt_bs.array_ a "add_doc_request" "issue_events" + in + v.issue_events <- Array.map (fun json -> + (IssueBs.decode_issue_event (Pbrt_bs.object_ json 
"add_doc_request" "issue_events")) + ) a |> Array.to_list; + end + | "errors" -> begin + let a = + let a = Js.Dict.unsafeGet json "errors" in + Pbrt_bs.array_ a "add_doc_request" "errors" + in + v.errors <- Array.map (fun json -> + (decode_crawler_error (Pbrt_bs.object_ json "add_doc_request" "errors")) + ) a |> Array.to_list; + end + + | _ -> () (*Unknown fields are ignored*) + done; + ({ + CrawlerTypes.index = v.index; + CrawlerTypes.crawler = v.crawler; + CrawlerTypes.apikey = v.apikey; + CrawlerTypes.entity = v.entity; + CrawlerTypes.changes = v.changes; + CrawlerTypes.events = v.events; + CrawlerTypes.projects = v.projects; + CrawlerTypes.task_datas = v.task_datas; + CrawlerTypes.issues = v.issues; + CrawlerTypes.issue_events = v.issue_events; + CrawlerTypes.errors = v.errors; + } : CrawlerTypes.add_doc_request) + +let rec decode_add_doc_error (json:Js.Json.t) = + match Pbrt_bs.string json "add_doc_error" "value" with + | "AddUnknownIndex" -> (CrawlerTypes.Add_unknown_index : CrawlerTypes.add_doc_error) + | "AddUnknownCrawler" -> (CrawlerTypes.Add_unknown_crawler : CrawlerTypes.add_doc_error) + | "AddUnknownApiKey" -> (CrawlerTypes.Add_unknown_api_key : CrawlerTypes.add_doc_error) + | "AddFailed" -> (CrawlerTypes.Add_failed : CrawlerTypes.add_doc_error) + | "" -> CrawlerTypes.Add_unknown_index + | _ -> Pbrt_bs.E.malformed_variant "add_doc_error" + +let rec decode_add_doc_response json = + let keys = Js.Dict.keys json in + let rec loop = function + | -1 -> Pbrt_bs.E.malformed_variant "add_doc_response" + | i -> + begin match Array.unsafe_get keys i with + | "error" -> + let json = Js.Dict.unsafeGet json "error" in + (CrawlerTypes.Error ((decode_add_doc_error json)) : CrawlerTypes.add_doc_response) + + | _ -> loop (i - 1) + end + in + loop (Array.length keys - 1) + +let rec decode_commit_request json = + let v = default_commit_request_mutable () in + let keys = Js.Dict.keys json in + let last_key_index = Array.length keys - 1 in + for i = 0 to last_key_index do + match Array.unsafe_get keys i with + | "index" -> + let json = Js.Dict.unsafeGet json "index" in + v.index <- Pbrt_bs.string json "commit_request" "index" + | "crawler" -> + let json = Js.Dict.unsafeGet json "crawler" in + v.crawler <- Pbrt_bs.string json "commit_request" "crawler" + | "apikey" -> + let json = Js.Dict.unsafeGet json "apikey" in + v.apikey <- Pbrt_bs.string json "commit_request" "apikey" + | "entity" -> + let json = Js.Dict.unsafeGet json "entity" in + v.entity <- Some ((decode_entity (Pbrt_bs.object_ json "commit_request" "entity"))) + | "timestamp" -> + let json = Js.Dict.unsafeGet json "timestamp" in + v.timestamp <- Some ((TimestampBs.decode_timestamp (Pbrt_bs.string json "commit_request" "timestamp"))) + + | _ -> () (*Unknown fields are ignored*) + done; + ({ + CrawlerTypes.index = v.index; + CrawlerTypes.crawler = v.crawler; + CrawlerTypes.apikey = v.apikey; + CrawlerTypes.entity = v.entity; + CrawlerTypes.timestamp = v.timestamp; + } : CrawlerTypes.commit_request) + +let rec decode_commit_error (json:Js.Json.t) = + match Pbrt_bs.string json "commit_error" "value" with + | "CommitUnknownIndex" -> (CrawlerTypes.Commit_unknown_index : CrawlerTypes.commit_error) + | "CommitUnknownCrawler" -> (CrawlerTypes.Commit_unknown_crawler : CrawlerTypes.commit_error) + | "CommitUnknownApiKey" -> (CrawlerTypes.Commit_unknown_api_key : CrawlerTypes.commit_error) + | "CommitDateInferiorThanPrevious" -> (CrawlerTypes.Commit_date_inferior_than_previous : CrawlerTypes.commit_error) + | "CommitDateMissing" -> 
(CrawlerTypes.Commit_date_missing : CrawlerTypes.commit_error) + | "" -> CrawlerTypes.Commit_unknown_index + | _ -> Pbrt_bs.E.malformed_variant "commit_error" + +let rec decode_commit_response json = + let keys = Js.Dict.keys json in + let rec loop = function + | -1 -> Pbrt_bs.E.malformed_variant "commit_response" + | i -> + begin match Array.unsafe_get keys i with + | "error" -> + let json = Js.Dict.unsafeGet json "error" in + (CrawlerTypes.Error ((decode_commit_error json)) : CrawlerTypes.commit_response) + | "timestamp" -> + let json = Js.Dict.unsafeGet json "timestamp" in + (CrawlerTypes.Timestamp ((TimestampBs.decode_timestamp (Pbrt_bs.string json "commit_response" "Timestamp"))) : CrawlerTypes.commit_response) + + | _ -> loop (i - 1) + end + in + loop (Array.length keys - 1) + +let rec decode_commit_info_request json = + let v = default_commit_info_request_mutable () in + let keys = Js.Dict.keys json in + let last_key_index = Array.length keys - 1 in + for i = 0 to last_key_index do + match Array.unsafe_get keys i with + | "index" -> + let json = Js.Dict.unsafeGet json "index" in + v.index <- Pbrt_bs.string json "commit_info_request" "index" + | "crawler" -> + let json = Js.Dict.unsafeGet json "crawler" in + v.crawler <- Pbrt_bs.string json "commit_info_request" "crawler" + | "entity" -> + let json = Js.Dict.unsafeGet json "entity" in + v.entity <- (decode_entity_type json) + | "offset" -> + let json = Js.Dict.unsafeGet json "offset" in + v.offset <- Pbrt_bs.int32 json "commit_info_request" "offset" + + | _ -> () (*Unknown fields are ignored*) + done; + ({ + CrawlerTypes.index = v.index; + CrawlerTypes.crawler = v.crawler; + CrawlerTypes.entity = v.entity; + CrawlerTypes.offset = v.offset; + } : CrawlerTypes.commit_info_request) + +let rec decode_commit_info_error (json:Js.Json.t) = + match Pbrt_bs.string json "commit_info_error" "value" with + | "CommitGetUnknownIndex" -> (CrawlerTypes.Commit_get_unknown_index : CrawlerTypes.commit_info_error) + | "CommitGetUnknownCrawler" -> (CrawlerTypes.Commit_get_unknown_crawler : CrawlerTypes.commit_info_error) + | "CommitGetNoEntity" -> (CrawlerTypes.Commit_get_no_entity : CrawlerTypes.commit_info_error) + | "" -> CrawlerTypes.Commit_get_unknown_index + | _ -> Pbrt_bs.E.malformed_variant "commit_info_error" + +let rec decode_commit_info_response_oldest_entity json = + let v = default_commit_info_response_oldest_entity_mutable () in + let keys = Js.Dict.keys json in + let last_key_index = Array.length keys - 1 in + for i = 0 to last_key_index do + match Array.unsafe_get keys i with + | "entity" -> + let json = Js.Dict.unsafeGet json "entity" in + v.entity <- Some ((decode_entity (Pbrt_bs.object_ json "commit_info_response_oldest_entity" "entity"))) + | "last_commit_at" -> + let json = Js.Dict.unsafeGet json "last_commit_at" in + v.last_commit_at <- Some ((TimestampBs.decode_timestamp (Pbrt_bs.string json "commit_info_response_oldest_entity" "last_commit_at"))) + + | _ -> () (*Unknown fields are ignored*) + done; + ({ + CrawlerTypes.entity = v.entity; + CrawlerTypes.last_commit_at = v.last_commit_at; + } : CrawlerTypes.commit_info_response_oldest_entity) + +let rec decode_commit_info_response json = + let keys = Js.Dict.keys json in + let rec loop = function + | -1 -> Pbrt_bs.E.malformed_variant "commit_info_response" + | i -> + begin match Array.unsafe_get keys i with + | "error" -> + let json = Js.Dict.unsafeGet json "error" in + (CrawlerTypes.Error ((decode_commit_info_error json)) : CrawlerTypes.commit_info_response) + | "entity" -> + let 
json = Js.Dict.unsafeGet json "entity" in + (CrawlerTypes.Entity ((decode_commit_info_response_oldest_entity (Pbrt_bs.object_ json "commit_info_response" "Entity"))) : CrawlerTypes.commit_info_response) + + | _ -> loop (i - 1) + end + in + loop (Array.length keys - 1) + +let rec encode_entity (v:CrawlerTypes.entity) = + let json = Js.Dict.empty () in + begin match v with + | CrawlerTypes.Organization_name v -> + Js.Dict.set json "organization_name" (Js.Json.string v); + | CrawlerTypes.Project_name v -> + Js.Dict.set json "project_name" (Js.Json.string v); + | CrawlerTypes.Project_issue_name v -> + Js.Dict.set json "project_issueName" (Js.Json.string v); + | CrawlerTypes.Td_name v -> + Js.Dict.set json "td_name" (Js.Json.string v); + | CrawlerTypes.User_name v -> + Js.Dict.set json "user_name" (Js.Json.string v); + end; + json + +let rec encode_entity_type (v:CrawlerTypes.entity_type) : string = + match v with + | CrawlerTypes.Entity_type_organization -> "ENTITY_TYPE_ORGANIZATION" + | CrawlerTypes.Entity_type_project -> "ENTITY_TYPE_PROJECT" + | CrawlerTypes.Entity_type_task_data -> "ENTITY_TYPE_TASK_DATA" + | CrawlerTypes.Entity_type_user -> "ENTITY_TYPE_USER" + +let rec encode_crawler_error (v:CrawlerTypes.crawler_error) = + let json = Js.Dict.empty () in + Js.Dict.set json "message" (Js.Json.string v.CrawlerTypes.message); + Js.Dict.set json "body" (Js.Json.string v.CrawlerTypes.body); + begin match v.CrawlerTypes.created_at with + | None -> () + | Some v -> + begin (* created_at field *) + let json' = TimestampBs.encode_timestamp v in + Js.Dict.set json "created_at" (Js.Json.string json'); + end; + end; + begin match v.CrawlerTypes.entity with + | None -> () + | Some v -> + begin (* entity field *) + let json' = encode_entity v in + Js.Dict.set json "entity" (Js.Json.object_ json'); + end; + end; + json + +let rec encode_errors_request (v:CrawlerTypes.errors_request) = + let json = Js.Dict.empty () in + Js.Dict.set json "index" (Js.Json.string v.CrawlerTypes.index); + Js.Dict.set json "query" (Js.Json.string v.CrawlerTypes.query); + json + +let rec encode_errors_list (v:CrawlerTypes.errors_list) = + let json = Js.Dict.empty () in + begin (* errors field *) + let (errors':Js.Json.t) = + v.CrawlerTypes.errors + |> Array.of_list + |> Array.map (fun v -> + v |> encode_crawler_error |> Js.Json.object_ + ) + |> Js.Json.array + in + Js.Dict.set json "errors" errors'; + end; + json + +let rec encode_errors_response (v:CrawlerTypes.errors_response) = + let json = Js.Dict.empty () in + begin match v with + | CrawlerTypes.Success v -> + begin (* success field *) + let json' = encode_errors_list v in + Js.Dict.set json "success" (Js.Json.object_ json'); + end; + | CrawlerTypes.Error v -> + Js.Dict.set json "error" (Js.Json.string v); + end; + json + +let rec encode_project (v:CrawlerTypes.project) = + let json = Js.Dict.empty () in + Js.Dict.set json "full_path" (Js.Json.string v.CrawlerTypes.full_path); + json + +let rec encode_add_doc_request (v:CrawlerTypes.add_doc_request) = + let json = Js.Dict.empty () in + Js.Dict.set json "index" (Js.Json.string v.CrawlerTypes.index); + Js.Dict.set json "crawler" (Js.Json.string v.CrawlerTypes.crawler); + Js.Dict.set json "apikey" (Js.Json.string v.CrawlerTypes.apikey); + begin match v.CrawlerTypes.entity with + | None -> () + | Some v -> + begin (* entity field *) + let json' = encode_entity v in + Js.Dict.set json "entity" (Js.Json.object_ json'); + end; + end; + begin (* changes field *) + let (changes':Js.Json.t) = + v.CrawlerTypes.changes + |> 
Array.of_list + |> Array.map (fun v -> + v |> ChangeBs.encode_change |> Js.Json.object_ + ) + |> Js.Json.array + in + Js.Dict.set json "changes" changes'; + end; + begin (* events field *) + let (events':Js.Json.t) = + v.CrawlerTypes.events + |> Array.of_list + |> Array.map (fun v -> + v |> ChangeBs.encode_change_event |> Js.Json.object_ + ) + |> Js.Json.array + in + Js.Dict.set json "events" events'; + end; + begin (* projects field *) + let (projects':Js.Json.t) = + v.CrawlerTypes.projects + |> Array.of_list + |> Array.map (fun v -> + v |> encode_project |> Js.Json.object_ + ) + |> Js.Json.array + in + Js.Dict.set json "projects" projects'; + end; + begin (* taskDatas field *) + let (task_datas':Js.Json.t) = + v.CrawlerTypes.task_datas + |> Array.of_list + |> Array.map (fun v -> + v |> SearchBs.encode_task_data |> Js.Json.object_ + ) + |> Js.Json.array + in + Js.Dict.set json "task_datas" task_datas'; + end; + begin (* issues field *) + let (issues':Js.Json.t) = + v.CrawlerTypes.issues + |> Array.of_list + |> Array.map (fun v -> + v |> IssueBs.encode_issue |> Js.Json.object_ + ) + |> Js.Json.array + in + Js.Dict.set json "issues" issues'; + end; + begin (* issueEvents field *) + let (issue_events':Js.Json.t) = + v.CrawlerTypes.issue_events + |> Array.of_list + |> Array.map (fun v -> + v |> IssueBs.encode_issue_event |> Js.Json.object_ + ) + |> Js.Json.array + in + Js.Dict.set json "issue_events" issue_events'; + end; + begin (* errors field *) + let (errors':Js.Json.t) = + v.CrawlerTypes.errors + |> Array.of_list + |> Array.map (fun v -> + v |> encode_crawler_error |> Js.Json.object_ + ) + |> Js.Json.array + in + Js.Dict.set json "errors" errors'; + end; + json + +let rec encode_add_doc_error (v:CrawlerTypes.add_doc_error) : string = + match v with + | CrawlerTypes.Add_unknown_index -> "AddUnknownIndex" + | CrawlerTypes.Add_unknown_crawler -> "AddUnknownCrawler" + | CrawlerTypes.Add_unknown_api_key -> "AddUnknownApiKey" + | CrawlerTypes.Add_failed -> "AddFailed" + +let rec encode_add_doc_response (v:CrawlerTypes.add_doc_response) = + let json = Js.Dict.empty () in + begin match v with + | CrawlerTypes.Error v -> + Js.Dict.set json "error" (Js.Json.string (encode_add_doc_error v)); + end; + json + +let rec encode_commit_request (v:CrawlerTypes.commit_request) = + let json = Js.Dict.empty () in + Js.Dict.set json "index" (Js.Json.string v.CrawlerTypes.index); + Js.Dict.set json "crawler" (Js.Json.string v.CrawlerTypes.crawler); + Js.Dict.set json "apikey" (Js.Json.string v.CrawlerTypes.apikey); + begin match v.CrawlerTypes.entity with + | None -> () + | Some v -> + begin (* entity field *) + let json' = encode_entity v in + Js.Dict.set json "entity" (Js.Json.object_ json'); + end; + end; + begin match v.CrawlerTypes.timestamp with + | None -> () + | Some v -> + begin (* timestamp field *) + let json' = TimestampBs.encode_timestamp v in + Js.Dict.set json "timestamp" (Js.Json.string json'); + end; + end; + json + +let rec encode_commit_error (v:CrawlerTypes.commit_error) : string = + match v with + | CrawlerTypes.Commit_unknown_index -> "CommitUnknownIndex" + | CrawlerTypes.Commit_unknown_crawler -> "CommitUnknownCrawler" + | CrawlerTypes.Commit_unknown_api_key -> "CommitUnknownApiKey" + | CrawlerTypes.Commit_date_inferior_than_previous -> "CommitDateInferiorThanPrevious" + | CrawlerTypes.Commit_date_missing -> "CommitDateMissing" + +let rec encode_commit_response (v:CrawlerTypes.commit_response) = + let json = Js.Dict.empty () in + begin match v with + | CrawlerTypes.Error v -> + 
Js.Dict.set json "error" (Js.Json.string (encode_commit_error v)); + | CrawlerTypes.Timestamp v -> + begin (* timestamp field *) + let json' = TimestampBs.encode_timestamp v in + Js.Dict.set json "timestamp" (Js.Json.string json'); + end; + end; + json + +let rec encode_commit_info_request (v:CrawlerTypes.commit_info_request) = + let json = Js.Dict.empty () in + Js.Dict.set json "index" (Js.Json.string v.CrawlerTypes.index); + Js.Dict.set json "crawler" (Js.Json.string v.CrawlerTypes.crawler); + Js.Dict.set json "entity" (Js.Json.string (encode_entity_type v.CrawlerTypes.entity)); + Js.Dict.set json "offset" (Js.Json.number (Int32.to_float v.CrawlerTypes.offset)); + json + +let rec encode_commit_info_error (v:CrawlerTypes.commit_info_error) : string = + match v with + | CrawlerTypes.Commit_get_unknown_index -> "CommitGetUnknownIndex" + | CrawlerTypes.Commit_get_unknown_crawler -> "CommitGetUnknownCrawler" + | CrawlerTypes.Commit_get_no_entity -> "CommitGetNoEntity" + +let rec encode_commit_info_response_oldest_entity (v:CrawlerTypes.commit_info_response_oldest_entity) = + let json = Js.Dict.empty () in + begin match v.CrawlerTypes.entity with + | None -> () + | Some v -> + begin (* entity field *) + let json' = encode_entity v in + Js.Dict.set json "entity" (Js.Json.object_ json'); + end; + end; + begin match v.CrawlerTypes.last_commit_at with + | None -> () + | Some v -> + begin (* lastCommitAt field *) + let json' = TimestampBs.encode_timestamp v in + Js.Dict.set json "last_commit_at" (Js.Json.string json'); + end; + end; + json + +let rec encode_commit_info_response (v:CrawlerTypes.commit_info_response) = + let json = Js.Dict.empty () in + begin match v with + | CrawlerTypes.Error v -> + Js.Dict.set json "error" (Js.Json.string (encode_commit_info_error v)); + | CrawlerTypes.Entity v -> + begin (* entity field *) + let json' = encode_commit_info_response_oldest_entity v in + Js.Dict.set json "entity" (Js.Json.object_ json'); + end; + end; + json diff --git a/web/src/messages/CrawlerBs.mli b/web/src/messages/CrawlerBs.mli new file mode 100644 index 000000000..ad6a614f2 --- /dev/null +++ b/web/src/messages/CrawlerBs.mli @@ -0,0 +1,109 @@ +(** crawler.proto BuckleScript Encoding *) + + +(** {2 Protobuf JSON Encoding} *) + +val encode_entity : CrawlerTypes.entity -> Js.Json.t Js.Dict.t +(** [encode_entity v dict] encodes [v] int the given JSON [dict] *) + +val encode_entity_type : CrawlerTypes.entity_type -> string +(** [encode_entity_type v] returns JSON string*) + +val encode_crawler_error : CrawlerTypes.crawler_error -> Js.Json.t Js.Dict.t +(** [encode_crawler_error v dict] encodes [v] int the given JSON [dict] *) + +val encode_errors_request : CrawlerTypes.errors_request -> Js.Json.t Js.Dict.t +(** [encode_errors_request v dict] encodes [v] int the given JSON [dict] *) + +val encode_errors_list : CrawlerTypes.errors_list -> Js.Json.t Js.Dict.t +(** [encode_errors_list v dict] encodes [v] int the given JSON [dict] *) + +val encode_errors_response : CrawlerTypes.errors_response -> Js.Json.t Js.Dict.t +(** [encode_errors_response v dict] encodes [v] int the given JSON [dict] *) + +val encode_project : CrawlerTypes.project -> Js.Json.t Js.Dict.t +(** [encode_project v dict] encodes [v] int the given JSON [dict] *) + +val encode_add_doc_request : CrawlerTypes.add_doc_request -> Js.Json.t Js.Dict.t +(** [encode_add_doc_request v dict] encodes [v] int the given JSON [dict] *) + +val encode_add_doc_error : CrawlerTypes.add_doc_error -> string +(** [encode_add_doc_error v] returns JSON string*) 
+ +val encode_add_doc_response : CrawlerTypes.add_doc_response -> Js.Json.t Js.Dict.t +(** [encode_add_doc_response v dict] encodes [v] int the given JSON [dict] *) + +val encode_commit_request : CrawlerTypes.commit_request -> Js.Json.t Js.Dict.t +(** [encode_commit_request v dict] encodes [v] int the given JSON [dict] *) + +val encode_commit_error : CrawlerTypes.commit_error -> string +(** [encode_commit_error v] returns JSON string*) + +val encode_commit_response : CrawlerTypes.commit_response -> Js.Json.t Js.Dict.t +(** [encode_commit_response v dict] encodes [v] int the given JSON [dict] *) + +val encode_commit_info_request : CrawlerTypes.commit_info_request -> Js.Json.t Js.Dict.t +(** [encode_commit_info_request v dict] encodes [v] int the given JSON [dict] *) + +val encode_commit_info_error : CrawlerTypes.commit_info_error -> string +(** [encode_commit_info_error v] returns JSON string*) + +val encode_commit_info_response_oldest_entity : CrawlerTypes.commit_info_response_oldest_entity -> Js.Json.t Js.Dict.t +(** [encode_commit_info_response_oldest_entity v dict] encodes [v] int the given JSON [dict] *) + +val encode_commit_info_response : CrawlerTypes.commit_info_response -> Js.Json.t Js.Dict.t +(** [encode_commit_info_response v dict] encodes [v] int the given JSON [dict] *) + + +(** {2 BS Decoding} *) + +val decode_entity : Js.Json.t Js.Dict.t -> CrawlerTypes.entity +(** [decode_entity decoder] decodes a [entity] value from [decoder] *) + +val decode_entity_type : Js.Json.t -> CrawlerTypes.entity_type +(** [decode_entity_type value] decodes a [entity_type] from a Json value*) + +val decode_crawler_error : Js.Json.t Js.Dict.t -> CrawlerTypes.crawler_error +(** [decode_crawler_error decoder] decodes a [crawler_error] value from [decoder] *) + +val decode_errors_request : Js.Json.t Js.Dict.t -> CrawlerTypes.errors_request +(** [decode_errors_request decoder] decodes a [errors_request] value from [decoder] *) + +val decode_errors_list : Js.Json.t Js.Dict.t -> CrawlerTypes.errors_list +(** [decode_errors_list decoder] decodes a [errors_list] value from [decoder] *) + +val decode_errors_response : Js.Json.t Js.Dict.t -> CrawlerTypes.errors_response +(** [decode_errors_response decoder] decodes a [errors_response] value from [decoder] *) + +val decode_project : Js.Json.t Js.Dict.t -> CrawlerTypes.project +(** [decode_project decoder] decodes a [project] value from [decoder] *) + +val decode_add_doc_request : Js.Json.t Js.Dict.t -> CrawlerTypes.add_doc_request +(** [decode_add_doc_request decoder] decodes a [add_doc_request] value from [decoder] *) + +val decode_add_doc_error : Js.Json.t -> CrawlerTypes.add_doc_error +(** [decode_add_doc_error value] decodes a [add_doc_error] from a Json value*) + +val decode_add_doc_response : Js.Json.t Js.Dict.t -> CrawlerTypes.add_doc_response +(** [decode_add_doc_response decoder] decodes a [add_doc_response] value from [decoder] *) + +val decode_commit_request : Js.Json.t Js.Dict.t -> CrawlerTypes.commit_request +(** [decode_commit_request decoder] decodes a [commit_request] value from [decoder] *) + +val decode_commit_error : Js.Json.t -> CrawlerTypes.commit_error +(** [decode_commit_error value] decodes a [commit_error] from a Json value*) + +val decode_commit_response : Js.Json.t Js.Dict.t -> CrawlerTypes.commit_response +(** [decode_commit_response decoder] decodes a [commit_response] value from [decoder] *) + +val decode_commit_info_request : Js.Json.t Js.Dict.t -> CrawlerTypes.commit_info_request +(** [decode_commit_info_request decoder] decodes 
a [commit_info_request] value from [decoder] *) + +val decode_commit_info_error : Js.Json.t -> CrawlerTypes.commit_info_error +(** [decode_commit_info_error value] decodes a [commit_info_error] from a Json value*) + +val decode_commit_info_response_oldest_entity : Js.Json.t Js.Dict.t -> CrawlerTypes.commit_info_response_oldest_entity +(** [decode_commit_info_response_oldest_entity decoder] decodes a [commit_info_response_oldest_entity] value from [decoder] *) + +val decode_commit_info_response : Js.Json.t Js.Dict.t -> CrawlerTypes.commit_info_response +(** [decode_commit_info_response decoder] decodes a [commit_info_response] value from [decoder] *) diff --git a/web/src/messages/CrawlerTypes.ml b/web/src/messages/CrawlerTypes.ml new file mode 100644 index 000000000..70a3be931 --- /dev/null +++ b/web/src/messages/CrawlerTypes.ml @@ -0,0 +1,212 @@ +[@@@ocaml.warning "-27-30-39"] + + +type entity = + | Organization_name of string + | Project_name of string + | Project_issue_name of string + | Td_name of string + | User_name of string + +type entity_type = + | Entity_type_organization + | Entity_type_project + | Entity_type_task_data + | Entity_type_user + +type crawler_error = { + message : string; + body : string; + created_at : TimestampTypes.timestamp option; + entity : entity option; +} + +type errors_request = { + index : string; + query : string; +} + +type errors_list = { + errors : crawler_error list; +} + +type errors_response = + | Success of errors_list + | Error of string + +type project = { + full_path : string; +} + +type add_doc_request = { + index : string; + crawler : string; + apikey : string; + entity : entity option; + changes : ChangeTypes.change list; + events : ChangeTypes.change_event list; + projects : project list; + task_datas : SearchTypes.task_data list; + issues : IssueTypes.issue list; + issue_events : IssueTypes.issue_event list; + errors : crawler_error list; +} + +type add_doc_error = + | Add_unknown_index + | Add_unknown_crawler + | Add_unknown_api_key + | Add_failed + +type add_doc_response = + | Error of add_doc_error + +type commit_request = { + index : string; + crawler : string; + apikey : string; + entity : entity option; + timestamp : TimestampTypes.timestamp option; +} + +type commit_error = + | Commit_unknown_index + | Commit_unknown_crawler + | Commit_unknown_api_key + | Commit_date_inferior_than_previous + | Commit_date_missing + +type commit_response = + | Error of commit_error + | Timestamp of TimestampTypes.timestamp + +type commit_info_request = { + index : string; + crawler : string; + entity : entity_type; + offset : int32; +} + +type commit_info_error = + | Commit_get_unknown_index + | Commit_get_unknown_crawler + | Commit_get_no_entity + +type commit_info_response_oldest_entity = { + entity : entity option; + last_commit_at : TimestampTypes.timestamp option; +} + +type commit_info_response = + | Error of commit_info_error + | Entity of commit_info_response_oldest_entity + +let rec default_entity () : entity = Organization_name ("") + +let rec default_entity_type () = (Entity_type_organization:entity_type) + +let rec default_crawler_error + ?message:((message:string) = "") + ?body:((body:string) = "") + ?created_at:((created_at:TimestampTypes.timestamp option) = None) + ?entity:((entity:entity option) = None) + () : crawler_error = { + message; + body; + created_at; + entity; +} + +let rec default_errors_request + ?index:((index:string) = "") + ?query:((query:string) = "") + () : errors_request = { + index; + query; +} + +let rec 
default_errors_list + ?errors:((errors:crawler_error list) = []) + () : errors_list = { + errors; +} + +let rec default_errors_response () : errors_response = Success (default_errors_list ()) + +let rec default_project + ?full_path:((full_path:string) = "") + () : project = { + full_path; +} + +let rec default_add_doc_request + ?index:((index:string) = "") + ?crawler:((crawler:string) = "") + ?apikey:((apikey:string) = "") + ?entity:((entity:entity option) = None) + ?changes:((changes:ChangeTypes.change list) = []) + ?events:((events:ChangeTypes.change_event list) = []) + ?projects:((projects:project list) = []) + ?task_datas:((task_datas:SearchTypes.task_data list) = []) + ?issues:((issues:IssueTypes.issue list) = []) + ?issue_events:((issue_events:IssueTypes.issue_event list) = []) + ?errors:((errors:crawler_error list) = []) + () : add_doc_request = { + index; + crawler; + apikey; + entity; + changes; + events; + projects; + task_datas; + issues; + issue_events; + errors; +} + +let rec default_add_doc_error () = (Add_unknown_index:add_doc_error) + +let rec default_add_doc_response () : add_doc_response = Error (default_add_doc_error ()) + +let rec default_commit_request + ?index:((index:string) = "") + ?crawler:((crawler:string) = "") + ?apikey:((apikey:string) = "") + ?entity:((entity:entity option) = None) + ?timestamp:((timestamp:TimestampTypes.timestamp option) = None) + () : commit_request = { + index; + crawler; + apikey; + entity; + timestamp; +} + +let rec default_commit_error () = (Commit_unknown_index:commit_error) + +let rec default_commit_response () : commit_response = Error (default_commit_error ()) + +let rec default_commit_info_request + ?index:((index:string) = "") + ?crawler:((crawler:string) = "") + ?entity:((entity:entity_type) = default_entity_type ()) + ?offset:((offset:int32) = 0l) + () : commit_info_request = { + index; + crawler; + entity; + offset; +} + +let rec default_commit_info_error () = (Commit_get_unknown_index:commit_info_error) + +let rec default_commit_info_response_oldest_entity + ?entity:((entity:entity option) = None) + ?last_commit_at:((last_commit_at:TimestampTypes.timestamp option) = None) + () : commit_info_response_oldest_entity = { + entity; + last_commit_at; +} + +let rec default_commit_info_response () : commit_info_response = Error (default_commit_info_error ()) diff --git a/web/src/messages/CrawlerTypes.mli b/web/src/messages/CrawlerTypes.mli new file mode 100644 index 000000000..14e3338b6 --- /dev/null +++ b/web/src/messages/CrawlerTypes.mli @@ -0,0 +1,205 @@ +(** crawler.proto Types *) + + + +(** {2 Types} *) + +type entity = + | Organization_name of string + | Project_name of string + | Project_issue_name of string + | Td_name of string + | User_name of string + +type entity_type = + | Entity_type_organization + | Entity_type_project + | Entity_type_task_data + | Entity_type_user + +type crawler_error = { + message : string; + body : string; + created_at : TimestampTypes.timestamp option; + entity : entity option; +} + +type errors_request = { + index : string; + query : string; +} + +type errors_list = { + errors : crawler_error list; +} + +type errors_response = + | Success of errors_list + | Error of string + +type project = { + full_path : string; +} + +type add_doc_request = { + index : string; + crawler : string; + apikey : string; + entity : entity option; + changes : ChangeTypes.change list; + events : ChangeTypes.change_event list; + projects : project list; + task_datas : SearchTypes.task_data list; + issues : 
IssueTypes.issue list; + issue_events : IssueTypes.issue_event list; + errors : crawler_error list; +} + +type add_doc_error = + | Add_unknown_index + | Add_unknown_crawler + | Add_unknown_api_key + | Add_failed + +type add_doc_response = + | Error of add_doc_error + +type commit_request = { + index : string; + crawler : string; + apikey : string; + entity : entity option; + timestamp : TimestampTypes.timestamp option; +} + +type commit_error = + | Commit_unknown_index + | Commit_unknown_crawler + | Commit_unknown_api_key + | Commit_date_inferior_than_previous + | Commit_date_missing + +type commit_response = + | Error of commit_error + | Timestamp of TimestampTypes.timestamp + +type commit_info_request = { + index : string; + crawler : string; + entity : entity_type; + offset : int32; +} + +type commit_info_error = + | Commit_get_unknown_index + | Commit_get_unknown_crawler + | Commit_get_no_entity + +type commit_info_response_oldest_entity = { + entity : entity option; + last_commit_at : TimestampTypes.timestamp option; +} + +type commit_info_response = + | Error of commit_info_error + | Entity of commit_info_response_oldest_entity + + +(** {2 Default values} *) + +val default_entity : unit -> entity +(** [default_entity ()] is the default value for type [entity] *) + +val default_entity_type : unit -> entity_type +(** [default_entity_type ()] is the default value for type [entity_type] *) + +val default_crawler_error : + ?message:string -> + ?body:string -> + ?created_at:TimestampTypes.timestamp option -> + ?entity:entity option -> + unit -> + crawler_error +(** [default_crawler_error ()] is the default value for type [crawler_error] *) + +val default_errors_request : + ?index:string -> + ?query:string -> + unit -> + errors_request +(** [default_errors_request ()] is the default value for type [errors_request] *) + +val default_errors_list : + ?errors:crawler_error list -> + unit -> + errors_list +(** [default_errors_list ()] is the default value for type [errors_list] *) + +val default_errors_response : unit -> errors_response +(** [default_errors_response ()] is the default value for type [errors_response] *) + +val default_project : + ?full_path:string -> + unit -> + project +(** [default_project ()] is the default value for type [project] *) + +val default_add_doc_request : + ?index:string -> + ?crawler:string -> + ?apikey:string -> + ?entity:entity option -> + ?changes:ChangeTypes.change list -> + ?events:ChangeTypes.change_event list -> + ?projects:project list -> + ?task_datas:SearchTypes.task_data list -> + ?issues:IssueTypes.issue list -> + ?issue_events:IssueTypes.issue_event list -> + ?errors:crawler_error list -> + unit -> + add_doc_request +(** [default_add_doc_request ()] is the default value for type [add_doc_request] *) + +val default_add_doc_error : unit -> add_doc_error +(** [default_add_doc_error ()] is the default value for type [add_doc_error] *) + +val default_add_doc_response : unit -> add_doc_response +(** [default_add_doc_response ()] is the default value for type [add_doc_response] *) + +val default_commit_request : + ?index:string -> + ?crawler:string -> + ?apikey:string -> + ?entity:entity option -> + ?timestamp:TimestampTypes.timestamp option -> + unit -> + commit_request +(** [default_commit_request ()] is the default value for type [commit_request] *) + +val default_commit_error : unit -> commit_error +(** [default_commit_error ()] is the default value for type [commit_error] *) + +val default_commit_response : unit -> commit_response +(** 
[default_commit_response ()] is the default value for type [commit_response] *) + +val default_commit_info_request : + ?index:string -> + ?crawler:string -> + ?entity:entity_type -> + ?offset:int32 -> + unit -> + commit_info_request +(** [default_commit_info_request ()] is the default value for type [commit_info_request] *) + +val default_commit_info_error : unit -> commit_info_error +(** [default_commit_info_error ()] is the default value for type [commit_info_error] *) + +val default_commit_info_response_oldest_entity : + ?entity:entity option -> + ?last_commit_at:TimestampTypes.timestamp option -> + unit -> + commit_info_response_oldest_entity +(** [default_commit_info_response_oldest_entity ()] is the default value for type [commit_info_response_oldest_entity] *) + +val default_commit_info_response : unit -> commit_info_response +(** [default_commit_info_response ()] is the default value for type [commit_info_response] *) diff --git a/web/src/messages/IssueBs.ml b/web/src/messages/IssueBs.ml new file mode 100644 index 000000000..2d05238fd --- /dev/null +++ b/web/src/messages/IssueBs.ml @@ -0,0 +1,376 @@ +[@@@ocaml.warning "-27-30-39"] + +type ident_mutable = { + mutable uid : string; + mutable muid : string; +} + +let default_ident_mutable () : ident_mutable = { + uid = ""; + muid = ""; +} + +type issue_mutable = { + mutable id : string; + mutable number : int32; + mutable title : string; + mutable text : string; + mutable url : string; + mutable repository_prefix : string; + mutable repository_fullname : string; + mutable repository_shortname : string; + mutable author : ChangeTypes.ident option; + mutable created_at : TimestampTypes.timestamp option; + mutable updated_at : TimestampTypes.timestamp option; + mutable optional_closed_at : IssueTypes.issue_optional_closed_at; + mutable state : string; +} + +let default_issue_mutable () : issue_mutable = { + id = ""; + number = 0l; + title = ""; + text = ""; + url = ""; + repository_prefix = ""; + repository_fullname = ""; + repository_shortname = ""; + author = None; + created_at = None; + updated_at = None; + optional_closed_at = IssueTypes.Closed_at (TimestampTypes.default_timestamp); + state = ""; +} + +type issue_commented_event_mutable = { + mutable comment : string; +} + +let default_issue_commented_event_mutable () : issue_commented_event_mutable = { + comment = ""; +} + +type issue_event_mutable = { + mutable id : string; + mutable created_at : TimestampTypes.timestamp option; + mutable author : ChangeTypes.ident option; + mutable repository_prefix : string; + mutable repository_fullname : string; + mutable repository_shortname : string; + mutable number : int32; + mutable url : string; + mutable type_ : IssueTypes.issue_event_type; +} + +let default_issue_event_mutable () : issue_event_mutable = { + id = ""; + created_at = None; + author = None; + repository_prefix = ""; + repository_fullname = ""; + repository_shortname = ""; + number = 0l; + url = ""; + type_ = IssueTypes.Issue_created; +} + + +let rec decode_ident json = + let v = default_ident_mutable () in + let keys = Js.Dict.keys json in + let last_key_index = Array.length keys - 1 in + for i = 0 to last_key_index do + match Array.unsafe_get keys i with + | "uid" -> + let json = Js.Dict.unsafeGet json "uid" in + v.uid <- Pbrt_bs.string json "ident" "uid" + | "muid" -> + let json = Js.Dict.unsafeGet json "muid" in + v.muid <- Pbrt_bs.string json "ident" "muid" + + | _ -> () (*Unknown fields are ignored*) + done; + ({ + IssueTypes.uid = v.uid; + IssueTypes.muid = v.muid; + 
} : IssueTypes.ident) + +let rec decode_issue_optional_closed_at json = + let keys = Js.Dict.keys json in + let rec loop = function + | -1 -> Pbrt_bs.E.malformed_variant "issue_optional_closed_at" + | i -> + begin match Array.unsafe_get keys i with + | "closed_at" -> + let json = Js.Dict.unsafeGet json "closed_at" in + (IssueTypes.Closed_at ((TimestampBs.decode_timestamp (Pbrt_bs.string json "issue_optional_closed_at" "Closed_at"))) : IssueTypes.issue_optional_closed_at) + + | _ -> loop (i - 1) + end + in + loop (Array.length keys - 1) + +and decode_issue json = + let v = default_issue_mutable () in + let keys = Js.Dict.keys json in + let last_key_index = Array.length keys - 1 in + for i = 0 to last_key_index do + match Array.unsafe_get keys i with + | "id" -> + let json = Js.Dict.unsafeGet json "id" in + v.id <- Pbrt_bs.string json "issue" "id" + | "number" -> + let json = Js.Dict.unsafeGet json "number" in + v.number <- Pbrt_bs.int32 json "issue" "number" + | "title" -> + let json = Js.Dict.unsafeGet json "title" in + v.title <- Pbrt_bs.string json "issue" "title" + | "text" -> + let json = Js.Dict.unsafeGet json "text" in + v.text <- Pbrt_bs.string json "issue" "text" + | "url" -> + let json = Js.Dict.unsafeGet json "url" in + v.url <- Pbrt_bs.string json "issue" "url" + | "repository_prefix" -> + let json = Js.Dict.unsafeGet json "repository_prefix" in + v.repository_prefix <- Pbrt_bs.string json "issue" "repository_prefix" + | "repository_fullname" -> + let json = Js.Dict.unsafeGet json "repository_fullname" in + v.repository_fullname <- Pbrt_bs.string json "issue" "repository_fullname" + | "repository_shortname" -> + let json = Js.Dict.unsafeGet json "repository_shortname" in + v.repository_shortname <- Pbrt_bs.string json "issue" "repository_shortname" + | "author" -> + let json = Js.Dict.unsafeGet json "author" in + v.author <- Some ((ChangeBs.decode_ident (Pbrt_bs.object_ json "issue" "author"))) + | "created_at" -> + let json = Js.Dict.unsafeGet json "created_at" in + v.created_at <- Some ((TimestampBs.decode_timestamp (Pbrt_bs.string json "issue" "created_at"))) + | "updated_at" -> + let json = Js.Dict.unsafeGet json "updated_at" in + v.updated_at <- Some ((TimestampBs.decode_timestamp (Pbrt_bs.string json "issue" "updated_at"))) + | "closed_at" -> + let json = Js.Dict.unsafeGet json "closed_at" in + v.optional_closed_at <- Closed_at ((TimestampBs.decode_timestamp (Pbrt_bs.string json "issue" "optional_closed_at"))) + | "state" -> + let json = Js.Dict.unsafeGet json "state" in + v.state <- Pbrt_bs.string json "issue" "state" + + | _ -> () (*Unknown fields are ignored*) + done; + ({ + IssueTypes.id = v.id; + IssueTypes.number = v.number; + IssueTypes.title = v.title; + IssueTypes.text = v.text; + IssueTypes.url = v.url; + IssueTypes.repository_prefix = v.repository_prefix; + IssueTypes.repository_fullname = v.repository_fullname; + IssueTypes.repository_shortname = v.repository_shortname; + IssueTypes.author = v.author; + IssueTypes.created_at = v.created_at; + IssueTypes.updated_at = v.updated_at; + IssueTypes.optional_closed_at = v.optional_closed_at; + IssueTypes.state = v.state; + } : IssueTypes.issue) + +let rec decode_issue_commented_event json = + let v = default_issue_commented_event_mutable () in + let keys = Js.Dict.keys json in + let last_key_index = Array.length keys - 1 in + for i = 0 to last_key_index do + match Array.unsafe_get keys i with + | "comment" -> + let json = Js.Dict.unsafeGet json "comment" in + v.comment <- Pbrt_bs.string json "issue_commented_event" 
"comment" + + | _ -> () (*Unknown fields are ignored*) + done; + ({ + IssueTypes.comment = v.comment; + } : IssueTypes.issue_commented_event) + +let rec decode_issue_event_type json = + let keys = Js.Dict.keys json in + let rec loop = function + | -1 -> Pbrt_bs.E.malformed_variant "issue_event_type" + | i -> + begin match Array.unsafe_get keys i with + | "issue_created" -> (IssueTypes.Issue_created : IssueTypes.issue_event_type) + | "issue_commented" -> + let json = Js.Dict.unsafeGet json "issue_commented" in + (IssueTypes.Issue_commented ((decode_issue_commented_event (Pbrt_bs.object_ json "issue_event_type" "Issue_commented"))) : IssueTypes.issue_event_type) + | "issue_closed" -> (IssueTypes.Issue_closed : IssueTypes.issue_event_type) + + | _ -> loop (i - 1) + end + in + loop (Array.length keys - 1) + +and decode_issue_event json = + let v = default_issue_event_mutable () in + let keys = Js.Dict.keys json in + let last_key_index = Array.length keys - 1 in + for i = 0 to last_key_index do + match Array.unsafe_get keys i with + | "id" -> + let json = Js.Dict.unsafeGet json "id" in + v.id <- Pbrt_bs.string json "issue_event" "id" + | "created_at" -> + let json = Js.Dict.unsafeGet json "created_at" in + v.created_at <- Some ((TimestampBs.decode_timestamp (Pbrt_bs.string json "issue_event" "created_at"))) + | "author" -> + let json = Js.Dict.unsafeGet json "author" in + v.author <- Some ((ChangeBs.decode_ident (Pbrt_bs.object_ json "issue_event" "author"))) + | "repository_prefix" -> + let json = Js.Dict.unsafeGet json "repository_prefix" in + v.repository_prefix <- Pbrt_bs.string json "issue_event" "repository_prefix" + | "repository_fullname" -> + let json = Js.Dict.unsafeGet json "repository_fullname" in + v.repository_fullname <- Pbrt_bs.string json "issue_event" "repository_fullname" + | "repository_shortname" -> + let json = Js.Dict.unsafeGet json "repository_shortname" in + v.repository_shortname <- Pbrt_bs.string json "issue_event" "repository_shortname" + | "number" -> + let json = Js.Dict.unsafeGet json "number" in + v.number <- Pbrt_bs.int32 json "issue_event" "number" + | "url" -> + let json = Js.Dict.unsafeGet json "url" in + v.url <- Pbrt_bs.string json "issue_event" "url" + | "issue_created" -> v.type_ <- Issue_created + | "issue_commented" -> + let json = Js.Dict.unsafeGet json "issue_commented" in + v.type_ <- Issue_commented ((decode_issue_commented_event (Pbrt_bs.object_ json "issue_event" "type_"))) + | "issue_closed" -> v.type_ <- Issue_closed + + | _ -> () (*Unknown fields are ignored*) + done; + ({ + IssueTypes.id = v.id; + IssueTypes.created_at = v.created_at; + IssueTypes.author = v.author; + IssueTypes.repository_prefix = v.repository_prefix; + IssueTypes.repository_fullname = v.repository_fullname; + IssueTypes.repository_shortname = v.repository_shortname; + IssueTypes.number = v.number; + IssueTypes.url = v.url; + IssueTypes.type_ = v.type_; + } : IssueTypes.issue_event) + +let rec encode_ident (v:IssueTypes.ident) = + let json = Js.Dict.empty () in + Js.Dict.set json "uid" (Js.Json.string v.IssueTypes.uid); + Js.Dict.set json "muid" (Js.Json.string v.IssueTypes.muid); + json + +let rec encode_issue_optional_closed_at (v:IssueTypes.issue_optional_closed_at) = + let json = Js.Dict.empty () in + begin match v with + | IssueTypes.Closed_at v -> + begin (* closed_at field *) + let json' = TimestampBs.encode_timestamp v in + Js.Dict.set json "closed_at" (Js.Json.string json'); + end; + end; + json + +and encode_issue (v:IssueTypes.issue) = + let json = Js.Dict.empty () 
in + Js.Dict.set json "id" (Js.Json.string v.IssueTypes.id); + Js.Dict.set json "number" (Js.Json.number (Int32.to_float v.IssueTypes.number)); + Js.Dict.set json "title" (Js.Json.string v.IssueTypes.title); + Js.Dict.set json "text" (Js.Json.string v.IssueTypes.text); + Js.Dict.set json "url" (Js.Json.string v.IssueTypes.url); + Js.Dict.set json "repository_prefix" (Js.Json.string v.IssueTypes.repository_prefix); + Js.Dict.set json "repository_fullname" (Js.Json.string v.IssueTypes.repository_fullname); + Js.Dict.set json "repository_shortname" (Js.Json.string v.IssueTypes.repository_shortname); + begin match v.IssueTypes.author with + | None -> () + | Some v -> + begin (* author field *) + let json' = ChangeBs.encode_ident v in + Js.Dict.set json "author" (Js.Json.object_ json'); + end; + end; + begin match v.IssueTypes.created_at with + | None -> () + | Some v -> + begin (* created_at field *) + let json' = TimestampBs.encode_timestamp v in + Js.Dict.set json "created_at" (Js.Json.string json'); + end; + end; + begin match v.IssueTypes.updated_at with + | None -> () + | Some v -> + begin (* updated_at field *) + let json' = TimestampBs.encode_timestamp v in + Js.Dict.set json "updated_at" (Js.Json.string json'); + end; + end; + begin match v.IssueTypes.optional_closed_at with + | Closed_at v -> + begin (* closed_at field *) + let json' = TimestampBs.encode_timestamp v in + Js.Dict.set json "closed_at" (Js.Json.string json'); + end; + end; (* match v.optional_closed_at *) + Js.Dict.set json "state" (Js.Json.string v.IssueTypes.state); + json + +let rec encode_issue_commented_event (v:IssueTypes.issue_commented_event) = + let json = Js.Dict.empty () in + Js.Dict.set json "comment" (Js.Json.string v.IssueTypes.comment); + json + +let rec encode_issue_event_type (v:IssueTypes.issue_event_type) = + let json = Js.Dict.empty () in + begin match v with + | IssueTypes.Issue_created -> + Js.Dict.set json "issue_created" Js.Json.null + | IssueTypes.Issue_commented v -> + begin (* issueCommented field *) + let json' = encode_issue_commented_event v in + Js.Dict.set json "issue_commented" (Js.Json.object_ json'); + end; + | IssueTypes.Issue_closed -> + Js.Dict.set json "issue_closed" Js.Json.null + end; + json + +and encode_issue_event (v:IssueTypes.issue_event) = + let json = Js.Dict.empty () in + Js.Dict.set json "id" (Js.Json.string v.IssueTypes.id); + begin match v.IssueTypes.created_at with + | None -> () + | Some v -> + begin (* created_at field *) + let json' = TimestampBs.encode_timestamp v in + Js.Dict.set json "created_at" (Js.Json.string json'); + end; + end; + begin match v.IssueTypes.author with + | None -> () + | Some v -> + begin (* author field *) + let json' = ChangeBs.encode_ident v in + Js.Dict.set json "author" (Js.Json.object_ json'); + end; + end; + Js.Dict.set json "repository_prefix" (Js.Json.string v.IssueTypes.repository_prefix); + Js.Dict.set json "repository_fullname" (Js.Json.string v.IssueTypes.repository_fullname); + Js.Dict.set json "repository_shortname" (Js.Json.string v.IssueTypes.repository_shortname); + Js.Dict.set json "number" (Js.Json.number (Int32.to_float v.IssueTypes.number)); + Js.Dict.set json "url" (Js.Json.string v.IssueTypes.url); + begin match v.IssueTypes.type_ with + | Issue_created -> + Js.Dict.set json "issue_created" Js.Json.null + | Issue_commented v -> + begin (* issueCommented field *) + let json' = encode_issue_commented_event v in + Js.Dict.set json "issue_commented" (Js.Json.object_ json'); + end; + | Issue_closed -> + Js.Dict.set json 
"issue_closed" Js.Json.null + end; (* match v.type_ *) + json diff --git a/web/src/messages/IssueBs.mli b/web/src/messages/IssueBs.mli new file mode 100644 index 000000000..b14edbbe8 --- /dev/null +++ b/web/src/messages/IssueBs.mli @@ -0,0 +1,43 @@ +(** issue.proto BuckleScript Encoding *) + + +(** {2 Protobuf JSON Encoding} *) + +val encode_ident : IssueTypes.ident -> Js.Json.t Js.Dict.t +(** [encode_ident v dict] encodes [v] int the given JSON [dict] *) + +val encode_issue_optional_closed_at : IssueTypes.issue_optional_closed_at -> Js.Json.t Js.Dict.t +(** [encode_issue_optional_closed_at v dict] encodes [v] int the given JSON [dict] *) + +val encode_issue : IssueTypes.issue -> Js.Json.t Js.Dict.t +(** [encode_issue v dict] encodes [v] int the given JSON [dict] *) + +val encode_issue_commented_event : IssueTypes.issue_commented_event -> Js.Json.t Js.Dict.t +(** [encode_issue_commented_event v dict] encodes [v] int the given JSON [dict] *) + +val encode_issue_event_type : IssueTypes.issue_event_type -> Js.Json.t Js.Dict.t +(** [encode_issue_event_type v dict] encodes [v] int the given JSON [dict] *) + +val encode_issue_event : IssueTypes.issue_event -> Js.Json.t Js.Dict.t +(** [encode_issue_event v dict] encodes [v] int the given JSON [dict] *) + + +(** {2 BS Decoding} *) + +val decode_ident : Js.Json.t Js.Dict.t -> IssueTypes.ident +(** [decode_ident decoder] decodes a [ident] value from [decoder] *) + +val decode_issue_optional_closed_at : Js.Json.t Js.Dict.t -> IssueTypes.issue_optional_closed_at +(** [decode_issue_optional_closed_at decoder] decodes a [issue_optional_closed_at] value from [decoder] *) + +val decode_issue : Js.Json.t Js.Dict.t -> IssueTypes.issue +(** [decode_issue decoder] decodes a [issue] value from [decoder] *) + +val decode_issue_commented_event : Js.Json.t Js.Dict.t -> IssueTypes.issue_commented_event +(** [decode_issue_commented_event decoder] decodes a [issue_commented_event] value from [decoder] *) + +val decode_issue_event_type : Js.Json.t Js.Dict.t -> IssueTypes.issue_event_type +(** [decode_issue_event_type decoder] decodes a [issue_event_type] value from [decoder] *) + +val decode_issue_event : Js.Json.t Js.Dict.t -> IssueTypes.issue_event +(** [decode_issue_event decoder] decodes a [issue_event] value from [decoder] *) diff --git a/web/src/messages/IssueTypes.ml b/web/src/messages/IssueTypes.ml new file mode 100644 index 000000000..76d42b15a --- /dev/null +++ b/web/src/messages/IssueTypes.ml @@ -0,0 +1,117 @@ +[@@@ocaml.warning "-27-30-39"] + + +type ident = { + uid : string; + muid : string; +} + +type issue_optional_closed_at = + | Closed_at of TimestampTypes.timestamp + +and issue = { + id : string; + number : int32; + title : string; + text : string; + url : string; + repository_prefix : string; + repository_fullname : string; + repository_shortname : string; + author : ChangeTypes.ident option; + created_at : TimestampTypes.timestamp option; + updated_at : TimestampTypes.timestamp option; + optional_closed_at : issue_optional_closed_at; + state : string; +} + +type issue_commented_event = { + comment : string; +} + +type issue_event_type = + | Issue_created + | Issue_commented of issue_commented_event + | Issue_closed + +and issue_event = { + id : string; + created_at : TimestampTypes.timestamp option; + author : ChangeTypes.ident option; + repository_prefix : string; + repository_fullname : string; + repository_shortname : string; + number : int32; + url : string; + type_ : issue_event_type; +} + +let rec default_ident + ?uid:((uid:string) = "") + 
?muid:((muid:string) = "") + () : ident = { + uid; + muid; +} + +let rec default_issue_optional_closed_at () : issue_optional_closed_at = Closed_at (TimestampTypes.default_timestamp) + +and default_issue + ?id:((id:string) = "") + ?number:((number:int32) = 0l) + ?title:((title:string) = "") + ?text:((text:string) = "") + ?url:((url:string) = "") + ?repository_prefix:((repository_prefix:string) = "") + ?repository_fullname:((repository_fullname:string) = "") + ?repository_shortname:((repository_shortname:string) = "") + ?author:((author:ChangeTypes.ident option) = None) + ?created_at:((created_at:TimestampTypes.timestamp option) = None) + ?updated_at:((updated_at:TimestampTypes.timestamp option) = None) + ?optional_closed_at:((optional_closed_at:issue_optional_closed_at) = Closed_at (TimestampTypes.default_timestamp)) + ?state:((state:string) = "") + () : issue = { + id; + number; + title; + text; + url; + repository_prefix; + repository_fullname; + repository_shortname; + author; + created_at; + updated_at; + optional_closed_at; + state; +} + +let rec default_issue_commented_event + ?comment:((comment:string) = "") + () : issue_commented_event = { + comment; +} + +let rec default_issue_event_type (): issue_event_type = Issue_created + +and default_issue_event + ?id:((id:string) = "") + ?created_at:((created_at:TimestampTypes.timestamp option) = None) + ?author:((author:ChangeTypes.ident option) = None) + ?repository_prefix:((repository_prefix:string) = "") + ?repository_fullname:((repository_fullname:string) = "") + ?repository_shortname:((repository_shortname:string) = "") + ?number:((number:int32) = 0l) + ?url:((url:string) = "") + ?type_:((type_:issue_event_type) = Issue_created) + () : issue_event = { + id; + created_at; + author; + repository_prefix; + repository_fullname; + repository_shortname; + number; + url; + type_; +} diff --git a/web/src/messages/IssueTypes.mli b/web/src/messages/IssueTypes.mli new file mode 100644 index 000000000..b6afb9f12 --- /dev/null +++ b/web/src/messages/IssueTypes.mli @@ -0,0 +1,104 @@ +(** issue.proto Types *) + + + +(** {2 Types} *) + +type ident = { + uid : string; + muid : string; +} + +type issue_optional_closed_at = + | Closed_at of TimestampTypes.timestamp + +and issue = { + id : string; + number : int32; + title : string; + text : string; + url : string; + repository_prefix : string; + repository_fullname : string; + repository_shortname : string; + author : ChangeTypes.ident option; + created_at : TimestampTypes.timestamp option; + updated_at : TimestampTypes.timestamp option; + optional_closed_at : issue_optional_closed_at; + state : string; +} + +type issue_commented_event = { + comment : string; +} + +type issue_event_type = + | Issue_created + | Issue_commented of issue_commented_event + | Issue_closed + +and issue_event = { + id : string; + created_at : TimestampTypes.timestamp option; + author : ChangeTypes.ident option; + repository_prefix : string; + repository_fullname : string; + repository_shortname : string; + number : int32; + url : string; + type_ : issue_event_type; +} + + +(** {2 Default values} *) + +val default_ident : + ?uid:string -> + ?muid:string -> + unit -> + ident +(** [default_ident ()] is the default value for type [ident] *) + +val default_issue_optional_closed_at : unit -> issue_optional_closed_at +(** [default_issue_optional_closed_at ()] is the default value for type [issue_optional_closed_at] *) + +val default_issue : + ?id:string -> + ?number:int32 -> + ?title:string -> + ?text:string -> + ?url:string -> + 
?repository_prefix:string ->
+  ?repository_fullname:string ->
+  ?repository_shortname:string ->
+  ?author:ChangeTypes.ident option ->
+  ?created_at:TimestampTypes.timestamp option ->
+  ?updated_at:TimestampTypes.timestamp option ->
+  ?optional_closed_at:issue_optional_closed_at ->
+  ?state:string ->
+  unit ->
+  issue
+(** [default_issue ()] is the default value for type [issue] *)
+
+val default_issue_commented_event :
+  ?comment:string ->
+  unit ->
+  issue_commented_event
+(** [default_issue_commented_event ()] is the default value for type [issue_commented_event] *)
+
+val default_issue_event_type : unit -> issue_event_type
+(** [default_issue_event_type ()] is the default value for type [issue_event_type] *)
+
+val default_issue_event :
+  ?id:string ->
+  ?created_at:TimestampTypes.timestamp option ->
+  ?author:ChangeTypes.ident option ->
+  ?repository_prefix:string ->
+  ?repository_fullname:string ->
+  ?repository_shortname:string ->
+  ?number:int32 ->
+  ?url:string ->
+  ?type_:issue_event_type ->
+  unit ->
+  issue_event
+(** [default_issue_event ()] is the default value for type [issue_event] *)

From eec06d1e966d327dbd5c893224dfc5a826ea6b41 Mon Sep 17 00:00:00 2001
From: Tristan Cacqueray
Date: Fri, 22 Dec 2023 20:49:30 +0000
Subject: [PATCH 16/28] web: display crawler errors

---
 CHANGELOG.md | 2 ++
 web/src/App.res | 39 +++++++++++++++++++++++++++++++++++
 web/src/components/Search.res | 14 +++++++++++++
 web/src/components/Store.res | 4 ++++
 4 files changed, 59 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index d7ae6a128..14c1c55d8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,8 @@ All notable changes to this project will be documented in this file.
 
 - [crawler] Proxy can be configured with `HTTP_PROXY` and `HTTPS_PROXY` environment. To proxy http requests between crawlers and the api, use the `API_PROXY` environment.
 - [crawler] A new `groups` sub-field in all Author fields (`on_author` and `author`) for `Change` and `Events`. Groups memberships are reflected from the config file to the database.
+- [crawler] Processing errors are no longer fatal; they are now stored in the index.
+- [web] A red bell is added to the UI when crawler errors exist for the given query, to signal that some data may be missing.
 
 ### Changed
 
diff --git a/web/src/App.res b/web/src/App.res
index 0f730dca5..11822d9e1 100644
--- a/web/src/App.res
+++ b/web/src/App.res
@@ -290,6 +290,34 @@ module About = {
   }
 }
 
+module Errors = {
+  module CrawlerError = {
+    @react.component
+    let make = (~err: CrawlerTypes.crawler_error) => {
+      let entity: option<string> = err.entity->Belt.Option.flatMap(Js.Json.stringifyAny)
+      <div>
+        <div> {err.created_at->Belt.Option.getExn->getDate->str} </div>
+        <div> {("entity: " ++ entity->Belt.Option.getWithDefault(""))->str} </div>
+        <div> {("message: " ++ err.message)->str} </div>
+        <div> {("body: " ++ err.body)->str} </div>
+      </div>
+    }
+  }
+
+  @react.component
+  let make = (~store: Store.t) => {
+    let (state, _) = store
+

{"Crawler Errors"->str}

+

+ {"The following errors happened when updating the index. This is likely causing some data to be missing."->str} +

+ {state.errors->Belt.List.map(e => )->Belt.List.toArray->React.array} +
+ } +} + module App = { @react.component let make = (~about: ConfigTypes.about) => { @@ -350,6 +378,16 @@ module App = { + {state.errors->Belt.List.head->Belt.Option.isNone + ? React.null + : +
("/" ++ state.index ++ "/errors")->RescriptReactRouter.push} + style={ReactDOM.Style.make(~cursor="pointer", ~paddingLeft="5px", ())}> + +
+
} + // //
| list{_, "metrics"} => | list{_, "metric", name} => + | list{_, "errors"} => | _ =>

{"Not found"->str}

}}
diff --git a/web/src/components/Search.res b/web/src/components/Search.res
index b14cb4855..5e29d2ed7 100644
--- a/web/src/components/Search.res
+++ b/web/src/components/Search.res
@@ -399,6 +399,20 @@ module Top = {
       None
     }, [state.query])
 
+    // Update the crawler errors when the query changes
+    let handleErrors = (resp: WebApi.axiosResponse<CrawlerTypes.errors_response>) =>
+      switch resp.data {
+      | CrawlerTypes.Success(errors_list) =>
+        SetErrors(errors_list.errors)->dispatch->Js.Promise.resolve
+      | CrawlerTypes.Error(err) => Js.Console.error(err)->Js.Promise.resolve
+      }
+
+    React.useEffect1(() => {
+      ({index: state.index, query: state.query}->WebApi.Crawler.errors
+      |> Js.Promise.then_(handleErrors))->ignore
+      None
+    }, [state.query])
+
     // Dispatch the value upstream
     let handleCheck = (newValue, res: WebApi.axiosResponse) => {
       switch res.data {
diff --git a/web/src/components/Store.res b/web/src/components/Store.res
index 87afbe388..606a5f81e 100644
--- a/web/src/components/Store.res
+++ b/web/src/components/Store.res
@@ -66,6 +66,7 @@ module Store = {
     about: ConfigTypes.about,
     dexie: Dexie.Database.t,
     toasts: list,
+    errors: list<CrawlerTypes.crawler_error>,
   }
   type action =
     | ChangeIndex(string)
@@ -74,6 +75,7 @@
     | SetLimit(int)
     | SetOrder(option)
     | SetAuthorScopedTab(authorScopedTab)
+    | SetErrors(list<CrawlerTypes.crawler_error>)
     | FetchFields(fieldsRespR)
     | FetchSuggestions(suggestionsR)
     | FetchProjects(projectsR)
@@ -121,6 +123,7 @@
     changes_pies_panel: false,
     dexie: MonoIndexedDB.mkDexie(),
     toasts: list{},
+    errors: list{},
   }
 
   let reducer = (state: t, action: action) =>
@@ -151,6 +154,7 @@
       Prelude.setLocationSearch("l", limit->string_of_int)->ignore
       {...state, limit: limit}
     }
+  | SetErrors(errors) => {...state, errors: errors}
   | FetchFields(res) => {...state, fields: res->RemoteData.fmap(resp => resp.fields)}
   | FetchSuggestions(res) => {...state, suggestions: res}
   | FetchProjects(res) => {...state, projects: res}

From 4a71ad3b26f0d532b7cf86c4712de0c31c2ac8d7 Mon Sep 17 00:00:00 2001
From: Tristan Cacqueray
Date: Sat, 23 Dec 2023 14:05:44 +0000
Subject: [PATCH 17/28] index: encode crawler error body by the api

This change also replaces bytestring-base64 with the more
general-purpose base64 library.
---
 monocle.cabal | 2 +-
 src/Macroscope/Test.hs | 3 +--
 src/Macroscope/Worker.hs | 4 ++--
 src/Monocle/Api/Jwt.hs | 6 +++---
 src/Monocle/Backend/Documents.hs | 8 ++++++--
 src/Monocle/Backend/Index.hs | 2 +-
 src/Monocle/Prelude.hs | 7 +++----
 7 files changed, 17 insertions(+), 15 deletions(-)

diff --git a/monocle.cabal b/monocle.cabal
index 70b2cabe3..9e29a44d7 100644
--- a/monocle.cabal
+++ b/monocle.cabal
@@ -120,8 +120,8 @@ library
     , aeson-casing
     , aeson-pretty
     , attoparsec >= 0.13
+    , base64 >= 0.4
     , cgroup-rts-threads
-    , base64-bytestring >= 1.1
     , blaze-markup >= 0.8.2.8
     , blaze-html >= 0.9.1.2
     , binary >= 0.8
diff --git a/src/Macroscope/Test.hs b/src/Macroscope/Test.hs
index dac14bb19..0bd48ae45 100644
--- a/src/Macroscope/Test.hs
+++ b/src/Macroscope/Test.hs
@@ -1,7 +1,6 @@
 -- | Tests for the macroscope process
 module Macroscope.Test where
 
-import Data.ByteString.Base64.Lazy qualified as B64
 import Effectful.Env
 import Effectful.Prometheus
 import Effectful.Reader.Static qualified as E
@@ -62,7 +61,7 @@ testCrawlingPoint = do
       case errorResponse of
         CrawlerPB.ErrorsResponse (Just (CrawlerPB.ErrorsResponseResultSuccess (CrawlerPB.ErrorsList (toList -> [e])))) -> liftIO do
           e.crawlerErrorMessage @?= "decode"
-          (B64.decode . 
encodeUtf8 $ e.crawlerErrorBody) @?= Right "[\"Oops\"]" + e.crawlerErrorBody @?= "[\"Oops\"]" (from <$> e.crawlerErrorEntity) @?= Just (Project "opendev/neutron") _ -> error $ "Expected one error, got: " <> show errorResponse diff --git a/src/Macroscope/Worker.hs b/src/Macroscope/Worker.hs index 75d6af2c9..831d55400 100644 --- a/src/Macroscope/Worker.hs +++ b/src/Macroscope/Worker.hs @@ -105,8 +105,8 @@ processStream entity logFunc postFunc = go (0 :: Word) [] [] -- We got a new document let doc = case edoc of Right x -> x - Left (DecodeError ts err) -> DTError $ CrawlerError "decode" (encodeBlob err) (Just $ from ts) (Just $ from entity) - Left (GraphError ts err) -> DTError $ CrawlerError "graph" (encodeBlob err) (Just $ from ts) (Just $ from entity) + Left (DecodeError ts err) -> DTError $ CrawlerError "decode" (encodeJSON err) (Just $ from ts) (Just $ from entity) + Left (GraphError ts err) -> DTError $ CrawlerError "graph" (encodeJSON err) (Just $ from ts) (Just $ from entity) let newAcc = doc : acc if count == 499 then do diff --git a/src/Monocle/Api/Jwt.hs b/src/Monocle/Api/Jwt.hs index f46bbd23e..2f0a08c5b 100644 --- a/src/Monocle/Api/Jwt.hs +++ b/src/Monocle/Api/Jwt.hs @@ -105,7 +105,7 @@ instance FromJSON OIDCState instance ToJSON OIDCState decodeOIDCState :: ByteString -> Maybe OIDCState -decodeOIDCState bs = case B64.decode bs of +decodeOIDCState bs = case B64.decodeBase64 bs of Right json -> decode $ from json Left _ -> Nothing @@ -114,7 +114,7 @@ mkSessionStore OIDCEnv {sessionStoreStorage} stateM uriM = do let sessionStoreGenerate = do rb <- liftIO genRandomB64 let s = OIDCState (decodeUtf8 rb) uriM - pure (B64.encode $ BSL.toStrict $ encode s) + pure (B64.encodeBase64' $ BSL.toStrict $ encode s) sessionStoreSave = storeSave sessionStoreGet = storeGet sessionStoreDelete = case stateM of @@ -132,7 +132,7 @@ mkSessionStore OIDCEnv {sessionStoreStorage} stateM uriM = do -- | Generate a random fixed size string of 42 char base64 encoded genRandomB64 :: IO ByteString -genRandomB64 = B64.encode . hash <$> genRandom +genRandomB64 = B64.encodeBase64' . 
hash <$> genRandom -- | Generate a random fixed size string of 1024 Bytes genRandom :: IO ByteString diff --git a/src/Monocle/Backend/Documents.hs b/src/Monocle/Backend/Documents.hs index c172fc792..464b9e3bb 100644 --- a/src/Monocle/Backend/Documents.hs +++ b/src/Monocle/Backend/Documents.hs @@ -22,6 +22,7 @@ module Monocle.Backend.Documents where import Data.Aeson (Value (String), defaultOptions, genericParseJSON, genericToJSON, withObject, withText, (.:)) import Data.Aeson.Casing (aesonPrefix, snakeCase) import Data.Aeson.Types qualified +import Data.Text.Encoding.Base64 qualified as B64 import Data.Time.Format (defaultTimeLocale, formatTime, parseTimeM) import Data.Vector qualified as V import Monocle.Entity @@ -236,7 +237,7 @@ instance ToJSON EError where , ("entity_type", String (entityTypeName (from e.erEntity))) , ("entity_value", String $ entityValue e.erEntity) , ("message", String e.erMessage) - , ("body", String e.erBody) + , ("body", String (B64.encodeBase64 e.erBody)) ] instance FromJSON EError where @@ -247,7 +248,10 @@ instance FromJSON EError where etype <- v .: "entity_type" erEntity <- parseEntity evalue etype erMessage <- v .: "message" - erBody <- v .: "body" + ebody <- v .: "body" + erBody <- case B64.decodeBase64 ebody of + Right x -> pure x + Left e -> fail ("Body decode failed: " <> from e) pure EError {..} -- | Helper to encode entity diff --git a/src/Monocle/Backend/Index.hs b/src/Monocle/Backend/Index.hs index 90600dd66..ec2ff821f 100644 --- a/src/Monocle/Backend/Index.hs +++ b/src/Monocle/Backend/Index.hs @@ -632,7 +632,7 @@ upsertDocs = runAddDocsBulkOPs toBulkUpsert -- | Generate a Text suitable for ElasticSearch Document ID from Text getDocID :: Text -> Text -getDocID = decodeUtf8 . B64.encode . hash . encodeUtf8 +getDocID = B64.encodeBase64 . hash . encodeUtf8 -- | Generate an DocID from Text getBHDocID :: Text -> BH.DocId diff --git a/src/Monocle/Prelude.hs b/src/Monocle/Prelude.hs index 8a0b30b61..76941d46e 100644 --- a/src/Monocle/Prelude.hs +++ b/src/Monocle/Prelude.hs @@ -16,7 +16,7 @@ module Monocle.Prelude ( setEnv, headMaybe, (:::), - encodeBlob, + encodeJSON, -- * secret Secret, @@ -223,7 +223,6 @@ import Data.Aeson.Encode.Pretty qualified as Aeson import Data.Aeson.Key qualified as AesonKey import Data.Aeson.Lens (_Integer, _Object) import Data.Aeson.Types (Pair) -import Data.ByteString.Base64.Lazy qualified as B64 import Data.Fixed (Deci, Fixed (..), HasResolution (resolution), Pico) import Data.Map qualified as Map import Data.Tagged @@ -607,5 +606,5 @@ streamingFromListT = S.unfoldr go res <- ListT.uncons listT pure $ res `orDie` () -encodeBlob :: ToJSON a => a -> LText -encodeBlob = decodeUtf8 . B64.encode . encode +encodeJSON :: ToJSON a => a -> LText +encodeJSON = decodeUtf8 . 
encode From 0ea04b392bafca91e23d0235ed6814401b11b5e8 Mon Sep 17 00:00:00 2001 From: Tristan Cacqueray Date: Sat, 23 Dec 2023 14:21:12 +0000 Subject: [PATCH 18/28] index: introduce new type for BinaryText --- src/Monocle/Backend/Documents.hs | 30 ++++++++++++++++++++++++------ src/Monocle/Backend/Index.hs | 2 +- 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/src/Monocle/Backend/Documents.hs b/src/Monocle/Backend/Documents.hs index 464b9e3bb..39fb0ad95 100644 --- a/src/Monocle/Backend/Documents.hs +++ b/src/Monocle/Backend/Documents.hs @@ -209,12 +209,33 @@ instance ToJSON EErrorData where instance FromJSON EErrorData where parseJSON = genericParseJSON $ aesonPrefix snakeCase +-- | Helper type to store binary text in elasticsearch using b64 encoding +newtype BinaryText = BinaryText Text + deriving newtype (Show, Eq) + +instance ToJSON BinaryText where + toJSON = String . B64.encodeBase64 . from + +instance FromJSON BinaryText where + parseJSON = withText "binary" $ \v -> case B64.decodeBase64 v of + Right x -> pure (BinaryText x) + Left e -> fail ("Binary text decode failed: " <> from e) + +instance From BinaryText Text where + from (BinaryText txt) = txt + +instance From BinaryText LText where + from = via @Text + +instance From LText BinaryText where + from = BinaryText . from + data EError = EError { erCrawlerName :: Text , erEntity :: Entity , erCreatedAt :: UTCTime , erMessage :: Text - , erBody :: Text + , erBody :: BinaryText } deriving (Show, Eq, Generic) @@ -237,7 +258,7 @@ instance ToJSON EError where , ("entity_type", String (entityTypeName (from e.erEntity))) , ("entity_value", String $ entityValue e.erEntity) , ("message", String e.erMessage) - , ("body", String (B64.encodeBase64 e.erBody)) + , ("body", toJSON e.erBody) ] instance FromJSON EError where @@ -248,10 +269,7 @@ instance FromJSON EError where etype <- v .: "entity_type" erEntity <- parseEntity evalue etype erMessage <- v .: "message" - ebody <- v .: "body" - erBody <- case B64.decodeBase64 ebody of - Right x -> pure x - Left e -> fail ("Body decode failed: " <> from e) + erBody <- v .: "body" pure EError {..} -- | Helper to encode entity diff --git a/src/Monocle/Backend/Index.hs b/src/Monocle/Backend/Index.hs index ec2ff821f..8ddde87e8 100644 --- a/src/Monocle/Backend/Index.hs +++ b/src/Monocle/Backend/Index.hs @@ -660,7 +660,7 @@ indexErrors errors = indexDocs $ fmap toDoc errors getErrorDoc err = object ["type" .= EErrorDoc, "error_data" .= toJSON err] getErrorDocId :: EError -> BH.DocId - getErrorDocId = getBHDocID . erBody + getErrorDocId = getBHDocID . from . 
erBody indexIssues :: [EIssue] -> Eff es () indexIssues = error "todo" From 0f1df8a05ec831bac27918900ef456d8e792f1d3 Mon Sep 17 00:00:00 2001 From: Tristan Cacqueray Date: Sat, 23 Dec 2023 14:46:54 +0000 Subject: [PATCH 19/28] index: bump version to apply new mapping --- src/Monocle/Backend/Index.hs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Monocle/Backend/Index.hs b/src/Monocle/Backend/Index.hs index 8ddde87e8..d50e31b3e 100644 --- a/src/Monocle/Backend/Index.hs +++ b/src/Monocle/Backend/Index.hs @@ -261,7 +261,7 @@ createIndex indexName mapping = do retryPolicy = exponentialBackoff 500_000 <> limitRetries 7 configVersion :: ConfigVersion -configVersion = ConfigVersion 6 +configVersion = ConfigVersion 7 configIndex :: BH.IndexName configIndex = BH.IndexName "monocle.config" @@ -394,10 +394,10 @@ upgradeConfigV5 = do logInfo "Applying migration to schema V5 on workspace" ["index" .= indexName] void $ esPutMapping indexName mergedCommitField -upgradeConfigV6 :: forall es. MonoQuery :> es => IndexEffects es => Eff es () -upgradeConfigV6 = do +upgradeGlobalMapping :: forall es. MonoQuery :> es => IndexEffects es => Eff es () +upgradeGlobalMapping = do indexName <- getIndexName - logInfo "Applying migration to schema V6 on workspace" ["index" .= indexName] + logInfo "Applying migration to new ChangesIndexMapping" ["index" .= indexName] void $ esPutMapping indexName ChangesIndexMapping upgrades :: forall es. (E.Fail :> es, MonoQuery :> es) => IndexEffects es => [(ConfigVersion, Eff es ())] @@ -407,7 +407,7 @@ upgrades = , (ConfigVersion 3, void upgradeConfigV3) , (ConfigVersion 4, void upgradeConfigV4) , (ConfigVersion 5, void upgradeConfigV5) - , (ConfigVersion 6, void upgradeConfigV6) + , (ConfigVersion 7, void upgradeGlobalMapping) ] newtype ConfigVersion = ConfigVersion Integer From 796d3952d08d6a223d18e6d4a7fa62211b77a580 Mon Sep 17 00:00:00 2001 From: Tristan Cacqueray Date: Sun, 24 Dec 2023 16:19:30 +0000 Subject: [PATCH 20/28] crawler: improve crawler error representation --- src/Lentille.hs | 20 +++++++++++++++----- src/Lentille/GraphQL.hs | 12 ++++-------- src/Macroscope/Test.hs | 2 +- src/Macroscope/Worker.hs | 10 ++++++++-- 4 files changed, 28 insertions(+), 16 deletions(-) diff --git a/src/Lentille.hs b/src/Lentille.hs index ae3b97eb5..920f83afa 100644 --- a/src/Lentille.hs +++ b/src/Lentille.hs @@ -9,8 +9,10 @@ module Lentille ( -- * Lentille Errors LentilleError (..), + LentilleErrorKind (..), RequestLog (..), GraphQLError (..), + yieldStreamError, -- * Facilities getChangeId, @@ -89,11 +91,19 @@ data GraphQLError = GraphQLError } deriving (Show, Generic, ToJSON) -data LentilleError - = DecodeError UTCTime [Text] - | GraphError UTCTime GraphQLError +data LentilleError = LentilleError UTCTime LentilleErrorKind deriving (Show, Generic, ToJSON) +data LentilleErrorKind + = DecodeError [Text] + | GraphError GraphQLError + deriving (Show, Generic, ToJSON) + +yieldStreamError :: TimeEffect :> es => LentilleErrorKind -> LentilleStream es a +yieldStreamError e = do + now <- lift mGetCurrentTime + S.yield (Left $ LentilleError now e) + type LentilleStream es a = Stream (Of (Either LentilleError a)) (Eff es) () ------------------------------------------------------------------------------- @@ -141,8 +151,8 @@ type Changes = (Change, [ChangeEvent]) -- We don't care about the rest so we replace it with () -- See: https://hackage.haskell.org/package/streaming-0.2.4.0/docs/Streaming-Prelude.html#v:break -- --- >>> let stream = S.yield (Left (DecodeError 
[utctime|2021-05-31 00:00:00|] ["oops"])) --- >>> runEff $ S.length_ $ streamDropBefore [utctime|2021-05-31 00:00:00|] stream +-- >>> let stream = yieldStreamError (DecodeError ["oops"]) +-- >>> runEff $ runTime $ S.length_ $ streamDropBefore [utctime|2021-05-31 00:00:00|] stream -- 1 streamDropBefore :: UTCTime -> LentilleStream es Changes -> LentilleStream es Changes streamDropBefore untilDate = fmap (pure ()) . S.break (isChangeTooOld untilDate) diff --git a/src/Lentille/GraphQL.hs b/src/Lentille/GraphQL.hs index 90b22df29..fb2f41094 100644 --- a/src/Lentille/GraphQL.hs +++ b/src/Lentille/GraphQL.hs @@ -238,9 +238,7 @@ streamFetch client@GraphClient {..} mkArgs StreamFetchOptParams {..} transformRe Nothing -> pure Nothing case mErr of - Just err -> do - now <- lift mGetCurrentTime - S.yield (Left $ GraphError now err) + Just err -> yieldStreamError $ GraphError err Nothing -> go Nothing 0 go pageInfoM totalFetched = do @@ -256,17 +254,15 @@ streamFetch client@GraphClient {..} mkArgs StreamFetchOptParams {..} transformRe -- Handle the response case respE of - Left e -> do + Left e -> -- Yield the error and stop the stream - now <- lift mGetCurrentTime - S.yield (Left $ GraphError now e) + yieldStreamError $ GraphError e Right (pageInfo, rateLimitM, decodingErrors, xs) -> do -- Log crawling status logStep pageInfo rateLimitM xs totalFetched unless (null decodingErrors) do - now <- lift mGetCurrentTime - S.yield (Left $ DecodeError now decodingErrors) + yieldStreamError $ DecodeError decodingErrors -- Yield the results S.each (Right <$> xs) diff --git a/src/Macroscope/Test.hs b/src/Macroscope/Test.hs index 0bd48ae45..b28c84def 100644 --- a/src/Macroscope/Test.hs +++ b/src/Macroscope/Test.hs @@ -74,7 +74,7 @@ testCrawlingPoint = do badStream date name | date == BT.fakeDateAlt && name == "opendev/neutron" = do Streaming.yield $ Right (fakeChangePB, []) - Streaming.yield $ Left (DecodeError BT.fakeDateAlt ["Oops"]) + yieldStreamError $ DecodeError ["Oops"] | otherwise = error "Bad crawling point" -- A document stream that yield a change diff --git a/src/Macroscope/Worker.hs b/src/Macroscope/Worker.hs index 831d55400..525f01613 100644 --- a/src/Macroscope/Worker.hs +++ b/src/Macroscope/Worker.hs @@ -105,8 +105,7 @@ processStream entity logFunc postFunc = go (0 :: Word) [] [] -- We got a new document let doc = case edoc of Right x -> x - Left (DecodeError ts err) -> DTError $ CrawlerError "decode" (encodeJSON err) (Just $ from ts) (Just $ from entity) - Left (GraphError ts err) -> DTError $ CrawlerError "graph" (encodeJSON err) (Just $ from ts) (Just $ from entity) + Left err -> DTError $ toCrawlerError err let newAcc = doc : acc if count == 499 then do @@ -114,6 +113,13 @@ processStream entity logFunc postFunc = go (0 :: Word) [] [] go 0 [] (res : results) rest else go (count + 1) newAcc results rest + toCrawlerError (LentilleError ts err) = + CrawlerError msg body (Just $ from ts) (Just $ from entity) + where + (msg, body) = case err of + DecodeError xs -> ("decode", encodeJSON xs) + GraphError e -> ("graph", encodeJSON e) + processBatch :: [DocumentType] -> Eff es (Maybe (ProcessError es)) processBatch [] = pure Nothing processBatch (reverse -> docs) = do From 9fb3c3ba8b552acb750e50d8880e51eafdebd335 Mon Sep 17 00:00:00 2001 From: Tristan Cacqueray Date: Sun, 24 Dec 2023 16:34:05 +0000 Subject: [PATCH 21/28] crawler: introduce error variant for page-info --- src/Lentille.hs | 3 ++- src/Lentille/GraphQL.hs | 4 ++-- src/Macroscope/Worker.hs | 3 ++- 3 files changed, 6 insertions(+), 4 
deletions(-) diff --git a/src/Lentille.hs b/src/Lentille.hs index 920f83afa..dfcc97f70 100644 --- a/src/Lentille.hs +++ b/src/Lentille.hs @@ -96,7 +96,8 @@ data LentilleError = LentilleError UTCTime LentilleErrorKind data LentilleErrorKind = DecodeError [Text] - | GraphError GraphQLError + | RequestError GraphQLError + | PageInfoError GraphQLError deriving (Show, Generic, ToJSON) yieldStreamError :: TimeEffect :> es => LentilleErrorKind -> LentilleStream es a diff --git a/src/Lentille/GraphQL.hs b/src/Lentille/GraphQL.hs index fb2f41094..af5ff1a7b 100644 --- a/src/Lentille/GraphQL.hs +++ b/src/Lentille/GraphQL.hs @@ -238,7 +238,7 @@ streamFetch client@GraphClient {..} mkArgs StreamFetchOptParams {..} transformRe Nothing -> pure Nothing case mErr of - Just err -> yieldStreamError $ GraphError err + Just err -> yieldStreamError $ PageInfoError err Nothing -> go Nothing 0 go pageInfoM totalFetched = do @@ -256,7 +256,7 @@ streamFetch client@GraphClient {..} mkArgs StreamFetchOptParams {..} transformRe case respE of Left e -> -- Yield the error and stop the stream - yieldStreamError $ GraphError e + yieldStreamError $ RequestError e Right (pageInfo, rateLimitM, decodingErrors, xs) -> do -- Log crawling status logStep pageInfo rateLimitM xs totalFetched diff --git a/src/Macroscope/Worker.hs b/src/Macroscope/Worker.hs index 525f01613..8f85191e9 100644 --- a/src/Macroscope/Worker.hs +++ b/src/Macroscope/Worker.hs @@ -118,7 +118,8 @@ processStream entity logFunc postFunc = go (0 :: Word) [] [] where (msg, body) = case err of DecodeError xs -> ("decode", encodeJSON xs) - GraphError e -> ("graph", encodeJSON e) + RequestError e -> ("graph", encodeJSON e) + PageInfoError e -> ("page-info", encodeJSON e) processBatch :: [DocumentType] -> Eff es (Maybe (ProcessError es)) processBatch [] = pure Nothing From 1731dee4112dd347bb08d2cc5bea1fbe1cc1aebb Mon Sep 17 00:00:00 2001 From: Tristan Cacqueray Date: Sun, 24 Dec 2023 17:36:45 +0000 Subject: [PATCH 22/28] crawler: preserve the original fetch error from morpheus client --- src/Lentille.hs | 22 +++++++++++++++++++--- src/Lentille/GitHub/RateLimit.hs | 17 +++++++++++------ src/Lentille/GitHub/Utils.hs | 3 +-- src/Lentille/GraphQL.hs | 17 +++++++++++------ 4 files changed, 42 insertions(+), 17 deletions(-) diff --git a/src/Lentille.hs b/src/Lentille.hs index dfcc97f70..dfb74b8c6 100644 --- a/src/Lentille.hs +++ b/src/Lentille.hs @@ -13,6 +13,7 @@ module Lentille ( RequestLog (..), GraphQLError (..), yieldStreamError, + fmapFetchError, -- * Facilities getChangeId, @@ -33,6 +34,7 @@ module Lentille ( module Monocle.Logging, ) where +import Data.Morpheus.Client (FetchError (..)) import Data.Text qualified as T import Google.Protobuf.Timestamp qualified as T import Monocle.Class @@ -85,11 +87,25 @@ instance ToJSON RequestLog where -- | ErrorGraphQL is a wrapper around the morpheus's FetchError. 
data GraphQLError = GraphQLError - { -- TODO: keep the original error data type (instead of the Text) - err :: Text + { err :: FetchError () , request :: RequestLog } - deriving (Show, Generic, ToJSON) + deriving (Show, Generic) + +fmapFetchError :: (a -> b) -> FetchError a -> FetchError b +fmapFetchError f = \case + FetchErrorProducedErrors es Nothing -> FetchErrorProducedErrors es Nothing + FetchErrorProducedErrors es (Just a) -> FetchErrorProducedErrors es (Just $ f a) + FetchErrorNoResult -> FetchErrorNoResult + FetchErrorParseFailure s -> FetchErrorParseFailure s + +instance ToJSON GraphQLError where + toJSON e = object ["request" .= e.request, "fetch_error" .= fetchError] + where + fetchError = case e.err of + FetchErrorParseFailure s -> toJSON @Text $ "parse failure: " <> from s + FetchErrorNoResult -> toJSON @Text "no result" + FetchErrorProducedErrors es _ -> toJSON es data LentilleError = LentilleError UTCTime LentilleErrorKind deriving (Show, Generic, ToJSON) diff --git a/src/Lentille/GitHub/RateLimit.hs b/src/Lentille/GitHub/RateLimit.hs index 2c70cd989..f2e53ec1d 100644 --- a/src/Lentille/GitHub/RateLimit.hs +++ b/src/Lentille/GitHub/RateLimit.hs @@ -26,8 +26,8 @@ declareLocalTypesInline } |] -transformResponse :: GetRateLimit -> Maybe RateLimit -transformResponse = \case +transformResponseRL :: GetRateLimit -> Maybe RateLimit +transformResponseRL = \case GetRateLimit ( Just (GetRateLimitRateLimit used remaining (DateTime resetAt')) @@ -37,17 +37,22 @@ transformResponse = \case GetRateLimit Nothing -> Nothing respOther -> error ("Invalid response: " <> show respOther) -getRateLimit :: GraphEffects es => GraphClient -> Eff es (Either GraphQLError (Maybe RateLimit)) +transformResponse :: GraphResp GetRateLimit -> GraphResp (Maybe RateLimit) +transformResponse = \case + Right x -> Right $ transformResponseRL x + Left (l, e) -> Left (l, fmapFetchError transformResponseRL e) + +getRateLimit :: GraphEffects es => GraphClient -> Eff es (GraphResp (Maybe RateLimit)) getRateLimit client = do - fmap transformResponse + transformResponse <$> doRequest client mkRateLimitArgs retryCheck Nothing Nothing where mkRateLimitArgs = const . const $ () -retryCheck :: forall es a. GraphEffects es => Either GraphQLError a -> Eff es RetryAction +retryCheck :: forall es a. 
Show a => GraphEffects es => GraphResp a -> Eff es RetryAction retryCheck = \case Right _ -> pure DontRetry - Left (GraphQLError err (RequestLog _ _ resp _)) + Left (RequestLog _ _ resp _, err) | status == unauthorized401 -> do logWarn "Authentication error" ["body" .= body] pure DontRetry diff --git a/src/Lentille/GitHub/Utils.hs b/src/Lentille/GitHub/Utils.hs index 532f0cacd..020d693e7 100644 --- a/src/Lentille/GitHub/Utils.hs +++ b/src/Lentille/GitHub/Utils.hs @@ -104,8 +104,7 @@ instance From DateTime ChangeOptionalMergedAt where instance From DateTime T.Timestamp where from = dateTimeToTimestamp -optParams :: - GraphEffects es => StreamFetchOptParams es a +optParams :: Show a => GraphEffects es => StreamFetchOptParams es a optParams = let fpRetryCheck = retryCheck fpDepth = Just 25 diff --git a/src/Lentille/GraphQL.hs b/src/Lentille/GraphQL.hs index af5ff1a7b..14c86492d 100644 --- a/src/Lentille/GraphQL.hs +++ b/src/Lentille/GraphQL.hs @@ -23,6 +23,7 @@ module Lentille.GraphQL ( GraphEffects, GraphResponse, GraphResp, + GraphError, RateLimit (..), PageInfo (..), StreamFetchOptParams (..), @@ -127,7 +128,8 @@ data RateLimit = RateLimit {used :: Int, remaining :: Int, resetAt :: UTCTime} instance From RateLimit Text where from RateLimit {..} = "remains:" <> show remaining <> ", reset at: " <> show resetAt -type GraphResp a = Either GraphQLError a +type GraphError a = (RequestLog, FetchError a) +type GraphResp a = Either (GraphError a) a -- | wrapper around fetchWithLog than can optionaly handle fetch retries -- based on the returned data inspection via a provided function (see RetryCheck). @@ -159,7 +161,7 @@ doRequest client mkArgs retryCheck depthM pageInfoM = pure $ case resp of (Right x, _) -> Right x -- Throw an exception for the retryCheckM - (Left e, [req]) -> Left $ GraphQLError (show e) req + (Left e, [req]) -> Left (req, e) _ -> error $ "Unknown response: " <> show resp where aDepthM = decreaseValue retried <$> depthM @@ -186,6 +188,9 @@ data StreamFetchOptParams es a = StreamFetchOptParams defaultStreamFetchOptParams :: StreamFetchOptParams m a defaultStreamFetchOptParams = StreamFetchOptParams (const $ pure DontRetry) Nothing Nothing +mkGraphQLError :: GraphError a -> GraphQLError +mkGraphQLError (req, fe) = GraphQLError (fmapFetchError (const ()) fe) req + streamFetch :: forall es a b. 
(GraphEffects es, Fetch a, Show a) => @@ -225,7 +230,7 @@ streamFetch client@GraphClient {..} mkArgs StreamFetchOptParams {..} transformRe startFetch = do --- Perform a pre GraphQL request to gather rateLimit - (mErr :: Maybe GraphQLError) <- case fpGetRatelimit of + (mErr :: Maybe (GraphError (Maybe RateLimit))) <- case fpGetRatelimit of Just getRateLimit -> lift $ E.modifyMVar rateLimitMVar $ const do @@ -238,7 +243,7 @@ streamFetch client@GraphClient {..} mkArgs StreamFetchOptParams {..} transformRe Nothing -> pure Nothing case mErr of - Just err -> yieldStreamError $ PageInfoError err + Just err -> yieldStreamError $ PageInfoError $ mkGraphQLError err Nothing -> go Nothing 0 go pageInfoM totalFetched = do @@ -254,9 +259,9 @@ streamFetch client@GraphClient {..} mkArgs StreamFetchOptParams {..} transformRe -- Handle the response case respE of - Left e -> + Left err -> -- Yield the error and stop the stream - yieldStreamError $ RequestError e + yieldStreamError $ RequestError (mkGraphQLError err) Right (pageInfo, rateLimitM, decodingErrors, xs) -> do -- Log crawling status logStep pageInfo rateLimitM xs totalFetched From 32f140a8d42344d36bec3764290c8e5614276840 Mon Sep 17 00:00:00 2001 From: Tristan Cacqueray Date: Sun, 24 Dec 2023 18:10:50 +0000 Subject: [PATCH 23/28] crawler: handle partial results --- src/Lentille.hs | 1 + src/Lentille/GraphQL.hs | 18 ++++++++++++++---- src/Macroscope/Worker.hs | 1 + 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/src/Lentille.hs b/src/Lentille.hs index dfb74b8c6..bf5e54d2a 100644 --- a/src/Lentille.hs +++ b/src/Lentille.hs @@ -114,6 +114,7 @@ data LentilleErrorKind = DecodeError [Text] | RequestError GraphQLError | PageInfoError GraphQLError + | PartialErrors Value deriving (Show, Generic, ToJSON) yieldStreamError :: TimeEffect :> es => LentilleErrorKind -> LentilleStream es a diff --git a/src/Lentille/GraphQL.hs b/src/Lentille/GraphQL.hs index 14c86492d..03a414fc1 100644 --- a/src/Lentille/GraphQL.hs +++ b/src/Lentille/GraphQL.hs @@ -191,6 +191,8 @@ defaultStreamFetchOptParams = StreamFetchOptParams (const $ pure DontRetry) Noth mkGraphQLError :: GraphError a -> GraphQLError mkGraphQLError (req, fe) = GraphQLError (fmapFetchError (const ()) fe) req +data RequestResult a = RequestResult (Maybe Value) a + streamFetch :: forall es a b. (GraphEffects es, Fetch a, Show a) => @@ -215,11 +217,15 @@ streamFetch client@GraphClient {..} mkArgs StreamFetchOptParams {..} transformRe requestWithPageInfo pageInfoM storedRateLimitM = do holdOnIfNeeded storedRateLimitM eResp <- doRequest client mkArgs fpRetryCheck fpDepth pageInfoM + let handleResp mPartial resp = + let (pageInfo, rateLimitM, decodingErrors, xs) = transformResponse resp + in Right (rateLimitM, RequestResult mPartial (pageInfo, rateLimitM, decodingErrors, xs)) pure $ case eResp of + -- This is not a fatal error, it contains the desired response so handle it as a success. 
+ -- The handler below will insert a 'PartialErrors' + Left (_, FetchErrorProducedErrors err (Just resp)) -> handleResp (Just (toJSON err)) resp Left err -> Left err - Right resp -> - let (pageInfo, rateLimitM, decodingErrors, xs) = transformResponse resp - in Right (rateLimitM, (pageInfo, rateLimitM, decodingErrors, xs)) + Right resp -> handleResp Nothing resp logStep pageInfo rateLimitM xs totalFetched = do lift @@ -262,10 +268,14 @@ streamFetch client@GraphClient {..} mkArgs StreamFetchOptParams {..} transformRe Left err -> -- Yield the error and stop the stream yieldStreamError $ RequestError (mkGraphQLError err) - Right (pageInfo, rateLimitM, decodingErrors, xs) -> do + Right (RequestResult mPartial (pageInfo, rateLimitM, decodingErrors, xs)) -> do -- Log crawling status logStep pageInfo rateLimitM xs totalFetched + forM_ mPartial \partial -> do + lift $ logWarn "Fetched partial result" ["err" .= partial] + yieldStreamError $ PartialErrors partial + unless (null decodingErrors) do yieldStreamError $ DecodeError decodingErrors diff --git a/src/Macroscope/Worker.hs b/src/Macroscope/Worker.hs index 8f85191e9..310f86b62 100644 --- a/src/Macroscope/Worker.hs +++ b/src/Macroscope/Worker.hs @@ -120,6 +120,7 @@ processStream entity logFunc postFunc = go (0 :: Word) [] [] DecodeError xs -> ("decode", encodeJSON xs) RequestError e -> ("graph", encodeJSON e) PageInfoError e -> ("page-info", encodeJSON e) + PartialErrors es -> ("partial", encodeJSON es) processBatch :: [DocumentType] -> Eff es (Maybe (ProcessError es)) processBatch [] = pure Nothing From 4d2bf8ab91dab0b6c85ec6ac293673e290bbd241 Mon Sep 17 00:00:00 2001 From: Tristan Cacqueray Date: Wed, 27 Dec 2023 16:55:18 +0000 Subject: [PATCH 24/28] api: introduce CrawlerErrorList This change enables grouping errors by crawler/entity --- codegen/Monocle/Protob/Crawler.hs | 250 ++++++++++++++++++++------- doc/openapi.yaml | 10 +- schemas/monocle/protob/crawler.proto | 15 +- src/Macroscope/Test.hs | 9 +- src/Macroscope/Worker.hs | 10 +- src/Monocle/Api/Server.hs | 19 +- src/Monocle/Backend/Documents.hs | 1 - web/src/App.res | 17 +- web/src/components/Store.res | 4 +- web/src/messages/CrawlerBs.ml | 68 +++++++- web/src/messages/CrawlerBs.mli | 6 + web/src/messages/CrawlerTypes.ml | 19 +- web/src/messages/CrawlerTypes.mli | 18 +- 13 files changed, 346 insertions(+), 100 deletions(-) diff --git a/codegen/Monocle/Protob/Crawler.hs b/codegen/Monocle/Protob/Crawler.hs index f379d4ac2..9d8d15ac0 100644 --- a/codegen/Monocle/Protob/Crawler.hs +++ b/codegen/Monocle/Protob/Crawler.hs @@ -346,7 +346,6 @@ data CrawlerError = CrawlerError , crawlerErrorBody :: Hs.Text , crawlerErrorCreatedAt :: Hs.Maybe Google.Protobuf.Timestamp.Timestamp - , crawlerErrorEntity :: Hs.Maybe Monocle.Protob.Crawler.Entity } deriving (Hs.Show, Hs.Eq, Hs.Ord, Hs.Generic) @@ -364,35 +363,27 @@ instance HsProtobuf.Message CrawlerError where { crawlerErrorMessage = crawlerErrorMessage , crawlerErrorBody = crawlerErrorBody , crawlerErrorCreatedAt = crawlerErrorCreatedAt - , crawlerErrorEntity = crawlerErrorEntity } = ( Hs.mconcat [ ( HsProtobuf.encodeMessageField - (HsProtobuf.FieldNumber 1) + (HsProtobuf.FieldNumber 2) ( Hs.coerce @(Hs.Text) @(HsProtobuf.String Hs.Text) (crawlerErrorMessage) ) ) , ( HsProtobuf.encodeMessageField - (HsProtobuf.FieldNumber 2) + (HsProtobuf.FieldNumber 3) ( Hs.coerce @(Hs.Text) @(HsProtobuf.String Hs.Text) (crawlerErrorBody) ) ) , ( HsProtobuf.encodeMessageField - (HsProtobuf.FieldNumber 3) + (HsProtobuf.FieldNumber 4) ( Hs.coerce @(Hs.Maybe 
Google.Protobuf.Timestamp.Timestamp) @(HsProtobuf.Nested Google.Protobuf.Timestamp.Timestamp) (crawlerErrorCreatedAt) ) ) - , ( HsProtobuf.encodeMessageField - (HsProtobuf.FieldNumber 4) - ( Hs.coerce @(Hs.Maybe Monocle.Protob.Crawler.Entity) - @(HsProtobuf.Nested Monocle.Protob.Crawler.Entity) - (crawlerErrorEntity) - ) - ) ] ) decodeMessage _ = @@ -400,26 +391,18 @@ instance HsProtobuf.Message CrawlerError where <*> ( HsProtobuf.coerceOver @(HsProtobuf.String Hs.Text) @(Hs.Text) ( HsProtobuf.at HsProtobuf.decodeMessageField - (HsProtobuf.FieldNumber 1) + (HsProtobuf.FieldNumber 2) ) ) <*> ( HsProtobuf.coerceOver @(HsProtobuf.String Hs.Text) @(Hs.Text) ( HsProtobuf.at HsProtobuf.decodeMessageField - (HsProtobuf.FieldNumber 2) + (HsProtobuf.FieldNumber 3) ) ) <*> ( HsProtobuf.coerceOver @(HsProtobuf.Nested Google.Protobuf.Timestamp.Timestamp) @(Hs.Maybe Google.Protobuf.Timestamp.Timestamp) - ( HsProtobuf.at - HsProtobuf.decodeMessageField - (HsProtobuf.FieldNumber 3) - ) - ) - <*> ( HsProtobuf.coerceOver - @(HsProtobuf.Nested Monocle.Protob.Crawler.Entity) - @(Hs.Maybe Monocle.Protob.Crawler.Entity) ( HsProtobuf.at HsProtobuf.decodeMessageField (HsProtobuf.FieldNumber 4) @@ -427,21 +410,21 @@ instance HsProtobuf.Message CrawlerError where ) dotProto _ = [ ( HsProtobufAST.DotProtoField - (HsProtobuf.FieldNumber 1) + (HsProtobuf.FieldNumber 2) (HsProtobufAST.Prim HsProtobufAST.String) (HsProtobufAST.Single "message") [] "" ) , ( HsProtobufAST.DotProtoField - (HsProtobuf.FieldNumber 2) + (HsProtobuf.FieldNumber 3) (HsProtobufAST.Prim HsProtobufAST.String) (HsProtobufAST.Single "body") [] "" ) , ( HsProtobufAST.DotProtoField - (HsProtobuf.FieldNumber 3) + (HsProtobuf.FieldNumber 4) ( HsProtobufAST.Prim ( HsProtobufAST.Named ( HsProtobufAST.Dots @@ -453,48 +436,29 @@ instance HsProtobuf.Message CrawlerError where [] "" ) - , ( HsProtobufAST.DotProtoField - (HsProtobuf.FieldNumber 4) - ( HsProtobufAST.Prim - (HsProtobufAST.Named (HsProtobufAST.Single "Entity")) - ) - (HsProtobufAST.Single "entity") - [] - "" - ) ] instance HsJSONPB.ToJSONPB CrawlerError where - toJSONPB (CrawlerError f1 f2 f3 f4) = + toJSONPB (CrawlerError f2 f3 f4) = ( HsJSONPB.object [ "message" - .= (Hs.coerce @(Hs.Text) @(HsProtobuf.String Hs.Text) (f1)) - , "body" .= (Hs.coerce @(Hs.Text) @(HsProtobuf.String Hs.Text) (f2)) + .= (Hs.coerce @(Hs.Text) @(HsProtobuf.String Hs.Text) (f2)) + , "body" .= (Hs.coerce @(Hs.Text) @(HsProtobuf.String Hs.Text) (f3)) , "created_at" .= ( Hs.coerce @(Hs.Maybe Google.Protobuf.Timestamp.Timestamp) @(HsProtobuf.Nested Google.Protobuf.Timestamp.Timestamp) - (f3) - ) - , "entity" - .= ( Hs.coerce @(Hs.Maybe Monocle.Protob.Crawler.Entity) - @(HsProtobuf.Nested Monocle.Protob.Crawler.Entity) (f4) ) ] ) - toEncodingPB (CrawlerError f1 f2 f3 f4) = + toEncodingPB (CrawlerError f2 f3 f4) = ( HsJSONPB.pairs [ "message" - .= (Hs.coerce @(Hs.Text) @(HsProtobuf.String Hs.Text) (f1)) - , "body" .= (Hs.coerce @(Hs.Text) @(HsProtobuf.String Hs.Text) (f2)) + .= (Hs.coerce @(Hs.Text) @(HsProtobuf.String Hs.Text) (f2)) + , "body" .= (Hs.coerce @(Hs.Text) @(HsProtobuf.String Hs.Text) (f3)) , "created_at" .= ( Hs.coerce @(Hs.Maybe Google.Protobuf.Timestamp.Timestamp) @(HsProtobuf.Nested Google.Protobuf.Timestamp.Timestamp) - (f3) - ) - , "entity" - .= ( Hs.coerce @(Hs.Maybe Monocle.Protob.Crawler.Entity) - @(HsProtobuf.Nested Monocle.Protob.Crawler.Entity) (f4) ) ] @@ -517,19 +481,177 @@ instance HsJSONPB.FromJSONPB CrawlerError where @(Hs.Maybe Google.Protobuf.Timestamp.Timestamp) (obj .: "created_at") ) + ) + 
) + +instance HsJSONPB.ToJSON CrawlerError where + toJSON = HsJSONPB.toAesonValue + toEncoding = HsJSONPB.toAesonEncoding + +instance HsJSONPB.FromJSON CrawlerError where + parseJSON = HsJSONPB.parseJSONPB + +data CrawlerErrorList = CrawlerErrorList + { crawlerErrorListCrawler :: + Hs.Text + , crawlerErrorListEntity :: + Hs.Maybe Monocle.Protob.Crawler.Entity + , crawlerErrorListErrors :: + Hs.Vector Monocle.Protob.Crawler.CrawlerError + } + deriving (Hs.Show, Hs.Eq, Hs.Ord, Hs.Generic) + +instance Hs.NFData CrawlerErrorList + +instance HsProtobuf.Named CrawlerErrorList where + nameOf _ = (Hs.fromString "CrawlerErrorList") + +instance HsProtobuf.HasDefault CrawlerErrorList + +instance HsProtobuf.Message CrawlerErrorList where + encodeMessage + _ + CrawlerErrorList + { crawlerErrorListCrawler = crawlerErrorListCrawler + , crawlerErrorListEntity = crawlerErrorListEntity + , crawlerErrorListErrors = crawlerErrorListErrors + } = + ( Hs.mconcat + [ ( HsProtobuf.encodeMessageField + (HsProtobuf.FieldNumber 1) + ( Hs.coerce @(Hs.Text) @(HsProtobuf.String Hs.Text) + (crawlerErrorListCrawler) + ) + ) + , ( HsProtobuf.encodeMessageField + (HsProtobuf.FieldNumber 2) + ( Hs.coerce @(Hs.Maybe Monocle.Protob.Crawler.Entity) + @(HsProtobuf.Nested Monocle.Protob.Crawler.Entity) + (crawlerErrorListEntity) + ) + ) + , ( HsProtobuf.encodeMessageField + (HsProtobuf.FieldNumber 3) + ( Hs.coerce @(Hs.Vector Monocle.Protob.Crawler.CrawlerError) + @(HsProtobuf.NestedVec Monocle.Protob.Crawler.CrawlerError) + (crawlerErrorListErrors) + ) + ) + ] + ) + decodeMessage _ = + (Hs.pure CrawlerErrorList) + <*> ( HsProtobuf.coerceOver @(HsProtobuf.String Hs.Text) @(Hs.Text) + ( HsProtobuf.at + HsProtobuf.decodeMessageField + (HsProtobuf.FieldNumber 1) + ) + ) + <*> ( HsProtobuf.coerceOver + @(HsProtobuf.Nested Monocle.Protob.Crawler.Entity) + @(Hs.Maybe Monocle.Protob.Crawler.Entity) + ( HsProtobuf.at + HsProtobuf.decodeMessageField + (HsProtobuf.FieldNumber 2) + ) + ) + <*> ( HsProtobuf.coerceOver + @(HsProtobuf.NestedVec Monocle.Protob.Crawler.CrawlerError) + @(Hs.Vector Monocle.Protob.Crawler.CrawlerError) + ( HsProtobuf.at + HsProtobuf.decodeMessageField + (HsProtobuf.FieldNumber 3) + ) + ) + dotProto _ = + [ ( HsProtobufAST.DotProtoField + (HsProtobuf.FieldNumber 1) + (HsProtobufAST.Prim HsProtobufAST.String) + (HsProtobufAST.Single "crawler") + [] + "" + ) + , ( HsProtobufAST.DotProtoField + (HsProtobuf.FieldNumber 2) + ( HsProtobufAST.Prim + (HsProtobufAST.Named (HsProtobufAST.Single "Entity")) + ) + (HsProtobufAST.Single "entity") + [] + "" + ) + , ( HsProtobufAST.DotProtoField + (HsProtobuf.FieldNumber 3) + ( HsProtobufAST.Repeated + (HsProtobufAST.Named (HsProtobufAST.Single "CrawlerError")) + ) + (HsProtobufAST.Single "errors") + [] + "" + ) + ] + +instance HsJSONPB.ToJSONPB CrawlerErrorList where + toJSONPB (CrawlerErrorList f1 f2 f3) = + ( HsJSONPB.object + [ "crawler" + .= (Hs.coerce @(Hs.Text) @(HsProtobuf.String Hs.Text) (f1)) + , "entity" + .= ( Hs.coerce @(Hs.Maybe Monocle.Protob.Crawler.Entity) + @(HsProtobuf.Nested Monocle.Protob.Crawler.Entity) + (f2) + ) + , "errors" + .= ( Hs.coerce @(Hs.Vector Monocle.Protob.Crawler.CrawlerError) + @(HsProtobuf.NestedVec Monocle.Protob.Crawler.CrawlerError) + (f3) + ) + ] + ) + toEncodingPB (CrawlerErrorList f1 f2 f3) = + ( HsJSONPB.pairs + [ "crawler" + .= (Hs.coerce @(Hs.Text) @(HsProtobuf.String Hs.Text) (f1)) + , "entity" + .= ( Hs.coerce @(Hs.Maybe Monocle.Protob.Crawler.Entity) + @(HsProtobuf.Nested Monocle.Protob.Crawler.Entity) + (f2) + ) + , "errors" + .= 
( Hs.coerce @(Hs.Vector Monocle.Protob.Crawler.CrawlerError) + @(HsProtobuf.NestedVec Monocle.Protob.Crawler.CrawlerError) + (f3) + ) + ] + ) + +instance HsJSONPB.FromJSONPB CrawlerErrorList where + parseJSONPB = + ( HsJSONPB.withObject + "CrawlerErrorList" + ( \obj -> + (Hs.pure CrawlerErrorList) + <*> ( HsProtobuf.coerceOver @(HsProtobuf.String Hs.Text) @(Hs.Text) + (obj .: "crawler") + ) <*> ( HsProtobuf.coerceOver @(HsProtobuf.Nested Monocle.Protob.Crawler.Entity) @(Hs.Maybe Monocle.Protob.Crawler.Entity) (obj .: "entity") ) + <*> ( HsProtobuf.coerceOver + @(HsProtobuf.NestedVec Monocle.Protob.Crawler.CrawlerError) + @(Hs.Vector Monocle.Protob.Crawler.CrawlerError) + (obj .: "errors") + ) ) ) -instance HsJSONPB.ToJSON CrawlerError where +instance HsJSONPB.ToJSON CrawlerErrorList where toJSON = HsJSONPB.toAesonValue toEncoding = HsJSONPB.toAesonEncoding -instance HsJSONPB.FromJSON CrawlerError where +instance HsJSONPB.FromJSON CrawlerErrorList where parseJSON = HsJSONPB.parseJSONPB data ErrorsRequest = ErrorsRequest @@ -640,7 +762,7 @@ instance HsJSONPB.FromJSON ErrorsRequest where newtype ErrorsList = ErrorsList { errorsListErrors :: - Hs.Vector Monocle.Protob.Crawler.CrawlerError + Hs.Vector Monocle.Protob.Crawler.CrawlerErrorList } deriving (Hs.Show, Hs.Eq, Hs.Ord, Hs.Generic) @@ -656,8 +778,8 @@ instance HsProtobuf.Message ErrorsList where ( Hs.mconcat [ ( HsProtobuf.encodeMessageField (HsProtobuf.FieldNumber 1) - ( Hs.coerce @(Hs.Vector Monocle.Protob.Crawler.CrawlerError) - @(HsProtobuf.NestedVec Monocle.Protob.Crawler.CrawlerError) + ( Hs.coerce @(Hs.Vector Monocle.Protob.Crawler.CrawlerErrorList) + @(HsProtobuf.NestedVec Monocle.Protob.Crawler.CrawlerErrorList) (errorsListErrors) ) ) @@ -666,8 +788,8 @@ instance HsProtobuf.Message ErrorsList where decodeMessage _ = (Hs.pure ErrorsList) <*> ( HsProtobuf.coerceOver - @(HsProtobuf.NestedVec Monocle.Protob.Crawler.CrawlerError) - @(Hs.Vector Monocle.Protob.Crawler.CrawlerError) + @(HsProtobuf.NestedVec Monocle.Protob.Crawler.CrawlerErrorList) + @(Hs.Vector Monocle.Protob.Crawler.CrawlerErrorList) ( HsProtobuf.at HsProtobuf.decodeMessageField (HsProtobuf.FieldNumber 1) @@ -677,7 +799,7 @@ instance HsProtobuf.Message ErrorsList where [ ( HsProtobufAST.DotProtoField (HsProtobuf.FieldNumber 1) ( HsProtobufAST.Repeated - (HsProtobufAST.Named (HsProtobufAST.Single "CrawlerError")) + (HsProtobufAST.Named (HsProtobufAST.Single "CrawlerErrorList")) ) (HsProtobufAST.Single "errors") [] @@ -689,8 +811,8 @@ instance HsJSONPB.ToJSONPB ErrorsList where toJSONPB (ErrorsList f1) = ( HsJSONPB.object [ "errors" - .= ( Hs.coerce @(Hs.Vector Monocle.Protob.Crawler.CrawlerError) - @(HsProtobuf.NestedVec Monocle.Protob.Crawler.CrawlerError) + .= ( Hs.coerce @(Hs.Vector Monocle.Protob.Crawler.CrawlerErrorList) + @(HsProtobuf.NestedVec Monocle.Protob.Crawler.CrawlerErrorList) (f1) ) ] @@ -698,8 +820,8 @@ instance HsJSONPB.ToJSONPB ErrorsList where toEncodingPB (ErrorsList f1) = ( HsJSONPB.pairs [ "errors" - .= ( Hs.coerce @(Hs.Vector Monocle.Protob.Crawler.CrawlerError) - @(HsProtobuf.NestedVec Monocle.Protob.Crawler.CrawlerError) + .= ( Hs.coerce @(Hs.Vector Monocle.Protob.Crawler.CrawlerErrorList) + @(HsProtobuf.NestedVec Monocle.Protob.Crawler.CrawlerErrorList) (f1) ) ] @@ -712,8 +834,8 @@ instance HsJSONPB.FromJSONPB ErrorsList where ( \obj -> (Hs.pure ErrorsList) <*> ( HsProtobuf.coerceOver - @(HsProtobuf.NestedVec Monocle.Protob.Crawler.CrawlerError) - @(Hs.Vector Monocle.Protob.Crawler.CrawlerError) + @(HsProtobuf.NestedVec 
Monocle.Protob.Crawler.CrawlerErrorList) + @(Hs.Vector Monocle.Protob.Crawler.CrawlerErrorList) (obj .: "errors") ) ) diff --git a/doc/openapi.yaml b/doc/openapi.yaml index 370664ed5..9bd8404c6 100644 --- a/doc/openapi.yaml +++ b/doc/openapi.yaml @@ -753,8 +753,16 @@ components: created_at: type: string format: RFC3339 + monocle_crawler_CrawlerErrorList: + properties: + crawler: + type: string entity: $ref: '#/components/schemas/monocle_crawler_Entity' + errors: + type: array + items: + $ref: '#/components/schemas/monocle_crawler_CrawlerError' monocle_crawler_Entity: properties: organization_name: @@ -773,7 +781,7 @@ components: errors: type: array items: - $ref: '#/components/schemas/monocle_crawler_CrawlerError' + $ref: '#/components/schemas/monocle_crawler_CrawlerErrorList' monocle_crawler_ErrorsRequest: properties: index: diff --git a/schemas/monocle/protob/crawler.proto b/schemas/monocle/protob/crawler.proto index ebcc47201..f573b337d 100644 --- a/schemas/monocle/protob/crawler.proto +++ b/schemas/monocle/protob/crawler.proto @@ -29,10 +29,15 @@ enum EntityType { } message CrawlerError { - string message = 1; - string body = 2; - google.protobuf.Timestamp created_at = 3; - Entity entity = 4; + string message = 2; + string body = 3; + google.protobuf.Timestamp created_at = 4; +} + +message CrawlerErrorList { + string crawler = 1; + Entity entity = 2; + repeated CrawlerError errors = 3; } message ErrorsRequest { @@ -41,7 +46,7 @@ message ErrorsRequest { } message ErrorsList { - repeated CrawlerError errors = 1; + repeated CrawlerErrorList errors = 1; } message ErrorsResponse { diff --git a/src/Macroscope/Test.hs b/src/Macroscope/Test.hs index b28c84def..95044d4a8 100644 --- a/src/Macroscope/Test.hs +++ b/src/Macroscope/Test.hs @@ -1,6 +1,7 @@ -- | Tests for the macroscope process module Macroscope.Test where +import Data.Vector qualified as V import Effectful.Env import Effectful.Prometheus import Effectful.Reader.Static qualified as E @@ -60,9 +61,11 @@ testCrawlingPoint = do errorResponse <- crawlerErrors client (CrawlerPB.ErrorsRequest (from indexName) "from:2020") case errorResponse of CrawlerPB.ErrorsResponse (Just (CrawlerPB.ErrorsResponseResultSuccess (CrawlerPB.ErrorsList (toList -> [e])))) -> liftIO do - e.crawlerErrorMessage @?= "decode" - e.crawlerErrorBody @?= "[\"Oops\"]" - (from <$> e.crawlerErrorEntity) @?= Just (Project "opendev/neutron") + length e.crawlerErrorListErrors @?= 1 + let err = V.head e.crawlerErrorListErrors + err.crawlerErrorMessage @?= "decode" + err.crawlerErrorBody @?= "[\"Oops\"]" + (from <$> e.crawlerErrorListEntity) @?= Just (Project "opendev/neutron") _ -> error $ "Expected one error, got: " <> show errorResponse Macroscope.runStream apiKey indexName (CrawlerName crawlerName) (Macroscope.Changes $ goodStream currentOldestAge) diff --git a/src/Macroscope/Worker.hs b/src/Macroscope/Worker.hs index 310f86b62..e19776a1a 100644 --- a/src/Macroscope/Worker.hs +++ b/src/Macroscope/Worker.hs @@ -83,7 +83,6 @@ data ProcessError es -- | 'processStream' read the stream of document and post to the monocle API processStream :: forall es. 
- Entity -> -- | Funtion to log about the processing (Int -> Eff es ()) -> -- | Function to post on the Monocle API @@ -92,7 +91,7 @@ processStream :: LentilleStream es DocumentType -> -- | The processing results Eff es [Maybe (ProcessError es)] -processStream entity logFunc postFunc = go (0 :: Word) [] [] +processStream logFunc postFunc = go (0 :: Word) [] [] where go count acc results stream = do eDocument <- S.next stream @@ -113,10 +112,10 @@ processStream entity logFunc postFunc = go (0 :: Word) [] [] go 0 [] (res : results) rest else go (count + 1) newAcc results rest - toCrawlerError (LentilleError ts err) = - CrawlerError msg body (Just $ from ts) (Just $ from entity) + toCrawlerError (LentilleError ts err) = CrawlerError {..} where - (msg, body) = case err of + crawlerErrorCreatedAt = Just $ from ts + (crawlerErrorMessage, crawlerErrorBody) = case err of DecodeError xs -> ("decode", encodeJSON xs) RequestError e -> ("graph", encodeJSON e) PageInfoError e -> ("page-info", encodeJSON e) @@ -189,7 +188,6 @@ runStreamError startTime apiKey indexName (CrawlerName crawlerName) documentStre -- Run the document stream for that entity postResult <- processStream - entity (\c -> logInfo "Posting documents" ["count" .= c]) (httpRetry "api/commit/add" . mCrawlerAddDoc . mkRequest entity) (getStream oldestAge entity) diff --git a/src/Monocle/Api/Server.hs b/src/Monocle/Api/Server.hs index 92117a8cf..0df45b4c5 100644 --- a/src/Monocle/Api/Server.hs +++ b/src/Monocle/Api/Server.hs @@ -572,7 +572,7 @@ crawlerErrors auth request = checkAuth auth response case requestE of Right (tenant, query) -> runQueryM tenant (Q.ensureMinBound query) $ do logInfo "ListingErrors" ["index" .= request.errorsRequestIndex] - errors <- fmap from <$> Q.crawlerErrors + errors <- toErrorsList <$> Q.crawlerErrors pure $ CrawlerPB.ErrorsResponse $ Just $ CrawlerPB.ErrorsResponseResultSuccess $ CrawlerPB.ErrorsList $ fromList errors Left (ParseError msg offset) -> pure @@ -580,6 +580,23 @@ crawlerErrors auth request = checkAuth auth response $ Just $ CrawlerPB.ErrorsResponseResultError (show offset <> ":" <> from msg) + -- Group eerror by crawler name and entity + toErrorsList :: [EError] -> [CrawlerPB.CrawlerErrorList] + toErrorsList = fmap mkErrorList . Map.toList . mergeErrors + + mkErrorList :: ((LText, CrawlerPB.Entity), [EError]) -> CrawlerPB.CrawlerErrorList + mkErrorList ((crawlerErrorListCrawler, entity), errors) = CrawlerPB.CrawlerErrorList {..} + where + crawlerErrorListEntity = Just (from entity) + crawlerErrorListErrors = fromList (from <$> errors) + + mergeErrors :: [EError] -> Map (LText, CrawlerPB.Entity) [EError] + mergeErrors = Map.fromListWith (<>) . 
fmap toKVList + where + toKVList eerror = + let k = (from eerror.erCrawlerName, from eerror.erEntity) + in (k, [eerror]) + -- | /search/query endpoint searchQuery :: ApiEffects es => AuthResult AuthenticatedUser -> SearchPB.QueryRequest -> Eff es SearchPB.QueryResponse searchQuery auth request = checkAuth auth response diff --git a/src/Monocle/Backend/Documents.hs b/src/Monocle/Backend/Documents.hs index 39fb0ad95..cb0e2b86e 100644 --- a/src/Monocle/Backend/Documents.hs +++ b/src/Monocle/Backend/Documents.hs @@ -245,7 +245,6 @@ instance From EError CrawlerError where { crawlerErrorBody = from eerror.erBody , crawlerErrorMessage = from eerror.erMessage , crawlerErrorCreatedAt = Just $ from eerror.erCreatedAt - , crawlerErrorEntity = Just $ from eerror.erEntity } -- Custom encoder to manually serialize the entity type diff --git a/web/src/App.res b/web/src/App.res index 11822d9e1..2e91152c1 100644 --- a/web/src/App.res +++ b/web/src/App.res @@ -294,10 +294,8 @@ module Errors = { module CrawlerError = { @react.component let make = (~err: CrawlerTypes.crawler_error) => { - let entity: option = err.entity->Belt.Option.flatMap(Js.Json.stringifyAny)
-        {("entity: " ++ entity->Belt.Option.getWithDefault(""))->str}
         {("message: " ++ err.message)->str}
         {("body: " ++ err.body)->str}
@@ -305,6 +303,19 @@ module Errors = {
     }
   }
 
+  module CrawlerErrors = {
+    @react.component
+    let make = (~err: CrawlerTypes.crawler_error_list) => {
+      let entity: option<string> = err.entity->Belt.Option.flatMap(Js.Json.stringifyAny)
+      {("entity: " ++ entity->Belt.Option.getWithDefault(""))->str}
+      {("crawler: " ++ err.crawler)->str}
+      {err.errors->Belt.List.map(e => <CrawlerError err={e} />)->Belt.List.toArray->React.array}
+    }
+  }
+
   @react.component
   let make = (~store: Store.t) => {
     let (state, _) = store
@@ -313,7 +324,7 @@ module Errors = {
+ } + } + @react.component let make = (~store: Store.t) => { let (state, _) = store @@ -313,7 +324,7 @@ module Errors = {

{"The following errors happened when updating the index. This is likely causing some data to be missing."->str}

- {state.errors->Belt.List.map(e => )->Belt.List.toArray->React.array} + {state.errors->Belt.List.map(e => )->Belt.List.toArray->React.array} } } diff --git a/web/src/components/Store.res b/web/src/components/Store.res index 606a5f81e..3a20f1c9c 100644 --- a/web/src/components/Store.res +++ b/web/src/components/Store.res @@ -66,7 +66,7 @@ module Store = { about: ConfigTypes.about, dexie: Dexie.Database.t, toasts: list, - errors: list, + errors: list, } type action = | ChangeIndex(string) @@ -75,7 +75,7 @@ module Store = { | SetLimit(int) | SetOrder(option) | SetAuthorScopedTab(authorScopedTab) - | SetErrors(list) + | SetErrors(list) | FetchFields(fieldsRespR) | FetchSuggestions(suggestionsR) | FetchProjects(projectsR) diff --git a/web/src/messages/CrawlerBs.ml b/web/src/messages/CrawlerBs.ml index 8b05db5c4..2029af094 100644 --- a/web/src/messages/CrawlerBs.ml +++ b/web/src/messages/CrawlerBs.ml @@ -4,14 +4,24 @@ type crawler_error_mutable = { mutable message : string; mutable body : string; mutable created_at : TimestampTypes.timestamp option; - mutable entity : CrawlerTypes.entity option; } let default_crawler_error_mutable () : crawler_error_mutable = { message = ""; body = ""; created_at = None; +} + +type crawler_error_list_mutable = { + mutable crawler : string; + mutable entity : CrawlerTypes.entity option; + mutable errors : CrawlerTypes.crawler_error list; +} + +let default_crawler_error_list_mutable () : crawler_error_list_mutable = { + crawler = ""; entity = None; + errors = []; } type errors_request_mutable = { @@ -25,7 +35,7 @@ let default_errors_request_mutable () : errors_request_mutable = { } type errors_list_mutable = { - mutable errors : CrawlerTypes.crawler_error list; + mutable errors : CrawlerTypes.crawler_error_list list; } let default_errors_list_mutable () : errors_list_mutable = { @@ -160,9 +170,6 @@ let rec decode_crawler_error json = | "created_at" -> let json = Js.Dict.unsafeGet json "created_at" in v.created_at <- Some ((TimestampBs.decode_timestamp (Pbrt_bs.string json "crawler_error" "created_at"))) - | "entity" -> - let json = Js.Dict.unsafeGet json "entity" in - v.entity <- Some ((decode_entity (Pbrt_bs.object_ json "crawler_error" "entity"))) | _ -> () (*Unknown fields are ignored*) done; @@ -170,9 +177,38 @@ let rec decode_crawler_error json = CrawlerTypes.message = v.message; CrawlerTypes.body = v.body; CrawlerTypes.created_at = v.created_at; - CrawlerTypes.entity = v.entity; } : CrawlerTypes.crawler_error) +let rec decode_crawler_error_list json = + let v = default_crawler_error_list_mutable () in + let keys = Js.Dict.keys json in + let last_key_index = Array.length keys - 1 in + for i = 0 to last_key_index do + match Array.unsafe_get keys i with + | "crawler" -> + let json = Js.Dict.unsafeGet json "crawler" in + v.crawler <- Pbrt_bs.string json "crawler_error_list" "crawler" + | "entity" -> + let json = Js.Dict.unsafeGet json "entity" in + v.entity <- Some ((decode_entity (Pbrt_bs.object_ json "crawler_error_list" "entity"))) + | "errors" -> begin + let a = + let a = Js.Dict.unsafeGet json "errors" in + Pbrt_bs.array_ a "crawler_error_list" "errors" + in + v.errors <- Array.map (fun json -> + (decode_crawler_error (Pbrt_bs.object_ json "crawler_error_list" "errors")) + ) a |> Array.to_list; + end + + | _ -> () (*Unknown fields are ignored*) + done; + ({ + CrawlerTypes.crawler = v.crawler; + CrawlerTypes.entity = v.entity; + CrawlerTypes.errors = v.errors; + } : CrawlerTypes.crawler_error_list) + let rec decode_errors_request json = let v = 
default_errors_request_mutable () in let keys = Js.Dict.keys json in @@ -205,7 +241,7 @@ let rec decode_errors_list json = Pbrt_bs.array_ a "errors_list" "errors" in v.errors <- Array.map (fun json -> - (decode_crawler_error (Pbrt_bs.object_ json "errors_list" "errors")) + (decode_crawler_error_list (Pbrt_bs.object_ json "errors_list" "errors")) ) a |> Array.to_list; end @@ -540,6 +576,11 @@ let rec encode_crawler_error (v:CrawlerTypes.crawler_error) = Js.Dict.set json "created_at" (Js.Json.string json'); end; end; + json + +let rec encode_crawler_error_list (v:CrawlerTypes.crawler_error_list) = + let json = Js.Dict.empty () in + Js.Dict.set json "crawler" (Js.Json.string v.CrawlerTypes.crawler); begin match v.CrawlerTypes.entity with | None -> () | Some v -> @@ -548,6 +589,17 @@ let rec encode_crawler_error (v:CrawlerTypes.crawler_error) = Js.Dict.set json "entity" (Js.Json.object_ json'); end; end; + begin (* errors field *) + let (errors':Js.Json.t) = + v.CrawlerTypes.errors + |> Array.of_list + |> Array.map (fun v -> + v |> encode_crawler_error |> Js.Json.object_ + ) + |> Js.Json.array + in + Js.Dict.set json "errors" errors'; + end; json let rec encode_errors_request (v:CrawlerTypes.errors_request) = @@ -563,7 +615,7 @@ let rec encode_errors_list (v:CrawlerTypes.errors_list) = v.CrawlerTypes.errors |> Array.of_list |> Array.map (fun v -> - v |> encode_crawler_error |> Js.Json.object_ + v |> encode_crawler_error_list |> Js.Json.object_ ) |> Js.Json.array in diff --git a/web/src/messages/CrawlerBs.mli b/web/src/messages/CrawlerBs.mli index ad6a614f2..d625e6727 100644 --- a/web/src/messages/CrawlerBs.mli +++ b/web/src/messages/CrawlerBs.mli @@ -12,6 +12,9 @@ val encode_entity_type : CrawlerTypes.entity_type -> string val encode_crawler_error : CrawlerTypes.crawler_error -> Js.Json.t Js.Dict.t (** [encode_crawler_error v dict] encodes [v] int the given JSON [dict] *) +val encode_crawler_error_list : CrawlerTypes.crawler_error_list -> Js.Json.t Js.Dict.t +(** [encode_crawler_error_list v dict] encodes [v] int the given JSON [dict] *) + val encode_errors_request : CrawlerTypes.errors_request -> Js.Json.t Js.Dict.t (** [encode_errors_request v dict] encodes [v] int the given JSON [dict] *) @@ -66,6 +69,9 @@ val decode_entity_type : Js.Json.t -> CrawlerTypes.entity_type val decode_crawler_error : Js.Json.t Js.Dict.t -> CrawlerTypes.crawler_error (** [decode_crawler_error decoder] decodes a [crawler_error] value from [decoder] *) +val decode_crawler_error_list : Js.Json.t Js.Dict.t -> CrawlerTypes.crawler_error_list +(** [decode_crawler_error_list decoder] decodes a [crawler_error_list] value from [decoder] *) + val decode_errors_request : Js.Json.t Js.Dict.t -> CrawlerTypes.errors_request (** [decode_errors_request decoder] decodes a [errors_request] value from [decoder] *) diff --git a/web/src/messages/CrawlerTypes.ml b/web/src/messages/CrawlerTypes.ml index 70a3be931..481404548 100644 --- a/web/src/messages/CrawlerTypes.ml +++ b/web/src/messages/CrawlerTypes.ml @@ -18,7 +18,12 @@ type crawler_error = { message : string; body : string; created_at : TimestampTypes.timestamp option; +} + +type crawler_error_list = { + crawler : string; entity : entity option; + errors : crawler_error list; } type errors_request = { @@ -27,7 +32,7 @@ type errors_request = { } type errors_list = { - errors : crawler_error list; + errors : crawler_error_list list; } type errors_response = @@ -109,12 +114,20 @@ let rec default_crawler_error ?message:((message:string) = "") ?body:((body:string) = "") 
?created_at:((created_at:TimestampTypes.timestamp option) = None) - ?entity:((entity:entity option) = None) () : crawler_error = { message; body; created_at; +} + +let rec default_crawler_error_list + ?crawler:((crawler:string) = "") + ?entity:((entity:entity option) = None) + ?errors:((errors:crawler_error list) = []) + () : crawler_error_list = { + crawler; entity; + errors; } let rec default_errors_request @@ -126,7 +139,7 @@ let rec default_errors_request } let rec default_errors_list - ?errors:((errors:crawler_error list) = []) + ?errors:((errors:crawler_error_list list) = []) () : errors_list = { errors; } diff --git a/web/src/messages/CrawlerTypes.mli b/web/src/messages/CrawlerTypes.mli index 14e3338b6..c7777f5f7 100644 --- a/web/src/messages/CrawlerTypes.mli +++ b/web/src/messages/CrawlerTypes.mli @@ -21,7 +21,12 @@ type crawler_error = { message : string; body : string; created_at : TimestampTypes.timestamp option; +} + +type crawler_error_list = { + crawler : string; entity : entity option; + errors : crawler_error list; } type errors_request = { @@ -30,7 +35,7 @@ type errors_request = { } type errors_list = { - errors : crawler_error list; + errors : crawler_error_list list; } type errors_response = @@ -117,11 +122,18 @@ val default_crawler_error : ?message:string -> ?body:string -> ?created_at:TimestampTypes.timestamp option -> - ?entity:entity option -> unit -> crawler_error (** [default_crawler_error ()] is the default value for type [crawler_error] *) +val default_crawler_error_list : + ?crawler:string -> + ?entity:entity option -> + ?errors:crawler_error list -> + unit -> + crawler_error_list +(** [default_crawler_error_list ()] is the default value for type [crawler_error_list] *) + val default_errors_request : ?index:string -> ?query:string -> @@ -130,7 +142,7 @@ val default_errors_request : (** [default_errors_request ()] is the default value for type [errors_request] *) val default_errors_list : - ?errors:crawler_error list -> + ?errors:crawler_error_list list -> unit -> errors_list (** [default_errors_list ()] is the default value for type [errors_list] *) From 23c4def454203d05d2ca5355b5495983616d8d32 Mon Sep 17 00:00:00 2001 From: Tristan Cacqueray Date: Wed, 3 Jan 2024 14:58:53 +0000 Subject: [PATCH 25/28] crawler: add stream error to stop the stream --- src/Macroscope/Test.hs | 2 +- src/Macroscope/Worker.hs | 29 +++++++++++++++++++++-------- 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/src/Macroscope/Test.hs b/src/Macroscope/Test.hs index 95044d4a8..835c7dd93 100644 --- a/src/Macroscope/Test.hs +++ b/src/Macroscope/Test.hs @@ -55,7 +55,7 @@ testCrawlingPoint = do Macroscope.runStream apiKey indexName (CrawlerName crawlerName) (Macroscope.Changes badStream) (currentOldestAge, _) <- getOldest - liftIO $ assertBool "Commit date is updated on failure" (currentOldestAge > oldestAge) + liftIO $ assertEqual "Commit date is not updated on failure" oldestAge currentOldestAge -- Check that the error got indexed errorResponse <- crawlerErrors client (CrawlerPB.ErrorsRequest (from indexName) "from:2020") diff --git a/src/Macroscope/Worker.hs b/src/Macroscope/Worker.hs index e19776a1a..1d668692f 100644 --- a/src/Macroscope/Worker.hs +++ b/src/Macroscope/Worker.hs @@ -76,9 +76,14 @@ data DocumentType | DTError CrawlerError deriving (Generic, ToJSON) -data ProcessError es - = CommitError Text - | AddError Text +-- | ProcessError are produced by the processStream. 
+data ProcessError + = -- | Monocle crawler commit API failed + CommitError Text + | -- | Monocle crawler add API failed + AddError Text + | -- | External API failed + StreamError LentilleError -- | 'processStream' read the stream of document and post to the monocle API processStream :: @@ -90,7 +95,7 @@ processStream :: -- | The stream of documents to read LentilleStream es DocumentType -> -- | The processing results - Eff es [Maybe (ProcessError es)] + Eff es [Maybe ProcessError] processStream logFunc postFunc = go (0 :: Word) [] [] where go count acc results stream = do @@ -105,12 +110,19 @@ processStream logFunc postFunc = go (0 :: Word) [] [] let doc = case edoc of Right x -> x Left err -> DTError $ toCrawlerError err + let addStreamError :: [Maybe ProcessError] -> [Maybe ProcessError] + addStreamError = case edoc of + Right _ -> id + -- This is likely an error we can't recover, so don't add stream error + Left (LentilleError _ (PartialErrors _)) -> id + -- Every other 'LentilleError' are fatal$ + Left err -> (Just (StreamError err) :) let newAcc = doc : acc if count == 499 then do res <- processBatch newAcc - go 0 [] (res : results) rest - else go (count + 1) newAcc results rest + go 0 [] (addStreamError (res : results)) rest + else go (count + 1) newAcc (addStreamError results) rest toCrawlerError (LentilleError ts err) = CrawlerError {..} where @@ -121,7 +133,7 @@ processStream logFunc postFunc = go (0 :: Word) [] [] PageInfoError e -> ("page-info", encodeJSON e) PartialErrors es -> ("partial", encodeJSON es) - processBatch :: [DocumentType] -> Eff es (Maybe (ProcessError es)) + processBatch :: [DocumentType] -> Eff es (Maybe ProcessError) processBatch [] = pure Nothing processBatch (reverse -> docs) = do logFunc (length docs) @@ -152,6 +164,7 @@ runStream apiKey indexName crawlerName documentStream = do forM_ errors \case AddError err -> logWarn "Could not add documents" ["err" .= err] CommitError err -> logWarn "Could not commit update date" ["err" .= err] + StreamError err -> logWarn "Stream produced a fatal error" ["err" .= err] -- | 'runStreamError' is the stream processor runStreamError :: @@ -163,7 +176,7 @@ runStreamError :: CrawlerName -> DocumentStream es -> Word32 -> - Eff es [ProcessError es] + Eff es [ProcessError] runStreamError startTime apiKey indexName (CrawlerName crawlerName) documentStream offset = go where go = do From 9a60ca4848f0e5425f01ce80d98603774386656a Mon Sep 17 00:00:00 2001 From: Tristan Cacqueray Date: Wed, 3 Jan 2024 16:07:48 +0000 Subject: [PATCH 26/28] api: prevent error when submitting empty task data --- src/Monocle/Backend/Index.hs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Monocle/Backend/Index.hs b/src/Monocle/Backend/Index.hs index d50e31b3e..56adefd44 100644 --- a/src/Monocle/Backend/Index.hs +++ b/src/Monocle/Backend/Index.hs @@ -822,6 +822,7 @@ orphanTaskDataDocToBHDoc TaskDataDoc {..} = ) taskDataAdd :: MonoQuery :> es => IndexEffects es => Text -> [SearchPB.TaskData] -> Eff es () +taskDataAdd _ [] = pure () taskDataAdd crawlerName tds = do -- extract change URLs from input TDs let urls = from . 
SearchPB.taskDataChangeUrl <$> tds From 2ba9fdd502be3f2abb6383ee68476cafb704c45c Mon Sep 17 00:00:00 2001 From: Tristan Cacqueray Date: Wed, 3 Jan 2024 16:15:34 +0000 Subject: [PATCH 27/28] doc: add profiling build instructions --- CONTRIBUTING.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 6aa726142..787498edd 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -265,3 +265,22 @@ Get crawler errors: ```ShellSession curl -X POST -d '{"index": "monocle"}' -H "Content-type: application/json" localhost:8080/api/2/crawler/errors ``` + +## Debug by dumping every stacktrace + +When the service fails with an obscure `NonEmpty.fromList: empty list`, run the following commands to get the full stacktrace: + +```ShellSession +cabal --ghc-options="-fprof-auto" --enable-executable-profiling --enable-profiling --enable-library-profiling -O0 run exe:monocle -- api +RTS -xc -RTS +``` + +Note that this also shows legitimate exceptions that are correctly caught, but hopefully you should see something like: + +``` +*** Exception (reporting due to +RTS -xc): (THUNK_1_0), stack trace: + GHC.IsList.CAF + --> evaluated by: Monocle.Backend.Index.getChangesByURL, + called from Monocle.Backend.Index.taskDataAdd, + called ... +NonEmpty.fromList: empty list +``` From 8f032883c5f38da90619dce061bafc202db070c0 Mon Sep 17 00:00:00 2001 From: Fabien Boucher Date: Wed, 3 Jan 2024 22:01:57 +0000 Subject: [PATCH 28/28] Rename PageInfoError to RateLimitInfoError --- src/Lentille.hs | 2 +- src/Lentille/GraphQL.hs | 2 +- src/Macroscope/Worker.hs | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Lentille.hs b/src/Lentille.hs index bf5e54d2a..5befc974d 100644 --- a/src/Lentille.hs +++ b/src/Lentille.hs @@ -113,7 +113,7 @@ data LentilleError = LentilleError UTCTime LentilleErrorKind data LentilleErrorKind = DecodeError [Text] | RequestError GraphQLError - | PageInfoError GraphQLError + | RateLimitInfoError GraphQLError | PartialErrors Value deriving (Show, Generic, ToJSON) diff --git a/src/Lentille/GraphQL.hs b/src/Lentille/GraphQL.hs index 03a414fc1..5837faaf3 100644 --- a/src/Lentille/GraphQL.hs +++ b/src/Lentille/GraphQL.hs @@ -249,7 +249,7 @@ streamFetch client@GraphClient {..} mkArgs StreamFetchOptParams {..} transformRe Nothing -> pure Nothing case mErr of - Just err -> yieldStreamError $ PageInfoError $ mkGraphQLError err + Just err -> yieldStreamError $ RateLimitInfoError $ mkGraphQLError err Nothing -> go Nothing 0 go pageInfoM totalFetched = do diff --git a/src/Macroscope/Worker.hs b/src/Macroscope/Worker.hs index 1d668692f..61e2825c6 100644 --- a/src/Macroscope/Worker.hs +++ b/src/Macroscope/Worker.hs @@ -130,7 +130,7 @@ processStream logFunc postFunc = go (0 :: Word) [] [] (crawlerErrorMessage, crawlerErrorBody) = case err of DecodeError xs -> ("decode", encodeJSON xs) RequestError e -> ("graph", encodeJSON e) - PageInfoError e -> ("page-info", encodeJSON e) + RateLimitInfoError e -> ("rate-limit-info", encodeJSON e) PartialErrors es -> ("partial", encodeJSON es) processBatch :: [DocumentType] -> Eff es (Maybe ProcessError)
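
A note on the end result of this series: the crawler errors endpoint shown in CONTRIBUTING.md above no longer returns a flat list. Errors come back grouped by crawler and entity, one `CrawlerErrorList` per pair, as built by `toErrorsList`/`mergeErrors` in `Monocle.Api.Server`. The exchange below is a sketch of the new shape; the crawler name, entity, timestamp, and error values are illustrative, and the response is abbreviated and pretty-printed:

```ShellSession
curl -X POST -d '{"index": "monocle"}' -H "Content-type: application/json" localhost:8080/api/2/crawler/errors
{
  "success": {
    "errors": [
      {
        "crawler": "github-crawler",
        "entity": {"project_name": "opendev/neutron"},
        "errors": [
          {"message": "decode", "body": "[\"Oops\"]", "created_at": "2024-01-03T16:07:48Z"}
        ]
      }
    ]
  }
}
```

The `message` field carries the error kind tag emitted by the worker (`decode`, `graph`, `rate-limit-info`, or `partial`), and `body` carries the JSON-encoded error details.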