diff --git a/codegen/Monocle/Protob/Crawler.hs b/codegen/Monocle/Protob/Crawler.hs index 41a1b7c9e..f379d4ac2 100644 --- a/codegen/Monocle/Protob/Crawler.hs +++ b/codegen/Monocle/Protob/Crawler.hs @@ -346,6 +346,7 @@ data CrawlerError = CrawlerError , crawlerErrorBody :: Hs.Text , crawlerErrorCreatedAt :: Hs.Maybe Google.Protobuf.Timestamp.Timestamp + , crawlerErrorEntity :: Hs.Maybe Monocle.Protob.Crawler.Entity } deriving (Hs.Show, Hs.Eq, Hs.Ord, Hs.Generic) @@ -363,6 +364,7 @@ instance HsProtobuf.Message CrawlerError where { crawlerErrorMessage = crawlerErrorMessage , crawlerErrorBody = crawlerErrorBody , crawlerErrorCreatedAt = crawlerErrorCreatedAt + , crawlerErrorEntity = crawlerErrorEntity } = ( Hs.mconcat [ ( HsProtobuf.encodeMessageField @@ -384,6 +386,13 @@ instance HsProtobuf.Message CrawlerError where (crawlerErrorCreatedAt) ) ) + , ( HsProtobuf.encodeMessageField + (HsProtobuf.FieldNumber 4) + ( Hs.coerce @(Hs.Maybe Monocle.Protob.Crawler.Entity) + @(HsProtobuf.Nested Monocle.Protob.Crawler.Entity) + (crawlerErrorEntity) + ) + ) ] ) decodeMessage _ = @@ -408,6 +417,14 @@ instance HsProtobuf.Message CrawlerError where (HsProtobuf.FieldNumber 3) ) ) + <*> ( HsProtobuf.coerceOver + @(HsProtobuf.Nested Monocle.Protob.Crawler.Entity) + @(Hs.Maybe Monocle.Protob.Crawler.Entity) + ( HsProtobuf.at + HsProtobuf.decodeMessageField + (HsProtobuf.FieldNumber 4) + ) + ) dotProto _ = [ ( HsProtobufAST.DotProtoField (HsProtobuf.FieldNumber 1) @@ -436,10 +453,19 @@ instance HsProtobuf.Message CrawlerError where [] "" ) + , ( HsProtobufAST.DotProtoField + (HsProtobuf.FieldNumber 4) + ( HsProtobufAST.Prim + (HsProtobufAST.Named (HsProtobufAST.Single "Entity")) + ) + (HsProtobufAST.Single "entity") + [] + "" + ) ] instance HsJSONPB.ToJSONPB CrawlerError where - toJSONPB (CrawlerError f1 f2 f3) = + toJSONPB (CrawlerError f1 f2 f3 f4) = ( HsJSONPB.object [ "message" .= (Hs.coerce @(Hs.Text) @(HsProtobuf.String Hs.Text) (f1)) @@ -449,9 +475,14 @@ instance HsJSONPB.ToJSONPB CrawlerError where @(HsProtobuf.Nested Google.Protobuf.Timestamp.Timestamp) (f3) ) + , "entity" + .= ( Hs.coerce @(Hs.Maybe Monocle.Protob.Crawler.Entity) + @(HsProtobuf.Nested Monocle.Protob.Crawler.Entity) + (f4) + ) ] ) - toEncodingPB (CrawlerError f1 f2 f3) = + toEncodingPB (CrawlerError f1 f2 f3 f4) = ( HsJSONPB.pairs [ "message" .= (Hs.coerce @(Hs.Text) @(HsProtobuf.String Hs.Text) (f1)) @@ -461,6 +492,11 @@ instance HsJSONPB.ToJSONPB CrawlerError where @(HsProtobuf.Nested Google.Protobuf.Timestamp.Timestamp) (f3) ) + , "entity" + .= ( Hs.coerce @(Hs.Maybe Monocle.Protob.Crawler.Entity) + @(HsProtobuf.Nested Monocle.Protob.Crawler.Entity) + (f4) + ) ] ) @@ -481,6 +517,11 @@ instance HsJSONPB.FromJSONPB CrawlerError where @(Hs.Maybe Google.Protobuf.Timestamp.Timestamp) (obj .: "created_at") ) + <*> ( HsProtobuf.coerceOver + @(HsProtobuf.Nested Monocle.Protob.Crawler.Entity) + @(Hs.Maybe Monocle.Protob.Crawler.Entity) + (obj .: "entity") + ) ) ) diff --git a/doc/openapi.yaml b/doc/openapi.yaml index 5ec0a5678..370664ed5 100644 --- a/doc/openapi.yaml +++ b/doc/openapi.yaml @@ -753,6 +753,8 @@ components: created_at: type: string format: RFC3339 + entity: + $ref: '#/components/schemas/monocle_crawler_Entity' monocle_crawler_Entity: properties: organization_name: diff --git a/schemas/monocle/protob/crawler.proto b/schemas/monocle/protob/crawler.proto index 472fbeb65..ebcc47201 100644 --- a/schemas/monocle/protob/crawler.proto +++ b/schemas/monocle/protob/crawler.proto @@ -32,6 +32,7 @@ message CrawlerError { string message = 1; string body = 2; google.protobuf.Timestamp created_at = 3; + Entity entity = 4; } message ErrorsRequest { diff --git a/src/Lentille.hs b/src/Lentille.hs index 42f7232f9..ae3b97eb5 100644 --- a/src/Lentille.hs +++ b/src/Lentille.hs @@ -31,7 +31,6 @@ module Lentille ( module Monocle.Logging, ) where -import Data.ByteString.Base64.Lazy qualified as B64 import Data.Text qualified as T import Google.Protobuf.Timestamp qualified as T import Monocle.Class @@ -46,7 +45,6 @@ import Monocle.Protob.Change ( Change_ChangeState (Change_ChangeStateClosed, Change_ChangeStateMerged), Ident (..), ) -import Monocle.Protob.Crawler (CrawlerError (..)) import Network.HTTP.Client qualified as HTTP import Proto3.Suite (Enumerated (Enumerated)) import Streaming.Prelude qualified as S @@ -96,14 +94,6 @@ data LentilleError | GraphError UTCTime GraphQLError deriving (Show, Generic, ToJSON) -instance From LentilleError CrawlerError where - from = \case - DecodeError ts xs -> CrawlerError "decode error" (encodeBlob xs) (Just $ from ts) - GraphError ts x -> CrawlerError "graph error" (encodeBlob x) (Just $ from ts) - -encodeBlob :: ToJSON a => a -> LText -encodeBlob = decodeUtf8 . B64.encode . encode - type LentilleStream es a = Stream (Of (Either LentilleError a)) (Eff es) () ------------------------------------------------------------------------------- diff --git a/src/Macroscope/Test.hs b/src/Macroscope/Test.hs index 51211ded0..fee86d647 100644 --- a/src/Macroscope/Test.hs +++ b/src/Macroscope/Test.hs @@ -26,6 +26,7 @@ import Monocle.Protob.Crawler qualified as CrawlerPB import Streaming.Prelude qualified as Streaming import Test.Tasty import Test.Tasty.HUnit +import Monocle.Client.Api (crawlerErrors) runLentilleM :: MonocleClient -> Eff [E.Reader CrawlerEnv, MonoClientEffect, LoggerEffect, GerritEffect, BZEffect, TimeEffect, HttpEffect, PrometheusEffect, EnvEffect, Fail, Retry, Concurrent, IOE] a -> IO a runLentilleM client action = do @@ -55,7 +56,11 @@ testCrawlingPoint = do (currentOldestAge, _) <- getOldest liftIO $ assertBool "Commit date is updated on failure" (currentOldestAge > oldestAge) - -- TODO: check that the errors got indexed + errorResponse <- crawlerErrors client (CrawlerPB.ErrorsRequest (from indexName) "from:2020") + case errorResponse of + CrawlerPB.ErrorsResponse Nothing -> error "Bad response" + CrawlerPB.ErrorsResponse (Just (CrawlerPB.ErrorsResponseResultError err)) -> error $ from err + CrawlerPB.ErrorsResponse (Just (CrawlerPB.ErrorsResponseResultSuccess errors)) -> liftIO $ assertEqual "Error got indexed" (length errors.errorsListErrors) 1 Macroscope.runStream apiKey indexName (CrawlerName crawlerName) (Macroscope.Changes $ goodStream currentOldestAge) diff --git a/src/Macroscope/Worker.hs b/src/Macroscope/Worker.hs index e900efdfe..75d6af2c9 100644 --- a/src/Macroscope/Worker.hs +++ b/src/Macroscope/Worker.hs @@ -83,6 +83,7 @@ data ProcessError es -- | 'processStream' read the stream of document and post to the monocle API processStream :: forall es. + Entity -> -- | Funtion to log about the processing (Int -> Eff es ()) -> -- | Function to post on the Monocle API @@ -91,7 +92,7 @@ processStream :: LentilleStream es DocumentType -> -- | The processing results Eff es [Maybe (ProcessError es)] -processStream logFunc postFunc = go (0 :: Word) [] [] +processStream entity logFunc postFunc = go (0 :: Word) [] [] where go count acc results stream = do eDocument <- S.next stream @@ -104,7 +105,8 @@ processStream logFunc postFunc = go (0 :: Word) [] [] -- We got a new document let doc = case edoc of Right x -> x - Left err -> DTError $ from err + Left (DecodeError ts err) -> DTError $ CrawlerError "decode" (encodeBlob err) (Just $ from ts) (Just $ from entity) + Left (GraphError ts err) -> DTError $ CrawlerError "graph" (encodeBlob err) (Just $ from ts) (Just $ from entity) let newAcc = doc : acc if count == 499 then do @@ -179,6 +181,7 @@ runStreamError startTime apiKey indexName (CrawlerName crawlerName) documentStre -- Run the document stream for that entity postResult <- processStream + entity (\c -> logInfo "Posting documents" ["count" .= c]) (httpRetry "api/commit/add" . mCrawlerAddDoc . mkRequest entity) (getStream oldestAge entity) diff --git a/src/Monocle/Api/Server.hs b/src/Monocle/Api/Server.hs index 03410f3aa..380243eb2 100644 --- a/src/Monocle/Api/Server.hs +++ b/src/Monocle/Api/Server.hs @@ -315,7 +315,8 @@ crawlerAddDoc _auth request = do case requestE of Right (index, crawler) -> runEmptyQueryM index do - addErrors crawlerName (toEntity entity) errors + unless (V.null errors) do + addErrors crawlerName (toEntity entity) errors case toEntity entity of Project _ -> addChanges crawlerName changes events ProjectIssue _ -> addIssues crawlerName issues issuesEvents @@ -325,17 +326,17 @@ crawlerAddDoc _auth request = do Left err -> pure $ toErrorResponse err where addErrors crawlerName entity errors = do - logInfo "AddingErrors" ["crawler" .= crawlerName, "errors" .= length errors] - let toError :: CrawlerError -> (UTCTime, EError) + logInfo "AddingErrors" ["crawler" .= crawlerName, "entity" .= entity, "errors" .= length errors] + let toError :: CrawlerError -> EError toError ce = - ( from $ fromMaybe (error "missing timestamp") ce.crawlerErrorCreatedAt - , EError - { erCrawlerName = from crawlerName - , erEntity = from entity - , erMessage = from ce.crawlerErrorMessage - , erBody = from ce.crawlerErrorBody - } - ) + EError + { erCrawlerName = from crawlerName + , erEntity = from entity + , erMessage = from ce.crawlerErrorMessage + , erBody = from ce.crawlerErrorBody + , erCreatedAt = from $ fromMaybe (error "missing timestamp") ce.crawlerErrorCreatedAt + } + I.indexErrors $ toList (toError <$> errors) addTDs crawlerName taskDatas = do diff --git a/src/Monocle/Backend/Documents.hs b/src/Monocle/Backend/Documents.hs index 960ae17b0..6ad3f40b7 100644 --- a/src/Monocle/Backend/Documents.hs +++ b/src/Monocle/Backend/Documents.hs @@ -21,6 +21,7 @@ module Monocle.Backend.Documents where import Data.Aeson (Value (String), defaultOptions, genericParseJSON, genericToJSON, withObject, withText, (.:)) import Data.Aeson.Casing (aesonPrefix, snakeCase) +import Data.Aeson.Types qualified import Data.Time.Format (defaultTimeLocale, formatTime, parseTimeM) import Data.Vector qualified as V import Monocle.Entity @@ -198,7 +199,8 @@ instance From ETaskData SearchPB.TaskData where data EError = EError { erCrawlerName :: Text - , erEntity :: Text + , erEntity :: Entity + , erCreatedAt :: UTCTime , erMessage :: Text , erBody :: Text } @@ -209,14 +211,52 @@ instance From EError CrawlerError where CrawlerError { crawlerErrorBody = from eerror.erBody , crawlerErrorMessage = from eerror.erMessage - , crawlerErrorCreatedAt = undefined + , crawlerErrorCreatedAt = Just $ from eerror.erCreatedAt + , crawlerErrorEntity = Just $ from eerror.erEntity } +-- Custom encoder to manually serialize the entity type +-- This needs to match the "error_data" schema above instance ToJSON EError where - toJSON = genericToJSON $ aesonPrefix snakeCase + toJSON e = + object + [ ("crawler_name", toJSON e.erCrawlerName) + , ("created_at", toJSON e.erCreatedAt) + , ("entity_type", String (entityTypeName (from e.erEntity))) + , ("entity_value", String $ entityValue e.erEntity) + , ("message", String $ e.erMessage) + , ("body", String $ e.erBody) + ] instance FromJSON EError where - parseJSON = genericParseJSON $ aesonPrefix snakeCase + parseJSON = withObject "EError" $ \root -> do + v <- root .: "error_data" + erCrawlerName <- v .: "crawler_name" + erCreatedAt <- v .: "created_at" + evalue <- v .: "entity_value" + etype <- v .: "entity_type" + erEntity <- parseEntity evalue etype + erMessage <- v .: "message" + erBody <- v .: "body" + pure EError {..} + +-- | Helper to encode entity +-- WARNING: don't forget to update the parseEntity implementation below when changing the entity document encoding +entityValue :: Entity -> Text +entityValue = \case + Organization n -> n + Project n -> n + ProjectIssue n -> n + TaskDataEntity n -> n + User n -> n + +parseEntity :: Text -> Text -> Data.Aeson.Types.Parser Entity +parseEntity evalue = \case + "organization" -> pure $ Organization evalue + "project" -> pure $ Project evalue + "taskdata" -> pure $ TaskDataEntity evalue + "user" -> pure $ User evalue + etype -> fail $ "Unknown crawler entity type name: " <> from etype data EChangeState = EChangeOpen @@ -648,29 +688,16 @@ instance ToJSON ECrawlerMetadataObject where [ ("crawler_name", toJSON (ecmCrawlerName e)) , ("last_commit_at", toJSON (ecmLastCommitAt e)) , ("crawler_type", String (entityTypeName (from $ ecmCrawlerEntity e))) - , ("crawler_type_value", String entityValue) + , ("crawler_type_value", String $ entityValue $ e.ecmCrawlerEntity) ] - where - -- WARNING: don't forget to update the FromJSON implementation below when changing the entity document encoding - entityValue = case ecmCrawlerEntity e of - Organization n -> n - Project n -> n - ProjectIssue n -> n - TaskDataEntity n -> n - User n -> n instance FromJSON ECrawlerMetadataObject where parseJSON = withObject "CrawlerMetadataObject" $ \v -> do ecmCrawlerName <- v .: "crawler_name" ecmLastCommitAt <- v .: "last_commit_at" - (etype :: Text) <- v .: "crawler_type" + etype <- v .: "crawler_type" evalue <- v .: "crawler_type_value" - ecmCrawlerEntity <- case etype of - "organization" -> pure $ Organization evalue - "project" -> pure $ Project evalue - "taskdata" -> pure $ TaskDataEntity evalue - "user" -> pure $ User evalue - _ -> fail $ "Unknown crawler entity type name: " <> from etype + ecmCrawlerEntity <- parseEntity evalue etype pure ECrawlerMetadataObject {..} newtype ECrawlerMetadata = ECrawlerMetadata diff --git a/src/Monocle/Backend/Index.hs b/src/Monocle/Backend/Index.hs index f3ff5927f..7f2d13aee 100644 --- a/src/Monocle/Backend/Index.hs +++ b/src/Monocle/Backend/Index.hs @@ -233,9 +233,11 @@ instance ToJSON ChangesIndexMapping where [ "properties" .= object [ "crawler_name" .= KWMapping - , "entity" .= TextAndKWMapping + , "entity_type" .= KWMapping + , "entity_value" .= KWMapping , "message" .= TextAndKWMapping , "body" .= BlobMapping + , "created_at" .= DateIndexMapping ] ] ] @@ -649,13 +651,13 @@ indexChanges changes = indexDocs $ fmap (toDoc . ensureType) changes toDoc change = (toJSON change, getChangeDocId change) ensureType change = change {echangeType = EChangeDoc} -indexErrors :: MonoQuery :> es => IndexEffects es => [(UTCTime, EError)] -> Eff es () +indexErrors :: MonoQuery :> es => IndexEffects es => [EError] -> Eff es () indexErrors errors = indexDocs $ fmap toDoc errors where - toDoc (ts, err) = (getErrorDoc ts err, getErrorDocId err) + toDoc err = (getErrorDoc err, getErrorDocId err) - getErrorDoc :: UTCTime -> EError -> Value - getErrorDoc ts err = object ["created_at" .= ts, "type" .= EErrorDoc, "error_data" .= toJSON err] + getErrorDoc :: EError -> Value + getErrorDoc err = object ["type" .= EErrorDoc, "error_data" .= toJSON err] getErrorDocId :: EError -> BH.DocId getErrorDocId = getBHDocID . erBody diff --git a/src/Monocle/Backend/Queries.hs b/src/Monocle/Backend/Queries.hs index 141cb91e6..4a1584c60 100644 --- a/src/Monocle/Backend/Queries.hs +++ b/src/Monocle/Backend/Queries.hs @@ -22,7 +22,7 @@ import Monocle.Config qualified as Config import Monocle.Prelude import Monocle.Protob.Metric qualified as MetricPB import Monocle.Protob.Search qualified as SearchPB -import Monocle.Search.Query (AuthorFlavor (..), QueryFlavor (..), RangeFlavor (..), blankQuery, rangeField) +import Monocle.Search.Query (AuthorFlavor (..), QueryFlavor (..), RangeFlavor (..), rangeField) import Monocle.Search.Query qualified as Q import Streaming.Prelude qualified as Streaming @@ -239,14 +239,20 @@ orderDesc = Enumerated $ Right SearchPB.Order_DirectionDESC crawlerErrors :: QEffects es => Eff es [EError] crawlerErrors = do (since, to) <- getQueryBound + let queryFilter = + [ BH.QueryRangeQuery + $ BH.mkRangeQuery (BH.FieldName "error_data.created_at") + $ BH.RangeDateGteLte (coerce since) (coerce to) + ] -- keep only the time range of the user query - withQuery (blankQuery since to) do - withDocTypes [EErrorDoc] (QueryFlavor Author CreatedAt) do - doSearch (Just order) 500 + dropQuery do + withFilter queryFilter do + withDocTypes [EErrorDoc] (QueryFlavor Author CreatedAt) do + doSearch (Just order) 500 where order = SearchPB.Order - { orderField = "created_at" + { orderField = "error_data.created_at" , orderDirection = orderDesc } diff --git a/src/Monocle/Prelude.hs b/src/Monocle/Prelude.hs index 06afd783e..4911287d9 100644 --- a/src/Monocle/Prelude.hs +++ b/src/Monocle/Prelude.hs @@ -16,6 +16,7 @@ module Monocle.Prelude ( setEnv, headMaybe, (:::), + encodeBlob, -- * secret Secret, @@ -222,6 +223,7 @@ import Data.Aeson.Encode.Pretty qualified as Aeson import Data.Aeson.Key qualified as AesonKey import Data.Aeson.Lens (_Integer, _Object) import Data.Aeson.Types (Pair) +import Data.ByteString.Base64.Lazy qualified as B64 import Data.Fixed (Deci, Fixed (..), HasResolution (resolution), Pico) import Data.Map qualified as Map import Data.Tagged @@ -604,3 +606,6 @@ streamingFromListT = S.unfoldr go go listT = do res <- ListT.uncons listT pure $ res `orDie` () + +encodeBlob :: ToJSON a => a -> LText +encodeBlob = decodeUtf8 . B64.encode . encode diff --git a/src/Monocle/Search/Query.hs b/src/Monocle/Search/Query.hs index e42780707..d7d532beb 100644 --- a/src/Monocle/Search/Query.hs +++ b/src/Monocle/Search/Query.hs @@ -157,6 +157,7 @@ fields = ] queryFieldToDocument :: Field -> Maybe Field +queryFieldToDocument "error_data.created_at" = Just "error_data.created_at" queryFieldToDocument name = do (_, field, _) <- lookup name fields pure field