diff --git a/CHANGELOG.md b/CHANGELOG.md index cdb326330..293a4c63e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,8 @@ All notable changes to this project will be documented in this file. ### Added +- [crawler] Enable usage of the GitHub user PRs crawler via the Monocle config. + ### Changed ### Removed diff --git a/README.md b/README.md index 759bf920b..76bc3ad74 100644 --- a/README.md +++ b/README.md @@ -232,6 +232,20 @@ Regarding the Github fine grained tokens (new): - To crawl privates repositories, select "All repositories" or "Only select repositories", then in "Repository permissions" select "Pull Requests", "Contents" as "Read-only". +The GitHub provider can also be configured to crawl Pull-Requests created by specific GitHub users. +For instance the following crawler's provider will fetch Pull-Requests and related events for +users john and jane: + +```YAML + provider: + github_users: + - john + - jane + # Optional settings + github_url: https://github.com/api/graphql + github_token: GITHUB_TOKEN +``` + A Gerrit provider settings ```YAML diff --git a/codegen/Monocle/Protob/Crawler.hs b/codegen/Monocle/Protob/Crawler.hs index ec956602a..b991fe0bf 100644 --- a/codegen/Monocle/Protob/Crawler.hs +++ b/codegen/Monocle/Protob/Crawler.hs @@ -94,6 +94,13 @@ instance HsProtobuf.Message Entity where (Hs.coerce @(Hs.Text) @(HsProtobuf.String Hs.Text) (y)) ) ) + EntityEntityUserName y -> + ( HsProtobuf.encodeMessageField + (HsProtobuf.FieldNumber 5) + ( HsProtobuf.ForceEmit + (Hs.coerce @(Hs.Text) @(HsProtobuf.String Hs.Text) (y)) + ) + ) ] ) decodeMessage _ = @@ -128,15 +135,22 @@ instance HsProtobuf.Message Entity where (HsProtobuf.decodeMessageField) ) ) + , + ( (HsProtobuf.FieldNumber 5) + , (Hs.pure (Hs.Just Hs.. EntityEntityUserName)) + <*> ( HsProtobuf.coerceOver @(HsProtobuf.String Hs.Text) @(Hs.Text) + (HsProtobuf.decodeMessageField) + ) + ) ] ) dotProto _ = [] instance HsJSONPB.ToJSONPB Entity where - toJSONPB (Entity f1_or_f2_or_f4_or_f3) = + toJSONPB (Entity f1_or_f2_or_f4_or_f3_or_f5) = ( HsJSONPB.object [ ( let encodeEntity = - ( case f1_or_f2_or_f4_or_f3 of + ( case f1_or_f2_or_f4_or_f3_or_f5 of Hs.Just (EntityEntityOrganizationName f1) -> ( HsJSONPB.pair "organization_name" @@ -157,6 +171,11 @@ instance HsJSONPB.ToJSONPB Entity where "td_name" (Hs.coerce @(Hs.Text) @(HsProtobuf.String Hs.Text) (f3)) ) + Hs.Just (EntityEntityUserName f5) -> + ( HsJSONPB.pair + "user_name" + (Hs.coerce @(Hs.Text) @(HsProtobuf.String Hs.Text) (f5)) + ) Hs.Nothing -> Hs.mempty ) in \options -> @@ -168,10 +187,10 @@ instance HsJSONPB.ToJSONPB Entity where ) ] ) - toEncodingPB (Entity f1_or_f2_or_f4_or_f3) = + toEncodingPB (Entity f1_or_f2_or_f4_or_f3_or_f5) = ( HsJSONPB.pairs [ ( let encodeEntity = - ( case f1_or_f2_or_f4_or_f3 of + ( case f1_or_f2_or_f4_or_f3_or_f5 of Hs.Just (EntityEntityOrganizationName f1) -> ( HsJSONPB.pair "organization_name" @@ -192,6 +211,11 @@ instance HsJSONPB.ToJSONPB Entity where "td_name" (Hs.coerce @(Hs.Text) @(HsProtobuf.String Hs.Text) (f3)) ) + Hs.Just (EntityEntityUserName f5) -> + ( HsJSONPB.pair + "user_name" + (Hs.coerce @(Hs.Text) @(HsProtobuf.String Hs.Text) (f5)) + ) Hs.Nothing -> Hs.mempty ) in \options -> @@ -226,6 +250,10 @@ instance HsJSONPB.FromJSONPB Entity where Hs.. EntityEntityTdName Hs.. Hs.coerce @(HsProtobuf.String Hs.Text) @(Hs.Text) <$> (HsJSONPB.parseField parseObj "td_name") + , Hs.Just + Hs.. EntityEntityUserName + Hs.. Hs.coerce @(HsProtobuf.String Hs.Text) @(Hs.Text) + <$> (HsJSONPB.parseField parseObj "user_name") , Hs.pure Hs.Nothing ] in ( (obj .: "entity") @@ -248,6 +276,7 @@ data EntityEntity | EntityEntityProjectName Hs.Text | EntityEntityProjectIssueName Hs.Text | EntityEntityTdName Hs.Text + | EntityEntityUserName Hs.Text deriving (Hs.Show, Hs.Eq, Hs.Ord, Hs.Generic) instance Hs.NFData EntityEntity @@ -259,6 +288,7 @@ data EntityType = EntityTypeENTITY_TYPE_ORGANIZATION | EntityTypeENTITY_TYPE_PROJECT | EntityTypeENTITY_TYPE_TASK_DATA + | EntityTypeENTITY_TYPE_USER deriving (Hs.Show, Hs.Eq, Hs.Generic, Hs.NFData) instance HsProtobuf.Named EntityType where @@ -268,7 +298,7 @@ instance HsProtobuf.HasDefault EntityType instance Hs.Bounded EntityType where minBound = EntityTypeENTITY_TYPE_ORGANIZATION - maxBound = EntityTypeENTITY_TYPE_TASK_DATA + maxBound = EntityTypeENTITY_TYPE_USER instance Hs.Ord EntityType where compare x y = @@ -280,10 +310,12 @@ instance HsProtobuf.ProtoEnum EntityType where toProtoEnumMay 0 = Hs.Just EntityTypeENTITY_TYPE_ORGANIZATION toProtoEnumMay 1 = Hs.Just EntityTypeENTITY_TYPE_PROJECT toProtoEnumMay 2 = Hs.Just EntityTypeENTITY_TYPE_TASK_DATA + toProtoEnumMay 3 = Hs.Just EntityTypeENTITY_TYPE_USER toProtoEnumMay _ = Hs.Nothing fromProtoEnum (EntityTypeENTITY_TYPE_ORGANIZATION) = 0 fromProtoEnum (EntityTypeENTITY_TYPE_PROJECT) = 1 fromProtoEnum (EntityTypeENTITY_TYPE_TASK_DATA) = 2 + fromProtoEnum (EntityTypeENTITY_TYPE_USER) = 3 instance HsJSONPB.ToJSONPB EntityType where toJSONPB x _ = HsJSONPB.enumFieldString x @@ -296,6 +328,8 @@ instance HsJSONPB.FromJSONPB EntityType where Hs.pure EntityTypeENTITY_TYPE_PROJECT parseJSONPB (HsJSONPB.String "ENTITY_TYPE_TASK_DATA") = Hs.pure EntityTypeENTITY_TYPE_TASK_DATA + parseJSONPB (HsJSONPB.String "ENTITY_TYPE_USER") = + Hs.pure EntityTypeENTITY_TYPE_USER parseJSONPB v = (HsJSONPB.typeMismatch "EntityType" v) instance HsJSONPB.ToJSON EntityType where diff --git a/doc/openapi.yaml b/doc/openapi.yaml index 39489d9f1..6f99a0a23 100644 --- a/doc/openapi.yaml +++ b/doc/openapi.yaml @@ -642,12 +642,12 @@ components: type: array items: $ref: '#/components/schemas/monocle_change_Change' - description: changes are added when Entity is project_name + description: changes are added when Entity is project_name or user_name events: type: array items: $ref: '#/components/schemas/monocle_change_ChangeEvent' - description: events are added when Entity is project_name + description: events are added when Entity is project_name or user_name projects: type: array items: @@ -725,6 +725,8 @@ components: type: string td_name: type: string + user_name: + type: string description: A descriptive name of the entities being added monocle_crawler_Project: properties: diff --git a/schemas/monocle/protob/crawler.proto b/schemas/monocle/protob/crawler.proto index 65f211943..60e147c8e 100644 --- a/schemas/monocle/protob/crawler.proto +++ b/schemas/monocle/protob/crawler.proto @@ -17,6 +17,7 @@ message Entity { string project_name = 2; string project_issue_name = 4; string td_name = 3; + string user_name = 5; } } @@ -24,6 +25,7 @@ enum EntityType { ENTITY_TYPE_ORGANIZATION = 0; ENTITY_TYPE_PROJECT = 1; ENTITY_TYPE_TASK_DATA = 2; + ENTITY_TYPE_USER = 3; } message AddDocRequest { @@ -31,9 +33,9 @@ message AddDocRequest { string crawler = 2; string apikey = 3; Entity entity = 4; - // changes are added when Entity is project_name + // changes are added when Entity is project_name or user_name repeated monocle_change.Change changes = 5; - // events are added when Entity is project_name + // events are added when Entity is project_name or user_name repeated monocle_change.ChangeEvent events = 6; // projects are added when Entity is organization_name repeated Project projects = 7; diff --git a/src/Macroscope/Main.hs b/src/Macroscope/Main.hs index a5f38d3d9..ef33ee466 100644 --- a/src/Macroscope/Main.hs +++ b/src/Macroscope/Main.hs @@ -49,6 +49,7 @@ import Effectful.Env import Effectful.Prometheus import Effectful.Reader.Static qualified as E import Monocle.Effects +import Lentille.GitHub.UserPullRequests (streamUserPullRequests) -- | A structure to carry a single crawler information. data InfoCrawler = InfoCrawler @@ -362,22 +363,27 @@ getCrawler inf@(InfoCrawler _ _ crawler idents) = getCompose $ fmap addInfos (Co pure $ Just (k, [bzCrawler bzClient]) Config.GithubProvider ghCrawler -> do let Config.Github {..} = ghCrawler - ghToken <- lift $ Config.getSecret "GITHUB_TOKEN" github_token - (k, ghClient) <- - getClientGraphQL - (fromMaybe "https://api.github.com/graphql" github_url) - ghToken + (k, ghClient) <- getGHClient github_token github_url let crawlers = [ghOrgCrawler ghClient | isNothing github_repositories] <> [ghIssuesCrawler ghClient] <> [ghPRCrawler ghClient getIdentByAliasCB] pure $ Just (k, crawlers) - Config.GithubUserProvider _ -> pure Nothing -- Not yet implemented + Config.GithubUserProvider ghUserCrawler -> do + let Config.GithubUser {..} = ghUserCrawler + (k, ghClient) <- getGHClient github_token github_url + pure $ Just (k, [ghUserPRCrawler ghClient getIdentByAliasCB]) Config.GithubApplicationProvider _ -> pure Nothing -- "Not (yet) implemented" Config.TaskDataProvider -> pure Nothing -- This is a generic crawler, not managed by the macroscope getIdentByAliasCB :: Text -> Maybe Text getIdentByAliasCB = flip Config.getIdentByAliasFromIdents idents + getGHClient mToken mAPIUrl = do + ghToken <- lift $ Config.getSecret "GITHUB_TOKEN" mToken + getClientGraphQL + (fromMaybe "https://api.github.com/graphql" mAPIUrl) + ghToken + glMRCrawler :: GraphClient -> (Text -> Maybe Text) -> DocumentStream es glMRCrawler glClient cb = Changes $ streamMergeRequests glClient cb @@ -396,6 +402,9 @@ getCrawler inf@(InfoCrawler _ _ crawler idents) = getCompose $ fmap addInfos (Co ghPRCrawler :: GraphClient -> (Text -> Maybe Text) -> DocumentStream es ghPRCrawler glClient cb = Changes $ streamPullRequests glClient cb + ghUserPRCrawler :: GraphClient -> (Text -> Maybe Text) -> DocumentStream es + ghUserPRCrawler glClient cb = UserChanges $ streamUserPullRequests glClient cb + gerritRegexProjects :: [Text] -> [Text] gerritRegexProjects = filter (T.isPrefixOf "^") diff --git a/src/Macroscope/Worker.hs b/src/Macroscope/Worker.hs index e9be76b52..d41bd18cf 100644 --- a/src/Macroscope/Worker.hs +++ b/src/Macroscope/Worker.hs @@ -36,6 +36,8 @@ data DocumentStream es Changes (UTCTime -> Text -> LentilleStream es (Change, [ChangeEvent])) | -- | Fetch recent task data TaskDatas (UTCTime -> Text -> LentilleStream es TaskData) + | -- | Fetch recent changes from a user + UserChanges (UTCTime -> Text -> LentilleStream es (Change, [ChangeEvent])) -- | Get the entity type managed by a given stream streamEntity :: DocumentStream es -> CrawlerPB.EntityType @@ -43,6 +45,7 @@ streamEntity = \case Projects _ -> EntityTypeENTITY_TYPE_ORGANIZATION Changes _ -> EntityTypeENTITY_TYPE_PROJECT TaskDatas _ -> EntityTypeENTITY_TYPE_TASK_DATA + UserChanges _ -> EntityTypeENTITY_TYPE_USER -- | Get a text representation of a stream type streamName :: DocumentStream m -> Text @@ -50,6 +53,7 @@ streamName = \case Projects _ -> "Projects" Changes _ -> "Changes" TaskDatas _ -> "TaskDatas" + UserChanges _ -> "UserChanges" isTDStream :: DocumentStream m -> Bool isTDStream = \case @@ -204,6 +208,9 @@ runStreamError startTime apiKey indexName (CrawlerName crawlerName) documentStre TaskDatas s -> let td = extractEntityValue _TaskDataEntity in S.map (fmap DTTaskData) (s oldestAge td) + UserChanges s -> + let user = extractEntityValue _User + in S.map (fmap DTChanges) (s oldestAge user) where extractEntityValue prism = fromMaybe (error $ "Entity is not the right shape: " <> show entity) diff --git a/src/Monocle/Api/Server.hs b/src/Monocle/Api/Server.hs index 327560a7e..9cbc55594 100644 --- a/src/Monocle/Api/Server.hs +++ b/src/Monocle/Api/Server.hs @@ -266,11 +266,16 @@ pattern TDEntity :: LText -> Maybe CrawlerPB.Entity pattern TDEntity td = Just (CrawlerPB.Entity (Just (CrawlerPB.EntityEntityTdName td))) +pattern UserEntity :: LText -> Maybe CrawlerPB.Entity +pattern UserEntity user = + Just (CrawlerPB.Entity (Just (CrawlerPB.EntityEntityUserName user))) + toEntity :: Maybe CrawlerPB.Entity -> Entity toEntity entityPB = case entityPB of ProjectEntity projectName -> Project $ from projectName OrganizationEntity organizationName -> Organization $ from organizationName TDEntity tdName -> TaskDataEntity $ from tdName + UserEntity userName -> User $ from userName otherEntity -> error $ "Unknown Entity type: " <> show otherEntity -- | /crawler/add endpoint @@ -311,6 +316,7 @@ crawlerAddDoc _auth request = do ProjectIssue _ -> addIssues crawlerName issues issuesEvents Organization organizationName -> addProjects crawler organizationName projects TaskDataEntity _ -> addTDs crawlerName taskDatas + User _ -> addChanges crawlerName changes events Left err -> pure $ toErrorResponse err where addTDs crawlerName taskDatas = do diff --git a/src/Monocle/Backend/Documents.hs b/src/Monocle/Backend/Documents.hs index 37b8a2be5..556d27aec 100644 --- a/src/Monocle/Backend/Documents.hs +++ b/src/Monocle/Backend/Documents.hs @@ -624,6 +624,7 @@ instance ToJSON ECrawlerMetadataObject where Project n -> n ProjectIssue n -> n TaskDataEntity n -> n + User n -> n instance FromJSON ECrawlerMetadataObject where parseJSON = withObject "CrawlerMetadataObject" $ \v -> do @@ -635,6 +636,7 @@ instance FromJSON ECrawlerMetadataObject where "organization" -> pure $ Organization evalue "project" -> pure $ Project evalue "taskdata" -> pure $ TaskDataEntity evalue + "user" -> pure $ User evalue _ -> fail $ "Unknown crawler entity type name: " <> from etype pure ECrawlerMetadataObject {..} diff --git a/src/Monocle/Backend/Index.hs b/src/Monocle/Backend/Index.hs index 81ef78e90..28997eaae 100644 --- a/src/Monocle/Backend/Index.hs +++ b/src/Monocle/Backend/Index.hs @@ -930,27 +930,23 @@ initCrawlerEntities entities worker = traverse_ run entities ensureCrawlerMetadata (CrawlerName $ getWorkerName worker) updated_since entity defaultUpdatedSince = getWorkerUpdatedSince worker -getProjectEntityFromCrawler :: Config.Crawler -> [Entity] -getProjectEntityFromCrawler worker = Project <$> Config.getCrawlerProject worker - -getProjectIssueFromCrawler :: Config.Crawler -> [Entity] -getProjectIssueFromCrawler worker = ProjectIssue <$> Config.getCrawlerProjectIssue worker - -getOrganizationEntityFromCrawler :: Config.Crawler -> [Entity] -getOrganizationEntityFromCrawler worker = Organization <$> Config.getCrawlerOrganization worker - -getTaskDataEntityFromCrawler :: Config.Crawler -> [Entity] -getTaskDataEntityFromCrawler worker = TaskDataEntity <$> Config.getCrawlerTaskData worker initCrawlerMetadata :: MonoQuery :> es => IndexEffects es => Config.Crawler -> Eff es () initCrawlerMetadata crawler = initCrawlerEntities - ( getProjectEntityFromCrawler crawler - <> getOrganizationEntityFromCrawler crawler - <> getTaskDataEntityFromCrawler crawler - <> getProjectIssueFromCrawler crawler + ( getProjectEntityFromCrawler + <> getOrganizationEntityFromCrawler + <> getTaskDataEntityFromCrawler + <> getProjectIssueFromCrawler + <> getUserEntityFromCrawler ) crawler + where + getProjectEntityFromCrawler = Project <$> Config.getCrawlerProject crawler + getProjectIssueFromCrawler = ProjectIssue <$> Config.getCrawlerProjectIssue crawler + getOrganizationEntityFromCrawler = Organization <$> Config.getCrawlerOrganization crawler + getTaskDataEntityFromCrawler = TaskDataEntity <$> Config.getCrawlerTaskData crawler + getUserEntityFromCrawler = User <$> Config.getCrawlerUser crawler -- Author cache functions ------------------------- diff --git a/src/Monocle/Config.hs b/src/Monocle/Config.hs index b9d32d4d5..4f69673da 100644 --- a/src/Monocle/Config.hs +++ b/src/Monocle/Config.hs @@ -26,6 +26,7 @@ module Monocle.Config ( Gitlab (..), Gerrit (..), Github (..), + GithubUser (..), Bugzilla (..), GithubApplication (..), Link (..), @@ -73,6 +74,7 @@ module Monocle.Config ( getCrawlerProjectIssue, getCrawlerOrganization, getCrawlerTaskData, + getCrawlerUser, -- * Some utility functions mkTenant, @@ -456,6 +458,12 @@ getCrawlerProject Crawler {..} = case provider of GerritProvider Gerrit {..} -> maybe [] (filter (not . T.isPrefixOf "^")) gerrit_repositories _anyOtherProvider -> [] +-- | Get 'Crawler' user names +getCrawlerUser :: Crawler -> [Text] +getCrawlerUser Crawler {..} = case provider of + GithubUserProvider GithubUser {..} -> github_users + _anyOtherProvider -> [] + -- | Get 'Crawler' organization names getCrawlerOrganization :: Crawler -> [Text] getCrawlerOrganization Crawler {..} = case provider of diff --git a/src/Monocle/Entity.hs b/src/Monocle/Entity.hs index a4c330c5b..b75766c05 100644 --- a/src/Monocle/Entity.hs +++ b/src/Monocle/Entity.hs @@ -11,6 +11,7 @@ module Monocle.Entity ( _Project, _Organization, _TaskDataEntity, + _User, ) where import Data.Text qualified @@ -30,6 +31,8 @@ data Entity Organization Text | -- | Crawler collect task data TaskDataEntity Text + | -- | Crawler collect change from User + User Text deriving (Eq, Show, Generic, ToJSON) instance From Entity Text where @@ -38,6 +41,7 @@ instance From Entity Text where ProjectIssue _ -> "issue" Organization _ -> "organization" TaskDataEntity _ -> "taskdata" + User _ -> "user" makePrisms ''Entity @@ -49,6 +53,7 @@ entityTypeName = \case CrawlerPB.EntityTypeENTITY_TYPE_PROJECT -> "project" CrawlerPB.EntityTypeENTITY_TYPE_ORGANIZATION -> "organization" CrawlerPB.EntityTypeENTITY_TYPE_TASK_DATA -> "taskdata" + CrawlerPB.EntityTypeENTITY_TYPE_USER -> "user" -- TODO: check if the value needs to be hashed to prevent escape issue entityDocID :: CrawlerName -> Entity -> BH.DocId @@ -62,6 +67,8 @@ entityDocID (CrawlerName name) e = ProjectIssue n -> n Organization n -> n TaskDataEntity n -> n + -- TODO check if a username could lead to troubles with the DocId format + User n -> n instance From Entity CrawlerPB.Entity where from e = CrawlerPB.Entity (Just pbe) @@ -71,6 +78,7 @@ instance From Entity CrawlerPB.Entity where ProjectIssue n -> CrawlerPB.EntityEntityProjectIssueName (from n) Organization n -> CrawlerPB.EntityEntityOrganizationName (from n) TaskDataEntity n -> CrawlerPB.EntityEntityTdName (from n) + User n -> CrawlerPB.EntityEntityUserName (from n) instance From CrawlerPB.Entity Entity where from = \case @@ -79,6 +87,7 @@ instance From CrawlerPB.Entity Entity where CrawlerPB.EntityEntityProjectIssueName n -> ProjectIssue (from n) CrawlerPB.EntityEntityOrganizationName n -> Organization (from n) CrawlerPB.EntityEntityTdName n -> TaskDataEntity (from n) + CrawlerPB.EntityEntityUserName n -> User (from n) CrawlerPB.Entity Nothing -> error "Missing CrawlerPB.Entity value" instance From Entity CrawlerPB.EntityType where @@ -87,3 +96,4 @@ instance From Entity CrawlerPB.EntityType where ProjectIssue _ -> CrawlerPB.EntityTypeENTITY_TYPE_TASK_DATA Organization _ -> CrawlerPB.EntityTypeENTITY_TYPE_ORGANIZATION TaskDataEntity _ -> CrawlerPB.EntityTypeENTITY_TYPE_TASK_DATA + User _ -> CrawlerPB.EntityTypeENTITY_TYPE_USER