Skip to content

Commit

Permalink
1652 scorer dump (#383)
Browse files Browse the repository at this point in the history
* chore(infra): create dump bucket and point subdomain at it

* chore(infra): use bucket policy

* chore(infra): update bucketpolicy definition

* fix(infra): correct domain

* chore(infra): add bucket config to staging and prod

* fix(ci): switch back to main

* chore(infra): add cloudfront cdn connection between s3 and route53

* chore(infra): correctly generating cname and bucket for download

* chore(infra): update allocatedStorage to minimum

* chore(infra): added correct permissioning removed bucket from s3

* fix(infra): switch back to exports every 30 min on staging

* chore(infra): including cloudfront distribution

* chore(infra): use cloudfront to access bucket via ssl

* fix(infra): update prod script

* feat(infra): setting different domain and hosted zone for public.scorer.gitcoin.co

* fix(api,load_test): update readme (#386)

* fix(infra): make scheduled tasks cpu & memory configurable

* fix(infra): fix bucket name for frequent-allo-scorer-data-dump

* fix(infra): fix hardcoded docker image and bad schedule for task

* fix(infra): add alias to cloudfront distro

---------

Co-authored-by: Gerald Iakobinyi-Pich <[email protected]>
  • Loading branch information
tim-schultz and nutrina authored Sep 11, 2023
1 parent cae866e commit 0a7ce95
Show file tree
Hide file tree
Showing 9 changed files with 1,796 additions and 20 deletions.
1 change: 1 addition & 0 deletions .github/workflows/api-promote-prod.yml
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,7 @@ jobs:
DOCKER_GTC_PASSPORT_SCORER_IMAGE: public.ecr.aws/c8n6v8e9/passport-scorer:${{ needs.build-api.outputs.dockerTag }}
DOCKER_GTC_PASSPORT_VERIFIER_IMAGE: public.ecr.aws/c8n6v8e9/passport-verifier:${{ needs.build-verifier.outputs.dockerTag }}
ROUTE_53_ZONE: ${{ secrets.ROUTE53_ZONE_ID }}
ROUTE_53_ZONE_FOR_PUBLIC_DATA: ${{ secrets.ROUTE_53_ZONE_FOR_PUBLIC_DATA }}
DOMAIN: ${{ secrets.DOMAIN }}
SCORER_SERVER_SSM_ARN: ${{ secrets.SCORER_SERVER_SSM_ARN }}
FLOWER_USER: ${{ secrets.FLOWER_USER }}
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/api-promote-staging.yml
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,7 @@ jobs:
DOCKER_GTC_PASSPORT_SCORER_IMAGE: public.ecr.aws/t1g3k9q8/passport-scorer:${{ needs.build-api.outputs.dockerTag }}
DOCKER_GTC_PASSPORT_VERIFIER_IMAGE: public.ecr.aws/t1g3k9q8/passport-verifier:${{ needs.build-verifier.outputs.dockerTag }}
ROUTE_53_ZONE: ${{ secrets.ROUTE53_ZONE_ID }}
ROUTE_53_ZONE_FOR_PUBLIC_DATA: ${{ secrets.ROUTE_53_ZONE_FOR_PUBLIC_DATA }}
DOMAIN: ${{ secrets.DOMAIN_STAGING }}
SCORER_SERVER_SSM_ARN: ${{ secrets.SCORER_SERVER_SSM_ARN }}
FLOWER_USER: ${{ secrets.FLOWER_USER }}
Expand Down
2 changes: 1 addition & 1 deletion api/ceramic_cache/management/commands/scorer_dump_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -300,7 +300,7 @@ def handle(self, *args, **options):
else model_config["filename"]
)

s3_key = f"{s3_folder}/{file_name}"
s3_key = f"{file_name}"

# chunk_size = 1000

Expand Down
8 changes: 6 additions & 2 deletions infra/lib/scorer/scheduledTasks.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ export type ScheduledTaskConfig = Pick<
command: string[];
scheduleExpression: string;
ephemeralStorageSizeInGiB?: number;
cpu?: number;
memory?: number;
};

export function createScheduledTask(
Expand All @@ -37,6 +39,8 @@ export function createScheduledTask(
command,
scheduleExpression,
ephemeralStorageSizeInGiB,
cpu,
memory,
} = config;

const task = new awsx.ecs.FargateTaskDefinition(name, {
Expand All @@ -52,8 +56,8 @@ export function createScheduledTask(
web: {
name: `${name}-container`,
image: dockerImageScorer,
cpu: 256,
memory: 2048,
cpu: cpu ? cpu : 256,
memory: memory ? memory : 2048,
secrets,
environment: getEnvironment(envConfig),
command,
Expand Down
157 changes: 157 additions & 0 deletions infra/lib/scorer/service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import { TargetGroup, ListenerRule } from "@pulumi/aws/lb";
import * as aws from "@pulumi/aws";

import { Cluster } from "@pulumi/aws/ecs";
import { LoadBalancer } from "@pulumi/aws/alb";

let SCORER_SERVER_SSM_ARN = `${process.env["SCORER_SERVER_SSM_ARN"]}`;

Expand Down Expand Up @@ -306,3 +307,159 @@ export function createScorerECSService(

return service;
}

/**
 * Provisions the publicly readable score-export bucket and serves it over
 * HTTPS on `domain` through CloudFront.
 *
 * Resources created: an S3 bucket named after `domain`, its public access
 * block and bucket policy, an ACM certificate (DNS-validated) in us-east-1,
 * a CloudFront distribution aliased to `domain`, and a Route53 alias record
 * pointing `domain` at the distribution.
 *
 * @param domain - Fully qualified domain name; also used as the bucket name.
 * @param route53Zone - ID of the Route53 hosted zone that holds the
 *   certificate-validation record and the alias record for `domain`.
 * @returns The certificate, its validation record, the certificate
 *   validation resource and the CloudFront distribution.
 */
export async function createScoreExportBucketAndDomain(
  domain: string,
  route53Zone: string
) {
  const scoreBucket = new aws.s3.Bucket(domain, {
    bucket: domain,
    website: {
      indexDocument: "registry_score.jsonl",
    },
  });

  // All four public-access guards are switched off so that the public-read
  // bucket policy and "public-read" object ACLs are accepted by S3.
  const publicAccessBlock = new aws.s3.BucketPublicAccessBlock(
    "myBucketPublicAccessBlock",
    {
      bucket: scoreBucket.bucket,
      blockPublicAcls: false,
      ignorePublicAcls: false,
      blockPublicPolicy: false,
      restrictPublicBuckets: false,
    }
  );

  // NOTE(review): this is the regional ELB service account; the second policy
  // statement grants it write access to the bucket — confirm this is intended.
  const serviceAccount = await aws.elb.getServiceAccount({});

  const bucketPolicy = scoreBucket.arn.apply((arn) =>
    JSON.stringify({
      Version: "2012-10-17",
      Statement: [
        {
          Effect: "Allow",
          Principal: "*",
          Action: "s3:GetObject",
          Resource: `${arn}/*`,
        },
        {
          Effect: "Allow",
          Principal: {
            AWS: serviceAccount.arn,
          },
          Action: ["s3:PutObject", "s3:PutObjectAcl"],
          Resource: `${arn}/*`,
        },
      ],
    })
  );

  new aws.s3.BucketPolicy(
    "bucketPolicy",
    {
      bucket: scoreBucket.bucket,
      policy: bucketPolicy,
    },
    // The policy grants public read access. S3 rejects such a policy while
    // "block public policy" is still in effect, so the access block above
    // must be applied first.
    { dependsOn: [publicAccessBlock] }
  );

  const eastRegion = new aws.Provider("east", {
    profile: aws.config.profile,
    region: "us-east-1", // Per AWS, ACM certificate must be in the us-east-1 region.
  });

  const exportCertificate = new aws.acm.Certificate(
    domain,
    {
      domainName: domain,
      validationMethod: "DNS",
    },
    { provider: eastRegion }
  );

  // Only one domain name is requested, so the first validation option is the
  // one for `domain`.
  const validationOption = exportCertificate.domainValidationOptions[0];

  const publicExportCertificateValidationDomain = new aws.route53.Record(
    `${domain}-validation`,
    {
      name: validationOption.resourceRecordName,
      zoneId: route53Zone,
      type: validationOption.resourceRecordType,
      records: [validationOption.resourceRecordValue],
      ttl: 600,
    },
    { provider: eastRegion }
  );

  const publicCertificateValidation = new aws.acm.CertificateValidation(
    "publicCertificateValidation",
    {
      certificateArn: exportCertificate.arn,
      validationRecordFqdns: [publicExportCertificateValidationDomain.fqdn],
    },
    {
      provider: eastRegion,
    }
  );

  const cloudFront = new aws.cloudfront.Distribution(
    "publicExportCloudFront",
    {
      origins: [
        {
          // The bucket ARN doubles as the origin identifier; it must match
          // `targetOriginId` in the default cache behavior below.
          originId: scoreBucket.arn,
          domainName: scoreBucket.bucketDomainName,
        },
      ],
      aliases: [domain],
      defaultRootObject: "registry_score.jsonl",
      enabled: true,
      defaultCacheBehavior: {
        targetOriginId: scoreBucket.arn,
        allowedMethods: ["GET", "HEAD"],
        cachedMethods: ["GET", "HEAD"],
        forwardedValues: {
          queryString: false,
          cookies: { forward: "none" },
        },
        viewerProtocolPolicy: "redirect-to-https",
      },
      // Unknown paths are answered with the score export and a 200 status.
      customErrorResponses: [
        {
          errorCode: 404,
          responseCode: 200,
          responsePagePath: "/registry_score.jsonl",
        },
      ],
      restrictions: {
        geoRestriction: {
          restrictionType: "none",
        },
      },
      viewerCertificate: {
        // Taken from the validation resource so the distribution is only
        // created once the certificate has been validated.
        // Per AWS, ACM certificate must be in the us-east-1 region.
        acmCertificateArn: publicCertificateValidation.certificateArn,
        sslSupportMethod: "sni-only",
      },
    },
    {}
  );

  new aws.route53.Record(domain, {
    name: domain,
    zoneId: route53Zone,
    type: "A",
    aliases: [
      {
        name: cloudFront.domainName,
        zoneId: cloudFront.hostedZoneId,
        evaluateTargetHealth: false,
      },
    ],
  });

  return {
    exportCertificate,
    publicExportCertificateValidationDomain,
    publicCertificateValidation,
    cloudFront,
  };
}
14 changes: 11 additions & 3 deletions infra/prod/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import * as awsx from "@pulumi/awsx";
import {
ScorerEnvironmentConfig,
ScorerService,
createScoreExportBucketAndDomain,
createScorerECSService,
createTargetGroup,
getEnvironment,
Expand All @@ -15,7 +16,9 @@ import { createScheduledTask } from "../lib/scorer/scheduledTasks";
// The following vars are not allowed to be undefined, hence the `${...}` magic

let route53Zone = `${process.env["ROUTE_53_ZONE"]}`;
let route53ZoneForPublicData = `${process.env["ROUTE_53_ZONE_FOR_PUBLIC_DATA"]}`;
export const domain = `api.scorer.${process.env["DOMAIN"]}`;
export const publicDataDomain = `public.scorer.${process.env["DOMAIN"]}`;
export const publicServiceUrl = `https://${domain}`;

let SCORER_SERVER_SSM_ARN = `${process.env["SCORER_SERVER_SSM_ARN"]}`;
Expand Down Expand Up @@ -1221,6 +1224,8 @@ export const dailyDataDumpTaskDefinition = createScheduledTask(
"daily-data-dump",
{
...baseScorerServiceConfig,
cpu: 4,
memory: 8192,
securityGroup: secgrp,
ephemeralStorageSizeInGiB: 100,
command: [
Expand Down Expand Up @@ -1264,11 +1269,9 @@ export const frequentAlloScorerDataDumpTaskDefinition = createScheduledTask(
name: "registry.Score",
filter: { community_id: 335 },
select_related: ["passport"],
"extra-args": { ACL: "public-read" },
},
]),

"--s3-uri=s3://passport-scorer-public/grants-stack/",
`--s3-uri=s3://${publicDataDomain}`,
"--summary-extra-args",
JSON.stringify({ ACL: "public-read" }),
],
Expand All @@ -1277,3 +1280,8 @@ export const frequentAlloScorerDataDumpTaskDefinition = createScheduledTask(
},
envConfig
);

// Provision the public score-export bucket, certificate, CloudFront
// distribution and DNS record for `publicDataDomain`.
// createScoreExportBucketAndDomain is async, so `exportVals` holds a Promise;
// it is neither awaited nor exported here.
const exportVals = createScoreExportBucketAndDomain(
publicDataDomain,
route53ZoneForPublicData
);
15 changes: 1 addition & 14 deletions infra/review/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import * as aws from "@pulumi/aws";
import * as awsx from "@pulumi/awsx";

import {
createScoreExportBucketAndDomain,
createTargetGroup,
ScorerEnvironmentConfig,
} from "../lib/scorer/service";
Expand Down Expand Up @@ -835,17 +836,3 @@ const envConfig: ScorerEnvironmentConfig = {
debug: "off",
passportPublicUrl: "https://staging.passport.gitcoin.co/",
};

export const weeklyDataDumpTaskDefinition = createScheduledTask(
"weekly-data-dump",
{
cluster,
executionRole: dpoppEcsRole,
subnets: vpcPrivateSubnetIds,
dockerImageScorer: dockerGtcPassportScorerImage,
securityGroup: secgrp,
command: ["python", "manage.py", "dump_stamp_data"],
scheduleExpression: "cron(30 23 ? * FRI *)", // Run the task every friday at 23:30 UTC
},
envConfig
);
33 changes: 33 additions & 0 deletions infra/staging/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import * as awsx from "@pulumi/awsx";
import {
ScorerEnvironmentConfig,
ScorerService,
createScoreExportBucketAndDomain,
createScorerECSService,
createTargetGroup,
getEnvironment,
Expand All @@ -15,7 +16,9 @@ import { createScheduledTask } from "../lib/scorer/scheduledTasks";
// The following vars are not allowed to be undefined, hence the `${...}` magic

let route53Zone = `${process.env["ROUTE_53_ZONE"]}`;
let route53ZoneForPublicData = `${process.env["ROUTE_53_ZONE_FOR_PUBLIC_DATA"]}`;
export const domain = `api.staging.scorer.${process.env["DOMAIN"]}`;
export const publicDataDomain = `public.staging.scorer.${process.env["DOMAIN"]}`;
export const publicServiceUrl = `https://${domain}`;

let SCORER_SERVER_SSM_ARN = `${process.env["SCORER_SERVER_SSM_ARN"]}`;
Expand Down Expand Up @@ -1139,3 +1142,33 @@ export const weeklyDataDumpTaskDefinition = createScheduledTask(
},
envConfig
);

/**
 * Scheduled task that dumps `registry.Score` rows for community 14 (with the
 * related passport) to the public score-export bucket every 30 minutes.
 */
export const frequentAlloScorerDataDumpTaskDefinition = createScheduledTask(
  "frequent-allo-scorer-data-dump",
  {
    ...baseScorerServiceConfig,
    securityGroup: secgrp,
    command: [
      "python",
      "manage.py",
      "scorer_dump_data",
      "--config",
      JSON.stringify([
        {
          name: "registry.Score",
          filter: { community_id: 14 },
          select_related: ["passport"],
        },
      ]),

      // The export bucket is created with `publicDataDomain` as its name.
      // `public.${domain}` would expand to `public.api.staging.scorer.…`,
      // which is not the bucket that is provisioned.
      `--s3-uri=s3://${publicDataDomain}`,
      "--summary-extra-args",
      JSON.stringify({ ACL: "public-read" }),
    ],

    scheduleExpression: "cron(*/30 * ? * * *)", // Run the task every 30 min
  },
  envConfig
);

// Provision the public score-export bucket, certificate, CloudFront
// distribution and DNS record for `publicDataDomain`.
// createScoreExportBucketAndDomain is async, so `exportVals` holds a Promise;
// it is neither awaited nor exported here.
const exportVals = createScoreExportBucketAndDomain(publicDataDomain, route53ZoneForPublicData);
Loading

0 comments on commit 0a7ce95

Please sign in to comment.