Skip to content

Commit

Permalink
chore: analytics exporter
Browse files Browse the repository at this point in the history
JIRA:CLOUDSEC-12
  • Loading branch information
katebygrace committed Jun 24, 2024
1 parent 2c479b2 commit 3886db5
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 17 deletions.
46 changes: 32 additions & 14 deletions dataeng/jobs/analytics/AnalyticsExporter.groovy
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package analytics
import static org.edx.jenkins.dsl.AnalyticsConstants.common_publishers
import static org.edx.jenkins.dsl.AnalyticsConstants.secure_scm
import static org.edx.jenkins.dsl.AnalyticsConstants.secure_scm_parameters
import static org.edx.jenkins.dsl.AnalyticsConstants.config_scm
import static org.edx.jenkins.dsl.AnalyticsConstants.opsgenie_heartbeat_publisher

class AnalyticsExporter {
Expand All @@ -19,7 +18,6 @@ class AnalyticsExporter {
stringParam('TASKS', '', 'Space separated list of tasks to process. Leave this blank to use the task list specified in the config file. Specify here only if you are running tests of a specific task.')
stringParam('PYTHON_VENV_VERSION', 'python3.7', 'Python virtual environment version to used.')
}
parameters secure_scm_parameters(allVars)

environmentVariables {
env('REMOTE_CONFIG_PROD_EDX_ROLE_ARN', allVars.get('REMOTE_CONFIG_PROD_EDX_ROLE_ARN'))
Expand All @@ -32,7 +30,7 @@ class AnalyticsExporter {
env('REMOTE_CONFIG_DECRYPTION_KEYS_VAULT_KV_VERSION', allVars.get('REMOTE_CONFIG_DECRYPTION_KEYS_VAULT_KV_VERSION'))
}

multiscm secure_scm(allVars) << {
multiscm config_scm(allVars) << {
git {
remote {
url('[email protected]:openedx/edx-platform.git')
Expand All @@ -55,6 +53,17 @@ class AnalyticsExporter {
relativeTargetDirectory('analytics-exporter')
}
}
git {
remote {
url('[email protected]:edx/analytics-tools.git')
branch('master')
credentials('1')
}
extensions {
pruneBranches()
relativeTargetDirectory('analytics-tools')
}
}

}

Expand All @@ -73,7 +82,7 @@ class AnalyticsExporter {
}
}

dslFactory.job('analytics-exporter-worker') {
dslFactory.job('analytics-exporter-worker-test') {
description('This is a worker/downstream job to the Analytics Exporter. It does all of the legwork of exporting/encrypting the data for a given org. See also: analytics-exporter-master.')
parameters {
stringParam('NOTIFY')
Expand All @@ -89,7 +98,6 @@ class AnalyticsExporter {
stringParam('EXTRA_OPTIONS')
stringParam('PYTHON_VENV_VERSION', 'python3.7', 'Python version to use for creating virtualenv.')
}
parameters secure_scm_parameters(allVars)

environmentVariables {
env('REMOTE_CONFIG_PROD_EDX_ROLE_ARN', allVars.get('REMOTE_CONFIG_PROD_EDX_ROLE_ARN'))
Expand All @@ -113,7 +121,7 @@ class AnalyticsExporter {

concurrentBuild()

multiscm secure_scm(allVars)
multiscm config_scm(allVars)

wrappers {
timestamps()
Expand All @@ -139,29 +147,29 @@ class AnalyticsExporter {
}
}

dslFactory.job('analytics-exporter-master') {
dslFactory.job('analytics-exporter-master-test') {
description('The Analytics Exporter weekly job, which exports tons of structure and state data for every course for every participating org and delivers them encrypted to our partners via S3. Specifically, this sets up the shared edx-platform execution environment, fetches a list of all the orgs, then kicks off downstream analytics-exporter-worker jobs for each one that corresponds to a partner which is configured to receive export data.')
parameters {
stringParam('ORGS', '*', 'Space separated list of organizations to process. Can use wildcards. e.g.: idbx HarvardX')
stringParam('EXPORTER_BRANCH', 'origin/master', 'Branch from the edx-analytics-exporter repository. For tags use tags/[tag-name].')
stringParam('PLATFORM_BRANCH', 'origin/2u/release', 'Branch from the edx-platform repository. For tags use tags/[tag-name].')
stringParam('EXPORTER_CONFIG_FILENAME', 'default.yaml', 'Name of configuration file in analytics-secure/analytics-exporter.')
stringParam('OUTPUT_BUCKET', allVars.get('EXPORTER_OUTPUT_BUCKET'), 'Name of the bucket for the destination of the export data. Can use a path. (eg. export-data/test).')
stringParam('NOTIFY', allVars.get('ANALYTICS_EXPORTER_NOTIFY_LIST'), 'Space separated list of emails to notify in case of failure.')
stringParam('OUTPUT_BUCKET', 's3://edx-analytics-scratch/analytics-test', 'Name of the bucket for the destination of the export data. Can use a path. (eg. export-data/test).')
stringParam('NOTIFY', '', 'Space separated list of emails to notify in case of failure.')

stringParam('DATE_MODIFIER', '', 'Used to set the date of the CWSM dump. Leave blank to use today\'s date. Set to "-d 202x-0x-0x" if that is when the CWSM dump took place. (Leave off quotes.)')
stringParam('EXTRA_OPTIONS', '--exclude-task=OrgEmailOptInTask', 'e.g. --exclude-task=OrgEmailOptInTask')
stringParam('ORG_CONFIG', 'data-czar-keys/config.yaml', 'Path to the data-czar organization config file.')
stringParam('DATA_CZAR_KEYS_BRANCH', 'master', 'Branch to use for the data-czar-keys repository.')
stringParam('PRIORITY_ORGS', allVars.get('PRIORITY_ORGS'), 'Space separated list of organizations to process first.')
}
parameters secure_scm_parameters(allVars)
environmentVariables {
env('OPSGENIE_HEARTBEAT_NAME', allVars.get('OPSGENIE_HEARTBEAT_NAME'))
env('OPSGENIE_HEARTBEAT_DURATION_NUM', allVars.get('OPSGENIE_HEARTBEAT_DURATION_NUM'))
env('OPSGENIE_HEARTBEAT_DURATION_UNIT', allVars.get('OPSGENIE_HEARTBEAT_DURATION_UNIT'))
}

multiscm secure_scm(allVars) << {
multiscm config_scm(allVars) << {
git {
remote {
url('[email protected]:openedx/edx-platform.git')
Expand Down Expand Up @@ -194,7 +202,17 @@ class AnalyticsExporter {
relativeTargetDirectory('data-czar-keys')
}
}

git {
remote {
url('[email protected]:edx/analytics-tools.git')
branch('master')
credentials('1')
}
extensions {
pruneBranches()
relativeTargetDirectory('analytics-tools')
}
}
}

triggers{
Expand All @@ -220,7 +238,7 @@ class AnalyticsExporter {
shell(dslFactory.readFileFromWorkspace('dataeng/resources/setup-platform-venv-py3.sh'))
shell(dslFactory.readFileFromWorkspace('dataeng/resources/setup-exporter.sh'))
downstreamParameterized {
trigger('analytics-exporter-worker') {
trigger('analytics-exporter-worker-test') {
block {
// Mark this build step as FAILURE if at least one of the downstream builds were marked FAILED.
buildStepFailure('FAILURE')
Expand Down
13 changes: 10 additions & 3 deletions dataeng/resources/setup-exporter.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ mkdir -p /var/lib/jenkins/tmp/analytics-exporter/course-data

# Create and activate a virtualenv in shell script
EXPORTER_VENV="exporter_venv"
virtualenv --python=python3.8 --clear "${EXPORTER_VENV}"
virtualenv --python=python3.11 --clear "${EXPORTER_VENV}"
source "${EXPORTER_VENV}/bin/activate"

# Install requirements into this (exporter) virtual environment
Expand All @@ -16,8 +16,15 @@ pip install -r github_requirements.txt
pip install mysql-connector-python -e .
popd

# Configuration paths in analytics-secure
SECURE_ROOT=${WORKSPACE}/analytics-secure/analytics-exporter


# Run the secrets-manager.py helper from the analytics-tools checkout to obtain
# task-auth.json for the exporter.
# NOTE(review): the meaning of the -w / -n / -v flags is defined by
# secrets-manager.py, which is not visible here -- presumably -n is the secret
# name and -v the local file; confirm against that script.
cd analytics-tools/snowflake
# Dependencies of the helper; presumably installed into whichever virtualenv is
# active at this point -- TODO confirm.
pip install argparse boto3
python3 secrets-manager.py -w -n analytics-secure/analytics-exporter/task-auth.json -v task-auth.json
# Step back up two levels to the directory we started from.
cd ../../

# Configuration paths in analytics-config
SECURE_ROOT=${WORKSPACE}/analytics-config/analytics-exporter
CONFIG_PATH=${SECURE_ROOT}/${EXPORTER_CONFIG_FILENAME}
GPG_KEYS_PATH=${WORKSPACE}/data-czar-keys

Expand Down
14 changes: 14 additions & 0 deletions src/main/groovy/org/edx/jenkins/dsl/AnalyticsConstants.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,20 @@ class AnalyticsConstants {
}
}

/**
 * Builds a git SCM configuration closure for the analytics-config repository.
 *
 * The returned closure declares a single git checkout of the 'master' branch,
 * pruning stale branches and placing the working copy in the
 * 'analytics-config' subdirectory.
 *
 * @param allVars job variables; accepted by the closure but not read in its body
 * @return a closure containing the git SCM definition
 */
public static def config_scm = { allVars ->
return {
git {
remote {
// NOTE(review): no credentials(...) is set on this remote -- confirm
// the build agent can reach the repository without one.
url('[email protected]:edx/analytics-config.git')
// Branch is fixed to master; it is not taken from allVars.
branch('master')
}
extensions {
pruneBranches()
// Checkout lands in a subdirectory named 'analytics-config'.
relativeTargetDirectory('analytics-config')
}
}
}
}
public static def data_czar_keys_scm = { allVars ->
return {
git {
Expand Down

0 comments on commit 3886db5

Please sign in to comment.