Merge pull request #238 from azavea/tt/236/bulk-create-contributors
Closes #236
rajadain authored Jun 16, 2023
2 parents a0e0d5d + a224caa commit 4c6ba5e
Showing 7 changed files with 214 additions and 1 deletion.
1 change: 1 addition & 0 deletions .gitignore
@@ -23,6 +23,7 @@ deployment/ansible/roles/azavea.*
/src/django/data/
/src/django/media/
/.venv
/src/django/**/*.log

# JS
node_modules/
2 changes: 2 additions & 0 deletions CHANGELOG.md
@@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Added

- Add management command to bulk import contributors [#238](https://github.com/azavea/iow-boundary-tool/pull/238)

### Changed

### Fixed
75 changes: 75 additions & 0 deletions data/README.md
@@ -96,3 +96,78 @@ AWS_CDN=$(aws --profile=iow-boundary-tool cloudfront list-distributions --query

aws --profile=iow-boundary-tool cloudfront create-invalidation --distribution-id $AWS_CDN --paths "/example/*"
```

## Importing Contributors

While individual Contributors can be created via the Django Admin, that can be tedious when dealing with a large set of new users.
To make that process easier, we support bulk creation of new Contributors via a Django management command.

This command can be run locally, as well as on the staging and production instances.

### CSV File

To begin, create a CSV file like this:

```csv
email,password,pwsids,full_name,phone_number,job_title
[email protected],password,123456789,Contributor 2,5551234567,Engineer
[email protected],password,123456789;OTHERUTIL,Contributor 3,5558675309,Manager
```

All fields are required and must be non-empty.
Multiple `pwsids` can be specified by separating them with semicolons (`;`), as shown above.
All contributors will be prompted to set their own password on first login.
Once the import succeeds, the administrator will have to inform the new contributors of their login email and default password.
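
Before uploading, it can be worth a quick local sanity check that the header and required fields are present. A minimal sketch (the `check_contributors_csv` helper and the file name are illustrative, not part of this PR):

```python
import csv

REQUIRED = ["email", "password", "pwsids", "full_name", "phone_number", "job_title"]


def check_contributors_csv(path):
    with open(path, newline="") as f:
        reader = csv.DictReader(f)
        missing = [c for c in REQUIRED if c not in (reader.fieldnames or [])]
        if missing:
            raise ValueError(f"Missing columns: {missing}")
        for i, row in enumerate(reader, start=2):  # row 1 is the header
            empty = [c for c in REQUIRED if not (row[c] or "").strip()]
            if empty:
                raise ValueError(f"Row {i} has empty required fields: {empty}")


check_contributors_csv("contributors.csv")
```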

### S3 Bucket

Then, upload the CSV file to the appropriate bucket. Here are the buckets for each environment:

| Environment | S3 Bucket                        |
|-------------|----------------------------------|
| Development | `iow-development-data-us-east-1` |
| Staging     | `iow-staging-data-us-east-1`     |
| Production  | `iow-production-data-us-east-1`  |

The file should be placed inside the `csv/` folder in the bucket.
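
For example, with the AWS CLI and the same profile used earlier in this document (the local file name is illustrative):

```bash
aws --profile=iow-boundary-tool s3 cp contributors.csv \
    s3://iow-development-data-us-east-1/csv/contributors.csv
```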

### Running the Import

The import is transactional: if any error occurs, none of the entries are saved to the database.
This allows the same command to be re-run with a fixed file until all the rows import correctly.
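
Under the hood this is Django's `transaction.atomic()` with an explicit rollback on failure; a distilled sketch of the pattern (the `create_contributor` helper is illustrative; the full command source appears later in this commit):

```python
from django.db import transaction


def import_rows(reader, create_contributor):
    # Either every row is saved, or none are: an exception raised inside the
    # atomic block rolls back the whole transaction. set_rollback() makes the
    # rollback explicit, mirroring the management command in this commit.
    with transaction.atomic():
        try:
            for row in reader:
                create_contributor(row)
        except Exception:
            transaction.set_rollback(True)
            raise
```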

The import can be run locally in Development, or in an AWS environment like Staging or Production.

#### Development

The import can be run locally in Development like this:

```bash
./scripts/manage create_contributors_from_s3 iow-development-data-us-east-1 csv/test-contributors-success.csv
```

All successes and errors will be logged to the console.

#### Staging

The import can be run on Staging like this:

```bash
./scripts/manage ecsmanage create_contributors_from_s3 iow-staging-data-us-east-1 csv/test-contributors-success.csv
```

Note the use of the staging bucket for input.
In this case, all success and error messages are logged to the console output of the ECS task, which can be viewed in the "Logs" tab of the Task Details in AWS, linked from the `ecsmanage` output.
For every run of this command, the logs are also saved to S3 as `s3://iow-staging-logs-us-east-1/management/create_contributors_from_s3_$TIMESTAMP.log`.
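
Since `$TIMESTAMP` is generated at run time, one way to locate a given run's log is to list the prefix with the AWS CLI and then `s3 cp` the object you want (profile name as used earlier in this document):

```bash
aws --profile=iow-boundary-tool s3 ls s3://iow-staging-logs-us-east-1/management/
```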

#### Production

The import can be run on Production like this:

```bash
./scripts/manage ecsmanage --environment production create_contributors_from_s3 iow-production-data-us-east-1 csv/$CSV_FILE
```

Note the use of the production bucket for input.
As on Staging, all success and error messages are logged to the console output of the ECS task, viewable in the "Logs" tab of the Task Details in AWS, linked from the `ecsmanage` output.
For every run of this command, the logs are also saved to S3 as `s3://iow-production-logs-us-east-1/management/create_contributors_from_s3_$TIMESTAMP.log`.
21 changes: 21 additions & 0 deletions deployment/terraform/iam.tf
@@ -76,3 +76,24 @@ resource "aws_iam_role_policy" "s3_read_write_data_bucket" {
  role   = aws_iam_role.ecs_task_role.name
  policy = data.aws_iam_policy_document.s3_read_write_data_bucket.json
}

data "aws_iam_policy_document" "s3_write_logs_bucket" {
statement {
effect = "Allow"

resources = [
aws_s3_bucket.logs.arn,
"${aws_s3_bucket.logs.arn}/*",
]

actions = [
"s3:PutObject",
]
}
}

resource "aws_iam_role_policy" "s3_write_logs_bucket" {
name = "S3WriteLogs"
role = aws_iam_role.ecs_task_role.name
policy = data.aws_iam_policy_document.s3_write_logs_bucket.json
}
111 changes: 111 additions & 0 deletions src/django/api/management/commands/create_contributors_from_s3.py
@@ -0,0 +1,111 @@
import csv
import logging
import sys
from datetime import datetime

import boto3
from botocore.exceptions import ClientError
from django.conf import settings
from django.core.exceptions import ValidationError
from django.core.management.base import BaseCommand
from django.db import transaction

from api.models import Roles, User, Utility


class Command(BaseCommand):
    help = "Create User models from a CSV file in S3"

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.logger = logging.getLogger("django")
        self.logfile = f"create_contributors_from_s3_{datetime.now().isoformat()}.log"

        # Duplicate all log output into a local file, so it can be uploaded
        # to S3 at the end of the run
        file_handler = logging.FileHandler(self.logfile)
        formatter = logging.Formatter("%(asctime)s %(levelname)s: %(message)s")

        file_handler.setFormatter(formatter)
        self.logger.addHandler(file_handler)

    def add_arguments(self, parser):
        parser.add_argument("bucket_name", type=str, help="Name of the S3 bucket")
        parser.add_argument(
            "csv_file_key", type=str, help="Key of the CSV file in the S3 bucket"
        )

    def handle(self, *args, **options):
        bucket_name = options["bucket_name"]
        csv_file_key = options["csv_file_key"]
        s3 = boto3.client("s3")

        try:
            # Fetch the specified CSV file from S3
            response = s3.get_object(Bucket=bucket_name, Key=csv_file_key)
            content = response["Body"].read().decode("utf-8")
            reader = csv.DictReader(content.splitlines())

            # We process the CSV file atomically, so if any errors are
            # encountered, the entire import is cancelled
            with transaction.atomic():
                try:
                    for row in reader:
                        # Create a user with specified utilities for each row
                        # in the CSV
                        try:
                            user = User.objects.create_user(
                                email=row["email"],
                                role=Roles.CONTRIBUTOR,
                                password=row["password"],
                                full_name=row["full_name"],
                                phone_number=row["phone_number"],
                                job_title=row["job_title"],
                            )
                        except ValidationError as e:
                            self.logger.error(
                                f"Validation error while creating user {row['email']}: "
                                f"{str(e)}"
                            )
                            raise

                        # Associate with utilities matching the given
                        # ;-separated PWSIDs. We don't use .filter(pwsid__in)
                        # here because we want to fail if any given PWSID is
                        # not found in the system, to call out mistakes in the
                        # input CSV.
                        pwsids = row["pwsids"].split(";")
                        utilities = []
                        for pwsid in pwsids:
                            try:
                                utilities.append(Utility.objects.get(pwsid=pwsid))
                            except Utility.DoesNotExist:
                                self.logger.error(
                                    f"Invalid PWSID '{pwsid}' "
                                    f"for contributor {user.email}"
                                )
                                raise

                        user.utilities.set(utilities)

                        utilities_str = ", ".join(
                            [str(ut) for ut in user.utilities.all()]
                        )

                        self.logger.info(
                            f"Successfully created contributor {user.email}, "
                            f"associated with {utilities_str}"
                        )

                except Exception as e:
                    self.logger.error(
                        f"Error occurred during contributor creation: {str(e)}"
                    )
                    transaction.set_rollback(True)
                    sys.exit(1)
        except ClientError as e:
            self.logger.error(f"Error occurred while accessing the CSV file: {str(e)}")
            sys.exit(1)
        finally:
            if settings.ENVIRONMENT != "Development":
                # Upload the log file to S3
                log_file_key = f"management/{self.logfile}"
                s3.upload_file(
                    self.logfile, settings.AWS_LOGS_BUCKET_NAME, log_file_key
                )
4 changes: 3 additions & 1 deletion src/django/api/models/user.py
@@ -19,14 +19,16 @@ class EmailAsUsernameUserManager(BaseUserManager):

    use_in_migrations = True

-    def _create_user(self, email, role, password=None, **extra_fields):
+    def _create_user(self, email, role, password=None, validate=True, **extra_fields):
         if not email:
             raise ValueError("An email address must be provided.")
         if not role:
             raise ValueError("A role must be provided.")
         email = self.normalize_email(email)
         user = self.model(email=email, role=role, **extra_fields)
         user.set_password(password)
+        if validate:
+            user.full_clean()
         user.save()
         return user
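
The new `validate` flag runs Django's `full_clean()` before saving, which is what lets the bulk import command above surface a `ValidationError` for bad rows. A hedged illustration, assuming `create_user` forwards extra keyword arguments to `_create_user` as in the usual Django manager pattern (that wrapper is not shown in this diff):

```python
from api.models import Roles, User

# validate defaults to True, so normal callers get model validation.
# Passing validate=False skips full_clean(), e.g. for trusted fixture data.
user = User.objects.create_user(
    email="fixture@example.com",
    role=Roles.CONTRIBUTOR,
    password="initial-password",
    validate=False,
)
```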

1 change: 1 addition & 0 deletions src/django/iow/settings.py
@@ -305,6 +305,7 @@
MEDIA_ROOT = os.path.join(BASE_DIR, "media")
MEDIA_URL = f"/media/"
AWS_STORAGE_BUCKET_NAME = f'iow-{ENVIRONMENT.lower()}-data-us-east-1'
AWS_LOGS_BUCKET_NAME = f'iow-{ENVIRONMENT.lower()}-logs-us-east-1'

# IOW Settings

