diff --git a/.gitignore b/.gitignore
index 3697e323..32fcfb22 100644
--- a/.gitignore
+++ b/.gitignore
@@ -23,6 +23,7 @@ deployment/ansible/roles/azavea.*
 /src/django/data/
 /src/django/media/
 /.venv
+/src/django/**/*.log
 
 # JS
 node_modules/
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5aee238e..0b961207 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Added
 
+- Add management command to bulk import contributors [#238](https://github.com/azavea/iow-boundary-tool/pull/238)
+
 ### Changed
 
 ### Fixed
diff --git a/data/README.md b/data/README.md
index d4966417..dd69fd90 100644
--- a/data/README.md
+++ b/data/README.md
@@ -96,3 +96,78 @@ AWS_CDN=$(aws --profile=iow-boundary-tool cloudfront list-distributions --query
 
 aws --profile=iow-boundary-tool cloudfront create-invalidation --distribution-id $AWS_CDN --paths "/example/*"
 ```
+
+## Importing Contributors
+
+While individual Contributors can be created via the Django Admin, that can be tedious when dealing with a large set of new users.
+To make that process easier, we support bulk creation of new Contributors via a Django management command.
+
+This command can be run locally, as well as on the staging and production instances.
+
+### CSV File
+
+To begin, create a CSV file like this:
+
+```csv
+email,password,pwsids,full_name,phone_number,job_title
+c2@element84.com,password,123456789,Contributor 2,5551234567,Engineer
+c3@element84.com,password,123456789;OTHERUTIL,Contributor 3,5558675309,Manager
+```
+
+All fields are required.
+Multiple `pwsids` can be specified by separating them with semicolons (`;`), as shown above.
+All contributors will be prompted to set their own password after their first login.
+Once the import succeeds, the administrator will have to inform the new contributors of their login email and default password.
+
+### S3 Bucket
+
+Then, upload the CSV file to the data bucket for the target environment (the same bucket the run commands below read from):
+
+| Environment | S3 Bucket                        |
+|-------------|----------------------------------|
+| Development | `iow-development-data-us-east-1` |
+| Staging     | `iow-staging-data-us-east-1`     |
+| Production  | `iow-production-data-us-east-1`  |
+
+The file should be placed inside the `csv/` folder in the bucket.
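+
+For example, a Staging upload might look like this, assuming the same `iow-boundary-tool` AWS profile used above and a local file named `new-contributors.csv`:
+
+```bash
+aws --profile=iow-boundary-tool s3 cp new-contributors.csv s3://iow-staging-data-us-east-1/csv/new-contributors.csv
+```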
+
+### Running the Import
+
+The import is transactional: if any error occurs, none of the entries will be saved to the database.
+This makes it easy to re-run the same command with a fixed file until all the rows import correctly.
+
+The import can be run locally in Development, or in an AWS environment like Staging or Production.
+
+#### Development
+
+The import can be run locally in Development like this:
+
+```bash
+./scripts/manage create_contributors_from_s3 iow-development-data-us-east-1 csv/test-contributors-success.csv
+```
+
+All success and error messages will be logged to the console.
+
+#### Staging
+
+The import can be run on Staging like this:
+
+```bash
+./scripts/manage ecsmanage create_contributors_from_s3 iow-staging-data-us-east-1 csv/test-contributors-success.csv
+```
+
+Note the use of the staging bucket for input.
+In this case, all success and error messages will be logged to the console output of the ECS task, which can be viewed in the "Logs" tab of the Task Details page in AWS, linked from the `ecsmanage` output.
+For every run of this command, the logs will also be saved to S3, as `s3://iow-staging-logs-us-east-1/management/create_contributors_from_s3_$TIMESTAMP.log`.
+
+#### Production
+
+The import can be run on Production like this:
+
+```bash
+./scripts/manage ecsmanage --environment production create_contributors_from_s3 iow-production-data-us-east-1 csv/$CSV_FILE
+```
+
+Note the use of the production bucket for input.
+In this case, all success and error messages will be logged to the console output of the ECS task, which can be viewed in the "Logs" tab of the Task Details page in AWS, linked from the `ecsmanage` output.
+For every run of this command, the logs will also be saved to S3, as `s3://iow-production-logs-us-east-1/management/create_contributors_from_s3_$TIMESTAMP.log`.
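+
+To verify that a run's log was saved, you can list the `management/` prefix of the logs bucket (Staging shown here, assuming the same AWS profile):
+
+```bash
+aws --profile=iow-boundary-tool s3 ls s3://iow-staging-logs-us-east-1/management/
+```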
diff --git a/deployment/terraform/iam.tf b/deployment/terraform/iam.tf
index 6cd1b6ab..14d27217 100644
--- a/deployment/terraform/iam.tf
+++ b/deployment/terraform/iam.tf
@@ -76,3 +76,24 @@ resource "aws_iam_role_policy" "s3_read_write_data_bucket" {
   role   = aws_iam_role.ecs_task_role.name
   policy = data.aws_iam_policy_document.s3_read_write_data_bucket.json
 }
+
+data "aws_iam_policy_document" "s3_write_logs_bucket" {
+  statement {
+    effect = "Allow"
+
+    resources = [
+      aws_s3_bucket.logs.arn,
+      "${aws_s3_bucket.logs.arn}/*",
+    ]
+
+    actions = [
+      "s3:PutObject",
+    ]
+  }
+}
+
+resource "aws_iam_role_policy" "s3_write_logs_bucket" {
+  name   = "S3WriteLogs"
+  role   = aws_iam_role.ecs_task_role.name
+  policy = data.aws_iam_policy_document.s3_write_logs_bucket.json
+}
diff --git a/src/django/api/management/commands/create_contributors_from_s3.py b/src/django/api/management/commands/create_contributors_from_s3.py
new file mode 100644
index 00000000..ca47a386
--- /dev/null
+++ b/src/django/api/management/commands/create_contributors_from_s3.py
@@ -0,0 +1,111 @@
+import csv
+import logging
+import sys
+from datetime import datetime
+
+import boto3
+from botocore.exceptions import ClientError
+from django.conf import settings
+from django.core.exceptions import ValidationError
+from django.core.management.base import BaseCommand
+from django.db import transaction
+
+from api.models import Roles, User, Utility
+
+
+class Command(BaseCommand):
+    help = "Create User models from a CSV file in S3"
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.logger = logging.getLogger("django")
+        self.logfile = f"create_contributors_from_s3_{datetime.now().isoformat()}.log"
+
+        file_handler = logging.FileHandler(self.logfile)
+        formatter = logging.Formatter("%(asctime)s %(levelname)s: %(message)s")
+
+        file_handler.setFormatter(formatter)
+        self.logger.addHandler(file_handler)
+
+    def add_arguments(self, parser):
+        parser.add_argument("bucket_name", type=str, help="Name of the S3 bucket")
+        parser.add_argument(
+            "csv_file_key", type=str, help="Key of the CSV file in the S3 bucket"
+        )
+
+    def handle(self, *args, **options):
+        bucket_name = options["bucket_name"]
+        csv_file_key = options["csv_file_key"]
+        s3 = boto3.client("s3")
+
+        try:
+            # Fetch the specified CSV file from S3
+            response = s3.get_object(Bucket=bucket_name, Key=csv_file_key)
+            content = response["Body"].read().decode("utf-8")
+            reader = csv.DictReader(content.splitlines())
+
+            # We process the CSV file atomically, so if any errors are
+            # encountered, the entire import is cancelled
+            with transaction.atomic():
+                try:
+                    for row in reader:
+                        # Create a user with specified utilities for each row
+                        # in the CSV
+                        try:
+                            user = User.objects.create_user(
+                                email=row["email"],
+                                role=Roles.CONTRIBUTOR,
+                                password=row["password"],
+                                full_name=row["full_name"],
+                                phone_number=row["phone_number"],
+                                job_title=row["job_title"],
+                            )
+                        except ValidationError as e:
+                            self.logger.error(
+                                f"Validation error while creating user {row['email']}: "
+                                f"{str(e)}"
+                            )
+                            raise
+
+                        # Associate with utilities matching the given ;-separated
+                        # PWSIDs. We don't use .filter(pwsid__in) here because we
+                        # want to fail if any given PWSID is not found in the
+                        # system, to call out mistakes in the input CSV.
+                        pwsids = row["pwsids"].split(";")
+                        utilities = []
+                        for pwsid in pwsids:
+                            try:
+                                utilities.append(Utility.objects.get(pwsid=pwsid))
+                            except Utility.DoesNotExist:
+                                self.logger.error(
+                                    f"Invalid PWSID '{pwsid}' "
+                                    f"for contributor {user.email}"
+                                )
+                                raise
+
+                        user.utilities.set(utilities)
+
+                        utilities_str = ", ".join(
+                            [str(ut) for ut in user.utilities.all()]
+                        )
+
+                        self.logger.info(
+                            f"Successfully created contributor {user.email}, "
+                            f"associated with {utilities_str}"
+                        )
+
+                except Exception as e:
+                    self.logger.error(
+                        f"Error occurred during contributor creation: {str(e)}"
+                    )
+                    transaction.set_rollback(True)
+                    sys.exit(1)
+        except ClientError as e:
+            self.logger.error(f"Error occurred while accessing the CSV file: {str(e)}")
+            sys.exit(1)
+        finally:
+            if settings.ENVIRONMENT != "Development":
+                # Upload log to S3
+                log_file_key = f"management/{self.logfile}"
+                s3.upload_file(
+                    self.logfile, settings.AWS_LOGS_BUCKET_NAME, log_file_key
+                )
diff --git a/src/django/api/models/user.py b/src/django/api/models/user.py
index cfd5d226..b5ff800f 100644
--- a/src/django/api/models/user.py
+++ b/src/django/api/models/user.py
@@ -19,7 +19,7 @@ class EmailAsUsernameUserManager(BaseUserManager):
 
     use_in_migrations = True
 
-    def _create_user(self, email, role, password=None, **extra_fields):
+    def _create_user(self, email, role, password=None, validate=True, **extra_fields):
         if not email:
             raise ValueError("An email address must be provided.")
         if not role:
@@ -27,6 +27,8 @@
         email = self.normalize_email(email)
         user = self.model(email=email, role=role, **extra_fields)
         user.set_password(password)
+        if validate:
+            user.full_clean()
         user.save()
         return user
 
diff --git a/src/django/iow/settings.py b/src/django/iow/settings.py
index 7067c3db..403224bd 100644
--- a/src/django/iow/settings.py
+++ b/src/django/iow/settings.py
@@ -305,6 +305,7 @@
 MEDIA_ROOT = os.path.join(BASE_DIR, "media")
 MEDIA_URL = f"/media/"
 
 AWS_STORAGE_BUCKET_NAME = f'iow-{ENVIRONMENT.lower()}-data-us-east-1'
+AWS_LOGS_BUCKET_NAME = f'iow-{ENVIRONMENT.lower()}-logs-us-east-1'
 
 # IOW Settings
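
The new `validate` flag in `_create_user` is what lets the import surface bad rows before anything is saved: `full_clean()` runs the model's field validators and raises `ValidationError`, which the management command logs and re-raises to trigger the rollback. A minimal sketch of that behavior, with hypothetical values, as run from a Django shell:

```python
# Minimal sketch (hypothetical values): with validate=True (the default),
# create_user runs full_clean() before save(), so invalid fields raise
# ValidationError instead of failing later at the database layer.
from django.core.exceptions import ValidationError

from api.models import Roles, User

try:
    User.objects.create_user(
        email="not-an-email",  # fails EmailField validation in full_clean()
        role=Roles.CONTRIBUTOR,
        password="password",
    )
except ValidationError as e:
    print(e.message_dict)
```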