From 7a8a371168c1fbfe0f780a09c2336643c5816f26 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Thu, 13 May 2021 18:52:05 -0700 Subject: [PATCH 01/12] Allow by sample or project summaries --- microsetta_private_api/admin/admin_impl.py | 92 +++---------------- .../api/microsetta_private_api.yaml | 35 +++++-- 2 files changed, 44 insertions(+), 83 deletions(-) diff --git a/microsetta_private_api/admin/admin_impl.py b/microsetta_private_api/admin/admin_impl.py index 387cec3f8..422ef8c02 100644 --- a/microsetta_private_api/admin/admin_impl.py +++ b/microsetta_private_api/admin/admin_impl.py @@ -5,7 +5,6 @@ from microsetta_private_api.config_manager import SERVER_CONFIG from microsetta_private_api.model.log_event import LogEvent -from microsetta_private_api.model.source import Source from microsetta_private_api.model.project import Project from microsetta_private_api.model.daklapack_order import DaklapackOrder, \ ORDER_ID_KEY, SUBMITTER_ACCT_KEY @@ -18,15 +17,15 @@ from microsetta_private_api.repo.source_repo import SourceRepo from microsetta_private_api.repo.transaction import Transaction from microsetta_private_api.repo.admin_repo import AdminRepo -from microsetta_private_api.repo.survey_template_repo import SurveyTemplateRepo from microsetta_private_api.repo.metadata_repo import (retrieve_metadata, drop_private_columns) -from microsetta_private_api.repo.vioscreen_repo import VioscreenSessionRepo -from microsetta_private_api.tasks import send_email as celery_send_email +from microsetta_private_api.tasks import send_email as celery_send_email,\ + per_sample_summary as celery_per_sample_summary from microsetta_private_api.admin.email_templates import EmailMessage from microsetta_private_api.util.redirects import build_login_redirect from microsetta_private_api.admin.daklapack_communication import \ post_daklapack_order, send_daklapack_hold_email +from microsetta_private_api.admin.sample_summary import per_sample from werkzeug.exceptions import Unauthorized @@ -397,83 +396,22 @@ def query_email_stats(body, token_info): return jsonify(results), 200 -def query_barcode_stats(body, token_info): +def query_project_barcode_stats(body, token_info): validate_admin_access(token_info) - - barcodes = body.get("sample_barcodes") + email = body.get("email") project = body["project"] + celery_per_sample_summary.delay(email, project) + return None, 200 - summaries = [] - with Transaction() as t: - admin_repo = AdminRepo(t) - sample_repo = SampleRepo(t) - template_repo = SurveyTemplateRepo(t) - vs_repo = VioscreenSessionRepo(t) - project_barcodes = admin_repo.get_project_barcodes(project) - - if barcodes is None: - barcodes = project_barcodes - else: - barcodes = [b for b in barcodes if b in set(project_barcodes)] - not_found = [b for b in barcodes if b not in set(project_barcodes)] - if len(not_found) > 0: - nf = ", ".join(not_found) - message = f"The following barcodes were not found: '[{nf}]'" - return jsonify(code=404, message=message), 404 - - for barcode in barcodes: - diag = admin_repo.retrieve_diagnostics_by_barcode(barcode) - sample = diag['sample'] - account = diag['account'] - source = diag['source'] - - account_email = None if account is None else account.email - source_email = None - source_type = None if source is None else source.source_type - vio_id = None - - if source is not None and source_type is Source.SOURCE_TYPE_HUMAN: - source_email = source.email - - vio_id = template_repo.get_vioscreen_id_if_exists(account.id, - source.id, - sample.id) - - sample_status = sample_repo.get_sample_status( - sample.barcode, - sample._latest_scan_timestamp - ) - - ffq_complete, ffq_taken, _ = vs_repo.get_ffq_status_by_sample( - sample.id - ) - - summary = { - "sampleid": barcode, - "project": project, - "source-type": source_type, - "site-sampled": sample.site, - "source-email": source_email, - "account-email": account_email, - "vioscreen_username": vio_id, - "ffq-taken": ffq_taken, - "ffq-complete": ffq_complete, - "sample-status": sample_status, - "sample-received": sample_status is not None - } - - for status in ["sample-is-valid", - "no-associated-source", - "no-registered-account", - "no-collection-info", - "sample-has-inconsistencies", - "received-unknown-validity"]: - summary[status] = sample_status == status - - summaries.append(summary) - - return jsonify(summaries), 200 +def query_barcode_stats(body, token_info): + validate_admin_access(token_info) + print(body) + barcodes = body["sample_barcodes"] + if len(barcodes) > 100: + return jsonify({"message": "Too manny barcodes requested"}), 429 + summary = per_sample(None, barcodes) + return jsonify(summary), 200 def create_daklapack_order(body, token_info): diff --git a/microsetta_private_api/api/microsetta_private_api.yaml b/microsetta_private_api/api/microsetta_private_api.yaml index c85a2e044..b80cfec81 100644 --- a/microsetta_private_api/api/microsetta_private_api.yaml +++ b/microsetta_private_api/api/microsetta_private_api.yaml @@ -1510,6 +1510,30 @@ paths: schema: type: array + '/admin/account_project_barcode_summary': + post: + operationId: microsetta_private_api.admin.admin_impl.query_project_barcode_stats + tags: + - Admin + requestBody: + content: + application/json: + schema: + type: object + properties: + 'project': + type: integer + 'email': + type: string + + responses: + '200': + description: Successfully triggered the summary job + '404': + description: Requested project not found + '429': + description: Too many barcodes requested + '/admin/account_barcode_summary': post: operationId: microsetta_private_api.admin.admin_impl.query_barcode_stats @@ -1527,18 +1551,17 @@ paths: # not using the defined schema for sample_barcode as it is # readOnly type: string - nullable: true - project: - type: integer responses: '200': description: Return list of dictionaries of sample status for requested accounts - '404': - description: Project not found, or requested barcodes not found content: application/json: schema: - type: object + type: array + items: + type: object + '404': + description: Requested barcodes not found '/admin/activation': post: From 2248321f19266376476fb0c5a08f5ecfd3e2a105 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Thu, 13 May 2021 18:52:21 -0700 Subject: [PATCH 02/12] 200x performance improvement for diagnostics --- microsetta_private_api/repo/sample_repo.py | 48 ++++++++++++++++++++-- 1 file changed, 44 insertions(+), 4 deletions(-) diff --git a/microsetta_private_api/repo/sample_repo.py b/microsetta_private_api/repo/sample_repo.py index e6395ebf6..74bc6d4cf 100644 --- a/microsetta_private_api/repo/sample_repo.py +++ b/microsetta_private_api/repo/sample_repo.py @@ -30,6 +30,31 @@ class SampleRepo(BaseRepo): LEFT JOIN ag.source ON ag.ag_kit_barcodes.source_id = ag.source.id""" + # the nested query requires a full scan of the barcode_scans + # table, which is not necessary if limited to a specific sample. + # this query is approximately 200x faster than the above + # Note the WHERE in the nested query + PARTIAL_SQL_BARCODE_SPECIFIC = """SELECT + ag_kit_barcodes.ag_kit_barcode_id, + ag.ag_kit_barcodes.sample_date, + ag.ag_kit_barcodes.sample_time, + ag.ag_kit_barcodes.site_sampled, + ag.ag_kit_barcodes.notes, + ag.ag_kit_barcodes.barcode, + latest_scan.scan_timestamp, + ag.source.id, + ag.source.account_id + FROM ag.ag_kit_barcodes + LEFT JOIN ( + SELECT barcode, max(scan_timestamp) AS scan_timestamp + FROM barcodes.barcode_scans + WHERE barcode = %s + GROUP BY barcode + ) latest_scan + ON ag.ag_kit_barcodes.barcode = latest_scan.barcode + LEFT JOIN ag.source + ON ag.ag_kit_barcodes.source_id = ag.source.id""" + def __init__(self, transaction): super().__init__(transaction) @@ -90,16 +115,30 @@ def _update_sample_association(self, sample_id, source_id, "ag_kit_barcode_id = %s", (source_id, sample_id)) + def _get_sample_barcode_from_id(self, sample_id): + """Obtain a barcode from a ag.ag_kit_barcode_id""" + sql = """SELECT barcode + FROM ag.ag_kit_barcodes + WHERE ag_kit_barcode_id = %s""" + with self._transaction.cursor() as cur: + cur.execute(sql, (sample_id,)) + sample_row = cur.fetchone() + if sample_row is None: + return None + else: + return sample_row[0] + def _get_sample_by_id(self, sample_id): """ Do not use from api layer, you must validate account and source.""" sql = "{0}{1}".format( - self.PARTIAL_SQL, + self.PARTIAL_SQL_BARCODE_SPECIFIC, " WHERE" " ag_kit_barcodes.ag_kit_barcode_id = %s") with self._transaction.cursor() as cur: - cur.execute(sql, (sample_id,)) + barcode = self._get_sample_barcode_from_id(sample_id) + cur.execute(sql, (barcode, sample_id,)) sample_row = cur.fetchone() return self._create_sample_obj(sample_row) @@ -129,14 +168,15 @@ def get_samples_by_source(self, account_id, source_id): def get_sample(self, account_id, source_id, sample_id): sql = "{0}{1}".format( - self.PARTIAL_SQL, + self.PARTIAL_SQL_BARCODE_SPECIFIC, " WHERE" " source.account_id = %s" " AND source.id = %s" " AND ag_kit_barcodes.ag_kit_barcode_id = %s ") with self._transaction.cursor() as cur: - cur.execute(sql, (account_id, source_id, sample_id)) + barcode = self._get_sample_barcode_from_id(sample_id) + cur.execute(sql, (barcode, account_id, source_id, sample_id)) sample_row = cur.fetchone() return self._create_sample_obj(sample_row) From f8301f235b63e9d98ec3b25a093f57555344a0c3 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Thu, 13 May 2021 18:52:40 -0700 Subject: [PATCH 03/12] missing index --- microsetta_private_api/db/patches/0080.sql | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 microsetta_private_api/db/patches/0080.sql diff --git a/microsetta_private_api/db/patches/0080.sql b/microsetta_private_api/db/patches/0080.sql new file mode 100644 index 000000000..754f9e2fd --- /dev/null +++ b/microsetta_private_api/db/patches/0080.sql @@ -0,0 +1,2 @@ +-- Scans are often looked up by barcode +CREATE INDEX barcode_scans_barcode_idx ON barcodes.barcode_scans (barcode); From aa8ca2e99b0e5338bcb22c53c41b7e5d8c53a608 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Thu, 13 May 2021 18:53:07 -0700 Subject: [PATCH 04/12] Migrate per sample code away from API --- .../admin/sample_summary.py | 90 +++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 microsetta_private_api/admin/sample_summary.py diff --git a/microsetta_private_api/admin/sample_summary.py b/microsetta_private_api/admin/sample_summary.py new file mode 100644 index 000000000..a1249a8ea --- /dev/null +++ b/microsetta_private_api/admin/sample_summary.py @@ -0,0 +1,90 @@ +from microsetta_private_api.model.source import Source +from microsetta_private_api.repo.sample_repo import SampleRepo +from microsetta_private_api.repo.transaction import Transaction +from microsetta_private_api.repo.admin_repo import AdminRepo +from microsetta_private_api.repo.survey_template_repo import SurveyTemplateRepo +from microsetta_private_api.repo.vioscreen_repo import VioscreenSessionRepo +from werkzeug.exceptions import NotFound + + +def per_sample(project, barcodes): + summaries = [] + with Transaction() as t: + admin_repo = AdminRepo(t) + sample_repo = SampleRepo(t) + template_repo = SurveyTemplateRepo(t) + vs_repo = VioscreenSessionRepo(t) + + if project is not None: + project_barcodes = admin_repo.get_project_barcodes(project) + else: + project = 'Unspecified' + + if barcodes is None: + barcodes = project_barcodes + + ####### + ######### + for barcode in barcodes[:100]: + diag = admin_repo.retrieve_diagnostics_by_barcode(barcode) + if diag is None: + raise NotFound(f"Barcode not found: {barcode}") + + sample = diag['sample'] + account = diag['account'] + source = diag['source'] + + account_email = None if account is None else account.email + source_email = None + source_type = None if source is None else source.source_type + vio_id = None + + if source is not None and source_type is Source.SOURCE_TYPE_HUMAN: + source_email = source.email + + vio_id = template_repo.get_vioscreen_id_if_exists(account.id, + source.id, + sample.id) + + # at least one sample has been observed that "is_microsetta", + # described in the barcodes.project_barcode table, but which is + # unexpectedly not present in ag.ag_kit_barcodes + if sample is None: + sample_status = None + sample_site = None + ffq_complete = None + ffq_taken = None + else: + sample_status = sample_repo.get_sample_status( + sample.barcode, + sample._latest_scan_timestamp + ) + sample_site = sample.site + ffq_complete, ffq_taken, _ = vs_repo.get_ffq_status_by_sample( + sample.id + ) + + summary = { + "sampleid": barcode, + "project": project, + "source-type": source_type, + "site-sampled": sample_site, + "source-email": source_email, + "account-email": account_email, + "vioscreen_username": vio_id, + "ffq-taken": ffq_taken, + "ffq-complete": ffq_complete, + "sample-status": sample_status, + "sample-received": sample_status is not None + } + + for status in ["sample-is-valid", + "no-associated-source", + "no-registered-account", + "no-collection-info", + "sample-has-inconsistencies", + "received-unknown-validity"]: + summary[status] = sample_status == status + + summaries.append(summary) + return summaries From 5b124706d806ee188891ae5d86f7433f7d895b1f Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Thu, 13 May 2021 18:53:45 -0700 Subject: [PATCH 05/12] Add stub for celery task when requesting a large summary --- microsetta_private_api/tasks.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/microsetta_private_api/tasks.py b/microsetta_private_api/tasks.py index 1a4e89a1f..4d302e1d0 100644 --- a/microsetta_private_api/tasks.py +++ b/microsetta_private_api/tasks.py @@ -3,6 +3,7 @@ from microsetta_private_api.model.log_event import EventType, EventSubtype from microsetta_private_api.admin.email_templates import EmailMessage, \ BasicEmailMessage +from microsetta_private_api.admin.sample_summary import per_sample @celery.task(ignore_result=True) @@ -20,3 +21,11 @@ def send_basic_email(to_email, subject, template_base_fp, req_template_keys, msg_obj = BasicEmailMessage(subject, template_base_fp, req_template_keys, event_type, event_subtype) SendEmail.send(to_email, msg_obj, msg_args, from_email) + + +@celery.task(ignore_result=True) +def per_sample_summary(email, project): + summaries = per_sample(project, barcodes=None) + import pandas as pd + blah = pd.DataFrame(summaries) + blah.to_csv('/tmp/footest.stuff') From 085caf13636a96c422c91c94050c323e98f4bc58 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Fri, 14 May 2021 08:34:33 -0700 Subject: [PATCH 06/12] Harmonize tests --- microsetta_private_api/admin/admin_impl.py | 3 +- .../admin/sample_summary.py | 4 +- .../admin/tests/test_admin_api.py | 40 +++---------------- 3 files changed, 8 insertions(+), 39 deletions(-) diff --git a/microsetta_private_api/admin/admin_impl.py b/microsetta_private_api/admin/admin_impl.py index 422ef8c02..9835ed77d 100644 --- a/microsetta_private_api/admin/admin_impl.py +++ b/microsetta_private_api/admin/admin_impl.py @@ -406,9 +406,8 @@ def query_project_barcode_stats(body, token_info): def query_barcode_stats(body, token_info): validate_admin_access(token_info) - print(body) barcodes = body["sample_barcodes"] - if len(barcodes) > 100: + if len(barcodes) > 1000: return jsonify({"message": "Too manny barcodes requested"}), 429 summary = per_sample(None, barcodes) return jsonify(summary), 200 diff --git a/microsetta_private_api/admin/sample_summary.py b/microsetta_private_api/admin/sample_summary.py index a1249a8ea..412183831 100644 --- a/microsetta_private_api/admin/sample_summary.py +++ b/microsetta_private_api/admin/sample_summary.py @@ -23,9 +23,7 @@ def per_sample(project, barcodes): if barcodes is None: barcodes = project_barcodes - ####### - ######### - for barcode in barcodes[:100]: + for barcode in barcodes: diag = admin_repo.retrieve_diagnostics_by_barcode(barcode) if diag is None: raise NotFound(f"Barcode not found: {barcode}") diff --git a/microsetta_private_api/admin/tests/test_admin_api.py b/microsetta_private_api/admin/tests/test_admin_api.py index e6ada343c..f38c10d6a 100644 --- a/microsetta_private_api/admin/tests/test_admin_api.py +++ b/microsetta_private_api/admin/tests/test_admin_api.py @@ -976,51 +976,23 @@ def test_post_daklapack_orders_failure_dak_api(self): mock_email.side_effect = [True] self._test_post_daklapack_orders(order_info, 401) - def test_query_barcode_stats_no_project_no_barcodes(self): - input_json = json.dumps({'project': 0, 'sample_barcodes': None}) + def test_query_project_barcode_stats_project(self): + input_json = json.dumps({'project': 7, 'email': 'foobar'}) response = self.client.post( - "api/admin/account_barcode_summary", - content_type='application/json', - data=input_json, - headers=MOCK_HEADERS - ) - - # an empty string project should be unkown - self.assertEqual(404, response.status_code) - - def test_query_barcode_stats_project_no_barcodes(self): - input_json = json.dumps({'project': 7, 'sample_barcodes': None}) - - response = self.client.post( - "api/admin/account_barcode_summary", + "api/admin/account_project_barcode_summary", content_type='application/json', data=input_json, headers=MOCK_HEADERS ) + # ...we assume the system is processing to send an email + # so nothing specific to verify on the response data self.assertEqual(200, response.status_code) - response_obj = json.loads(response.data) - - # there are 24 samples with project 7 in the test db - self.assertEqual(len(response_obj), 24) - - # ...10 have been received - self.assertEqual(sum([v['sample-received'] - for v in response_obj]), 10) - - # ...9 are valid - self.assertEqual(sum([v['sample-is-valid'] - for v in response_obj]), 9) - - # ...1 is missing a source - self.assertEqual(sum([v['no-associated-source'] - for v in response_obj]), 1) - def test_query_barcode_stats_project_barcodes(self): barcodes = ['000010307', '000023344', '000036855'] - input_json = json.dumps({'project': 7, 'sample_barcodes': barcodes}) + input_json = json.dumps({'sample_barcodes': barcodes}) response = self.client.post( "api/admin/account_barcode_summary", From 0650ab4778a1313ee8856fc8a43544d9b48033a4 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Fri, 14 May 2021 15:56:58 -0700 Subject: [PATCH 07/12] Emailing for sample summaries --- microsetta_private_api/model/log_event.py | 2 ++ microsetta_private_api/tasks.py | 29 +++++++++++++++---- .../templates/email/sample_summary.jinja2 | 2 ++ .../templates/email/sample_summary.plain | 2 ++ microsetta_private_api/util/email.py | 24 ++++++++++++++- .../util/tests/test_email.py | 21 ++++++++++++++ 6 files changed, 74 insertions(+), 6 deletions(-) create mode 100644 microsetta_private_api/templates/email/sample_summary.jinja2 create mode 100644 microsetta_private_api/templates/email/sample_summary.plain diff --git a/microsetta_private_api/model/log_event.py b/microsetta_private_api/model/log_event.py index 33fde3a02..5a68c78b6 100644 --- a/microsetta_private_api/model/log_event.py +++ b/microsetta_private_api/model/log_event.py @@ -38,6 +38,8 @@ class EventSubtype(Enum): DAK_ORDER_HOLD = "daklapack_order_hold" # Pester daniel if for what are expected to be unusual situations PESTER_DANIEL = "pester_daniel" + # for project per-sample summaries + EMAIL_PER_PROJECT_SUMMARY = "per_project_summary" class LogEvent(ModelBase): diff --git a/microsetta_private_api/tasks.py b/microsetta_private_api/tasks.py index 4d302e1d0..06a54f928 100644 --- a/microsetta_private_api/tasks.py +++ b/microsetta_private_api/tasks.py @@ -4,6 +4,10 @@ from microsetta_private_api.admin.email_templates import EmailMessage, \ BasicEmailMessage from microsetta_private_api.admin.sample_summary import per_sample +import pandas as pd +import tempfile +import os +import datetime @celery.task(ignore_result=True) @@ -15,17 +19,32 @@ def send_email(email, template_name, template_args): @celery.task(ignore_result=True) def send_basic_email(to_email, subject, template_base_fp, req_template_keys, msg_args, event_type_name, event_subtype_name, - from_email=None): + from_email=None, **kwargs): event_type = EventType[event_type_name] event_subtype = EventSubtype[event_subtype_name] msg_obj = BasicEmailMessage(subject, template_base_fp, req_template_keys, event_type, event_subtype) - SendEmail.send(to_email, msg_obj, msg_args, from_email) + SendEmail.send(to_email, msg_obj, msg_args, from_email, **kwargs) @celery.task(ignore_result=True) def per_sample_summary(email, project): summaries = per_sample(project, barcodes=None) - import pandas as pd - blah = pd.DataFrame(summaries) - blah.to_csv('/tmp/footest.stuff') + df = pd.DataFrame(summaries) + _, path = tempfile.mkstemp() + df.to_csv(path) + date = datetime.datetime.now().strftime("%d%b%Y") + filename = f'project-{project}-summary-{date}.csv' + + # NOTE: we are not using .delay so this action remains + # within the current celery task + template_args = {'date': date, 'project': project}, + send_basic_email(email, + f"[TMI-summary] Project {project}", + 'email/sample_summary', + list(template_args), + template_args, + "EMAIL", "EMAIL_PER_PROJECT_SUMMARY", + attachment_filepath=path, + attachment_filename=filename) + os.remove(path) diff --git a/microsetta_private_api/templates/email/sample_summary.jinja2 b/microsetta_private_api/templates/email/sample_summary.jinja2 new file mode 100644 index 000000000..9174ec6a6 --- /dev/null +++ b/microsetta_private_api/templates/email/sample_summary.jinja2 @@ -0,0 +1,2 @@ +

{{ date }}

+

{{ project }}

diff --git a/microsetta_private_api/templates/email/sample_summary.plain b/microsetta_private_api/templates/email/sample_summary.plain new file mode 100644 index 000000000..54f6e4403 --- /dev/null +++ b/microsetta_private_api/templates/email/sample_summary.plain @@ -0,0 +1,2 @@ +{{ date }} +{{ project }} diff --git a/microsetta_private_api/util/email.py b/microsetta_private_api/util/email.py index 393e228a6..8cd058bd2 100644 --- a/microsetta_private_api/util/email.py +++ b/microsetta_private_api/util/email.py @@ -1,6 +1,7 @@ import smtplib from email.mime.text import MIMEText from email.mime.multipart import MIMEMultipart +from email.mime.application import MIMEApplication from email.utils import formataddr from microsetta_private_api.config_manager import SERVER_CONFIG @@ -48,7 +49,8 @@ def reconnect(): raise smtplib.SMTPException("Unable to connect") @classmethod - def send(cls, to, email_template, email_template_args=None, from_=None): + def send(cls, to, email_template, email_template_args=None, from_=None, + attachment_filepath=None, attachment_filename=None): """Send a message Parameters @@ -63,7 +65,19 @@ def send(cls, to, email_template, email_template_args=None, from_=None): from_ : str, optional A from email address. This is optional, and if not provided the default defined by this class is used. + attachment_filepath : str, optional + A path to a file to attach. If specified, it is necessary for the + file to exist, and it is also necessary to provide a + "attachment_filename" + attachment_filename : str, optional + The name of the attachment within the email. If + "attachment_filepath" is specified, it is necessary to also include + a value here. """ + if attachment_filepath is not None or attachment_filename is not None: + assert attachment_filepath is not None and \ + attachment_filename is not None + message = MIMEMultipart("alternative") message['To'] = to message['From'] = from_ or cls.from_ @@ -79,5 +93,13 @@ def send(cls, to, email_template, email_template_args=None, from_=None): message.attach(first) message.attach(second) + if attachment_filepath: + with open(attachment_filepath, 'rb') as f: + data = f.read() + attachment = MIMEApplication(data, Name=attachment_filename) + disposition = f'attachment; filename="{attachment_filename}"' + attachment['Content-Disposition'] = disposition + message.attach(attachment) + cls.connect() cls.connection.send_message(message) diff --git a/microsetta_private_api/util/tests/test_email.py b/microsetta_private_api/util/tests/test_email.py index 7f94c972b..9bbcc078b 100644 --- a/microsetta_private_api/util/tests/test_email.py +++ b/microsetta_private_api/util/tests/test_email.py @@ -84,6 +84,27 @@ def test_failure_on_retry(self): SendEmail.send("foo", self.message) self.assertEqual(SendEmail._connect.call_count, 4) + def test_send_valid_message_attachment(self): + SendEmail._connect = MockConnect(False) + + SendEmail.send("foo", self.message, attachment_filepath=__file__, + attachment_filename='testfile') + obs = SendEmail.connection.observed.as_string() + self.assertRegex(obs, 'Content-Type: text/html') + self.assertRegex(obs, 'Content-Type: text/plain') + self.assertRegex(obs, ('Content-Disposition: attachment; ' + 'filename="testfile"')) + self.assertRegex(obs, 'a plain message') + self.assertRegex(obs, "a html message") + + def test_send_valid_message_badattachments(self): + SendEmail._connect = MockConnect(False) + + with self.assertRaises(AssertionError): + SendEmail.send("foo", self.message, attachment_filepath=__file__) + with self.assertRaises(AssertionError): + SendEmail.send("foo", self.message, attachment_filename="foo") + if __name__ == '__main__': unittest.main() From bafd644e00502d0204ffba8dcb64a5d95c5a9f1a Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Fri, 14 May 2021 17:21:08 -0700 Subject: [PATCH 08/12] typo --- microsetta_private_api/tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/microsetta_private_api/tasks.py b/microsetta_private_api/tasks.py index 06a54f928..9b27c5472 100644 --- a/microsetta_private_api/tasks.py +++ b/microsetta_private_api/tasks.py @@ -38,7 +38,7 @@ def per_sample_summary(email, project): # NOTE: we are not using .delay so this action remains # within the current celery task - template_args = {'date': date, 'project': project}, + template_args = {'date': date, 'project': project} send_basic_email(email, f"[TMI-summary] Project {project}", 'email/sample_summary', From 713687105ff17740cbfe331c28986d16b19cb326 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Mon, 17 May 2021 09:50:08 -0700 Subject: [PATCH 09/12] Addressing @wasade's and @dhakim87's comments --- microsetta_private_api/admin/admin_impl.py | 10 ++-- .../admin/sample_summary.py | 8 ++-- .../admin/tests/test_admin_api.py | 48 +++++++++++++++++-- .../api/microsetta_private_api.yaml | 13 ++++- microsetta_private_api/tasks.py | 5 +- 5 files changed, 68 insertions(+), 16 deletions(-) diff --git a/microsetta_private_api/admin/admin_impl.py b/microsetta_private_api/admin/admin_impl.py index 9835ed77d..2b1b63064 100644 --- a/microsetta_private_api/admin/admin_impl.py +++ b/microsetta_private_api/admin/admin_impl.py @@ -396,20 +396,20 @@ def query_email_stats(body, token_info): return jsonify(results), 200 -def query_project_barcode_stats(body, token_info): +def query_project_barcode_stats(body, token_info, strip_sampleid): validate_admin_access(token_info) email = body.get("email") project = body["project"] - celery_per_sample_summary.delay(email, project) + celery_per_sample_summary.delay(email, project, strip_sampleid) return None, 200 -def query_barcode_stats(body, token_info): +def query_barcode_stats(body, token_info, strip_sampleid): validate_admin_access(token_info) barcodes = body["sample_barcodes"] if len(barcodes) > 1000: - return jsonify({"message": "Too manny barcodes requested"}), 429 - summary = per_sample(None, barcodes) + return jsonify({"message": "Too manny barcodes requested"}), 400 + summary = per_sample(None, barcodes, strip_sampleid) return jsonify(summary), 200 diff --git a/microsetta_private_api/admin/sample_summary.py b/microsetta_private_api/admin/sample_summary.py index 412183831..cf8228e76 100644 --- a/microsetta_private_api/admin/sample_summary.py +++ b/microsetta_private_api/admin/sample_summary.py @@ -7,7 +7,7 @@ from werkzeug.exceptions import NotFound -def per_sample(project, barcodes): +def per_sample(project, barcodes, strip_sampleid): summaries = [] with Transaction() as t: admin_repo = AdminRepo(t) @@ -37,8 +37,8 @@ def per_sample(project, barcodes): source_type = None if source is None else source.source_type vio_id = None - if source is not None and source_type is Source.SOURCE_TYPE_HUMAN: - source_email = source.email + if source is not None and source_type == Source.SOURCE_TYPE_HUMAN: + source_email = source.source_data.email vio_id = template_repo.get_vioscreen_id_if_exists(account.id, source.id, @@ -63,7 +63,7 @@ def per_sample(project, barcodes): ) summary = { - "sampleid": barcode, + "sampleid": None if strip_sampleid else barcode, "project": project, "source-type": source_type, "site-sampled": sample_site, diff --git a/microsetta_private_api/admin/tests/test_admin_api.py b/microsetta_private_api/admin/tests/test_admin_api.py index f38c10d6a..bfa3b8788 100644 --- a/microsetta_private_api/admin/tests/test_admin_api.py +++ b/microsetta_private_api/admin/tests/test_admin_api.py @@ -976,11 +976,11 @@ def test_post_daklapack_orders_failure_dak_api(self): mock_email.side_effect = [True] self._test_post_daklapack_orders(order_info, 401) - def test_query_project_barcode_stats_project(self): + def test_query_project_barcode_stats_project_without_strip(self): input_json = json.dumps({'project': 7, 'email': 'foobar'}) response = self.client.post( - "api/admin/account_project_barcode_summary", + "api/admin/account_project_barcode_summary?strip_sampleid=false", content_type='application/json', data=input_json, headers=MOCK_HEADERS @@ -990,12 +990,26 @@ def test_query_project_barcode_stats_project(self): # so nothing specific to verify on the response data self.assertEqual(200, response.status_code) - def test_query_barcode_stats_project_barcodes(self): + def test_query_project_barcode_stats_project_with_strip(self): + input_json = json.dumps({'project': 7, 'email': 'foobar'}) + + response = self.client.post( + "api/admin/account_project_barcode_summary?strip_sampleid=True", + content_type='application/json', + data=input_json, + headers=MOCK_HEADERS + ) + + # ...we assume the system is processing to send an email + # so nothing specific to verify on the response data + self.assertEqual(200, response.status_code) + + def test_query_barcode_stats_project_barcodes_without_strip(self): barcodes = ['000010307', '000023344', '000036855'] input_json = json.dumps({'sample_barcodes': barcodes}) response = self.client.post( - "api/admin/account_barcode_summary", + "api/admin/account_barcode_summary?strip_sampleid=False", content_type='application/json', data=input_json, headers=MOCK_HEADERS @@ -1011,3 +1025,29 @@ def test_query_barcode_stats_project_barcodes(self): exp_status = [None, 'no-associated-source', 'sample-is-valid'] self.assertEqual([v['sample-status'] for v in response_obj], exp_status) + n_src = sum([v['source-email'] is not None for v in response_obj]) + self.assertEqual(n_src, 1) + + def test_query_barcode_stats_project_barcodes_with_strip(self): + barcodes = ['000010307', '000023344', '000036855'] + input_json = json.dumps({'sample_barcodes': barcodes}) + + response = self.client.post( + "api/admin/account_barcode_summary?strip_sampleid=True", + content_type='application/json', + data=input_json, + headers=MOCK_HEADERS + ) + # an empty string project should be unkown + self.assertEqual(200, response.status_code) + + response_obj = json.loads(response.data) + self.assertEqual(len(response_obj), 3) + + self.assertEqual([v['sampleid'] for v in response_obj], + [None] * len(barcodes)) + exp_status = [None, 'no-associated-source', 'sample-is-valid'] + self.assertEqual([v['sample-status'] for v in response_obj], + exp_status) + n_src = sum([v['source-email'] is not None for v in response_obj]) + self.assertEqual(n_src, 1) diff --git a/microsetta_private_api/api/microsetta_private_api.yaml b/microsetta_private_api/api/microsetta_private_api.yaml index b80cfec81..46f321d87 100644 --- a/microsetta_private_api/api/microsetta_private_api.yaml +++ b/microsetta_private_api/api/microsetta_private_api.yaml @@ -1513,6 +1513,8 @@ paths: '/admin/account_project_barcode_summary': post: operationId: microsetta_private_api.admin.admin_impl.query_project_barcode_stats + parameters: + - $ref: '#/components/parameters/strip_sampleid' tags: - Admin requestBody: @@ -1531,12 +1533,14 @@ paths: description: Successfully triggered the summary job '404': description: Requested project not found - '429': + '400': description: Too many barcodes requested '/admin/account_barcode_summary': post: operationId: microsetta_private_api.admin.admin_impl.query_barcode_stats + parameters: + - $ref: '#/components/parameters/strip_sampleid' tags: - Admin requestBody: @@ -1596,6 +1600,13 @@ components: schema: $ref: '#/components/schemas/account_id' required: true + strip_sampleid: + name: strip_sampleid + in: query + description: Whether to strip barcode information + schema: + type: boolean + required: true kit_id: name: kit_id in: path diff --git a/microsetta_private_api/tasks.py b/microsetta_private_api/tasks.py index 9b27c5472..82642db3e 100644 --- a/microsetta_private_api/tasks.py +++ b/microsetta_private_api/tasks.py @@ -28,8 +28,9 @@ def send_basic_email(to_email, subject, template_base_fp, req_template_keys, @celery.task(ignore_result=True) -def per_sample_summary(email, project): - summaries = per_sample(project, barcodes=None) +def per_sample_summary(email, project, strip_sampleid): + summaries = per_sample(project, barcodes=None, + strip_sampleid=strip_sampleid) df = pd.DataFrame(summaries) _, path = tempfile.mkstemp() df.to_csv(path) From 117fcac17e93efddada222695e7a533f6f54547d Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Mon, 17 May 2021 10:03:00 -0700 Subject: [PATCH 10/12] Allow celery tasks to execute in tests without a worker --- microsetta_private_api/admin/tests/test_admin_api.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/microsetta_private_api/admin/tests/test_admin_api.py b/microsetta_private_api/admin/tests/test_admin_api.py index bfa3b8788..8413e4863 100644 --- a/microsetta_private_api/admin/tests/test_admin_api.py +++ b/microsetta_private_api/admin/tests/test_admin_api.py @@ -4,6 +4,7 @@ from flask import Response import json import microsetta_private_api.server +from microsetta_private_api.celery_utils import celery from microsetta_private_api.model.account import Account, Address from microsetta_private_api.model.project import Project from microsetta_private_api.repo.transaction import Transaction @@ -26,6 +27,10 @@ # so our private api expects it to be sent as an int DUMMY_INT_DAK_ARTICLE_CODE = int(DUMMY_DAK_ARTICLE_CODE) +# force celery tasks to run locally rather than dispatch +# see https://docs.celeryproject.org/en/latest/userguide/configuration.html#std-setting-task_always_eager # noqa +celery.task_always_eager = False + def teardown_test_data(): with Transaction() as t: From ed80023387eaf41843cba367c359382415eb4476 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Mon, 17 May 2021 10:34:36 -0700 Subject: [PATCH 11/12] patch instead --- .../admin/tests/test_admin_api.py | 37 ++++++++++--------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/microsetta_private_api/admin/tests/test_admin_api.py b/microsetta_private_api/admin/tests/test_admin_api.py index 8413e4863..14f322d0c 100644 --- a/microsetta_private_api/admin/tests/test_admin_api.py +++ b/microsetta_private_api/admin/tests/test_admin_api.py @@ -4,7 +4,6 @@ from flask import Response import json import microsetta_private_api.server -from microsetta_private_api.celery_utils import celery from microsetta_private_api.model.account import Account, Address from microsetta_private_api.model.project import Project from microsetta_private_api.repo.transaction import Transaction @@ -27,10 +26,6 @@ # so our private api expects it to be sent as an int DUMMY_INT_DAK_ARTICLE_CODE = int(DUMMY_DAK_ARTICLE_CODE) -# force celery tasks to run locally rather than dispatch -# see https://docs.celeryproject.org/en/latest/userguide/configuration.html#std-setting-task_always_eager # noqa -celery.task_always_eager = False - def teardown_test_data(): with Transaction() as t: @@ -984,12 +979,16 @@ def test_post_daklapack_orders_failure_dak_api(self): def test_query_project_barcode_stats_project_without_strip(self): input_json = json.dumps({'project': 7, 'email': 'foobar'}) - response = self.client.post( - "api/admin/account_project_barcode_summary?strip_sampleid=false", - content_type='application/json', - data=input_json, - headers=MOCK_HEADERS - ) + with patch("microsetta_private_api.tasks." + "per_sample_summary.delay") as mock_delay: + mock_delay.return_value = None + response = self.client.post( + "api/admin/account_project_barcode_summary?" + "strip_sampleid=false", + content_type='application/json', + data=input_json, + headers=MOCK_HEADERS + ) # ...we assume the system is processing to send an email # so nothing specific to verify on the response data @@ -998,12 +997,16 @@ def test_query_project_barcode_stats_project_without_strip(self): def test_query_project_barcode_stats_project_with_strip(self): input_json = json.dumps({'project': 7, 'email': 'foobar'}) - response = self.client.post( - "api/admin/account_project_barcode_summary?strip_sampleid=True", - content_type='application/json', - data=input_json, - headers=MOCK_HEADERS - ) + with patch("microsetta_private_api.tasks." + "per_sample_summary.delay") as mock_delay: + mock_delay.return_value = None + response = self.client.post( + "api/admin/account_project_barcode_summary?" + "strip_sampleid=True", + content_type='application/json', + data=input_json, + headers=MOCK_HEADERS + ) # ...we assume the system is processing to send an email # so nothing specific to verify on the response data From 0a4818f3454739fdf906457ad81f298d3b1f781d Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Fri, 21 May 2021 15:19:49 -0700 Subject: [PATCH 12/12] Use the project name --- .../admin/tests/test_admin_repo.py | 13 +++++++ microsetta_private_api/repo/admin_repo.py | 35 +++++++++++++++++++ microsetta_private_api/tasks.py | 10 ++++-- 3 files changed, 56 insertions(+), 2 deletions(-) diff --git a/microsetta_private_api/admin/tests/test_admin_repo.py b/microsetta_private_api/admin/tests/test_admin_repo.py index 434150aab..2ea4c4a7c 100644 --- a/microsetta_private_api/admin/tests/test_admin_repo.py +++ b/microsetta_private_api/admin/tests/test_admin_repo.py @@ -300,6 +300,19 @@ def test_retrieve_diagnostics_by_barcode_not_agp(self): self.assertGreater(len(diag['scans_info']), 0) self.assertGreater(len(diag['projects_info']), 0) + def test_get_project_name_fail(self): + with Transaction() as t: + admin_repo = AdminRepo(t) + + with self.assertRaisesRegex(NotFound, "not found"): + admin_repo.get_project_name(9999999) + + def test_get_project_name(self): + with Transaction() as t: + admin_repo = AdminRepo(t) + obs = admin_repo.get_project_name(1) + self.assertEqual(obs, 'American Gut Project') + def test_get_project_barcodes_fail(self): with Transaction() as t: admin_repo = AdminRepo(t) diff --git a/microsetta_private_api/repo/admin_repo.py b/microsetta_private_api/repo/admin_repo.py index f069f2509..7324474c8 100644 --- a/microsetta_private_api/repo/admin_repo.py +++ b/microsetta_private_api/repo/admin_repo.py @@ -414,6 +414,41 @@ def _rows_to_dicts_list(rows): return diagnostic + def get_project_name(self, project_id): + """Obtain the name of a project using the project_id + + Parameters + ---------- + project_id : int + The project ID to obtain barcodes for + + Returns + ------- + str + The name of the project + + Raises + ------ + NotFound + The project ID could not be found + """ + test = """SELECT EXISTS( + SELECT 1 + FROM barcodes.project + WHERE project_id=%s + )""" + query = """SELECT project + FROM barcodes.project + WHERE project_id=%s""" + + with self._transaction.cursor() as cur: + cur.execute(test, [project_id, ]) + if not cur.fetchone()[0]: + raise NotFound(f"Project f'{project_id}' not found") + else: + cur.execute(query, [project_id, ]) + return cur.fetchone()[0] + def get_project_barcodes(self, project_id): """Obtain the barcodes associated with a project diff --git a/microsetta_private_api/tasks.py b/microsetta_private_api/tasks.py index 82642db3e..ffad0794a 100644 --- a/microsetta_private_api/tasks.py +++ b/microsetta_private_api/tasks.py @@ -4,6 +4,8 @@ from microsetta_private_api.admin.email_templates import EmailMessage, \ BasicEmailMessage from microsetta_private_api.admin.sample_summary import per_sample +from microsetta_private_api.repo.transaction import Transaction +from microsetta_private_api.repo.admin_repo import AdminRepo import pandas as pd import tempfile import os @@ -29,17 +31,21 @@ def send_basic_email(to_email, subject, template_base_fp, req_template_keys, @celery.task(ignore_result=True) def per_sample_summary(email, project, strip_sampleid): + with Transaction() as t: + admin = AdminRepo(t) + project_name = admin.get_project_name(project) + summaries = per_sample(project, barcodes=None, strip_sampleid=strip_sampleid) df = pd.DataFrame(summaries) _, path = tempfile.mkstemp() df.to_csv(path) date = datetime.datetime.now().strftime("%d%b%Y") - filename = f'project-{project}-summary-{date}.csv' + filename = f'project-{project_name}-summary-{date}.csv' # NOTE: we are not using .delay so this action remains # within the current celery task - template_args = {'date': date, 'project': project} + template_args = {'date': date, 'project': project_name} send_basic_email(email, f"[TMI-summary] Project {project}", 'email/sample_summary',