Skip to content

Commit

Permalink
feat: take exec ed course data from course run instead of additional_…
Browse files Browse the repository at this point in the history
…metadata attempt 4

chore: Add space in logging

fix: courses without enroll-by date should be indexed
  • Loading branch information
marlonkeating committed Oct 24, 2024
1 parent 0eb18b0 commit e79d60d
Show file tree
Hide file tree
Showing 7 changed files with 256 additions and 201 deletions.
37 changes: 37 additions & 0 deletions enterprise_catalog/apps/api/tests/test_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from enterprise_catalog.apps.catalog.models import CatalogQuery, ContentMetadata
from enterprise_catalog.apps.catalog.serializers import (
DEFAULT_NORMALIZED_PRICE,
NormalizedContentMetadataSerializer,
_find_best_mode_seat,
)
from enterprise_catalog.apps.catalog.tests.factories import (
Expand Down Expand Up @@ -57,6 +58,37 @@ def mock_task(self, *args, **kwargs): # pylint: disable=unused-argument
mock_task.name = 'mock_task'


def _hydrate_normalized_metadata(metadata_record):
    """
    Fill in the normalized metadata fields of one ContentMetadata record.

    Two keys are written into ``metadata_record.json_metadata`` in place:

    * ``normalized_metadata``: serializer output for the course-level metadata.
    * ``normalized_metadata_by_run``: serializer output for each entry of
      ``course_runs``, keyed by that run's ``key``.

    The record is not saved here; persisting it is left to the caller.
    """
    course_json = metadata_record.json_metadata

    course_level_input = {'course_metadata': course_json}
    course_json['normalized_metadata'] = NormalizedContentMetadataSerializer(course_level_input).data

    # Attach the (initially empty) per-run mapping before serializing any run,
    # so each run is serialized against the same course metadata state as before.
    per_run = {}
    course_json['normalized_metadata_by_run'] = per_run
    for course_run in course_json.get('course_runs', []):
        run_level_input = {
            'course_run_metadata': course_run,
            'course_metadata': course_json,
        }
        per_run[course_run['key']] = NormalizedContentMetadataSerializer(run_level_input).data


def _hydrate_course_normalized_metadata():
    """
    Hydrate and save normalized metadata for every course ContentMetadata record.

    Tests that generate ContentMetadata fixtures need this because those
    fixtures do not have normalized_metadata populated by default.
    """
    for course_record in ContentMetadata.objects.filter(content_type=COURSE):
        _hydrate_normalized_metadata(course_record)
        course_record.save()


@ddt.ddt
class TestTaskResultFunctions(TestCase):
"""
Expand Down Expand Up @@ -830,6 +862,8 @@ def setUp(self):
self.course_run_metadata_unpublished.catalog_queries.set([course_run_catalog_query])
self.course_run_metadata_unpublished.save()

_hydrate_course_normalized_metadata()

def _set_up_factory_data_for_algolia(self):
expected_catalog_uuids = sorted([
str(self.enterprise_catalog_courses.uuid),
Expand Down Expand Up @@ -1030,6 +1064,7 @@ def test_index_algolia_program_common_uuids_only(self, mock_search_client):
test_course_1.save()
test_course_2.save()
test_course_3.save()
_hydrate_course_normalized_metadata()

actual_algolia_products_sent = []

Expand Down Expand Up @@ -1129,6 +1164,7 @@ def test_index_algolia_program_unindexable_content(self, mock_search_client):
test_course_1.save()
test_course_2.save()
test_course_3.save()
_hydrate_course_normalized_metadata()

actual_algolia_products_sent = []

Expand Down Expand Up @@ -2328,6 +2364,7 @@ def test_index_algolia_duplicate_content_uuids(self, mock_search_client):
)
course_run_for_duplicate = ContentMetadataFactory(content_type=COURSE_RUN, parent_content_key='duplicateX')
course_run_for_duplicate.catalog_queries.set([self.enterprise_catalog_course_runs.catalog_query])
_hydrate_course_normalized_metadata()

actual_algolia_products_sent_sequence = []

Expand Down
206 changes: 98 additions & 108 deletions enterprise_catalog/apps/api/v1/export_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,69 +170,102 @@ def program_hit_to_row(hit):
return csv_row


def course_hit_to_row(hit):
"""
Helper function to construct a CSV row according to a single Algolia result course hit.
"""
csv_row = []
csv_row.append(hit.get('title'))

if hit.get('partners'):
csv_row.append(hit['partners'][0]['name'])
else:
csv_row.append(None)
def _base_csv_row_data(hit):
    """
    Return the formatted attributes shared by the CSV rows of all course types.

    Args:
        hit (dict): A single Algolia course hit.

    Returns:
        dict: The values used to build a CSV row, under the keys ``title``,
        ``partner_name``, ``start_date``, ``end_date``, ``enroll_by``,
        ``aggregation_key``, ``advertised_course_run_key``, ``language``,
        ``transcript_languages``, ``marketing_url``, ``short_description``,
        ``subjects``, ``skills``, ``min_effort``, ``max_effort``,
        ``weeks_to_complete``, ``outcome``, ``advertised_course_run`` and
        ``content_price``. Date values are strings formatted with
        ``DATE_FORMAT``, or the original falsy value when the hit has none.
    """
    # ``or`` fallbacks are used instead of ``.get(key, default)`` so that keys
    # which are present but explicitly null are handled like missing keys.
    title = hit.get('title')
    aggregation_key = hit.get('aggregation_key')
    language = hit.get('language')
    transcript_languages = ', '.join(hit.get('transcript_languages') or [])
    marketing_url = hit.get('marketing_url')
    short_description = strip_tags(hit.get('short_description') or '')
    subjects = ', '.join(hit.get('subjects') or [])
    skills = ', '.join([skill['name'] for skill in hit.get('skills') or []])
    outcome = strip_tags(hit.get('outcome') or '')  # What You’ll Learn

    # FIXME: currently ignores partner names when a course has multiple partners
    partners = hit.get('partners')
    partner_name = partners[0]['name'] if partners else None

    advertised_course_run = hit.get('advertised_course_run') or {}
    advertised_course_run_key = advertised_course_run.get('key')
    min_effort = advertised_course_run.get('min_effort')
    max_effort = advertised_course_run.get('max_effort')
    weeks_to_complete = advertised_course_run.get('weeks_to_complete')  # Length

    if start_date := advertised_course_run.get('start'):
        start_date = parser.parse(start_date).strftime(DATE_FORMAT)

    if end_date := advertised_course_run.get('end'):
        end_date = parser.parse(end_date).strftime(DATE_FORMAT)

    # enroll_by is passed to fromtimestamp(), i.e. handled as a POSIX timestamp;
    # without a tz argument the result is rendered in the process-local timezone.
    if enroll_by := advertised_course_run.get('enroll_by'):
        enroll_by = datetime.datetime.fromtimestamp(enroll_by).strftime(DATE_FORMAT)

    # Prices are truncated (not rounded) to whole numbers for the export.
    if content_price := advertised_course_run.get('content_price'):
        content_price = math.trunc(float(content_price))

    return {
        'title': title,
        'partner_name': partner_name,
        'start_date': start_date,
        'end_date': end_date,
        'enroll_by': enroll_by,
        'aggregation_key': aggregation_key,
        'advertised_course_run_key': advertised_course_run_key,
        'language': language,
        'transcript_languages': transcript_languages,
        'marketing_url': marketing_url,
        'short_description': short_description,
        'subjects': subjects,
        'skills': skills,
        'min_effort': min_effort,
        'max_effort': max_effort,
        'weeks_to_complete': weeks_to_complete,
        'outcome': outcome,
        'advertised_course_run': advertised_course_run,
        'content_price': content_price,
    }

csv_row.append(hit.get('language'))
csv_row.append(', '.join(hit.get('transcript_languages', [])))
csv_row.append(hit.get('marketing_url'))
csv_row.append(strip_tags(hit.get('short_description', '')))

csv_row.append(', '.join(hit.get('subjects', [])))
csv_row.append(key)
csv_row.append(hit.get('aggregation_key'))
def course_hit_to_row(hit):
"""
Helper function to construct a CSV row according to a single Algolia result course hit.
"""
row_data = _base_csv_row_data(hit)
csv_row = []
csv_row.append(row_data.get('title'))
csv_row.append(row_data.get('partner_name'))

skills = [skill['name'] for skill in hit.get('skills', [])]
csv_row.append(', '.join(skills))
advertised_course_run = row_data.get('advertised_course_run')

advertised_course_run = hit.get('advertised_course_run', {})
csv_row.append(advertised_course_run.get('min_effort'))
csv_row.append(advertised_course_run.get('max_effort'))
csv_row.append(advertised_course_run.get('weeks_to_complete')) # Length
csv_row.append(row_data.get('start_date'))
csv_row.append(row_data.get('end_date'))

csv_row.append(strip_tags(hit.get('outcome', ''))) # What You’ll Learn
# upgrade_deadline deprecated in favor of enroll_by
if upgrade_deadline := advertised_course_run.get('upgrade_deadline'):
upgrade_deadline = datetime.datetime.fromtimestamp(upgrade_deadline).strftime(DATE_FORMAT)
csv_row.append(upgrade_deadline)
csv_row.append(row_data.get('enroll_by'))
csv_row.append(', '.join(hit.get('programs', [])))
csv_row.append(', '.join(hit.get('program_titles', [])))

pacing_type = advertised_course_run.get('pacing_type')
csv_row.append(pacing_type)

csv_row.append(hit.get('level_type'))
csv_row.append(row_data.get('content_price'))
csv_row.append(row_data.get('language'))
csv_row.append(row_data.get('transcript_languages'))
csv_row.append(row_data.get('marketing_url'))
csv_row.append(row_data.get('short_description'))
csv_row.append(row_data.get('subjects'))
csv_row.append(row_data.get('advertised_course_run_key'))
csv_row.append(row_data.get('aggregation_key'))
csv_row.append(row_data.get('skills'))
csv_row.append(row_data.get('min_effort'))
csv_row.append(row_data.get('max_effort'))
csv_row.append(row_data.get('weeks_to_complete'))
csv_row.append(row_data.get('outcome'))

csv_row.append(strip_tags(hit.get('prerequisites_raw', ''))) # Pre-requisites

Expand All @@ -242,75 +275,32 @@ def course_hit_to_row(hit):
return csv_row


def fetch_and_format_registration_date(obj):
    """
    Return the ``registration_deadline`` of ``obj`` formatted as ``MM-DD-YYYY``.

    Args:
        obj (dict): Mapping that may contain a ``registration_deadline`` string
            whose date portion (the text before the first ``T``) is
            ``YYYY-MM-DD``.

    Returns:
        str | None: The reformatted date, or ``None`` when the deadline is
        missing, empty, or its date portion is not ``YYYY-MM-DD``.
    """
    enroll_by_date = obj.get('registration_deadline')
    if not enroll_by_date:
        # A missing or null deadline previously raised AttributeError on .split().
        return None

    # Only the date portion is needed; drop everything from the "T" onwards.
    stripped_enroll_by = enroll_by_date.split("T")[0]
    try:
        enroll_by_datetime_obj = datetime.datetime.strptime(stripped_enroll_by, '%Y-%m-%d')
    except ValueError as exc:
        logger.info(f"Unable to format registration deadline, failed with error: {exc}")
        return None
    return enroll_by_datetime_obj.strftime('%m-%d-%Y')


def exec_ed_course_to_row(hit):
    """
    Helper function to construct a CSV row according to a single executive education course hit.

    Args:
        hit (dict): A single Algolia executive education course hit.

    Returns:
        list: The row values, in column order: title, partner name, start date,
        end date, enroll-by date, content price, language, transcript
        languages, marketing URL, short description, subjects, advertised
        course run key, aggregation key, skills, min effort, max effort,
        weeks to complete, outcome, full description.
    """
    row_data = _base_csv_row_data(hit)
    return [
        row_data.get('title'),
        # The shared row data exposes the partner under 'partner_name';
        # looking up 'partners' here always produced an empty column.
        row_data.get('partner_name'),
        row_data.get('start_date'),
        row_data.get('end_date'),
        row_data.get('enroll_by'),
        row_data.get('content_price'),
        row_data.get('language'),
        row_data.get('transcript_languages'),
        row_data.get('marketing_url'),
        row_data.get('short_description'),
        row_data.get('subjects'),
        row_data.get('advertised_course_run_key'),
        row_data.get('aggregation_key'),
        row_data.get('skills'),
        row_data.get('min_effort'),
        row_data.get('max_effort'),
        row_data.get('weeks_to_complete'),  # Length
        row_data.get('outcome'),  # What You’ll Learn
        # ``or ''`` also covers a full_description that is present but null.
        strip_tags(hit.get('full_description') or ''),
    ]
Expand Down
13 changes: 0 additions & 13 deletions enterprise_catalog/apps/api/v1/tests/test_export_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,3 @@ def test_retrieve_available_fields(self):
"""
# assert that ALGOLIA_ATTRIBUTES_TO_RETRIEVE is a SUBSET of ALGOLIA_FIELDS
assert set(export_utils.ALGOLIA_ATTRIBUTES_TO_RETRIEVE) <= set(algolia_utils.ALGOLIA_FIELDS)

def test_fetch_and_format_registration_date(self):
    """
    Test the export properly fetches executive education registration dates
    """
    # expected hit format from algolia, properly reformatted for csv download
    iso_style_hit = {'registration_deadline': '2002-02-15T12:12:200'}
    formatted = export_utils.fetch_and_format_registration_date(iso_style_hit)
    assert formatted == '02-15-2002'

    # some other format from algolia, should return None
    other_style_hit = {'registration_deadline': '02-15-2015T12:12:200'}
    unformatted = export_utils.fetch_and_format_registration_date(other_style_hit)
    assert unformatted is None
1 change: 1 addition & 0 deletions enterprise_catalog/apps/api/v1/tests/test_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -959,6 +959,7 @@ class EnterpriseCatalogWorkbookViewTests(APITestMixin):
"weeks_to_complete": 6,
"upgrade_deadline": 32503680000.0,
"enroll_by": 32503680000.0,
"content_price": 2843.00
},
"course_runs": [

Expand Down
Loading

0 comments on commit e79d60d

Please sign in to comment.