Skip to content

Commit

Permalink
refactor: use serializer to compute normalized_metadata
Browse files Browse the repository at this point in the history
  • Loading branch information
iloveagent57 committed Aug 7, 2024
1 parent 3b1fc9f commit 5c19d63
Show file tree
Hide file tree
Showing 3 changed files with 164 additions and 88 deletions.
92 changes: 7 additions & 85 deletions enterprise_catalog/apps/api/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,12 @@
from django_celery_results.models import TaskResult
from requests.exceptions import ConnectionError as RequestsConnectionError

from enterprise_catalog.apps.api.constants import CourseMode
from enterprise_catalog.apps.api_client.discovery import DiscoveryApiClient
from enterprise_catalog.apps.catalog.algolia_utils import (
ALGOLIA_FIELDS,
ALGOLIA_JSON_METADATA_MAX_SIZE,
ALGOLIA_UUID_BATCH_SIZE,
_algolia_object_from_product,
_get_course_run_by_uuid,
configure_algolia_index,
create_algolia_objects,
get_algolia_object_id,
Expand Down Expand Up @@ -53,6 +51,9 @@
create_course_associated_programs,
update_contentmetadata_from_discovery,
)
from enterprise_catalog.apps.catalog.serializers import (
NormalizedContentMetadataSerializer,
)
from enterprise_catalog.apps.catalog.utils import (
batch,
get_content_filter_hash,
Expand All @@ -70,20 +71,6 @@
# ENT-4980 every batch "shard" record in Algolia should have all of these that pertain to the course
EXPLORE_CATALOG_TITLES = ['A la carte', 'Subscription']

# The closer a mode is to the beginning of this list, the more likely a seat with that mode will be used to find the
# upgrade deadline for the course (and course run).
# Consumed by `_find_best_mode_seat`; a seat whose mode is not listed here ranks after every listed mode.
BEST_MODE_ORDER = [
CourseMode.VERIFIED,
CourseMode.PROFESSIONAL,
CourseMode.NO_ID_PROFESSIONAL_MODE,
CourseMode.UNPAID_EXECUTIVE_EDUCATION,
CourseMode.AUDIT,
]

# The default normalized content price for any content which otherwise
# would have a null price. It is applied with `or` in `_normalize_course_metadata`,
# so any falsy source price (e.g. None or 0) resolves to this value.
DEFAULT_NORMALIZED_PRICE = 0.0


def _fetch_courses_by_keys(course_keys):
"""
Expand Down Expand Up @@ -251,73 +238,6 @@ def update_full_content_metadata_task(self, force=False, dry_run=False): # pyli
_update_full_content_metadata_program(content_keys, dry_run)


def _find_best_mode_seat(seats):
    """
    Return the seat whose course mode ranks best according to BEST_MODE_ORDER.

    Each seat is a dict that must carry a 'type' key holding its mode. Seats whose
    mode does not appear in BEST_MODE_ORDER rank after every listed mode. Because the
    sort is stable, ties are resolved in favor of the seat that comes first in `seats`.
    Returns None when `seats` is empty.
    """
    # Map each mode to its position in BEST_MODE_ORDER; a lower position means a better mode.
    ranking = {mode: position for position, mode in enumerate(BEST_MODE_ORDER)}
    # Rank shared by every mode that is missing from BEST_MODE_ORDER.
    fallback_rank = len(ranking)

    ordered = sorted(seats, key=lambda seat: ranking.get(seat['type'], fallback_rank))
    return ordered[0] if ordered else None


def _normalize_course_metadata(course_metadata_record):
    """
    Build a dict of normalized values from the record's `json_metadata` and store it, in place,
    under the `normalized_metadata` key of that same `json_metadata` dict.

    Downstream consumers can read these keys instead of normalizing on their own. Keys that may
    be written:
    * start_date: When the course starts
    * end_date: When the course ends
    * enroll_by_date: The deadline for enrollment
    * content_price: The price of a course

    Where each value comes from depends on the course type:
    * Exec Ed courses read dates from `additional_metadata` and the price from the
      paid-executive-education entitlement (if several match, the last one wins).
    * All other (OCM) courses read from the advertised course run; when that run cannot be
      found, `normalized_metadata` is left empty.

    Returns None; the record's `json_metadata` is mutated.
    """
    metadata = course_metadata_record.json_metadata
    normalized = {}

    if course_metadata_record.is_exec_ed_2u_course:
        # Exec Ed: dates live in `additional_metadata`, price lives on the entitlement.
        extra = metadata.get('additional_metadata', {})
        normalized.update({
            'start_date': extra.get('start_date'),
            'end_date': extra.get('end_date'),
            'enroll_by_date': extra.get('registration_deadline'),
        })
        for entitlement in metadata.get('entitlements', []):
            if entitlement.get('mode') == CourseMode.PAID_EXECUTIVE_EDUCATION:
                # A falsy price falls back to the default normalized price.
                normalized['content_price'] = entitlement.get('price') or DEFAULT_NORMALIZED_PRICE
    else:
        # OCM: everything is derived from the advertised course run.
        run = _get_course_run_by_uuid(metadata, metadata.get('advertised_course_run_uuid'))
        if run is not None:
            normalized.update({
                'start_date': run.get('start'),
                'end_date': run.get('end'),
                'content_price': run.get('first_enrollable_paid_seat_price') or DEFAULT_NORMALIZED_PRICE,
            })
            seats = run.get('seats', [])
            best_seat = _find_best_mode_seat(seats)
            if best_seat:
                normalized['enroll_by_date'] = best_seat.get('upgrade_deadline')
            else:
                logger.info(f"No Seat Found for course run '{run.get('key')}'. Seats: {seats}")

    # Publish the normalized values under a net-new key.
    metadata['normalized_metadata'] = normalized


def _update_full_content_metadata_course(content_keys, dry_run=False):
"""
Given content_keys, finds the associated ContentMetadata records with a type of course and looks up the full
Expand Down Expand Up @@ -379,8 +299,10 @@ def _update_full_content_metadata_course(content_keys, dry_run=False):
if run.get('uuid') == course_run_uuid:
run.update({'start': start_date, 'end': end_date})

# Perform more steps to normalize and move keys around for more consistency across content types.
_normalize_course_metadata(metadata_record)
# Perform more steps to normalize and move keys around
# for more consistency across content types.
metadata_record.json_metadata['normalized_metadata'] =\
NormalizedContentMetadataSerializer(metadata_record).data

if review := reviews_for_courses_dict.get(content_key):
metadata_record.json_metadata['reviews_count'] = review.get('reviews_count')
Expand Down
9 changes: 6 additions & 3 deletions enterprise_catalog/apps/api/tests/test_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@
PROGRAM,
)
from enterprise_catalog.apps.catalog.models import CatalogQuery, ContentMetadata
from enterprise_catalog.apps.catalog.serializers import (
DEFAULT_NORMALIZED_PRICE,
_find_best_mode_seat,
)
from enterprise_catalog.apps.catalog.tests.factories import (
CatalogQueryFactory,
ContentMetadataFactory,
Expand Down Expand Up @@ -394,8 +398,7 @@ def test_find_best_mode_seat(self, seats, expected_seat):
"""
Test the behavior of _find_best_mode_seat().
"""
# pylint: disable=protected-access
assert tasks._find_best_mode_seat(seats) == expected_seat
assert _find_best_mode_seat(seats) == expected_seat

# pylint: disable=unused-argument
@mock.patch('enterprise_catalog.apps.api.tasks.task_recently_run', return_value=False)
Expand Down Expand Up @@ -491,7 +494,7 @@ def test_update_full_metadata(self, mock_oauth_client, mock_partition_course_key

assert metadata_2.json_metadata['aggregation_key'] == f'course:{course_key_2}'
assert metadata_2.json_metadata['full_course_only_field'] == 'test_2'
assert metadata_2.json_metadata['normalized_metadata']['content_price'] == tasks.DEFAULT_NORMALIZED_PRICE
assert metadata_2.json_metadata['normalized_metadata']['content_price'] == DEFAULT_NORMALIZED_PRICE
assert set(program_data.items()).issubset(set(metadata_2.json_metadata['programs'][0].items()))

assert metadata_3.json_metadata['aggregation_key'] == f'course:{course_key_3}'
Expand Down
151 changes: 151 additions & 0 deletions enterprise_catalog/apps/catalog/serializers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
"""
Defines serializers to process data at the boundaries
of the `catalog` domain.
"""
import logging

from django.utils.functional import cached_property
from drf_spectacular.utils import extend_schema_field
from rest_framework import serializers

from enterprise_catalog.apps.api.constants import CourseMode

from .algolia_utils import _get_course_run_by_uuid


logger = logging.getLogger(__name__)

# The default normalized content price for any content which otherwise
# would have a null price. It is applied with `or` in
# `NormalizedContentMetadataSerializer.get_content_price`, so any falsy
# source price (e.g. None or 0) resolves to this value.
DEFAULT_NORMALIZED_PRICE = 0.0

# The closer a mode is to the beginning of this list, the more likely a seat with that mode will be used to find the
# upgrade deadline for the course (and course run).
# Consumed by `_find_best_mode_seat`; a seat whose mode is not listed here ranks after every listed mode.
BEST_MODE_ORDER = [
CourseMode.VERIFIED,
CourseMode.PROFESSIONAL,
CourseMode.NO_ID_PROFESSIONAL_MODE,
CourseMode.UNPAID_EXECUTIVE_EDUCATION,
CourseMode.AUDIT,
]


def _find_best_mode_seat(seats):
    """
    Pick the seat with the "best" course mode, where "best" means earliest in BEST_MODE_ORDER.

    Every seat is a dict and must contain a 'type' key naming its mode. Modes that are not
    present in BEST_MODE_ORDER all share the worst rank. If several seats tie for the best
    rank, the one appearing first in `seats` is returned. An empty `seats` yields None.
    """
    rank_by_mode = dict(zip(BEST_MODE_ORDER, range(len(BEST_MODE_ORDER))))
    unlisted_rank = len(rank_by_mode)

    def mode_rank(seat):
        """
        Rank (int) of a seat dict: the position of its mode in BEST_MODE_ORDER,
        or one past the last position when the mode is not listed.
        """
        return rank_by_mode.get(seat['type'], unlisted_rank)

    # min() returns the first element with the lowest rank, which is the same element
    # a stable sort would place at index 0.
    return min(seats, key=mode_rank, default=None)


class ReadOnlySerializer(serializers.Serializer):
    """
    Base class for serializers that are used for output only.

    Instances can turn objects into their serialized representation, but every
    write-side hook (deserialization, create, update) raises NotImplementedError.
    """

    def to_internal_value(self, data):
        """
        Deserialization is not supported; always raises NotImplementedError.
        """
        raise NotImplementedError

    def create(self, validated_data):
        """
        Creating objects is not supported; always raises NotImplementedError.
        """
        raise NotImplementedError

    def update(self, instance, validated_data):
        """
        Updating objects is not supported; always raises NotImplementedError.
        """
        raise NotImplementedError


# pylint: disable=abstract-method
class NormalizedContentMetadataSerializer(ReadOnlySerializer):
    """
    Produces a dict of metadata keys with values calculated
    by normalizing existing key-values. This will be helpful for
    downstream consumers, who should be able to use this dictionary
    instead of doing their own independent normalization.
    Note that course-type-specific definitions of each of these keys may be more nuanced.

    Every field resolves to None (rather than raising) when the data it depends on
    is missing, e.g. when a non-Exec-Ed course has no resolvable advertised course run.
    """
    start_date = serializers.SerializerMethodField(help_text='When the course starts')
    end_date = serializers.SerializerMethodField(help_text='When the course ends')
    enroll_by_date = serializers.SerializerMethodField(help_text='The deadline for enrollment')
    content_price = serializers.SerializerMethodField(help_text='The price of a course in USD')

    # NOTE(review): the two cached properties below read `self.instance`, so this
    # serializer presumably only supports single-record use (not `many=True`) -- confirm.

    @cached_property
    def advertised_course_run(self):
        """
        The course run dict matching the record's `advertised_course_run_uuid`,
        as resolved by `_get_course_run_by_uuid`. May be None; callers must check.
        """
        advertised_course_run_uuid = self.instance.json_metadata.get('advertised_course_run_uuid')
        return _get_course_run_by_uuid(self.instance.json_metadata, advertised_course_run_uuid)

    @cached_property
    def additional_metadata(self):
        """
        The record's `additional_metadata` dict, or an empty dict when the key is absent.
        """
        return self.instance.json_metadata.get('additional_metadata', {})

    @extend_schema_field(serializers.DateTimeField)
    def get_start_date(self, obj) -> str:
        """
        Exec Ed: `additional_metadata.start_date`. Otherwise the advertised run's
        `start`, or None when there is no advertised run or the value is falsy.
        """
        if obj.is_exec_ed_2u_course:
            return self.additional_metadata.get('start_date')

        if not self.advertised_course_run:
            return None

        return self.advertised_course_run.get('start') or None

    @extend_schema_field(serializers.DateTimeField)
    def get_end_date(self, obj) -> str:
        """
        Exec Ed: `additional_metadata.end_date`. Otherwise the advertised run's
        `end`, or None when there is no advertised run or the value is falsy.
        """
        if obj.is_exec_ed_2u_course:
            return self.additional_metadata.get('end_date')

        if not self.advertised_course_run:
            return None

        return self.advertised_course_run.get('end') or None

    @extend_schema_field(serializers.DateTimeField)
    def get_enroll_by_date(self, obj) -> str:
        """
        Exec Ed: `additional_metadata.registration_deadline`. Otherwise the
        `upgrade_deadline` of the best-mode seat of the advertised run.

        Returns None when there is no advertised run, or when the run has no seats
        (the latter case is logged).
        """
        if obj.is_exec_ed_2u_course:
            return self.additional_metadata.get('registration_deadline')

        course_run = self.advertised_course_run
        # Guard: without an advertised run there are no seats to inspect. Calling
        # `.get()` on None here would raise AttributeError and abort serialization.
        if course_run is None:
            return None

        all_seats = course_run.get('seats', [])
        seat = _find_best_mode_seat(all_seats)
        if seat:
            return seat.get('upgrade_deadline')

        logger.info(
            f"No Seat Found for course run '{course_run.get('key')}'. "
            f"Seats: {all_seats}"
        )
        return None

    @extend_schema_field(serializers.FloatField)
    def get_content_price(self, obj) -> float:
        """
        Exec Ed: the price of the first paid-executive-education entitlement.
        Otherwise (or when an Exec Ed course has no such entitlement) the advertised
        run's `first_enrollable_paid_seat_price`.

        A falsy price resolves to DEFAULT_NORMALIZED_PRICE. Returns None when no
        entitlement matched and there is no advertised run to read a price from.
        """
        if obj.is_exec_ed_2u_course:
            for entitlement in obj.json_metadata.get('entitlements', []):
                if entitlement.get('mode') == CourseMode.PAID_EXECUTIVE_EDUCATION:
                    return entitlement.get('price') or DEFAULT_NORMALIZED_PRICE

        course_run = self.advertised_course_run
        # Guard: reached by non-Exec-Ed courses and by Exec Ed courses without a
        # matching entitlement; either may lack an advertised run.
        if course_run is None:
            return None

        return course_run.get('first_enrollable_paid_seat_price') or DEFAULT_NORMALIZED_PRICE

0 comments on commit 5c19d63

Please sign in to comment.