Skip to content
This repository has been archived by the owner on Sep 16, 2022. It is now read-only.

Commit

Permalink
Merge pull request #543 from CSCfi/stable
Browse files Browse the repository at this point in the history
Merge stable to master
  • Loading branch information
hannu40k authored Mar 2, 2020
2 parents 63225e1 + 01026a3 commit 3c54b82
Show file tree
Hide file tree
Showing 14 changed files with 1,112 additions and 34 deletions.
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
coveralls==1.8.2 # code coverage reporting in travis
datacite==1.0.1 # BSD-license. convert datasets to datacite xml. datacite metadata store api wrappers
python-dateutil==2.8.0
Django==2.1.11 # BSD-license
Django==2.2.10 # BSD-license
elasticsearch<6.0.0
hiredis==1.0.0 # Used by redis (redis-py) for parser
djangorestframework==3.9.4 # BSD-license
Expand Down
6 changes: 6 additions & 0 deletions src/metax_api/api/rest/base/api_schemas/catalogrecord.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@
"title":"Catalog Record",
"description":"A record in a data catalog, describing a single dataset.",
"properties":{
"state":{
"title":"state",
"description":"Publishing state (published / draft) of the dataset.",
"type":"string",
"readonly": true
},
"identifier":{
"title":"Identifier",
"description":"Internal identifier of the record. Required by API write operations.",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ class Meta:
'next_dataset_version',
'previous_dataset_version',
'mets_object_identifier',
'state',
'editor',
'cumulative_state',
'date_cumulation_started',
Expand Down Expand Up @@ -117,6 +118,7 @@ def is_valid(self, raise_exception=False):
self.initial_data.pop('previous_dataset_version', None)
self.initial_data.pop('deprecated', None)
self.initial_data.pop('date_deprecated', None)
self.initial_data.pop('state', None)
self.initial_data.pop('preservation_identifier', None)
self.initial_data.pop('preservation_dataset_version', None)
self.initial_data.pop('preservation_dataset_origin_version', None)
Expand Down
7 changes: 7 additions & 0 deletions src/metax_api/api/rest/base/views/common_view.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,13 @@ def handle_exception(self, exc):
def paginate_queryset(self, queryset):
    """
    Paginate the queryset, honoring the no_pagination and ordering query params.

    Returns None when pagination is disabled via ?no_pagination=true, otherwise
    delegates to the DRF paginator with an explicitly ordered queryset.
    """
    if CS.get_boolean_query_param(self.request, 'no_pagination'):
        return None

    ordering = self.request.query_params.get('ordering')
    if ordering:
        # Paginated results are not reliably ordered unless the queryset
        # carries an explicit order_by. QuerySet.order_by() returns a NEW
        # queryset, so the result must be reassigned — the previous code
        # discarded it, leaving the ordering silently unapplied.
        queryset = queryset.order_by(*ordering.split(','))

    return super(CommonViewSet, self).paginate_queryset(queryset)

def get_queryset(self):
Expand Down
15 changes: 8 additions & 7 deletions src/metax_api/api/rpc/base/views/dataset_rpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ def change_cumulative_state(self, request):
def refresh_directory_content(self, request):
cr_identifier = request.query_params.get('cr_identifier', False)
dir_identifier = request.query_params.get('dir_identifier', False)

if not cr_identifier:
raise Http400('Query param \'cr_identifier\' missing.')
if not dir_identifier:
Expand All @@ -134,14 +135,14 @@ def refresh_directory_content(self, request):

cr.request = request

if cr.refresh_directory_content(dir_identifier):
return_status = status.HTTP_200_OK
data = { 'new_version_created': self.get_serializer(cr).data['new_version_created'] }
else:
return_status = status.HTTP_204_NO_CONTENT
data = None
new_version, n_added_files = cr.refresh_directory_content(dir_identifier)

return Response(data=data, status=return_status)
data = { 'number_of_files_added': n_added_files }

if new_version:
data['new_version_created'] = self.get_serializer(cr).data['new_version_created']

return Response(data=data, status=status.HTTP_200_OK)

@list_route(methods=['post'], url_path="fix_deprecated")
def fix_deprecated(self, request):
Expand Down
19 changes: 19 additions & 0 deletions src/metax_api/migrations/0013_catalogrecord__access_granter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Generated by Django 2.1.11 on 2019-12-16 12:49

import django.contrib.postgres.fields.jsonb
from django.db import migrations


class Migration(migrations.Migration):
    """Add CatalogRecord._access_granter, a nullable JSON field holding REMS granter data."""

    dependencies = [
        ('metax_api', '0012_auto_20190823_1055'),
    ]

    operations = [
        # Nullable with default=None so existing rows need no backfill.
        migrations.AddField(
            model_name='catalogrecord',
            name='_access_granter',
            field=django.contrib.postgres.fields.jsonb.JSONField(default=None, help_text='Stores data of REMS user who is currently granting access to this dataset', null=True),
        ),
    ]
18 changes: 18 additions & 0 deletions src/metax_api/migrations/0014_catalogrecord_state.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 2.1.11 on 2020-02-14 11:41

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add CatalogRecord.state ('published' / 'draft'), defaulting to 'published'."""

    dependencies = [
        ('metax_api', '0013_catalogrecord__access_granter'),
    ]

    operations = [
        # All pre-existing records are live datasets, hence default='published'.
        migrations.AddField(
            model_name='catalogrecord',
            name='state',
            field=models.CharField(choices=[('published', 'published'), ('draft', 'draft')], default='published', help_text='Publishing state (published / draft) of the dataset.', max_length=200),
        ),
    ]
141 changes: 119 additions & 22 deletions src/metax_api/models/catalog_record.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@
# :author: CSC - IT Center for Science Ltd., Espoo Finland <[email protected]>
# :license: MIT

from base64 import urlsafe_b64decode
from collections import defaultdict
from copy import deepcopy
import json
import logging

from django.conf import settings
Expand Down Expand Up @@ -82,16 +84,22 @@ def get_listing(self):
Return a list of record preferred_identifiers that belong in the same dataset version chain.
Latest first.
"""
return [

versions = [
{
'identifier': r.identifier,
'preferred_identifier': r.preferred_identifier,
'removed': r.removed,
'date_created': r.date_created.astimezone().isoformat()
'date_created': r.date_created.astimezone().isoformat(),
'date_removed': r.date_removed.astimezone().isoformat() if r.date_removed else None
}
for r in self.records(manager='objects_unfiltered').all().order_by('-date_created')
]

versions = [{key: value for (key, value) in i.items() if value is not None} for i in versions]

return versions

def print_records(self): # pragma: no cover
for r in self.records.all():
print(r.__repr__())
Expand Down Expand Up @@ -213,6 +221,14 @@ class CatalogRecord(Common):
(CUMULATIVE_STATE_CLOSED, 'closed')
)

STATE_PUBLISHED = 'published'
STATE_DRAFT = 'draft'

STATE_CHOICES = (
(STATE_PUBLISHED, 'published'),
(STATE_DRAFT, 'draft')
)

# MODEL FIELD DEFINITIONS #

alternate_record_set = models.ForeignKey(
Expand All @@ -223,6 +239,13 @@ class CatalogRecord(Common):

data_catalog = models.ForeignKey(DataCatalog, on_delete=models.DO_NOTHING, related_name='records')

state = models.CharField(
choices=STATE_CHOICES,
default=STATE_PUBLISHED,
max_length=200,
help_text='Publishing state (published / draft) of the dataset.'
)

dataset_group_edit = models.CharField(
max_length=200, blank=True, null=True,
help_text='Group which is allowed to edit the dataset in this catalog record.')
Expand Down Expand Up @@ -287,6 +310,9 @@ class CatalogRecord(Common):
date_last_cumulative_addition = models.DateTimeField(null=True, default=None,
help_text='Date of last file addition while actively cumulative.')

_access_granter = JSONField(null=True, default=None,
help_text='Stores data of REMS user who is currently granting access to this dataset')

# END OF MODEL FIELD DEFINITIONS #

"""
Expand Down Expand Up @@ -415,6 +441,14 @@ def _access_type_is_embargo(self):
from metax_api.services import CatalogRecordService as CRS
return CRS.get_research_dataset_access_type(self.research_dataset) == ACCESS_TYPES['embargo']

def _access_type_is_permit(self):
    """Return True when the dataset's current access type is 'permit'."""
    from metax_api.services import CatalogRecordService as CRS
    current_access_type = CRS.get_research_dataset_access_type(self.research_dataset)
    return current_access_type == ACCESS_TYPES['permit']

def _access_type_was_permit(self):
    """Return True when the access type was 'permit' before this update (from _initial_data)."""
    from metax_api.services import CatalogRecordService as CRS
    previous_access_type = CRS.get_research_dataset_access_type(self._initial_data['research_dataset'])
    return previous_access_type == ACCESS_TYPES['permit']

def _embargo_is_available(self):
if not self.research_dataset.get('access_rights', {}).get('available', False):
return False
Expand Down Expand Up @@ -832,6 +866,10 @@ def delete(self, *args, **kwargs):
if get_identifier_type(self.preferred_identifier) == IdentifierType.DOI:
self.add_post_request_callable(DataciteDOIUpdate(self, self.research_dataset['preferred_identifier'],
'delete'))

if self._dataset_has_rems_managed_access() and settings.REMS['ENABLED']:
self.add_post_request_callable(REMSUpdate(self, 'close', reason='deletion'))

self.add_post_request_callable(RabbitMQPublishRecord(self, 'delete'))

log_args = {
Expand All @@ -856,6 +894,10 @@ def delete(self, *args, **kwargs):
def deprecate(self, timestamp=None):
self.deprecated = True
self.date_deprecated = self.date_modified = timestamp or get_tz_aware_now_without_micros()

if self._dataset_has_rems_managed_access() and settings.REMS['ENABLED']:
self.add_post_request_callable(REMSUpdate(self, 'close', reason='deprecation'))

super().save(update_fields=['deprecated', 'date_deprecated', 'date_modified'])
self.add_post_request_callable(DelayedLog(
event='dataset_deprecated',
Expand Down Expand Up @@ -930,6 +972,39 @@ def get_metadata_version_listing(self):
entries[-1]['stored_to_pas'] = entry.stored_to_pas
return entries

def _get_user_info_for_rems(self):
    """
    Collect the user information REMS needs about the access granter.

    For service users the info is read from the base64url-encoded JSON in the
    'access_granter' query parameter; for end users it is read from the token.

    Returns a dict with keys 'userid', 'name' and 'email' (all strings).
    Raises Http400 when the information is missing or malformed.
    """
    if self.request.user.is_service:
        b64_access_granter = self.request.query_params.get('access_granter')
        try:
            # appending '===' pads the value so b64decode never fails on length
            user_info = json.loads(urlsafe_b64decode(f'{b64_access_granter}===').decode('utf-8'))
        except (ValueError, TypeError):
            # covers binascii.Error, UnicodeDecodeError and JSONDecodeError —
            # previously these propagated as an unhandled 500 error
            raise Http400('Query parameter access_granter must be valid base64-encoded JSON')

        if not isinstance(user_info, dict):
            raise Http400('Query parameter access_granter must contain a JSON object')
    else:
        # end user api
        user_info = {
            'userid': self.request.user.token.get('CSCUserName'),
            'name': self.request.user.token.get('displayName'),
            'email': self.request.user.token.get('email')
        }

    if any(v is None for v in user_info.values()):
        raise Http400('Could not find the needed user information for REMS')

    if not all(isinstance(v, str) for v in user_info.values()):
        raise Http400('user information fields must be string')

    return user_info

def _validate_for_rems(self):
    """
    Ensure that all information necessary for managing access in REMS is present.

    Raises Http400 when the dataset lacks a license, or when a service user
    did not supply the access_granter query parameter.
    """
    if self._access_type_is_permit():
        # license lookup only happens for permit datasets (short-circuit preserved)
        if not self.research_dataset['access_rights'].get('license', False):
            raise Http400('You must define license for dataset in order to make it REMS manageable')

    if self.request.user.is_service:
        if not self.request.query_params.get('access_granter', False):
            raise Http400('Missing query parameter access_granter')

def _pre_create_operations(self, pid_type=None):

if not self._check_catalog_permissions(self.data_catalog.catalog_record_group_create):
Expand Down Expand Up @@ -1021,8 +1096,11 @@ def _post_create_operations(self):
self.add_post_request_callable(DataciteDOIUpdate(self, self.research_dataset['preferred_identifier'],
'create'))

if self._dataset_is_access_restricted():
self.add_post_request_callable(REMSUpdate(self), 'create')
if self._dataset_has_rems_managed_access() and settings.REMS['ENABLED']:
self._validate_for_rems()
user_info = self._get_user_info_for_rems()
self._access_granter = user_info
self.add_post_request_callable(REMSUpdate(self, 'create', user_info=user_info))

self.add_post_request_callable(RabbitMQPublishRecord(self, 'create'))

Expand Down Expand Up @@ -1112,9 +1190,15 @@ def _pre_update_operations(self):
# read-only after creating
self.metadata_provider_user = self._initial_data['metadata_provider_user']

if self._dataset_restricted_access_changed():
# todo check if restriction_grounds and access_type changed
pass
if self._dataset_rems_access_changed() and settings.REMS['ENABLED']:
if self._dataset_has_rems_managed_access():
self._validate_for_rems()
user_info = self._get_user_info_for_rems()
self._access_granter = user_info
self.add_post_request_callable(REMSUpdate(self, 'create', user_info=user_info))

else:
self.add_post_request_callable(REMSUpdate(self, 'close', reason='access type change'))

if self.field_changed('research_dataset'):
if self.preservation_state in (
Expand Down Expand Up @@ -1322,17 +1406,17 @@ def _files_added_for_first_time(self):
# creating a new dataset version already occurred once
return not metadata_versions_with_files_exist

def _dataset_has_rems_managed_access(self):
    """
    Check if dataset uses REMS for managing access.

    True only for datasets in an IDA catalog whose access type is 'permit'.
    """
    return self.catalog_is_ida() and self._access_type_is_permit()

def _dataset_rems_access_changed(self):
    """
    Check if dataset is updated so that REMS needs to be updated.

    True when the dataset is in an IDA catalog and the 'permit' access type
    was either added or removed by this update.
    """
    return self.catalog_is_ida() and self._access_type_is_permit() != self._access_type_was_permit()

def _calculate_total_files_byte_size(self):
rd = self.research_dataset
Expand Down Expand Up @@ -1930,6 +2014,11 @@ def _create_pas_version(self, origin_version):
pas_version.request = origin_version.request
pas_version.save(pid_type=IdentifierType.DOI)

# ensure pas dataset contains exactly the same files as origin dataset. clear the result
# that was achieved by calling save(), which processed research_dataset.files and research_dataset.directories
pas_version.files.clear()
pas_version.files.add(*origin_version.files.filter().values_list('id', flat=True))

# link origin_version and pas copy
origin_version.preservation_dataset_version = pas_version
origin_version.new_dataset_version_created = pas_version.identifiers_dict
Expand Down Expand Up @@ -2147,7 +2236,7 @@ def refresh_directory_content(self, dir_identifier):

if not added_file_ids:
_logger.info('no change in directory content')
return False
return (False, 0)

_logger.info(f'refreshing directory adds {len(added_file_ids)} files to dataset')
self.date_modified = get_tz_aware_now_without_micros()
Expand All @@ -2172,7 +2261,7 @@ def refresh_directory_content(self, dir_identifier):
super().save()
self.add_post_request_callable(RabbitMQPublishRecord(self, 'update'))

return True if self.cumulative_state != self.CUMULATIVE_STATE_YES else False
return (self.cumulative_state != self.CUMULATIVE_STATE_YES, len(added_file_ids))

def _find_new_files_added_to_dir(self, dir):
sql_insert_newly_frozen_files_by_dir_path = '''
Expand Down Expand Up @@ -2309,10 +2398,15 @@ class REMSUpdate():
Handles managing REMS resources when creating, updating and deleting datasets.
"""

def __init__(self, cr, action):
assert action in ('create', 'update', 'delete'), 'invalid value for action'
def __init__(self, cr, action, user_info=None, reason=''):
    """
    Store parameters for a deferred REMS update.

    cr        -- the CatalogRecord the update concerns
    action    -- one of 'close', 'create', 'update'
    user_info -- dict describing the access granter; used on 'create'
    reason    -- human-readable reason; used on 'close'
    """
    from metax_api.services.rems_service import REMSService
    # action values come from internal call sites, so an assert suffices here
    assert action in ('close', 'create', 'update'), 'invalid value for action'
    self.cr = cr
    # default to a fresh dict per instance instead of a shared mutable
    # default argument, which every caller would silently share and mutate
    self.user_info = {} if user_info is None else user_info
    self.reason = reason
    self.action = action
    self.rems = REMSService()

def __call__(self):
"""
Expand All @@ -2324,12 +2418,15 @@ def __call__(self):
)

try:
# todo do_stuff()
pass
except:
_logger.exception('REMS interaction failed')
if self.action == 'create':
self.rems.create_rems_entity(self.cr, self.user_info)
if self.action == 'close':
self.rems.close_rems_entity(self.cr, self.reason)

except Exception as e:
_logger.error(e)
raise Http503({ 'detail': [
'failed to publish updates to rems. request is aborted.'
f'failed to publish updates to rems. request is aborted.'
]})


Expand Down
Loading

0 comments on commit 3c54b82

Please sign in to comment.