Skip to content

Commit

Permalink
Merge pull request #75 from MicroscopeIT/AQ-199-rewrite-tags
Browse files Browse the repository at this point in the history
AQ-199 Tags rewritten (data duplication approach)
  • Loading branch information
ajaskier authored Dec 4, 2019
2 parents edf7db8 + b91f7db commit a7839c5
Show file tree
Hide file tree
Showing 23 changed files with 213 additions and 123 deletions.
2 changes: 1 addition & 1 deletion Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ flatten-dict = "*"
flask = "*"
flask-jwt-extended = "*"
flask-restful = "*"
gunicorn = "*"
gunicorn = "==19.9.0"
passlib = "*"
pillow = "*"
pymongo = "*"
Expand Down
34 changes: 16 additions & 18 deletions Pipfile.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion aquascope/scripts/populate_system.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@


def run_populate_system(data_directory):
db = get_db_from_env()
db_client, db = get_db_from_env()

storage_client = blob_storage_client(connection_string=os.environ['STORAGE_CONNECTION_STRING'])
populate_system_with_items(data_directory, db, storage_client)
Expand Down
2 changes: 1 addition & 1 deletion aquascope/tasks/upload_postprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

@celery_app.task
def parse_upload(upload_id):
db = get_db_from_env()
db_client, db = get_db_from_env()
storage_client = get_storage_client_from_env()
try:
parse_upload_package(upload_id, db, storage_client)
Expand Down
15 changes: 8 additions & 7 deletions aquascope/tests/aquascope/webserver/api/test_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from flask import json
import requests

from aquascope.tests.aquascope.webserver.data_access.db.dummy_items import DUMMY_ITEMS_WITH_TAGS
from aquascope.tests.aquascope.webserver.data_access.db.dummy_items import DUMMY_ITEMS
from aquascope.tests.flask_app_test_case import FlaskAppTestCase
from aquascope.webserver.data_access.db.items import ANNOTABLE_FIELDS, Item, MORPHOMETRIC_FIELDS

Expand All @@ -29,7 +29,8 @@ def url_to_items(url):
'upload_id': lambda x: ObjectId(str(x)),
'tags': ast.literal_eval,
**{k: lambda x: float(x) for k in MORPHOMETRIC_FIELDS},
**{f'{k}_modification_time': lambda x: dateutil.parser.parse(x) if x else None for k in ANNOTABLE_FIELDS}
**{f'{k}_modification_time': lambda x: dateutil.parser.parse(x) if x else None for k in
ANNOTABLE_FIELDS}
}
df = pd.read_csv(tmp_filepath, converters=converters, sep='\t')
df = df.replace({pd.np.nan: None})
Expand Down Expand Up @@ -61,7 +62,7 @@ def test_api_can_get_export_with_all_items(self):

items = self.url_to_items(response['url'])
items = [item.serializable() for item in items]
expected_items = [item.serializable() for item in DUMMY_ITEMS_WITH_TAGS]
expected_items = [item.serializable() for item in DUMMY_ITEMS]
self.assertCountEqual(items, expected_items)

def test_api_can_get_export_with_limit_to_single_item(self):
Expand All @@ -78,7 +79,7 @@ def test_api_can_get_export_with_limit_to_single_item(self):

items = self.url_to_items(response['url'])
items = [item.serializable() for item in items]
expected_items = [DUMMY_ITEMS_WITH_TAGS[0].serializable()]
expected_items = [DUMMY_ITEMS[0].serializable()]
self.assertCountEqual(items, expected_items)

def test_api_can_get_export_with_attribute_filter(self):
Expand All @@ -95,7 +96,7 @@ def test_api_can_get_export_with_attribute_filter(self):

items = self.url_to_items(response['url'])
items = [item.serializable() for item in items]
expected_items = [item.serializable() for item in DUMMY_ITEMS_WITH_TAGS if item.eating]
expected_items = [item.serializable() for item in DUMMY_ITEMS if item.eating]
self.assertCountEqual(items, expected_items)

def test_api_can_get_export_with_taxonomy_filter(self):
Expand All @@ -112,7 +113,7 @@ def test_api_can_get_export_with_taxonomy_filter(self):

items = self.url_to_items(response['url'])
items = [item.serializable() for item in items]
expected_items = [item.serializable() for item in DUMMY_ITEMS_WITH_TAGS if item.empire is 'prokaryota']
expected_items = [item.serializable() for item in DUMMY_ITEMS if item.empire is 'prokaryota']
self.assertCountEqual(items, expected_items)

def test_api_can_get_export_with_filters_and_limit(self):
Expand All @@ -131,7 +132,7 @@ def test_api_can_get_export_with_filters_and_limit(self):

items = self.url_to_items(response['url'])
items = [item.serializable() for item in items]
expected_items = [item.serializable() for item in DUMMY_ITEMS_WITH_TAGS[:1]]
expected_items = [item.serializable() for item in DUMMY_ITEMS[:1]]
self.assertCountEqual(items, expected_items)

def test_api_can_get_export_with_filter_that_doesnt_match_any_items(self):
Expand Down
12 changes: 6 additions & 6 deletions aquascope/tests/aquascope/webserver/api/test_items.py
Original file line number Diff line number Diff line change
Expand Up @@ -456,7 +456,7 @@ def test_api_can_get_items_by_tag(self, mock_make_blob_url):
self.assertEqual(res.status_code, 200)

response = res.json
expected_items = DUMMY_ITEMS_WITH_DEFAULT_PROJECTION[:4]
expected_items = [DUMMY_ITEMS_WITH_DEFAULT_PROJECTION[1], DUMMY_ITEMS_WITH_DEFAULT_PROJECTION[3]]
expected_items = [item.serializable() for item in expected_items]

self.assertCountEqual(response['items'], expected_items)
Expand All @@ -466,14 +466,14 @@ def test_api_can_get_items_by_tag_and_regular_field(self, mock_make_blob_url):
mock_make_blob_url.return_value = 'mockedurl'
with self.app.app_context():
request_data = {
'tags': ['dummy_tag_1'],
'tags': ['sth'],
'eating': True
}
res = self.client().get('/items', query_string=request_data, headers=self.headers)
self.assertEqual(res.status_code, 200)

response = res.json
expected_items = DUMMY_ITEMS_WITH_DEFAULT_PROJECTION[:2]
expected_items = [DUMMY_ITEMS_WITH_DEFAULT_PROJECTION[0]]
expected_items = [item.serializable() for item in expected_items]

self.assertCountEqual(response['items'], expected_items)
Expand All @@ -489,7 +489,7 @@ def test_api_can_get_items_by_tags(self, mock_make_blob_url):
self.assertEqual(res.status_code, 200)

response = res.json
expected_items = DUMMY_ITEMS_WITH_DEFAULT_PROJECTION[:3]
expected_items = [DUMMY_ITEMS_WITH_DEFAULT_PROJECTION[1]]
expected_items = [item.serializable() for item in expected_items]

self.assertCountEqual(response['items'], expected_items)
Expand Down Expand Up @@ -639,7 +639,7 @@ def test_api_can_annotate_single_item(self, mock_make_blob_url):
self.app.config['page_size'] = 5

request_data = {
'eating': False,
'eating': True,
'tags': ['with_broken_records_field']
}
res = self.client().get('/items/paged', query_string=request_data, headers=self.headers)
Expand All @@ -648,7 +648,7 @@ def test_api_can_annotate_single_item(self, mock_make_blob_url):
response = res.json
item = response['items'][0]
changed_item = copy.deepcopy(item)
changed_item['eating'] = True
changed_item['eating'] = False

post_request_data = json.dumps([
{
Expand Down
91 changes: 88 additions & 3 deletions aquascope/tests/aquascope/webserver/api/test_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ class TestPostUploadTags(FlaskAppTestCase):

def test_api_can_post_valid_tags_list(self):
with self.app.app_context():
upload_doc = copy.deepcopy(DUMMY_UPLOADS[0])
upload_doc = copy.deepcopy(DUMMY_UPLOADS[3])
tags = ['tag1', 'tag2']
request_data = json.dumps({
'tags': tags
Expand All @@ -48,7 +48,7 @@ def test_api_can_post_valid_tags_list(self):

def test_api_can_post_empty_tags_list(self):
with self.app.app_context():
upload_doc = copy.deepcopy(DUMMY_UPLOADS[0])
upload_doc = copy.deepcopy(DUMMY_UPLOADS[3])
tags = []
request_data = json.dumps({
'tags': tags
Expand All @@ -63,7 +63,7 @@ def test_api_can_post_empty_tags_list(self):

def test_api_cant_post_invalid_tags_list(self):
with self.app.app_context():
upload_doc = copy.deepcopy(DUMMY_UPLOADS[0])
upload_doc = copy.deepcopy(DUMMY_UPLOADS[3])
invalid_tags = [
[4],
['valid', 4],
Expand Down Expand Up @@ -97,6 +97,91 @@ def test_api_cant_post_tags_for_invalid_upload(self):
data=request_data, headers=self.headers)
self.assertEqual(res.status_code, 400)

def test_api_cant_post_tags_for_not_finished_upload(self):
    """Check that POSTing tags to dummy uploads 0, 1, 2 and 4 is rejected with HTTP 400.

    ``DUMMY_UPLOADS[3]`` is deliberately left out: it is the fixture the other
    tag tests in this file post to successfully.
    NOTE(review): the test name implies the remaining fixtures are in
    non-finished states - confirm against the dummy_uploads fixtures.
    """
    with self.app.app_context():
        request_data = json.dumps({
            'tags': ['tag1', 'tag2']
        })

        rejected_uploads = [DUMMY_UPLOADS[index] for index in (0, 1, 2, 4)]

        for upload in rejected_uploads:
            upload_id = str(upload._id)
            # subTest keeps checking the remaining uploads after a failure and
            # reports which upload id was (wrongly) accepted.
            with self.subTest(upload_id=upload_id):
                res = self.client().post(f'/upload/{upload_id}/tags',
                                         data=request_data, headers=self.headers)
                self.assertEqual(res.status_code, 400)

@mock.patch('aquascope.webserver.data_access.storage.blob.make_blob_url')
def test_api_tags_update_to_non_finished_upload_does_not_propagate_to_items(self, mock_make_blob_url):
    """Check that a rejected tag update leaves no item reachable through the new tags.

    Steps:
      1. Query items by an existing tag of ``DUMMY_UPLOADS[1]`` and require at
         least one match, so the scenario is not trivially empty.
      2. POST new tags to that upload and expect HTTP 400.
      3. Query items by the new tags and expect an empty result.
    """
    mock_make_blob_url.return_value = 'mockedurl'

    upload = DUMMY_UPLOADS[1]
    old_tag = upload.tags[1]

    res = self.client().get('/items', query_string={'tags': [old_tag]}, headers=self.headers)
    self.assertEqual(res.status_code, 200)
    original_items = res.json['items']
    # assertNotEqual reports the offending value on failure, unlike assertTrue(len(...) != 0).
    self.assertNotEqual(len(original_items), 0)

    new_tags = ['new_tag1', 'new_tag2']
    request_data = json.dumps({
        'tags': new_tags
    })
    res = self.client().post(f'/upload/{str(upload._id)}/tags',
                             data=request_data, headers=self.headers)
    self.assertEqual(res.status_code, 400)

    res = self.client().get('/items', query_string={'tags': new_tags}, headers=self.headers)
    self.assertEqual(res.status_code, 200)
    new_items = res.json['items']

    # The rejected update must not have tagged any item with the new tags.
    self.assertEqual(len(new_items), 0, msg=f'unexpected items carry the new tags: {new_items}')

@mock.patch('aquascope.webserver.data_access.storage.blob.make_blob_url')
def test_api_tags_update_is_propagated_to_items(self, mock_make_blob_url):
    """Check that an accepted tag update on an upload is reflected on its items.

    Steps:
      1. Query items by the first tag of ``DUMMY_UPLOADS[3]``.
      2. POST new tags to that upload and expect HTTP 204.
      3. Query items by the new tags and expect the same items as in step 1,
         compared with their ``tags`` field removed.
    """
    mock_make_blob_url.return_value = 'mockedurl'

    upload = DUMMY_UPLOADS[3]
    old_tag = upload.tags[0]

    res = self.client().get('/items', query_string={'tags': [old_tag]}, headers=self.headers)
    self.assertEqual(res.status_code, 200)
    original_items = res.json['items']
    # Guard against a vacuous pass: if nothing matched the old tag, both lists
    # would be empty and the final comparison would prove nothing.
    self.assertNotEqual(len(original_items), 0)

    new_tags = ['new_tag1', 'new_tag2']
    request_data = json.dumps({
        'tags': new_tags
    })
    res = self.client().post(f'/upload/{str(upload._id)}/tags',
                             data=request_data, headers=self.headers)
    self.assertEqual(res.status_code, 204)

    res = self.client().get('/items', query_string={'tags': new_tags}, headers=self.headers)
    self.assertEqual(res.status_code, 200)
    new_items = res.json['items']

    # Tags are expected to differ between the two snapshots, so drop them
    # before comparing the rest of each item.
    for item in original_items + new_items:
        item.pop('tags')

    self.assertCountEqual(original_items, new_items)


class TestGetUpload(FlaskAppTestCase):

def test_api_can_get_existing_upload(self):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def add_tags_to_items(item_list):
_DUMMY_ITEMS = [
{
"_id": ObjectId('000000000000000000000000'),
"upload_id": ObjectId('000000000000000000001001'),
"upload_id": ObjectId('000000000000000000001003'),
"file_size": 1.0,
"aspect_ratio": 1.0,
"maj_axis_len": 1.0,
Expand Down Expand Up @@ -272,7 +272,7 @@ def add_tags_to_items(item_list):
},
{
"_id": ObjectId('000000000000000000000002'),
"upload_id": ObjectId('000000000000000000001001'),
"upload_id": ObjectId('000000000000000000001003'),
"file_size": 1.0,
"aspect_ratio": 1.0,
"maj_axis_len": 1.0,
Expand Down Expand Up @@ -654,5 +654,4 @@ def add_tags_to_items(item_list):
Item(project_dict(copy.deepcopy(item), DEFAULT_ITEM_PROJECTION)) for item in _DUMMY_ITEMS_WITH_TAGS
]

DUMMY_ITEMS = [Item(item) for item in _DUMMY_ITEMS]
DUMMY_ITEMS_WITH_TAGS = [Item(item) for item in _DUMMY_ITEMS_WITH_TAGS]
DUMMY_ITEMS = [Item(item) for item in _DUMMY_ITEMS_WITH_TAGS]
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
'img1.jpg',
'img2.jpg'
],
'tags': []
'tags': ['sth']
},
{
'_id': ObjectId('000000000000000000001004'),
Expand Down
3 changes: 2 additions & 1 deletion aquascope/tests/aquascope/webserver/data_access/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from bson import ObjectId
from pandas.errors import EmptyDataError

from aquascope.tests.aquascope.webserver.data_access.db.dummy_uploads import DUMMY_UPLOADS
from aquascope.tests.flask_app_test_case import FlaskAppTestCase
from aquascope.webserver.data_access.util import populate_system_with_items, MissingTsvFileError

Expand All @@ -13,7 +14,7 @@

class TestPopulateSystemWithItems(FlaskAppTestCase):

upload_id = ObjectId('999000000000000000001000')
upload_id = DUMMY_UPLOADS[3]._id

def test_can_populate_system_with_valid_data_package(self):
data_package_path = os.path.join(DATA_PATH, '5p0xMAG_small')
Expand Down
2 changes: 1 addition & 1 deletion aquascope/tests/flask_app_test_case.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def setUpClass(cls):
cls.auth_secondary_pass_raw = 'secondpassword'
secondary_pass = sha256.hash(cls.auth_secondary_pass_raw)

cls.app = make_app(cls.db, storage_connection_string, 'jwtdummysecret', cls.auth_user, auth_pass,
cls.app = make_app(client, cls.db, storage_connection_string, 'jwtdummysecret', cls.auth_user, auth_pass,
secondary_pass, environment='TESTING', celery_user='', celery_password='',
celery_address='', page_size=500)

Expand Down
3 changes: 2 additions & 1 deletion aquascope/webserver/api/upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,11 @@ def post(self, upload_id):
except FormattedValidationError as e:
return e.formatted_messages, 400

db_client = app.config['db_client']
db = app.config['db']

try:
res = upload.update_tags(db, upload_id, json_data['tags'])
res = upload.update_tags(db_client, db, upload_id, json_data['tags'])
except InvalidId:
return invalid_request()

Expand Down
Loading

0 comments on commit a7839c5

Please sign in to comment.