Skip to content

Commit

Permalink
YDA-5698: add size option to group lifecycle report
Browse files Browse the repository at this point in the history
The new option optionally shows the sizes of the research and vault
collections in the report.
  • Loading branch information
stsnel committed May 21, 2024
1 parent 348de66 commit 14a8380
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 18 deletions.
6 changes: 4 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -365,17 +365,19 @@ optional arguments:
### yreport\_grouplifecycle
```
usage: yreport_grouplifecycle [-h] [-q] [-y {1.7,1.8,1.9,1.10}]
usage: yreport_grouplifecycle [-h] [-q] [-s] [-y {1.7,1.8,1.9,1.10}]

Generates a list of research groups, along with their creation date,
expiration date (if available), lists of group managers, regular members, and
readonly members. The report also shows whether each research compartment
contains data, as well as whether its vault compartment contains data.

options:
optional arguments:
-h, --help show this help message and exit
-q, --quasi-xml Enable Quasi-XML parser in order to be able to parse
characters not supported by regular XML parser
-s, --size Include size of research collection and vault
collection in output
-y {1.7,1.8,1.9,1.10}, --yoda-version {1.7,1.8,1.9,1.10}
Override Yoda version on the server
```
Expand Down
13 changes: 9 additions & 4 deletions yclienttools/common_queries.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
from itertools import chain
import os
from typing import Dict

from irods.column import Like
from irods.models import Collection, DataObject, Resource, User, UserGroup
from yclienttools.options import GroupByOption
from irods.session import iRODSSession
from yclienttools import exceptions
from yclienttools.options import GroupByOption


def get_collections_in_root(session, root):
Expand All @@ -26,8 +28,11 @@ def get_collections_in_root(session, root):
return chain(generator_collection, generator_subcollections)


def get_collection_size(session, collection_name,
count_all_replicas, group_by, include_revisions):
def get_collection_size(session: iRODSSession,
collection_name: str,
count_all_replicas: bool,
group_by: GroupByOption,
include_revisions: bool) -> Dict[str, int]:
'''Get total size of all data objects in collection (including its subcollections).
Options:
- count_all_replicas (boolean): specifies whether to count the size of each data
Expand All @@ -39,7 +44,7 @@ def get_collection_size(session, collection_name,
collection in the collection size.
'''

result = {}
result: Dict[str, int] = {}

collections = get_collections_in_root(
session, collection_name)
Expand Down
80 changes: 68 additions & 12 deletions yclienttools/reportgrouplifecycle.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,14 @@
import sys
from typing import Dict, List, Union

import humanize
from irods.column import Like
from irods.message import (ET, XML_Parser_Type)
from irods.models import Collection, DataObject, User
from irods.session import iRODSSession
from yclienttools import common_args, common_config
from yclienttools import session as s
from yclienttools import common_args, common_config, session as s
from yclienttools.common_queries import collection_exists, get_collection_size
from yclienttools.options import GroupByOption


def entry():
Expand All @@ -37,6 +39,8 @@ def _get_args() -> argparse.Namespace:
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("-q", "--quasi-xml", default=False, action='store_true',
help='Enable Quasi-XML parser in order to be able to parse characters not supported by regular XML parser')
parser.add_argument("-s", "--size", default=False, action='store_true',
help='Include size of research collection and vault collection in output')
common_args.add_default_args(parser)
return parser.parse_args()

Expand Down Expand Up @@ -82,8 +86,7 @@ def _group_research_has_data(session: iRODSSession, group_name: str) -> int:
:returns: number of data objects in research group
"""
research_collection = f"/{session.zone}/home/{group_name}"
return _collection_has_data(session, research_collection)
return _collection_has_data(session, _get_research_group_collection(session, group_name))


def _group_vault_has_data(session: iRODSSession, group_name: str) -> int:
Expand All @@ -97,9 +100,36 @@ def _group_vault_has_data(session: iRODSSession, group_name: str) -> int:
:returns: number of data objects in vault group
"""
vault_collection = f"/{session.zone}/home/{group_name}".replace(
return _collection_has_data(session, _get_vault_group_collection(session, group_name))


def _get_vault_group_collection(session: iRODSSession, group_name: str) -> str:
return f"/{session.zone}/home/{group_name}".replace(
"research-", "vault-", 1)
return _collection_has_data(session, vault_collection)


def _get_research_group_collection(session: iRODSSession, group_name: str) -> str:
return f"/{session.zone}/home/{group_name}"


def _get_research_size(session: iRODSSession, group_name: str) -> Union[int, None]:
    """Return the size of a group's research collection, if it exists.

    :param session:    iRODS session
    :param group_name: name of the research group

    :returns: size as reported by _get_collection_size_for_glr, or None when
              collection_exists reports that the collection is absent
    """
    research_path = _get_research_group_collection(session, group_name)
    if not collection_exists(session, research_path):
        return None
    return _get_collection_size_for_glr(session, research_path)


def _get_vault_size(session: iRODSSession, group_name: str) -> Union[int, None]:
    """Return the size of a group's vault collection, if it exists.

    :param session:    iRODS session
    :param group_name: name of the research group

    :returns: size as reported by _get_collection_size_for_glr, or None when
              collection_exists reports that the collection is absent
    """
    vault_path = _get_vault_group_collection(session, group_name)
    if not collection_exists(session, vault_path):
        return None
    return _get_collection_size_for_glr(session, vault_path)


def _get_collection_size_for_glr(session: iRODSSession, collection_name: str) -> int:
    """Return the total size of a collection, using fixed report settings.

    Calls get_collection_size with replica counting and revisions both
    enabled and without grouping, then returns the 'all' entry of its result.
    ("glr" presumably stands for group lifecycle report.)

    :param session:         iRODS session
    :param collection_name: path of the collection to measure

    :returns: the 'all' entry of the size dictionary
    """
    sizes = get_collection_size(
        session,
        collection_name,
        count_all_replicas=True,
        group_by=GroupByOption.none,
        include_revisions=True,
    )
    return sizes['all']


def _collection_has_data(session: iRODSSession, coll_name: str) -> int:
Expand Down Expand Up @@ -142,18 +172,38 @@ def _get_group_managers(session: iRODSSession, group_name: str, attributes: Dict
return [manager.split("#")[0] for manager in attributes["manager"]]


def _get_columns(args: argparse.Namespace) -> List[str]:
base_cols = ["Group name", "Category", "Subcategory",
"Group managers", "Regular members", "Read-only members",
"Creation date", "Expiration date", "Has research data", "Has vault data"]

if args.size:
extra_cols = ["Research collection size", "Vault collection size"]
else:
extra_cols = []

result = base_cols
result.extend(extra_cols)
return result


def _list_or_str_to_str(value: Union[str, List[str]]) -> str:
if type(value) is str:
return value
else:
return ";".join(value)


def _size_to_str(value: Union[int, None]) -> str:
if value is None:
return "N/A"
else:
return humanize.naturalsize(value)


def report_groups_lifecycle(args: argparse.Namespace, session: iRODSSession):
output = csv.writer(sys.stdout, delimiter=',')
output.writerow(["Group name", "Category", "Subcategory",
"Group managers", "Regular members", "Read-only members",
"Creation date", "Expiration date", "Has research data", "Has vault data"])
output.writerow(_get_columns(args))

def _has_data_to_string(value):
if value is None:
Expand All @@ -176,6 +226,12 @@ def _has_data_to_string(value):
_group_research_has_data(session, group))
vault_has_data = _has_data_to_string(
_group_vault_has_data(session, group))
output.writerow([group, category, subcategory,
group_managers, regular_members, readonly_members,
creation_date_str, expiration_date, research_has_data, vault_has_data])
rowdata = [group, category, subcategory,
group_managers, regular_members, readonly_members,
creation_date_str, expiration_date, research_has_data, vault_has_data]

if args.size:
rowdata.append(_size_to_str(_get_research_size(session, group)))
rowdata.append(_size_to_str(_get_vault_size(session, group)))

output.writerow(rowdata)

0 comments on commit 14a8380

Please sign in to comment.