Skip to content

Commit

Permalink
Taxon list and formatter improvements.
Browse files Browse the repository at this point in the history
- `taxon list` supports `per rank`
- limit the number of API calls; fall back to `per child` if too many
- include all ranks at same specified level (affects `life` too)
- handle an empty taxon list when no ranks match
  • Loading branch information
synrg committed Aug 13, 2024
1 parent dc77e95 commit 2e73fe5
Show file tree
Hide file tree
Showing 3 changed files with 136 additions and 24 deletions.
106 changes: 90 additions & 16 deletions dronefly/core/commands/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,14 @@
from rich.markdown import Markdown

from ..clients.inat import iNatClient
from ..constants import INAT_DEFAULTS, INAT_USER_DEFAULT_PARAMS, RANK_KEYWORDS
from ..constants import (
INAT_DEFAULTS,
INAT_USER_DEFAULT_PARAMS,
RANK_EQUIVALENTS,
RANKS_FOR_LEVEL,
RANK_KEYWORDS,
RANK_LEVELS,
)

from ..parsers import NaturalParser
from ..formatters.generic import (
Expand All @@ -15,6 +22,7 @@
ListFormatter,
ObservationFormatter,
TaxonFormatter,
p,
)
from ..models.user import User
from ..query.query import get_base_query_args, QueryResponse
Expand Down Expand Up @@ -97,7 +105,14 @@ class Commands:
def _parse(self, query_str):
return self.parser.parse(query_str)

def _get_formatted_page(self, formatter, page: int = 0, selected: int = 0):
def _get_formatted_page(
self,
formatter,
page: int = 0,
selected: int = 0,
header: str = None,
footer: str = None,
):
if getattr(formatter, "format_page", None):
markdown_text = formatter.format_page(page, selected)
last_page = formatter.last_page()
Expand All @@ -107,6 +122,10 @@ def _get_formatted_page(self, formatter, page: int = 0, selected: int = 0):
)
else:
markdown_text = formatter.format()
if header or footer:
markdown_text = "\n\n".join(
[item for item in (header, markdown_text, footer) if item is not None]
)
return self._format_markdown(markdown_text)

def _format_markdown(self, markdown_text: str):
Expand Down Expand Up @@ -208,9 +227,11 @@ def life(self, ctx: Context, *args):

def taxon_list(self, ctx: Context, *args):
query = self._parse(" ".join(args))
# per_rank = query.per or "main"
# if per_rank not in [*RANK_KEYWORDS, "leaf", "child", "main", "any"]:
# return "Specify `per <rank-or-keyword>`"
per_rank = query.per or "child"
if per_rank not in [*RANK_KEYWORDS, "child"]:
return "Specify `per <rank>` or `per child` (default)"
_per_rank = per_rank
rank_level = None
sort_by = query.sort_by or None
if sort_by not in [None, "obs", "name"]:
return "Specify `sort by obs` or `sort by name` (default)"
Expand All @@ -221,6 +242,8 @@ def taxon_list(self, ctx: Context, *args):
query_args = get_base_query_args(query)
taxon = None
taxon_list = []
short_description = ""
msg = None
with self.inat_client.set_ctx(ctx) as client:
# Handle a useful subset of query args in a simplistic way for now
# (i.e. no config table lookup yet) to model full query in bot
Expand All @@ -232,27 +255,77 @@ def taxon_list(self, ctx: Context, *args):
query_args["taxon"] = taxon
query_response = QueryResponse(**query_args)
taxon = query_response.taxon
if taxon:
taxon_list = [taxon, *(taxon.children or [])]

if not taxon:
return f"No taxon {query_response.obs_query_description()}"
if not taxon:
return f"No taxon {query_response.obs_query_description()}"

_taxon_list = [
taxon,
*[_taxon for _taxon in taxon.children if _taxon.is_active],
]
if per_rank == "child":
short_description = "Children"
taxon_list = _taxon_list
else:
_per_rank = RANK_EQUIVALENTS.get(per_rank) or per_rank
rank_level = RANK_LEVELS[_per_rank]
if rank_level >= taxon.rank_level:
return self._format_markdown(
"\N{WARNING SIGN} "
f"**The rank `{per_rank}` is not lower than "
"the taxon rank: `{taxon.rank}`.**"
)
short_description = p.plural(_per_rank).capitalize()
_children = [
child for child in _taxon_list if child.rank_level == rank_level
]
_without_rank_ids = [
child.id for child in _taxon_list if child not in _children
]
if len(_without_rank_ids) > 0:
# One chance at retrieving the remaining children, i.e. if the
# remainder (direct children - those at the specified rank level)
# don't constitute a single page of results, then show children
# instead.
_descendants = client.taxa.search(
taxon_id=_without_rank_ids,
rank_level=rank_level,
is_active=True,
per_page=500,
)
# The choice of 2500 as our limit is arbitrary:
# - will take 5 more API calls to satisfy
# - encompasses the largest genera (e.g. Astragalus)
# - meant to limit unreasonable sized queries so they don't make
# excessive API demands
# - TODO: switch to using a local DB built from full taxonomy dump
# so we can lift this restriction
if _descendants.count() > 2500:
short_description = "Children"
msg = (
f"\N{WARNING SIGN} **Too many {p.plural(_per_rank)}. "
"Listing children instead:**"
)
_per_rank = "child"
taxon_list = _taxon_list
else:
taxon_list = [*_children, *_descendants.all()]
else:
taxon_list = _children

per_page = ctx.per_page
with_index = self.format == Format.rich
# TODO: support taxon lists other than of children (e.g. descendants of
# a specific rank, siblings, etc.)
# - as a simple first deliverable, we just hardwire the list to children
# List all ranks at the same level, not just the specified rank.
# Skip this for `per child` (the default, or the "too many" fallback):
# there rank_level may be None, which is not a RANKS_FOR_LEVEL key, and
# the formatter must keep receiving "child" so it lists the children.
if _per_rank != "child":
    _per_rank = RANKS_FOR_LEVEL[rank_level]
formatter = TaxonListFormatter(
taxon_list,
per_rank="child",
per_rank=_per_rank,
query_response=query_response,
with_indent=True,
per_page=per_page,
with_index=with_index,
sort_by=sort_by,
order=order,
short_description="Children",
short_description=short_description,
)
ctx.page_formatter = formatter
ctx.page = 0
Expand All @@ -262,7 +335,8 @@ def taxon_list(self, ctx: Context, *args):
if first_page:
# TODO: Provide a method in the formatter to set the title:
formatter.pages[0]["header"] = title
return self._get_formatted_page(formatter, 0, 0)
page = self._get_formatted_page(formatter, 0, 0, header=msg)
return page

def next(self, ctx: Context):
if not ctx.page_formatter:
Expand Down
16 changes: 16 additions & 0 deletions dronefly/core/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,21 @@
PLANTAE_ID = 47126
TRACHEOPHYTA_ID = 211194
RANK_KEYWORDS = tuple(RANK_LEVELS.keys()) + tuple(RANK_EQUIVALENTS.keys())
# Maps each rank level to the name used to describe that level; filled in
# by the loop below.
RANK_LEVEL_NAMES = {}
# For levels with multiple ranks, name the level after the most broadly used
# rank at that level:
RANK_LEVEL_TO_NAME = {
    5: "subspecies",
    10: "species",
    20: "genus",
}
# Maps each rank level to the list of every rank at that level, in
# RANK_LEVELS iteration order.
RANKS_FOR_LEVEL = {}
for rank, level in RANK_LEVELS.items():
    # Levels without an explicit override are named after the last rank
    # seen at that level.
    RANK_LEVEL_NAMES[level] = RANK_LEVEL_TO_NAME.get(level) or rank
    RANKS_FOR_LEVEL.setdefault(level, []).append(rank)

TAXON_PRIMARY_RANKS = COMMON_RANKS
TRINOMIAL_ABBR = {"variety": "var.", "subspecies": "ssp.", "form": "f."}
38 changes: 30 additions & 8 deletions dronefly/core/formatters/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,9 @@
TAXON_PRIMARY_RANKS,
TRINOMIAL_ABBR,
RANK_EQUIVALENTS,
RANKS_FOR_LEVEL,
RANK_LEVELS,
RANK_LEVEL_NAMES,
)
from dronefly.core.formatters.constants import (
ICONS,
Expand Down Expand Up @@ -196,6 +198,8 @@ def taxa_per_rank(
_sort_rank_obs_name(order) if sort_by == "obs" else _sort_rank_name(order)
)

if not any(taxon.rank in include_ranks for taxon in taxon_list):
return []
tree = make_tree(
taxon_list,
include_ranks=include_ranks,
Expand Down Expand Up @@ -256,17 +260,19 @@ def format_datetime(time, compact=False):
return formatted_time


def included_ranks(per_rank):
def included_ranks(per_rank: str):
if per_rank == "main":
ranks = COMMON_RANKS
ranks = []
for rank in COMMON_RANKS:
ranks += RANKS_FOR_LEVEL[RANK_LEVELS[rank]]
else:
ranks = list(RANK_LEVELS.keys())
return ranks


def filter_taxon_list(
taxon_list: list[Taxon],
per_rank: str,
per_rank: Union[list[str], str],
taxon: Taxon,
root_taxon_id: int = None,
sort_by: str = None,
Expand Down Expand Up @@ -302,8 +308,24 @@ def filter_taxon_list(
taxon_list, per_rank, _root_taxon_id, sort_by, order
)
else:
rank = RANK_EQUIVALENTS[per_rank] if per_rank in RANK_EQUIVALENTS else per_rank
ranks = p.plural_noun(rank)
_per_rank = per_rank
_ranks = []
if isinstance(per_rank, str):
_per_rank = [per_rank]
per_rank = []
for _rank in _per_rank:
rank = RANK_EQUIVALENTS[_rank] if _rank in RANK_EQUIVALENTS else _rank
rank_name = p.plural_noun(RANK_LEVEL_NAMES[RANK_LEVELS[_rank]])
# Add all ranks at the same level to the filter, described as
# the most commonly used rank at that level,
# - e.g. "genus" =>
# per_rank = ["genus", "genushybrid"]
# described as "genera"
if rank not in per_rank:
per_rank += RANKS_FOR_LEVEL[RANK_LEVELS[_rank]]
_ranks.append(rank_name)
# List of arbitrary ranks (e.g. "subfamily/species"):
ranks = "/".join(_ranks)
generate_taxa = taxa_per_rank(
taxon_list, per_rank, root_taxon_id, sort_by, order
)
Expand Down Expand Up @@ -713,7 +735,7 @@ class TaxonListFormatter(ListFormatter):
def __init__(
self,
taxon_list: list[Taxon],
per_rank: str,
per_rank: Union[list[str], str],
query_response: QueryResponse,
with_url: bool = True,
with_taxa: bool = True,
Expand All @@ -735,8 +757,8 @@ def __init__(
into a tree. This can be a life list or other list of descendants
of a common root taxon.
per_rank: str
Rank to include in list of taxa, or one of the special values:
per_rank: list[str], str
Rank(s) to include in list of taxa, or one of the special values:
- 'leaf' (default) = leaf taxa
- 'child' = all child taxa regardless of rank
- 'main' = any of the most commonly used ranks
Expand Down

0 comments on commit 2e73fe5

Please sign in to comment.