Skip to content

Commit

Permalink
Add very basic GithubAlgorithmRepository
Browse files Browse the repository at this point in the history
  • Loading branch information
soxofaan committed Jun 14, 2024
1 parent 55ac49b commit 4f022b6
Show file tree
Hide file tree
Showing 2 changed files with 71 additions and 2 deletions.
37 changes: 36 additions & 1 deletion src/esa_apex_toolbox/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import dataclasses
import json
from pathlib import Path
from typing import Optional, Union
from typing import List, Optional, Union

import requests

Expand Down Expand Up @@ -102,3 +102,38 @@ def from_ogc_api_record(cls, src: Union[dict, str, Path]) -> Algorithm:
description=properties.get("description"),
udp_link=udp_link,
)


class GithubAlgorithmRepository:
    """
    GitHub based algorithm repository.

    Algorithm definitions are files in a folder of a GitHub repository.
    The folder is listed through the GitHub REST "contents" API and
    individual files are loaded from their ``raw.githubusercontent.com`` URL.

    :param owner: GitHub user or organization that owns the repository.
    :param repo: name of the GitHub repository.
    :param folder: folder inside the repository that holds the algorithm
        files; an empty string (the default) means the repository root.
        Leading and trailing slashes are ignored.
    :param branch: branch to read from, used both for listing the folder
        and for fetching individual files.
    """

    # TODO: caching

    def __init__(self, owner: str, repo: str, folder: str = "", branch: str = "main"):
        self.owner = owner
        self.repo = repo
        self.folder = folder
        self.branch = branch
        self._session = requests.Session()

    def _folder_path(self) -> str:
        """Return the folder without surrounding slashes ("" for the repository root)."""
        return self.folder.strip("/")

    def _contents_url(self) -> str:
        """Build the GitHub "contents" API URL of the algorithm folder."""
        parts = [
            f"https://api.github.com/repos/{self.owner}/{self.repo}/contents",
            self._folder_path(),
        ]
        # Skip the empty folder segment so the root folder gives no trailing slash.
        return "/".join(part for part in parts if part)

    def _raw_url(self, name: str) -> str:
        """Build the raw-content URL of file ``name`` in the algorithm folder."""
        parts = [
            f"https://raw.githubusercontent.com/{self.owner}/{self.repo}/{self.branch}",
            self._folder_path(),
            name,
        ]
        # Skip the empty folder segment so the root folder gives no "//" in the path.
        return "/".join(part for part in parts if part)

    def _list_files(self):
        """
        Yield the "contents" API entry (a dict) of each file in the folder.

        Sub-directories and other non-file entries are skipped.

        :raises ValueError: if the API response does not describe a directory.
        """
        resp = self._session.get(
            self._contents_url(),
            headers={"Accept": "application/vnd.github.object+json"},
            # List the same branch that `get_algorithm` reads from,
            # instead of the repository's default branch.
            params={"ref": self.branch},
        )
        resp.raise_for_status()
        listing = resp.json()
        # Explicit check instead of `assert`, which is stripped under `python -O`.
        if not isinstance(listing, dict) or listing.get("type") != "dir":
            raise ValueError(
                f"Expected a directory listing for folder {self.folder!r} "
                f"of {self.owner}/{self.repo}@{self.branch}"
            )
        for item in listing["entries"]:
            if item["type"] == "file":
                yield item

    def list_algorithms(self) -> List[str]:
        """Return the file names of the algorithm definitions in the folder."""
        # TODO: method to list names vs method to list parsed Algorithm objects?
        return [item["name"] for item in self._list_files()]

    def get_algorithm(self, name: str) -> "Algorithm":
        """
        Load the algorithm definition stored in file ``name`` of the folder.

        :param name: file name as returned by :meth:`list_algorithms`.
        :return: the algorithm built by ``Algorithm.from_ogc_api_record``
            from the file's raw URL.
        """
        # TODO: get url from listing from API request, instead of hardcoding this raw url?
        url = self._raw_url(name)
        # TODO: how to make sure GitHub URL is requested with additional headers?
        return Algorithm.from_ogc_api_record(url)
36 changes: 35 additions & 1 deletion tests/test_algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,12 @@

import pytest

from esa_apex_toolbox.algorithms import Algorithm, InvalidMetadataError, UdpLink
from esa_apex_toolbox.algorithms import (
Algorithm,
GithubAlgorithmRepository,
InvalidMetadataError,
UdpLink,
)

DATA_ROOT = Path(__file__).parent / "data"

Expand Down Expand Up @@ -174,3 +179,32 @@ def test_from_ogc_api_record_url(self, requests_mock):
href="https://esa-apex.test/udp/algorithm01.json",
title="UDP One",
)


class TestGithubAlgorithmRepository:
    """Tests for ``GithubAlgorithmRepository`` (currently run against an actual GitHub repository)."""

    @pytest.fixture
    def repo(self) -> GithubAlgorithmRepository:
        """Repository pointing at the ``algorithm_catalog`` folder of ``ESA-APEx/apex_algorithms``."""
        # TODO: avoid depending on an actual GitHub repository. Mock it instead?
        #       Or run this as an integration test?
        settings = dict(
            owner="ESA-APEx",
            repo="apex_algorithms",
            folder="algorithm_catalog",
        )
        return GithubAlgorithmRepository(**settings)

    def test_list_algorithms(self, repo):
        """The catalog folder lists exactly the known algorithm file."""
        listed = repo.list_algorithms()
        assert listed == ["worldcereal.json"]

    def test_get_algorithm(self, repo):
        """Fetching a listed file yields the fully parsed ``Algorithm``."""
        algorithm = repo.get_algorithm("worldcereal.json")
        expected_link = UdpLink(
            href="https://github.com/ESA-APEX/apex_algorithms/blob/main/openeo_udp/worldcereal_inference.json",
            title="openEO UDP",
        )
        expected = Algorithm(
            id="worldcereal_maize",
            title="ESA worldcereal global maize detector",
            description="A maize detection algorithm.",
            udp_link=expected_link,
        )
        assert algorithm == expected

0 comments on commit 4f022b6

Please sign in to comment.