Skip to content

Commit

Permalink
merge with main
Browse files Browse the repository at this point in the history
  • Loading branch information
agrouaze committed Jan 29, 2025
2 parents 9f60c9d + bdeb752 commit ba820a2
Show file tree
Hide file tree
Showing 29 changed files with 767 additions and 333 deletions.
5 changes: 5 additions & 0 deletions .github/release.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# GitHub release-notes configuration: omit bot commits from the
# auto-generated changelog.
changelog:
  exclude:
    authors:
      - dependabot
      - pre-commit-ci
98 changes: 98 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
name: CI

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]
  workflow_dispatch:

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  detect-skip-ci-trigger:
    name: "Detect CI Trigger: [skip-ci]"
    # fix: `&&` binds tighter than `||` in Actions expressions, so without
    # parentheses the repository guard was skipped for every pull_request
    # event; group the event check so the guard always applies.
    if: |
      github.repository == 'umr-lops/cdsodatacli'
      && (github.event_name == 'push' || github.event_name == 'pull_request')
    runs-on: ubuntu-latest
    outputs:
      triggered: ${{ steps.detect-trigger.outputs.trigger-found }}
    steps:
      - uses: actions/checkout@v4
        with:
          # ci-trigger inspects the latest commit message, needs 2 commits
          fetch-depth: 2
      - uses: xarray-contrib/ci-trigger@v1
        id: detect-trigger
        with:
          keyword: "[skip-ci]"

  ci:
    name: ${{ matrix.os }} py${{ matrix.python-version }}
    runs-on: ${{ matrix.os }}
    needs: detect-skip-ci-trigger

    if: needs.detect-skip-ci-trigger.outputs.triggered == 'false'

    defaults:
      run:
        shell: bash -l {0}

    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.10", "3.11", "3.12"]
        os: ["ubuntu-latest", "macos-latest", "windows-latest"]

    steps:
      - name: Checkout the repository
        uses: actions/checkout@v4
        with:
          # need to fetch all tags to get a correct version
          fetch-depth: 0  # fetch all branches and tags

      - name: Setup environment variables
        run: |
          echo "TODAY=$(date +'%Y-%m-%d')" >> $GITHUB_ENV
          echo "CONDA_ENV_FILE=ci/requirements/environment.yaml" >> $GITHUB_ENV

      - name: Setup micromamba
        uses: mamba-org/setup-micromamba@v2
        with:
          environment-file: ${{ env.CONDA_ENV_FILE }}
          environment-name: cdsodatacli-tests
          cache-environment: true
          cache-environment-key: "${{runner.os}}-${{runner.arch}}-py${{matrix.python-version}}-${{env.TODAY}}-${{hashFiles(env.CONDA_ENV_FILE)}}"
          create-args: >-
            python=${{matrix.python-version}}
            conda

      - name: Install cdsodatacli
        run: |
          python -m pip install --no-deps -e .

      - name: Import cdsodatacli
        run: |
          python -c "import cdsodatacli"

      - name: Create secrets.yml from GitHub Secret
        run: |
          echo "${{ secrets.SECRET_FOR_TEST_DOWNLOAD_CDSE }}" > secrets.yml
        shell: bash

      - name: Export secrets as environment variables
        run: |
          export DEFAULT_LOGIN_CDSE=$(grep 'DEFAULT_LOGIN_CDSE' secrets.yml | cut -d':' -f2 | tr -d ' ')
          export DEFAULT_PASSWD_CDSE=$(grep 'DEFAULT_PASSWD_CDSE' secrets.yml | cut -d':' -f2 | tr -d ' ')
          echo "DEFAULT_LOGIN_CDSE=${DEFAULT_LOGIN_CDSE}" >> $GITHUB_ENV
          echo "DEFAULT_PASSWD_CDSE=${DEFAULT_PASSWD_CDSE}" >> $GITHUB_ENV
        shell: bash

      - name: Run tests
        env:
          DEFAULT_LOGIN_CDSE: ${{ env.DEFAULT_LOGIN_CDSE }}
          DEFAULT_PASSWD_CDSE: ${{ env.DEFAULT_PASSWD_CDSE }}
        run: |
          python -m pytest --cov=cdsodatacli
2 changes: 1 addition & 1 deletion .github/workflows/publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -51,4 +51,4 @@ jobs:
path: dist/

- name: Publish to PyPI
uses: pypa/gh-action-pypi-publish@f7600683efdcb7656dec5b29656edb7bc586e597
uses: pypa/gh-action-pypi-publish@67339c736fd9354cd4f8cb0b744f2b82a74b5c70
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,9 @@ fabric.properties
# modules.xml
# .idea/misc.xml
# *.ipr
.idea

localconfig.yml
# Sonarlint plugin
.idea/**/sonarlint/

Expand Down Expand Up @@ -616,3 +618,5 @@ MigrationBackup/

# End of https://www.gitignore.io/api/osx,python,pycharm,windows,visualstudio,visualstudiocode
.cdsodatacli/api_cache/

.env
30 changes: 18 additions & 12 deletions cdsodatacli/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
MAX_SESSION_PER_ACCOUNT,
)
from cdsodatacli.query import fetch_data
from cdsodatacli.utils import conf, test_safe_archive, test_safe_spool
from cdsodatacli.utils import conf, check_safe_in_archive, check_safe_in_spool, check_safe_in_outputdir
from cdsodatacli.product_parser import ExplodeSAFE
from collections import defaultdict

Expand Down Expand Up @@ -166,10 +166,12 @@ def filter_product_already_present(cpt, df, outputdir, force_download=False):
to_download = False
if force_download:
to_download = True
if test_safe_archive(safename=safename_product):
if check_safe_in_archive(safename=safename_product):
cpt["archived_product"] += 1
elif test_safe_spool(safename=safename_product):
elif check_safe_in_spool(safename=safename_product):
cpt["in_spool_product"] += 1
elif check_safe_in_outputdir(outputdir=outputdir,safename=safename_product):
cpt["in_outdir_product"] += 1
else:
to_download = True
cpt["product_absent_from_local_disks"] += 1
Expand Down Expand Up @@ -206,11 +208,11 @@ def download_list_product_multithread_v2(
v2 is handling multi account round-robin and token semaphore files
Parameters
----------
list_id (list)
list_safename (list)
outputdir (str)
list_id (list): product hash
list_safename (list): product names
outputdir (str): the directory where to store the product collected
hideProgressBar (bool): True -> no tqdm progress bar in stdout
account_group (str)
account_group (str): the name of the group of CDSE logins to be used
check_on_disk (bool): True -> if the product is in the spool dir or in archive dir the download is skipped
Returns
Expand Down Expand Up @@ -341,7 +343,7 @@ def download_list_product_multithread_v2(


def download_list_product(
list_id, list_safename, outputdir, specific_account, hideProgressBar=False
list_id, list_safename, outputdir, specific_account,specific_passwd=None, hideProgressBar=False
):
"""
Expand All @@ -350,8 +352,10 @@ def download_list_product(
list_id (list) of string could be hash (eg a1e74573-aa77-55d6-a08d-7b6612761819) provided by CDS Odata
list_safename (list) of string basename of SAFE product (eg. S1A_IW_GRDH_1SDV_20221013T065030_20221013T0650...SAFE)
outputdir (str) path where product will be stored
specific_account (str): CDSE account to use
specific_passwd (str): CDSE password associated to specific_account (optional)
hideProgressBar (bool): True -> no tqdm progress bar
specific_account (str):
Returns
-------
Expand All @@ -369,7 +373,7 @@ def download_list_product(
login,
path_semphore_token,
) = get_bearer_access_token(
quiet=hideProgressBar, specific_account=specific_account
quiet=hideProgressBar, specific_account=specific_account,passwd=specific_passwd
)
else: # select randomly one token among existing
path_semphore_token = random.choice(lst_usable_tokens)
Expand All @@ -394,9 +398,9 @@ def download_list_product(
id_product = list_id[ii]
url_product = conf["URL_download"] % id_product
safename_product = list_safename[ii]
if test_safe_archive(safename=safename_product):
if check_safe_in_archive(safename=safename_product):
cpt["archived_product"] += 1
elif test_safe_spool(safename=safename_product):
elif check_safe_in_spool(safename=safename_product):
cpt["in_spool_product"] += 1
else:
cpt["product_absent_from_local_disks"] += 1
Expand Down Expand Up @@ -533,6 +537,7 @@ def add_missing_cdse_hash_ids_in_listing(listing_path):
"sensormode": [ExplodeSAFE(jj).mode for jj in list_safe_a],
"producttype": [ExplodeSAFE(jj).product[0:3] for jj in list_safe_a],
"Attributes": np.tile([None], len(list_safe_a)),
"id_query":np.tile(['dummy2getProducthash'], len(list_safe_a)),
}
)
sea_min_pct = 0
Expand Down Expand Up @@ -688,6 +693,7 @@ def download_list_product_sequential(

def main():
"""
download data from an existing listing of product
package as an alias for this method
Returns
-------
Expand Down
11 changes: 7 additions & 4 deletions cdsodatacli/fetch_access_token.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,11 @@
MAX_VALIDITY_ACCESS_TOKEN = 600 # sec (defined by CDS API)


def get_bearer_access_token(quiet=True, specific_account=None, account_group="logins"):
def get_bearer_access_token(quiet=True, specific_account=None,passwd=None, account_group="logins"):
"""
OData access token (validity=600sec)
specific_account (str) [optional, default=None -> first available account in config file]
passwd (str): [optional, default is to search in config files]
Returns
-------
Expand All @@ -24,11 +25,13 @@ def get_bearer_access_token(quiet=True, specific_account=None, account_group="lo
if specific_account is None:
all_accounts = list(conf[account_group].keys())
login = random.choice(all_accounts)
passwd = conf[account_group][all_accounts[0]]
if passwd is None:
passwd = conf[account_group][all_accounts[0]]
else:
login = specific_account
logging.debug("conf[account_group] %s", type(conf[account_group]))
passwd = conf[account_group][specific_account]
if passwd is None:
logging.debug("conf[account_group] %s", type(conf[account_group]))
passwd = conf[account_group][specific_account]
if quiet:
prefix = "curl -s "
else:
Expand Down
75 changes: 68 additions & 7 deletions cdsodatacli/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,24 @@
import cdsodatacli
from yaml import CLoader as Loader
import datetime
import pandas as pd
import pdb
import json

local_config_pontential_path = os.path.join(
os.path.dirname(cdsodatacli.__file__), "localconfig.yml"
)

config_path = os.path.join(os.path.dirname(cdsodatacli.__file__), "config.yml")
if os.path.exists(local_config_pontential_path):
config_path = local_config_pontential_path
used_config_path = local_config_pontential_path
else:
config_path = os.path.join(os.path.dirname(cdsodatacli.__file__), "config.yml")
logging.info("config path: %s", config_path)
stream = open(config_path, "r")
used_config_path = config_path
logging.info("config path that is used: %s", used_config_path)
stream = open(used_config_path, "r")
conf = load(stream, Loader=Loader)


def test_safe_spool(safename):
def check_safe_in_spool(safename):
"""
Parameters
Expand Down Expand Up @@ -48,6 +51,36 @@ def test_safe_spool(safename):
logging.debug("present_in_spool : %s", present_in_spool)
return present_in_spool

def check_safe_in_outputdir(outputdir, safename):
    """
    Check whether a product is already present in the output directory.

    Parameters
    ----------
    outputdir (str): directory where downloaded products are stored
    safename (str): basename of the SAFE product

    Returns
    -------
    present_in_outdir (bool): True -> the product is already in the output dir
    """
    present_in_outdir = False
    # a product may be stored unpacked (.SAFE), zipped (.SAFE.zip),
    # or with the .SAFE suffix replaced by .zip
    for uu in ["", ".zip", "replaced"]:
        if uu == "":
            potential_file = os.path.join(outputdir, safename)
        elif uu == ".zip":
            potential_file = os.path.join(outputdir, safename + ".zip")
        elif uu == "replaced":
            potential_file = os.path.join(
                outputdir, safename.replace(".SAFE", ".zip")
            )
        else:
            # fix: NotImplemented is not an exception class (raising it is a
            # TypeError); raise the proper exception type
            raise NotImplementedError(uu)
        if os.path.exists(potential_file):
            present_in_outdir = True
            break
    # fix: log label said "present_in_spool" for the outputdir check
    logging.debug("present_in_outdir : %s", present_in_outdir)
    return present_in_outdir


def WhichArchiveDir(safe):
"""
Expand All @@ -70,6 +103,10 @@ def WhichArchiveDir(safe):
satdir = "sentinel-1a"
elif sat == "S1B":
satdir = "sentinel-1b"
elif sat == 'S1C':
satdir = "sentinel-1c"
elif sat == 'S1D':
satdir = "sentinel-1d"
elif sat =='S2B':
satdir = 'sentinel-2b'
elif sat =='S2A':
Expand All @@ -90,7 +127,7 @@ def WhichArchiveDir(safe):
return gooddir


def test_safe_archive(safename):
def check_safe_in_archive(safename):
"""
Parameters
Expand Down Expand Up @@ -120,3 +157,27 @@ def test_safe_archive(safename):
if present_in_archive:
logging.debug('the product is stored in : %s',arch_potential_file)
return present_in_archive


def convert_json_opensearch_query_to_listing_safe_4_dowload(json_path) -> str:
    """
    Convert an OpenSearch JSON query result (CDSE metadata) into a 2-column
    listing (id,safename) usable by the download functions.

    Parameters
    ----------
    json_path (str): full path of the OpenSearch file giving the metadata from the CDSE

    Returns
    -------
    output_txt (str): path of the listing with 2 columns: id,safename
    """
    logging.info("input json file: %s", json_path)
    with open(json_path, "r") as f:
        data = json.load(f)
    df = pd.json_normalize(data["features"])
    sub = df[["id", "properties.title"]]
    # fix: drop_duplicates() returns a new DataFrame (not in-place);
    # the result was previously discarded, leaving duplicates in the listing
    sub = sub.drop_duplicates()
    output_txt = json_path.replace(".json", ".txt")
    sub.to_csv(output_txt, header=False, index=False)
    logging.info("output_txt : %s", output_txt)
    return output_txt

23 changes: 23 additions & 0 deletions ci/install-upstream-dev.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#!/usr/bin/env bash
# Install nightly / development versions of upstream dependencies for CI.

# pick the fastest conda-compatible package manager available
if command -v micromamba >/dev/null; then
  conda=micromamba
elif command -v mamba >/dev/null; then
  conda=mamba
else
  conda=conda
fi
# fix: use the detected tool ($conda) — the hard-coded `conda` binary may not
# exist on runners that only provide micromamba/mamba
$conda remove -y --force cytoolz numpy xarray construct toolz fsspec python-dateutil pandas
# nightly wheels for the scientific-python stack
python -m pip install \
  -i https://pypi.anaconda.org/scientific-python-nightly-wheels/simple \
  --no-deps \
  --pre \
  --upgrade \
  numpy \
  pandas \
  geopandas
# development versions straight from the upstream repositories
python -m pip install --upgrade \
  git+https://github.com/construct/construct \
  git+https://github.com/pytoolz/toolz \
  git+https://github.com/fsspec/filesystem_spec \
  git+https://github.com/dateutil/dateutil
Loading

0 comments on commit ba820a2

Please sign in to comment.