Skip to content

Commit

Permalink
rf: use a single patched_env in annex remote (#84)
Browse files Browse the repository at this point in the history
This PR restricts the use of `patched_env` to the annex remote. The
complete "TRANSFER RETRIEVE" implementation is now wrapped in a single
`patched_env` context.

`patched_env` became necessary because DataLad's `Dataset.get` does not
work properly when the environment variables `GIT_DIR` and `GIT_WORK_TREE`
are set.
  • Loading branch information
christian-monch authored Jan 19, 2025
2 parents 4e44a2a + 21ae62b commit ff8bcdb
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 51 deletions.
98 changes: 50 additions & 48 deletions datalad_remake/annexremotes/remake_remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,8 +152,7 @@ def get_assigned_value(assignment: str) -> str:
verify_file(dataset.pathobj, spec_path, trusted_key_ids)

# Ensure that the spec is actually present and read it
with patched_env(remove=['GIT_DIR', 'GIT_WORK_TREE']):
dataset.get(spec_path, result_renderer='disabled')
dataset.get(spec_path, result_renderer='disabled')
with open(spec_path, 'rb') as f:
spec = json.load(f)

Expand All @@ -169,58 +168,61 @@ def get_assigned_value(assignment: str) -> str:
def transfer_retrieve(self, key: str, file_name: str) -> None:
self.annex.debug(f'TRANSFER RETRIEVE key: {key!r}, file_name: {file_name!r}')

dataset_id = self.config_manager.get('datalad.dataset.id').value
self.annex.debug(f'TRANSFER RETRIEVE dataset_id: {dataset_id!r}')
self.annex.debug(
'TRANSFER RETRIEVE get_allow_untrusted_execution: '
f'{get_allow_untrusted_execution(dataset_id)}'
)
if get_allow_untrusted_execution(dataset_id):
trusted_key_ids = None
lgr.warning('datalad remake remote performs UNTRUSTED execution')
else:
trusted_key_ids = get_trusted_keys()

compute_info, dataset = self.get_compute_info(key, trusted_key_ids)
self.annex.debug(f'TRANSFER RETRIEVE compute_info: {compute_info!r}')

# Perform the computation, and collect the results
lgr.debug('Starting provision')
self.annex.debug('Starting provision')
with provide_context(
dataset,
compute_info['root_version'],
compute_info['input'],
) as worktree:
# Ensure that the method template is present, in case it is annexed.
lgr.debug('Fetching method template')
with patched_env(remove=['GIT_DIR', 'GIT_WORK_TREE']):
# Remove any `GIT_DIR` and `GIT_WORK_TREE` environment variables during
# the computation. This is necessary to avoid interference with the
# `Dataset.get` implementation in DataLad.
with patched_env(remove=['GIT_DIR', 'GIT_WORK_TREE']):
dataset_id = self.config_manager.get('datalad.dataset.id').value
self.annex.debug(f'TRANSFER RETRIEVE dataset_id: {dataset_id!r}')
self.annex.debug(
'TRANSFER RETRIEVE get_allow_untrusted_execution: '
f'{get_allow_untrusted_execution(dataset_id)}'
)
if get_allow_untrusted_execution(dataset_id):
trusted_key_ids = None
lgr.warning('datalad remake remote performs UNTRUSTED execution')
else:
trusted_key_ids = get_trusted_keys()

compute_info, dataset = self.get_compute_info(key, trusted_key_ids)
self.annex.debug(f'TRANSFER RETRIEVE compute_info: {compute_info!r}')

# Perform the computation, and collect the results
lgr.debug('Starting provision')
self.annex.debug('Starting provision')
with provide_context(
dataset,
compute_info['root_version'],
compute_info['input'],
) as worktree:
# Ensure that the method template is present, in case it is annexed.
lgr.debug('Fetching method template')
Dataset(worktree).get(
PatternPath(template_dir) / compute_info['method'],
result_renderer='disabled',
)

lgr.debug('Starting execution')
self.annex.debug('Starting execution')
execute(
worktree,
compute_info['method'],
compute_info['parameter'],
compute_info['output'],
trusted_key_ids,
)
lgr.debug('Starting execution')
self.annex.debug('Starting execution')
execute(
worktree,
compute_info['method'],
compute_info['parameter'],
compute_info['output'],
trusted_key_ids,
)

lgr.debug('Starting collection')
self.annex.debug('Starting collection')
self._collect(
worktree,
dataset,
compute_info['output'],
compute_info['this'],
file_name,
)
lgr.debug('Leaving provision context')
self.annex.debug('Leaving provision context')
lgr.debug('Starting collection')
self.annex.debug('Starting collection')
self._collect(
worktree,
dataset,
compute_info['output'],
compute_info['this'],
file_name,
)
lgr.debug('Leaving provision context')
self.annex.debug('Leaving provision context')

def checkpresent(self, key: str) -> bool:
# See if at least one URL with the remake url-scheme is present
Expand Down
4 changes: 1 addition & 3 deletions datalad_remake/commands/provision_cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@
from datalad_remake import PatternPath
from datalad_remake.utils.chdir import chdir
from datalad_remake.utils.glob import glob
from datalad_remake.utils.patched_env import patched_env
from datalad_remake.utils.read_list import read_list

if TYPE_CHECKING:
Expand Down Expand Up @@ -421,8 +420,7 @@ def install_subdataset(
absolute_path.as_uri(),
]
call_git_lines(args)
with patched_env(remove=['GIT_DIR', 'GIT_WORK_TREE']):
worktree.get(str(subdataset_path), get_data=False, result_renderer='disabled')
worktree.get(str(subdataset_path), get_data=False, result_renderer='disabled')
uninstalled_subdatasets.remove(subdataset_path)
uninstalled_subdatasets.update(get_uninstalled_subdatasets(worktree))

Expand Down

0 comments on commit ff8bcdb

Please sign in to comment.