Skip to content

Commit

Permalink
Merge pull request #3014 from esdc-esac-esa-int/ESA_gaia_GAIAMNGT-170…
Browse files Browse the repository at this point in the history
…0_load_data

Gaia: change the signature of the method load_data
  • Loading branch information
bsipocz authored Oct 16, 2024
2 parents 22db7f2 + 58f8692 commit aa35035
Show file tree
Hide file tree
Showing 10 changed files with 538 additions and 54 deletions.
7 changes: 7 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -181,8 +181,13 @@ gaia

- Fix method search_async_jobs in the class TapPlus. [#2967]

- Change the signature of the function load_data: the parameter output_file that defined the file where the results were
saved, is replaced by boolean parameter dump_to_file, that in case it is true, a compressed directory named "datalink_output.zip" with
all the DataLink files is made. So the users cannot specified the output file anymore [#3014]

- New retrieval types for datalink (Gaia DR4 release). [#3110]


jplhorizons
^^^^^^^^^^^

Expand Down Expand Up @@ -423,6 +428,8 @@ gaia
epoch photometry service to return all data associated to a given source.
[#2376]

- New retrieval types for datalink (Gaia DR4 release). [#3110]

- Default Gaia catalog updated to DR3. [#2596]

heasarc
Expand Down
85 changes: 48 additions & 37 deletions astroquery/gaia/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,12 @@
Created on 30 jun. 2016
Modified on 18 Ene. 2022 by mhsarmiento
"""
import datetime
import json
import os
import shutil
import zipfile
from collections.abc import Iterable
from datetime import datetime, timezone
from pathlib import Path

from astropy import units
from astropy import units as u
Expand All @@ -28,6 +27,7 @@
from astropy.io import votable
from astropy.table import Table
from astropy.units import Quantity
from astropy.utils.decorators import deprecated_renamed_argument
from requests import HTTPError

from astroquery import log
Expand Down Expand Up @@ -168,9 +168,11 @@ def logout(self, *, verbose=False):
except HTTPError:
log.error("Error logging out data server")

@deprecated_renamed_argument("output_file", None, since="0.4.8")
def load_data(self, ids, *, data_release=None, data_structure='INDIVIDUAL', retrieval_type="ALL",
linking_parameter='SOURCE_ID', valid_data=False, band=None, avoid_datatype_check=False,
format="votable", output_file=None, overwrite_output_file=False, verbose=False):
format="votable", dump_to_file=False, overwrite_output_file=False, verbose=False,
output_file=None):
"""Loads the specified table
TAP+ only
Expand Down Expand Up @@ -218,44 +220,53 @@ def load_data(self, ids, *, data_release=None, data_structure='INDIVIDUAL', retr
By default, this value will be set to False. If it is set to 'true'
the Datalink items tags will not be checked.
format : str, optional, default 'votable'
loading format. Other available formats are 'csv', 'ecsv','votable_plain' and 'fits'
output_file : string or pathlib.PosixPath, optional, default None
file where the results are saved.
If it is not provided, the http response contents are returned.
loading format. Other available formats are 'csv', 'ecsv','votable_plain', 'json' and 'fits'
dump_to_file: boolean, optional, default False.
If it is true, a compressed directory named "datalink_output_<time_stamp>.zip" with all the DataLink
files is made in the current working directory. The <time_stamp> format follows the ISO 8601 standard:
"yyyymmddThhmmss".
overwrite_output_file : boolean, optional, default False
To overwrite the output_file if it already exists.
To overwrite the output file ("datalink_output.zip") if it already exists.
verbose : bool, optional, default 'False'
flag to display information about the process
Returns
-------
A dictionary where the keys are the file names and its value is a list of astropy.table.table.Table objects
"""
now = datetime.now(timezone.utc)
now_formatted = now.strftime("%Y%m%d_%H%M%S")
temp_dirname = "temp_" + now_formatted
downloadname_formated = "download_" + now_formatted

output_file_specified = False
if output_file is None:

now = datetime.datetime.now(datetime.timezone.utc)
if not dump_to_file:
now_formatted = now.strftime("%Y%m%d_%H%M%S")
temp_dirname = "temp_" + now_formatted
downloadname_formated = "download_" + now_formatted
output_file = os.path.join(os.getcwd(), temp_dirname, downloadname_formated)

else:
output_file = 'datalink_output_' + now.strftime("%Y%m%dT%H%M%S") + '.zip'
output_file_specified = True

if isinstance(output_file, str):
if not output_file.lower().endswith('.zip'):
output_file = output_file + '.zip'
elif isinstance(output_file, Path):
if not output_file.suffix.endswith('.zip'):
output_file.with_suffix('.zip')

output_file = os.path.abspath(output_file)
log.info(f"DataLink products will be stored in the {output_file} file")

if not overwrite_output_file and os.path.exists(output_file):
raise ValueError(f"{output_file} file already exists. Please use overwrite_output_file='True' to "
f"overwrite output file.")

path = os.path.dirname(output_file)

log.debug(f"Directory where the data will be saved: {path}")

if path != '':
if not os.path.isdir(path):
try:
os.mkdir(path)
except FileExistsError:
log.warn("Path %s already exist" % path)
except OSError:
log.error("Creation of the directory %s failed" % path)

if avoid_datatype_check is False:
# we need to check params
rt = str(retrieval_type).upper()
Expand Down Expand Up @@ -298,14 +309,7 @@ def load_data(self, ids, *, data_release=None, data_structure='INDIVIDUAL', retr
if linking_parameter != 'SOURCE_ID':
params_dict['LINKING_PARAMETER'] = linking_parameter

if path != '':
try:
os.mkdir(path)
except FileExistsError:
log.error("Path %s already exist" % path)
except OSError:
log.error("Creation of the directory %s failed" % path)

files = dict()
try:
self.__gaiadata.load_data(params_dict=params_dict, output_file=output_file, verbose=verbose)
files = Gaia.__get_data_files(output_file=output_file, path=path)
Expand All @@ -314,6 +318,9 @@ def load_data(self, ids, *, data_release=None, data_structure='INDIVIDUAL', retr
finally:
if not output_file_specified:
shutil.rmtree(path)
else:
for file in files.keys():
os.remove(os.path.join(os.getcwd(), path, file))

if verbose:
if output_file_specified:
Expand All @@ -329,18 +336,21 @@ def load_data(self, ids, *, data_release=None, data_structure='INDIVIDUAL', retr
@staticmethod
def __get_data_files(output_file, path):
files = {}
if zipfile.is_zipfile(output_file):
with zipfile.ZipFile(output_file, 'r') as zip_ref:
zip_ref.extractall(os.path.dirname(output_file))
extracted_files = []

with zipfile.ZipFile(output_file, "r") as zip_ref:
extracted_files.extend(zip_ref.namelist())
zip_ref.extractall(os.path.dirname(output_file))

# r=root, d=directories, f = files
for r, d, f in os.walk(path):
for file in f:
if file.lower().endswith(('.fits', '.xml', '.csv', '.ecsv')):
if file in extracted_files:
files[file] = os.path.join(r, file)

for key, value in files.items():
if '.fits' in key:

if key.endswith('.fits'):
tables = []
with fits.open(value) as hduList:
num_hdus = len(hduList)
Expand All @@ -349,19 +359,20 @@ def __get_data_files(output_file, path):
Gaia.correct_table_units(table)
tables.append(table)
files[key] = tables
elif '.xml' in key:

elif key.endswith('.xml'):
tables = []
for table in votable.parse(value).iter_tables():
tables.append(table)
files[key] = tables

elif '.csv' in key:
elif key.endswith('.csv'):
tables = []
table = Table.read(value, format='ascii.csv', fast_reader=False)
tables.append(table)
files[key] = tables

elif '.json' in key:
elif key.endswith('.json'):
tables = []
with open(value) as f:
data = json.load(f)
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
3 changes: 2 additions & 1 deletion astroquery/gaia/tests/setup_package.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ def get_package_data():
paths = [os.path.join('data', '*.vot'),
os.path.join('data', '*.vot.gz'),
os.path.join('data', '*.json'),
os.path.join('data', '*.ecsv')
os.path.join('data', '*.ecsv'),
os.path.join('data', '*.zip')
] # etc, add other extensions
# you can also enlist files individually by names
# finally construct and return a dict for the sub module
Expand Down
Loading

0 comments on commit aa35035

Please sign in to comment.