diff --git a/CHANGES.rst b/CHANGES.rst
index aa3d78d28c..060ce935b7 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -242,6 +242,12 @@ xmatch
- Minor internal change to use VOTable as the response format that include
units, too. [#1375]
+wfau
+^^^^
+
+- Minor enhancement to enable getting tables of images to download instead
+ of just raw URLs. Tables include metadata about deprecation. [#2809]
+
Infrastructure, Utility and Other Changes and Additions
-------------------------------------------------------
diff --git a/astroquery/ukidss/tests/data/image_results.html b/astroquery/ukidss/tests/data/image_results.html
deleted file mode 100644
index 138a48c8eb..0000000000
--- a/astroquery/ukidss/tests/data/image_results.html
+++ /dev/null
@@ -1,22 +0,0 @@
-
-GetImage cut-out results
-
J2000 coords: RA: 83.6330757 Dec:22.014436
-
Programme: All UKIDSS surveys
-
Filter: all
-
Processing ...
-
Connecting to database: UKIDSSDR7PLUS
-
Link | multiframeID | frametype | obstype | filterid | shortname | dateObs | extNum |
-
-show |
-1737581 |
-leavstack |
-OBJECT |
-5 |
-K |
-2007-10-11 13:12:05.5 |
-5 |
-
-1 rows returned.
diff --git a/astroquery/ukidss/tests/data/image_results_noradius.html b/astroquery/ukidss/tests/data/image_results_noradius.html
new file mode 100644
index 0000000000..c66ee27fb4
--- /dev/null
+++ b/astroquery/ukidss/tests/data/image_results_noradius.html
@@ -0,0 +1,43 @@
+
+GetImage cut-out results
+
J2000 coords: RA: 83.633083 Dec:22.0145
+
Programme: UKIDSS Galactic Clusters Survey, GCS
+
Filter: K
+
Processing ...
+
Connecting to database: UKIDSSDR11PLUS
+
Link | multiframeID | frametype | obstype | filterid | shortname | dateObs | extNum | deprecated |
+
+show |
+1737581 |
+leavstack |
+OBJECT |
+5 |
+K |
+2007-10-11 13:12:05.5 |
+5 |
+0 |
+
+show |
+1737579 |
+leav |
+OBJECT |
+5 |
+K |
+2007-10-11 13:12:05.5 |
+5 |
+0 |
+
+show |
+1737587 |
+leav |
+OBJECT |
+5 |
+K |
+2007-10-11 13:12:53.8 |
+5 |
+0 |
+
+3 rows returned.
diff --git a/astroquery/ukidss/tests/data/image_results_radius.html b/astroquery/ukidss/tests/data/image_results_radius.html
new file mode 100644
index 0000000000..0a8503ad79
--- /dev/null
+++ b/astroquery/ukidss/tests/data/image_results_radius.html
@@ -0,0 +1,147 @@
+
+
+
+
+
+
+
+WSA ImageList
+
+
+
+
+
+WSA ImageList | |
+ |
+Not logged in: links will only be returned for frames that are publicly accessible
+Archive Listing
Searching...
+Survey: UKIDSS Galactic Clusters Survey, GCS
+Waveband: K
+Minimum RA: 5.551333333333333 hours Maximum RA: 5.599333333333333 hours
+Minimum Dec: 21.68116666666667 degrees Maximum Dec: 22.347833333333334 degrees
+
+
Using database: UKIDSSDR11PLUS
+
View column link | shows jpeg images of multiframe in a new window plus links to download file(s) |
+
Img column link | download the RICE compressed FITS image file. Use View column link to retrieve uncompressed images. |
+
Cat column link | download the FITS catalogue file. |
+
+
begin row 1
View | Img | Cat |
+multiframeID |
+frameType |
+obstype |
+raBase |
+decBase |
+shortname |
+exptime |
+dateObs |
+project |
+numDetectors |
+ukirtRunNo |
+
+
+view |
+FITS |
+FITS |
+1737553 |
+leavstack |
+OBJECT |
++5.5777306 |
++21.7913333 |
+K |
++10.000000 |
+2007-10-11 13:08:30.0 |
+U/UKIDSS/GCS21 |
+4 |
+1802 |
+
+
+view |
+FITS |
+ |
+1737551 |
+leav |
+OBJECT |
++5.5777306 |
++21.7913333 |
+K |
++10.000000 |
+2007-10-11 13:08:30.0 |
+U/UKIDSS/GCS21 |
+4 |
+1802 |
+
+
+view |
+FITS |
+ |
+1737559 |
+leav |
+OBJECT |
++5.5777306 |
++21.7913333 |
+K |
++10.000000 |
+2007-10-11 13:09:18.6 |
+U/UKIDSS/GCS21 |
+4 |
+1806 |
+
+
+view |
+FITS |
+FITS |
+1737581 |
+leavstack |
+OBJECT |
++5.5935528 |
++21.7913333 |
+K |
++10.000000 |
+2007-10-11 13:12:05.5 |
+U/UKIDSS/GCS21 |
+4 |
+1818 |
+
+
+view |
+FITS |
+ |
+1737579 |
+leav |
+OBJECT |
++5.5935528 |
++21.7913333 |
+K |
++10.000000 |
+2007-10-11 13:12:05.5 |
+U/UKIDSS/GCS21 |
+4 |
+1818 |
+
+
+view |
+FITS |
+ |
+1737587 |
+leav |
+OBJECT |
++5.5935528 |
++21.7913333 |
+K |
++10.000000 |
+2007-10-11 13:12:53.8 |
+U/UKIDSS/GCS21 |
+4 |
+1822 |
+
+
row(s) 1 to 6 displayed.
+Back to form (uses Javascript)
+
diff --git a/astroquery/ukidss/tests/test_ukidss.py b/astroquery/ukidss/tests/test_ukidss.py
index d95e6268fb..d16afb154a 100644
--- a/astroquery/ukidss/tests/test_ukidss.py
+++ b/astroquery/ukidss/tests/test_ukidss.py
@@ -14,7 +14,8 @@
from ...exceptions import InvalidQueryError
DATA_FILES = {"vo_results": "vo_results.html",
- "image_results": "image_results.html",
+ "image_results_noradius": "image_results_noradius.html",
+ "image_results_radius": "image_results_radius.html",
"image": "image.fits",
"votable": "votable.xml",
"error": "error.html"
@@ -74,9 +75,12 @@ def parse_coordinates_mock_return(c):
def get_mockreturn(method='GET', url='default_url',
params=None, timeout=10, **kwargs):
- if "Image" in url:
- filename = DATA_FILES["image_results"]
- url = "Image_URL"
+ if "GetImage" in url:
+ filename = DATA_FILES["image_results_noradius"]
+ url = "GetImage"
+ elif "ImageList" in url:
+ filename = DATA_FILES["image_results_radius"]
+ url = "ImageList"
elif "SQL" in url:
filename = DATA_FILES["vo_results"]
url = "SQL_URL"
@@ -114,23 +118,27 @@ def test_get_images_async_1():
def test_get_images_async_2(patch_get, patch_get_readable_fileobj):
+ # debug check: get the table first & make sure it has 'deprecated' column as expected
+ tbl = ukidss.core.Ukidss.get_image_table(icrs_skycoord, programme_id="GPS")
+ assert "deprecated" in tbl.colnames
+
image_urls = ukidss.core.Ukidss.get_images_async(icrs_skycoord, programme_id="GPS")
- assert len(image_urls) == 1
+ assert len(image_urls) == 3
def test_get_image_list(patch_get, patch_get_readable_fileobj):
urls = ukidss.core.Ukidss.get_image_list(
icrs_skycoord, frame_type="all", waveband="all", programme_id="GPS")
print(urls)
- assert len(urls) == 1
+ assert len(urls) == 3
def test_extract_urls():
- with open(data_path(DATA_FILES["image_results"]), 'r') as infile:
+ with open(data_path(DATA_FILES["image_results_radius"]), 'r') as infile:
html_in = infile.read()
- urls = ukidss.core.Ukidss.extract_urls(html_in)
- assert len(urls) == 1
+ urls = ukidss.core.Ukidss._extract_urls(html_in)
+ assert len(urls) == 14
def test_query_region(patch_get, patch_get_readable_fileobj):
diff --git a/astroquery/ukidss/tests/test_ukidss_remote.py b/astroquery/ukidss/tests/test_ukidss_remote.py
index 42966fc650..a819d9bd45 100644
--- a/astroquery/ukidss/tests/test_ukidss_remote.py
+++ b/astroquery/ukidss/tests/test_ukidss_remote.py
@@ -58,3 +58,20 @@ def test_query_region_constraints(self):
assert isinstance(table_constraint, Table)
assert len(table_noconstraint) >= len(table_constraint)
+
+ def test_deprecated_image_list(self):
+ """
+ Regression test for Issue 2808
+ """
+ crd = SkyCoord(ra=211.3194905, dec=54.413845, unit=(u.deg, u.deg))
+ uk = ukidss.core.UkidssClass()
+ uk.database = 'UHSDR2'
+ result = uk.get_image_list(crd, waveband='all', ignore_deprecated=True)
+
+ # this image is not deprecated (deprecated==0)
+ # can't check for exact URL match because URLs include generated 'uniq' strings
+ assert any("file=/disk73/wsa/ingest/fits/20190614_v5/w20190614_00626_st.fit"
+ in x for x in result)
+ # this image is deprecated (deprecated==80)
+ assert not any("file=/disk53/wsa/ingest/fits/20150129_v5/w20150129_02901_st.fit"
+ in x for x in result)
diff --git a/astroquery/wfau/core.py b/astroquery/wfau/core.py
index 7df464b142..9dc946810c 100644
--- a/astroquery/wfau/core.py
+++ b/astroquery/wfau/core.py
@@ -6,12 +6,17 @@
import time
from math import cos, radians
import requests
-from bs4 import BeautifulSoup
+from bs4 import BeautifulSoup
+try:
+    from bs4 import XMLParsedAsHTMLWarning
+except ImportError:  # older bs4 releases, which we still support, lack this warning
+    XMLParsedAsHTMLWarning = type("XMLParsedAsHTMLWarning", (UserWarning,), {})  # inert stand-in: filters on it match nothing else
from io import BytesIO, StringIO
import astropy.units as u
import astropy.coordinates as coord
import astropy.io.votable as votable
+from astropy.io import ascii
from ..query import QueryWithLogin
from ..exceptions import InvalidQueryError, TimeoutError, NoResultsWarning
@@ -290,10 +295,55 @@ def get_images_async(self, coordinates, *, waveband='all', frame_type='stack',
show_progress=show_progress)
for url in image_urls]
- def get_image_list(self, coordinates, *, waveband='all', frame_type='stack',
- image_width=1 * u.arcmin, image_height=None,
- radius=None, database=None,
- programme_id=None, get_query_payload=False):
+ def get_image_list(self, coordinates, *, radius=None, ignore_deprecated=True,
+ get_query_payload=False, **kwargs):
+ """
+ See `get_image_table` for a full list of options.
+
+ This method will return _only_ the URLs requested as a list of URLs.
+
+ Parameters
+ ----------
+ ignore_deprecated : bool
+ If set (default: True), only images with the ``deprecated`` flag
+ set to zero will be included. Only applied when ``radius`` is None.
+
+ Returns
+ -------
+ url_list : list of image urls
+
+ """
+ image_table = self.get_image_table(coordinates, radius=radius,
+ get_query_payload=get_query_payload,
+ **kwargs)
+ if get_query_payload:
+ # actually a payload, not a table
+ return image_table
+
+ if ignore_deprecated and radius is None:
+ image_urls = image_table[image_table['deprecated'] == 0]['Link']
+ elif radius is not None:
+ image_urls = image_table['Img']
+ else:
+ image_urls = image_table['Link']
+
+ # different links for radius queries and simple ones
+ if radius is not None:
+ image_urls = [link for link in image_urls if
+ ('fits_download' in link and '_cat.fits'
+ not in link and '_two.fit' not in link)]
+ else:
+ # Not sure this is necessary any more (as of #2809), but it seems
+ # harmless and I'm not removing it until I'm sure
+ image_urls = [link.replace("getImage", "getFImage")
+ for link in image_urls]
+
+ return image_urls
+
+ def get_image_table(self, coordinates, *, waveband='all', frame_type='stack',
+ image_width=1 * u.arcmin, image_height=None,
+ radius=None, database=None,
+ programme_id=None, get_query_payload=False):
"""
Function that returns a list of urls from which to download the FITS
images.
@@ -337,7 +387,9 @@ def get_image_list(self, coordinates, *, waveband='all', frame_type='stack',
Returns
-------
- url_list : list of image urls
+ table : Table
+ An astropy table containing the metadata table, including URLs, of
+ the requested files.
"""
@@ -398,22 +450,49 @@ def get_image_list(self, coordinates, *, waveband='all', frame_type='stack',
if get_query_payload:
return request_payload
- response = self._wfau_send_request(query_url, request_payload)
- response = self._check_page(response.url, "row")
+ initial_response = self._wfau_send_request(query_url, request_payload)
+ self._penultimate_response = initial_response
+ response = self._check_page(initial_response.url, "row")
+ self._last_response = response
- image_urls = self.extract_urls(response.text)
- # different links for radius queries and simple ones
- if radius is not None:
- image_urls = [link for link in image_urls if
- ('fits_download' in link and '_cat.fits'
- not in link and '_two.fit' not in link)]
- else:
- image_urls = [link.replace("getImage", "getFImage")
- for link in image_urls]
+ return self.parse_imagequery_page(response.text, radius=radius)
- return image_urls
+ def parse_imagequery_page(self, html_in, radius=None):
+ """
+ Parse the image metadata page
+ """
+ ahref = re.compile(r'href="([a-zA-Z0-9_\.&\?=%/:-]+)"')
- def extract_urls(self, html_in):
+ if radius is not None:
+ html = "\n".join([
+ # for radius searches, "FITS" needs to be s/FITS/url/
+ row.replace(">FITS<", ">{}<".format(ahref.search(row).groups()[0])) if ">FITS<" in row else
+ row
+ for row in html_in.split("\n")])
+ with warnings.catch_warnings():
+ # this is really html; the xml parser doesn't work
+ warnings.simplefilter(action="ignore", category=XMLParsedAsHTMLWarning)
+ soup = BeautifulSoup(html, features='html5lib')
+ httb = soup.findAll('table')[2]
+ firstrow = httb.findAll('tr')[0]
+ for td in firstrow.findAll('td'):
+ td.name = 'th'
+ return ascii.read(str(httb), format='html')
+
+ else:
+ html = "\n".join([
+ # for ascii.read: th -> header
+ row.replace("td", "th") if row.startswith("show<", ">{}<".format(ahref.search(row).groups()[0])) if ">show<" in row else
+ row
+ for row in html_in.split("\n")])
+ with warnings.catch_warnings():
+ # ascii.read uses bs4, result is html, not xml, despite xml tag
+ warnings.simplefilter(action="ignore", category=XMLParsedAsHTMLWarning)
+ return ascii.read(html, format='html')
+
+ def _extract_urls(self, html_in):
"""
Helper function that uses regexps to extract the image urls from the
given HTML.
@@ -601,7 +680,7 @@ def _parse_result(self, response, *, verbose=False):
-------
table : `~astropy.table.Table`
"""
- table_links = self.extract_urls(response.text)
+ table_links = self._extract_urls(response.text)
# keep only one link that is not a webstart
if len(table_links) == 0:
raise Exception("No VOTable found on returned webpage!")