Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

allow for passing directly the python dict string as response body #59

Merged
merged 18 commits into from
Jan 16, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 37 additions & 23 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,37 +26,51 @@ Suppose that the `Show API request` button generated the following Python code:
```python
#!/usr/bin/env python
import cdsapi
c = cdsapi.Client()
c.retrieve("insitu-glaciers-elevation-mass",
{
"variable": "all",
"product_type": "elevation_change",
"file_version": "20170405",
"format": "tgz"
},
"download.tar.gz")

dataset = "reanalysis-era5-single-levels"
request = {
"product_type": ["reanalysis"],
"variable": [
"10m_u_component_of_wind",
"10m_v_component_of_wind"
],
"year": ["2024"],
"month": ["12"],
"day": ["06"],
"time": ["16:00"],
"data_format": "netcdf",
"download_format": "unarchived",
"area": [58, 6, 55, 9]
}

client = cdsapi.Client()
client.retrieve(dataset, request).download()
```

You can obtain the same results in Julia with the following code:
You can obtain the same results in Julia:

```julia
using CDSAPI

CDSAPI.retrieve("insitu-glaciers-elevation-mass",
CDSAPI.py2ju("""
{
"variable": "all",
"product_type": "elevation_change",
"file_version": "20170405",
"format": "tgz"
}
"""),
"download.tar.gz")
dataset = "reanalysis-era5-single-levels"
request = """{
"product_type": ["reanalysis"],
"variable": [
"10m_u_component_of_wind",
"10m_v_component_of_wind"
],
"year": ["2024"],
"month": ["12"],
"day": ["06"],
"time": ["16:00"],
"data_format": "netcdf",
"download_format": "unarchived",
"area": [58, 6, 55, 9]
}""" # <- notice the multiline string.

CDSAPI.retrieve(dataset, request, "download.nc")
```

We've copied/pasted the code and called the `py2ju` function on the second argument of the `retrieve` function.
The `py2ju` function simply converts the string containing a Python dictionary to an actual Julia dictionary.

Besides the downloaded file, the `retrieve` function also returns a dictionary with metadata:

```
Expand Down
69 changes: 20 additions & 49 deletions src/CDSAPI.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,19 @@ using JSON
"""
retrieve(name, params, filename; wait=1.0)

Retrieves data for `name` from the Climate Data Store
with the specified `params` and stores it in the current
directory as `filename`.
Retrieves the dataset with the given `name` from the Climate Data Store
with the specified `params` (JSON string) and stores it in the
given `filename`.

The client periodically requests the status of the retrieve request.
`wait` is the maximum time (in seconds) between status updates.
The client periodically checks the status of the request and one
can specify the maximum time in seconds to `wait` between updates.
"""
function retrieve(name, params, filename; wait=1.0)
function retrieve(name, params::AbstractString, filename; wait=1.0)
    # Decode the JSON request text, then hand off to the dictionary-based method.
    request = JSON.parse(params)
    return retrieve(name, request, filename; wait=wait)
end

# CDSAPI.parse can be used to convert the request params into a
# Julia dictionary for additional manipulation before retrieval
function retrieve(name, params::AbstractDict, filename; wait=1.0)
creds = Dict()
open(joinpath(homedir(), ".cdsapirc")) do f
for line in readlines(f)
Expand Down Expand Up @@ -44,12 +49,11 @@ function retrieve(name, params, filename; wait=1.0)
throw(e)
end

body = JSON.parse(String(response.body))
data = Dict("status" => "queued")
data = JSON.parse(String(response.body))
endpoint = Dict(response.headers)["location"]

while data["status"] != "successful"
data = HTTP.request("GET",
creds["url"] * "/retrieve/v1/jobs/" * string(body["jobID"]),
data = HTTP.request("GET", endpoint,
["PRIVATE-TOKEN" => creds["key"]]
)
data = JSON.parse(String(data.body))
Expand All @@ -70,7 +74,7 @@ function retrieve(name, params, filename; wait=1.0)
end

response = HTTP.request("GET",
creds["url"] * "/retrieve/v1/jobs/" * string(body["jobID"]) * "/results",
endpoint * "/results",
["PRIVATE-TOKEN" => creds["key"]]
)
body = JSON.parse(String(response.body))
Expand All @@ -80,45 +84,12 @@ function retrieve(name, params, filename; wait=1.0)
end

"""
py2ju(dictstr)

Takes a Python dictionary as string and converts it into Julia's `Dict`

# Examples
```julia-repl
julia> str = \"""{
'format': 'zip',
'variable': 'surface_air_temperature',
'product_type': 'climatology',
'month': '08',
'origin': 'era_interim',
}\""";

julia> CDSAPI.py2ju(str)
Dict{String,Any} with 5 entries:
"format" => "zip"
"month" => "08"
"product_type" => "climatology"
"variable" => "surface_air_temperature"
"origin" => "era_interim"

```
"""
function py2ju(dictstr)
dictstr_cpy = replace(dictstr, "'" => "\"")
lastcomma_pos = findlast(",", dictstr_cpy).start

# if there's no pair after the last comma
if findnext(":", dictstr_cpy, lastcomma_pos) == nothing
# remove the comma
dictstr_cpy = dictstr_cpy[firstindex(dictstr_cpy):(lastcomma_pos-1)] * dictstr_cpy[(lastcomma_pos+1):lastindex(dictstr_cpy)]
end
parse(string)

# removes trailing comma from a list
rx = r",[ \n\r\t]*\]"
dictstr_cpy = replace(dictstr_cpy, rx => "]")
Equivalent to `JSON.parse(string)`.
"""
function parse(string)
    # Thin wrapper: delegates directly to the JSON package's parser.
    return JSON.parse(string)
end

return JSON.parse(dictstr_cpy)
end
# Keep the old `py2ju` name callable: calls are forwarded to `parse`
# and flagged as deprecated.
@deprecate py2ju(string) parse(string)

end # module
27 changes: 0 additions & 27 deletions test/py2ju.jl

This file was deleted.

81 changes: 0 additions & 81 deletions test/retrieve.jl

This file was deleted.

85 changes: 78 additions & 7 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,85 @@ using GRIB, NetCDF

using Test

# list of tests
testfiles = [
"py2ju.jl",
"retrieve.jl",
]
datadir = joinpath(@__DIR__, "data")

@testset "CDSAPI.jl" begin
for testfile in testfiles
include(testfile)
@testset "ERA5 monthly pressure data" begin
    # Download one GRIB field and check both the returned metadata and the
    # contents of the file written to disk.
    filename = joinpath(datadir, "era5.grib")
    request = """{
        "data_format": "grib",
        "product_type": "monthly_averaged_reanalysis",
        "variable": "divergence",
        "pressure_level": "1",
        "year": "2020",
        "month": "06",
        "area": [90,-180,-90,180],
        "time": "00:00"
    }"""

    try
        response = CDSAPI.retrieve(
            "reanalysis-era5-pressure-levels-monthly-means",
            request,
            filename
        )

        @test response isa Dict
        @test isfile(filename)

        GribFile(filename) do datafile
            data = Message(datafile)
            @test data["name"] == "Divergence"
            @test data["level"] == 1
            @test data["year"] == 2020
            @test data["month"] == 6
        end
    finally
        # Clean up even when a step above throws; `force=true` makes this a
        # no-op if the download never produced a file.
        rm(filename; force=true)
    end
end

@testset "Sea ice type data" begin
    # Download a zip archive, extract its first entry and check the NetCDF
    # global attributes of the extracted file.
    filename = joinpath(datadir, "sea_ice_type.zip")
    request = """{
        "variable": "sea_ice_type",
        "region": "northern_hemisphere",
        "cdr_type": "cdr",
        "year": "1979",
        "month": "01",
        "day": "02",
        "version": "3_0",
        "data_format": "zip"
    }"""

    # Path of the extracted entry; stays `nothing` until extraction starts so
    # the cleanup below knows whether there is anything to delete.
    ewq_file = nothing

    try
        response = CDSAPI.retrieve("satellite-sea-ice-edge-type", request, filename)

        @test response isa Dict
        @test isfile(filename)

        # extract contents
        zip_reader = ZipFile.Reader(filename)
        try
            ewq_fileio = zip_reader.files[1]
            ewq_file = joinpath(datadir, ewq_fileio.name)
            write(ewq_file, read(ewq_fileio))
        finally
            # Always release the archive handle, even if extraction fails.
            close(zip_reader)
        end

        # test file contents
        @test ncgetatt(ewq_file, "Global", "time_coverage_start") == "19790102T000000Z"
        @test ncgetatt(ewq_file, "Global", "time_coverage_end") == "19790103T000000Z"
    finally
        # cleanup; `force=true` tolerates files that were never created
        rm(filename; force=true)
        ewq_file === nothing || rm(ewq_file; force=true)
    end
end

@testset "Bad request errors are caught" begin
    # Both a malformed request against a valid dataset and any request against
    # an unknown dataset are expected to raise an `ArgumentError`.
    goodname = "reanalysis-era5-single-levels"
    badname = "bad-dataset"
    badrequest = """{
        "this": "is",
        "a": "bad",
        "re": ["quest"]
    }"""

    # The filename is a placeholder: the test expects the call to throw.
    @test_throws ArgumentError CDSAPI.retrieve(goodname, badrequest, "unreachable")
    @test_throws ArgumentError CDSAPI.retrieve(badname, badrequest, "unreachable")
end
end
Loading