From a385242bcbbd5f2850ad96aab3b70e53a88bbfc5 Mon Sep 17 00:00:00 2001 From: Morten Piibeleht Date: Tue, 1 Nov 2022 09:33:29 +1300 Subject: [PATCH 1/7] Fix nightly tests (#50) * Use Pkg.develop instead of LOAD_PATH * Expand CI test matrix to include 1.5, 1.7 1.5 is the lowest version we support here. (cherry picked from commit ad3c9a9f42b2de275b04bb7ddb3a3ea69791424a) --- .github/workflows/ci.yml | 2 ++ test/driver_autoload.jl | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e362dd7..aed7db1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -13,6 +13,8 @@ jobs: strategy: matrix: version: + - '1.5' + - '1.7' - '1' - 'nightly' os: diff --git a/test/driver_autoload.jl b/test/driver_autoload.jl index 18442e2..1f30236 100644 --- a/test/driver_autoload.jl +++ b/test/driver_autoload.jl @@ -1,6 +1,6 @@ @testset "Automatic code loading for drivers" begin empty!(DataSets.PROJECT) - pushfirst!(LOAD_PATH, abspath("drivers")) + Pkg.develop(path=joinpath(@__DIR__, "drivers", "DummyStorageBackends")) ENV["JULIA_DATASETS_PATH"] = joinpath(@__DIR__, "DriverAutoloadData.toml") DataSets.__init__() @test haskey(DataSets._storage_drivers, "DummyTomlStorage") From b711cb09fdadae325aa829c71456d4af65ed41ba Mon Sep 17 00:00:00 2001 From: Morten Piibeleht Date: Fri, 28 Oct 2022 19:32:06 +1300 Subject: [PATCH 2/7] Backport allowing `-` in dataset names to 0.2 This backports a part of #40 that enables hyphens in dataset names to the 0.2 branch, for tagging as 0.2.7. 
--- .github/workflows/ci.yml | 1 + Project.toml | 2 +- src/DataSets.jl | 6 +++--- test/runtests.jl | 3 ++- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index aed7db1..c66200a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -3,6 +3,7 @@ on: push: branches: - master + - release-* tags: '*' pull_request: jobs: diff --git a/Project.toml b/Project.toml index fcce2f4..47834e2 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "DataSets" uuid = "c9661210-8a83-48f0-b833-72e62abce419" authors = ["Chris Foster and contributors"] -version = "0.2.6" +version = "0.2.7" [deps] AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c" diff --git a/src/DataSets.jl b/src/DataSets.jl index 37c9cbd..5e02988 100644 --- a/src/DataSets.jl +++ b/src/DataSets.jl @@ -91,7 +91,7 @@ separated with forward slashes. Examples: my_data my_data_1 username/data - organization/project/data + organization-dataset_name/project/data """ function check_dataset_name(name::AbstractString) # DataSet names disallow most punctuation for now, as it may be needed as @@ -100,13 +100,13 @@ function check_dataset_name(name::AbstractString) ^ [[:alpha:]] (?: - [[:alnum:]_] | + [-[:alnum:]_] | / (?=[[:alpha:]]) )* $ "x if !occursin(dataset_name_pattern, name) - error("DataSet name \"$name\" is invalid. DataSet names must start with a letter and can contain only letters, numbers, `_` or `/`.") + error("DataSet name \"$name\" is invalid. 
DataSet names must start with a letter and can contain only letters, numbers, `-`, `_` or `/`.") end end diff --git a/test/runtests.jl b/test/runtests.jl index ba534a2..190c8d8 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -98,8 +98,9 @@ end @test DataSets.check_dataset_name("δεδομένα") === nothing @test DataSets.check_dataset_name("a/b") === nothing @test DataSets.check_dataset_name("a/b/c") === nothing + @test DataSets.check_dataset_name("a-b-c-") === nothing # Invalid names - @test_throws ErrorException("DataSet name \"a?b\" is invalid. DataSet names must start with a letter and can contain only letters, numbers, `_` or `/`.") DataSets.check_dataset_name("a?b") + @test_throws ErrorException("DataSet name \"a?b\" is invalid. DataSet names must start with a letter and can contain only letters, numbers, `-`, `_` or `/`.") DataSets.check_dataset_name("a?b") @test_throws ErrorException DataSets.check_dataset_name("1") @test_throws ErrorException DataSets.check_dataset_name("a b") @test_throws ErrorException DataSets.check_dataset_name("a.b") From e84c8ce439ba74d6fda8eee1930850b0428e5241 Mon Sep 17 00:00:00 2001 From: Morten Piibeleht Date: Thu, 24 Nov 2022 10:53:20 +1300 Subject: [PATCH 3/7] Backport #57 to 0.2.x Handle hyphens in dataset() (#57) Manually backported from commit 2b31808e649b77ee0aafad5ade038e88bc485d6c --- src/DataSets.jl | 43 ++++++++++++++++++++++++------------------- test/runtests.jl | 42 ++++++++++++++++++++++++++++-------------- 2 files changed, 52 insertions(+), 33 deletions(-) diff --git a/src/DataSets.jl b/src/DataSets.jl index 5e02988..65661f2 100644 --- a/src/DataSets.jl +++ b/src/DataSets.jl @@ -94,21 +94,20 @@ separated with forward slashes. Examples: organization-dataset_name/project/data """ function check_dataset_name(name::AbstractString) - # DataSet names disallow most punctuation for now, as it may be needed as - # delimiters in data-related syntax (eg, for the data REPL). 
- dataset_name_pattern = r" - ^ - [[:alpha:]] - (?: - [-[:alnum:]_] | - / (?=[[:alpha:]]) - )* - $ - "x - if !occursin(dataset_name_pattern, name) + if !occursin(DATASET_NAME_REGEX, name) error("DataSet name \"$name\" is invalid. DataSet names must start with a letter and can contain only letters, numbers, `-`, `_` or `/`.") end end +# DataSet names disallow most punctuation for now, as it may be needed as +# delimiters in data-related syntax (eg, for the data REPL). +const DATASET_NAME_REGEX_STRING = raw""" +[[:alpha:]] +(?: + [-[:alnum:]_] | + / (?=[[:alpha:]]) +)* +""" +const DATASET_NAME_REGEX = Regex("^\n$(DATASET_NAME_REGEX_STRING)\n\$", "x") # Hacky thing until we figure out which fields DataSet should actually have. function Base.getproperty(d::DataSet, name::Symbol) @@ -254,16 +253,22 @@ function _unescapeuri(str) return String(take!(out)) end +# Parse as a suffix of URI syntax +# name/of/dataset?param1=value1&param2=value2#fragment +const DATASET_SPEC_REGEX = Regex( + """ + ^ + ($(DATASET_NAME_REGEX_STRING)) + (?:\\?([^#]*))? # query - a=b&c=d + (?:\\#(.*))? # fragment - ... + \$ + """, + "x", +) function _split_dataspec(spec::AbstractString) # Parse as a suffix of URI syntax # name/of/dataset?param1=value1&param2=value2#fragment - m = match(r" - ^ - ((?:[[:alpha:]][[:alnum:]_]*/?)+) # name - a/b/c - (?:\?([^#]*))? # query - a=b&c=d - (?:\#(.*))? # fragment - ... 
- $"x, - spec) + m = match(DATASET_SPEC_REGEX, spec) if isnothing(m) return nothing, nothing, nothing end diff --git a/test/runtests.jl b/test/runtests.jl index 190c8d8..f42db3e 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -92,23 +92,37 @@ end #------------------------------------------------------------------------------- @testset "Data set name parsing" begin - # Valid names - @test DataSets.check_dataset_name("a_b") === nothing - @test DataSets.check_dataset_name("a1") === nothing - @test DataSets.check_dataset_name("δεδομένα") === nothing - @test DataSets.check_dataset_name("a/b") === nothing - @test DataSets.check_dataset_name("a/b/c") === nothing - @test DataSets.check_dataset_name("a-b-c-") === nothing - # Invalid names - @test_throws ErrorException("DataSet name \"a?b\" is invalid. DataSet names must start with a letter and can contain only letters, numbers, `-`, `_` or `/`.") DataSets.check_dataset_name("a?b") - @test_throws ErrorException DataSets.check_dataset_name("1") - @test_throws ErrorException DataSets.check_dataset_name("a b") - @test_throws ErrorException DataSets.check_dataset_name("a.b") - @test_throws ErrorException DataSets.check_dataset_name("a/b/") - @test_throws ErrorException DataSets.check_dataset_name("/a/b") + @testset "Valid name: $name" for name in ( + "a_b", "a-b", "a1", "δεδομένα", "a/b", "a/b/c", "a-", "b_", + ) + @test DataSets.check_dataset_name(name) === nothing + @test DataSets._split_dataspec(name) == (name, nothing, nothing) + end + + @testset "Invalid name: $name" for name in ( + "1", "a b", "a.b", "a/b/", "a//b", "/a/b", "a/-", "a/1", "a/ _/b" + ) + @test_throws ErrorException DataSets.check_dataset_name(name) + @test DataSets._split_dataspec(name) == (nothing, nothing, nothing) + end end @testset "URL-like dataspec parsing" begin + # Valid dataspecs + DataSets._split_dataspec("foo?x=1#f") == ("foo", ["x" => "1"], "f") + DataSets._split_dataspec("foo#f") == ("foo", nothing, "f") + 
DataSets._split_dataspec("foo?x=1") == ("foo", ["x" => "1"], nothing) + DataSets._split_dataspec("foo?x=1") == ("foo", ["x" => "1"], nothing) + # Invalid dataspecs + DataSets._split_dataspec("foo ?x=1") == (nothing, nothing, nothing) + DataSets._split_dataspec("foo\n?x=1") == (nothing, nothing, nothing) + DataSets._split_dataspec("foo\nbar?x=1") == (nothing, nothing, nothing) + DataSets._split_dataspec(" foo?x=1") == (nothing, nothing, nothing) + DataSets._split_dataspec("1?x=1") == (nothing, nothing, nothing) + DataSets._split_dataspec("foo-?x=1") == (nothing, nothing, nothing) + DataSets._split_dataspec("foo #f") == (nothing, nothing, nothing) + DataSets._split_dataspec("@?x=1") == (nothing, nothing, nothing) + proj = DataSets.load_project("Data.toml") @test !haskey(dataset(proj, "a_text_file"), "dataspec") From e8f153d017cf11074a25ac04f4c42022e3ff1161 Mon Sep 17 00:00:00 2001 From: Morten Piibeleht Date: Thu, 24 Nov 2022 10:54:50 +1300 Subject: [PATCH 4/7] Set version to 0.2.8 --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 47834e2..6cb6dca 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "DataSets" uuid = "c9661210-8a83-48f0-b833-72e62abce419" authors = ["Chris Foster and contributors"] -version = "0.2.7" +version = "0.2.8" [deps] AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c" From 269734c49c943900b3e6959867bf6f57bed7210a Mon Sep 17 00:00:00 2001 From: Morten Piibeleht Date: Wed, 5 Apr 2023 10:08:09 +1200 Subject: [PATCH 5/7] fix: allow AbstractTrees 0.4 on release-0.2 branch Also bump the DataSets.jl version to 0.2.9 for release. 
--- Project.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Project.toml b/Project.toml index 6cb6dca..8b60f98 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "DataSets" uuid = "c9661210-8a83-48f0-b833-72e62abce419" authors = ["Chris Foster and contributors"] -version = "0.2.8" +version = "0.2.9" [deps] AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c" @@ -15,7 +15,7 @@ TOML = "fa267f1f-6049-4f14-aa54-33bafae1ed76" UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" [compat] -AbstractTrees = "0.3" +AbstractTrees = "0.3,0.4" ReplMaker = "0.2" ResourceContexts = "0.1,0.2" TOML = "1" From 7ef6c567f9a7aec33be1d7a704694735ef48ee4c Mon Sep 17 00:00:00 2001 From: Morten Piibeleht Date: Mon, 9 Oct 2023 15:07:20 +1300 Subject: [PATCH 6/7] feat: add way to register post-__init__ callbacks --- src/DataSets.jl | 50 ++++++++++++++++++++++++++++++++++++++++++++++++ test/runtests.jl | 9 ++++++++- 2 files changed, 58 insertions(+), 1 deletion(-) diff --git a/src/DataSets.jl b/src/DataSets.jl index 65661f2..ed7c5d0 100644 --- a/src/DataSets.jl +++ b/src/DataSets.jl @@ -669,6 +669,56 @@ function __init__() =# project=proj exception=(exc,catch_backtrace()) end end + # Call any post-__init__() callbacks that were registered before __init__() was called, + # or had a chance to finish. + lock(_PROJECT_INIT_LOCK) do + _PROJECT_INITIALIZED[] = true + for f in _PROJECT_INIT_CALLBACKS + _invoke_init_cb(f) + end + # No need to keep the callbacks around, and maybe the GC can free some memory. + empty!(_PROJECT_INIT_CALLBACKS) + end + end +end + +# The register_post_init_callback() can be used to add a callback that will get called +# when DataSets.__init__() has run. Note: if f() throws an error, it does not cause a crash. +# +# This is useful for sysimages where the module is already loaded (in Base.loaded_modules), +# but __init__() has not been called yet. 
In particular, this means that other packages' __init__ +# functions can be sure that when they call initialization code that affects DataSets (in particular, +# DataSets.PROJECT), then that code runs after __init__() has run. +# +# In the non-sysimage case, DataSets.__init__() would normally have already been called by the time +# register_post_init_callback() becomes available, and so in those situations, the callback +# gets called immediately. However, in a system image, DataSets may have to queue up (FIFO) the +# callback functions and wait until DataSets.__init__() has finished. +# +# Since the __init__() functions in sysimages can run in parallel, we use a lock just in case, +# to make sure that two parallel calls would succeed. +const _PROJECT_INIT_LOCK = ReentrantLock() +const _PROJECT_INITIALIZED = Ref{Bool}(false) +const _PROJECT_INIT_CALLBACKS = Base.Callable[] +function register_post_init_callback(f::Base.Callable) + invoke = lock(_PROJECT_INIT_LOCK) do + if _PROJECT_INITIALIZED[] + return true + end + push!(_PROJECT_INIT_CALLBACKS, f) + return false + end + # We'll invoke outside of the lock, so that a long-running f() call + # wouldn't block other calls to register_post_init_callback() + invoke && _invoke_init_cb(f) + return nothing +end + +function _invoke_init_cb(f::Base.Callable) + try + Base.invokelatest(f) + catch e + @error "Failed to run init callback: $f" exception = (e, catch_backtrace()) end end diff --git a/test/runtests.jl b/test/runtests.jl index f42db3e..fabe54a 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -7,7 +7,14 @@ using ResourceContexts using DataSets: FileSystemRoot -#------------------------------------------------------------------------------- +@testset "register_post_init_callback" begin + init_was_called = Ref(false) + DataSets.register_post_init_callback() do + init_was_called[] = true + end + @test init_was_called[] +end + @testset "DataSet config" begin proj = DataSets.load_project("Data.toml") From 
bc02342c52bfb1d03ca3fe82fb223b413ccf9cce Mon Sep 17 00:00:00 2001 From: Morten Piibeleht Date: Mon, 9 Oct 2023 15:09:46 +1300 Subject: [PATCH 7/7] Set version to 0.2.10 --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 8b60f98..4b87646 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "DataSets" uuid = "c9661210-8a83-48f0-b833-72e62abce419" authors = ["Chris Foster and contributors"] -version = "0.2.9" +version = "0.2.10" [deps] AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"