From a385242bcbbd5f2850ad96aab3b70e53a88bbfc5 Mon Sep 17 00:00:00 2001
From: Morten Piibeleht <morten.piibeleht@gmail.com>
Date: Tue, 1 Nov 2022 09:33:29 +1300
Subject: [PATCH 1/7] Fix nightly tests (#50)

* Use Pkg.develop instead of LOAD_PATH

* Expand CI test matrix to include 1.5, 1.7

1.5 is the lowest version we support here.

(cherry picked from commit ad3c9a9f42b2de275b04bb7ddb3a3ea69791424a)
---
 .github/workflows/ci.yml | 2 ++
 test/driver_autoload.jl  | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index e362dd7..aed7db1 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -13,6 +13,8 @@ jobs:
     strategy:
       matrix:
         version:
+          - '1.5'
+          - '1.7'
           - '1'
           - 'nightly'
         os:
diff --git a/test/driver_autoload.jl b/test/driver_autoload.jl
index 18442e2..1f30236 100644
--- a/test/driver_autoload.jl
+++ b/test/driver_autoload.jl
@@ -1,6 +1,6 @@
 @testset "Automatic code loading for drivers" begin
     empty!(DataSets.PROJECT)
-    pushfirst!(LOAD_PATH, abspath("drivers"))
+    Pkg.develop(path=joinpath(@__DIR__, "drivers", "DummyStorageBackends"))
     ENV["JULIA_DATASETS_PATH"] = joinpath(@__DIR__, "DriverAutoloadData.toml")
     DataSets.__init__()
     @test haskey(DataSets._storage_drivers, "DummyTomlStorage")

From b711cb09fdadae325aa829c71456d4af65ed41ba Mon Sep 17 00:00:00 2001
From: Morten Piibeleht <morten.piibeleht@juliacomputing.com>
Date: Fri, 28 Oct 2022 19:32:06 +1300
Subject: [PATCH 2/7] Backport allowing `-` in dataset names to 0.2

This backports a part of #40 that enables hyphens in dataset names to
the 0.2 branch, for tagging as 0.2.7.
---
 .github/workflows/ci.yml | 1 +
 Project.toml             | 2 +-
 src/DataSets.jl          | 6 +++---
 test/runtests.jl         | 3 ++-
 4 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index aed7db1..c66200a 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -3,6 +3,7 @@ on:
   push:
     branches:
       - master
+      - release-*
     tags: '*'
   pull_request:
 jobs:
diff --git a/Project.toml b/Project.toml
index fcce2f4..47834e2 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "DataSets"
 uuid = "c9661210-8a83-48f0-b833-72e62abce419"
 authors = ["Chris Foster <chris42f@gmail.com> and contributors"]
-version = "0.2.6"
+version = "0.2.7"
 
 [deps]
 AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
diff --git a/src/DataSets.jl b/src/DataSets.jl
index 37c9cbd..5e02988 100644
--- a/src/DataSets.jl
+++ b/src/DataSets.jl
@@ -91,7 +91,7 @@ separated with forward slashes. Examples:
     my_data
     my_data_1
     username/data
-    organization/project/data
+    organization-dataset_name/project/data
 """
 function check_dataset_name(name::AbstractString)
     # DataSet names disallow most punctuation for now, as it may be needed as
@@ -100,13 +100,13 @@ function check_dataset_name(name::AbstractString)
         ^
         [[:alpha:]]
         (?:
-            [[:alnum:]_]      |
+            [-[:alnum:]_]     |
             / (?=[[:alpha:]])
         )*
         $
         "x
     if !occursin(dataset_name_pattern, name)
-        error("DataSet name \"$name\" is invalid. DataSet names must start with a letter and can contain only letters, numbers, `_` or `/`.")
+        error("DataSet name \"$name\" is invalid. DataSet names must start with a letter and can contain only letters, numbers, `-`, `_` or `/`.")
     end
 end
 
diff --git a/test/runtests.jl b/test/runtests.jl
index ba534a2..190c8d8 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -98,8 +98,9 @@ end
     @test DataSets.check_dataset_name("δεδομένα") === nothing
     @test DataSets.check_dataset_name("a/b") === nothing
     @test DataSets.check_dataset_name("a/b/c") === nothing
+    @test DataSets.check_dataset_name("a-b-c-") === nothing
     # Invalid names
-    @test_throws ErrorException("DataSet name \"a?b\" is invalid. DataSet names must start with a letter and can contain only letters, numbers, `_` or `/`.") DataSets.check_dataset_name("a?b")
+    @test_throws ErrorException("DataSet name \"a?b\" is invalid. DataSet names must start with a letter and can contain only letters, numbers, `-`, `_` or `/`.") DataSets.check_dataset_name("a?b")
     @test_throws ErrorException DataSets.check_dataset_name("1")
     @test_throws ErrorException DataSets.check_dataset_name("a b")
     @test_throws ErrorException DataSets.check_dataset_name("a.b")

From e84c8ce439ba74d6fda8eee1930850b0428e5241 Mon Sep 17 00:00:00 2001
From: Morten Piibeleht <morten.piibeleht@juliacomputing.com>
Date: Thu, 24 Nov 2022 10:53:20 +1300
Subject: [PATCH 3/7] Backport #57 to 0.2.x

Handle hyphens in dataset() (#57)

Manually backported from commit 2b31808e649b77ee0aafad5ade038e88bc485d6c
---
 src/DataSets.jl  | 43 ++++++++++++++++++++++++-------------------
 test/runtests.jl | 42 ++++++++++++++++++++++++++++--------------
 2 files changed, 52 insertions(+), 33 deletions(-)

diff --git a/src/DataSets.jl b/src/DataSets.jl
index 5e02988..65661f2 100644
--- a/src/DataSets.jl
+++ b/src/DataSets.jl
@@ -94,21 +94,20 @@ separated with forward slashes. Examples:
     organization-dataset_name/project/data
 """
 function check_dataset_name(name::AbstractString)
-    # DataSet names disallow most punctuation for now, as it may be needed as
-    # delimiters in data-related syntax (eg, for the data REPL).
-    dataset_name_pattern = r"
-        ^
-        [[:alpha:]]
-        (?:
-            [-[:alnum:]_]     |
-            / (?=[[:alpha:]])
-        )*
-        $
-        "x
-    if !occursin(dataset_name_pattern, name)
+    if !occursin(DATASET_NAME_REGEX, name)
         error("DataSet name \"$name\" is invalid. DataSet names must start with a letter and can contain only letters, numbers, `-`, `_` or `/`.")
     end
 end
+# DataSet names disallow most punctuation for now, as it may be needed as
+# delimiters in data-related syntax (eg, for the data REPL).
+const DATASET_NAME_REGEX_STRING = raw"""
+[[:alpha:]]
+(?:
+    [-[:alnum:]_]     |
+    / (?=[[:alpha:]])
+)*
+"""
+const DATASET_NAME_REGEX = Regex("^\n$(DATASET_NAME_REGEX_STRING)\n\$", "x")
 
 # Hacky thing until we figure out which fields DataSet should actually have.
 function Base.getproperty(d::DataSet, name::Symbol)
@@ -254,16 +253,22 @@ function _unescapeuri(str)
     return String(take!(out))
 end
 
+# Parse as a suffix of URI syntax
+# name/of/dataset?param1=value1&param2=value2#fragment
+const DATASET_SPEC_REGEX = Regex(
+    """
+    ^
+    ($(DATASET_NAME_REGEX_STRING))
+    (?:\\?([^#]*))? # query    - a=b&c=d
+    (?:\\#(.*))?    # fragment - ...
+    \$
+    """,
+    "x",
+)
 function _split_dataspec(spec::AbstractString)
     # Parse as a suffix of URI syntax
     # name/of/dataset?param1=value1&param2=value2#fragment
-    m = match(r"
-        ^
-        ((?:[[:alpha:]][[:alnum:]_]*/?)+)  # name     - a/b/c
-        (?:\?([^#]*))?                     # query    - a=b&c=d
-        (?:\#(.*))?                        # fragment - ...
-        $"x,
-        spec)
+    m = match(DATASET_SPEC_REGEX, spec)
     if isnothing(m)
         return nothing, nothing, nothing
     end
diff --git a/test/runtests.jl b/test/runtests.jl
index 190c8d8..f42db3e 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -92,23 +92,37 @@ end
 
 #-------------------------------------------------------------------------------
 @testset "Data set name parsing" begin
-    # Valid names
-    @test DataSets.check_dataset_name("a_b") === nothing
-    @test DataSets.check_dataset_name("a1") === nothing
-    @test DataSets.check_dataset_name("δεδομένα") === nothing
-    @test DataSets.check_dataset_name("a/b") === nothing
-    @test DataSets.check_dataset_name("a/b/c") === nothing
-    @test DataSets.check_dataset_name("a-b-c-") === nothing
-    # Invalid names
-    @test_throws ErrorException("DataSet name \"a?b\" is invalid. DataSet names must start with a letter and can contain only letters, numbers, `-`, `_` or `/`.") DataSets.check_dataset_name("a?b")
-    @test_throws ErrorException DataSets.check_dataset_name("1")
-    @test_throws ErrorException DataSets.check_dataset_name("a b")
-    @test_throws ErrorException DataSets.check_dataset_name("a.b")
-    @test_throws ErrorException DataSets.check_dataset_name("a/b/")
-    @test_throws ErrorException DataSets.check_dataset_name("/a/b")
+    @testset "Valid name: $name" for name in (
+        "a_b", "a-b", "a1", "δεδομένα", "a/b", "a/b/c", "a-", "b_",
+    )
+        @test DataSets.check_dataset_name(name) === nothing
+        @test DataSets._split_dataspec(name) == (name, nothing, nothing)
+    end
+
+    @testset "Invalid name: $name" for name in (
+        "1", "a b", "a.b", "a/b/", "a//b", "/a/b", "a/-", "a/1", "a/ _/b"
+    )
+        @test_throws ErrorException DataSets.check_dataset_name(name)
+        @test DataSets._split_dataspec(name) == (nothing, nothing, nothing)
+    end
 end
 
 @testset "URL-like dataspec parsing" begin
+    # Valid dataspecs
+    DataSets._split_dataspec("foo?x=1#f") == ("foo", ["x" => "1"], "f")
+    DataSets._split_dataspec("foo#f") == ("foo", nothing, "f")
+    DataSets._split_dataspec("foo?x=1") == ("foo", ["x" => "1"], nothing)
+    DataSets._split_dataspec("foo?x=1") == ("foo", ["x" => "1"], nothing)
+    # Invalid dataspecs
+    DataSets._split_dataspec("foo ?x=1") == (nothing, nothing, nothing)
+    DataSets._split_dataspec("foo\n?x=1") == (nothing, nothing, nothing)
+    DataSets._split_dataspec("foo\nbar?x=1") == (nothing, nothing, nothing)
+    DataSets._split_dataspec(" foo?x=1") == (nothing, nothing, nothing)
+    DataSets._split_dataspec("1?x=1") == (nothing, nothing, nothing)
+    DataSets._split_dataspec("foo-?x=1") == (nothing, nothing, nothing)
+    DataSets._split_dataspec("foo #f") == (nothing, nothing, nothing)
+    DataSets._split_dataspec("@?x=1") == (nothing, nothing, nothing)
+
     proj = DataSets.load_project("Data.toml")
 
     @test !haskey(dataset(proj, "a_text_file"), "dataspec")

From e8f153d017cf11074a25ac04f4c42022e3ff1161 Mon Sep 17 00:00:00 2001
From: Morten Piibeleht <morten.piibeleht@juliacomputing.com>
Date: Thu, 24 Nov 2022 10:54:50 +1300
Subject: [PATCH 4/7] Set version to 0.2.8

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index 47834e2..6cb6dca 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "DataSets"
 uuid = "c9661210-8a83-48f0-b833-72e62abce419"
 authors = ["Chris Foster <chris42f@gmail.com> and contributors"]
-version = "0.2.7"
+version = "0.2.8"
 
 [deps]
 AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"

From 269734c49c943900b3e6959867bf6f57bed7210a Mon Sep 17 00:00:00 2001
From: Morten Piibeleht <morten.piibeleht@juliahub.com>
Date: Wed, 5 Apr 2023 10:08:09 +1200
Subject: [PATCH 5/7] fix: allow AbstractTrees 0.4 on release-0.2 branch

Also bump the DataSets.jl version to 0.2.9 for release.
---
 Project.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Project.toml b/Project.toml
index 6cb6dca..8b60f98 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "DataSets"
 uuid = "c9661210-8a83-48f0-b833-72e62abce419"
 authors = ["Chris Foster <chris42f@gmail.com> and contributors"]
-version = "0.2.8"
+version = "0.2.9"
 
 [deps]
 AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
@@ -15,7 +15,7 @@ TOML = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
 UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
 
 [compat]
-AbstractTrees = "0.3"
+AbstractTrees = "0.3,0.4"
 ReplMaker = "0.2"
 ResourceContexts = "0.1,0.2"
 TOML = "1"

From 7ef6c567f9a7aec33be1d7a704694735ef48ee4c Mon Sep 17 00:00:00 2001
From: Morten Piibeleht <morten.piibeleht@juliahub.com>
Date: Mon, 9 Oct 2023 15:07:20 +1300
Subject: [PATCH 6/7] feat: add way to register post-__init__ callbacks

---
 src/DataSets.jl  | 50 ++++++++++++++++++++++++++++++++++++++++++++++++
 test/runtests.jl |  9 ++++++++-
 2 files changed, 58 insertions(+), 1 deletion(-)

diff --git a/src/DataSets.jl b/src/DataSets.jl
index 65661f2..ed7c5d0 100644
--- a/src/DataSets.jl
+++ b/src/DataSets.jl
@@ -669,6 +669,56 @@ function __init__()
                     =# project=proj exception=(exc,catch_backtrace())
             end
         end
+        # Call any post-__init__() callbacks that were registered before __init__() was called,
+        # or before it had a chance to finish.
+        lock(_PROJECT_INIT_LOCK) do
+            _PROJECT_INITIALIZED[] = true
+            for f in _PROJECT_INIT_CALLBACKS
+                _invoke_init_cb(f)
+            end
+            # No need to keep the callbacks around, and maybe the GC can free some memory.
+            empty!(_PROJECT_INIT_CALLBACKS)
+        end
+    end
+end
+
+# register_post_init_callback() can be used to add a callback that will get called once
+# DataSets.__init__() has run. Note: if f() throws an error, it is logged and does not cause a crash.
+#
+# This is useful for sysimages where the module is already loaded (in Base.loaded_modules),
+# but __init__() has not been called yet. In particular, this means that other packages' __init__
+# functions can be sure that when they call initialization code that affects DataSets (in particular,
+# DataSets.PROJECT), then that code runs after __init__() has run.
+#
+# In the non-sysimage case, DataSets.__init__() would normally have already been called by the
+# time register_post_init_callback() becomes available, and so in those situations, the callback
+# gets called immediately. However, in a system image, DataSets may have to queue up (FIFO) the
+# callback functions and wait until DataSets.__init__() has finished.
+#
+# Since the __init__() functions in sysimages can run in parallel, we use a lock just in case,
+# to make sure that two parallel calls would succeed.
+const _PROJECT_INIT_LOCK = ReentrantLock()
+const _PROJECT_INITIALIZED = Ref{Bool}(false)
+const _PROJECT_INIT_CALLBACKS = Base.Callable[]
+function register_post_init_callback(f::Base.Callable)
+    invoke = lock(_PROJECT_INIT_LOCK) do
+        if _PROJECT_INITIALIZED[]
+            return true
+        end
+        push!(_PROJECT_INIT_CALLBACKS, f)
+        return false
+    end
+    # We'll invoke outside of the lock, so that a long-running f() call
+    # wouldn't block other calls to register_post_init_callback()
+    invoke && _invoke_init_cb(f)
+    return nothing
+end
+
+function _invoke_init_cb(f::Base.Callable)
+    try
+        Base.invokelatest(f)
+    catch e
+        @error "Failed to run init callback: $f" exception = (e, catch_backtrace())
     end
 end
 
diff --git a/test/runtests.jl b/test/runtests.jl
index f42db3e..fabe54a 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -7,7 +7,14 @@ using ResourceContexts
 
 using DataSets: FileSystemRoot
 
-#-------------------------------------------------------------------------------
+@testset "register_post_init_callback" begin
+    init_was_called = Ref(false)
+    DataSets.register_post_init_callback() do
+        init_was_called[] = true
+    end
+    @test init_was_called[]
+end
+
 @testset "DataSet config" begin
     proj = DataSets.load_project("Data.toml")
 

From bc02342c52bfb1d03ca3fe82fb223b413ccf9cce Mon Sep 17 00:00:00 2001
From: Morten Piibeleht <morten.piibeleht@juliahub.com>
Date: Mon, 9 Oct 2023 15:09:46 +1300
Subject: [PATCH 7/7] Set version to 0.2.10

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index 8b60f98..4b87646 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "DataSets"
 uuid = "c9661210-8a83-48f0-b833-72e62abce419"
 authors = ["Chris Foster <chris42f@gmail.com> and contributors"]
-version = "0.2.9"
+version = "0.2.10"
 
 [deps]
 AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"