From 2a9517b64d3b827c1c5c26afd9f2a6b314be4e85 Mon Sep 17 00:00:00 2001 From: James Garbutt <43081j@users.noreply.github.com> Date: Sun, 29 Dec 2024 11:58:07 +0000 Subject: [PATCH] feat: implement maxDepth for azure-storage-blob Implements native `maxDepth` for the `azure-storage-blob` driver by using hierarchical fetches. --- src/drivers/azure-storage-blob.ts | 50 ++++++++++++++++++- test/drivers/azure-storage-blob.test.ts | 66 ++++++++++++++++++++++++- test/drivers/utils.ts | 1 + 3 files changed, 115 insertions(+), 2 deletions(-) diff --git a/src/drivers/azure-storage-blob.ts b/src/drivers/azure-storage-blob.ts index 34623527..e0f0a17d 100644 --- a/src/drivers/azure-storage-blob.ts +++ b/src/drivers/azure-storage-blob.ts @@ -36,6 +36,47 @@ export interface AzureStorageBlobOptions { const DRIVER_NAME = "azure-storage-blob"; +async function getKeysByDepth( + client: ContainerClient, + maxDepth: number +): Promise<string[]> { + const queue: Array<{ depth: number; name: string }> = []; + let current: { depth: number; name: string } | undefined = { + name: "", + depth: 0, + }; + const keys: string[] = []; + + do { + const iterator = client + .listBlobsByHierarchy(":", { + prefix: current.name, + }) + .byPage({ maxPageSize: 1000 }); + + for await (const result of iterator) { + const { blobPrefixes, blobItems } = result.segment; + + if (blobPrefixes && current.depth < maxDepth) { + for (const childPrefix of blobPrefixes) { + queue.push({ + name: childPrefix.name, + depth: current.depth + 1, + }); + } + } + + for (const item of blobItems) { + keys.push(item.name); + } + } + + current = queue.pop(); + } while (current !== undefined); + + return keys; +} + export default defineDriver((opts: AzureStorageBlobOptions) => { let containerClient: ContainerClient; const getContainerClient = () => { @@ -81,6 +122,9 @@ export default defineDriver((opts: AzureStorageBlobOptions) => { return { name: DRIVER_NAME, options: opts, + flags: { + maxDepth: true, + }, getInstance: getContainerClient, 
async hasItem(key) { return await getContainerClient().getBlockBlobClient(key).exists(); @@ -108,7 +152,11 @@ export default defineDriver((opts: AzureStorageBlobOptions) => { async removeItem(key) { await getContainerClient().getBlockBlobClient(key).delete(); }, - async getKeys() { + async getKeys(_base, opts) { + if (opts?.maxDepth !== undefined) { + return getKeysByDepth(getContainerClient(), opts.maxDepth); + } + const iterator = getContainerClient() .listBlobsFlat() .byPage({ maxPageSize: 1000 }); diff --git a/test/drivers/azure-storage-blob.test.ts b/test/drivers/azure-storage-blob.test.ts index 085fb432..dcd6b88c 100644 --- a/test/drivers/azure-storage-blob.test.ts +++ b/test/drivers/azure-storage-blob.test.ts @@ -1,8 +1,17 @@ -import { describe, beforeAll, afterAll } from "vitest"; +import { + describe, + beforeAll, + afterAll, + it, + expect, + vi, + afterEach, +} from "vitest"; import driver from "../../src/drivers/azure-storage-blob"; import { testDriver } from "./utils"; import { BlobServiceClient } from "@azure/storage-blob"; import { ChildProcess, exec } from "node:child_process"; +import { ContainerClient } from "@azure/storage-blob"; describe.skip("drivers: azure-storage-blob", () => { let azuriteProcess: ChildProcess; @@ -17,10 +26,65 @@ describe.skip("drivers: azure-storage-blob", () => { afterAll(() => { azuriteProcess.kill(9); }); + afterEach(() => { + vi.restoreAllMocks(); + }); testDriver({ driver: driver({ connectionString: "UseDevelopmentStorage=true", accountName: "local", }), + additionalTests(ctx) { + it("natively supports depth in getKeys", async () => { + const spy = vi.spyOn(ContainerClient.prototype, "listBlobsByHierarchy"); + + await ctx.storage.setItem("depth-test/key0", "boop"); + await ctx.storage.setItem("depth-test/depth0/key1", "boop"); + await ctx.storage.setItem("depth-test/depth0/depth1/key2", "boop"); + await ctx.storage.setItem("depth-test/depth0/depth1/key3", "boop"); + + expect( + ( + await ctx.driver.getKeys('', { + 
maxDepth: 1, + }) + ).sort() + ).toMatchObject(["depth-test:key0"]); + + // assert that the underlying blob storage was only called up to a depth of 1 + // to confirm the native filtering was used + expect(spy).toHaveBeenCalledTimes(2); + expect(spy).toHaveBeenCalledWith(":", { + // azure actually mutates `options` and sets `prefix` to + // `undefined` even though we pass it in as `""`. it seems this + // assertion works by reference, so we see the mutated value + prefix: undefined, + }); + expect(spy).toHaveBeenCalledWith(":", { + prefix: "depth-test:", + }); + + spy.mockClear(); + + expect( + ( + await ctx.driver.getKeys('', { + maxDepth: 2, + }) + ).sort() + ).toMatchObject(["depth-test:depth0:key1", "depth-test:key0"]); + + expect(spy).toHaveBeenCalledTimes(3); + expect(spy).toHaveBeenCalledWith(":", { + prefix: undefined, + }); + expect(spy).toHaveBeenCalledWith(":", { + prefix: "depth-test:", + }); + expect(spy).toHaveBeenCalledWith(":", { + prefix: "depth-test:depth0:", + }); + }); + }, }); }); diff --git a/test/drivers/utils.ts b/test/drivers/utils.ts index d1b1e420..6190e460 100644 --- a/test/drivers/utils.ts +++ b/test/drivers/utils.ts @@ -196,6 +196,7 @@ export function testDriver(opts: TestOptions) { } it("removeItem", async () => { + await ctx.storage.setItem("s1:a", "test_data"); await ctx.storage.removeItem("s1:a", false); expect(await ctx.storage.hasItem("s1:a")).toBe(false); expect(await ctx.storage.getItem("s1:a")).toBe(null);