From af41f5437f4bf130d3db02afda6938b604541cd1 Mon Sep 17 00:00:00 2001 From: Forrest Date: Wed, 7 Aug 2024 16:10:58 -0400 Subject: [PATCH] feat(DataSource): simplified data structure DataSource objects now represent a single type, rather than the bag-of-types done originally. --- src/io/import/__tests__/dataSource.spec.ts | 157 ++++++++---------- .../__tests__/importDataSources.spec.ts | 22 --- src/io/import/common.ts | 6 +- src/io/import/dataSource.ts | 142 ++++------------ src/io/import/importDataSources.ts | 33 ++-- .../import/processors/doneWithDataSource.ts | 12 -- src/io/import/processors/downloadStream.ts | 24 ++- src/io/import/processors/downloadUrl.ts | 41 ----- src/io/import/processors/extractArchive.ts | 17 +- .../import/processors/extractArchiveTarget.ts | 43 ++--- src/io/import/processors/handleAmazonS3.ts | 14 +- src/io/import/processors/handleConfig.ts | 8 +- src/io/import/processors/handleDicomFile.ts | 19 ++- src/io/import/processors/handleDicomStream.ts | 18 +- .../processors/handleGoogleCloudStorage.ts | 14 +- src/io/import/processors/importSingleFile.ts | 13 +- src/io/import/processors/openUriStream.ts | 12 +- src/io/import/processors/remoteManifest.ts | 15 +- src/io/import/processors/resolveParent.ts | 28 ---- src/io/import/processors/restoreStateFile.ts | 99 +++++++---- .../import/processors/updateFileMimeType.ts | 10 +- src/io/import/processors/updateUriType.ts | 12 +- src/io/state-file/schema.ts | 26 ++- src/store/datasets-dicom.ts | 8 +- src/store/datasets-files.ts | 8 +- src/store/datasets.ts | 81 +++++---- 26 files changed, 356 insertions(+), 526 deletions(-) delete mode 100644 src/io/import/__tests__/importDataSources.spec.ts delete mode 100644 src/io/import/processors/doneWithDataSource.ts delete mode 100644 src/io/import/processors/downloadUrl.ts delete mode 100644 src/io/import/processors/resolveParent.ts diff --git a/src/io/import/__tests__/dataSource.spec.ts b/src/io/import/__tests__/dataSource.spec.ts index 5f64b632e..fb2199703 100644 --- a/src/io/import/__tests__/dataSource.spec.ts +++ b/src/io/import/__tests__/dataSource.spec.ts @@ -1,101 +1,88 @@ import { describe, it } from 'vitest'; import { expect } from 'chai'; -import { DataSource, serializeDataSource } from '@/src/io/import/dataSource'; +import { + getDataSourceName, + isRemoteDataSource, +} from '@/src/io/import/dataSource'; +import { Chunk } from '@/src/core/streaming/chunk'; -describe('serializeDataSource', () => { - it('should remove FileSources', () => { - const input: DataSource = { - fileSrc: { - file: new File([], '1.dcm'), - fileType: 'application/dicom', - }, - }; - const output = serializeDataSource(input); +describe('isRemoteDatasource', () => { + it('should work', () => { + expect(isRemoteDataSource(undefined)).to.be.false; - expect(output).to.deep.equal({}); - }); + expect( + isRemoteDataSource({ + type: 'file', + file: new File([], 'name'), + fileType: 'type', + }) + ).to.be.false; - it('should preserve archive status', () => { - const input: DataSource = { - fileSrc: { - file: new File([], '1.dcm'), - fileType: 'application/dicom', - }, - archiveSrc: { - path: 'a/b/c', - }, - parent: { - fileSrc: { - file: new File([], 'archive.zip'), - fileType: 'application/zip', + expect( + isRemoteDataSource({ + type: 'file', + file: new File([], 'name'), + fileType: 'type', + parent: { + type: 'uri', + uri: 'http://', + name: 'name', }, - }, - }; - const output = serializeDataSource(input); - - expect(output).to.deep.equal({ - archiveSrc: { - path: 'a/b/c', - }, - parent: {}, - }); + }) + ).to.be.true; }); +}); - it('should preserve UriSource', () => { - const input: DataSource = { - uriSrc: { - uri: 'https://example.com/image.jpg', - name: 'image.jpg', - }, - parent: { - uriSrc: { - uri: 's3://example/bucket', - name: '', - }, - }, - }; - const output = serializeDataSource(input); +describe('getDataSourceName', () => { + it('should work', () => { + expect( + getDataSourceName({ + type: 'file', + file: new File([], 'name'), + fileType: 'ft', + }) + ).to.equal('name'); - expect(output).to.deep.equal(input); - }); + expect( + getDataSourceName({ + type: 'uri', + uri: 'http://', + name: 'name', + }) + ).to.equal('name'); - it('should serialize remote archive members', () => { - const input: DataSource = { - fileSrc: { - file: new File([], '1.dcm'), - fileType: 'application/dicom', - }, - archiveSrc: { - path: 'a/b/c', - }, - parent: { - fileSrc: { - file: new File([], 'archive.zip'), - fileType: 'application/zip', - }, - parent: { - uriSrc: { - uri: 'https://example.com/archive.zip', - name: 'archive.zip', + expect( + getDataSourceName({ + type: 'collection', + sources: [ + { + type: 'file', + file: new File([], 'name'), + fileType: 'ft', }, - }, - }, - }; - const output = serializeDataSource(input); + ], + }) + ).to.equal('name'); - expect(output).to.deep.equal({ - archiveSrc: { - path: 'a/b/c', - }, - parent: { - // empty parent b/c archive FileSource cannot be serialized + expect( + getDataSourceName({ + type: 'chunk', + chunk: {} as Chunk, + mime: 'mime', + }) + ).to.equal(null); + + expect( + getDataSourceName({ + type: 'chunk', + chunk: {} as Chunk, + mime: 'mime', parent: { - uriSrc: { - uri: 'https://example.com/archive.zip', - name: 'archive.zip', - }, + type: 'file', + file: new File([], 'name'), + fileType: 'ft', }, - }, - }); + }) + ).to.equal('name'); }); }); diff --git a/src/io/import/__tests__/importDataSources.spec.ts b/src/io/import/__tests__/importDataSources.spec.ts deleted file mode 100644 index 172b16354..000000000 --- a/src/io/import/__tests__/importDataSources.spec.ts +++ /dev/null @@ -1,22 +0,0 @@ -import { describe, it } from 'vitest'; -import { expect } from 'chai'; -import { DataSource } from '../dataSource'; -import { importDataSources } from '../importDataSources'; - -describe('importDataSources', () => { - it('should return error if illegal URI', async () => { - const input: DataSource = { - uriSrc: { - uri: '// asdf asdf', - name: 'image.jpg', - }, - }; - const output = await importDataSources([input]); - - const firstResult = output[0]; - expect(firstResult.ok).to.equals(false); - if (!firstResult.ok) { - expect(firstResult.errors.length).to.greaterThan(0); - } - }); -}); diff --git a/src/io/import/common.ts b/src/io/import/common.ts index de8b75884..ce6599adf 100644 --- a/src/io/import/common.ts +++ b/src/io/import/common.ts @@ -121,10 +121,8 @@ export type ImportHandler = ChainHandler< ImportContext >; -export function isArchive( - ds: DataSource -): ds is DataSource & { fileSrc: FileSource } { - return !!ds.fileSrc && ARCHIVE_FILE_TYPES.has(ds.fileSrc.fileType); +export function isArchive(ds: DataSource): ds is FileSource { + return ds.type === 'file' && ARCHIVE_FILE_TYPES.has(ds.fileType); } export function isLoadableResult( diff --git a/src/io/import/dataSource.ts b/src/io/import/dataSource.ts index c4de269fa..322432cf4 100644 --- a/src/io/import/dataSource.ts +++ b/src/io/import/dataSource.ts @@ -1,14 +1,12 @@ import { Chunk } from '@/src/core/streaming/chunk'; import { Fetcher } from '@/src/core/streaming/types'; -import { Maybe, PartialWithRequired } from '@/src/types'; +import { Maybe } from '@/src/types'; /** * Represents a URI source with a file name for the downloaded resource. - * - * This can optionally be paired with a FileSource, indicating that the - * FileSource is a remote FileSource. */ export interface UriSource { + type: 'uri'; uri: string; name: string; mime?: string; @@ -17,22 +15,21 @@ export interface UriSource { /** * Represents a user-specified file. - * - * This can optionally be paired with an ArchiveSource. */ export interface FileSource { + type: 'file'; file: File; fileType: string; } /** - * If an archive source is specified, then it is assumed that the data source - * has a FileSource (representing the file inside the archive), and a parent - * data source with a FileSource that refers to the archive. + * Represents an archive member. The parent should exist and be a FileSource. */ export interface ArchiveSource { + type: 'archive'; // Full path + filename inside the archive path: string; + parent: FileSource; } /** @@ -42,6 +39,7 @@ export interface ArchiveSource { * e.g. reconstructed DICOM. */ export interface CollectionSource { + type: 'collection'; // eslint-disable-next-line no-use-before-define sources: DataSource[]; } @@ -50,6 +48,7 @@ export interface CollectionSource { * Represents a data chunk for further processing and import. */ export interface ChunkSource { + type: 'chunk'; chunk: Chunk; mime: string; } @@ -57,50 +56,25 @@ export interface ChunkSource { /** * Represents a source of data. * - * If the parent property is set, it represents the DataSource from which this - * DataSource was derived. - * - * Examples: - * - { uriSrc }: a file that has yet to be downloaded. - * - { fileSrc, parent: { uriSrc } }: a file with URI provenance info. - * - { fileSrc, archiveSrc, parent }: a file originating from an archive. - */ -export interface DataSource { - fileSrc?: FileSource; - uriSrc?: UriSource; - archiveSrc?: ArchiveSource; - chunkSrc?: ChunkSource; - collectionSrc?: CollectionSource; - parent?: DataSource; -} - -/** - * A data source that has a File. - */ -export type FileDataSource = PartialWithRequired; - -/** - * An archive member data source. + * The parent chain denotes the provenance for each step of the data source resolution. */ -export type ArchiveDataSource = PartialWithRequired< - DataSource, - 'archiveSrc' | 'fileSrc' -> & { - parent: FileDataSource; -}; - -export type ChunkDataSource = PartialWithRequired; +export type DataSource = { parent?: DataSource } & ( + | FileSource + | UriSource + | ArchiveSource + | ChunkSource + | CollectionSource +); /** * Creates a DataSource from a single file. * @param file * @returns */ -export const fileToDataSource = (file: File): DataSource => ({ - fileSrc: { - file, - fileType: file.type, - }, +export const fileToDataSource = (file: File): FileSource => ({ + type: 'file', + file, + fileType: file.type, }); /** @@ -112,12 +86,11 @@ export const uriToDataSource = ( uri: string, name: string, mime?: string -): DataSource => ({ - uriSrc: { - uri, - name, - mime, - }, +): UriSource => ({ + type: 'uri', + uri, + name, + mime, }); /** @@ -130,7 +103,7 @@ export const remoteFileToDataSource = ( uri: string ): DataSource => ({ ...fileToDataSource(file), - ...uriToDataSource(uri, file.name), + parent: uriToDataSource(uri, file.name), }); /** @@ -138,27 +111,9 @@ export const remoteFileToDataSource = ( * @param ds * @returns */ -export function isRemoteDataSource(ds: DataSource): boolean { - return !!ds.uriSrc || (!!ds.parent && isRemoteDataSource(ds.parent)); -} - -/** - * Flattens a data source hierarchy, ordered by descendant first. - * - * For a given data source `ds`, `ds` is the descendant and `ds.parent` is the - * ancestor. - * - * @param ds - * @returns - */ -export function flattenDataSourceHierarchy(ds: DataSource): DataSource[] { - const sources: DataSource[] = []; - let cur: Maybe = ds; - while (cur) { - sources.push(cur); - cur = cur.parent; - } - return sources; +export function isRemoteDataSource(ds: DataSource | undefined): boolean { + if (!ds) return false; + return ds.type === 'uri' || isRemoteDataSource(ds.parent); } /** @@ -166,43 +121,16 @@ export function flattenDataSourceHierarchy(ds: DataSource): DataSource[] { * @param ds */ export function getDataSourceName(ds: Maybe): Maybe { - if (ds?.fileSrc) { - return ds.fileSrc.file.name; - } + if (!ds) return null; - if (ds?.uriSrc) { - return ds.uriSrc.name; - } - - if (ds?.collectionSrc?.sources.length) { - const { sources } = ds.collectionSrc; + if (ds.type === 'file') return ds.file.name; + if (ds.type === 'uri') return ds.name; + if (ds.type === 'collection' && ds.sources.length) { + const { sources } = ds; const [first] = sources; const more = sources.length > 1 ? ` (+${sources.length - 1} more)` : ''; return `${getDataSourceName(first)}${more}`; } - return null; -} - -/** - * Serializes a data source into a JSON formattable object. - * - * FileSources are stripped, as they cannot be properly serialized. This - * includes the fileType property, which should be inferred when retyping the - * file. - * @param ds - */ -export function serializeDataSource(ds: DataSource) { - const output = { ...ds }; - - if (output.uriSrc) { - delete output.uriSrc.fetcher; - } - - delete output.fileSrc; - - if (output.parent) { - output.parent = serializeDataSource(output.parent); - } - return output; + return getDataSourceName(ds.parent); } diff --git a/src/io/import/importDataSources.ts b/src/io/import/importDataSources.ts index 608d0c75c..576fdaf53 100644 --- a/src/io/import/importDataSources.ts +++ b/src/io/import/importDataSources.ts @@ -10,7 +10,7 @@ import { ImportDataSourcesResult, asIntermediateResult, } from '@/src/io/import/common'; -import { DataSource, ChunkDataSource } from '@/src/io/import/dataSource'; +import { DataSource, ChunkSource } from '@/src/io/import/dataSource'; import handleDicomFile from '@/src/io/import/processors/handleDicomFile'; import extractArchive from '@/src/io/import/processors/extractArchive'; import extractArchiveTarget from '@/src/io/import/processors/extractArchiveTarget'; @@ -34,13 +34,13 @@ import { ensureError, partition } from '@/src/utils'; import { Chunk } from '@/src/core/streaming/chunk'; import { useDatasetStore } from '@/src/store/datasets'; -const unhandledResource: ImportHandler = () => { - throw new Error('Failed to handle resource'); +const unhandledResource: ImportHandler = (dataSource) => { + return asErrorResult(new Error('Failed to handle resource'), dataSource); }; const handleCollections: ImportHandler = (dataSource) => { - if (!dataSource.collectionSrc) return Skip; - return asIntermediateResult(dataSource.collectionSrc.sources); + if (dataSource.type !== 'collection') return Skip; + return asIntermediateResult(dataSource.sources); }; function isSelectable(result: ImportResult): result is LoadableVolumeResult { @@ -60,26 +60,23 @@ const importConfigs = ( }); }; -async function importDicomChunkSources(sources: ChunkDataSource[]) { +async function importDicomChunkSources(sources: ChunkSource[]) { if (sources.length === 0) return []; - const volumeChunks = await importDicomChunks( - sources.map((src) => src.chunkSrc.chunk) - ); + const volumeChunks = await importDicomChunks(sources.map((src) => src.chunk)); - // this is used to reconstruct the ChunkDataSource list - const chunkToDataSource = new Map(); + // this is used to reconstruct the ChunkSource list + const chunkToDataSource = new Map(); sources.forEach((src) => { - chunkToDataSource.set(src.chunkSrc.chunk, src); + chunkToDataSource.set(src.chunk, src); }); return Object.entries(volumeChunks).map(([id, chunks]) => asLoadableResult( id, { - collectionSrc: { - sources: chunks.map((chunk) => chunkToDataSource.get(chunk)!), - }, + type: 'collection', + sources: chunks.map((chunk) => chunkToDataSource.get(chunk)!), }, 'image' ) @@ -151,7 +148,7 @@ export async function importDataSources( switch (result.type) { case 'intermediate': { const [chunks, otherSources] = partition( - (ds) => !!ds.chunkSrc, + (ds) => ds.type === 'chunk', result.dataSources ); chunkSources.push(...chunks); @@ -185,8 +182,8 @@ export async function importDataSources( results.push( ...(await importDicomChunkSources( chunkSources.filter( - (src): src is ChunkDataSource => - src.chunkSrc?.mime === FILE_EXT_TO_MIME.dcm + (src): src is ChunkSource => + src.type === 'chunk' && src.mime === FILE_EXT_TO_MIME.dcm ) )) ); diff --git a/src/io/import/processors/doneWithDataSource.ts b/src/io/import/processors/doneWithDataSource.ts deleted file mode 100644 index eff2663d8..000000000 --- a/src/io/import/processors/doneWithDataSource.ts +++ /dev/null @@ -1,12 +0,0 @@ -import { ImportHandler } from '@/src/io/import/common'; - -/** - * Ends a pipeline execution, returning the final data source. - * @param dataSource - * @returns - */ -const doneWithDataSource: ImportHandler = (dataSource, { done }) => { - return done({ dataSource }); -}; - -export default doneWithDataSource; diff --git a/src/io/import/processors/downloadStream.ts b/src/io/import/processors/downloadStream.ts index 6bfeeca36..93334d33b 100644 --- a/src/io/import/processors/downloadStream.ts +++ b/src/io/import/processors/downloadStream.ts @@ -13,33 +13,29 @@ import { ensureError } from '@/src/utils'; * @returns */ const downloadStream: ImportHandler = async (dataSource) => { - const { fileSrc, uriSrc, chunkSrc } = dataSource; - // existence of a chunkSrc means that the stream doesn't need to be downloaded. - if (fileSrc || chunkSrc || !uriSrc?.fetcher) { - return Skip; - } + if (dataSource.type !== 'uri') return Skip; + if (!dataSource.fetcher) return Skip; - const { fetcher } = uriSrc; + const { fetcher } = dataSource; await fetcher.connect(); try { const blob = await fetcher.blob(); - const file = new File([blob], uriSrc.name, { - type: uriSrc.mime, + const file = new File([blob], dataSource.name, { + type: dataSource.mime, }); return asIntermediateResult([ { - ...dataSource, - fileSrc: { - file, - fileType: file.type, - }, + type: 'file', + file, + fileType: file.type, + parent: dataSource, }, ]); } catch (err) { throw new Error( - `Could not download stream associated with URL ${uriSrc.uri}`, + `Could not download stream associated with URL ${dataSource.uri}`, { cause: ensureError(err), } diff --git a/src/io/import/processors/downloadUrl.ts b/src/io/import/processors/downloadUrl.ts deleted file mode 100644 index 0f17f6134..000000000 --- a/src/io/import/processors/downloadUrl.ts +++ /dev/null @@ -1,41 +0,0 @@ -import { canFetchUrl, fetchFile } from '@/src/utils/fetch'; -import { ImportHandler } from '@/src/io/import/common'; - -/** - * Downloads a URL to a file DataSource. - * - * Input: { uriSrc } - * Output: { fileSrc, uriSrc } - * - * Provides optional caching if the execution context provides a cache. - * @param dataSource - * @returns - */ -const downloadUrl: ImportHandler = async ( - dataSource, - { execute, done, extra } -) => { - const { fileSrc, uriSrc } = dataSource; - if (!fileSrc && uriSrc && canFetchUrl(uriSrc.uri)) { - try { - const file = await fetchFile(uriSrc.uri, uriSrc.name, { - cache: extra?.fetchFileCache, - }); - execute({ - ...dataSource, - fileSrc: { - file, - fileType: '', - }, - }); - return done(); - } catch (err) { - throw new Error(`Could not download URL ${uriSrc.uri}`, { - cause: err instanceof Error ? err : undefined, - }); - } - } - return dataSource; -}; - -export default downloadUrl; diff --git a/src/io/import/processors/extractArchive.ts b/src/io/import/processors/extractArchive.ts index 88329b7fe..44f306f95 100644 --- a/src/io/import/processors/extractArchive.ts +++ b/src/io/import/processors/extractArchive.ts @@ -5,6 +5,7 @@ import { isArchive, } from '@/src/io/import/common'; import { Skip } from '@/src/utils/evaluateChain'; +import { DataSource } from '@/src/io/import/dataSource'; /** * Extracts all files from an archive. @@ -12,17 +13,17 @@ import { Skip } from '@/src/utils/evaluateChain'; */ const extractArchive: ImportHandler = async (dataSource) => { if (isArchive(dataSource)) { - const files = await extractFilesFromZip(dataSource.fileSrc.file); - const newSources = files.map((entry) => { + const files = await extractFilesFromZip(dataSource.file); + const newSources = files.map((entry): DataSource => { return { - fileSrc: { - file: entry.file, - fileType: '', - }, - archiveSrc: { + type: 'file', + file: entry.file, + fileType: '', + parent: { + type: 'archive', path: entry.archivePath, + parent: dataSource, }, - parent: dataSource, }; }); return asIntermediateResult(newSources); diff --git a/src/io/import/processors/extractArchiveTarget.ts b/src/io/import/processors/extractArchiveTarget.ts index fb30f6326..03eefe7ca 100644 --- a/src/io/import/processors/extractArchiveTarget.ts +++ b/src/io/import/processors/extractArchiveTarget.ts @@ -18,36 +18,25 @@ import { Skip } from '@/src/utils/evaluateChain'; * @returns */ const extractArchiveTarget: ImportHandler = async (dataSource) => { - const { fileSrc, archiveSrc, parent } = dataSource; + if (dataSource.type !== 'archive') return Skip; - if (!fileSrc && archiveSrc && parent) { - if (!parent?.fileSrc) { - throw new Error( - 'Cannot extract an archive target with an unresolved parent' - ); - } - - if (!isArchive(parent)) { - throw new Error('Parent is not a supported archive file'); - } - - const targetFile = await extractFileFromZip( - parent.fileSrc.file, - archiveSrc.path - ); - - return asIntermediateResult([ - { - ...dataSource, - fileSrc: { - file: targetFile, - fileType: '', - }, - }, - ]); + if (!isArchive(dataSource.parent)) { + throw new Error('Parent is not a supported archive file'); } - return Skip; + const targetFile = await extractFileFromZip( + dataSource.parent.file, + dataSource.path + ); + + return asIntermediateResult([ + { + type: 'file', + file: targetFile, + fileType: '', + parent: dataSource, + }, + ]); }; export default extractArchiveTarget; diff --git a/src/io/import/processors/handleAmazonS3.ts b/src/io/import/processors/handleAmazonS3.ts index 569d391d9..aff3cd9b6 100644 --- a/src/io/import/processors/handleAmazonS3.ts +++ b/src/io/import/processors/handleAmazonS3.ts @@ -4,22 +4,20 @@ import { ImportHandler, asIntermediateResult } from '@/src/io/import/common'; import { DataSource } from '@/src/io/import/dataSource'; const handleAmazonS3: ImportHandler = async (dataSource) => { - const { uriSrc } = dataSource; - if (uriSrc && isAmazonS3Uri(uriSrc.uri)) { + if (dataSource.type === 'uri' && isAmazonS3Uri(dataSource.uri)) { try { const newSources: DataSource[] = []; - await getObjectsFromS3(uriSrc.uri, (name, url) => { + await getObjectsFromS3(dataSource.uri, (name, url) => { newSources.push({ - uriSrc: { - uri: url, - name, - }, + type: 'uri', + uri: url, + name, parent: dataSource, }); }); return asIntermediateResult(newSources); } catch (err) { - throw new Error(`Could not download S3 URI ${uriSrc.uri}`, { + throw new Error(`Could not download S3 URI ${dataSource.uri}`, { cause: err instanceof Error ? err : undefined, }); } diff --git a/src/io/import/processors/handleConfig.ts b/src/io/import/processors/handleConfig.ts index cf909226b..a365a4873 100644 --- a/src/io/import/processors/handleConfig.ts +++ b/src/io/import/processors/handleConfig.ts @@ -9,10 +9,12 @@ import { Skip } from '@/src/utils/evaluateChain'; * @returns */ const handleConfig: ImportHandler = async (dataSource) => { - const { fileSrc } = dataSource; - if (fileSrc?.fileType === 'application/json') { + if ( + dataSource.type === 'file' && + dataSource.fileType === 'application/json' + ) { try { - const manifest = await readConfigFile(fileSrc.file); + const manifest = await readConfigFile(dataSource.file); // Don't consume JSON if it has no known key if (Object.keys(manifest).length === 0) { return Skip; diff --git a/src/io/import/processors/handleDicomFile.ts b/src/io/import/processors/handleDicomFile.ts index 1a6de38e5..818da9e34 100644 --- a/src/io/import/processors/handleDicomFile.ts +++ b/src/io/import/processors/handleDicomFile.ts @@ -14,8 +14,10 @@ import { readDicomTags } from '@itk-wasm/dicom'; * @returns */ const handleDicomFile: ImportHandler = async (dataSource) => { - const { fileSrc } = dataSource; - if (fileSrc?.fileType !== FILE_EXT_TO_MIME.dcm) { + if ( + dataSource.type !== 'file' || + dataSource.fileType !== FILE_EXT_TO_MIME.dcm + ) { return Skip; } @@ -24,8 +26,8 @@ const handleDicomFile: ImportHandler = async (dataSource) => { return result.tags; }; - const metaLoader = new DicomFileMetaLoader(fileSrc.file, readTags); - const dataLoader = new DicomFileDataLoader(fileSrc.file); + const metaLoader = new DicomFileMetaLoader(dataSource.file, readTags); + const dataLoader = new DicomFileDataLoader(dataSource.file); const chunk = new Chunk({ metaLoader, dataLoader, @@ -35,11 +37,10 @@ const handleDicomFile: ImportHandler = async (dataSource) => { return asIntermediateResult([ { - ...dataSource, - chunkSrc: { - chunk, - mime: FILE_EXT_TO_MIME.dcm, - }, + type: 'chunk', + chunk, + mime: FILE_EXT_TO_MIME.dcm, + parent: dataSource, }, ]); }; diff --git a/src/io/import/processors/handleDicomStream.ts b/src/io/import/processors/handleDicomStream.ts index 0aad299e0..bf911b650 100644 --- a/src/io/import/processors/handleDicomStream.ts +++ b/src/io/import/processors/handleDicomStream.ts @@ -13,16 +13,13 @@ import { FILE_EXT_TO_MIME } from '@/src/io/mimeTypes'; import { readDicomTags } from '@itk-wasm/dicom'; const handleDicomStream: ImportHandler = async (dataSource) => { - const { fileSrc, uriSrc, chunkSrc } = dataSource; - if (fileSrc || uriSrc?.mime !== FILE_EXT_TO_MIME.dcm) { + if (dataSource.type !== 'uri' || dataSource?.mime !== FILE_EXT_TO_MIME.dcm) { return Skip; } - if (chunkSrc?.chunk && chunkSrc?.mime === FILE_EXT_TO_MIME.dcm) return Skip; - const fetcher = - uriSrc.fetcher ?? - new CachedStreamFetcher(uriSrc.uri, { + dataSource.fetcher ?? + new CachedStreamFetcher(dataSource.uri, { fetch: (...args) => getRequestPool().fetch(...args), }); @@ -42,11 +39,10 @@ const handleDicomStream: ImportHandler = async (dataSource) => { return asIntermediateResult([ { - ...dataSource, - chunkSrc: { - chunk, - mime: FILE_EXT_TO_MIME.dcm, - }, + type: 'chunk', + chunk, + mime: FILE_EXT_TO_MIME.dcm, + parent: dataSource, }, ]); }; diff --git a/src/io/import/processors/handleGoogleCloudStorage.ts b/src/io/import/processors/handleGoogleCloudStorage.ts index 0e7051a8e..cb58b58ab 100644 --- a/src/io/import/processors/handleGoogleCloudStorage.ts +++ b/src/io/import/processors/handleGoogleCloudStorage.ts @@ -7,22 +7,20 @@ import { ImportHandler, asIntermediateResult } from '@/src/io/import/common'; import { DataSource } from '@/src/io/import/dataSource'; const handleGoogleCloudStorage: ImportHandler = async (dataSource) => { - const { uriSrc } = dataSource; - if (uriSrc && isGoogleCloudStorageUri(uriSrc.uri)) { + if (dataSource.type === 'uri' && isGoogleCloudStorageUri(dataSource.uri)) { try { const newSources: DataSource[] = []; - await getObjectsFromGsUri(uriSrc.uri, (object) => { + await getObjectsFromGsUri(dataSource.uri, (object) => { newSources.push({ - uriSrc: { - uri: object.mediaLink, - name: object.name, - }, + type: 'uri', + uri: object.mediaLink, + name: object.name, parent: dataSource, }); }); return asIntermediateResult(newSources); } catch (err) { - throw new Error(`Could not download GCS URI ${uriSrc.uri}`, { + throw new Error(`Could not download GCS URI ${dataSource.uri}`, { cause: err instanceof Error ? err : undefined, }); } diff --git a/src/io/import/processors/importSingleFile.ts b/src/io/import/processors/importSingleFile.ts index ac5c0c1d0..e63ce309f 100644 --- a/src/io/import/processors/importSingleFile.ts +++ b/src/io/import/processors/importSingleFile.ts @@ -14,21 +14,20 @@ import { Skip } from '@/src/utils/evaluateChain'; * @returns */ const importSingleFile: ImportHandler = async (dataSource) => { - if (!dataSource.fileSrc) { + if (dataSource.type !== 'file') { return Skip; } - const { fileSrc } = dataSource; - if (!FILE_READERS.has(fileSrc.fileType)) { + if (!FILE_READERS.has(dataSource.fileType)) { return Skip; } - const reader = FILE_READERS.get(fileSrc.fileType)!; - const dataObject = await reader(fileSrc.file); + const reader = FILE_READERS.get(dataSource.fileType)!; + const dataObject = await reader(dataSource.file); if (dataObject.isA('vtkImageData')) { const dataID = useImageStore().addVTKImageData( - fileSrc.file.name, + dataSource.file.name, dataObject as vtkImageData ); @@ -42,7 +41,7 @@ const importSingleFile: ImportHandler = async (dataSource) => { ); } const dataID = useModelStore().addVTKPolyData( - fileSrc.file.name, + dataSource.file.name, dataObject as vtkPolyData ); diff --git a/src/io/import/processors/openUriStream.ts b/src/io/import/processors/openUriStream.ts index 4426cd723..f7be050e6 100644 --- a/src/io/import/processors/openUriStream.ts +++ b/src/io/import/processors/openUriStream.ts @@ -5,16 +5,15 @@ import { ImportHandler, asIntermediateResult } from '@/src/io/import/common'; import { canFetchUrl } from '@/src/utils/fetch'; const openUriStream: ImportHandler = async (dataSource, context) => { - const { uriSrc } = dataSource; - if (!uriSrc || !canFetchUrl(uriSrc.uri)) { + if (dataSource.type !== 'uri' || !canFetchUrl(dataSource.uri)) { return Skip; } - if (uriSrc.fetcher?.connected) { + if (dataSource.fetcher?.connected) { return Skip; } - const fetcher = new CachedStreamFetcher(uriSrc.uri, { + const fetcher = new CachedStreamFetcher(dataSource.uri, { fetch: (...args) => getRequestPool().fetch(...args), }); @@ -28,10 +27,7 @@ const openUriStream: ImportHandler = async (dataSource, context) => { return asIntermediateResult([ { ...dataSource, - uriSrc: { - ...uriSrc, - fetcher, - }, + fetcher, }, ]); }; diff --git a/src/io/import/processors/remoteManifest.ts b/src/io/import/processors/remoteManifest.ts index b3c09abea..3914e83ad 100644 --- a/src/io/import/processors/remoteManifest.ts +++ b/src/io/import/processors/remoteManifest.ts @@ -10,20 +10,21 @@ import { ZodError } from 'zod'; * @returns */ const handleRemoteManifest: ImportHandler = async (dataSource) => { - const { fileSrc } = dataSource; - if (fileSrc?.fileType !== 'application/json') { + if ( + dataSource.type !== 'file' || + dataSource.fileType !== 'application/json' + ) { return Skip; } try { const remotes: DataSource[] = []; - const manifest = await readRemoteManifestFile(fileSrc.file); + const manifest = await readRemoteManifestFile(dataSource.file); manifest.resources.forEach((res) => { remotes.push({ - uriSrc: { - uri: res.url, - name: res.name ?? new URL(res.url, window.location.origin).pathname, - }, + type: 'uri', + uri: res.url, + name: res.name ?? new URL(res.url, window.location.origin).pathname, parent: dataSource, }); }); diff --git a/src/io/import/processors/resolveParent.ts b/src/io/import/processors/resolveParent.ts deleted file mode 100644 index 20a218a7b..000000000 --- a/src/io/import/processors/resolveParent.ts +++ /dev/null @@ -1,28 +0,0 @@ -import { ImportHandler } from '@/src/io/import/common'; -import { ensureError } from '@/src/utils'; - -/** - * Resolves a data source's parent. - * - * A data source is considered if it has a fileSrc or uriSrc. - * @param dataSource - * @returns - */ -const resolveParent: ImportHandler = async (dataSource, { execute }) => { - const { parent } = dataSource; - if (parent) { - const result = await execute(parent); - if (!result.ok) { - throw new Error('Failed to resolve parent data source', { - cause: ensureError(result.errors[0].cause), - }); - } - return { - ...dataSource, - parent: result.data[0].dataSource, - }; - } - return dataSource; -}; - -export default resolveParent; diff --git a/src/io/import/processors/restoreStateFile.ts b/src/io/import/processors/restoreStateFile.ts index 088b4dcc1..cabdd56c1 100644 --- a/src/io/import/processors/restoreStateFile.ts +++ b/src/io/import/processors/restoreStateFile.ts @@ -4,6 +4,7 @@ import { ManifestSchema, } from '@/src/io/state-file/schema'; import { + asErrorResult, asOkayResult, ImportContext, ImportHandler, @@ -196,8 +197,59 @@ async function rebuildDataSources( leaves.add(serializedSrc.id); }); - // serializedDataSources should be topologically ordered by - // ancestors first and descendants last + const deserialize = ( + serialized: (typeof serializedDataSources)[number] + ): DataSource => { + const { type } = serialized; + switch (type) { + case 'file': + return { + type: 'file', + file: fileIDToFile[serialized.fileId], + fileType: serialized.fileType, + }; + case 'archive': { + const parent = dataSourceCache[serialized.parent]; + if (!parent) + throw new Error('Could not find the parent of an archive source'); + if (parent.type !== 'file') + throw new Error('Archive source parent is not a file'); + return { + type: 'archive', + path: serialized.path, + parent, + }; + } + case 'uri': + return { + type: 'uri', + uri: serialized.uri, + name: serialized.name, + mime: serialized.mime, + }; + case 'collection': { + // these sources are no longer leaves + serialized.sources.forEach((id) => { + leaves.delete(id); + }); + const sources = serialized.sources.map((id) => dataSourceCache[id]); + if (sources.some((src) => !src)) + throw new Error('Could not deserialize a collection source'); + return { + type: 'collection', + sources, + }; + } + default: + throw new Error( + `Encountered an invalid serialized data source: ${type}` + ); + } + }; + + // serializedDataSources should be topologically ordered by ancestors first + // and descendants last. This is established in + // datasets.ts/serializeDataSource() for (let i = 0; i < serializedDataSources.length; i++) { const serializedSrc = serializedDataSources[i]; @@ -206,33 +258,7 @@ async function rebuildDataSources( continue; } - let dataSource: DataSource = {}; - - if (serializedSrc.fileSrc) { - dataSource.fileSrc = { - file: fileIDToFile[serializedSrc.fileSrc.fileId], - fileType: serializedSrc.fileSrc.fileType, - }; - } - - if (serializedSrc.archiveSrc) { - dataSource.archiveSrc = serializedSrc.archiveSrc; - } - - if (serializedSrc.uriSrc) { - dataSource.uriSrc = serializedSrc.uriSrc; - } - - if (serializedSrc.collectionSrc) { - serializedSrc.collectionSrc.sources.forEach((id) => { - leaves.delete(id); - }); - dataSource.collectionSrc = { - sources: serializedSrc.collectionSrc.sources.map( - (id) => dataSourceCache[id] - ), - }; - } + let dataSource = deserialize(serializedSrc); if (serializedSrc.parent) { dataSource.parent = dataSourceCache[serializedSrc.parent]; @@ -316,9 +342,8 @@ async function restoreDatasets( } const restoreStateFile: ImportHandler = async (dataSource, context) => { - const { fileSrc } = dataSource; - if (fileSrc && (await isStateFile(fileSrc.file))) { - const stateFileContents = await extractFilesFromZip(fileSrc.file); + if (dataSource.type === 'file' && (await isStateFile(dataSource.file))) { + const stateFileContents = await extractFilesFromZip(dataSource.file); const [manifests, restOfStateFile] = partition( (dataFile) => dataFile.file.name === MANIFEST, @@ -331,7 +356,15 @@ const restoreStateFile: ImportHandler = async (dataSource, context) => { const manifestString = await manifests[0].file.text(); const migrated = migrateManifest(manifestString); - const manifest = ManifestSchema.parse(migrated); + let manifest: Manifest; + try { + manifest = ManifestSchema.parse(migrated); + } catch (_) { + return asErrorResult( + new Error('Unsupported state file schema or version'), + dataSource + ); + } // We restore the view first, so that the appropriate watchers are triggered // in the views as the data is loaded diff --git a/src/io/import/processors/updateFileMimeType.ts b/src/io/import/processors/updateFileMimeType.ts index 06238781c..9817632a6 100644 --- a/src/io/import/processors/updateFileMimeType.ts +++ b/src/io/import/processors/updateFileMimeType.ts @@ -7,10 +7,9 @@ import { ImportHandler, asIntermediateResult } from '@/src/io/import/common'; * @param dataSource */ const updateFileMimeType: ImportHandler = async (dataSource) => { - const { fileSrc } = dataSource; - if (!fileSrc || fileSrc.fileType !== '') return Skip; + if (dataSource.type !== 'file' || dataSource.fileType !== '') return Skip; - const mime = await getFileMimeType(fileSrc.file); + const mime = await getFileMimeType(dataSource.file); if (!mime) { throw new Error('File is unsupported'); } @@ -18,10 +17,7 @@ const updateFileMimeType: ImportHandler = async (dataSource) => { return asIntermediateResult([ { ...dataSource, - fileSrc: { - ...fileSrc, - fileType: mime, - }, + fileType: mime, }, ]); }; diff --git a/src/io/import/processors/updateUriType.ts b/src/io/import/processors/updateUriType.ts index e79b94acb..3407f1f93 100644 --- a/src/io/import/processors/updateUriType.ts +++ b/src/io/import/processors/updateUriType.ts @@ -36,16 +36,15 @@ function detectStreamType(stream: ReadableStream) { } const updateUriType: ImportHandler = async (dataSource) => { - const { fileSrc, uriSrc } = dataSource; - if (fileSrc || !uriSrc?.fetcher) { + if (dataSource.type !== 'uri' || !dataSource?.fetcher) { return Skip; } - if (uriSrc.mime !== undefined) { + if (dataSource.mime !== undefined) { return Skip; } - const { fetcher } = uriSrc; + const { fetcher } = dataSource; await fetcher.connect(); const stream = fetcher.getStream(); @@ -53,10 +52,7 @@ const updateUriType: ImportHandler = async (dataSource) => { const streamDataSource = { ...dataSource, - uriSrc: { - ...uriSrc, - mime, - }, + mime, }; return asIntermediateResult([streamDataSource]); diff --git a/src/io/state-file/schema.ts b/src/io/state-file/schema.ts index 4d8e15af3..871634a08 100644 --- a/src/io/state-file/schema.ts +++ b/src/io/state-file/schema.ts @@ -48,33 +48,43 @@ const LPSAxisDir = z.union([ ]); const FileSource = z.object({ + id: z.number(), + type: z.literal('file'), fileId: z.number(), fileType: z.string(), + parent: z.number().optional(), }); const UriSource = z.object({ + id: z.number(), + type: z.literal('uri'), uri: z.string(), name: z.string(), mime: z.string().optional(), + parent: z.number().optional(), }); const ArchiveSource = z.object({ + id: z.number(), + type: z.literal('archive'), path: z.string(), + parent: z.number(), }); const CollectionSource = z.object({ - sources: z.number().array(), -}); - -const DataSource = z.object({ id: z.number(), + type: z.literal('collection'), + sources: z.number().array(), parent: z.number().optional(), - fileSrc: FileSource.optional(), - uriSrc: UriSource.optional(), - archiveSrc: ArchiveSource.optional(), - collectionSrc: CollectionSource.optional(), }); +const DataSource = z.union([ + FileSource, + UriSource, + ArchiveSource, + CollectionSource, +]); + export type DataSourceType = z.infer; const Dataset = z.object({ diff --git a/src/store/datasets-dicom.ts b/src/store/datasets-dicom.ts index 3dc1de626..2b9bb6eab 100644 --- a/src/store/datasets-dicom.ts +++ b/src/store/datasets-dicom.ts @@ -2,7 +2,7 @@ import vtkITKHelper from '@kitware/vtk.js/Common/DataModel/ITKHelper'; import vtkImageData from '@kitware/vtk.js/Common/DataModel/ImageData'; import { defineStore } from 'pinia'; import { Image } from 'itk-wasm'; -import { FileDataSource } from '@/src/io/import/dataSource'; +import { FileSource } from '@/src/io/import/dataSource'; import * as DICOM from '@/src/io/dicom'; import { identity, pick, removeFromArray } from '../utils'; import { useImageStore } from './datasets-images'; @@ -165,12 +165,10 @@ export const useDICOMStore = defineStore('dicom', { needsRebuild: {}, }), actions: { - async importFiles(datasets: FileDataSource[]) { + async importFiles(datasets: FileSource[]) { if (!datasets.length) return []; - const fileToDataSource = new Map( - datasets.map((ds) => [ds.fileSrc.file, ds]) - ); + const fileToDataSource = new Map(datasets.map((ds) => [ds.file, ds])); const allFiles = [...fileToDataSource.keys()]; const volumeToFiles = await DICOM.splitAndSort(allFiles, identity); diff --git a/src/store/datasets-files.ts b/src/store/datasets-files.ts index ea400351a..2b434af79 100644 --- a/src/store/datasets-files.ts +++ b/src/store/datasets-files.ts @@ -1,8 +1,8 @@ import { defineStore } from 'pinia'; -import { FileDataSource } from '@/src/io/import/dataSource'; +import { FileSource } from '@/src/io/import/dataSource'; interface State { - byDataID: Record; + byDataID: Record; } /** @@ -19,7 +19,7 @@ export const useFileStore = defineStore('files', { // Returns [File] used to build a dataID getFiles: (state) => (dataID: string) => - (state.byDataID[dataID] ?? []).map((ds) => ds.fileSrc.file), + (state.byDataID[dataID] ?? []).map((ds) => ds.file), }, actions: { @@ -29,7 +29,7 @@ export const useFileStore = defineStore('files', { } }, - add(dataID: string, files: FileDataSource[]) { + add(dataID: string, files: FileSource[]) { this.byDataID[dataID] = files; }, }, diff --git a/src/store/datasets.ts b/src/store/datasets.ts index d9c6e8591..c74430158 100644 --- a/src/store/datasets.ts +++ b/src/store/datasets.ts @@ -40,44 +40,57 @@ function serializeLoadedData(loadedDataSources: Array) { return dataSourceToId.get(ds)!; } - const serialized: Schema.DataSourceType = { id: nextId() }; - dataSourceToId.set(ds, serialized.id); - - if (ds.fileSrc) { - if (ds.archiveSrc || ds.uriSrc) { - // fileSrc is constructed from either an archive or uri - delete serialized.fileSrc; - } else { - const fileId = nextId(); - serialized.fileSrc = { fileId, fileType: ds.fileSrc.fileType }; - files[fileId] = ds.fileSrc.file; + const id = nextId(); + dataSourceToId.set(ds, id); + + // don't need to serialize all parents, just the ones that are necessary. + const { type } = ds; + if (type === 'file') { + // file derives from the parent. Just return the serialized parent. + if (ds.parent) { + return serializeDataSource(ds.parent); } - } - - if (ds.archiveSrc) { - serialized.archiveSrc = ds.archiveSrc; - } - - if (ds.uriSrc) { - // eslint-disable-next-line @typescript-eslint/no-unused-vars - const { fetcher, ...rest } = ds.uriSrc; - serialized.uriSrc = rest; - } - if (ds.collectionSrc) { - serialized.collectionSrc = { - sources: ds.collectionSrc.sources.map((s) => serializeDataSource(s)), - }; - } - - const shouldSerializeParent = !!ds.archiveSrc; - - if (shouldSerializeParent && ds.parent) { - serialized.parent = serializeDataSource(ds.parent); + const fileId = nextId(); + files[fileId] = ds.file; + serializedDependencies.push({ + id, + type: 'file', + fileId, + fileType: ds.fileType, + }); + } else if (type === 'archive') { + serializedDependencies.push({ + id, + type: 'archive', + path: ds.path, + parent: serializeDataSource(ds.parent), + }); + } else if (type === 'uri') { + serializedDependencies.push({ + id, + type: 'uri', + name: ds.name, + uri: ds.uri, + mime: ds.mime, + }); + } else if (type === 'collection') { + serializedDependencies.push({ + id, + type: 'collection', + sources: ds.sources.map((src) => serializeDataSource(src)), + }); + } else if (type === 'chunk') { + // chunk derives from the parent. Just return the serialized parent. + if (ds.parent) { + return serializeDataSource(ds.parent); + } + throw new Error('Chunk does not have a parent'); + } else { + throw new Error(`Invalid data source type: ${type as string}`); } - serializedDependencies.push(serialized); - return serialized.id; + return id; } loadedDataSources.forEach(({ dataID, dataSource }) => {