From 0962c75aac889b1781c2fe79eec0f3afdb163fbe Mon Sep 17 00:00:00 2001 From: sdevalk Date: Tue, 19 Sep 2023 22:46:00 +0200 Subject: [PATCH 1/2] dataset browser maintenance --- apps/dataset-browser/.env.development | 4 +- apps/dataset-browser/.env.test | 4 +- .../src/lib/datasets/enricher.ts | 34 +- .../src/lib/datasets/fetcher-result.ts | 21 +- .../lib/datasets/fetcher.integration.test.ts | 646 +++++++----------- .../src/lib/datasets/fetcher.ts | 60 +- 6 files changed, 297 insertions(+), 472 deletions(-) diff --git a/apps/dataset-browser/.env.development b/apps/dataset-browser/.env.development index 0e0357378..41c9d205f 100644 --- a/apps/dataset-browser/.env.development +++ b/apps/dataset-browser/.env.development @@ -2,5 +2,5 @@ # DO NOT ADD SECRETS TO THIS FILE # If you want to add secrets use `.env.development.local` instead -SEARCH_PLATFORM_ELASTIC_ENDPOINT_URL=https://api.colonial-heritage.triply.cc/datasets/testing/dataset-search-v2/services/dataset-search-v2-es/elasticsearch -SEARCH_PLATFORM_SPARQL_ENDPOINT_URL=https://api.colonial-heritage.triply.cc/datasets/testing/dataset-search-v2/services/dataset-search-v2-sparql/sparql +SEARCH_PLATFORM_ELASTIC_ENDPOINT_URL=https://api.colonial-heritage.triply.cc/datasets/data-hub-testing/search-graph/services/search/elasticsearch +SEARCH_PLATFORM_SPARQL_ENDPOINT_URL=https://api.colonial-heritage.triply.cc/datasets/data-hub-testing/knowledge-graph/services/kg/sparql diff --git a/apps/dataset-browser/.env.test b/apps/dataset-browser/.env.test index 7fb8d69a1..3959aed1b 100644 --- a/apps/dataset-browser/.env.test +++ b/apps/dataset-browser/.env.test @@ -2,5 +2,5 @@ # DO NOT ADD SECRETS TO THIS FILE # If you want to add secrets use `.env.test.local` instead -SEARCH_PLATFORM_ELASTIC_ENDPOINT_URL=https://api.colonial-heritage.triply.cc/datasets/testing/dataset-search-v2/services/dataset-search-v2-es/elasticsearch 
-SEARCH_PLATFORM_SPARQL_ENDPOINT_URL=https://api.colonial-heritage.triply.cc/datasets/testing/dataset-search-v2/services/dataset-search-v2-sparql/sparql +SEARCH_PLATFORM_ELASTIC_ENDPOINT_URL=https://api.colonial-heritage.triply.cc/datasets/data-hub-testing/search-graph/services/search/elasticsearch +SEARCH_PLATFORM_SPARQL_ENDPOINT_URL=https://api.colonial-heritage.triply.cc/datasets/data-hub-testing/knowledge-graph/services/kg/sparql diff --git a/apps/dataset-browser/src/lib/datasets/enricher.ts b/apps/dataset-browser/src/lib/datasets/enricher.ts index 8c1b76e37..afe51530d 100644 --- a/apps/dataset-browser/src/lib/datasets/enricher.ts +++ b/apps/dataset-browser/src/lib/datasets/enricher.ts @@ -1,6 +1,7 @@ import {Dataset, Measurement} from '.'; import {isIri} from '@colonial-collections/iris'; import {SparqlEndpointFetcher} from 'fetch-sparql-endpoint'; +import {EOL} from 'node:os'; import type {Readable} from 'node:stream'; import {lru, LRU} from 'tiny-lru'; import type {Stream} from '@rdfjs/types'; @@ -60,11 +61,15 @@ export class DatasetEnricher { return; // No IRIs to fetch } - const irisForValues = iris.map(iri => `<${iri}>`).join(' '); + const irisForValues = iris.map(iri => `<${iri}>`).join(EOL); // Query can be expanded to also include other properties const query = ` - PREFIX cc: <https://colonialcollections.nl/search#> + PREFIX cc: <https://colonialcollections.nl/schema#> + PREFIX dcat: <http://www.w3.org/ns/dcat#> + PREFIX dqv: <http://www.w3.org/ns/dqv#> + PREFIX qb: <http://purl.org/linked-data/cube#> + PREFIX skos: <http://www.w3.org/2004/02/skos/core#> CONSTRUCT { ?iri cc:measurement ?measurement . @@ -74,13 +79,22 @@ export class DatasetEnricher { cc:order ?order . } WHERE { - VALUES ?iri { ${irisForValues} } - ?iri a cc:Dataset ; - cc:measurement ?measurement . - ?measurement cc:value ?value ; - cc:measurementOf ?metric . - ?metric cc:name ?name ; - cc:order ?order . + VALUES ?iri { + ${irisForValues} + } + ?iri a dcat:Dataset . + + { + ?iri dqv:hasQualityMeasurement ?measurement . + } + UNION { + ?iri dcat:distribution/dqv:hasQualityMeasurement ?measurement . + } + + ?measurement dqv:value ?value ; + dqv:isMeasurementOf ?metric . 
+ ?metric skos:prefLabel ?name ; + qb:order ?order . } `; @@ -125,7 +139,7 @@ export class DatasetEnricher { private async processResponse(iris: string[], stream: Readable & Stream) { const loader = new RdfObjectLoader({ context: { - cc: 'https://colonialcollections.nl/search#', + cc: 'https://colonialcollections.nl/schema#', }, }); diff --git a/apps/dataset-browser/src/lib/datasets/fetcher-result.ts b/apps/dataset-browser/src/lib/datasets/fetcher-result.ts index 47baca4a0..26b36d8a8 100644 --- a/apps/dataset-browser/src/lib/datasets/fetcher-result.ts +++ b/apps/dataset-browser/src/lib/datasets/fetcher-result.ts @@ -1,24 +1,18 @@ import type {RawBucket, SearchResultFilter} from '.'; import type {LabelFetcher} from '@colonial-collections/label-fetcher'; -function toUnmatchedFilter( - bucket: RawBucket, - labelFetcher: LabelFetcher -): SearchResultFilter { +function toUnmatchedFilter(bucket: RawBucket): SearchResultFilter { const totalCount = 0; // Initial count; will be overridden by the matching filter, if any const id = bucket.key; - const name = labelFetcher.getByIri({iri: id}); + const name = bucket.key; return {totalCount, id, name}; } -function toMatchedFilter( - bucket: RawBucket, - labelFetcher: LabelFetcher -): SearchResultFilter { +function toMatchedFilter(bucket: RawBucket): SearchResultFilter { const totalCount = bucket.doc_count; // Actual count if a filter matched the query const id = bucket.key; - const name = labelFetcher.getByIri({iri: id}); + const name = bucket.key; return {totalCount, id, name}; } @@ -39,14 +33,13 @@ function combineUnmatchedWithMatchedFilters( export function buildFilters( rawUnmatchedFilters: RawBucket[], - rawMatchedFilters: RawBucket[], - labelFetcher: LabelFetcher + rawMatchedFilters: RawBucket[] ) { const unmatchedFilters = rawUnmatchedFilters.map(rawUnmatchedFilter => { - return toUnmatchedFilter(rawUnmatchedFilter, labelFetcher); + return toUnmatchedFilter(rawUnmatchedFilter); }); const matchedFilters = 
rawMatchedFilters.map(rawMatchedFilter => { - return toMatchedFilter(rawMatchedFilter, labelFetcher); + return toMatchedFilter(rawMatchedFilter); }); const combinedFilters = combineUnmatchedWithMatchedFilters( unmatchedFilters, diff --git a/apps/dataset-browser/src/lib/datasets/fetcher.integration.test.ts b/apps/dataset-browser/src/lib/datasets/fetcher.integration.test.ts index 623b28bc2..81d487534 100644 --- a/apps/dataset-browser/src/lib/datasets/fetcher.integration.test.ts +++ b/apps/dataset-browser/src/lib/datasets/fetcher.integration.test.ts @@ -35,9 +35,9 @@ describe('search', () => { { id: 'https://example.org/datasets/1', name: 'Dataset 1', - publisher: {id: 'https://museum.example.org/', name: 'Museum'}, + publisher: {id: 'Museum', name: 'Museum'}, license: { - id: 'https://creativecommons.org/licenses/by/4.0/', + id: 'Attribution 4.0 International (CC BY 4.0)', name: 'Attribution 4.0 International (CC BY 4.0)', }, description: @@ -47,15 +47,9 @@ describe('search', () => { dateCreated: new Date('2019-03-12T00:00:00.000Z'), dateModified: new Date('2023-02-17T00:00:00.000Z'), datePublished: new Date('2023-02-17T00:00:00.000Z'), - spatialCoverages: [ - { - id: 'https://hdl.handle.net/20.500.11840/termmaster10063182', - name: 'Jakarta', - }, - ], genres: [ { - id: 'http://vocab.getty.edu/aat/300386957', + id: 'man-made objects', name: 'man-made objects', }, ], @@ -119,9 +113,9 @@ describe('search', () => { { id: 'https://example.org/datasets/10', name: '(No name)', - publisher: {id: 'https://library.example.org/', name: 'Library'}, + publisher: {id: 'Library', name: 'Library'}, license: { - id: 'https://example.org/custom-license', + id: 'Custom License', name: 'Custom License', }, measurements: [ @@ -184,29 +178,15 @@ describe('search', () => { { id: 'https://example.org/datasets/11', name: 'Dataset 11', - publisher: {id: 'https://library.example.org/', name: 'Library'}, + publisher: {id: 'Library', name: 'Library'}, license: { - id: 
'https://creativecommons.org/publicdomain/zero/1.0/', + id: 'CC0 1.0 Universal (CC0 1.0) Public Domain Dedication', name: 'CC0 1.0 Universal (CC0 1.0) Public Domain Dedication', }, dateCreated: new Date('2019-03-12T00:00:00.000Z'), - spatialCoverages: [ - { - id: 'https://hdl.handle.net/20.500.11840/termmaster10061190', - name: 'Indonesië', - }, - { - id: 'https://hdl.handle.net/20.500.11840/termmaster10063351', - name: 'Bali', - }, - { - id: 'https://hdl.handle.net/20.500.11840/termmaster10063401', - name: 'Ubud', - }, - ], genres: [ { - id: 'http://vocab.getty.edu/aat/300027200', + id: 'notes (documents)', name: 'notes (documents)', }, ], @@ -270,24 +250,14 @@ describe('search', () => { { id: 'https://example.org/datasets/12', name: 'Dataset 12', - publisher: {id: 'https://library.example.org/', name: 'Library'}, + publisher: {id: 'Library', name: 'Library'}, license: { - id: 'https://creativecommons.org/publicdomain/zero/1.0/', + id: 'CC0 1.0 Universal (CC0 1.0) Public Domain Dedication', name: 'CC0 1.0 Universal (CC0 1.0) Public Domain Dedication', }, description: 'Donec placerat orci vel erat commodo suscipit. Morbi elementum nunc ut dolor venenatis, vel ultricies nisi euismod. 
Sed aliquet ultricies sapien, vehicula malesuada nunc tristique ac.', keywords: ['Hendrerit', 'Vestibulum'], - spatialCoverages: [ - { - id: 'https://hdl.handle.net/20.500.11840/termmaster10054875', - name: 'Ghana', - }, - { - id: 'https://hdl.handle.net/20.500.11840/termmaster10055279', - name: 'Zuid-Afrika', - }, - ], measurements: [ { id: 'https://example.org/datasets/12/measurements/2', @@ -349,27 +319,21 @@ describe('search', () => { id: 'https://example.org/datasets/13', name: 'Dataset 13', publisher: { - id: 'https://research.example.org/', + id: 'Research Organisation', name: 'Research Organisation', }, license: { - id: 'http://rightsstatements.org/vocab/UND/1.0/', + id: 'Copyright Undetermined', name: 'Copyright Undetermined', }, description: 'Cras erat elit, finibus eget ipsum vel, gravida dapibus leo. Etiam sem erat, suscipit id eros sit amet, scelerisque ornare sem. Aenean commodo elementum neque ac accumsan.', keywords: ['Fringilla'], dateCreated: new Date('2022-10-01T09:01:02.000Z'), - spatialCoverages: [ - { - id: 'https://hdl.handle.net/20.500.11840/termmaster10058073', - name: 'Zuid-Amerika', - }, - ], genres: [ - {id: 'http://vocab.getty.edu/aat/300048715', name: 'articles'}, + {id: 'articles', name: 'articles'}, { - id: 'http://vocab.getty.edu/aat/300111999', + id: 'publications (documents)', name: 'publications (documents)', }, ], @@ -433,9 +397,9 @@ describe('search', () => { { id: 'https://example.org/datasets/14', name: 'Dataset 14', - publisher: {id: 'https://library.example.org/', name: 'Library'}, + publisher: {id: 'Library', name: 'Library'}, license: { - id: 'http://creativecommons.org/publicdomain/zero/1.0/deed.nl', + id: 'CC0 1.0 Universeel (CC0 1.0) Publiek Domein Verklaring', name: 'CC0 1.0 Universeel (CC0 1.0) Publiek Domein Verklaring', }, description: @@ -501,26 +465,20 @@ describe('search', () => { { id: 'https://example.org/datasets/2', name: '(No name)', - publisher: {id: 'https://museum.example.org/', name: 'Museum'}, + 
publisher: {id: 'Museum', name: 'Museum'}, license: { - id: 'https://creativecommons.org/publicdomain/zero/1.0/', + id: 'CC0 1.0 Universal (CC0 1.0) Public Domain Dedication', name: 'CC0 1.0 Universal (CC0 1.0) Public Domain Dedication', }, dateCreated: new Date('2019-03-12T00:00:00.000Z'), dateModified: new Date('2023-02-17T00:00:00.000Z'), datePublished: new Date('2023-02-17T00:00:00.000Z'), - spatialCoverages: [ - { - id: 'https://hdl.handle.net/20.500.11840/termmaster10055279', - name: 'Zuid-Afrika', - }, - ], genres: [ - {id: 'http://vocab.getty.edu/aat/300043196', name: 'tableware'}, { - id: 'http://vocab.getty.edu/aat/300417586', + id: 'art (broad object genre)', name: 'art (broad object genre)', }, + {id: 'tableware', name: 'tableware'}, ], measurements: [ { @@ -573,9 +531,9 @@ describe('search', () => { { id: 'https://example.org/datasets/3', name: 'Dataset 3', - publisher: {id: 'https://archive.example.org/', name: 'Archive'}, + publisher: {id: 'Archive', name: 'Archive'}, license: { - id: 'http://opendatacommons.org/licenses/odbl/1.0/', + id: 'Open Data Commons Open Database License (ODbL) v1.0', name: 'Open Data Commons Open Database License (ODbL) v1.0', }, measurements: [ @@ -602,24 +560,16 @@ describe('search', () => { { id: 'https://example.org/datasets/4', name: 'Dataset 4', - publisher: {id: 'https://museum.example.org/', name: 'Museum'}, + publisher: {id: 'Museum', name: 'Museum'}, license: { - id: 'http://opendatacommons.org/licenses/by/1.0/', + id: 'Open Data Commons Attribution License (ODC-By) v1.0', name: 'Open Data Commons Attribution License (ODC-By) v1.0', }, description: 'Donec placerat orci vel erat commodo suscipit. Morbi elementum nunc ut dolor venenatis, vel ultricies nisi euismod. 
Sed aliquet ultricies sapien, vehicula malesuada nunc tristique ac.', keywords: ['Hendrerit', 'Suspendisse'], dateModified: new Date('2023-02-01T00:00:00.000Z'), - spatialCoverages: [ - { - id: 'https://hdl.handle.net/20.500.11840/termmaster10058074', - name: 'Suriname', - }, - ], - genres: [ - {id: 'http://vocab.getty.edu/aat/300043196', name: 'tableware'}, - ], + genres: [{id: 'tableware', name: 'tableware'}], measurements: [ { id: 'https://example.org/datasets/4/measurements/2', @@ -680,17 +630,15 @@ describe('search', () => { { id: 'https://example.org/datasets/5', name: 'Dataset 5', - publisher: {id: 'https://archive.example.org/', name: 'Archive'}, + publisher: {id: 'Archive', name: 'Archive'}, license: { - id: 'http://rightsstatements.org/vocab/InC/1.0/', + id: 'In Copyright', name: 'In Copyright', }, description: 'Maecenas quis sem ante. Vestibulum mattis lorem in mauris pulvinar tincidunt. Sed nisi ligula, mattis id vehicula at, faucibus vel quam.', keywords: ['Keyword'], - genres: [ - {id: 'http://vocab.getty.edu/aat/300404198', name: 'digital media'}, - ], + genres: [{id: 'digital media', name: 'digital media'}], measurements: [ { id: 'https://example.org/datasets/5/measurements/2', @@ -751,138 +699,114 @@ describe('search', () => { ], filters: { publishers: [ - {totalCount: 5, id: 'https://archive.example.org/', name: 'Archive'}, - {totalCount: 5, id: 'https://library.example.org/', name: 'Library'}, - {totalCount: 3, id: 'https://museum.example.org/', name: 'Museum'}, + { + totalCount: 5, + id: 'Library', + name: 'Library', + }, + { + totalCount: 3, + id: 'Museum', + name: 'Museum', + }, + { + totalCount: 5, + id: 'Archive', + name: 'Archive', + }, { totalCount: 1, - id: 'https://research.example.org/', + id: 'Research Organisation', name: 'Research Organisation', }, + { + totalCount: 0, + id: 'Onderzoeksinstelling', + name: 'Onderzoeksinstelling', + }, ], licenses: [ { totalCount: 6, - id: 'https://creativecommons.org/publicdomain/zero/1.0/', + id: 
'CC0 1.0 Universal (CC0 1.0) Public Domain Dedication', name: 'CC0 1.0 Universal (CC0 1.0) Public Domain Dedication', }, { totalCount: 2, - id: 'https://creativecommons.org/licenses/by/4.0/', + id: 'Attribution 4.0 International (CC BY 4.0)', name: 'Attribution 4.0 International (CC BY 4.0)', }, { totalCount: 1, - id: 'http://creativecommons.org/publicdomain/zero/1.0/deed.nl', + id: 'CC0 1.0 Universeel (CC0 1.0) Publiek Domein Verklaring', name: 'CC0 1.0 Universeel (CC0 1.0) Publiek Domein Verklaring', }, { totalCount: 1, - id: 'http://opendatacommons.org/licenses/by/1.0/', - name: 'Open Data Commons Attribution License (ODC-By) v1.0', - }, - { - totalCount: 1, - id: 'http://opendatacommons.org/licenses/odbl/1.0/', - name: 'Open Data Commons Open Database License (ODbL) v1.0', - }, - { - totalCount: 1, - id: 'http://rightsstatements.org/vocab/InC/1.0/', - name: 'In Copyright', - }, - { - totalCount: 1, - id: 'http://rightsstatements.org/vocab/UND/1.0/', + id: 'Copyright Undetermined', name: 'Copyright Undetermined', }, { totalCount: 1, - id: 'https://example.org/custom-license', + id: 'Custom License', name: 'Custom License', }, - ], - spatialCoverages: [ - { - totalCount: 2, - id: 'https://hdl.handle.net/20.500.11840/termmaster10055279', - name: 'Zuid-Afrika', - }, - { - totalCount: 2, - id: 'https://hdl.handle.net/20.500.11840/termmaster10058074', - name: 'Suriname', - }, - { - totalCount: 2, - id: 'https://hdl.handle.net/20.500.11840/termmaster10063351', - name: 'Bali', - }, { totalCount: 1, - id: 'https://hdl.handle.net/20.500.11840/termmaster10054875', - name: 'Ghana', - }, - { - totalCount: 1, - id: 'https://hdl.handle.net/20.500.11840/termmaster10058073', - name: 'Zuid-Amerika', - }, - { - totalCount: 1, - id: 'https://hdl.handle.net/20.500.11840/termmaster10061190', - name: 'Indonesië', + id: 'In Copyright', + name: 'In Copyright', }, { totalCount: 1, - id: 'https://hdl.handle.net/20.500.11840/termmaster10063182', - name: 'Jakarta', + id: 'Open Data Commons 
Attribution License (ODC-By) v1.0', + name: 'Open Data Commons Attribution License (ODC-By) v1.0', }, { totalCount: 1, - id: 'https://hdl.handle.net/20.500.11840/termmaster10063401', - name: 'Ubud', + id: 'Open Data Commons Open Database License (ODbL) v1.0', + name: 'Open Data Commons Open Database License (ODbL) v1.0', }, ], + spatialCoverages: [], genres: [ { totalCount: 2, - id: 'http://vocab.getty.edu/aat/300043196', - name: 'tableware', + id: 'articles', + name: 'articles', }, { totalCount: 2, - id: 'http://vocab.getty.edu/aat/300048715', - name: 'articles', + id: 'tableware', + name: 'tableware', }, { totalCount: 1, - id: 'http://vocab.getty.edu/aat/300027200', - name: 'notes (documents)', + id: 'art (broad object genre)', + name: 'art (broad object genre)', }, { totalCount: 1, - id: 'http://vocab.getty.edu/aat/300111999', - name: 'publications (documents)', + id: 'digital media', + name: 'digital media', }, { totalCount: 1, - id: 'http://vocab.getty.edu/aat/300386957', + id: 'man-made objects', name: 'man-made objects', }, { totalCount: 1, - id: 'http://vocab.getty.edu/aat/300404198', - name: 'digital media', + id: 'notes (documents)', + name: 'notes (documents)', }, { totalCount: 1, - id: 'http://vocab.getty.edu/aat/300417586', - name: 'art (broad object genre)', + id: 'publications (documents)', + name: 'publications (documents)', }, { totalCount: 1, - id: 'http://vocab.getty.edu/aat/300431978', + id: 'unidentified works', name: 'unidentified works', }, ], @@ -904,138 +828,114 @@ describe('search', () => { datasets: [], filters: { publishers: [ - {totalCount: 0, id: 'https://archive.example.org/', name: 'Archive'}, - {totalCount: 0, id: 'https://library.example.org/', name: 'Library'}, - {totalCount: 0, id: 'https://museum.example.org/', name: 'Museum'}, - { - totalCount: 0, - id: 'https://research.example.org/', - name: 'Research Organisation', - }, - ], - licenses: [ { totalCount: 0, - id: 'https://creativecommons.org/publicdomain/zero/1.0/', - name: 
'CC0 1.0 Universal (CC0 1.0) Public Domain Dedication', + id: 'Library', + name: 'Library', }, { totalCount: 0, - id: 'https://creativecommons.org/licenses/by/4.0/', - name: 'Attribution 4.0 International (CC BY 4.0)', + id: 'Museum', + name: 'Museum', }, { totalCount: 0, - id: 'http://creativecommons.org/publicdomain/zero/1.0/deed.nl', - name: 'CC0 1.0 Universeel (CC0 1.0) Publiek Domein Verklaring', - }, - { - totalCount: 0, - id: 'http://opendatacommons.org/licenses/by/1.0/', - name: 'Open Data Commons Attribution License (ODC-By) v1.0', - }, - { - totalCount: 0, - id: 'http://opendatacommons.org/licenses/odbl/1.0/', - name: 'Open Data Commons Open Database License (ODbL) v1.0', - }, - { - totalCount: 0, - id: 'http://rightsstatements.org/vocab/InC/1.0/', - name: 'In Copyright', + id: 'Archive', + name: 'Archive', }, { totalCount: 0, - id: 'http://rightsstatements.org/vocab/UND/1.0/', - name: 'Copyright Undetermined', + id: 'Research Organisation', + name: 'Research Organisation', }, { totalCount: 0, - id: 'https://example.org/custom-license', - name: 'Custom License', + id: 'Onderzoeksinstelling', + name: 'Onderzoeksinstelling', }, ], - spatialCoverages: [ + licenses: [ { totalCount: 0, - id: 'https://hdl.handle.net/20.500.11840/termmaster10055279', - name: 'Zuid-Afrika', + id: 'CC0 1.0 Universal (CC0 1.0) Public Domain Dedication', + name: 'CC0 1.0 Universal (CC0 1.0) Public Domain Dedication', }, { totalCount: 0, - id: 'https://hdl.handle.net/20.500.11840/termmaster10058074', - name: 'Suriname', + id: 'Attribution 4.0 International (CC BY 4.0)', + name: 'Attribution 4.0 International (CC BY 4.0)', }, { totalCount: 0, - id: 'https://hdl.handle.net/20.500.11840/termmaster10063351', - name: 'Bali', + id: 'CC0 1.0 Universeel (CC0 1.0) Publiek Domein Verklaring', + name: 'CC0 1.0 Universeel (CC0 1.0) Publiek Domein Verklaring', }, { totalCount: 0, - id: 'https://hdl.handle.net/20.500.11840/termmaster10054875', - name: 'Ghana', + id: 'Copyright Undetermined', + 
name: 'Copyright Undetermined', }, { totalCount: 0, - id: 'https://hdl.handle.net/20.500.11840/termmaster10058073', - name: 'Zuid-Amerika', + id: 'Custom License', + name: 'Custom License', }, { totalCount: 0, - id: 'https://hdl.handle.net/20.500.11840/termmaster10061190', - name: 'Indonesië', + id: 'In Copyright', + name: 'In Copyright', }, { totalCount: 0, - id: 'https://hdl.handle.net/20.500.11840/termmaster10063182', - name: 'Jakarta', + id: 'Open Data Commons Attribution License (ODC-By) v1.0', + name: 'Open Data Commons Attribution License (ODC-By) v1.0', }, { totalCount: 0, - id: 'https://hdl.handle.net/20.500.11840/termmaster10063401', - name: 'Ubud', + id: 'Open Data Commons Open Database License (ODbL) v1.0', + name: 'Open Data Commons Open Database License (ODbL) v1.0', }, ], + spatialCoverages: [], genres: [ { totalCount: 0, - id: 'http://vocab.getty.edu/aat/300043196', - name: 'tableware', + id: 'articles', + name: 'articles', }, { totalCount: 0, - id: 'http://vocab.getty.edu/aat/300048715', - name: 'articles', + id: 'tableware', + name: 'tableware', }, { totalCount: 0, - id: 'http://vocab.getty.edu/aat/300027200', - name: 'notes (documents)', + id: 'art (broad object genre)', + name: 'art (broad object genre)', }, { totalCount: 0, - id: 'http://vocab.getty.edu/aat/300111999', - name: 'publications (documents)', + id: 'digital media', + name: 'digital media', }, { totalCount: 0, - id: 'http://vocab.getty.edu/aat/300386957', + id: 'man-made objects', name: 'man-made objects', }, { totalCount: 0, - id: 'http://vocab.getty.edu/aat/300404198', - name: 'digital media', + id: 'notes (documents)', + name: 'notes (documents)', }, { totalCount: 0, - id: 'http://vocab.getty.edu/aat/300417586', - name: 'art (broad object genre)', + id: 'publications (documents)', + name: 'publications (documents)', }, { totalCount: 0, - id: 'http://vocab.getty.edu/aat/300431978', + id: 'unidentified works', name: 'unidentified works', }, ], @@ -1056,16 +956,22 @@ 
describe('search', () => { { id: 'https://example.org/datasets/5', name: 'Dataset 5', - publisher: {id: 'https://archive.example.org/', name: 'Archive'}, + publisher: { + id: 'Archive', + name: 'Archive', + }, license: { - id: 'http://rightsstatements.org/vocab/InC/1.0/', + id: 'In Copyright', name: 'In Copyright', }, description: 'Maecenas quis sem ante. Vestibulum mattis lorem in mauris pulvinar tincidunt. Sed nisi ligula, mattis id vehicula at, faucibus vel quam.', keywords: ['Keyword'], genres: [ - {id: 'http://vocab.getty.edu/aat/300404198', name: 'digital media'}, + { + id: 'digital media', + name: 'digital media', + }, ], measurements: [ { @@ -1127,138 +1033,114 @@ describe('search', () => { ], filters: { publishers: [ - {totalCount: 1, id: 'https://archive.example.org/', name: 'Archive'}, - {totalCount: 0, id: 'https://library.example.org/', name: 'Library'}, - {totalCount: 0, id: 'https://museum.example.org/', name: 'Museum'}, { totalCount: 0, - id: 'https://research.example.org/', - name: 'Research Organisation', + id: 'Library', + name: 'Library', }, - ], - licenses: [ { totalCount: 0, - id: 'https://creativecommons.org/publicdomain/zero/1.0/', - name: 'CC0 1.0 Universal (CC0 1.0) Public Domain Dedication', - }, - { - totalCount: 0, - id: 'https://creativecommons.org/licenses/by/4.0/', - name: 'Attribution 4.0 International (CC BY 4.0)', - }, - { - totalCount: 0, - id: 'http://creativecommons.org/publicdomain/zero/1.0/deed.nl', - name: 'CC0 1.0 Universeel (CC0 1.0) Publiek Domein Verklaring', - }, - { - totalCount: 0, - id: 'http://opendatacommons.org/licenses/by/1.0/', - name: 'Open Data Commons Attribution License (ODC-By) v1.0', - }, - { - totalCount: 0, - id: 'http://opendatacommons.org/licenses/odbl/1.0/', - name: 'Open Data Commons Open Database License (ODbL) v1.0', + id: 'Museum', + name: 'Museum', }, { totalCount: 1, - id: 'http://rightsstatements.org/vocab/InC/1.0/', - name: 'In Copyright', + id: 'Archive', + name: 'Archive', }, { totalCount: 
0, - id: 'http://rightsstatements.org/vocab/UND/1.0/', - name: 'Copyright Undetermined', + id: 'Research Organisation', + name: 'Research Organisation', }, { totalCount: 0, - id: 'https://example.org/custom-license', - name: 'Custom License', + id: 'Onderzoeksinstelling', + name: 'Onderzoeksinstelling', }, ], - spatialCoverages: [ + licenses: [ { totalCount: 0, - id: 'https://hdl.handle.net/20.500.11840/termmaster10055279', - name: 'Zuid-Afrika', + id: 'CC0 1.0 Universal (CC0 1.0) Public Domain Dedication', + name: 'CC0 1.0 Universal (CC0 1.0) Public Domain Dedication', }, { totalCount: 0, - id: 'https://hdl.handle.net/20.500.11840/termmaster10058074', - name: 'Suriname', + id: 'Attribution 4.0 International (CC BY 4.0)', + name: 'Attribution 4.0 International (CC BY 4.0)', }, { totalCount: 0, - id: 'https://hdl.handle.net/20.500.11840/termmaster10063351', - name: 'Bali', + id: 'CC0 1.0 Universeel (CC0 1.0) Publiek Domein Verklaring', + name: 'CC0 1.0 Universeel (CC0 1.0) Publiek Domein Verklaring', }, { totalCount: 0, - id: 'https://hdl.handle.net/20.500.11840/termmaster10054875', - name: 'Ghana', + id: 'Copyright Undetermined', + name: 'Copyright Undetermined', }, { totalCount: 0, - id: 'https://hdl.handle.net/20.500.11840/termmaster10058073', - name: 'Zuid-Amerika', + id: 'Custom License', + name: 'Custom License', }, { - totalCount: 0, - id: 'https://hdl.handle.net/20.500.11840/termmaster10061190', - name: 'Indonesië', + totalCount: 1, + id: 'In Copyright', + name: 'In Copyright', }, { totalCount: 0, - id: 'https://hdl.handle.net/20.500.11840/termmaster10063182', - name: 'Jakarta', + id: 'Open Data Commons Attribution License (ODC-By) v1.0', + name: 'Open Data Commons Attribution License (ODC-By) v1.0', }, { totalCount: 0, - id: 'https://hdl.handle.net/20.500.11840/termmaster10063401', - name: 'Ubud', + id: 'Open Data Commons Open Database License (ODbL) v1.0', + name: 'Open Data Commons Open Database License (ODbL) v1.0', }, ], + spatialCoverages: [], genres: 
[ { totalCount: 0, - id: 'http://vocab.getty.edu/aat/300043196', - name: 'tableware', + id: 'articles', + name: 'articles', }, { totalCount: 0, - id: 'http://vocab.getty.edu/aat/300048715', - name: 'articles', + id: 'tableware', + name: 'tableware', }, { totalCount: 0, - id: 'http://vocab.getty.edu/aat/300027200', - name: 'notes (documents)', + id: 'art (broad object genre)', + name: 'art (broad object genre)', }, { - totalCount: 0, - id: 'http://vocab.getty.edu/aat/300111999', - name: 'publications (documents)', + totalCount: 1, + id: 'digital media', + name: 'digital media', }, { totalCount: 0, - id: 'http://vocab.getty.edu/aat/300386957', + id: 'man-made objects', name: 'man-made objects', }, { - totalCount: 1, - id: 'http://vocab.getty.edu/aat/300404198', - name: 'digital media', + totalCount: 0, + id: 'notes (documents)', + name: 'notes (documents)', }, { totalCount: 0, - id: 'http://vocab.getty.edu/aat/300417586', - name: 'art (broad object genre)', + id: 'publications (documents)', + name: 'publications (documents)', }, { totalCount: 0, - id: 'http://vocab.getty.edu/aat/300431978', + id: 'unidentified works', name: 'unidentified works', }, ], @@ -1269,7 +1151,7 @@ describe('search', () => { it('finds datasets if "publishers" filter matches', async () => { const result = await datasetFetcher.search({ filters: { - publishers: ['https://library.example.org/'], + publishers: ['Library'], }, }); @@ -1278,35 +1160,52 @@ describe('search', () => { datasets: [ { id: 'https://example.org/datasets/10', - publisher: {id: 'https://library.example.org/', name: 'Library'}, + publisher: {id: 'Library', name: 'Library'}, }, { id: 'https://example.org/datasets/11', - publisher: {id: 'https://library.example.org/', name: 'Library'}, + publisher: {id: 'Library', name: 'Library'}, }, { id: 'https://example.org/datasets/12', - publisher: {id: 'https://library.example.org/', name: 'Library'}, + publisher: {id: 'Library', name: 'Library'}, }, { id: 
'https://example.org/datasets/14', - publisher: {id: 'https://library.example.org/', name: 'Library'}, + publisher: {id: 'Library', name: 'Library'}, }, { id: 'https://example.org/datasets/9', - publisher: {id: 'https://library.example.org/', name: 'Library'}, + publisher: {id: 'Library', name: 'Library'}, }, ], filters: { publishers: [ - {totalCount: 0, id: 'https://archive.example.org/', name: 'Archive'}, - {totalCount: 5, id: 'https://library.example.org/', name: 'Library'}, - {totalCount: 0, id: 'https://museum.example.org/', name: 'Museum'}, + { + totalCount: 5, + id: 'Library', + name: 'Library', + }, + { + totalCount: 0, + id: 'Museum', + name: 'Museum', + }, { totalCount: 0, - id: 'https://research.example.org/', + id: 'Archive', + name: 'Archive', + }, + { + totalCount: 0, + id: 'Research Organisation', name: 'Research Organisation', }, + { + totalCount: 0, + id: 'Onderzoeksinstelling', + name: 'Onderzoeksinstelling', + }, ], }, }); @@ -1315,7 +1214,7 @@ describe('search', () => { it('finds datasets if "licenses" filter matches', async () => { const result = await datasetFetcher.search({ filters: { - licenses: ['https://creativecommons.org/licenses/by/4.0/'], + licenses: ['Attribution 4.0 International (CC BY 4.0)'], }, }); @@ -1325,14 +1224,14 @@ describe('search', () => { { id: 'https://example.org/datasets/1', license: { - id: 'https://creativecommons.org/licenses/by/4.0/', + id: 'Attribution 4.0 International (CC BY 4.0)', name: 'Attribution 4.0 International (CC BY 4.0)', }, }, { id: 'https://example.org/datasets/9', license: { - id: 'https://creativecommons.org/licenses/by/4.0/', + id: 'Attribution 4.0 International (CC BY 4.0)', name: 'Attribution 4.0 International (CC BY 4.0)', }, }, @@ -1341,112 +1240,43 @@ describe('search', () => { licenses: [ { totalCount: 0, - id: 'https://creativecommons.org/publicdomain/zero/1.0/', + id: 'CC0 1.0 Universal (CC0 1.0) Public Domain Dedication', name: 'CC0 1.0 Universal (CC0 1.0) Public Domain Dedication', }, { 
totalCount: 2, - id: 'https://creativecommons.org/licenses/by/4.0/', + id: 'Attribution 4.0 International (CC BY 4.0)', name: 'Attribution 4.0 International (CC BY 4.0)', }, { totalCount: 0, - id: 'http://creativecommons.org/publicdomain/zero/1.0/deed.nl', + id: 'CC0 1.0 Universeel (CC0 1.0) Publiek Domein Verklaring', name: 'CC0 1.0 Universeel (CC0 1.0) Publiek Domein Verklaring', }, { totalCount: 0, - id: 'http://opendatacommons.org/licenses/by/1.0/', - name: 'Open Data Commons Attribution License (ODC-By) v1.0', - }, - { - totalCount: 0, - id: 'http://opendatacommons.org/licenses/odbl/1.0/', - name: 'Open Data Commons Open Database License (ODbL) v1.0', - }, - { - totalCount: 0, - id: 'http://rightsstatements.org/vocab/InC/1.0/', - name: 'In Copyright', - }, - { - totalCount: 0, - id: 'http://rightsstatements.org/vocab/UND/1.0/', + id: 'Copyright Undetermined', name: 'Copyright Undetermined', }, { totalCount: 0, - id: 'https://example.org/custom-license', + id: 'Custom License', name: 'Custom License', }, - ], - }, - }); - }); - - it('finds datasets if "spatialCoverages" filter matches', async () => { - const result = await datasetFetcher.search({ - filters: { - spatialCoverages: [ - 'https://hdl.handle.net/20.500.11840/termmaster10063182', - ], - }, - }); - - expect(result).toMatchObject({ - totalCount: 1, - datasets: [ - { - id: 'https://example.org/datasets/1', - spatialCoverages: [ - { - id: 'https://hdl.handle.net/20.500.11840/termmaster10063182', - name: 'Jakarta', - }, - ], - }, - ], - filters: { - spatialCoverages: [ - { - totalCount: 0, - id: 'https://hdl.handle.net/20.500.11840/termmaster10055279', - name: 'Zuid-Afrika', - }, - { - totalCount: 0, - id: 'https://hdl.handle.net/20.500.11840/termmaster10058074', - name: 'Suriname', - }, - { - totalCount: 0, - id: 'https://hdl.handle.net/20.500.11840/termmaster10063351', - name: 'Bali', - }, { totalCount: 0, - id: 'https://hdl.handle.net/20.500.11840/termmaster10054875', - name: 'Ghana', - }, - { - 
totalCount: 0, - id: 'https://hdl.handle.net/20.500.11840/termmaster10058073', - name: 'Zuid-Amerika', + id: 'In Copyright', + name: 'In Copyright', }, { totalCount: 0, - id: 'https://hdl.handle.net/20.500.11840/termmaster10061190', - name: 'Indonesië', - }, - { - totalCount: 1, - id: 'https://hdl.handle.net/20.500.11840/termmaster10063182', - name: 'Jakarta', + id: 'Open Data Commons Attribution License (ODC-By) v1.0', + name: 'Open Data Commons Attribution License (ODC-By) v1.0', }, { totalCount: 0, - id: 'https://hdl.handle.net/20.500.11840/termmaster10063401', - name: 'Ubud', + id: 'Open Data Commons Open Database License (ODbL) v1.0', + name: 'Open Data Commons Open Database License (ODbL) v1.0', }, ], }, @@ -1456,7 +1286,7 @@ describe('search', () => { it('finds datasets if "genres" filter matches', async () => { const result = await datasetFetcher.search({ filters: { - genres: ['http://vocab.getty.edu/aat/300417586'], + genres: ['art (broad object genre)'], }, }); @@ -1469,44 +1299,44 @@ describe('search', () => { ], filters: { genres: [ - { - totalCount: 1, - id: 'http://vocab.getty.edu/aat/300043196', - name: 'tableware', - }, { totalCount: 0, - id: 'http://vocab.getty.edu/aat/300048715', + id: 'articles', name: 'articles', }, { - totalCount: 0, - id: 'http://vocab.getty.edu/aat/300027200', - name: 'notes (documents)', + totalCount: 1, + id: 'tableware', + name: 'tableware', + }, + { + totalCount: 1, + id: 'art (broad object genre)', + name: 'art (broad object genre)', }, { totalCount: 0, - id: 'http://vocab.getty.edu/aat/300111999', - name: 'publications (documents)', + id: 'digital media', + name: 'digital media', }, { totalCount: 0, - id: 'http://vocab.getty.edu/aat/300386957', + id: 'man-made objects', name: 'man-made objects', }, { totalCount: 0, - id: 'http://vocab.getty.edu/aat/300404198', - name: 'digital media', + id: 'notes (documents)', + name: 'notes (documents)', }, { - totalCount: 1, - id: 'http://vocab.getty.edu/aat/300417586', - name: 'art 
(broad object genre)', + totalCount: 0, + id: 'publications (documents)', + name: 'publications (documents)', }, { totalCount: 0, - id: 'http://vocab.getty.edu/aat/300431978', + id: 'unidentified works', name: 'unidentified works', }, ], @@ -1530,9 +1360,9 @@ describe('getById', () => { expect(dataset).toStrictEqual({ id: 'https://example.org/datasets/1', name: 'Dataset 1', - publisher: {id: 'https://museum.example.org/', name: 'Museum'}, + publisher: {id: 'Museum', name: 'Museum'}, license: { - id: 'https://creativecommons.org/licenses/by/4.0/', + id: 'Attribution 4.0 International (CC BY 4.0)', name: 'Attribution 4.0 International (CC BY 4.0)', }, description: @@ -1542,15 +1372,7 @@ describe('getById', () => { dateCreated: new Date('2019-03-12T00:00:00.000Z'), dateModified: new Date('2023-02-17T00:00:00.000Z'), datePublished: new Date('2023-02-17T00:00:00.000Z'), - spatialCoverages: [ - { - id: 'https://hdl.handle.net/20.500.11840/termmaster10063182', - name: 'Jakarta', - }, - ], - genres: [ - {id: 'http://vocab.getty.edu/aat/300386957', name: 'man-made objects'}, - ], + genres: [{id: 'man-made objects', name: 'man-made objects'}], measurements: [ { id: 'https://example.org/datasets/1/measurements/2', diff --git a/apps/dataset-browser/src/lib/datasets/fetcher.ts b/apps/dataset-browser/src/lib/datasets/fetcher.ts index b926cc0f4..888712db3 100644 --- a/apps/dataset-browser/src/lib/datasets/fetcher.ts +++ b/apps/dataset-browser/src/lib/datasets/fetcher.ts @@ -19,17 +19,17 @@ export type FetcherConstructorOptions = z.infer< enum RawDatasetKeys { Id = '@id', Type = 'http://www w3 org/1999/02/22-rdf-syntax-ns#type', - Name = 'https://colonialcollections nl/search#name', - Description = 'https://colonialcollections nl/search#description', - Publisher = 'https://colonialcollections nl/search#publisher', - License = 'https://colonialcollections nl/search#license', - Keyword = 'https://colonialcollections nl/search#keyword', - MainEntityOfPage = 
'https://colonialcollections nl/search#mainEntityOfPage', - DateCreated = 'https://colonialcollections nl/search#dateCreated', - DateModified = 'https://colonialcollections nl/search#dateModified', - DatePublished = 'https://colonialcollections nl/search#datePublished', - SpatialCoverage = 'https://colonialcollections nl/search#spatialCoverage', - Genre = 'https://colonialcollections nl/search#genre', + Name = 'https://colonialcollections nl/schema#name', + Description = 'https://colonialcollections nl/schema#description', + Publisher = 'https://colonialcollections nl/schema#publisher', + License = 'https://colonialcollections nl/schema#license', + Keyword = 'https://colonialcollections nl/schema#keyword', + MainEntityOfPage = 'https://colonialcollections nl/schema#mainEntityOfPage', + DateCreated = 'https://colonialcollections nl/schema#dateCreated', + DateModified = 'https://colonialcollections nl/schema#dateModified', + DatePublished = 'https://colonialcollections nl/schema#datePublished', + SpatialCoverage = 'https://colonialcollections nl/schema#spatialCoverage', + Genre = 'https://colonialcollections nl/schema#genre', } type Thing = { @@ -228,7 +228,7 @@ export class DatasetFetcher { // Extract the IRIs, if any, from the response. 
// The IRIs are necessary for fetching their labels later on const iris = getIrisFromObject(responseData); - const predicates = ['https://colonialcollections.nl/search#name']; + const predicates = ['https://colonialcollections.nl/schema#name']; await this.labelFetcher.loadByIris({iris, predicates}); return responseData; @@ -251,28 +251,28 @@ export class DatasetFetcher { `${RawDatasetKeys.DatePublished}.0` ); - const publisherIri = reach(rawDataset, `${RawDatasetKeys.Publisher}.0`); + const publisherName = reach(rawDataset, `${RawDatasetKeys.Publisher}.0`); const publisher: Publisher = { - id: publisherIri, - name: this.labelFetcher.getByIri({iri: publisherIri}), + id: publisherName, // TBD: fetch IRI via SPARQL + name: publisherName, }; - const licenseIri = reach(rawDataset, `${RawDatasetKeys.License}.0`); + const licenseName = reach(rawDataset, `${RawDatasetKeys.License}.0`); const license: License = { - id: licenseIri, - name: this.labelFetcher.getByIri({iri: licenseIri}), + id: licenseName, // TBD: fetch IRI via SPARQL + name: licenseName, }; const toThings = (rawDatasetKey: string) => { - const iris: string[] | undefined = reach(rawDataset, `${rawDatasetKey}`); - if (iris === undefined) { + const names: string[] | undefined = reach(rawDataset, `${rawDatasetKey}`); + if (names === undefined) { return undefined; } - const things = iris.map((iri: string) => { + const things = names.map((name: string) => { return { - id: iri, - name: this.labelFetcher.getByIri({iri}), + id: name, // TBD: fetch IRI via SPARQL + name, }; }); @@ -346,7 +346,7 @@ export class DatasetFetcher { // Only return documents of a specific type terms: { [`${RawDatasetKeys.Type}.keyword`]: [ - 'https://colonialcollections.nl/search#Dataset', + 'https://colonialcollections.nl/schema#Dataset', ], }, }, @@ -403,26 +403,22 @@ export class DatasetFetcher { const publisherFilters = buildFilters( aggregations.all.publishers.buckets, - aggregations.publishers.buckets, - this.labelFetcher + 
aggregations.publishers.buckets ); const licenseFilters = buildFilters( aggregations.all.licenses.buckets, - aggregations.licenses.buckets, - this.labelFetcher + aggregations.licenses.buckets ); const spatialCoverageFilters = buildFilters( aggregations.all.spatialCoverages.buckets, - aggregations.spatialCoverages.buckets, - this.labelFetcher + aggregations.spatialCoverages.buckets ); const genresFilters = buildFilters( aggregations.all.genres.buckets, - aggregations.genres.buckets, - this.labelFetcher + aggregations.genres.buckets ); const searchResult: SearchResult = { From c85c3ab9d8670f1a6b1b77935862cddc80622e4c Mon Sep 17 00:00:00 2001 From: sdevalk Date: Tue, 19 Sep 2023 22:53:00 +0200 Subject: [PATCH 2/2] remove unused import --- apps/dataset-browser/src/lib/datasets/fetcher-result.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/apps/dataset-browser/src/lib/datasets/fetcher-result.ts b/apps/dataset-browser/src/lib/datasets/fetcher-result.ts index 26b36d8a8..3dfe280d6 100644 --- a/apps/dataset-browser/src/lib/datasets/fetcher-result.ts +++ b/apps/dataset-browser/src/lib/datasets/fetcher-result.ts @@ -1,5 +1,4 @@ import type {RawBucket, SearchResultFilter} from '.'; -import type {LabelFetcher} from '@colonial-collections/label-fetcher'; function toUnmatchedFilter(bucket: RawBucket): SearchResultFilter { const totalCount = 0; // Initial count; will be overridden by the matching filter, if any