diff --git a/proposals/README.md b/proposals/README.md index 7f9d0be7..381fe25a 100644 --- a/proposals/README.md +++ b/proposals/README.md @@ -1,6 +1,6 @@ # Proposed Extensions -OGC APIs are designed to be modular. We expect new requirements will emerge with use and new features will be proposed to address those requirements. Development and validation of these new features is a community effort. Supporting that effort are two tools; a process for tracking the maturity of a proposed addition, and a means to publish the current baseline of a proposed new feature. +OGC APIs are designed to be modular. We expect new requirements will emerge with use, and new features will be proposed to address those requirements. Development and validation of these new features is a community effort. Supporting that effort are two tools: a process for tracking the maturity of a proposed addition, and a means to publish the current baseline of a proposed new feature. ## Draft Features diff --git a/proposals/aggregations/README.md b/proposals/aggregations/README.md deleted file mode 100644 index a757d810..00000000 --- a/proposals/aggregations/README.md +++ /dev/null @@ -1,135 +0,0 @@ -# OGC API - Records - Term Aggregations - -This folder contains the content for the standard extension OGC API - Records - Term Aggregations. - -# Overview - -This extensions enables the capability to include Term Aggregations in the items (records) response. These aggregations can be used by clients to enable [faceted search](https://en.wikipedia.org/wiki/Faceted_search). - -Various backends support faceted search. Examples are [Elastic](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-terms-aggregation.html), [SOLR](https://solr.apache.org/guide/8_8/json-facet-api.html) and limited support in [PostGres](https://akorotkov.github.io/blog/2016/06/17/faceted-search/), [Oracle](https://blogs.oracle.com/apex/apex-192-faceted-search). 
- -Facet statistics are also interesting to give an overview of the spatio temporal distribution of items in a (part of a) collection. - -## Extending a json search result with Term Aggregations - -The server includes the Term Aggregations for any search result within a collection. For example, for a collection `cat` the response to - -``` -GET /collections/cat/items -``` - -may include the following property `aggregations`. The content represents an array of aggregated terms. -A aggregation is identified by a collection property name and contains the top number of buckets with -their count. A bucket is a number of results in the resultset matching the key. A parameter `next` -indicates if there are potentially more buckets to be retrieved. - -``` -{ - "type": "FeatureCollection", - "aggregations": [ - { - "property": "keywords", - "buckets": [ - { - "key": "forestry", - "count": "202" - }, - { - "key": "marine", - "count": "150" - } - ], - "next": "0" - } - ], - features": [], - "numberMatched": 375, - "numberReturned": 0, - "links": [] -} -``` - -## Numerical, spatial and temporal buckets - -For dynamic values a range of buckets can be returned defined by a min, max or bbox value. - -``` -{ - "aggregations": [ - "key": "scale", - "buckets": [ - { - "key": "0.1 - 0.001", - "count": "175" - }, - { - "key": "0.001 - 0.00001", - "count": "120" - } - ], - "next": "2" - }, - { - "key": "date", - "buckets": [ - { - "key": "1990/01/01 - 1995/01/01", - "count": "12" - }, - { - "key": "1995/01/01 - 2000/01/01", - "count": "340" - }, - { - "key": "2000/01/01 - 2005/01/01", - "count": "1200" - } - ], - "next": "5" - } - ] -} -``` - -In the case of geometries this is a boundingbox in WGS84 defined by two points. 
- -``` -{ - "aggregations": [ - { - "key": "bbox", - "buckets": [ - { - "key": "0,0 5,5", - "count": 175 - }, - { - "key": "0,5 5,10", - "count": 120 - }, - { - "key": "10,5 15,10", - "count": 77 - } - ], - "next": 7 - } - ] -} -``` - -## Interacting with aggregations - -In some situations clients are interested only in the aggregations. In other situations the aggregations are not required. These aspects can be controlled via additional query parameters. - -| Parameter | Explanation | -| -- | -- | -| aggregationsOnly | default `false`, Returns the aggregations without search results. Similar to `&limit=0`. | -| includeAggregations | default `true` (if available), can be set to `false` | - -# Folder structure - -This folder is organized as follows: - -* openapi - normative OpenAPI components specified by the standard - diff --git a/proposals/aggregations/openapi/parameters/aggregationsOnly.yaml b/proposals/aggregations/openapi/parameters/aggregationsOnly.yaml deleted file mode 100644 index cccf8cb5..00000000 --- a/proposals/aggregations/openapi/parameters/aggregationsOnly.yaml +++ /dev/null @@ -1,10 +0,0 @@ -name: aggregationsOnly -description: parameter can be used to request aggregations only, similar to &limit=0 -in: query -required: false -schema: - type: string - format: uri -style: form -explode: false -default: false \ No newline at end of file diff --git a/proposals/aggregations/openapi/parameters/includeAggregations.yaml b/proposals/aggregations/openapi/parameters/includeAggregations.yaml deleted file mode 100644 index d6b44398..00000000 --- a/proposals/aggregations/openapi/parameters/includeAggregations.yaml +++ /dev/null @@ -1,10 +0,0 @@ -name: includeAggregations -description: parameter can be set to omit aggregations in search result -in: query -required: false -schema: - type: string - format: uri -style: form -explode: false -default: true diff --git a/proposals/aggregations/openapi/schemas/aggregations.yaml 
b/proposals/aggregations/openapi/schemas/aggregations.yaml deleted file mode 100644 index 49e98032..00000000 --- a/proposals/aggregations/openapi/schemas/aggregations.yaml +++ /dev/null @@ -1,4 +0,0 @@ -type: array -description: List of aggregations -items: - $ref: "./aggregation.yaml" \ No newline at end of file diff --git a/proposals/facets/README.md b/proposals/facets/README.md new file mode 100644 index 00000000..44c1f380 --- /dev/null +++ b/proposals/facets/README.md @@ -0,0 +1,218 @@ +# OGC API - Records - Facets + +This folder contains the content for the standard extension OGC API - Records - Facets. + +# Overview + +This extension aggregates the items (records) into buckets and provides a count for each bucket. Related buckets are grouped into a facet, enabling [faceted search](https://en.wikipedia.org/wiki/Faceted_search). + +Various backends support faceted search. Examples are [Elasticsearch](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-terms-aggregation.html) and [Solr](https://solr.apache.org/guide/8_8/json-facet-api.html), with more limited support in [PostgreSQL](https://akorotkov.github.io/blog/2016/06/17/faceted-search/) and [Oracle](https://blogs.oracle.com/apex/apex-192-faceted-search). + +## Definition of a Facet + +The word _Facet_ refers here to a high-level piece of information that is computed over a set of records in a +collection. + +A facet can be of several types: + +### Terms facet + +A `terms` facet can be applied to any text property and produces a list of values appearing for a specific +property across all matching records, as well as the count of records containing each value. + +#### Example + +A terms facet on a `keyword` property containing country names might return a list of buckets like so: + +* `Greece` (24 records) +* `Germany` (12 records) +* `England` (5 records) +* etc. 
+ +### Histogram facet + +A `histogram` facet can be applied to any temporal or numeric property to distribute item values over ranges or intervals. + +#### Example + +The facet `createDate` will return a list of buckets like so: + +* `2020-01-01` to `2020-02-01`: 18 records +* `2020-02-01` to `2020-03-01`: 22 records +* `2020-03-01` to `2020-04-01`: 43 records + +### Filters facet + +A `filters` facet produces a count of matching records for one or several predefined queries. This essentially +lets the user run "sub-queries" cheaply to have a better understanding of the composition of the search results. + +#### Examples + +The facet `is available by` will provide 2 sub-queries: +- `Download service` returns the number of records which have at least 1 distribution of a download type (CSV, Excel...) +- `Visualization service` returns the number of records which have at least 1 distribution of a visualization type (WMS, WMTS...) + +Is available by +* `Download service`: 300 records +* `Visualization service`: 243 records + +## Requirements + +### 1. Advertising available facets + +An additional `/facets` entrypoint (similar to `/queryables`) has to be supported at the collection level. This endpoint only supports `GET` requests. + +For example, for a collection called `myOrg`, a request on + + +```http request +GET /collections/myOrg/facets +``` + +will return a JSON object describing the various available facets for that collection. 
For each facet the +following information is included: + +* The identifier of the facet +* The type of facet: a facet can be of type `terms`, `histogram` or `filters` +* The maximum count of buckets returned in the _facets overview_ +* For terms facets: + * Name of the property targeted by this facet + * Sort criteria: count or value (alphabetical) + * Minimum occurrence count + * Support for including/excluding terms +* For histogram facets: + * Name of the property targeted by this facet + * Type of buckets used: fixed intervals, fixed bucket count, equalized number of records in each bucket +* For filters facets: + * CQL expression used for each filter; this then lets the user apply the same filter in subsequent queries + +> **Note** that all the `facettables` attributes must be `queryables` + +Example: + +```json +{ + "type": "object", + "title": "Observations", + "defaultBucketCount": 10, + "facets": { + "date": { + "name": "updateDate", + "type": "histogram", + "bucketType": "fixedInterval" + }, + "organization": { + "type": "terms", + "sortedBy": "count", + "minimumOccurrenceCount": 500 + }, + "created": { + "type": "histogram", + "bucketType": "fixedBucketCount" + }, + "format": { + "type": "terms", + "sortedBy": "value", + "minimumOccurrenceCount": 10 + }, + "usage": { + "type": "filters", + "view": "link.type = 'wms'", + "download": "link.type = 'wfs'" + } + }, + "$schema": "https://json-schema.org/draft/2019-09/schema" +} +``` + +### 2. Extending queryables response + +The queryables response will advertise whether an attribute is facettable, so that clients may skip the `/facets` request. + +e.g. +```json +{ + "properties": { + "organization": { + "title": "organization", + "type": "string", + "facets": true + } + } +} +``` +### 3. Extending a JSON search result with a Facet Overview + +The server might include a facet overview for any search result within a collection. 
For example, for a collection `myOrg` the response to + +```http request +GET /collections/myOrg/items +``` + +may include the following property `facets`. The content represents a dictionary of all facets available +for that collection, each containing +various buckets describing different facets of the search results. +A facet is identified by an identifier and contains the top buckets with +their counts. A bucket is the group of results in the result set matching a given value or range. A parameter `more` +indicates how many buckets were left out to keep the response size low (0 if all buckets were included in the overview). + +Note: `more` can also simply have the value `true` in case the precise number of additional buckets could not be +computed. + +```json +{ + "type": "FeatureCollection", + "facets": { + "keywords": { + "type": "terms", + "property": "keyword", + "buckets": [ + { + "value": "Greece", + "count": 202 + }, + { + "value": "Germany", + "count": 150 + } + ], + "more": 0 + }, + "createDate": { + "type": "histogram", + "property": "createDate", + "buckets": [ + { + "min": "2010-01-01", + "max": "2011-01-01", + "count": 100 + }, + { + "min": "2011-01-01", + "max": "2012-01-01", + "count": 220 + } + ], + "more": 12 + } + }, + "features": [], + "numberMatched": 375, + "numberReturned": 0, + "links": [] +} +``` + +In some situations clients are interested only in the facets. In that case, they can use the `facets` parameter with `&limit=0`. 
+ +### Examples +``` +GET /collections/myOrg/items?q=&facets=keywords:20:value_asc,date:quantile:4,update:year.month,usage +``` + +# Folder structure + +This folder is organized as follows: + +* openapi - normative OpenAPI components specified by the standard + diff --git a/proposals/aggregations/openapi/schemas/bucket.yaml b/proposals/facets/openapi/schemas/bucket.yaml similarity index 100% rename from proposals/aggregations/openapi/schemas/bucket.yaml rename to proposals/facets/openapi/schemas/bucket.yaml diff --git a/proposals/aggregations/openapi/schemas/aggregation.yaml b/proposals/facets/openapi/schemas/facetResult.yaml similarity index 70% rename from proposals/aggregations/openapi/schemas/aggregation.yaml rename to proposals/facets/openapi/schemas/facetResult.yaml index a06ca837..ca847d63 100644 --- a/proposals/aggregations/openapi/schemas/aggregation.yaml +++ b/proposals/facets/openapi/schemas/facetResult.yaml @@ -1,6 +1,6 @@ -Aggregation: +Facet: type: object - description: An aggregation is linked to a item property and contains the top set of occurences (buckets) of values of the property + description: A facet is linked to an item property and contains the top set of occurrences (buckets) of values of the property required: - key properties: diff --git a/proposals/facets/openapi/schemas/facets.yaml b/proposals/facets/openapi/schemas/facets.yaml new file mode 100644 index 00000000..e448e160 --- /dev/null +++ b/proposals/facets/openapi/schemas/facets.yaml @@ -0,0 +1,4 @@ +type: object +description: Facets overview, a dictionary mapping each facet identifier to its facet result +additionalProperties: + $ref: "./facetResult.yaml"