Skip to content

Commit

Permalink
fix(gateway): undesired conversions to dag-json and friends (#9566)
Browse files Browse the repository at this point in the history
* fix(gateway): do not convert unixfs/raw into dag-* unless explicit
* fix(gateway): keep only dag-json|dag-cbor handling
* fix: allow requesting dag-json as application/json
- adds bunch of additional tests including JSON file on UnixFS
- fix: dag-json codec (0x0129) can be returned as plain json
- fix: json codec (0x0200) cna be retrurned as plain json
* fix: using ?format|Accept with CID w/ codec works
* docs(changelog): cbor and json on gateway

Co-authored-by: Marcin Rataj <[email protected]>
  • Loading branch information
hacdias and lidel authored Jan 21, 2023
1 parent ad9b486 commit c706c63
Show file tree
Hide file tree
Showing 4 changed files with 241 additions and 190 deletions.
21 changes: 11 additions & 10 deletions core/corehttp/gateway_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -418,9 +418,9 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request

// Support custom response formats passed via ?format or Accept HTTP header
switch responseFormat {
case "":
switch resolvedPath.Cid().Prefix().Codec {
case uint64(mc.Json), uint64(mc.DagJson), uint64(mc.Cbor), uint64(mc.DagCbor):
case "", "application/json", "application/cbor":
switch mc.Code(resolvedPath.Cid().Prefix().Codec) {
case mc.Json, mc.DagJson, mc.Cbor, mc.DagCbor:
logger.Debugw("serving codec", "path", contentPath)
i.serveCodec(r.Context(), w, r, resolvedPath, contentPath, begin, responseFormat)
default:
Expand All @@ -441,14 +441,13 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request
logger.Debugw("serving tar file", "path", contentPath)
i.serveTAR(r.Context(), w, r, resolvedPath, contentPath, begin, logger)
return
case "application/json", "application/vnd.ipld.dag-json",
"application/cbor", "application/vnd.ipld.dag-cbor":
case "application/vnd.ipld.dag-json", "application/vnd.ipld.dag-cbor":
logger.Debugw("serving codec", "path", contentPath)
i.serveCodec(r.Context(), w, r, resolvedPath, contentPath, begin, responseFormat)
return
default: // catch-all for unsuported application/vnd.*
err := fmt.Errorf("unsupported format %q", responseFormat)
webError(w, "failed respond with requested content type", err, http.StatusBadRequest)
webError(w, "failed to respond with requested content type", err, http.StatusBadRequest)
return
}
}
Expand Down Expand Up @@ -878,14 +877,14 @@ func customResponseFormat(r *http.Request) (mediaType string, params map[string]
return "application/vnd.ipld.car", nil, nil
case "tar":
return "application/x-tar", nil, nil
case "dag-json":
return "application/vnd.ipld.dag-json", nil, nil
case "json":
return "application/json", nil, nil
case "dag-cbor":
return "application/vnd.ipld.dag-cbor", nil, nil
case "cbor":
return "application/cbor", nil, nil
case "dag-json":
return "application/vnd.ipld.dag-json", nil, nil
case "dag-cbor":
return "application/vnd.ipld.dag-cbor", nil, nil
}
}
// Browsers and other user agents will send Accept header with generic types like:
Expand All @@ -908,6 +907,8 @@ func customResponseFormat(r *http.Request) (mediaType string, params map[string]
}
}
}
// If none of special-cased content types is found, return empty string
// to indicate default, implicit UnixFS response should be prepared
return "", nil, nil
}

Expand Down
83 changes: 41 additions & 42 deletions core/corehttp/gateway_handler_codec.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,22 +25,25 @@ import (

// codecToContentType maps the supported IPLD codecs to the HTTP Content
// Type they should have.
var codecToContentType = map[uint64]string{
uint64(mc.Json): "application/json",
uint64(mc.Cbor): "application/cbor",
uint64(mc.DagJson): "application/vnd.ipld.dag-json",
uint64(mc.DagCbor): "application/vnd.ipld.dag-cbor",
var codecToContentType = map[mc.Code]string{
mc.Json: "application/json",
mc.Cbor: "application/cbor",
mc.DagJson: "application/vnd.ipld.dag-json",
mc.DagCbor: "application/vnd.ipld.dag-cbor",
}

// contentTypeToCodecs maps the HTTP Content Type to the respective
// possible codecs. If the original data is in one of those codecs,
// we stream the raw bytes. Otherwise, we encode in the last codec
// of the list.
var contentTypeToCodecs = map[string][]uint64{
"application/json": {uint64(mc.Json), uint64(mc.DagJson)},
"application/vnd.ipld.dag-json": {uint64(mc.DagJson)},
"application/cbor": {uint64(mc.Cbor), uint64(mc.DagCbor)},
"application/vnd.ipld.dag-cbor": {uint64(mc.DagCbor)},
// contentTypeToRaw maps the HTTP Content Type to the respective codec that
// allows raw response without any conversion.
var contentTypeToRaw = map[string][]mc.Code{
"application/json": {mc.Json, mc.DagJson},
"application/cbor": {mc.Cbor, mc.DagCbor},
}

// contentTypeToCodec maps the HTTP Content Type to the respective codec. We
// only add here the codecs that we want to convert-to-from.
var contentTypeToCodec = map[string]mc.Code{
"application/vnd.ipld.dag-json": mc.DagJson,
"application/vnd.ipld.dag-cbor": mc.DagCbor,
}

// contentTypeToExtension maps the HTTP Content Type to the respective file
Expand All @@ -56,7 +59,7 @@ func (i *gatewayHandler) serveCodec(ctx context.Context, w http.ResponseWriter,
ctx, span := tracing.Span(ctx, "Gateway", "ServeCodec", trace.WithAttributes(attribute.String("path", resolvedPath.String()), attribute.String("requestedContentType", requestedContentType)))
defer span.End()

cidCodec := resolvedPath.Cid().Prefix().Codec
cidCodec := mc.Code(resolvedPath.Cid().Prefix().Codec)
responseContentType := requestedContentType

// If the resolved path still has some remainder, return error for now.
Expand Down Expand Up @@ -90,50 +93,44 @@ func (i *gatewayHandler) serveCodec(ctx context.Context, w http.ResponseWriter,
// No content type is specified by the user (via Accept, or format=). However,
// we support this format. Let's handle it.
if requestedContentType == "" {
isDAG := cidCodec == uint64(mc.DagJson) || cidCodec == uint64(mc.DagCbor)
isDAG := cidCodec == mc.DagJson || cidCodec == mc.DagCbor
acceptsHTML := strings.Contains(r.Header.Get("Accept"), "text/html")
download := r.URL.Query().Get("download") == "true"

if isDAG && acceptsHTML && !download {
i.serveCodecHTML(ctx, w, r, resolvedPath, contentPath)
} else {
// This covers CIDs with codec 'json' and 'cbor' as those do not have
// an explicit requested content type.
i.serveCodecRaw(ctx, w, r, resolvedPath, contentPath, name, modtime)
}

return
}

// Otherwise, the user has requested a specific content type. Let's first get
// the codecs that can be used with this content type.
codecs, ok := contentTypeToCodecs[requestedContentType]
// If DAG-JSON or DAG-CBOR was requested using corresponding plain content type
// return raw block as-is, without conversion
skipCodecs, ok := contentTypeToRaw[requestedContentType]
if ok {
for _, skipCodec := range skipCodecs {
if skipCodec == cidCodec {
i.serveCodecRaw(ctx, w, r, resolvedPath, contentPath, name, modtime)
return
}
}
}

// Otherwise, the user has requested a specific content type (a DAG-* variant).
// Let's first get the codecs that can be used with this content type.
toCodec, ok := contentTypeToCodec[requestedContentType]
if !ok {
// This is never supposed to happen unless function is called with wrong parameters.
err := fmt.Errorf("unsupported content type: %s", requestedContentType)
webError(w, err.Error(), err, http.StatusInternalServerError)
return
}

// If we need to convert, use the last codec (strict dag- variant)
toCodec := codecs[len(codecs)-1]

// If the requested content type has "dag-", ALWAYS go through the encoding
// process in order to validate the content.
if strings.Contains(requestedContentType, "dag-") {
i.serveCodecConverted(ctx, w, r, resolvedPath, contentPath, toCodec, modtime)
return
}

// Otherwise, check if the data is encoded with the requested content type.
// If so, we can directly stream the raw data. serveRawBlock cannot be directly
// used here as it sets different headers.
for _, codec := range codecs {
if resolvedPath.Cid().Prefix().Codec == codec {
i.serveCodecRaw(ctx, w, r, resolvedPath, contentPath, name, modtime)
return
}
}

// Finally, if nothing of the above is true, we have to actually convert the codec.
// This handles DAG-* conversions and validations.
i.serveCodecConverted(ctx, w, r, resolvedPath, contentPath, toCodec, modtime)
}

Expand Down Expand Up @@ -165,6 +162,7 @@ func (i *gatewayHandler) serveCodecHTML(ctx context.Context, w http.ResponseWrit
}
}

// serveCodecRaw returns the raw block without any conversion
func (i *gatewayHandler) serveCodecRaw(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, name string, modtime time.Time) {
blockCid := resolvedPath.Cid()
blockReader, err := i.api.Block().Get(ctx, resolvedPath)
Expand All @@ -184,7 +182,8 @@ func (i *gatewayHandler) serveCodecRaw(ctx context.Context, w http.ResponseWrite
_, _, _ = ServeContent(w, r, name, modtime, content)
}

func (i *gatewayHandler) serveCodecConverted(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, toCodec uint64, modtime time.Time) {
// serveCodecConverted returns payload converted to codec specified in toCodec
func (i *gatewayHandler) serveCodecConverted(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, toCodec mc.Code, modtime time.Time) {
obj, err := i.api.Dag().Get(ctx, resolvedPath.Cid())
if err != nil {
webError(w, "ipfs dag get "+html.EscapeString(resolvedPath.String()), err, http.StatusInternalServerError)
Expand All @@ -199,7 +198,7 @@ func (i *gatewayHandler) serveCodecConverted(ctx context.Context, w http.Respons
}
finalNode := universal.(ipld.Node)

encoder, err := multicodec.LookupEncoder(toCodec)
encoder, err := multicodec.LookupEncoder(uint64(toCodec))
if err != nil {
webError(w, err.Error(), err, http.StatusInternalServerError)
return
Expand Down
79 changes: 68 additions & 11 deletions docs/changelogs/v0.18.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,19 +64,74 @@ Learn more in the [`Reprovider` config](https://github.com/ipfs/go-ipfs/blob/mas

##### (DAG-)JSON and (DAG-)CBOR response formats

Implemented [IPIP-328](https://github.com/ipfs/specs/pull/328) which adds support
for DAG-JSON and DAG-CBOR, as well as their non-DAG variants, to the gateway. Now,
CIDs that encode JSON, CBOR, DAG-JSON and DAG-CBOR objects can be retrieved, and
traversed thanks to the [special meaning of CBOR Tag 42](https://github.com/ipld/cid-cbor/).
The IPFS project has reserved the corresponding media types at IANA:
- [`application/vnd.ipld.dag-json`](https://www.iana.org/assignments/media-types/application/vnd.ipld.dag-json)
- [`application/vnd.ipld.dag-cbor`](https://www.iana.org/assignments/media-types/application/vnd.ipld.dag-cbor)

HTTP clients can request JSON, CBOR, DAG-JSON, and DAG-CBOR responses by either
passing the query parameter `?format` or setting the `Accept` HTTP header to the
following values:
This release implements them as part of [IPIP-328](https://github.com/ipfs/specs/pull/328)
and adds Gateway support for CIDs with `json` (0x0200), `cbor` (0x51),
[`dag-json`](https://ipld.io/specs/codecs/dag-json/) (0x0129)
and [`dag-cbor`](https://ipld.io/specs/codecs/dag-cbor/spec/) (0x71) codecs.

- JSON: `?format=json`, or `Accept: application/json`
- CBOR: `?format=cbor`, or `Accept: application/cbor`
- DAG-JSON: `?format=dag-json`, or `Accept: application/vnd.ipld.dag-json`
- DAG-JSON: `?format=dag-cbor`, or `Accept: application/vnd.ipld.dag-cbor`
To specify the response `Content-Type` explicitly, the HTTP client can override
the codec present in the CID by using the `format` parameter
or setting the `Accept` HTTP header:

- Plain JSON: `?format=json` or `Accept: application/json`
- Plain CBOR: `?format=cbor` or `Accept: application/cbor`
- DAG-JSON: `?format=dag-json` or `Accept: application/vnd.ipld.dag-json`
- DAG-CBOR: `?format=dag-cbor` or `Accept: application/vnd.ipld.dag-cbor`

In addition, when DAG-JSON or DAG-CBOR is requested with the `Accept` header
set to `text/html`, the Gateway will return a basic HTML page with download
options, improving the user experience in web browsers.

###### Example 1: DAG-CBOR and DAG-JSON Conversion on Gateway

The Gateway supports conversion between DAG-CBOR and DAG-JSON for efficient
end-to-end data structure management: author in CBOR or JSON, store as binary
CBOR and retrieve as JSON via HTTP:

```console
$ echo '{"test": "json"}' | ipfs dag put # implicit --input-codec dag-json --store-codec dag-cbor
bafyreico7mjtqtqhvawro3yud5uqn6sc33nzqb7b5j2d7pdmzer5nab4t4

$ ipfs block get bafyreico7mjtqtqhvawro3yud5uqn6sc33nzqb7b5j2d7pdmzer5nab4t4 | xxd
00000000: a164 7465 7374 646a 736f 6e .dtestdjson

$ ipfs dag get bafyreico7mjtqtqhvawro3yud5uqn6sc33nzqb7b5j2d7pdmzer5nab4t4 # implicit --output-codec dag-json
{"test":"json"}

$ curl "http://127.0.0.1:8080/ipfs/bafyreico7mjtqtqhvawro3yud5uqn6sc33nzqb7b5j2d7pdmzer5nab4t4?format=dag-json"
{"test":"json"}
```

###### Example 2: Traversing CBOR DAGs

Placing a CID in [CBOR Tag 42](https://github.com/ipld/cid-cbor/) enables the
creation of arbitrary DAGs. The equivalent DAG-JSON notation for linking
to different blocks is represented by `{ "/": "cid" }`.

The Gateway supports traversing these links, enabling access to data
referenced by structures other than regular UnixFS directories:

```console
$ echo '{"test.jpg": {"/": "bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi"}}' | ipfs dag put
bafyreihspwy3zlkzgphmec5d3xb5g5njrqwotd46lyubnelbzktnmsxkq4 # dag-cbor document linking to unixfs file

$ ipfs resolve /ipfs/bafyreihspwy3zlkzgphmec5d3xb5g5njrqwotd46lyubnelbzktnmsxkq4/test.jpg
/ipfs/bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi

$ ipfs dag stat bafyreihspwy3zlkzgphmec5d3xb5g5njrqwotd46lyubnelbzktnmsxkq4
Size: 119827, NumBlocks: 2

$ curl "http://127.0.0.1:8080/ipfs/bafyreihspwy3zlkzgphmec5d3xb5g5njrqwotd46lyubnelbzktnmsxkq4/test.jpg" > test.jpg
```

###### Example 3: UnixFS directory listing as JSON

Finally, Gateway now supports the same [logical format projection](https://ipld.io/specs/codecs/dag-pb/spec/#logical-format) from
DAG-PB to DAG-JSON as the `ipfs dag get` command, enabling the retrieval of directory listings as JSON instead of HTML:

```console
$ export DIR_CID=bafybeigccimv3zqm5g4jt363faybagywkvqbrismoquogimy7kvz2sj7sq
Expand Down Expand Up @@ -112,6 +167,8 @@ $ curl "http://127.0.0.1:8080/ipfs/$DIR_CID?format=dag-json" | jq
}
]
}
$ ipfs dag get $DIR_CID
{"Data":{"/":{"bytes":"CAE"}},"Links":[{"Hash":{"/":"Qmc3zqKcwzbbvw3MQm3hXdg8BQoFjGdZiGdAfXAyAGGdLi"},"Name":"1 - Barrel - Part 1 - alt.txt","Tsize":21},{"Hash":{"/":"QmdMxMx29KVYhHnaCc1icWYxQqXwUNCae6t1wS2NqruiHd"},"Name":"1 - Barrel - Part 1 - transcript.txt","Tsize":195},{"Hash":{"/":"QmawceGscqN4o8Y8Fv26UUmB454kn2bnkXV5tEQYc4jBd6"},"Name":"1 - Barrel - Part 1.png","Tsize":24862}]}
```

##### 🐎 Fast directory listings with DAG sizes
Expand Down
Loading

0 comments on commit c706c63

Please sign in to comment.