diff --git a/decode/decode.go b/decode/decode.go index 2db3aa9..ce59ecd 100644 --- a/decode/decode.go +++ b/decode/decode.go @@ -44,8 +44,13 @@ func (n *notebook) UnmarshalJSON(data []byte) error { return fmt.Errorf("%s: notebook metadata: %w", ver, err) } - n.cells = make([]schema.Cell, len(n.Notebook.Cells)) - for i, raw := range n.Notebook.Cells { + cells, err := d.ExtractCells(data) + if err != nil { + return fmt.Errorf("%s: extract cells: %w", ver, err) + } + + n.cells = make([]schema.Cell, len(cells)) + for i, raw := range cells { c := cell{meta: meta, decoder: d} if err := json.Unmarshal(raw, &c); err != nil { return fmt.Errorf("%s: %w", ver, err) @@ -78,7 +83,16 @@ func (c *cell) UnmarshalJSON(data []byte) error { // Decoder implementations are version-aware and decode cell contents and metadata // based on the respective JSON schema definition. type Decoder interface { + // ExtractCells accesses the array of notebook cells. + // + // Prior to v4.0 cells were not a part of the top level structure, + // and were contained in "worksheets" instead. + ExtractCells(data []byte) ([]json.RawMessage, error) + + // DecodeMeta decodes version-specific metadata. DecodeMeta(data []byte) (schema.NotebookMetadata, error) + + // DecodeCell decodes raw cell data to a version-specific implementation. 
DecodeCell(v map[string]interface{}, data []byte, meta schema.NotebookMetadata) (schema.Cell, error) } diff --git a/decode/decode_test.go b/decode/decode_test.go index d597988..e7f1f23 100644 --- a/decode/decode_test.go +++ b/decode/decode_test.go @@ -6,6 +6,7 @@ import ( "github.com/bevzzz/nb/schema" "github.com/bevzzz/nb/schema/common" + _ "github.com/bevzzz/nb/schema/v3" _ "github.com/bevzzz/nb/schema/v4" "github.com/bevzzz/nb/decode" @@ -37,6 +38,16 @@ func TestDecodeBytes(t *testing.T) { json string nCells int }{ + { + name: "v4.5", + json: `{ + "nbformat": 4, "nbformat_minor": 5, "metadata": {}, "cells": [ + {"id": "a", "cell_type": "markdown", "metadata": {}, "source": []}, + {"id": "b", "cell_type": "markdown", "metadata": {}, "source": []} + ] + }`, + nCells: 2, + }, { name: "v4.4", json: `{ @@ -47,6 +58,91 @@ func TestDecodeBytes(t *testing.T) { }`, nCells: 2, }, + { + name: "v4.3", + json: `{ + "nbformat": 4, "nbformat_minor": 3, "metadata": {}, "cells": [ + {"cell_type": "markdown", "metadata": {}, "source": []}, + {"cell_type": "markdown", "metadata": {}, "source": []} + ] + }`, + nCells: 2, + }, + { + name: "v4.2", + json: `{ + "nbformat": 4, "nbformat_minor": 2, "metadata": {}, "cells": [ + {"cell_type": "markdown", "metadata": {}, "source": []}, + {"cell_type": "markdown", "metadata": {}, "source": []} + ] + }`, + nCells: 2, + }, + { + name: "v4.1", + json: `{ + "nbformat": 4, "nbformat_minor": 1, "metadata": {}, "cells": [ + {"cell_type": "markdown", "metadata": {}, "source": []}, + {"cell_type": "markdown", "metadata": {}, "source": []} + ] + }`, + nCells: 2, + }, + { + name: "v4.0", + json: `{ + "nbformat": 4, "nbformat_minor": 0, "metadata": {}, "cells": [ + {"cell_type": "markdown", "metadata": {}, "source": []}, + {"cell_type": "markdown", "metadata": {}, "source": []} + ] + }`, + nCells: 2, + }, + { + name: "v3.0", + json: `{ + "nbformat": 3, "nbformat_minor": 0, "metadata": {}, "worksheets": [ + {"cells": [ + {"cell_type": "markdown", 
"metadata": {}, "source": []}, + {"cell_type": "markdown", "metadata": {}, "source": []} + ]}, + {"cells": [ + {"cell_type": "markdown", "metadata": {}, "source": []} + ]} + ] + }`, + nCells: 3, + }, + { + name: "v2.0", + json: `{ + "nbformat": 2, "nbformat_minor": 0, "metadata": {}, "worksheets": [ + {"cells": [ + {"cell_type": "markdown", "metadata": {}, "source": []}, + {"cell_type": "markdown", "metadata": {}, "source": []} + ]}, + {"cells": [ + {"cell_type": "markdown", "metadata": {}, "source": []} + ]} + ] + }`, + nCells: 3, + }, + { + name: "v1.0", + json: `{ + "nbformat": 1, "nbformat_minor": 0, "metadata": {}, "worksheets": [ + {"cells": [ + {"cell_type": "markdown", "metadata": {}, "source": []}, + {"cell_type": "markdown", "metadata": {}, "source": []} + ]}, + {"cells": [ + {"cell_type": "markdown", "metadata": {}, "source": []} + ]} + ] + }`, + nCells: 3, + }, } { t.Run(tt.name, func(t *testing.T) { nb, err := decode.Bytes([]byte(tt.json)) @@ -174,6 +270,21 @@ func TestDecodeBytes(t *testing.T) { Data: []byte("base64-encoded-image-data"), }, }, + { + name: "v3.0: no explicit mime-type", + json: `{ + "nbformat": 3, "nbformat_minor": 0, "metadata": {}, "worksheets": [ + {"cells": [ + {"cell_type": "raw", "source": ["sometimes you just want to rawdog sqweel"]} + ]} + ] + }`, + want: WithAttachments{Cell: Cell{ + Type: schema.Raw, + MimeType: common.PlainText, + Text: []byte("sometimes you just want to rawdog sqweel"), + }}, + }, } { t.Run(tt.name, func(t *testing.T) { nb, err := decode.Bytes([]byte(tt.json)) @@ -209,7 +320,8 @@ func TestDecodeBytes(t *testing.T) { { "cell_type": "code", "execution_count": 5, "source": ["print('Hi, mom!')"], "outputs": [ - {"output_type": "stream"}, {"output_type": "stream"} + {"output_type": "stream", "name": "stdout"}, + {"output_type": "stream", "name": "stderr"} ] } ] @@ -225,6 +337,32 @@ func TestDecodeBytes(t *testing.T) { OutputLen: 2, }, }, + { + name: "v3.0", + json: `{ + "nbformat": 3, "nbformat_minor": 0, 
"metadata": {}, "worksheets": [ + {"cells": [ + { + "cell_type": "code", "language": "javascript", "prompt_number": 5, + "input": ["print('Hi, mom!')"], "outputs": [ + {"output_type": "stream", "stream": "stdout"}, + {"output_type": "stream", "stream": "stderr"} + ] + } + ]} + ] + }`, + want: outcome{ + Cell: Cell{ + Type: schema.Code, + MimeType: "application/x-python", // FIXME: expect language-specific mime-type + Text: []byte("print('Hi, mom!')"), + }, + Language: "javascript", + ExecutionCount: 5, + OutputLen: 2, + }, + }, } { t.Run(tt.name, func(t *testing.T) { nb, err := decode.Bytes([]byte(tt.json)) @@ -271,6 +409,28 @@ func TestDecodeBytes(t *testing.T) { }}, }, }, + { + name: "v3.0: stream output to stdout", + json: `{ + "nbformat": 3, "nbformat_minor": 0, "metadata": {}, "worksheets": [ + {"cells": [ + {"cell_type": "code", "outputs": [ + { + "output_type": "stream", "stream": "stdout", + "text": ["$> ls\n", ".\n", "..\n", "nb/"] + } + ]} + ]} + ] + }`, + want: []output{ + {Cell: Cell{ + Type: schema.Stream, + MimeType: common.Stdout, + Text: []byte("$> ls\n.\n..\nnb/"), + }}, + }, + }, { name: "v4.4: stream output to stderr", json: `{ @@ -292,6 +452,28 @@ func TestDecodeBytes(t *testing.T) { }}, }, }, + { + name: "v3.0: stream output to stderr", + json: `{ + "nbformat": 3, "nbformat_minor": 0, "metadata": {}, "worksheets": [ + {"cells": [ + {"cell_type": "code", "outputs": [ + { + "output_type": "stream", "stream": "stderr", + "text": ["KeyError: ", "dict['unknown key']"] + } + ]} + ]} + ] + }`, + want: []output{ + {Cell: Cell{ + Type: schema.Stream, + MimeType: common.Stderr, + Text: []byte("KeyError: dict['unknown key']"), + }}, + }, + }, { name: "v4.4: stream output to unrecognized target", json: `{ @@ -321,13 +503,13 @@ func TestDecodeBytes(t *testing.T) { {"cell_type": "code", "outputs": [ {"output_type": "display_data", "metadata": {}, "data": { - "image/png": "base64-encoded-png-image", + "image/png": "base64-encoded-png-image", "text/plain": "
" } }, {"output_type": "display_data", "metadata": {}, "data": { - "image/jpeg": "base64-encoded-jpeg-image", + "image/jpeg": "base64-encoded-jpeg-image", "text/plain": "
" } }, @@ -357,6 +539,93 @@ func TestDecodeBytes(t *testing.T) { }}, }, }, + { + name: "v3.0: display_data output different recognized formats", + json: `{ + "nbformat": 3, "nbformat_minor": 0, "metadata": {}, "worksheets": [ + {"cells": [ + {"cell_type": "code", "outputs": [ + {"output_type": "display_data", "metadata": {}, + "png": ["base64-encoded-png-image"], + "text": ["
"] + }, + {"output_type": "display_data", "metadata": {}, + "jpeg": ["base64-encoded-jpeg-image"], + "text": ["
"] + }, + {"output_type": "display_data", "metadata": {}, + "html": [""] + }, + {"output_type": "display_data", "metadata": {}, + "svg": [""] + }, + {"output_type": "display_data", "metadata": {}, + "javascript": ["[,,,].length"] + }, + {"output_type": "display_data", "metadata": {}, + "json": ["{\"foo\": \"bar\"}"] + }, + {"output_type": "display_data", "metadata": {}, + "pdf": ["some-raw-pdf-data"] + }, + {"output_type": "display_data", "metadata": {}, + "latex": ["c = \\sqrt{a^2 + b^2}"] + }, + {"output_type": "display_data", "metadata": {}, + "text": [""] + } + ]} + ]} + ] + }`, + want: []output{ + {Cell: Cell{ + Type: schema.DisplayData, + MimeType: "image/png", + Text: []byte("base64-encoded-png-image"), + }}, + {Cell: Cell{ + Type: schema.DisplayData, + MimeType: "image/jpeg", + Text: []byte("base64-encoded-jpeg-image"), + }}, + {Cell: Cell{ + Type: schema.DisplayData, + MimeType: "text/html", + Text: []byte(``), + }}, + {Cell: Cell{ + Type: schema.DisplayData, + MimeType: "image/svg+xml", + Text: []byte(``), + }}, + {Cell: Cell{ + Type: schema.DisplayData, + MimeType: "text/javascript", + Text: []byte("[,,,].length"), + }}, + {Cell: Cell{ + Type: schema.DisplayData, + MimeType: "application/json", + Text: []byte("{\"foo\": \"bar\"}"), // ???? + }}, + {Cell: Cell{ + Type: schema.DisplayData, + MimeType: "application/pdf", + Text: []byte("some-raw-pdf-data"), // ???? + }}, + {Cell: Cell{ + Type: schema.DisplayData, + MimeType: "application/x-latex", + Text: []byte("c = \\sqrt{a^2 + b^2}"), // ???? 
+ }}, + {Cell: Cell{ + Type: schema.DisplayData, + MimeType: common.PlainText, + Text: []byte(""), + }}, + }, + }, { name: "v4.4: execute_result output with several images and a plain text", json: `{ @@ -391,6 +660,101 @@ func TestDecodeBytes(t *testing.T) { }}, }, }, + { + name: "v3.0: pyout (execute_result) output different recognized formats", + json: `{ + "nbformat": 3, "nbformat_minor": 0, "metadata": {}, "worksheets": [ + {"cells": [ + {"cell_type": "code", "outputs": [ + {"output_type": "pyout", "metadata": {}, + "prompt_number": 42, + "png": ["base64-encoded-png-image"], + "text": ["
"] + }, + {"output_type": "pyout", "metadata": {}, + "prompt_number": 42, + "jpeg": ["base64-encoded-jpeg-image"], + "text": ["
"] + }, + {"output_type": "pyout", "metadata": {}, + "prompt_number": 42, + "html": [""] + }, + {"output_type": "pyout", "metadata": {}, + "prompt_number": 42, + "svg": [""] + }, + {"output_type": "pyout", "metadata": {}, + "prompt_number": 42, + "javascript": ["[,,,].length"] + }, + {"output_type": "pyout", "metadata": {}, + "prompt_number": 42, + "json": ["{\"foo\": \"bar\"}"] + }, + {"output_type": "pyout", "metadata": {}, + "pdf": ["some-raw-pdf-data"] + }, + {"output_type": "pyout", "metadata": {}, + "prompt_number": 42, + "latex": ["c = \\sqrt{a^2 + b^2}"] + }, + {"output_type": "pyout", "metadata": {}, + "prompt_number": 42, + "text": [""] + } + ]} + ]} + ] + }`, + want: []output{ + {ExecutionCount: 42, Cell: Cell{ + Type: schema.ExecuteResult, + MimeType: "image/png", + Text: []byte("base64-encoded-png-image"), + }}, + {ExecutionCount: 42, Cell: Cell{ + Type: schema.ExecuteResult, + MimeType: "image/jpeg", + Text: []byte("base64-encoded-jpeg-image"), + }}, + {ExecutionCount: 42, Cell: Cell{ + Type: schema.ExecuteResult, + MimeType: "text/html", + Text: []byte(``), + }}, + {ExecutionCount: 42, Cell: Cell{ + Type: schema.ExecuteResult, + MimeType: "image/svg+xml", + Text: []byte(``), + }}, + {ExecutionCount: 42, Cell: Cell{ + Type: schema.ExecuteResult, + MimeType: "text/javascript", + Text: []byte("[,,,].length"), + }}, + {ExecutionCount: 42, Cell: Cell{ + Type: schema.ExecuteResult, + MimeType: "application/json", + Text: []byte("{\"foo\": \"bar\"}"), // ???? + }}, + {ExecutionCount: 42, Cell: Cell{ + Type: schema.ExecuteResult, + MimeType: "application/pdf", + Text: []byte("some-raw-pdf-data"), // ???? + }}, + {ExecutionCount: 42, Cell: Cell{ + Type: schema.ExecuteResult, + MimeType: "application/x-latex", + Text: []byte("c = \\sqrt{a^2 + b^2}"), // ???? 
+ }}, + {ExecutionCount: 42, Cell: Cell{ + Type: schema.ExecuteResult, + MimeType: common.PlainText, + Text: []byte(""), + }}, + }, + }, { name: "v4.4: error output", json: `{ @@ -417,6 +781,33 @@ func TestDecodeBytes(t *testing.T) { }}, }, }, + { + name: "v3.0: error output", + json: `{ + "nbformat": 3, "nbformat_minor": 0, "metadata": {}, "worksheets": [ + {"cells": [ + {"cell_type": "code", "outputs": [ + { + "output_type": "pyerr", "ename": "ZeroDivisionError", "evalue": "division by zero", + "traceback": [ + "Traceback (most recent call last):", + "\tFile \"main.py\", line 3, in ", + "\t\tprint(n/0)", + "\tZeroDivisionError: division by zero" + ] + } + ]} + ]} + ] + }`, + want: []output{ + {Cell: Cell{ + Type: schema.Error, + MimeType: common.Stderr, + Text: []byte("Traceback (most recent call last):\n\tFile \"main.py\", line 3, in \n\t\tprint(n/0)\n\tZeroDivisionError: division by zero"), + }}, + }, + }, } { t.Run(tt.name, func(t *testing.T) { nb, err := decode.Bytes([]byte(tt.json)) @@ -433,6 +824,43 @@ func TestDecodeBytes(t *testing.T) { }) } }) + + t.Run("heading cells", func(t *testing.T) { + for _, tt := range []struct { + name string + json string + want Cell + }{ + { + name: "v3.0 used to have dedicated heading cells", + json: `{ + "nbformat": 3, "nbformat_minor": 0, "metadata": {}, "worksheets": [ + {"cells": [ + { + "cell_type": "heading", "level": 2, + "source": ["Fun facts about Ronald McDonald"], "metadata": {} + } + ]} + ] + }`, + want: Cell{ + Type: schema.Markdown, + MimeType: common.MarkdownText, + Text: []byte("## Fun facts about Ronald McDonald"), + }, + }, + } { + t.Run(tt.name, func(t *testing.T) { + nb, err := decode.Bytes([]byte(tt.json)) + require.NoError(t, err) + + got := nb.Cells() + require.Len(t, got, 1, "expected 1 cell") + + checkCell(t, got[0], tt.want) + }) + } + }) } // checkCell compares the cell's type and content to expected. 
diff --git a/schema/common/notebook.go b/schema/common/notebook.go index 8246fe7..ae289ab 100644 --- a/schema/common/notebook.go +++ b/schema/common/notebook.go @@ -7,10 +7,9 @@ import ( ) type Notebook struct { - VersionMajor int `json:"nbformat"` - VersionMinor int `json:"nbformat_minor"` - Metadata json.RawMessage `json:"metadata"` // TODO: omitempty - Cells []json.RawMessage `json:"cells"` + VersionMajor int `json:"nbformat"` + VersionMinor int `json:"nbformat_minor"` + Metadata json.RawMessage `json:"metadata"` // TODO: omitempty } func (n *Notebook) Version() schema.Version { @@ -26,3 +25,60 @@ const ( Stdout = "application/vnd.jupyter.stdout" // Custom mime-type for stream output to stdout. Stderr = "application/vnd.jupyter.stderr" // Custom mime-type for stream output to stderr. ) + +// Markdown defines the schema for a "markdown" cell. +type Markdown struct { + Source MultilineString `json:"source"` +} + +var _ schema.Cell = (*Markdown)(nil) + +func (md *Markdown) Type() schema.CellType { + return schema.Markdown +} + +func (md *Markdown) MimeType() string { + return MarkdownText +} + +func (md *Markdown) Text() []byte { + return md.Source.Text() +} + +// Raw defines the schema for a "raw" cell. +type Raw struct { + Source MultilineString `json:"source"` + Metadata RawCellMetadata `json:"metadata"` +} + +var _ schema.Cell = (*Raw)(nil) + +func (raw *Raw) Type() schema.CellType { + return schema.Raw +} + +func (raw *Raw) MimeType() string { + return raw.Metadata.MimeType() +} + +func (raw *Raw) Text() []byte { + return raw.Source.Text() +} + +// RawCellMetadata may specify a target conversion format. +type RawCellMetadata struct { + Format *string `json:"format"` + RawMimeType *string `json:"raw_mimetype"` +} + +// MimeType returns a more specific mime-type if one is provided and "text/plain" otherwise. 
+func (raw *RawCellMetadata) MimeType() string { + switch { + case raw.Format != nil: + return *raw.Format + case raw.RawMimeType != nil: + return *raw.RawMimeType + default: + return PlainText + } +} diff --git a/schema/schema.go b/schema/schema.go index e6df60d..d510239 100644 --- a/schema/schema.go +++ b/schema/schema.go @@ -1,3 +1,11 @@ +// Package schema defines the common data format for elements of a Jupyter notebook. +// +// It is based on the [v4.4] definition, as it is stable and encompasses all the data +// necessary for accurate rendering. Note that schema validation is not a goal of this +// package, and so, interfaces defined here will often omit the non-essential data, +// e.g. metadata or fields specific to the JupyterLab environment. +// +// [v4.4]: https://github.com/jupyter/nbformat/blob/main/nbformat/v4/nbformat.v4.4.schema.json package schema import ( @@ -34,6 +42,9 @@ type Cell interface { Text() []byte } +// HasAttachments is implemented by cells which include [cell attachments]. +// +// [cell attachments]: https://nbformat.readthedocs.io/en/latest/format_description.html#cell-attachments type HasAttachments interface { // Attachments are only defined for v4.0 and above for markdown and raw cells // and may be omitted in the JSON. Cells without attachments should return nil. diff --git a/schema/v3/schema.go b/schema/v3/schema.go new file mode 100644 index 0000000..c50fb45 --- /dev/null +++ b/schema/v3/schema.go @@ -0,0 +1,322 @@ +// Package v3 provides a decoder for Jupyter Notebooks v1.0, v2.0, and v3.0. +// +// It implements the [IPython Notebook v3.0 JSON Schema], which is also suitable +// for decoding all earlier versions, as there haven't been any breaking changes +// to it. 
+// +// [IPython Notebook v3.0 JSON Schema]: https://github.com/jupyter/nbformat/blob/main/nbformat/v3/nbformat.v3.schema.json +package v3 + +import ( + "bytes" + "encoding/json" + "fmt" + "strings" + + "github.com/bevzzz/nb/decode" + "github.com/bevzzz/nb/schema" + "github.com/bevzzz/nb/schema/common" +) + +func init() { + d := new(decoder) + decode.RegisterDecoder(schema.Version{Major: 3, Minor: 0}, d) + decode.RegisterDecoder(schema.Version{Major: 2, Minor: 0}, d) + decode.RegisterDecoder(schema.Version{Major: 1, Minor: 0}, d) +} + +// decoder decodes cell contents and metadata for nbformat v3.0, v2.0, and v1.0. +type decoder struct{} + +var _ decode.Decoder = (*decoder)(nil) + +func (d *decoder) ExtractCells(data []byte) ([]json.RawMessage, error) { + var raw struct { + Worksheets []struct { + Cells []json.RawMessage `json:"cells"` + } `json:"worksheets"` + } + if err := json.Unmarshal(data, &raw); err != nil { + return nil, err + } + + var cells []json.RawMessage + for i := range raw.Worksheets { + cells = append(cells, raw.Worksheets[i].Cells...) + } + return cells, nil +} + +func (d *decoder) DecodeMeta(data []byte) (schema.NotebookMetadata, error) { + return nil, nil +} + +func (d *decoder) DecodeCell(m map[string]interface{}, data []byte, meta schema.NotebookMetadata) (schema.Cell, error) { + var ct interface{} + var c schema.Cell + switch ct = m["cell_type"]; ct { + case "markdown": + c = &Markdown{} + case "heading": + c = &Heading{} + case "raw": + c = &Raw{} + case "code": + c = &Code{} + default: + return nil, fmt.Errorf("unknown cell type %q", ct) + } + if err := json.Unmarshal(data, &c); err != nil { + return nil, fmt.Errorf("%s: %w", ct, err) + } + return c, nil +} + +type ( + Markdown = common.Markdown + Raw = common.Raw +) + +// Heading is a dedicated cell type which represents a heading in a Jupyter notebook. +// This type is deprecated in later versions and the content is stored as markdown instead. 
+// +// Heading cell behaves exactly like a markdown cell, decorating its source with the +// appropriate number of heading signs (#). +type Heading struct { + Markdown + Level int `json:"level"` +} + +var _ schema.Cell = (*Heading)(nil) + +func (h *Heading) Text() []byte { + hashes := append(bytes.Repeat([]byte("#"), h.Level), " "...) + return append(hashes, h.Source.Text()...) +} + +// Code defines the schema for a "code" cell. +type Code struct { + Source common.MultilineString `json:"input"` + TimesExecuted int `json:"prompt_number"` + Out []Output `json:"outputs"` + Lang string `json:"language"` +} + +var _ schema.CodeCell = (*Code)(nil) +var _ schema.Outputter = (*Code)(nil) + +func (code *Code) Type() schema.CellType { + return schema.Code +} + +// FIXME: return correct mime type (add a function to common) +func (code *Code) MimeType() string { + return "application/x-python" +} + +func (code *Code) Text() []byte { + return code.Source.Text() +} + +func (code *Code) Language() string { + return code.Lang +} + +func (code *Code) ExecutionCount() int { + return code.TimesExecuted +} + +func (code *Code) Outputs() (cells []schema.Cell) { + for i := range code.Out { + cells = append(cells, code.Out[i].cell) + } + return +} + +// Output unmarshals a cell output into schema.Cell based on its type. 
+type Output struct { + cell schema.Cell +} + +func (out *Output) UnmarshalJSON(data []byte) error { + var v map[string]interface{} + if err := json.Unmarshal(data, &v); err != nil { + return fmt.Errorf("code outputs: %w", err) + } + + var t interface{} + var c schema.Cell + switch t = v["output_type"]; t { + case "stream": + c = &StreamOutput{} + case "display_data": + c = &DisplayDataOutput{} + case "pyout": + c = &ExecuteResultOutput{} + case "pyerr": + c = &ErrorOutput{} + default: + return fmt.Errorf("unknown output type %q", t) + } + + if err := json.Unmarshal(data, &c); err != nil { + return fmt.Errorf("%q output: %w", t, err) + } + out.cell = c + return nil +} + +// StreamOutput is a plain, text-based output of the executed code. +// Depending on the stream "target", MimeType() reports a custom stdout or stderr mime-type, and "text/plain" for any other target. +// The output is often decorated with ANSI-color sequences, which should be handled separately. +type StreamOutput struct { + // Target can be stdout or stderr. + Target string `json:"stream"` + Source common.MultilineString `json:"text"` +} + +var _ schema.Cell = (*StreamOutput)(nil) + +func (stream *StreamOutput) Type() schema.CellType { + return schema.Stream +} + +func (stream *StreamOutput) MimeType() string { + switch stream.Target { + case "stdout": + return common.Stdout + case "stderr": + return common.Stderr + } + return common.PlainText +} + +func (stream *StreamOutput) Text() []byte { + return stream.Source.Text() +} + +// DisplayDataOutput is a rich-format output generated by running the code in the parent cell. +type DisplayDataOutput struct { + MimeBundle + Metadata map[string]interface{} `json:"metadata"` +} + +var _ schema.Cell = (*DisplayDataOutput)(nil) + +func (dd *DisplayDataOutput) Type() schema.CellType { + return schema.DisplayData +} + +// MimeBundle contains rich output data keyed by mime-type. 
+type MimeBundle struct { + PNG common.MultilineString `json:"png,omitempty"` + JPEG common.MultilineString `json:"jpeg,omitempty"` + HTML common.MultilineString `json:"html,omitempty"` + SVG common.MultilineString `json:"svg,omitempty"` + Javascript common.MultilineString `json:"javascript,omitempty"` + JSON common.MultilineString `json:"json,omitempty"` + PDF common.MultilineString `json:"pdf,omitempty"` + LaTeX common.MultilineString `json:"latex,omitempty"` + Txt common.MultilineString `json:"text,omitempty"` +} + +var _ schema.MimeBundle = (*MimeBundle)(nil) + +// MimeType returns the richer of the mime-types present in the bundle, +// and falls back to "text/plain" otherwise. +func (mb MimeBundle) MimeType() string { + switch { + case mb.PNG != nil: + return "image/png" + case mb.JPEG != nil: + return "image/jpeg" + case mb.HTML != nil: + return "text/html" + case mb.SVG != nil: + return "image/svg+xml" + case mb.Javascript != nil: + return "text/javascript" + case mb.JSON != nil: + return "application/json" + case mb.PDF != nil: + return "application/pdf" + case mb.LaTeX != nil: + return "application/x-latex" + } + return common.PlainText +} + +// Text returns data with the richer mime-type. +func (mb MimeBundle) Text() []byte { + return mb.Data(mb.MimeType()) +} + +// Data returns mime-type-specific content if present and a nil slice otherwise. +func (mb MimeBundle) Data(mime string) []byte { + switch mime { + case "image/png": + return mb.PNG.Text() + case "image/jpeg": + return mb.JPEG.Text() + case "text/html": + return mb.HTML.Text() + case "image/svg+xml": + return mb.SVG.Text() + case "text/javascript": + return mb.Javascript.Text() + case "application/json": + return mb.JSON.Text() + case "application/pdf": + return mb.PDF.Text() + case "application/x-latex": + return mb.LaTeX.Text() + case common.PlainText: + return mb.Txt.Text() + } + return nil +} + +// PlainText returns data for "text/plain" mime-type and a nil slice otherwise. 
+func (mb MimeBundle) PlainText() []byte { + return mb.Data(common.PlainText) +} + +// ExecuteResultOutput is the result of executing the code in the cell. +// Its contents are identical to those of DisplayDataOutput with the addition of the execution count. +type ExecuteResultOutput struct { + DisplayDataOutput + TimesExecuted int `json:"prompt_number"` +} + +var _ schema.Cell = (*ExecuteResultOutput)(nil) +var _ schema.ExecutionCounter = (*ExecuteResultOutput)(nil) + +func (ex *ExecuteResultOutput) Type() schema.CellType { + return schema.ExecuteResult +} + +func (ex *ExecuteResultOutput) ExecutionCount() int { + return ex.TimesExecuted +} + +// ErrorOutput stores the output of a failed code execution. +type ErrorOutput struct { + ExceptionName string `json:"ename"` + ExceptionValue string `json:"evalue"` + Traceback []string `json:"traceback"` +} + +var _ schema.Cell = (*ErrorOutput)(nil) + +func (err *ErrorOutput) Type() schema.CellType { + return schema.Error +} + +func (err *ErrorOutput) MimeType() string { + return common.Stderr +} + +func (err *ErrorOutput) Text() (txt []byte) { + s := strings.Join(err.Traceback, "\n") + return []byte(s) +} diff --git a/schema/v4/schema.go b/schema/v4/schema.go index 940f122..c13a7c2 100644 --- a/schema/v4/schema.go +++ b/schema/v4/schema.go @@ -1,3 +1,9 @@ +// Package v4 provides a decoder for Jupyter Notebooks v4.0 and later minor versions. +// +// It implements the [IPython Notebook v4.0 JSON Schema]. Other minor versions can be decoded using the same schema, +// as the differences do not affect how the notebook is rendered. 
+// +// [IPython Notebook v4.0 JSON Schema]: https://github.com/jupyter/nbformat/blob/main/nbformat/v4/nbformat.v4.0.schema.json package v4 import ( @@ -11,13 +17,30 @@ import ( ) func init() { - decode.RegisterDecoder(version, new(decoder)) + d := new(decoder) + decode.RegisterDecoder(schema.Version{Major: 4, Minor: 5}, d) + decode.RegisterDecoder(schema.Version{Major: 4, Minor: 4}, d) + decode.RegisterDecoder(schema.Version{Major: 4, Minor: 3}, d) + decode.RegisterDecoder(schema.Version{Major: 4, Minor: 2}, d) + decode.RegisterDecoder(schema.Version{Major: 4, Minor: 1}, d) + decode.RegisterDecoder(schema.Version{Major: 4, Minor: 0}, d) } -var version = schema.Version{Major: 4, Minor: 4} - +// decoder decodes cell contents and metadata for nbformat v4.0. type decoder struct{} +var _ decode.Decoder = (*decoder)(nil) + +func (d *decoder) ExtractCells(data []byte) ([]json.RawMessage, error) { + var raw struct { + Cells []json.RawMessage `json:"cells"` + } + if err := json.Unmarshal(data, &raw); err != nil { + return nil, err + } + return raw.Cells, nil +} + func (d *decoder) DecodeMeta(data []byte) (schema.NotebookMetadata, error) { var nm NotebookMetadata if err := json.Unmarshal(data, &nm); err != nil { @@ -60,51 +83,24 @@ func (nm *NotebookMetadata) Language() string { // Markdown defines the schema for a "markdown" cell. type Markdown struct { - Att Attachments `json:"attachments,omitempty"` - Source common.MultilineString `json:"source"` + common.Markdown + Att Attachments `json:"attachments,omitempty"` } -var _ schema.Cell = (*Markdown)(nil) var _ schema.HasAttachments = (*Markdown)(nil) -func (md *Markdown) Type() schema.CellType { - return schema.Markdown -} - -func (md *Markdown) MimeType() string { - return common.MarkdownText -} - -func (md *Markdown) Text() []byte { - return md.Source.Text() -} - func (md *Markdown) Attachments() schema.Attachments { return md.Att } // Raw defines the schema for a "raw" cell. 
type Raw struct { - Att Attachments `json:"attachments,omitempty"` - Source common.MultilineString `json:"source"` - Metadata RawCellMetadata `json:"metadata"` + common.Raw + Att Attachments `json:"attachments,omitempty"` } -var _ schema.Cell = (*Raw)(nil) var _ schema.HasAttachments = (*Raw)(nil) -func (raw *Raw) Type() schema.CellType { - return schema.Raw -} - -func (raw *Raw) MimeType() string { - return raw.Metadata.MimeType() -} - -func (raw *Raw) Text() []byte { - return raw.Source.Text() -} - func (raw *Raw) Attachments() schema.Attachments { return raw.Att } @@ -122,24 +118,6 @@ func (att Attachments) MimeBundle(filename string) schema.MimeBundle { return mb } -// RawCellMetadata may specify a target conversion format. -type RawCellMetadata struct { - Format *string `json:"format"` - RawMimeType *string `json:"raw_mimetype"` -} - -// MimeType returns a more specific mime-type if one is provided and "text/plain" otherwise. -func (raw *RawCellMetadata) MimeType() string { - switch { - case raw.Format != nil: - return *raw.Format - case raw.RawMimeType != nil: - return *raw.RawMimeType - default: - return common.PlainText - } -} - // Code defines the schema for a "code" cell. type Code struct { Source common.MultilineString `json:"source"` @@ -155,7 +133,7 @@ func (code *Code) Type() schema.CellType { return schema.Code } -// TODO: return correct mime type (add a function to common) +// FIXME: return correct mime type (add a function to common) func (code *Code) MimeType() string { return "application/x-python" } @@ -295,7 +273,7 @@ func (mb MimeBundle) Data(mime string) []byte { return nil } -// RawText returns data for "text/plain" mime-type and a nil slice otherwise. +// PlainText returns data for "text/plain" mime-type and a nil slice otherwise. 
func (mb MimeBundle) PlainText() []byte { return mb.Data(common.PlainText) } diff --git a/version.go b/version.go index 3ac4da1..2a4b750 100644 --- a/version.go +++ b/version.go @@ -2,10 +2,11 @@ package nb import ( // Currently supported nbformat versions: + _ "github.com/bevzzz/nb/schema/v3" _ "github.com/bevzzz/nb/schema/v4" ) // Version returns current release version. func Version() string { - return "v0.2.0" + return "v0.2.1" }