From 4956c2fa9007eb302ebaf17267a7bcaf0eb07219 Mon Sep 17 00:00:00 2001 From: Carlana Date: Wed, 19 Feb 2025 11:28:48 -0500 Subject: [PATCH] Add spl-photo and partner-photo --- internal/db/gdocs.go | 1 + pkg/almanack/service-gdocs-markdown.go | 4 + pkg/almanack/service-gdocs-process.go | 20 ++++- pkg/almanack/service-gdocs-raw.go | 4 + pkg/almanack/service-gdocs-rich.go | 5 ++ pkg/almanack/service-gdocs.go | 2 +- .../testdata/processDocHTML/OP1/embeds.json | 11 ++- .../testdata/processDocHTML/OP2/article.md | 19 ++++ .../testdata/processDocHTML/OP2/doc.html | 86 +++++++++++++++++++ .../testdata/processDocHTML/OP2/embeds.json | 33 +++++++ .../testdata/processDocHTML/OP2/metadata.json | 19 ++++ .../testdata/processDocHTML/OP2/raw.html | 1 + .../testdata/processDocHTML/OP2/rich.html | 1 + .../testdata/processDocHTML/OP2/warnings.json | 1 + .../processDocHTML/SPLHAROLD/embeds.json | 5 +- .../testdata/processDocHTML/abc/embeds.json | 5 +- 16 files changed, 204 insertions(+), 13 deletions(-) create mode 100644 pkg/almanack/testdata/processDocHTML/OP2/article.md create mode 100644 pkg/almanack/testdata/processDocHTML/OP2/doc.html create mode 100644 pkg/almanack/testdata/processDocHTML/OP2/embeds.json create mode 100644 pkg/almanack/testdata/processDocHTML/OP2/metadata.json create mode 100644 pkg/almanack/testdata/processDocHTML/OP2/raw.html create mode 100644 pkg/almanack/testdata/processDocHTML/OP2/rich.html create mode 100644 pkg/almanack/testdata/processDocHTML/OP2/warnings.json diff --git a/internal/db/gdocs.go b/internal/db/gdocs.go index 9306b037..df6146d1 100644 --- a/internal/db/gdocs.go +++ b/internal/db/gdocs.go @@ -77,4 +77,5 @@ type EmbedImage struct { Description string `json:"description"` Width int `json:"width"` Height int `json:"height"` + Kind string `json:"kind"` } diff --git a/pkg/almanack/service-gdocs-markdown.go b/pkg/almanack/service-gdocs-markdown.go index 35a7da93..738b0c98 100644 --- a/pkg/almanack/service-gdocs-markdown.go +++ b/pkg/almanack/service-gdocs-markdown.go @@ -66,6 +66,10 @@ func intermediateDocToMarkdown(doc *html.Node) string { // Write picture shortcode case db.ImageEmbedTag: image := dbembed.Value.(db.EmbedImage) + if image.Kind == "partner" { + dataEl.Parent.RemoveChild(dataEl) + continue + } var widthHeight string if image.Width != 0 { widthHeight = fmt.Sprintf(`width-ratio="%d" height-ratio="%d" `, diff --git a/pkg/almanack/service-gdocs-process.go b/pkg/almanack/service-gdocs-process.go index 52a20b3e..f4bdaa7b 100644 --- a/pkg/almanack/service-gdocs-process.go +++ b/pkg/almanack/service-gdocs-process.go @@ -134,14 +134,25 @@ func processDocHTML(docHTML *html.Node) ( data := newDataTag(dtPartnerText, xhtml.InnerHTMLBlocks(n)) xhtml.ReplaceWith(tbl, data) - case "photo", "image", "photograph", "illustration", "illo": + case "photo", "image", "photograph", "illustration", "illo", "spl-photo", "partner-photo": embed.Type = db.ImageEmbedTag - if imageEmbed, warning := processImage(rows, n); warning != "" { + kind := "all" + if label == "spl-photo" { + kind = "spl" + } else if label == "partner-photo" { + kind = "partner" + } + if imageEmbed, warning := processImage(rows, n, kind); warning != "" { tbl.Parent.RemoveChild(tbl) warnings = append(warnings, warning) } else { embed.Value = *imageEmbed - goto append + if kind != "spl" { + embeds = append(embeds, embed) + n++ + } + data := newDataTag(dtDBEmbed, dbEmbedToString(embed)) + xhtml.ReplaceWith(tbl, data) } case "metadata", "info": @@ -222,7 +233,7 @@ func processDocHTML(docHTML *html.Node) ( return } -func processImage(rows xhtml.TableNodes, n int) (imageEmbed *db.EmbedImage, warning string) { +func processImage(rows xhtml.TableNodes, n int, kind string) (imageEmbed *db.EmbedImage, warning string) { var width, height int if w := xhtml.TextContent(rows.Value("width")); w != "" { width, _ = strconv.Atoi(w) @@ -239,6 +250,7 @@ func processImage(rows xhtml.TableNodes, n int) (imageEmbed *db.EmbedImage, warn ), Width: width, Height: height, + Kind: kind, } if path := xhtml.TextContent(rows.Value("path")); path != "" { diff --git a/pkg/almanack/service-gdocs-raw.go b/pkg/almanack/service-gdocs-raw.go index 7079dede..3e47f0c1 100644 --- a/pkg/almanack/service-gdocs-raw.go +++ b/pkg/almanack/service-gdocs-raw.go @@ -30,6 +30,10 @@ func intermediateDocToPartnerHTML(doc *html.Node) { switch dbembed.Type { // Replace images with red placeholder text case db.ImageEmbedTag: + if imgTag := dbembed.Value.(db.EmbedImage); imgTag.Kind == "spl" { + dataEl.Parent.RemoveChild(dataEl) + continue + } placeholder := xhtml.New("h2", "style", "color: red;") xhtml.AppendText(placeholder, fmt.Sprintf("Embed #%d", dbembed.N)) xhtml.ReplaceWith(dataEl, placeholder) diff --git a/pkg/almanack/service-gdocs-rich.go b/pkg/almanack/service-gdocs-rich.go index a8bb092a..e0ca8fa7 100644 --- a/pkg/almanack/service-gdocs-rich.go +++ b/pkg/almanack/service-gdocs-rich.go @@ -3,6 +3,7 @@ package almanack import ( "fmt" + "github.com/spotlightpa/almanack/internal/db" "github.com/spotlightpa/almanack/internal/iterx" "github.com/spotlightpa/almanack/internal/xhtml" "golang.org/x/net/html" @@ -27,6 +28,10 @@ func intermediateDocToPartnerRichText(doc *html.Node) { // Replace other embeds with red placeholder text for dataEl, value := range dataEls(doc, dtDBEmbed) { dbembed := dbEmbedFromString(value) + if imgTag, ok := dbembed.Value.(db.EmbedImage); ok && imgTag.Kind == "spl" { + dataEl.Parent.RemoveChild(dataEl) + continue + } placeholder := xhtml.New("h2", "style", "color: red;") xhtml.AppendText(placeholder, fmt.Sprintf("Embed #%d", dbembed.N)) xhtml.ReplaceWith(dataEl, placeholder) diff --git a/pkg/almanack/service-gdocs.go b/pkg/almanack/service-gdocs.go index 918b8ced..393941ef 100644 --- a/pkg/almanack/service-gdocs.go +++ b/pkg/almanack/service-gdocs.go @@ -113,7 +113,7 @@ func (svc Services) ProcessGDocsDoc(ctx context.Context, dbDoc db.GDocsDoc) (err // Handle image uploads/database lookups for tbl, rows := range xhtml.Tables(docHTML) { switch label := rows.Label(); label { - case "photo", "image", "photograph", "illustration", "illo": + case "photo", "image", "photograph", "illustration", "illo", "spl-photo", "partner-photo": if warning := svc.replaceImagePath( ctx, tbl, rows, dbDoc.ExternalID, objID2Path, ); warning != "" { diff --git a/pkg/almanack/testdata/processDocHTML/OP1/embeds.json b/pkg/almanack/testdata/processDocHTML/OP1/embeds.json index fb0fe7a5..d16f4fa9 100644 --- a/pkg/almanack/testdata/processDocHTML/OP1/embeds.json +++ b/pkg/almanack/testdata/processDocHTML/OP1/embeds.json @@ -13,7 +13,8 @@ "caption": "Here is a caption", "description": "Blah blah blah.", "width": 0, - "height": 0 + "height": 0, + "kind": "all" } }, { @@ -25,7 +26,8 @@ "caption": "Overdose awareness", "description": "", "width": 0, - "height": 0 + "height": 0, + "kind": "all" } }, { @@ -37,7 +39,8 @@ "caption": "", "description": "The House floor in the Pa. Capitol. The chamber is controlled by Democrats for the first session in more than a \"decade.\"", "width": 0, - "height": 0 + "height": 0, + "kind": "all" } } -] \ No newline at end of file +] diff --git a/pkg/almanack/testdata/processDocHTML/OP2/article.md b/pkg/almanack/testdata/processDocHTML/OP2/article.md new file mode 100644 index 00000000..ebc6d601 --- /dev/null +++ b/pkg/almanack/testdata/processDocHTML/OP2/article.md @@ -0,0 +1,19 @@ +My name is \*Carlana Johnson\*. + +This is my \_test document\_. + +\[Citation Needed\] + +Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. + +# Some blocks + +{{}} + +Here is some text. Lorem ipsum. + +{{}} + +And here’s another image: + +{{}} \ No newline at end of file diff --git a/pkg/almanack/testdata/processDocHTML/OP2/doc.html b/pkg/almanack/testdata/processDocHTML/OP2/doc.html new file mode 100644 index 00000000..9efc27dd --- /dev/null +++ b/pkg/almanack/testdata/processDocHTML/OP2/doc.html @@ -0,0 +1,86 @@ +

+

metadata +

+

Slug +

OP1 +

Date +

4/20/2023 +

Hed +

The Winds of Change +

Description +

Something is in the air tonight… +

Byline +

The Scorpions +

Blurb (100 chars) +

Blurb +

Budget +

+

A CIA op becomes an unexpected smash hit. +

+

Lede image +

N / A +

Lede image credit +

Teenage Engineering +

Lede image description +

Synthesizer +

+

My name is *Carlana Johnson*. +

+

This is my _test document_. +

+

[Citation Needed] +

+

Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. +

+

Some blocks +

+

html +

+

<script src="http://example.com/"></script> +

+

+

+

partner-photo +

+

+

+

credit +

+

CarlCo +

+

description +

+

Blah blah blah. +

+

caption +

+

Here is a caption +

+

pathexternal/cwp3y3z5x8svbtsfv7t68hggc0.png

+

Here is some text. Lorem ipsum. +

+

+

spl-photo +

20210831_spotlight_overdose_awareness_selects_0021.JPG +

credit +

Amanda Berg +

caption +

Overdose awareness +

pathexternal/75h9dj4qkszh2ydb6e3h9pn2r0.jpeg

+

And here’s another image: +

+

photo +

+

path +

external/08v6tmkadg07fyknwy3pb8sm3m.jpeg +

credit +

+

Amanda Berg / For Spotlight PA +

+

Alt +

+

The House floor in the Pa. Capitol. The chamber is controlled by Democrats for the first session in more than a "decade." +

+

+

+

diff --git a/pkg/almanack/testdata/processDocHTML/OP2/embeds.json b/pkg/almanack/testdata/processDocHTML/OP2/embeds.json new file mode 100644 index 00000000..f2a7966b --- /dev/null +++ b/pkg/almanack/testdata/processDocHTML/OP2/embeds.json @@ -0,0 +1,33 @@ +[ + { + "n": 1, + "type": "raw", + "value": "" + }, + { + "n": 2, + "type": "image", + "value": { + "path": "external/cwp3y3z5x8svbtsfv7t68hggc0.png", + "credit": "CarlCo", + "caption": "Here is a caption", + "description": "Blah blah blah.", + "width": 0, + "height": 0, + "kind": "partner" + } + }, + { + "n": 3, + "type": "image", + "value": { + "path": "external/08v6tmkadg07fyknwy3pb8sm3m.jpeg", + "credit": "Amanda Berg / For Spotlight PA", + "caption": "", + "description": "The House floor in the Pa. Capitol. The chamber is controlled by Democrats for the first session in more than a \"decade.\"", + "width": 0, + "height": 0, + "kind": "all" + } + } +] \ No newline at end of file diff --git a/pkg/almanack/testdata/processDocHTML/OP2/metadata.json b/pkg/almanack/testdata/processDocHTML/OP2/metadata.json new file mode 100644 index 00000000..bd69ac71 --- /dev/null +++ b/pkg/almanack/testdata/processDocHTML/OP2/metadata.json @@ -0,0 +1,19 @@ +{ + "publication_date": null, + "internal_id": "OP1", + "byline": "The Scorpions", + "budget": "A CIA op becomes an unexpected smash hit.", + "hed": "The Winds of Change", + "description": "Something is in the air tonight…", + "lede_image": "", + "lede_image_credit": "Teenage Engineering", + "lede_image_description": "Synthesizer", + "lede_image_caption": "", + "eyebrow": "", + "url_slug": "", + "blurb": "Blurb", + "link_title": "", + "seo_title": "", + "og_title": "", + "twitter_title": "" +} \ No newline at end of file diff --git a/pkg/almanack/testdata/processDocHTML/OP2/raw.html b/pkg/almanack/testdata/processDocHTML/OP2/raw.html new file mode 100644 index 00000000..49ee3699 --- /dev/null +++ b/pkg/almanack/testdata/processDocHTML/OP2/raw.html @@ -0,0 +1 @@ +

My name is *Carlana Johnson*.

This is my _test document_.

[Citation Needed]

Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.

Some blocks

Embed #2

Here is some text. Lorem ipsum.

And here’s another image:

Embed #3

\ No newline at end of file diff --git a/pkg/almanack/testdata/processDocHTML/OP2/rich.html b/pkg/almanack/testdata/processDocHTML/OP2/rich.html new file mode 100644 index 00000000..138f7fc1 --- /dev/null +++ b/pkg/almanack/testdata/processDocHTML/OP2/rich.html @@ -0,0 +1 @@ +

My name is *Carlana Johnson*.

This is my _test document_.

[Citation Needed]

Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.

Some blocks

Embed #1

Embed #2

Here is some text. Lorem ipsum.

And here’s another image:

Embed #3

\ No newline at end of file diff --git a/pkg/almanack/testdata/processDocHTML/OP2/warnings.json b/pkg/almanack/testdata/processDocHTML/OP2/warnings.json new file mode 100644 index 00000000..ec747fa4 --- /dev/null +++ b/pkg/almanack/testdata/processDocHTML/OP2/warnings.json @@ -0,0 +1 @@ +null \ No newline at end of file diff --git a/pkg/almanack/testdata/processDocHTML/SPLHAROLD/embeds.json b/pkg/almanack/testdata/processDocHTML/SPLHAROLD/embeds.json index 9a167b13..e573f70f 100644 --- a/pkg/almanack/testdata/processDocHTML/SPLHAROLD/embeds.json +++ b/pkg/almanack/testdata/processDocHTML/SPLHAROLD/embeds.json @@ -13,7 +13,8 @@ "caption": "A plaque inside Millvale \"they bar\" Harold's Haunt profiles the businesses' owners and namesake, a neighborhood ghost.", "description": "A plaque inside Harold’s Haunt is pictured.", "width": 0, - "height": 0 + "height": 0, + "kind": "all" } } -] \ No newline at end of file +] diff --git a/pkg/almanack/testdata/processDocHTML/abc/embeds.json b/pkg/almanack/testdata/processDocHTML/abc/embeds.json index 2b8dddd6..8d36d461 100644 --- a/pkg/almanack/testdata/processDocHTML/abc/embeds.json +++ b/pkg/almanack/testdata/processDocHTML/abc/embeds.json @@ -8,7 +8,8 @@ "caption": "Cap 1", "description": "Desc 2", "width": 640, - "height": 480 + "height": 480, + "kind": "all" } } -] \ No newline at end of file +]