Skip to content

Commit

Permalink
Add spl-photo and partner-photo
Browse files Browse the repository at this point in the history
  • Loading branch information
earthboundkid committed Feb 19, 2025
1 parent f23197b commit 4956c2f
Show file tree
Hide file tree
Showing 16 changed files with 204 additions and 13 deletions.
1 change: 1 addition & 0 deletions internal/db/gdocs.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,4 +77,5 @@ type EmbedImage struct {
Description string `json:"description"`
Width int `json:"width"`
Height int `json:"height"`
Kind string `json:"kind"`
}
4 changes: 4 additions & 0 deletions pkg/almanack/service-gdocs-markdown.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,10 @@ func intermediateDocToMarkdown(doc *html.Node) string {
// Write picture shortcode
case db.ImageEmbedTag:
image := dbembed.Value.(db.EmbedImage)
if image.Kind == "partner" {
dataEl.Parent.RemoveChild(dataEl)
continue
}
var widthHeight string
if image.Width != 0 {
widthHeight = fmt.Sprintf(`width-ratio="%d" height-ratio="%d" `,
Expand Down
20 changes: 16 additions & 4 deletions pkg/almanack/service-gdocs-process.go
Original file line number Diff line number Diff line change
Expand Up @@ -134,14 +134,25 @@ func processDocHTML(docHTML *html.Node) (
data := newDataTag(dtPartnerText, xhtml.InnerHTMLBlocks(n))
xhtml.ReplaceWith(tbl, data)

-case "photo", "image", "photograph", "illustration", "illo":
+case "photo", "image", "photograph", "illustration", "illo", "spl-photo", "partner-photo":
 embed.Type = db.ImageEmbedTag
-if imageEmbed, warning := processImage(rows, n); warning != "" {
+kind := "all"
+if label == "spl-photo" {
+kind = "spl"
+} else if label == "partner-photo" {
+kind = "partner"
+}
+if imageEmbed, warning := processImage(rows, n, kind); warning != "" {
 tbl.Parent.RemoveChild(tbl)
 warnings = append(warnings, warning)
 } else {
 embed.Value = *imageEmbed
-goto append
+if kind != "spl" {
+embeds = append(embeds, embed)
+n++
+}
+data := newDataTag(dtDBEmbed, dbEmbedToString(embed))
+xhtml.ReplaceWith(tbl, data)
}

case "metadata", "info":
Expand Down Expand Up @@ -222,7 +233,7 @@ func processDocHTML(docHTML *html.Node) (
return
}

-func processImage(rows xhtml.TableNodes, n int) (imageEmbed *db.EmbedImage, warning string) {
+func processImage(rows xhtml.TableNodes, n int, kind string) (imageEmbed *db.EmbedImage, warning string) {
var width, height int
if w := xhtml.TextContent(rows.Value("width")); w != "" {
width, _ = strconv.Atoi(w)
Expand All @@ -239,6 +250,7 @@ func processImage(rows xhtml.TableNodes, n int) (imageEmbed *db.EmbedImage, warn
),
Width: width,
Height: height,
Kind: kind,
}

if path := xhtml.TextContent(rows.Value("path")); path != "" {
Expand Down
4 changes: 4 additions & 0 deletions pkg/almanack/service-gdocs-raw.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ func intermediateDocToPartnerHTML(doc *html.Node) {
switch dbembed.Type {
// Replace images with red placeholder text
case db.ImageEmbedTag:
if imgTag := dbembed.Value.(db.EmbedImage); imgTag.Kind == "spl" {
dataEl.Parent.RemoveChild(dataEl)
continue
}
placeholder := xhtml.New("h2", "style", "color: red;")
xhtml.AppendText(placeholder, fmt.Sprintf("Embed #%d", dbembed.N))
xhtml.ReplaceWith(dataEl, placeholder)
Expand Down
5 changes: 5 additions & 0 deletions pkg/almanack/service-gdocs-rich.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package almanack
import (
"fmt"

"github.com/spotlightpa/almanack/internal/db"
"github.com/spotlightpa/almanack/internal/iterx"
"github.com/spotlightpa/almanack/internal/xhtml"
"golang.org/x/net/html"
Expand All @@ -27,6 +28,10 @@ func intermediateDocToPartnerRichText(doc *html.Node) {
// Replace other embeds with red placeholder text
for dataEl, value := range dataEls(doc, dtDBEmbed) {
dbembed := dbEmbedFromString(value)
if imgTag, ok := dbembed.Value.(db.EmbedImage); ok && imgTag.Kind == "spl" {
dataEl.Parent.RemoveChild(dataEl)
continue
}
placeholder := xhtml.New("h2", "style", "color: red;")
xhtml.AppendText(placeholder, fmt.Sprintf("Embed #%d", dbembed.N))
xhtml.ReplaceWith(dataEl, placeholder)
Expand Down
2 changes: 1 addition & 1 deletion pkg/almanack/service-gdocs.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ func (svc Services) ProcessGDocsDoc(ctx context.Context, dbDoc db.GDocsDoc) (err
// Handle image uploads/database lookups
for tbl, rows := range xhtml.Tables(docHTML) {
switch label := rows.Label(); label {
-case "photo", "image", "photograph", "illustration", "illo":
+case "photo", "image", "photograph", "illustration", "illo", "spl-photo", "partner-photo":
if warning := svc.replaceImagePath(
ctx, tbl, rows, dbDoc.ExternalID, objID2Path,
); warning != "" {
Expand Down
11 changes: 7 additions & 4 deletions pkg/almanack/testdata/processDocHTML/OP1/embeds.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
"caption": "Here is a caption",
"description": "Blah blah blah.",
"width": 0,
"height": 0
"height": 0,
"kind": "all"
}
},
{
Expand All @@ -25,7 +26,8 @@
"caption": "Overdose awareness",
"description": "",
"width": 0,
"height": 0
"height": 0,
"kind": "all"
}
},
{
Expand All @@ -37,7 +39,8 @@
"caption": "",
"description": "The House floor in the Pa. Capitol. The chamber is controlled by Democrats for the first session in more than a \"decade.\"",
"width": 0,
"height": 0
"height": 0,
"kind": "all"
}
}
]
]
19 changes: 19 additions & 0 deletions pkg/almanack/testdata/processDocHTML/OP2/article.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
My name is \*<a href="mailto:[email protected]">Carlana Johnson</a>\*.

This is my \_<a href="https://docs.google.com/document/d/103kCeBG2OQS_ZHkHUyKpT9Z_ajs4tuQ-WtCvlj79Vqs/edit">test document</a>\_.

\[Citation Needed\]

<strong>Lorem ipsum</strong> dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim <em>id est laborum</em>.

# Some blocks

{{<embed/raw srcdoc="&lt;script src=&#34;http://example.com/&#34;&gt;&lt;/script&gt;">}}

Here is <em>some</em> <strong>text</strong>. Lorem ipsum.

{{<picture src="external/75h9dj4qkszh2ydb6e3h9pn2r0.jpeg" description="" caption="Overdose awareness" credit="Amanda Berg">}}

And here’s another image:

{{<picture src="external/08v6tmkadg07fyknwy3pb8sm3m.jpeg" description="The House floor in the Pa. Capitol. The chamber is controlled by Democrats for the first session in more than a &#34;decade.&#34;" caption="" credit="Amanda Berg / For Spotlight PA">}}
86 changes: 86 additions & 0 deletions pkg/almanack/testdata/processDocHTML/OP2/doc.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
<p>
</p><table><tr><td><p>metadata
</p></td><td><p>
</p></td></tr><tr><td><p><strong>Slug
</strong></p></td><td><p>OP1
</p></td></tr><tr><td><p><strong>Date
</strong></p></td><td><p>4/20/2023
</p></td></tr><tr><td><p><strong>Hed
</strong></p></td><td><p>The Winds of Change
</p></td></tr><tr><td><p><strong>Description
</strong></p></td><td><p>Something is in the air tonight…
</p></td></tr><tr><td><p><strong>Byline
</strong></p></td><td><p>The Scorpions
</p></td></tr><tr><td><p><strong>Blurb (100 chars)
</strong></p></td><td><p>Blurb
</p></td></tr><tr><td><p><strong>Budget
</strong></p></td><td><p>
</p></td></tr><tr><td><p>A CIA op becomes an unexpected smash hit.<strong>
</strong></p></td><td><p>
</p></td></tr><tr><td><p><strong>Lede image
</strong></p></td><td><p>N / A
</p></td></tr><tr><td><p><strong>Lede image credit
</strong></p></td><td><p>Teenage Engineering
</p></td></tr><tr><td><p><strong>Lede image description
</strong></p></td><td><p>Synthesizer
</p></td></tr></table><p>
</p><p>My name is *<a href="mailto:[email protected]">Carlana Johnson</a>*.
</p><p>
</p><p>This is my _<a href="https://docs.google.com/document/d/103kCeBG2OQS_ZHkHUyKpT9Z_ajs4tuQ-WtCvlj79Vqs/edit">test document</a>_.
</p><p>
</p><p>[Citation Needed]
</p><p>
</p><p><strong>Lorem ipsum</strong> dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim <em>id est laborum</em>.
</p><p>
</p><h1>Some blocks
</h1><p>
</p><table><tr><td><p>html
</p></td></tr><tr><td><p>
</p><p>&lt;script src=&#34;http://example.com/&#34;&gt;&lt;/script&gt;
</p><p>
</p></td></tr></table><p>
</p><p>
</p><table><tr><td><p>partner-photo
</p></td><td><p>
</p></td></tr><tr><td><p><a href="https://carlmjohnson.net/images/dither-320.png"><img src="https://lh6.googleusercontent.com/bOd9ROs85eAzE7mvH2ySxfKVSC98pIdaGOZiaOMFZRhNalaxVzOv7zdlWFV4AKNe-YhurZgLL4Jxe_bNJ_xFblXcvMg3Fs0P-B7azYu5iPhOlFEg2YrJCp83QhsLhe5ahrUMNDoBWre4clZ8NtrVu6Jq1wMPIVT2" title="" alt="" data-oid="kix.4v7yhj6ax6xk"/></a>
</p></td><td><p>
</p></td></tr><tr><td><p>credit
</p></td><td><p>
</p></td></tr><tr><td><p>CarlCo
</p></td><td><p>
</p></td></tr><tr><td><p>description
</p></td><td><p>
</p></td></tr><tr><td><p>Blah blah blah.
</p></td><td><p>
</p></td></tr><tr><td><p>caption
</p></td><td><p>
</p></td></tr><tr><td><p>Here is a caption
</p></td><td><p>
</p></td></tr><tr><td>path</td><td>external/cwp3y3z5x8svbtsfv7t68hggc0.png</td></tr></table><p>
</p><p>Here is <em>some</em> <strong>text</strong>. Lorem ipsum.
</p><p>
</p><p>
</p><table><tr><td><p><strong>spl-photo
</strong></p></td></tr><tr><td><p><a href="https://drive.google.com/file/d/1ssiQd8AKXHo99qkZZwYbHxfVJHY3RPnL/view?usp=share_link">20210831_spotlight_overdose_awareness_selects_0021.JPG</a>
</p></td></tr><tr><td><p>credit
</p></td></tr><tr><td><p>Amanda Berg
</p></td></tr><tr><td><p>caption
</p></td></tr><tr><td><p>Overdose awareness
</p></td></tr><tr><td>path</td><td>external/75h9dj4qkszh2ydb6e3h9pn2r0.jpeg</td></tr></table><p>
</p><p>And here’s another image:
</p><p>
</p><table><tr><td><p><strong>photo
</strong></p></td><td><p>
</p></td></tr><tr><td><p>path
</p></td><td><p><mark>external/08v6tmkadg07fyknwy3pb8sm3m.jpeg</mark>
</p></td></tr><tr><td><p>credit
</p></td><td><p>
</p></td></tr><tr><td><p>Amanda Berg / For Spotlight PA
</p></td><td><p>
</p></td></tr><tr><td><p>Alt
</p></td><td><p>
</p></td></tr><tr><td><p>The House floor in the Pa. Capitol. The chamber is controlled by Democrats for the first session in more than a &#34;decade.&#34;
</p></td><td><p>
</p></td></tr></table><p>
</p><p>
</p>
33 changes: 33 additions & 0 deletions pkg/almanack/testdata/processDocHTML/OP2/embeds.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
[
{
"n": 1,
"type": "raw",
"value": "<script src=\"http://example.com/\"></script>"
},
{
"n": 2,
"type": "image",
"value": {
"path": "external/cwp3y3z5x8svbtsfv7t68hggc0.png",
"credit": "CarlCo",
"caption": "Here is a caption",
"description": "Blah blah blah.",
"width": 0,
"height": 0,
"kind": "partner"
}
},
{
"n": 3,
"type": "image",
"value": {
"path": "external/08v6tmkadg07fyknwy3pb8sm3m.jpeg",
"credit": "Amanda Berg / For Spotlight PA",
"caption": "",
"description": "The House floor in the Pa. Capitol. The chamber is controlled by Democrats for the first session in more than a \"decade.\"",
"width": 0,
"height": 0,
"kind": "all"
}
}
]
19 changes: 19 additions & 0 deletions pkg/almanack/testdata/processDocHTML/OP2/metadata.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"publication_date": null,
"internal_id": "OP1",
"byline": "The Scorpions",
"budget": "A CIA op becomes an unexpected smash hit.",
"hed": "The Winds of Change",
"description": "Something is in the air tonight…",
"lede_image": "",
"lede_image_credit": "Teenage Engineering",
"lede_image_description": "Synthesizer",
"lede_image_caption": "",
"eyebrow": "",
"url_slug": "",
"blurb": "Blurb",
"link_title": "",
"seo_title": "",
"og_title": "",
"twitter_title": ""
}
1 change: 1 addition & 0 deletions pkg/almanack/testdata/processDocHTML/OP2/raw.html
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<body><p>My name is *<a href="mailto:[email protected]">Carlana Johnson</a>*.</p><p>This is my _<a href="https://docs.google.com/document/d/103kCeBG2OQS_ZHkHUyKpT9Z_ajs4tuQ-WtCvlj79Vqs/edit">test document</a>_.</p><p>[Citation Needed]</p><p><strong>Lorem ipsum</strong> dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim <em>id est laborum</em>.</p><h1>Some blocks</h1><script src="http://example.com/"></script><h2 style="color: red;">Embed #2</h2><p>Here is <em>some</em> <strong>text</strong>. Lorem ipsum.</p><p>And here’s another image:</p><h2 style="color: red;">Embed #3</h2></body>

Check warning

Code scanning / CodeQL

Inclusion of functionality from an untrusted source Medium test

Script loaded using unencrypted connection.
1 change: 1 addition & 0 deletions pkg/almanack/testdata/processDocHTML/OP2/rich.html
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<body><p>My name is *<a href="mailto:[email protected]">Carlana Johnson</a>*.</p><p>This is my _<a href="https://docs.google.com/document/d/103kCeBG2OQS_ZHkHUyKpT9Z_ajs4tuQ-WtCvlj79Vqs/edit">test document</a>_.</p><p>[Citation Needed]</p><p><strong>Lorem ipsum</strong> dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim <em>id est laborum</em>.</p><h1>Some blocks</h1><h2 style="color: red;">Embed #1</h2><h2 style="color: red;">Embed #2</h2><p>Here is <em>some</em> <strong>text</strong>. Lorem ipsum.</p><p>And here’s another image:</p><h2 style="color: red;">Embed #3</h2></body>
1 change: 1 addition & 0 deletions pkg/almanack/testdata/processDocHTML/OP2/warnings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
null
5 changes: 3 additions & 2 deletions pkg/almanack/testdata/processDocHTML/SPLHAROLD/embeds.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
"caption": "A plaque inside Millvale \"they bar\" Harold's Haunt profiles the businesses' owners and namesake, a neighborhood ghost.",
"description": "A plaque inside Harold’s Haunt is pictured.",
"width": 0,
"height": 0
"height": 0,
"kind": "all"
}
}
]
]
5 changes: 3 additions & 2 deletions pkg/almanack/testdata/processDocHTML/abc/embeds.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
"caption": "Cap 1",
"description": "Desc 2",
"width": 640,
"height": 480
"height": 480,
"kind": "all"
}
}
]
]

0 comments on commit 4956c2f

Please sign in to comment.