Skip to content

Commit

Permalink
自动解析并上传文章附件
Browse files Browse the repository at this point in the history
  • Loading branch information
movsb committed Mar 26, 2024
1 parent 2d5f1f2 commit 9c9a8c4
Show file tree
Hide file tree
Showing 8 changed files with 212 additions and 14 deletions.
1 change: 0 additions & 1 deletion .dockerignore

This file was deleted.

2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
*~
*.swp
.DS_Store
__debug_bin
__debug_bin*

/tmp/
/.tmp/
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ main.go | 入口程序

- [ ] 移除 jQuery(早期的代码和文章有依赖)
- [ ] 把评论通知改成后台任务异步通知(因为可能失败,失败后应该重试)
- [ ] 自动检测文章引用的附件并上传(同时删除未引用的附件,即:同步)
- [ ] 显示未渲染的评论时,使用 html.innerText(实现方式:渲染成 html 后提取 标签内容并合并)

## 如果你想试一下

Expand Down
3 changes: 1 addition & 2 deletions cmd/client/backup.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import (
"compress/zlib"
"fmt"
"io"
"io/ioutil"
"log"
"os"
"path/filepath"
Expand Down Expand Up @@ -37,7 +36,7 @@ func (c *Client) BackupPosts(cmd *cobra.Command) {
}
r = zr
} else {
r = ioutil.NopCloser(bpr)
r = io.NopCloser(bpr)
}
defer r.Close()

Expand Down
7 changes: 4 additions & 3 deletions cmd/client/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,9 +96,10 @@ func AddCommands(rootCmd *cobra.Command) {
}
postsCmd.AddCommand(postsCreateCmd)
postsUploadCmd := &cobra.Command{
Use: `upload <files...>`,
Short: `Upload post assets, like images`,
Args: cobra.MinimumNArgs(1),
Use: `upload <files...>`,
Short: `Upload post assets, like images`,
Args: cobra.MinimumNArgs(1),
Deprecated: `将会自动上传文章附件,此命令不再需要手动执行。`,
Run: func(cmd *cobra.Command, args []string) {
client.UploadPostFiles(args)
},
Expand Down
158 changes: 151 additions & 7 deletions cmd/client/post.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,16 @@ package client
import (
"errors"
"fmt"
"log"
"os"
"path/filepath"
"strings"

"github.com/movsb/taoblog/protocols"
"github.com/yuin/goldmark"
"github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/text"
html5 "golang.org/x/net/html"
field_mask "google.golang.org/protobuf/types/known/fieldmaskpb"
yaml "gopkg.in/yaml.v2"
)
Expand Down Expand Up @@ -38,6 +43,11 @@ type PostConfig struct {

// InitPost ...
func (c *Client) InitPost() error {
// 禁止意外在项目下创建。
if _, err := os.Stat(`go.mod`); err == nil {
log.Fatalln(`不允许在项目根目录下创建文章。`)
}

fp, err := os.Open("config.yml")
if err == nil {
fp.Close()
Expand Down Expand Up @@ -68,7 +78,10 @@ func (c *Client) CreatePost() error {
p.Type = cfg.Type
p.Metas = cfg.Metas

p.SourceType, p.Source = readSource(".")
var assets []string
_ = assets

p.SourceType, p.Source, assets = readSource(".")

rp, err := c.blog.CreatePost(c.token(), &p)
if err != nil {
Expand All @@ -79,6 +92,8 @@ func (c *Client) CreatePost() error {
cfg.Modified = rp.Modified
c.savePostConfig(&cfg)

c.UploadPostFiles(assets)

return nil
}

Expand Down Expand Up @@ -165,7 +180,10 @@ func (c *Client) UpdatePost() error {
p.Type = cfg.Type
p.Metas = cfg.Metas

p.SourceType, p.Source = readSource(".")
var assets []string
_ = assets

p.SourceType, p.Source, assets = readSource(".")

rp, err := c.blog.UpdatePost(c.token(), &protocols.UpdatePostRequest{
Post: &p,
Expand All @@ -191,6 +209,8 @@ func (c *Client) UpdatePost() error {
cfg.Metas = rp.Metas
c.savePostConfig(&cfg)

c.UploadPostFiles(assets)

return nil
}

Expand All @@ -202,21 +222,24 @@ func (c *Client) DeletePost(id int64) error {
return err
}

// UploadPostFiles ...
// UploadPostFiles 上传文章附件。
// TODO 目前为了简单起见,使用的是 HTTP POST 方式上传;
// TODO 应该像 Backup 那样改成带进度的 protocol buffer 方式上传。
func (c *Client) UploadPostFiles(files []string) {
config := c.readPostConfig()
if config.ID <= 0 {
panic("post not posted, post it first.")
}
if len(files) <= 0 {
panic("Specify files.")
// panic("Specify files.")
return
}
for _, file := range files {
fmt.Println(" +", file)
var err error
fp, err := os.Open(file)
if err != nil {
panic(err)
log.Fatalln(err)
}
defer fp.Close()
path := fmt.Sprintf("/posts/%d/files/%s", config.ID, file)
Expand Down Expand Up @@ -251,7 +274,7 @@ func (c *Client) savePostConfig(config *PostConfig) {
}
}

func readSource(dir string) (string, string) {
func readSource(dir string) (string, string, []string) {
var source string
var theName string

Expand All @@ -277,14 +300,135 @@ func readSource(dir string) (string, string) {
}

typ := ""
var assets []string
var err error
switch filepath.Ext(theName) {
case ".md":
typ = "markdown"
assets, err = parsePostAssets(source)
if err != nil {
log.Println(err)
}
case ".html":
typ = "html"
}

return typ, source
return typ, source, assets
}

// parsePostAssets extracts the list of local attachments referenced by the
// Markdown source of a post.
//
// It walks the goldmark AST of source and collects the destinations of
// Markdown links and images, plus every URL that parseHtmlAssets finds in raw
// HTML blocks and inline raw HTML. Only local relative paths (as decided by
// filepath.IsLocal) are kept. Empty destinations, in-page anchors ("#..."),
// URIs that carry a scheme ("https://...", "mailto:...", "data:...") and
// absolute paths are skipped silently; any other non-local path is skipped
// with a log message. Each asset is returned once, in order of first
// appearance.
//
// The returned error is the one reported by ast.Walk. HTML fragments that fail
// to parse are only logged, so one bad fragment does not hide the other assets.
//
// See docs/usage/文章编辑 (automatic attachment upload).
// TODO: this lives in the client for now; move it to a shared package once the
// server needs it too.
func parsePostAssets(source string) ([]string, error) {
	sourceBytes := []byte(source)
	reader := text.NewReader(sourceBytes)
	doc := goldmark.DefaultParser().Parse(reader)

	// Relative paths collected so far, and the set used to drop duplicates.
	var assets []string
	seen := make(map[string]struct{})

	// hasScheme reports whether s starts with a URI scheme followed by ':'.
	// Schemes shorter than two characters are ignored so that Windows drive
	// letters ("C:\...") are left to the filepath checks below.
	hasScheme := func(s string) bool {
		colon := strings.IndexByte(s, ':')
		if colon < 2 {
			return false
		}
		for i := 0; i < colon; i++ {
			c := s[i]
			switch {
			case 'a' <= c && c <= 'z', 'A' <= c && c <= 'Z':
			case i > 0 && ('0' <= c && c <= '9' || c == '+' || c == '-' || c == '.'):
			default:
				return false
			}
		}
		return true
	}

	tryAdd := func(asset string) {
		switch {
		case asset == "",
			strings.HasPrefix(asset, "#"), // in-page anchor, not a file
			strings.Contains(asset, `://`),
			hasScheme(asset), // mailto:, tel:, data:, ...
			filepath.IsAbs(asset):
			return
		case !filepath.IsLocal(asset):
			log.Println(`maybe an invalid asset presents in the post:`, asset)
			return
		}
		if _, dup := seen[asset]; dup {
			return
		}
		seen[asset] = struct{}{}
		assets = append(assets, asset)
	}

	fromHTML := func(html string) {
		found, err := parseHtmlAssets(html)
		if err != nil {
			log.Println(err)
		}
		for _, asset := range found {
			tryAdd(asset)
		}
	}

	err := ast.Walk(doc, func(n ast.Node, entering bool) (ast.WalkStatus, error) {
		if !entering {
			return ast.WalkContinue, nil
		}

		// If this list changes, update the documentation as well.
		var lines *text.Segments
		switch tag := n.(type) {
		case *ast.Link:
			tryAdd(string(tag.Destination))
			return ast.WalkContinue, nil
		case *ast.Image:
			tryAdd(string(tag.Destination))
			return ast.WalkContinue, nil
		case *ast.HTMLBlock:
			lines = tag.Lines()
		case *ast.RawHTML:
			lines = tag.Segments
		default:
			return ast.WalkContinue, nil
		}

		// Reassemble the raw HTML text from its source segments.
		rawLines := make([]string, 0, lines.Len())
		for i := 0; i < lines.Len(); i++ {
			seg := lines.At(i)
			rawLines = append(rawLines, string(seg.Value(sourceBytes)))
		}
		fromHTML(strings.Join(rawLines, "\n"))

		return ast.WalkContinue, nil
	})
	if err != nil {
		return nil, err
	}

	return assets, nil
}

// parseHtmlAssets parses html and returns, in document order, the non-empty
// values of the resource attributes it knows about: href on <a>, src on
// <img>, <source> and <iframe>, and data on <object>. The values are returned
// as written; no filtering of absolute or remote URLs happens here.
func parseHtmlAssets(html string) ([]string, error) {
	root, err := html5.Parse(strings.NewReader(html))
	if err != nil {
		return nil, err
	}

	// Tag name → attribute that carries the referenced resource.
	attrByTag := map[string]string{
		`a`:      `href`,
		`img`:    `src`,
		`source`: `src`,
		`iframe`: `src`,
		`object`: `data`,
	}

	var found []string

	// visit handles the node itself first, then each child in order.
	var visit func(n *html5.Node)
	visit = func(n *html5.Node) {
		switch n.Type {
		case html5.DocumentNode, html5.ElementNode:
			// Only these node kinds can carry or contain assets.
		default:
			return
		}

		if want, ok := attrByTag[strings.ToLower(n.Data)]; ok {
			// When the attribute appears more than once, the last one wins.
			var value string
			for _, a := range n.Attr {
				if strings.EqualFold(a.Key, want) {
					value = a.Val
				}
			}
			if value != `` {
				found = append(found, value)
			}
		}

		for c := n.FirstChild; c != nil; c = c.NextSibling {
			visit(c)
		}
	}

	visit(root)

	return found, nil
}

func (c *Client) SetRedirect(sourcePath, targetPath string) {
Expand Down
39 changes: 39 additions & 0 deletions cmd/client/post_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
package client

import (
"strings"
"testing"
)

// TestParsePostAssets checks that parsePostAssets reports the attachment
// referenced by an inline <a> tag, whatever the letter case of the tag name.
func TestParsePostAssets(t *testing.T) {
	tests := []struct {
		Source string
		Assets []string
	}{
		{
			Source: `a <a href="a.jpg" /> adf`,
			Assets: []string{`a.jpg`},
		},
		{
			Source: `a <A href="a.jpg" /> adf`,
			Assets: []string{`a.jpg`},
		},
	}
	for _, tc := range tests {
		assets, err := parsePostAssets(tc.Source)
		if err != nil {
			t.Errorf(`%s: unexpected error: %v`, tc.Source, err)
			continue
		}
		if len(tc.Assets) != len(assets) {
			t.Errorf(`assets not equal: %s: want %q, got %q`, tc.Source, tc.Assets, assets)
			continue
		}
		for i := range tc.Assets {
			if !strings.EqualFold(tc.Assets[i], assets[i]) {
				t.Errorf(`assets not equal: %s: want %q, got %q`, tc.Source, tc.Assets, assets)
				// One report per test case is enough.
				break
			}
		}
	}
}
14 changes: 14 additions & 0 deletions docs/usage/图片.md → docs/usage/文章编辑.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# 文章编辑

## 图片插入

在 Markdown 中可以直接使用 Markdown 语法插入图片:

```markdown
Expand All @@ -9,3 +13,13 @@
```html
<img src="图片地址" loading="lazy" data-origin="图片来源信息" alt="图片加载失败时的替代文本" title="提示文本" style="width:宽度;height:高度;" />
```

## 附件自动上传

文章中的以下附件会自动识别并上传:

* `<a>` 标签中 `href` 为相对地址的
* `<img>` 标签中 `src` 为相对地址的
* `<source>` 标签中 `src` 为相对地址的
* `<iframe>` 标签中 `src` 为相对地址的
* `<object>` 标签中 `data` 为相对地址的

0 comments on commit 9c9a8c4

Please sign in to comment.