diff --git a/.gitignore b/.gitignore
index 73d4ba0..d55dbc5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,4 @@ feeds.yml
 gin-bin
 goread
 goread-*-*
+public
diff --git a/feed/feed.go b/feed/feed.go
new file mode 100644
index 0000000..f4c6ff1
--- /dev/null
+++ b/feed/feed.go
@@ -0,0 +1,59 @@
+// Package feed extends the package gofeed by adding a Time method to
+// gofeed.Item which returns either the published or the updated date, since
+// some feeds only offer an update time, as well as a sorting implementation
+// based on it.
+package feed
+
+import (
+	"net/http"
+	"time"
+
+	"github.com/mmcdole/gofeed"
+)
+
+type Feed struct {
+	gofeed.Feed
+	Items []*Item
+}
+
+type Item struct {
+	*gofeed.Item
+	Feed     *gofeed.Feed
+	Category string
+}
+
+func (i *Item) Time() time.Time {
+	if i.PublishedParsed != nil {
+		return *i.PublishedParsed
+	}
+	if i.UpdatedParsed != nil {
+		return *i.UpdatedParsed
+	}
+	return time.Time{}
+}
+
+type SortByDate []*Item
+
+func (is SortByDate) Len() int           { return len(is) }
+func (is SortByDate) Less(i, j int) bool { return is[i].Time().Before(is[j].Time()) }
+func (is SortByDate) Swap(i, j int)      { is[i], is[j] = is[j], is[i] }
+
+type Parser struct{ gofeed.Parser }
+
+func NewParser(c *http.Client) *Parser {
+	p := gofeed.NewParser()
+	p.Client = c
+	return &Parser{*p}
+}
+
+func (p *Parser) ParseURL(url string) (*Feed, error) {
+	f, err := p.Parser.ParseURL(url)
+	if err != nil {
+		return nil, err
+	}
+	items := make([]*Item, len(f.Items))
+	for i, item := range f.Items {
+		items[i] = &Item{Item: item, Feed: f}
+	}
+	return &Feed{*f, items}, nil
+}
diff --git a/fetch.go b/fetch.go
index 18eee8f..978d8e6 100644
--- a/fetch.go
+++ b/fetch.go
@@ -4,47 +4,52 @@ import (
 	"context"
 	"net/http"
 
-	"github.com/mmcdole/gofeed"
-	"github.com/pkg/errors"
+	"github.com/bake/goread/feed"
 	"golang.org/x/sync/semaphore"
 )
 
-func fetch(url string) (*gofeed.Feed, error) {
-	res, err := http.Get(url)
-	if err != nil {
-		return nil, errors.Wrapf(err, "could not get feed at %s", url)
-	}
-	defer res.Body.Close()
-	fp := gofeed.NewParser()
-	f, err := fp.Parse(res.Body)
-	if err != nil {
-		return nil, errors.Wrapf(err, "could not parse feed at %s", url)
-	}
-	return f, nil
+type request struct {
+	cat, url string
 }
 
-func fetchAll(urls []string, n int64) (chan *gofeed.Feed, chan error) {
+type response struct {
+	request
+	feed *feed.Feed
+	err  error
+}
+
+// fetch accepts the number of parallel downloads and returns a request and a
+// response channel. The caller is responsible for closing the request channel
+// after all requests are enqueued; the response channel is closed automatically.
+func fetch(n int64, c *http.Client) (chan<- request, <-chan response) {
 	sem := semaphore.NewWeighted(n)
 	ctx := context.Background()
-	feedc := make(chan *gofeed.Feed)
-	errc := make(chan error)
+	reqc := make(chan request)
+	resc := make(chan response)
 	go func() {
-		defer close(errc)
-		defer close(feedc)
-		for _, url := range urls {
+		defer close(resc)
+		defer sem.Acquire(ctx, n)
+		for req := range reqc {
 			sem.Acquire(ctx, 1)
-			url := url
-			go func() {
+			go func(req request) {
 				defer sem.Release(1)
-				feed, err := fetch(url)
-				if err != nil {
-					errc <- err
-					return
-				}
-				feedc <- feed
-			}()
+				feed, err := feed.NewParser(c).ParseURL(req.url)
+				resc <- response{req, feed, err}
+			}(req)
+		}
+	}()
+	return reqc, resc
+}
+
+func fetchAll(n int64, fs feeds) <-chan response {
+	reqc, resc := fetch(n, &http.Client{})
+	go func() {
+		defer close(reqc)
+		for cat, urls := range fs {
+			for _, url := range urls {
+				reqc <- request{cat, url}
+			}
 		}
-		sem.Acquire(ctx, n)
 	}()
-	return feedc, errc
+	return resc
 }
diff --git a/funcs/funcs.go b/funcs/funcs.go
new file mode 100644
index 0000000..0ddcf33
--- /dev/null
+++ b/funcs/funcs.go
@@ -0,0 +1,45 @@
+// Package funcs contains a collection of generic templating functions.
+package funcs
+
+import (
+	"crypto/sha1"
+	"fmt"
+	"html/template"
+	"strings"
+	"time"
+
+	"github.com/microcosm-cc/bluemonday"
+)
+
+func FuncMap(truncateLen int) template.FuncMap {
+	return template.FuncMap{
+		"hash":     Hash(),
+		"sanitize": Sanitize(),
+		"time":     Time(),
+		"title":    Title(),
+		"trim":     Trim(),
+		"truncate": Truncate(truncateLen),
+	}
+}
+
+func Hash() func(string) string {
+	return func(s string) string { return fmt.Sprintf("%x", sha1.Sum([]byte(s))) }
+}
+
+func Sanitize() func(string) string { return bluemonday.StrictPolicy().Sanitize }
+
+func Time() func() time.Time { return func() time.Time { return time.Now() } }
+
+func Title() func(string) string { return strings.Title }
+
+func Trim() func(string) string { return strings.TrimSpace }
+
+func Truncate(n int) func(string) string {
+	ellipsis := " …"
+	return func(s string) string {
+		if len(s)-len(ellipsis) <= n {
+			return s
+		}
+		return s[:n] + ellipsis
+	}
+}
diff --git a/item.go b/item.go
deleted file mode 100644
index 417245b..0000000
--- a/item.go
+++ /dev/null
@@ -1,30 +0,0 @@
-package main
-
-import (
-	"time"
-
-	"github.com/mmcdole/gofeed"
-)
-
-type item struct {
-	gofeed.Item
-	Feed gofeed.Feed
-	Time time.Time
-}
-
-func newItem(base *gofeed.Item, feed *gofeed.Feed) item {
-	var t time.Time
-	if base.PublishedParsed != nil {
-		t = *base.PublishedParsed
-	}
-	if base.UpdatedParsed != nil {
-		t = *base.UpdatedParsed
-	}
-	return item{*base, *feed, t}
-}
-
-type sortByPublished []item
-
-func (is sortByPublished) Len() int           { return len(is) }
-func (is sortByPublished) Less(i, j int) bool { return is[i].Time.Before(is[j].Time) }
-func (is sortByPublished) Swap(i, j int)      { is[i], is[j] = is[j], is[i] }
diff --git a/main.go b/main.go
index 44cbb14..08db9cb 100644
--- a/main.go
+++ b/main.go
@@ -5,21 +5,21 @@
 
 import (
 	"flag"
+	"html/template"
 	"log"
 	"os"
 	"path"
 	"sort"
-	"strings"
-	"text/template"
-	"time"
 
-	"github.com/microcosm-cc/bluemonday"
-	"github.com/pkg/errors"
+	"github.com/bake/goread/feed"
+	"github.com/bake/goread/funcs"
 	"gopkg.in/yaml.v2"
 )
 
 var version = "development"
 
+type feeds map[string][]string
+
 func main() {
 	inPath := flag.String("in", "feeds.yml", "Path to a list of feed URLs")
 	outPath := flag.String("out", ".", "Path to generated HTML")
@@ -29,85 +29,69 @@ func main() {
 	truncateLen := flag.Int("truncate-length", 256, "Number of characters per feed item")
 	flag.Parse()
 
-	r, err := os.Open(*inPath)
-	if err != nil {
-		log.Fatalf("could not open feeds: %v", err)
-	}
-	var cats map[string][]string
-	if err := yaml.NewDecoder(r).Decode(&cats); err != nil {
-		log.Fatal(err)
+	p := page{
+		out:     *outPath,
+		max:     *maxItems,
+		Version: version,
 	}
-	var catNames []string
-	for cat := range cats {
-		catNames = append(catNames, cat)
+	var err error
+	p.tmpl, err = template.
+		New(path.Base(*tmplPath)).
+		Funcs(funcs.FuncMap(*truncateLen)).
+		Parse(feedTmpl)
+	if err != nil {
+		log.Fatalf("could not parse internal template: %v", err)
 	}
 
-	sort.Strings(catNames)
-
-	tmpl := template.Must(template.New(path.Base(*tmplPath)).Funcs(template.FuncMap{
-		"sanitize": bluemonday.StrictPolicy().Sanitize,
-		"trim":     strings.TrimSpace,
-		"truncate": func(str string) string {
-			if len(str) <= *truncateLen {
-				return str
-			}
-			return str[:*truncateLen] + " …"
-		},
-	}).Parse(feedTmpl))
 	if *tmplPath != "" {
-		tmpl, err = tmpl.ParseFiles(*tmplPath)
+		p.tmpl, err = p.tmpl.ParseFiles(*tmplPath)
 	}
 	if err != nil {
 		log.Fatalf("could not parse template: %v", err)
 	}
 
-	var allItems []item
-	for cat, urls := range cats {
-		var items []item
-		feedc, errc := fetchAll(urls, *concurrent)
-		for range urls {
-			select {
-			case feed := <-feedc:
-				for _, item := range feed.Items {
-					items = append(items, newItem(item, feed))
-				}
-			case err := <-errc:
-				log.Printf("could not fetch feed from %s: %v\n", cat, err)
-			}
-		}
-		sort.Sort(sort.Reverse(sortByPublished(items)))
-		if len(items) > *maxItems {
-			items = items[:*maxItems]
+	r, err := os.Open(*inPath)
+	if err != nil {
+		log.Fatalf("could not open feeds: %v", err)
+	}
+	defer r.Close()
+	var fs feeds
+	if err := yaml.NewDecoder(r).Decode(&fs); err != nil {
+		log.Fatalf("could not decode %s: %v", path.Base(*inPath), err)
+	}
+
+	var items []*feed.Item
+	for res := range fetchAll(*concurrent, fs) {
+		if res.err != nil {
+			log.Printf("could not get %s: %v", res.url, res.err)
+			continue
 		}
-		allItems = append(allItems, items...)
-		if err := render(cat, catNames, items, tmpl, *outPath); err != nil {
-			log.Printf("could not render %s: %v", cat, err)
+		for _, item := range res.feed.Items {
+			item.Category = res.cat
+			items = append(items, item)
 		}
 	}
+	sort.Sort(sort.Reverse(feed.SortByDate(items)))
 
-	sort.Sort(sort.Reverse(sortByPublished(allItems)))
-	if len(allItems) > *maxItems {
-		allItems = allItems[:*maxItems]
+	cats := map[string][]*feed.Item{"index": items}
+	feeds := map[string][]*feed.Item{}
+	hash := funcs.Hash()
+	for _, item := range items {
+		cats[item.Category] = append(cats[item.Category], item)
+		feeds[hash(item.Feed.Link)] = append(feeds[hash(item.Feed.Link)], item)
 	}
-	if err := render("index", catNames, allItems, tmpl, *outPath); err != nil {
-		log.Printf("could not render index: %v", err)
+	for cat := range cats {
+		p.Categories = append(p.Categories, cat)
 	}
-}
-
-func render(category string, categories []string, items []item, tmpl *template.Template, outPath string) error {
-	data := struct {
-		Category   string
-		Categories []string
-		Items      []item
-		Updated    time.Time
-		Version    string
-	}{category, categories, items, time.Now(), version}
-	w, err := os.Create(path.Join(outPath, category+".html"))
-	if err != nil {
-		return errors.Wrap(err, "could not generate output file")
+	sort.Strings(p.Categories)
+	for cat, items := range cats {
+		if err := p.render(cat, cat, items); err != nil {
+			log.Fatalf("could not render %s: %v", cat, err)
+		}
 	}
-	if err := tmpl.Execute(w, data); err != nil {
-		return errors.Wrap(err, "could not execute template")
+	for feed, items := range feeds {
+		if err := p.render(feed, items[0].Feed.Title, items); err != nil {
+			log.Fatalf("could not render %s: %v", feed, err)
+		}
 	}
-	return nil
 }
diff --git a/render.go b/render.go
new file mode 100644
index 0000000..b9552b0
--- /dev/null
+++ b/render.go
@@ -0,0 +1,41 @@
+package main
+
+import (
+	"html/template"
+	"os"
+	"path"
+	"time"
+
+	"github.com/bake/goread/feed"
+	"github.com/pkg/errors"
+)
+
+type page struct {
+	tmpl *template.Template
+	out  string
+	max  int
+
+	Category   string
+	Categories []string
+	Items      []*feed.Item
+	Updated    time.Time
+	Version    string
+}
+
+func (p *page) render(name, category string, items []*feed.Item) error {
+	p.Items = items
+	p.Category = category
+	p.Updated = time.Now()
+	w, err := os.Create(path.Join(p.out, name+".html"))
+	if len(p.Items) > p.max {
+		p.Items = p.Items[:p.max]
+	}
+	if err != nil {
+		return errors.Wrap(err, "could not generate output file")
+	}
+	defer w.Close()
+	if err := p.tmpl.Execute(w, p); err != nil {
+		return errors.Wrap(err, "could not execute template")
+	}
+	return nil
+}