-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathindex_group.go
141 lines (120 loc) · 3.01 KB
/
index_group.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
package sitemap
import (
"io/ioutil"
"log"
"os"
"path/filepath"
"strconv"
"strings"
"sync"
)
// IndexGroup buffers Sitemap entries received over a channel and writes them
// out as numbered index files named "<name>_<n>.xml.gz" inside a folder.
type IndexGroup struct {
// name is the base file name; Configure removes ".xml.gz" from it.
name string
// folder is the directory the index files are written to.
folder string
// group_count is the number used in the next file name; Configure sets it to 1
// and Create increments it after each saved file.
group_count int
// sitemaps holds the entries received but not yet written to a file.
sitemaps []Sitemap
// sitemap_channel carries entries from Add to the Initialize loop.
sitemap_channel chan Sitemap
// done receives true once Initialize has written the remaining entries.
done chan bool
}
// Add hands a Sitemap to the group by sending it on the group's entry
// channel. The channel is unbuffered, so the call blocks until the
// Initialize loop receives the entry.
func (s *IndexGroup) Add(entry Sitemap) {
	queue := s.sitemap_channel
	queue <- entry
}
// Clear drops every buffered Sitemap that has not been written yet, leaving
// the group with an empty, non-nil slice.
func (s *IndexGroup) Clear() {
	s.sitemaps = make([]Sitemap, 0)
}
// getSitemapSet wraps the group's buffered, not-yet-written Sitemaps in an
// Index. The Index shares the group's slice; nothing is copied.
func (s *IndexGroup) getSitemapSet() Index {
	var set Index
	set.Sitemaps = s.sitemaps
	return set
}
// getSitemapName returns the file name for the next index file, in the form
// "<name>_<group_count>.xml.gz".
func (s *IndexGroup) getSitemapName() string {
	count := strconv.Itoa(s.group_count)
	return s.name + "_" + count + ".xml.gz"
}
// Create writes the given Index into the group's folder under the name
// returned by getSitemapName, then increments the group counter and clears
// the group's buffer.
//
// When createSitemapIndexXml reports ErrMaxFileSize, the entries are split in
// half and each half is written by a recursive call. When it reports
// ErrMaxUrlSetSize, only the first MAXURLSETSIZE entries are written and the
// rest are put back into the group's buffer. Any other failure, including a
// failure to save the file, ends the program through log.Fatal.
func (s *IndexGroup) Create(index Index) {
	var remnant []Sitemap

	xml, err := createSitemapIndexXml(index)
	switch err {
	case ErrMaxFileSize:
		// Split on the actual number of entries. A fixed MAXURLSETSIZE/2
		// split point is out of range for a shorter slice and never
		// shrinks a slice of exactly that length.
		half := len(index.Sitemaps) / 2
		if half == 0 {
			// Nothing left to split; recursing would never terminate.
			log.Fatal("File not saved:", err)
		}
		s.Create(Index{Sitemaps: index.Sitemaps[half:]})
		s.Create(Index{Sitemaps: index.Sitemaps[:half]})
		return
	case ErrMaxUrlSetSize:
		remnant = index.Sitemaps[MAXURLSETSIZE:]
		index.Sitemaps = index.Sitemaps[:MAXURLSETSIZE]
		xml, err = createSitemapIndexXml(index)
	}
	if err != nil {
		log.Fatal("File not saved:", err)
	}

	path := filepath.Join(s.folder, s.getSitemapName())
	if err = saveXml(xml, path); err != nil {
		log.Fatal("File not saved:", err)
	}
	s.group_count++
	s.Clear()
	// Keep the entries that did not fit so they go into a later file.
	if len(remnant) > 0 {
		s.sitemaps = append(s.sitemaps, remnant...)
	}
	log.Printf("Sitemap created on %s", path)
}
// CloseIndexGroups closes all the given groups concurrently. The returned
// channel receives true once every group's Close has reported completion.
func CloseIndexGroups(groups ...*IndexGroup) (done <-chan bool) {
	finished := make(chan bool, 1)

	var pending sync.WaitGroup
	pending.Add(len(groups))
	for i := range groups {
		go func(g *IndexGroup) {
			defer pending.Done()
			<-g.Close()
		}(groups[i])
	}

	// Report completion from a separate goroutine so the caller is not blocked.
	go func() {
		pending.Wait()
		finished <- true
	}()
	return finished
}
// Close must be called once no more entries will be added. It closes the
// group's entry channel, which ends the Initialize loop so the remaining
// buffered entries are written. The returned channel receives true after
// Initialize has signalled that it is done.
func (s *IndexGroup) Close() <-chan bool {
	finished := make(chan bool, 1)
	close(s.sitemap_channel)

	// Relay the completion signal without blocking the caller.
	go func() {
		<-s.done
		finished <- true
	}()
	return finished
}
// Initialize creates the group's channels and then consumes entries sent by
// Add until Close closes the entry channel. Every time MAXURLSETSIZE entries
// have been buffered they are written with Create. After the channel is
// closed the remaining entries are written and true is sent on done.
//
// The call blocks until the entry channel is closed. Add and Close use
// channels that are only created here, so they must not run before this
// method has created them.
//
// NOTE(review): the final Create runs even when nothing is buffered, so a
// file is written for an empty set as well - confirm this is intended.
func (s *IndexGroup) Initialize() {
	s.done = make(chan bool, 1)
	s.sitemap_channel = make(chan Sitemap)

	entries := s.sitemap_channel
	for {
		entry, open := <-entries
		if !open {
			break
		}
		s.sitemaps = append(s.sitemaps, entry)
		if len(s.sitemaps) != MAXURLSETSIZE {
			continue
		}
		s.Create(s.getSitemapSet())
	}

	// Write whatever is still buffered, then signal completion.
	s.Create(s.getSitemapSet())
	s.Clear()
	s.done <- true
}
// Configure sets the group's base name and output folder and resets the file
// counter to 1. The first ".xml.gz" found in name is removed, because
// getSitemapName appends that extension again. If the folder cannot be read
// it is created together with any missing parents.
//
// The returned error is nil when the folder is readable or was created, and
// otherwise is the error reported by os.MkdirAll.
func (s *IndexGroup) Configure(name string, folder string) error {
	s.name = strings.Replace(name, ".xml.gz", "", 1)
	s.group_count = 1
	s.folder = folder

	if _, err := ioutil.ReadDir(folder); err == nil {
		return nil
	}
	// 0755, not 0655: without the owner execute bit the owner can neither
	// enter the new directory nor create files or nested folders in it.
	return os.MkdirAll(folder, 0755)
}