forked from andygrunwald/go-trending
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtrending.go
464 lines (381 loc) · 14.8 KB
/
trending.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
package trending
import (
"fmt"
"net/http"
"net/url"
"regexp"
"strconv"
"strings"
"github.com/PuerkitoBio/goquery"
)
// Predefined time ranges for requesting trending repositories or developers.
// Every request for trending repositories or developers takes a time range.
// Using one of these constants for it is the recommended way.
const (
	// TimeToday limits the result to the current day.
	TimeToday = "daily"
	// TimeWeek covers the complete week.
	TimeWeek = "weekly"
	// TimeMonth covers the complete month.
	TimeMonth = "monthly"
)

// Internal settings used to build the URLs that are requested.
const (
	// defaultBaseURL is the base URL of the github website.
	defaultBaseURL = "https://github.com"

	// urlTrendingPath is the relative URL for trending repositories.
	urlTrendingPath = "/trending"
	// urlDevelopersPath is the relative URL part for trending developers.
	// It is appended to urlTrendingPath.
	urlDevelopersPath = "/developers"

	// modeRepositories is the standard mode: github.com/trending
	modeRepositories = "repositories"
	// modeDevelopers is the developers mode: github.com/trending/developers
	modeDevelopers = "developers"
	// modeLanguages is the language mode: only query parameters will be added.
	modeLanguages = "languages"
)
// Trending is the main data structure of this package.
// All requests (GetProjects, GetDevelopers, GetLanguages) are methods on it.
// To receive a new instance just add
//
//	package main
//
//	import (
//		"github.com/andygrunwald/go-trending"
//	)
//
//	func main() {
//		trend := trending.NewTrending()
//		...
//	}
type Trending struct {
	// BaseURL is the base URL for requests.
	// Defaults to the public GitHub website, but can be set to a domain endpoint to use with GitHub Enterprise.
	// Request paths and links found on the page are resolved against it with url.URL.ResolveReference.
	// NOTE(review): the original comment asked for a trailing slash; the default base URL
	// has none and the request paths used here are absolute, so that requirement looks outdated — confirm.
	BaseURL *url.URL
	// Client is the HTTP client used for all requests.
	Client *http.Client
}
// Project reflects a single trending repository.
// It provides information as printed on the source website https://github.com/trending.
type Project struct {
	// Name is the name of the repository including user / organisation like "andygrunwald/go-trending" or "airbnb/javascript".
	Name string
	// Owner is the name of the user or organisation. "andygrunwald" in "andygrunwald/go-trending" or "airbnb" in "airbnb/javascript".
	Owner string
	// RepositoryName is the name of the repository. "go-trending" in "andygrunwald/go-trending" or "javascript" in "airbnb/javascript".
	RepositoryName string
	// Description is the description of the repository like "JavaScript Style Guide" (for "airbnb/javascript").
	Description string
	// Language is the determined programming language of the project (by Github).
	// Sometimes Language is an empty string, because Github can't determine the (main) programming language (like for "google/deepdream").
	Language string
	// Stars is the number of github stars this project received in the given timeframe (see TimeToday / TimeWeek / TimeMonth constants).
	// This number doesn't reflect the overall stars of the project.
	// It is 0 when the value on the page cannot be parsed as a number.
	// NOTE(review): GetProjects reads this value from the link whose href ends in "/stargazers";
	// confirm that this link still shows the per-timeframe count and not the total.
	Stars int
	// URL is the http(s) address of the project reflected as url.URL datastructure like "https://github.com/Workiva/go-datastructures".
	// It is nil when the page provides no link for the project.
	URL *url.URL
	// ContributorURL is the http(s) address of the contributors page of the project reflected as url.URL datastructure like "https://github.com/Workiva/go-datastructures/graphs/contributors".
	// It is nil when the page provides no such link.
	ContributorURL *url.URL
	// Contributor is a collection of Developer.
	// Be aware that this collection doesn't cover all contributors.
	// Only those who are mentioned on github's trending page.
	Contributor []Developer
}
// Language reflects a single (programming) language offered by github for filtering.
// If you call "GetProjects" you are able to filter by programming language.
// For filter input you should use the URLName of Language.
type Language struct {
	// Name is the human readable name of the language like "Go" or "Web Ontology Language"
	Name string
	// URLName is the machine readable / usable name of the language used for filtering / url parameters like "go" or "web-ontology-language".
	// Please use URLName if you want to filter your requests.
	// It is an empty string when it cannot be extracted from the language link.
	URLName string
	// URL is the filter URL for the language like "https://github.com/trending?l=go" for "go" or "https://github.com/trending?l=unknown" for "unknown".
	// NOTE(review): generateLanguages extracts URLName from the path segment after "github.com/trending/",
	// which suggests the links now look like "https://github.com/trending/go" — confirm and update the examples.
	URL *url.URL
}
// Developer reflects a single trending developer / organisation.
// It provides information as printed on the source website https://github.com/trending/developers.
type Developer struct {
	// ID is github's unique identifier of the user / organisation like 1342004 (google) or 698437 (airbnb).
	// It is derived from the path of the avatar URL and is 0 when no avatar URL is available
	// or the path contains no "u/<digits>" part.
	ID int
	// DisplayName is the username of the developer / organisation like "torvalds" or "apache".
	DisplayName string
	// FullName is the real name of the developer / organisation like "Linus Torvalds" (for "torvalds") or "The Apache Software Foundation" (for "apache").
	FullName string
	// URL is the http(s) address of the developer / organisation reflected as url.URL datastructure like https://github.com/torvalds.
	URL *url.URL
	// Avatar is the http(s) address of the developer / organisation avatar as url.URL datastructure like https://avatars1.githubusercontent.com/u/1024025?v=3&s=192.
	// The "s" (size) query parameter is removed from the address found on the page.
	Avatar *url.URL
}
// NewTrending is the main entry point of the trending package.
// It returns a Trending that performs its requests with http.DefaultClient.
// Usage:
//
//	trend := trending.NewTrending()
//	projects, err := trend.GetProjects(trending.TimeToday, "")
//	...
func NewTrending() *Trending {
	defaultClient := http.DefaultClient
	return NewTrendingWithClient(defaultClient)
}
// NewTrendingWithClient allows providing a custom http.Client to use for fetching trending items.
// It allows setting timeouts or using 3rd party http.Client implementations, such as Google App Engine
// urlfetch.Client.
//
// If client is nil, http.DefaultClient is used instead of storing a nil Client
// that would be dereferenced on the first request.
// The returned Trending uses the public GitHub website as BaseURL.
func NewTrendingWithClient(client *http.Client) *Trending {
	if client == nil {
		client = http.DefaultClient
	}

	// defaultBaseURL is a constant, well-formed URL, so the parse error can be ignored.
	baseURL, _ := url.Parse(defaultBaseURL)

	return &Trending{
		BaseURL: baseURL,
		Client:  client,
	}
}
// GetProjects provides a slice of Projects filtered by the given time and language.
//
// time can be filtered by applying one of the Time* constants (e.g. TimeToday, TimeWeek, ...).
// If an empty string is applied, no time filter is sent and Github's default is used.
//
// language can be filtered by applying a programming language of your choice.
// The input must be a known language by Github and be part of GetLanguages().
// Furthermore it must be the Language.URLName and not the human readable Language.Name.
// If language is an empty string, no language filter is sent.
//
// An error is returned when the URL cannot be generated, the request fails or the
// response cannot be parsed. Entries whose heading does not have the form
// "owner/repository" are skipped.
func (t *Trending) GetProjects(time, language string) ([]Project, error) {
	// Generate the correct URL to call
	u, err := t.generateURL(modeRepositories, time, language)
	if err != nil {
		return nil, err
	}

	// Receive document
	res, err := t.Client.Get(u.String())
	if err != nil {
		return nil, err
	}
	// Registered before parsing so the body is also closed when parsing fails.
	defer res.Body.Close()

	doc, err := goquery.NewDocumentFromReader(res.Body)
	if err != nil {
		return nil, err
	}

	// Query our information
	var projects []Project
	doc.Find(".Box article.Box-row").Each(func(i int, s *goquery.Selection) {
		titleLink := s.Find("h2 a")

		// Split name (like "andygrunwald/go-trending") into owner ("andygrunwald") and repository name ("go-trending").
		name := t.getProjectName(titleLink.Text())
		nameParts := strings.SplitN(name, "/", 2)
		if len(nameParts) != 2 {
			// Without a "/" there is no owner / repository pair to report.
			// Skipping the entry avoids indexing past the end of nameParts.
			return
		}
		owner := strings.TrimSpace(nameParts[0])
		repositoryName := strings.TrimSpace(nameParts[1])

		// Overwrite name to be 100% sure it contains no space between owner and repo name
		name = fmt.Sprintf("%s/%s", owner, repositoryName)

		address, hasAddress := titleLink.First().Attr("href")
		projectURL := t.appendBaseHostToPath(address, hasAddress)

		description := strings.TrimSpace(s.Find("p").Text())
		projectLanguage := strings.TrimSpace(s.Find("span[itemprop=programmingLanguage]").Eq(0).Text())

		starsString := strings.TrimSpace(s.Find("div a[href$=\"/stargazers\"]").Text())
		// Remove every english thousand separator ",", so values like "1,234,567" parse as well.
		starsString = strings.Replace(starsString, ",", "", -1)
		stars, err := strconv.Atoi(starsString)
		if err != nil {
			// Best effort: an unparsable counter is reported as 0 stars.
			stars = 0
		}

		contributorSelection := s.Find("div.f6 a").Eq(2)
		contributorPath, hasContributorPath := contributorSelection.Attr("href")
		contributorURL := t.appendBaseHostToPath(contributorPath, hasContributorPath)

		// Collect contributor
		var contributors []Developer
		contributorSelection.Find("img").Each(func(j int, devSelection *goquery.Selection) {
			devName, hasName := devSelection.Attr("alt")
			linkURL := t.appendBaseHostToPath(devName, hasName)

			avatar, hasAvatar := devSelection.Attr("src")
			avatarURL := t.buildAvatarURL(avatar, hasAvatar)

			contributors = append(contributors, t.newDeveloper(devName, "", linkURL, avatarURL))
		})

		projects = append(projects, Project{
			Name:           name,
			Owner:          owner,
			RepositoryName: repositoryName,
			Description:    description,
			Language:       projectLanguage,
			Stars:          stars,
			URL:            projectURL,
			ContributorURL: contributorURL,
			Contributor:    contributors,
		})
	})

	return projects, nil
}
// GetLanguages will return a slice of Language known by github.
// With the Language.URLName you can filter your GetProjects / GetDevelopers calls.
func (t *Trending) GetLanguages() ([]Language, error) {
	// Selector of the language entries on the trending page.
	const languageItemSelector = "#languages-menuitems a.select-menu-item"

	return t.generateLanguages(languageItemSelector)
}
// generateLanguages will retrieve the languages out of the github document.
//
// It requests the trending page and creates one Language per element matching mainSelector:
// the trimmed element text becomes the Name, the element's href the URL and the path
// segment following "github.com/trending/" the URLName.
//
// An error is returned when the URL cannot be generated, the request fails or the
// response cannot be parsed.
func (t *Trending) generateLanguages(mainSelector string) ([]Language, error) {
	// Generate the URL to call
	u, err := t.generateURL(modeLanguages, "", "")
	if err != nil {
		return nil, err
	}

	// Get document
	res, err := t.Client.Get(u.String())
	if err != nil {
		return nil, err
	}
	// Registered before parsing so the body is also closed when parsing fails.
	defer res.Body.Close()

	doc, err := goquery.NewDocumentFromReader(res.Body)
	if err != nil {
		return nil, err
	}

	// NOTE(review): the prefix is hard-coded and ignores t.BaseURL — confirm whether
	// language links should follow a custom BaseURL.
	const expectedPrefix = "https://github.com"
	// Compiled once per call instead of once per matched element.
	urlNamePattern := regexp.MustCompile(`github.com/trending/([^/\\?]*)`)

	// Query our information
	var languages []Language
	doc.Find(mainSelector).Each(func(i int, s *goquery.Selection) {
		// Relative links are turned into absolute ones.
		languageAddress, _ := s.Attr("href")
		if !strings.HasPrefix(languageAddress, expectedPrefix) {
			languageAddress = expectedPrefix + languageAddress
		}

		// Best effort: a malformed address leaves URL nil, the language is still reported.
		filterURL, _ := url.Parse(languageAddress)

		languageURLName := ""
		if matches := urlNamePattern.FindStringSubmatch(languageAddress); len(matches) >= 2 {
			languageURLName = matches[1]
		}

		languages = append(languages, Language{
			Name:    strings.TrimSpace(s.Text()),
			URLName: languageURLName,
			URL:     filterURL,
		})
	})

	return languages, nil
}
// GetDevelopers provides a slice of Developer filtered by the given time and language.
//
// time can be filtered by applying one of the Time* constants (e.g. TimeToday, TimeWeek, ...).
// If an empty string is applied, no time filter is sent and Github's default is used.
//
// language can be filtered by applying a programming language of your choice.
// The input must be a known language by Github and be part of GetLanguages().
// Furthermore it must be the Language.URLName and not the human readable Language.Name.
// If language is an empty string, no language filter is sent.
//
// An error is returned when the URL cannot be generated, the request fails or the
// response cannot be parsed.
func (t *Trending) GetDevelopers(time, language string) ([]Developer, error) {
	// Generate URL
	u, err := t.generateURL(modeDevelopers, time, language)
	if err != nil {
		return nil, err
	}

	// Get document
	res, err := t.Client.Get(u.String())
	if err != nil {
		return nil, err
	}
	// Registered before parsing so the body is also closed when parsing fails.
	defer res.Body.Close()

	doc, err := goquery.NewDocumentFromReader(res.Body)
	if err != nil {
		return nil, err
	}

	// Query information
	var developers []Developer
	doc.Find("main .Box div article[id^=\"pa-\"]").Each(func(i int, s *goquery.Selection) {
		nameLink := s.Find("h1.h3 a")

		// Only the part of the heading text before the first space is used as name.
		name := strings.TrimSpace(nameLink.Text())
		name = strings.Split(name, " ")[0]
		name = strings.TrimSpace(name)

		fullName := t.trimBraces(s.Find("p.f4 a").Text())

		linkHref, hasLink := nameLink.Attr("href")
		linkURL := t.appendBaseHostToPath(linkHref, hasLink)

		avatar, hasAvatar := s.Find("img.avatar-user").Attr("src")
		avatarURL := t.buildAvatarURL(avatar, hasAvatar)

		developers = append(developers, t.newDeveloper(name, fullName, linkURL, avatarURL))
	})

	return developers, nil
}
// newDeveloper assembles a Developer from the given values.
// The ID is derived from avatarURL via getUserIDBasedOnAvatarURL.
func (t *Trending) newDeveloper(name, fullName string, linkURL, avatarURL *url.URL) Developer {
	var dev Developer

	dev.ID = t.getUserIDBasedOnAvatarURL(avatarURL)
	dev.DisplayName = name
	dev.FullName = fullName
	dev.URL = linkURL
	dev.Avatar = avatarURL

	return dev
}
// trimBraces strips surrounding whitespace from text, then every leading "("
// and every trailing ")".
func (t *Trending) trimBraces(text string) string {
	withoutOpening := strings.TrimLeft(strings.TrimSpace(text), "(")
	return strings.TrimRight(withoutOpening, ")")
}
// buildAvatarURL turns the avatar address provided by Github into a url.URL.
//
// nil is returned when exists is false or avatar cannot be parsed.
// The "s" query parameter, which controls the size of the avatar, is dropped.
func (t *Trending) buildAvatarURL(avatar string, exists bool) *url.URL {
	var result *url.URL

	if exists {
		if parsed, err := url.Parse(avatar); err == nil {
			// Re-encode the query without the size parameter.
			params := parsed.Query()
			params.Del("s")
			parsed.RawQuery = params.Encode()
			result = parsed
		}
	}

	return result
}
// avatarUserIDPattern captures the digits following "u/" in an avatar URL path.
// It is compiled once instead of on every call.
var avatarUserIDPattern = regexp.MustCompile("u/([0-9]+)")

// getUserIDBasedOnAvatarURL determines the user ID based on the path of avatarURL.
//
// The ID is the run of digits following "u/" in the path
// (like 1024025 in "/u/1024025").
// 0 is returned when avatarURL is nil, the path contains no such part or the
// digits do not fit into an int.
func (t *Trending) getUserIDBasedOnAvatarURL(avatarURL *url.URL) int {
	if avatarURL == nil {
		return 0
	}

	matches := avatarUserIDPattern.FindStringSubmatch(avatarURL.Path)
	if len(matches) < 2 {
		return 0
	}

	id, err := strconv.Atoi(matches[1])
	if err != nil {
		// On overflow Atoi returns the largest int together with the error;
		// report "unknown" instead of a made-up ID.
		return 0
	}

	return id
}
// appendBaseHostToPath will add the base host to a relative url urlStr.
//
// A urlStr like "/trending" will be returned as https://github.com/trending
// (with the default BaseURL). An absolute urlStr is returned as it is.
//
// nil is returned when exists is false, urlStr cannot be parsed or no BaseURL
// is set to resolve against.
func (t *Trending) appendBaseHostToPath(urlStr string, exists bool) *url.URL {
	// BaseURL is an exported field and may have been left unset;
	// resolving against a nil URL would panic.
	if !exists || t.BaseURL == nil {
		return nil
	}

	rel, err := url.Parse(urlStr)
	if err != nil {
		return nil
	}

	return t.BaseURL.ResolveReference(rel)
}
// getProjectName condenses the multi-line heading text of a project into a single string.
// Every line of name is stripped of surrounding whitespace and the lines are
// concatenated without a separator.
func (t *Trending) getProjectName(name string) string {
	var joined strings.Builder
	for _, line := range strings.Split(name, "\n") {
		joined.WriteString(strings.TrimSpace(line))
	}
	return joined.String()
}
// generateURL will generate the correct URL to call the github site.
//
// mode selects the path: modeDevelopers requests the developers page, every
// other mode the trending page itself.
// time and language are added as the query parameters "since" and "l" when
// they are not empty.
//
// An error is returned when no BaseURL is set or the URL cannot be built.
func (t *Trending) generateURL(mode, time, language string) (*url.URL, error) {
	// BaseURL is an exported field and may have been left unset;
	// report that instead of panicking further down.
	if t.BaseURL == nil {
		return nil, fmt.Errorf("trending: BaseURL is not set")
	}

	urlStr := urlTrendingPath
	if mode == modeDevelopers {
		urlStr += urlDevelopersPath
	}

	u := t.appendBaseHostToPath(urlStr, true)
	if u == nil {
		return nil, fmt.Errorf("trending: unable to build URL for path %q", urlStr)
	}

	q := u.Query()
	if len(time) > 0 {
		q.Set("since", time)
	}
	if len(language) > 0 {
		q.Set("l", language)
	}
	u.RawQuery = q.Encode()

	return u, nil
}