Skip to content

Commit

Permalink
Add the telemetry/stats job (masa-finance/issues#216)
Browse files Browse the repository at this point in the history
  • Loading branch information
mcamou committed Jan 29, 2025
1 parent 8c39d12 commit 9819ba6
Show file tree
Hide file tree
Showing 7 changed files with 233 additions and 37 deletions.
20 changes: 13 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -161,22 +161,28 @@ The tee-worker currently supports 3 job types:

#### `telemetry`

This job type has no parameters, and returns the current state of the worker. It returns an object with the following fields. All timestamps are given in local time, in seconds since the Unix epoch (1/1/1970 00:00:00 UTC). The counts represent the interval between the `boot_time` and the `current_time`:
This job type has no parameters, and returns the current state of the worker. It returns an object with the following fields. All timestamps are given in seconds since the Unix epoch (1/1/1970 00:00:00 UTC). The counts represent the interval between the `boot_time` and the `current_time`. All the fields in the `stats` object are optional (if a field is missing, its value is 0):

`boot_time` - Timestamp when the process started up.

`last_operation_time` - Timestamp when the last operation happened.

`current_time` - Current timestamp of the host.

`tweet_scrapes` - Total number of Twitter scrapes.
`stats.twitter_scrapes` - Total number of Twitter scrapes.

`returned_tweets` - Number of tweets returned to clients (this does not consider other types of data such as profiles or trending topics).
`stats.twitter_returned_tweets` - Number of tweets returned to clients (this does not consider other types of data such as profiles or trending topics).

`returned_profiles` - Number of profiles returned to clients.
`stats.twitter_returned_profiles` - Number of profiles returned to clients.

`tweet_errors` - Number of errors while scraping tweets.
`stats.twitter_returned_other` - Number of other records returned to clients (e.g. media, spaces or trending topics).

`web_success` - Number of successful web scrapes.
`stats.twitter_errors` - Number of errors while scraping tweets (excluding authentication and rate-limiting).

`web_errors` - Number of web scrapes that resulted in an error.
`stats.twitter_ratelimit_errors` - Number of Twitter rate-limiting errors.

`stats.twitter_auth_errors` - Number of Twitter authentication errors.

`stats.web_success` - Number of successful web scrapes.

`stats.web_errors` - Number of web scrapes that resulted in an error.
92 changes: 92 additions & 0 deletions internal/jobs/stats/stats.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
package stats

import (
	"encoding/json"
	"sync"
	"time"

	"github.com/sirupsen/logrus"
)

// statType identifies a single counter tracked by the stats collector. Its
// string value is used as the key under the "stats" object in the telemetry
// JSON output (see the README's `telemetry` job documentation).
type statType string

const (
	TwitterScrapes    statType = "twitter_scrapes"
	TwitterTweets     statType = "twitter_returned_tweets"
	TwitterProfiles   statType = "twitter_returned_profiles"
	TwitterOther      statType = "twitter_returned_other"
	TwitterErrors     statType = "twitter_errors"
	TwitterAuthErrors statType = "twitter_auth_errors"
	TwitterRateErrors statType = "twitter_ratelimit_errors"
	WebErrors         statType = "web_errors"
	WebSuccess        statType = "web_success"
	// TODO Should we add stats for calls to each of the Twitter job types?

)

// allStats enumerates every known stat type so the collector can
// pre-initialize every counter to 0 at startup.
// Make sure to keep this in sync with the above!
var allStats []statType = []statType{
	TwitterScrapes,
	TwitterTweets,
	TwitterProfiles,
	TwitterOther,
	TwitterErrors,
	TwitterAuthErrors,
	TwitterRateErrors,
	WebSuccess,
	WebErrors,
}

// AddStat is the message sent to the collector's channel: it requests that
// Num be added to the counter identified by Type.
type AddStat struct {
	Type statType
	Num  uint
}

// stats holds the worker's runtime statistics. It is the object serialized
// as the result of a telemetry job.
type stats struct {
	BootTimeUnix      int64             `json:"boot_time"`
	LastOperationUnix int64             `json:"last_operation_time"`
	CurrentTimeUnix   int64             `json:"current_time"`
	Stats             map[statType]uint `json:"stats"`

	// mu guards all the fields above: the collector goroutine writes them
	// while Json may read/marshal them concurrently. It is unexported, so
	// encoding/json ignores it.
	mu sync.Mutex
}

// StatsCollector accumulates AddStat events received over Chan into a
// shared stats structure.
type StatsCollector struct {
	stats *stats
	Chan  chan AddStat
}

// StartCollector initializes the statistics (all known counters start at 0)
// and spawns a goroutine that applies incoming AddStat events. The goroutine
// runs for the life of the process; it stops only if Chan is closed.
func StartCollector() *StatsCollector {
	logrus.Info("Starting stats collector")

	s := stats{
		BootTimeUnix: time.Now().Unix(),
		Stats:        make(map[statType]uint),
	}
	for _, t := range allStats {
		s.Stats[t] = 0
	}

	ch := make(chan AddStat, 64)

	go func(s *stats, ch chan AddStat) {
		for stat := range ch {
			s.mu.Lock()
			s.LastOperationUnix = time.Now().Unix()
			// Indexing a missing key yields the zero value, so a plain
			// += covers both the known and the unknown-type cases.
			s.Stats[stat.Type] += stat.Num
			logrus.Debugf("Added %d to stat %s. Current stats: %#v", stat.Num, stat.Type, s)
			s.mu.Unlock()
		}
	}(&s, ch)

	return &StatsCollector{stats: &s, Chan: ch}
}

// Json returns the current statistics serialized as JSON, setting
// current_time to the time of the call. Safe to call while the collector
// goroutine is updating the counters.
func (s StatsCollector) Json() ([]byte, error) {
	s.stats.mu.Lock()
	defer s.stats.mu.Unlock()
	s.stats.CurrentTimeUnix = time.Now().Unix()
	return json.Marshal(s.stats)
}

// AddStat queues an increment of num for the given stat type. It may block
// briefly if the channel buffer is full.
func (s StatsCollector) AddStat(typ statType, num uint) {
	s.Chan <- AddStat{Type: typ, Num: num}
}
34 changes: 34 additions & 0 deletions internal/jobs/telemetry.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
package jobs

import (
"github.com/masa-finance/tee-worker/api/types"
"github.com/masa-finance/tee-worker/internal/jobs/stats"
"github.com/sirupsen/logrus"
)

// TelemetryJobType is the job type string clients use to request the
// worker's telemetry.
const TelemetryJobType = "telemetry"

// TelemetryJob reports the worker's current statistics when executed.
type TelemetryJob struct {
	collector *stats.StatsCollector
}

// NewTelemetryJob creates a TelemetryJob backed by the given stats
// collector. The job configuration is accepted for interface consistency
// with the other job constructors but is currently unused.
func NewTelemetryJob(jc types.JobConfiguration, c *stats.StatsCollector) TelemetryJob {
	return TelemetryJob{collector: c}
}

// ExecuteJob serializes the collector's current statistics and returns them
// as the job's result payload. On serialization failure the error is both
// recorded in the result and returned.
func (t TelemetryJob) ExecuteJob(j types.Job) (types.JobResult, error) {
	logrus.Debug("Executing telemetry job")

	result := types.JobResult{Job: j}

	data, err := t.collector.Json()
	if err != nil {
		result.Error = err.Error()
		return result, err
	}

	result.Data = data
	return result, nil
}
Loading

0 comments on commit 9819ba6

Please sign in to comment.