Skip to content

Commit

Permalink
do clean
Browse files Browse the repository at this point in the history
  • Loading branch information
justlorain committed Apr 5, 2024
1 parent 991c2b9 commit 80a3533
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 8 deletions.
17 changes: 15 additions & 2 deletions cron/cron.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,6 @@ import (

// TODO: add progress bar
// TODO: support group, org, repo update in UpdateTask
// TODO: data cleaning e.g. ByteDance, bytedance, Bytedance => bytedance
// TODO: clean the db at the end of each task

// ErrReachedRetryTimes is a sentinel error carrying the message
// "error reached retry times".
// NOTE(review): presumably returned once a retried operation has used up its attempts — confirm against the callers.
var ErrReachedRetryTimes = errors.New("error reached retry times")

Expand Down Expand Up @@ -258,6 +256,10 @@ func InitTask(ctx context.Context, db *gorm.DB) error {
return err
}
}
// do clean
if err := CleanContributorCompanyAndLocation(ctx, db); err != nil {
return err
}
return nil
}

Expand Down Expand Up @@ -383,6 +385,10 @@ func UpdateTask(ctx context.Context, db *gorm.DB) error {
return err
}
}
// do clean
if err := CleanContributorCompanyAndLocation(ctx, db); err != nil {
return err
}
return nil
}

Expand Down Expand Up @@ -792,3 +798,10 @@ func DeleteRepos(ctx context.Context, db *gorm.DB, repos []string) error {
}
return nil
}

// CleanContributorCompanyAndLocation passes GlobalCleaner.Clean to
// storage.UpdateContributorCompanyAndLocation so that the storage layer can
// apply it to contributor company and location values. Whatever error the
// storage call reports is returned unchanged.
func CleanContributorCompanyAndLocation(ctx context.Context, db *gorm.DB) error {
	return storage.UpdateContributorCompanyAndLocation(ctx, db, GlobalCleaner.Clean)
}
27 changes: 21 additions & 6 deletions cron/cron_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ import (
)

func TestInitTask(t *testing.T) {
config.GlobalConfig.ReadInConfig("../default.yaml")
storage.Init()
_ = config.GlobalConfig.ReadInConfig("../default.yaml")
_ = storage.Init()
graphql.Init()
rest.Init()
err := InitTask(context.Background(), storage.DB) // around 9 min for cloudwego init
Expand All @@ -43,8 +43,8 @@ func TestInitTask(t *testing.T) {
}

func TestUpdateTask(t *testing.T) {
config.GlobalConfig.ReadInConfig("../default.yaml")
storage.Init()
_ = config.GlobalConfig.ReadInConfig("../default.yaml")
_ = storage.Init()
graphql.Init()
rest.Init()

Expand Down Expand Up @@ -75,8 +75,8 @@ func TestProgressBar(t *testing.T) {
}

func TestTransaction(t *testing.T) {
config.GlobalConfig.ReadInConfig("../default.yaml")
storage.Init()
_ = config.GlobalConfig.ReadInConfig("../default.yaml")
_ = storage.Init()
graphql.Init()
rest.Init()
operation := func(ctx context.Context, db *gorm.DB, count int) error {
Expand Down Expand Up @@ -108,3 +108,18 @@ func TestTransaction(t *testing.T) {
time.Sleep(time.Second * 1)
}
}

// TestClean registers a few sample cleaning strategies on GlobalCleaner and
// runs CleanContributorCompanyAndLocation against storage.DB.
//
// Every setup step is checked: a failure to read the config, initialise
// storage, or register the strategies aborts the test immediately instead of
// being discarded and surfacing later as an unrelated failure.
func TestClean(t *testing.T) {
	if err := config.GlobalConfig.ReadInConfig("../default.yaml"); err != nil {
		t.Fatal(err)
	}
	if err := storage.Init(); err != nil {
		t.Fatal(err)
	}
	strategies := []string{
		"`ByteDance` => `TEST PASS`",
		"`蚂蚁` => `Alibaba`",
		"`Beijing` => `Beijing, China`",
	}
	if err := GlobalCleaner.AddStrategies(strategies...); err != nil {
		t.Fatal(err)
	}
	if err := CleanContributorCompanyAndLocation(context.Background(), storage.DB); err != nil {
		t.Fatal(err)
	}
}
15 changes: 15 additions & 0 deletions storage/storage.go
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,21 @@ func CreateContributors(ctx context.Context, db *gorm.DB, cs []*model.Contributo
return db.WithContext(ctx).Create(cs).Error
}

// UpdateContributorCompanyAndLocation loads every contributor, applies update
// to its Company and Location, and saves the rows whose values changed.
//
// Rows that update leaves untouched are skipped, so a run in which nothing
// needs rewriting issues no writes. The first query or save error stops the
// loop and is returned; this function performs no rollback of earlier saves.
func UpdateContributorCompanyAndLocation(ctx context.Context, db *gorm.DB, update func(string) string) error {
	var contributors []model.Contributor
	if err := db.WithContext(ctx).Find(&contributors).Error; err != nil {
		return err
	}
	// Index into the slice so each row is modified in place rather than
	// through a per-iteration copy whose address is handed to Save.
	for i := range contributors {
		c := &contributors[i]
		company, location := update(c.Company), update(c.Location)
		if company == c.Company && location == c.Location {
			continue
		}
		c.Company, c.Location = company, location
		if err := db.WithContext(ctx).Save(c).Error; err != nil {
			return err
		}
	}
	return nil
}

// QueryContributorCountByOrg
//
// SELECT COUNT(DISTINCT c.node_id) AS contributor_count
Expand Down

0 comments on commit 80a3533

Please sign in to comment.