From da3638eb566c9030a40e36534819580757547444 Mon Sep 17 00:00:00 2001
From: Alexander Narsudinov
Date: Fri, 27 Oct 2023 19:59:03 +0200
Subject: [PATCH 1/3] Fix the goroutine variable capturing in `removeObjectsOneByOne`

This patch fixes the classic Go for-loop variable capture issue, which
leads to incorrect results in the goroutines: every iteration reuses the
same `object` variable, so the spawned goroutines may all observe the
value from a later iteration. Newer Go versions change the loop-variable
scoping, but this project uses Go 1.15, so the code does not behave the
way the original author expected. Passing the object to the goroutine as
an argument gives each goroutine its own copy (a standalone illustration
follows patch 2 below).
---
 pkg/s3/client.go | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/pkg/s3/client.go b/pkg/s3/client.go
index e720d41..563c033 100644
--- a/pkg/s3/client.go
+++ b/pkg/s3/client.go
@@ -197,15 +197,15 @@ func (client *s3Client) removeObjectsOneByOne(bucketName, prefix string) error {
 
     for object := range objectsCh {
         guardCh <- 1
-        go func() {
-            err := client.minio.RemoveObject(client.ctx, bucketName, object.Key,
-                minio.RemoveObjectOptions{VersionID: object.VersionID})
+        go func(obj minio.ObjectInfo) {
+            err := client.minio.RemoveObject(client.ctx, bucketName, obj.Key,
+                minio.RemoveObjectOptions{VersionID: obj.VersionID})
             if err != nil {
-                glog.Errorf("Failed to remove object %s, error: %s", object.Key, err)
+                glog.Errorf("Failed to remove object %s, error: %s", obj.Key, err)
                 removeErrors++
             }
             <- guardCh
-        }()
+        }(object)
     }
     for i := 0; i < parallelism; i++ {
         guardCh <- 1

From d3e89f164bf8a1c8f6bc788c42581349ac4618ac Mon Sep 17 00:00:00 2001
From: Alexander Narsudinov
Date: Fri, 27 Oct 2023 20:05:02 +0200
Subject: [PATCH 2/3] Use atomics to make writing of results safe in `removeObjectsOneByOne`

The previous implementation had no synchronization mechanism for the
goroutines that do the work, so the result counters could be updated
concurrently, which is a data race. There are multiple approaches to
making this work correctly; let's start with the simplest one: atomics.
---
 pkg/s3/client.go | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/pkg/s3/client.go b/pkg/s3/client.go
index 563c033..6e63e6d 100644
--- a/pkg/s3/client.go
+++ b/pkg/s3/client.go
@@ -5,6 +5,7 @@ import (
     "context"
     "fmt"
     "net/url"
+    "sync/atomic"
 
     "github.com/golang/glog"
     "github.com/minio/minio-go/v7"
@@ -173,8 +174,8 @@ func (client *s3Client) removeObjectsOneByOne(bucketName, prefix string) error {
     objectsCh := make(chan minio.ObjectInfo, 1)
     guardCh := make(chan int, parallelism)
     var listErr error
-    totalObjects := 0
-    removeErrors := 0
+    var totalObjects int64 = 0
+    var removeErrors int64 = 0
 
     go func() {
         defer close(objectsCh)
@@ -185,7 +186,7 @@ func (client *s3Client) removeObjectsOneByOne(bucketName, prefix string) error {
                 listErr = object.Err
                 return
             }
-            totalObjects++
+            atomic.AddInt64(&totalObjects, 1)
             objectsCh <- object
         }
     }()
@@ -202,7 +203,7 @@ func (client *s3Client) removeObjectsOneByOne(bucketName, prefix string) error {
                minio.RemoveObjectOptions{VersionID: obj.VersionID})
            if err != nil {
                glog.Errorf("Failed to remove object %s, error: %s", obj.Key, err)
-               removeErrors++
+               atomic.AddInt64(&removeErrors, 1)
            }
            <- guardCh
        }(object)
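For readers unfamiliar with the two pitfalls addressed above, here is a
standalone sketch (not part of the patch series) showing both fixes in one
place: passing the loop variable into the goroutine as an argument, and
counting failures with `sync/atomic`. The names `deleteAll` and `remove` are
invented for illustration and do not exist in the project, and a
`sync.WaitGroup` is used instead of the guard channel for brevity.

```go
package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

// remove stands in for the real per-object delete call; the name is invented.
func remove(key string) error {
	if key == "bad" {
		return fmt.Errorf("cannot remove %q", key)
	}
	return nil
}

// deleteAll deletes keys concurrently and returns how many deletions failed.
func deleteAll(keys []string) int64 {
	var failed int64
	var wg sync.WaitGroup
	for _, key := range keys {
		wg.Add(1)
		// Before Go 1.22 every iteration reuses the same `key` variable, so a
		// closure written as `go func() { remove(key) }()` races with the loop
		// and may see only later values. Passing `key` as an argument (as
		// patch 1 does with `object`) gives each goroutine its own copy.
		go func(k string) {
			defer wg.Done()
			if err := remove(k); err != nil {
				// A plain `failed++` here would be a data race; the atomic
				// increment (as in patch 2) is safe from many goroutines.
				atomic.AddInt64(&failed, 1)
			}
		}(key)
	}
	wg.Wait()
	return failed
}

func main() {
	fmt.Println(deleteAll([]string{"a", "bad", "c"})) // prints 1
}
```

Run with `go run -race`, the un-fixed variants of both patterns (closure
capturing `key`, non-atomic counter) are reported as data races.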
From 6a7b88e78891167a2d77af8fae69b90aada6de59 Mon Sep 17 00:00:00 2001
From: Alexander Narsudinov
Date: Fri, 27 Oct 2023 20:47:25 +0200
Subject: [PATCH 3/3] Increase the buffer size of objects channel in `removeObjectsOneByOne`

It seems reasonable to give the work queue the same capacity as the
maximum number of workers, so the listing goroutine can queue objects
ahead while all workers are busy.
---
 pkg/s3/client.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pkg/s3/client.go b/pkg/s3/client.go
index 6e63e6d..7a1b1ec 100644
--- a/pkg/s3/client.go
+++ b/pkg/s3/client.go
@@ -171,7 +171,7 @@ func (client *s3Client) removeObjects(bucketName, prefix string) error {
 // will delete files one by one without file lock
 func (client *s3Client) removeObjectsOneByOne(bucketName, prefix string) error {
     parallelism := 16
-    objectsCh := make(chan minio.ObjectInfo, 1)
+    objectsCh := make(chan minio.ObjectInfo, parallelism)
     guardCh := make(chan int, parallelism)
     var listErr error
     var totalObjects int64 = 0
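To make the buffer-size choice in patch 3 concrete, here is a minimal sketch
(again not taken from the project) of the overall pattern
`removeObjectsOneByOne` uses: a producer feeding a work channel and a
counting-semaphore guard channel, both sized to the worker limit. The item
type and counts are illustrative assumptions.

```go
package main

import "fmt"

func main() {
	const parallelism = 4

	// Work queue buffered to the worker limit, mirroring patch 3, so the
	// producer rarely blocks while every worker slot is occupied.
	items := make(chan int, parallelism)
	// Counting semaphore: holds one token per running worker.
	guard := make(chan struct{}, parallelism)

	// Producer: stands in for the object-listing goroutine.
	go func() {
		defer close(items)
		for i := 0; i < 20; i++ {
			items <- i
		}
	}()

	for item := range items {
		guard <- struct{}{} // acquire a slot; blocks once `parallelism` workers run
		go func(n int) {
			defer func() { <-guard }() // release the slot when done
			fmt.Println("processing", n)
		}(item)
	}

	// Wait for stragglers: re-acquiring every slot is only possible once
	// all workers have released theirs.
	for i := 0; i < parallelism; i++ {
		guard <- struct{}{}
	}
}
```

Sizing the work channel to `parallelism` only smooths the hand-off between
producer and workers; correctness still comes from the guard channel, which
both limits concurrency and, once refilled at the end, guarantees that all
workers have finished.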