diff --git a/processor/workers.go b/processor/workers.go index e0b1da8b3..93e5c0bdb 100644 --- a/processor/workers.go +++ b/processor/workers.go @@ -757,20 +757,6 @@ func processFile(job *FileJob) bool { CountStats(job) - if UlocMode { - ulocMutex.Lock() - for _, l := range strings.Split(string(job.Content), "\n") { - ulocGlobalCount[l] = struct{}{} - - _, ok := ulocLanguageCount[job.Language] - if !ok { - ulocLanguageCount[job.Language] = map[string]struct{}{} - } - ulocLanguageCount[job.Language][l] = struct{}{} - } - ulocMutex.Unlock() - } - if Duplicates { duplicates.mux.Lock() jobHash := job.Hash.Sum(nil) @@ -819,6 +805,22 @@ func processFile(job *FileJob) bool { return false } + // This needs to be at the end so we can ensure duplicate detection et al. run first, + // which avoids inflating the counts + if UlocMode { + ulocMutex.Lock() + for _, l := range strings.Split(string(job.Content), "\n") { + ulocGlobalCount[l] = struct{}{} + + _, ok := ulocLanguageCount[job.Language] + if !ok { + ulocLanguageCount[job.Language] = map[string]struct{}{} + } + ulocLanguageCount[job.Language][l] = struct{}{} + } + ulocMutex.Unlock() + } + return true }