From 037e80ba066b617750b3db87d00b83c288c2f675 Mon Sep 17 00:00:00 2001 From: apocelipes Date: Thu, 12 Sep 2024 03:36:02 +0800 Subject: [PATCH] feat: support --exclude-content Fixes #507. --- SCC-OUTPUT-REPORT.html | 134 +++++++++++++++++++---------------- examples/issue507/exclude.go | 9 +++ examples/issue507/exclude.py | 3 + examples/issue507/include.py | 3 + main.go | 6 ++ processor/processor.go | 13 ++++ processor/workers.go | 22 ++++++ test-all.sh | 24 +++++++ 8 files changed, 152 insertions(+), 62 deletions(-) create mode 100644 examples/issue507/exclude.go create mode 100644 examples/issue507/exclude.py create mode 100644 examples/issue507/include.py diff --git a/SCC-OUTPUT-REPORT.html b/SCC-OUTPUT-REPORT.html index 544a0d8c3..c37e586ae 100644 --- a/SCC-OUTPUT-REPORT.html +++ b/SCC-OUTPUT-REPORT.html @@ -12,14 +12,14 @@ Go - 30 - 9515 - 1460 - 456 - 7599 - 1516 - 254099 - 4050 + 31 + 9667 + 1488 + 470 + 7709 + 1540 + 257316 + 4107 processor/formatters.go @@ -53,33 +53,33 @@ processor/workers.go - 870 - 128 + 892 + 131 91 - 651 - 225 - 25495 - 494 + 670 + 231 + 25897 + 503 processor/processor.go - 667 - 140 - 103 - 424 - 92 - 19295 - 435 + 680 + 142 + 104 + 434 + 96 + 19711 + 443 main.go - 398 + 404 10 6 - 382 + 388 10 - 8856 - 253 + 9010 + 256 processor/detector_test.go @@ -98,7 +98,7 @@ 14 269 47 - 7991 + 7998 202 processor/workers_tokei_test.go @@ -160,6 +160,16 @@ 50 3766 99 + + cmd/badges/simplecache.go + + 161 + 28 + 13 + 120 + 20 + 3070 + 94 processor/processor_test.go @@ -181,15 +191,15 @@ 1911 60 - cmd/badges/simplecache.go + cmd/badges/simplecache_test.go - 109 + 102 + 21 + 3 + 78 17 - 4 - 88 - 14 - 1931 - 75 + 2024 + 52 processor/structs_test.go @@ -230,16 +240,6 @@ 6 1316 37 - - cmd/badges/simplecache_test.go - - 52 - 12 - 0 - 40 - 9 - 1041 - 30 processor/cocomo.go @@ -250,16 +250,6 @@ 0 2209 35 - - processor/bloom.go - - 37 - 7 - 12 - 18 - 2 - 1062 - 29 processor/cocomo_test.go @@ -270,6 +260,16 @@ 6 686 23 + + processor/bloom.go + + 37 + 7 + 12 + 18 + 2 + 1062 + 29 processor/helpers_test.go @@ -310,6 +310,16 @@ 0 378 14 + + examples/issue507/exclude.go + + 9 + 3 + 1 + 5 + 0 + 116 + 7 processor/constants.go @@ -323,16 +333,16 @@ Total - 30 - 9515 - 1460 - 456 - 7599 - 1516 - 254099 - 4050 + 31 + 9667 + 1488 + 470 + 7709 + 1540 + 257316 + 4107 - Estimated Cost to Develop (organic) $227,190
Estimated Schedule Effort (organic) 7.83 months
Estimated People Required (organic) 2.58
+ Estimated Cost to Develop (organic) $230,644
Estimated Schedule Effort (organic) 7.88 months
Estimated People Required (organic) 2.60
\ No newline at end of file diff --git a/examples/issue507/exclude.go b/examples/issue507/exclude.go new file mode 100644 index 000000000..91a316c27 --- /dev/null +++ b/examples/issue507/exclude.go @@ -0,0 +1,9 @@ +package main + +import "fmt" + +func main() { + fmt.Println("Hello, world!") +} + +// exclude-content testing, DO NOT EDIT. diff --git a/examples/issue507/exclude.py b/examples/issue507/exclude.py new file mode 100644 index 000000000..afe347969 --- /dev/null +++ b/examples/issue507/exclude.py @@ -0,0 +1,3 @@ +# exclude-content testing, DO NOT EDIT. +def test(s: str) -> bool: + return s == 'exclude' diff --git a/examples/issue507/include.py b/examples/issue507/include.py new file mode 100644 index 000000000..6f397e456 --- /dev/null +++ b/examples/issue507/include.py @@ -0,0 +1,3 @@ +# exclude-content testing, this file should be included +if __name__ == '__main__': + print('Hello, world!') diff --git a/main.go b/main.go index bf25e6d41..0eb223f31 100644 --- a/main.go +++ b/main.go @@ -391,6 +391,12 @@ func main() { "$", "set currency symbol", ) + flags.StringVar( + &processor.ExcludeContent, + "exclude-content", + "", + "exclude files containing text that matches the given regular expression", + ) if err := rootCmd.Execute(); err != nil { os.Exit(1) diff --git a/processor/processor.go b/processor/processor.go index 90d7404ef..3388710d0 100644 --- a/processor/processor.go +++ b/processor/processor.go @@ -174,6 +174,10 @@ var ExcludeListExtensions = []string{} // ExcludeFilename is a list of filenames which should be ignored var ExcludeFilename = []string{} +// ExcludeContent is a regular expression which is used to exclude files containing text that matches it +var ExcludeContent string +var excludeContentPattern *regexp.Regexp + // AverageWage is the average wage in dollars used for the COCOMO cost estimate var AverageWage int64 = 56286 @@ -486,6 +490,14 @@ func processFlags() { UlocMode = true } + if ExcludeContent != "" { + var err error + excludeContentPattern, err = regexp.Compile(ExcludeContent) + if err != nil { + printError("ExcludeContent: " + err.Error()) + } + } + if Debug { printDebug(fmt.Sprintf("Path Deny List: %v", PathDenyList)) printDebug(fmt.Sprintf("Sort By: %s", SortBy)) @@ -502,6 +514,7 @@ func processFlags() { printDebug(fmt.Sprintf("IncludeSymLinks: %t", IncludeSymLinks)) printDebug(fmt.Sprintf("Uloc: %t", UlocMode)) printDebug(fmt.Sprintf("Dryness: %t", Dryness)) + printDebug(fmt.Sprintf("ExcludeContent: %s", ExcludeContent)) } } diff --git a/processor/workers.go b/processor/workers.go index 2be9b1ff6..830ee5fab 100644 --- a/processor/workers.go +++ b/processor/workers.go @@ -3,6 +3,7 @@ package processor import ( + "bufio" "bytes" "fmt" "hash" @@ -639,6 +640,20 @@ func checkBomSkip(fileJob *FileJob) int { return 0 } +func excludeByContents(data []byte) bool { + if excludeContentPattern == nil { + return false + } + + scanner := bufio.NewScanner(bytes.NewReader(data)) + for scanner.Scan() { + if excludeContentPattern.Match(scanner.Bytes()) { + return true + } + } + return false +} + // Reads and processes files from input chan in parallel, and sends results to // output chan func fileProcessorWorker(input chan *FileJob, output chan *FileJob) { @@ -662,6 +677,13 @@ func fileProcessorWorker(input chan *FileJob, output chan *FileJob) { fileStartTime := makeTimestampNano() content, err := reader.ReadFile(loc, int(job.Bytes)) + if excludeByContents(content) { + if Verbose { + printWarnf("exclude %s by its content", loc) + } + continue + } + atomic.AddInt64(&fileCount, 1) if atomic.LoadInt64(&gcEnabled) == 0 && atomic.LoadInt64(&fileCount) >= int64(GcFileCount) { diff --git a/test-all.sh b/test-all.sh index db303c66a..ad096cd4a 100755 --- a/test-all.sh +++ b/test-all.sh @@ -1061,6 +1061,30 @@ else echo -e "${GREEN}PASSED Issue379 Regression Check" fi +Issue507GoFilesWithoutExclude=$(./scc -f csv "examples/issue507/" | grep 'Go' | cut -d ',' -f 8) +Issue507PythonFilesWithoutExclude=$(./scc -f csv "examples/issue507/" | grep 'Python' | cut -d ',' -f 8) +excludeContent="^(#|//) *exclude-content test.+, DO NOT EDIT." +Issue507GoFilesExcluded=$(./scc -f csv --exclude-content="${excludeContent}" "examples/issue507/" | grep 'Go' | cut -d ',' -f 8) +Issue507PythonFilesExcluded=$(./scc -f csv --exclude-content="${excludeContent}" "examples/issue507/" | grep 'Python' | cut -d ',' -f 8) +if [ ${Issue507GoFilesWithoutExclude:-0} -ne 1 ] || [ ${Issue507PythonFilesWithoutExclude:-0} -ne 2 ] ; then + echo -e "${RED}=======================================================" + echo -e "FAILED Issue507 should contains 1 Go file and 2 Python files" + echo -e "=======================================================${NC}" + exit +elif [ ${Issue507GoFilesExcluded:-0} -ne 0 ] ; then + echo -e "${RED}=======================================================" + echo -e "FAILED Issue507 exclude Go files by contents" + echo -e "=======================================================${NC}" + exit +elif [ ${Issue507PythonFilesExcluded:-0} -ne 1 ] ; then + echo -e "${RED}=======================================================" + echo -e "FAILED Issue507 exclude Python files by contents" + echo -e "=======================================================${NC}" + exit +else + echo -e "${GREEN}PASSED Issue507 Exclude Content Check" +fi + # Extra case for longer languages that are normally truncated for i in 'CloudFormation (YAM' 'CloudFormation (JSO' do