Skip to content

Commit

Permalink
unikmer filter: update
Browse files Browse the repository at this point in the history
  • Loading branch information
shenwei356 committed Jul 3, 2020
1 parent 1fb4bbb commit 2881064
Showing 1 changed file with 16 additions and 12 deletions.
28 changes: 16 additions & 12 deletions unikmer/cmd/filter.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@ Attentions:
threshold := getFlagNonNegativeInt(cmd, "threshold")
invert := getFlagBool(cmd, "invert")
window := getFlagPositiveInt(cmd, "window")
penaltyS := getFlagInt(cmd, "penalty-s")
penaltyD := getFlagInt(cmd, "penalty-d")

if !isStdout(outFile) {
outFile += extDataFile
Expand Down Expand Up @@ -141,7 +143,7 @@ Attentions:
checkError(err)
}

hit = filterCode(code, k, threshold, window, scores)
hit = filterCode(code, k, penaltyS, penaltyD, threshold, window, &scores)

if invert {
if !hit {
Expand Down Expand Up @@ -176,30 +178,32 @@ func init() {
RootCmd.AddCommand(filterCmd)

filterCmd.Flags().StringP("out-prefix", "o", "-", `out file prefix ("-" for stdout)`)
filterCmd.Flags().IntP("threshold", "t", 14, `score threshold for filter`)
filterCmd.Flags().IntP("window", "w", 10, `window size for checking score`)
filterCmd.Flags().BoolP("invert", "v", false, `invert result, i.e., output low-complexity k-mers`)
filterCmd.Flags().IntP("threshold", "t", 15, `penalty threshold for filter, higher is stricter`)
filterCmd.Flags().IntP("window", "w", 7, `window size for checking penalty`)
filterCmd.Flags().IntP("penalty-s", "s", 3, `penalty for successive bases`)
filterCmd.Flags().IntP("penalty-d", "d", 1, `penalty for different bases`)
}

func filterCode(code uint64, k int, threshold int, window int, scores []int) bool {
// code0 := code
func filterCode(code uint64, k int, penaltyS int, penaltyD int, threshold int, window int, scores *[]int) bool {
// compute scores
var last, c uint64
last = 356
for i := 0; i < k; i++ {
c = code & 3
if i > 0 {
if c == last {
scores[i] = 2
(*scores)[i] = penaltyS // successive
} else {
scores[i] = -1
(*scores)[i] = penaltyD // different
}
} else {
scores[i] = 1
(*scores)[i] = penaltyD
}
last = c
code >>= 2
}

// check score in sliding window
var s, pre int
iLast := k - window - 1
Expand All @@ -209,16 +213,16 @@ func filterCode(code uint64, k int, threshold int, window int, scores []int) boo
for i := 0; i <= iLast; i++ {
if i == 0 {
for j := 0; j < window; j++ {
s += scores[j]
s += (*scores)[j]
}
} else { // update score
s = s - pre + scores[i+window-1]
s = s - pre + (*scores)[i+window-1]
}
pre = scores[i]
// fmt.Printf("%s, %d, %d\n", unikmer.KmerCode{code0, k}, i, s)
pre = (*scores)[i]
if s >= threshold {
return true
}
}

return false
}

0 comments on commit 2881064

Please sign in to comment.