Skip to content

Commit

Permalink
clump correct
Browse files Browse the repository at this point in the history
  • Loading branch information
SamuelMathieu-code committed Sep 8, 2023
1 parent 87ed06a commit c0d3529
Show file tree
Hide file tree
Showing 3 changed files with 80 additions and 29 deletions.
2 changes: 1 addition & 1 deletion src/CompositeLD.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
module CompositeLD


export ld_r2, getLDmat, formatSnpData!
export maf, ld_r2, getLDmat, formatSnpData!
export clump, tclump
export getStrongLD

Expand Down
84 changes: 56 additions & 28 deletions src/clump.jl
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@ See [`formatSnpData!`](@ref).
function clump(ref_genotypes::SnpData,
snps::AbstractVector{<:Tuple{Integer, Integer}};
r2_tresh::AbstractFloat = 0.1,
formated = false
formated = false,
min_maf::Real = 0
)::Vector{Bool}

if !formated
Expand All @@ -54,13 +55,19 @@ function clump(ref_genotypes::SnpData,
snps_indx = Vector{Int}(undef, size(snps, 1))
indx_v_b = Vector{Bool}(undef, size(snps, 1))
for (i, chr_pos_sing) in collect(enumerate(snps))
j = searchsortedfirst(ref_genotypes.snp_info.chr_pos, chr_pos_sing)
@inbounds begin
indx_v_b[i] = firstindex(ref_genotypes.snp_info.chr_pos) j lastindex(ref_genotypes.snp_info.chr_pos) &&
ref_genotypes.snp_info.chr_pos[j] == chr_pos_sing
if indx_v_b[i]
snps_indx[i] = ref_genotypes.snp_info.idx[j]
j = searchsorted(ref_genotypes.snp_info.chr_pos, chr_pos_sing) # range valid if of length 1 (> 1 => triallelic, < 1 => not found)
if length(j) == 1
snps_indx[i] = ref_genotypes.snp_info.idx[j[]]

# if maf insufficient : discard variant
if maf(ref_genotypes.snparray[:, snps_indx[i]]) > min_maf
indx_v_b[i] = true
else
indx_v_b[i] = false
end
else
# if variant not biallelic : discard
indx_v_b[i] = false
end
end

Expand All @@ -85,7 +92,8 @@ end
function clump(ref_genotypes::SnpData,
snps::AbstractVector{<:AbstractString};
r2_tresh::AbstractFloat = 0.1,
formated = false
formated = false,
min_maf::Real = 0
)::Vector{Bool}

if !formated
Expand All @@ -96,13 +104,19 @@ function clump(ref_genotypes::SnpData,
snps_indx = Vector{Int}(undef, size(snps, 1))
indx_v_b = Vector{Bool}(undef, size(snps, 1))
for (i, chr_pos_sing) in collect(enumerate(snps))
j = searchsortedfirst(ref_genotypes.snp_info.snpid, chr_pos_sing)
@inbounds begin
indx_v_b[i] = firstindex(ref_genotypes.snp_info.snpid) j lastindex(ref_genotypes.snp_info.snpid) &&
ref_genotypes.snp_info.snpid[j] == chr_pos_sing
if indx_v_b[i]
snps_indx[i] = ref_genotypes.snp_info.idx[j]
j = searchsorted(ref_genotypes.snp_info.snpid, chr_pos_sing)
if length(j) == 1
snps_indx[i] = ref_genotypes.snp_info.idx[j[]]

# if maf insufficient : discard variant
if maf(ref_genotypes.snparray[:, snps_indx[i]]) > min_maf
indx_v_b[i] = true
else
indx_v_b[i] = false
end
else
# if variant not biallelic : discard
indx_v_b[i] = false
end
end

Expand Down Expand Up @@ -156,7 +170,8 @@ See [`formatSnpData!`](@ref).
function tclump(ref_genotypes::SnpData,
snps::AbstractVector{<:Tuple{Integer, Integer}};
r2_tresh::AbstractFloat = 0.1,
formated = false
formated = false,
min_maf::Real = 0
)::Vector{Bool}

if !formated
Expand All @@ -167,13 +182,19 @@ function tclump(ref_genotypes::SnpData,
snps_indx = Vector{Int}(undef, size(snps, 1)) # indices
indx_v_b = Vector{Bool}(undef, size(snps, 1)) # found or not
@threads for (i, chr_pos_sing) in collect(enumerate(snps))
j = searchsortedfirst(ref_genotypes.snp_info.chr_pos, chr_pos_sing)
@inbounds begin
indx_v_b[i] = firstindex(ref_genotypes.snp_info.chr_pos) j lastindex(ref_genotypes.snp_info.chr_pos) &&
ref_genotypes.snp_info.chr_pos[j] == chr_pos_sing
if indx_v_b[i]
snps_indx[i] = ref_genotypes.snp_info.idx[j]
j = searchsorted(ref_genotypes.snp_info.chr_pos, chr_pos_sing)
@inbounds if length(j) == 1
snps_indx[i] = ref_genotypes.snp_info.idx[j[]]

# if maf insufficient : discard variant
if maf(ref_genotypes.snparray[:, snps_indx[i]]) > min_maf
indx_v_b[i] = true
else
indx_v_b[i] = false
end
else
# if variant not biallelic : discard
@inbounds indx_v_b[i] = false
end
end

Expand All @@ -199,7 +220,8 @@ end
function tclump(ref_genotypes::SnpData,
snps::AbstractVector{<:AbstractString};
r2_tresh::AbstractFloat = 0.1,
formated::Bool = false
formated::Bool = false,
min_maf::Real = 0
)::Vector{Bool}

if !formated
Expand All @@ -210,13 +232,19 @@ function tclump(ref_genotypes::SnpData,
snps_indx = Vector{Int}(undef, size(snps, 1))
indx_v_b = Vector{Bool}(undef, size(snps, 1))
@threads for (i, chr_pos_sing) in collect(enumerate(snps))
j = searchsortedfirst(ref_genotypes.snp_info.snpid, chr_pos_sing)
@inbounds begin
indx_v_b[i] = firstindex(ref_genotypes.snp_info.snpid) j lastindex(ref_genotypes.snp_info.snpid) &&
ref_genotypes.snp_info.snpid[j] == chr_pos_sing
if indx_v_b[i]
snps_indx[i] = ref_genotypes.snp_info.idx[j]
j = searchsorted(ref_genotypes.snp_info.snpid, chr_pos_sing)
@inbounds if length(j) == 1
snps_indx[i] = ref_genotypes.snp_info.idx[j[]]

# if maf insufficient : discard variant
if maf(ref_genotypes.snparray[:, snps_indx[i]]) > min_maf
indx_v_b[i] = true
else
indx_v_b[i] = false
end
else
# if variant not biallelic : discard
indx_v_b[i] = false
end
end

Expand Down
23 changes: 23 additions & 0 deletions src/ld.jl
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,29 @@ macro HOMO_m() # aa
return :(0x03)
end

################## MAF ##################

@inline function maf(s1)::Float64
m = n = N = 0
@inbounds for i in eachindex(s1)
g = s1[i]
if g == @HOMO_M
N += 2
m += 2
elseif g == @HOMO_m
n += 2
m += 2
elseif g == @HETER
n += 1
N +=1
m += 2
end
end

return min(N/m, n/m)
end


############ LD calculations ############

# LD r² composite of pair of SNPs given two vectors of genotypes
Expand Down

0 comments on commit c0d3529

Please sign in to comment.