Skip to content

Commit

Permalink
add StackedSnpArray
Browse files Browse the repository at this point in the history
  • Loading branch information
kose-y committed Apr 6, 2022
1 parent d531481 commit 3ddb496
Show file tree
Hide file tree
Showing 5 changed files with 67 additions and 4 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name = "SnpArrays"
uuid = "4e780e97-f5bf-4111-9dc4-b70aaf691b06"
keywords = ["GWAS", "Genome-wide association studies", "binary plink"]
authors = ["Hua Zhou <[email protected]>", "Seyoon Ko <[email protected]>", "Douglas Bates <[email protected]>", "OpenMendel Team"]
version = "0.3.15"
version = "0.3.16"

[deps]
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
Expand Down
5 changes: 4 additions & 1 deletion src/SnpArrays.jl
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ import SpecialFunctions: gamma_inc
import VectorizationBase: gesp
import Tables: table
export AbstractSnpArray, AbstractSnpBitMatrix, AbstractSnpLinAlg
export SnpArray, SnpBitMatrix, SnpLinAlg, SnpData
export SnpArray, SnpBitMatrix, SnpLinAlg, SnpData, StackedSnpArray
export compress_plink, decompress_plink, split_plink, merge_plink, write_plink
export counts, grm, grm_admixture, maf, mean, minorallele, missingpos, missingrate
export std, var, vcf2plink
Expand All @@ -32,6 +32,7 @@ const RECESSIVE_MODEL = Val(3)

include("codec.jl")
include("snparray.jl")
include("stackedsnparray.jl")
include("filter.jl")
include("cat.jl")
include("snpdata.jl")
Expand All @@ -42,6 +43,8 @@ include("linalg_bitmatrix.jl")
include("reorder.jl")
include("vcf2plink.jl")
include("admixture.jl")
AbstractSnpArray = Union{SnpArray, SubArray{UInt8, 1, SnpArray}, SubArray{UInt8, 2, SnpArray},
StackedSnpArray, SubArray{UInt8, 1, StackedSnpArray}, SubArray{UInt8, 2, StackedSnpArray}}

datadir(parts...) = joinpath(@__DIR__, "..", "data", parts...)

Expand Down
18 changes: 16 additions & 2 deletions src/snparray.jl
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,21 @@ struct SnpArray <: AbstractMatrix{UInt8}
m::Int
end

AbstractSnpArray = Union{SnpArray, SubArray{UInt8, 1, SnpArray}, SubArray{UInt8, 2, SnpArray}}
"""
StackedSnpArray(s::Vector{SnpArray})
Stacked SnpArray for unified indexing
"""
struct StackedSnpArray <: AbstractMatrix{UInt8} # details in stackedsnparray.jl
arrays::Vector{SnpArray}
m::Int
n::Int
ns::Vector{Int}
offsets::Vector{Int} # 0-based
end

AbstractSnpArray = Union{SnpArray, SubArray{UInt8, 1, SnpArray}, SubArray{UInt8, 2, SnpArray},
StackedSnpArray, SubArray{UInt8, 1, StackedSnpArray}, SubArray{UInt8, 2, StackedSnpArray}}

function SnpArray(bednm::AbstractString, m::Integer, args...; kwargs...)
checkplinkfilename(bednm, "bed")
Expand All @@ -40,7 +54,7 @@ function SnpArray(
kwargs...)
checkplinkfilename(bednm, "bed")
# check user supplied fam filename
if famnm == nothing
if famnm === nothing
famnm = replace(bednm, ".bed" => ".fam")
isfile(famnm) || throw(ArgumentError("fam file not found"))
else # user supplied fam file name
Expand Down
36 changes: 36 additions & 0 deletions src/stackedsnparray.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
function StackedSnpArray(arrays::Vector{SnpArray})
@assert length(arrays) > 0
m = arrays[1].m
for i in 2:length(arrays)
@assert m == arrays[i].m
end
ns = Array{Int}(undef, length(arrays))
offsets = similar(ns, length(ns) + 1)
offset = 0
for i in 1:length(arrays)
ns[i] = size(arrays[i], 2)
offsets[i] = offset
offset += ns[i]
end
offsets[end] = offset
n = offset
StackedSnpArray(arrays, m, n, ns, offsets)
end

@inline function Base.getindex(s::StackedSnpArray, i::Integer, j::Integer)
@boundscheck checkbounds(s, i, j)
arrayidx = searchsortedfirst(s.offsets, j) - 1
@inbounds inarrayidx = j - s.offsets[arrayidx]
s.arrays[arrayidx][i, inarrayidx]
end

@inline Base.eltype(s::StackedSnpArray) = UInt8

@inline Base.length(s::StackedSnpArray) = s.m * s.n

@inline Base.size(s::StackedSnpArray) = (s.m, s.n)

@inline Base.size(s::StackedSnpArray, k::Integer) =
k == 1 ? s.m :
(k == 2 ? s.n :
(k > 2 ? 1 : error("Dimension k out of range")))
10 changes: 10 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -752,3 +752,13 @@ g = grm(mouse)
@test count(kinship_pruning(g; method=:bottom_up)) == 132
@test count(kinship_pruning(g; method=:plink)) == 126
end

@testset "stackedsnparray" begin
mouse2 = StackedSnpArray([mouse, mouse])
@test size(mouse2) == (1940, 20300)
@test size(mouse2, 3) == 1
@test_throws ErrorException size(mouse2, 0)
@test length(mouse2) == 1940 * 20300
@test eltype(mouse2) == UInt8
@test mouse2[777, 16384] == mouse[777, 16384 - 10150]
end

2 comments on commit 3ddb496

@kose-y
Copy link
Member Author

@kose-y kose-y commented on 3ddb496 Apr 6, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/58004

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.3.16 -m "<description of version>" 3ddb496cabf5ea02faa8ee3cd2ca41eb3833a399
git push origin v0.3.16

Please sign in to comment.