Skip to content

Commit

Permalink
Update Cython integer <-> kmer funcs
Browse files Browse the repository at this point in the history
  • Loading branch information
jlumpe committed Aug 4, 2024
1 parent cb71a32 commit d4d47ef
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 15 deletions.
4 changes: 2 additions & 2 deletions src/gambit/_cython/kmers.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ cimport numpy as np
ctypedef unsigned char CHAR


cpdef np.uint64_t kmer_to_index(const CHAR[:]) nogil except? 0
cpdef np.uint64_t kmer_to_index_rc(const CHAR[:]) nogil except? 0
cdef np.uint64_t c_kmer_to_index(const CHAR[:], bint*) nogil
cdef np.uint64_t c_kmer_to_index_rc(const CHAR[:], bint*) nogil
cdef void c_index_to_kmer(np.uint64_t, CHAR[:]) nogil
cdef void c_revcomp(const CHAR[:], CHAR[:]) nogil
60 changes: 47 additions & 13 deletions src/gambit/_cython/kmers.pyx
Original file line number Diff line number Diff line change
@@ -1,20 +1,39 @@
"""Cython module for working with DNA sequences and k-mers.
"""Cython module for working with DNA sequences and k-mers."""
Note: each of the 4 Python functions here have a C counterpart that does the actual work. The Python
version is just a wrapper that does any needed conversion, allocates buffers, and raises exceptions
if needed. The separation currently isn't necessary as the C functions aren't used anywhere else
outside the wrappers, but they may be in the future. Handling exceptions in the Python wrappers only
allows the C functions to be declared with nogil.
"""


cpdef np.uint64_t kmer_to_index(const CHAR[:] kmer) nogil except? 0:
"""kmer_to_index(kmer)
def kmer_to_index(const CHAR[:] kmer):
"""kmer_to_index(kmer: bytes) -> int
Convert k-mer byte string to its integer index.
"""
cdef:
np.uint64_t idx
bint exc = False

if kmer.shape[0] > 32:
raise ValueError('k must be <= 32')

idx = c_kmer_to_index(kmer, &exc)

if exc:
raise ValueError('Invalid character in k-mer')

return idx


cdef np.uint64_t c_kmer_to_index(const CHAR[:] kmer, bint *exc) nogil:
cdef:
np.uint64_t idx = 0
int i, k = kmer.shape[0]
CHAR nuc

if k > 32:
raise ValueError('k must be <= 32')

for i in range(k):
nuc = kmer[i]

Expand All @@ -30,24 +49,38 @@ cpdef np.uint64_t kmer_to_index(const CHAR[:] kmer) nogil except? 0:
elif nuc == 'T':
idx += 3
else:
raise ValueError(nuc)
exc[0] = True
return 0

return idx


cpdef np.uint64_t kmer_to_index_rc(const CHAR[:] kmer) nogil except? 0:
"""kmer_to_index_rc(kmer)
def kmer_to_index_rc(const CHAR[:] kmer):
"""kmer_to_index_rc(kmer: bytes) -> int
Get the integer index of the reverse complement of a k-mer byte string.
"""
cdef:
np.uint64_t idx
bint exc = False

if kmer.shape[0] > 32:
raise ValueError('k must be <= 32')

idx = c_kmer_to_index_rc(kmer, &exc)

if exc:
raise ValueError('Invalid character in k-mer')

return idx


cdef np.uint64_t c_kmer_to_index_rc(const CHAR[:] kmer, bint *exc) nogil:
cdef:
np.uint64_t idx = 0
int i, k = kmer.shape[0]
CHAR nuc

if k > 32:
raise ValueError('k must be <= 32')

for i in range(k):
nuc = kmer[k - i - 1]

Expand All @@ -63,7 +96,8 @@ cpdef np.uint64_t kmer_to_index_rc(const CHAR[:] kmer) nogil except? 0:
elif nuc == 'T':
idx += 0
else:
raise ValueError(nuc)
exc[0] = True
return 0

return idx

Expand Down

0 comments on commit d4d47ef

Please sign in to comment.