Skip to content

Commit

Permalink
Add optional lz4 codec
Browse files Browse the repository at this point in the history
  • Loading branch information
mcguipat authored and scottbelden committed Dec 16, 2019
1 parent d16df49 commit 4a11dbd
Show file tree
Hide file tree
Showing 10 changed files with 68 additions and 5 deletions.
1 change: 1 addition & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ before_install:
- sudo apt-get install -y libsnappy-dev
- pip install python-snappy
- pip install zstandard
- pip install lz4
install:
- pip install -r developer_requirements.txt
script:
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ encoding/decoding).
* Schemaless Reader
* JSON Writer
* JSON Reader
* Codecs (Snappy, Deflate, Zstandard, Bzip2)
* Codecs (Snappy, Deflate, Zstandard, Bzip2, LZ4)
* Schema resolution
* Aliases
* Logical Types
Expand Down
1 change: 1 addition & 0 deletions azure-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ jobs:
brew install snappy
CPPFLAGS="-I/usr/local/include -L/usr/local/lib" python -m pip install python-snappy
pip install zstandard
pip install lz4
- script: python -m pip install --upgrade -r developer_requirements.txt
displayName: 'Install dependencies'
Expand Down
2 changes: 1 addition & 1 deletion docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ Supported Features
* Schemaless Reader
* JSON Writer
* JSON Reader
* Codecs (Snappy, Deflate, Zstandard, Bzip2)
* Codecs (Snappy, Deflate, Zstandard, Bzip2, LZ4)
* Schema resolution
* Aliases
* Logical Types
Expand Down
14 changes: 14 additions & 0 deletions fastavro/_read.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -636,6 +636,20 @@ else:
BLOCK_READERS["zstandard"] = zstandard_read_block


cpdef lz4_read_block(fo):
length = read_long(fo)
data = fo.read(length)
return MemoryIO(lz4.block.decompress(data))


try:
import lz4.block
except ImportError:
BLOCK_READERS["lz4"] = missing_codec_lib("lz4", "lz4")
else:
BLOCK_READERS["lz4"] = lz4_read_block


def _iter_avro_records(fo, header, codec, writer_schema, reader_schema,
return_record_name=False):
cdef int32 i
Expand Down
14 changes: 14 additions & 0 deletions fastavro/_read_py.py
Original file line number Diff line number Diff line change
Expand Up @@ -512,6 +512,20 @@ def zstandard_read_block(decoder):
BLOCK_READERS["zstandard"] = zstandard_read_block


def lz4_read_block(decoder):
length = read_long(decoder)
data = decoder.read_fixed(length)
return MemoryIO(lz4.block.decompress(data))


try:
import lz4.block
except ImportError:
BLOCK_READERS["lz4"] = missing_codec_lib("lz4", "lz4")
else:
BLOCK_READERS["lz4"] = lz4_read_block


def _iter_avro_records(decoder, header, codec, writer_schema, reader_schema,
return_record_name=False):
"""Return iterator over avro records."""
Expand Down
17 changes: 17 additions & 0 deletions fastavro/_write.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,23 @@ cpdef zstandard_write_block(object fo, bytes block_bytes):
fo.write(data)


try:
import lz4.block
except ImportError:
BLOCK_WRITERS["lz4"] = _missing_dependency("lz4", "lz4")
else:
BLOCK_WRITERS["lz4"] = lz4_write_block


cpdef lz4_write_block(object fo, bytes block_bytes):
"""Write block in "lz4" codec."""
cdef bytearray tmp = bytearray()
data = lz4.block.compress(block_bytes)
write_long(tmp, len(data))
fo.write(tmp)
fo.write(data)


cdef class MemoryIO(object):
cdef bytearray value

Expand Down
15 changes: 15 additions & 0 deletions fastavro/_write_py.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,21 @@ def zstandard_write_block(encoder, block_bytes):
BLOCK_WRITERS["zstandard"] = zstandard_write_block


def lz4_write_block(encoder, block_bytes):
"""Write block in "lz4" codec."""
data = lz4.block.compress(block_bytes)
encoder.write_long(len(data))
encoder._fo.write(data)


try:
import lz4.block
except ImportError:
BLOCK_WRITERS["lz4"] = _missing_codec_lib("lz4", "lz4")
else:
BLOCK_WRITERS["lz4"] = lz4_write_block


class GenericWriter(object):

def __init__(self,
Expand Down
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,10 @@ def version():
'Topic :: Scientific/Engineering :: Information Analysis',
],
extras_require={
'codecs': ['python-snappy', 'zstandard'],
'codecs': ['python-snappy', 'zstandard', 'lz4'],
'snappy': ['python-snappy'],
'zstandard': ['zstandard'],
'lz4': ['lz4'],
},
tests_require=['pytest', 'flake8', 'check-manifest'],
setup_requires=setup_requires
Expand Down
4 changes: 2 additions & 2 deletions tests/test_compression.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def test_builtin_codecs(codec):
assert records == out_records


@pytest.mark.parametrize("codec", ["snappy", "zstandard"])
@pytest.mark.parametrize("codec", ["snappy", "zstandard", "lz4"])
@pytest.mark.skipif(os.name == "nt", reason="A pain to set up on windows")
def test_optional_codecs(codec):
schema = {
Expand Down Expand Up @@ -65,7 +65,7 @@ def test_optional_codecs(codec):
assert records == out_records


@pytest.mark.parametrize("codec", ["snappy", "zstandard"])
@pytest.mark.parametrize("codec", ["snappy", "zstandard", "lz4"])
@pytest.mark.skipif(os.name != "nt", reason="codec is present")
def test_optional_codecs_not_installed(codec):
schema = {
Expand Down

0 comments on commit 4a11dbd

Please sign in to comment.