Skip to content

Commit

Permalink
Add ability to read .bz2 files (#1059)
Browse files Browse the repository at this point in the history
  • Loading branch information
st-pasha authored May 25, 2018
1 parent 54e3e90 commit a5df590
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 0 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
cells.
- `dt.__git_version__` variable containing the commit hash from which the
package was built.
- ability to read .bz2 compressed files with fread.


#### Fixed
- Ensure that fread only emits messages to Python from the master thread.
Expand Down
7 changes: 7 additions & 0 deletions datatable/fread.py
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,13 @@ def _resolve_archive(self, filename, subpath=None):
self.logger.debug("Extracting %s into memory" % filename)
self._text = zf.read()

elif ext == ".bz2":
import bz2
zf = bz2.open(filename, mode="rb")
if self._verbose:
self.logger.debug("Extracting %s into memory" % filename)
self._text = zf.read()

elif ext == ".xz":
import lzma
zf = lzma.open(filename, mode="rb")
Expand Down
15 changes: 15 additions & 0 deletions tests/fread/test_fread_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,21 @@ def test_fread_gz_file(tempfile, capsys):
os.unlink(gzfile)


def test_fread_bz2_file(tempfile, capsys):
    """Check that fread reads a .bz2 file and logs the extraction when verbose."""
    import bz2
    archive_path = tempfile + ".bz2"
    payload = b"A\n11\n22\n33\n"
    # Build a small bz2-compressed CSV next to the temporary file.
    with bz2.open(archive_path, "wb") as archive:
        archive.write(payload)
    try:
        frame = dt.fread(archive_path, verbose=True)
        captured_out, _ = capsys.readouterr()
        assert frame.internal.check()
        assert frame.topython() == [[11, 22, 33]]
        # Verbose mode must report that the archive was unpacked in memory.
        expected_msg = "Extracting %s into memory" % archive_path
        assert expected_msg in captured_out
    finally:
        # Remove the archive created above regardless of the test outcome.
        os.remove(archive_path)


def test_fread_zip_file_1(tempfile, capsys):
import zipfile
zfname = tempfile + ".zip"
Expand Down

0 comments on commit a5df590

Please sign in to comment.