Skip to content

Commit

Permalink
Add write support to json from pd.DataFrame (AmpX-AI#31)
Browse files Browse the repository at this point in the history
add json write support
  • Loading branch information
ampx-mg authored Feb 7, 2023
1 parent 55b3ebe commit 14b9b94
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 0 deletions.
3 changes: 3 additions & 0 deletions src/fsql/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,9 @@ def write_object(
elif format == "csv":
with fs.open(url_suff, "wb") as fd:
data.to_csv(fd)
elif format == "json":
with fs.open(url_suff, "wb") as fd:
data.to_json(fd)
else:
raise ValueError(f"unsupported format for dataframe writing: {format}")
elif isinstance(data, io.StringIO) or isinstance(data, io.BytesIO):
Expand Down
5 changes: 5 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Sets a mock/fake of the S3 filesystem for any `fsql`-based usage."""
import json
import os

import fsspec
Expand All @@ -14,6 +15,10 @@ def put_s3_file(self, data, url):
with self.s3fs.open(url, "wb") as fd:
fd.write(data)

def read_json_file(self, url):
    """Return the parsed JSON content of the object stored at `url`.

    The object is opened in text mode through `self.s3fs`, read in full,
    and decoded with the standard `json` module.
    """
    with self.s3fs.open(url, "r") as handle:
        raw_text = handle.read()
    return json.loads(raw_text)


@pytest.fixture
def helper():
Expand Down
26 changes: 26 additions & 0 deletions tests/test_write_object.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import io
import json

import pandas as pd
import pytest
Expand Down Expand Up @@ -87,3 +88,28 @@ def test_write_vanilla_bytes(tmpdir):
with open(path, "rb") as f:
extracted = f.read()
assert extracted == data


def test_write_json_s3(helper):
    """Write a dataframe as JSON to the `helper` S3 filesystem and verify it.

    The dataframe is written with `write_object(..., format="json")` and the
    stored object is read back through `helper.read_json_file`.
    """
    bucket = "test-bouquet"
    # Build the URL from the bucket once so the write and the read-back
    # cannot drift apart.
    url = f"s3://{bucket}/my_df.json"
    helper.s3fs.mkdir(bucket)

    # Named `frame` rather than `input` to avoid shadowing the builtin.
    frame = pd.DataFrame({"k1": [1, 2], "k2": [3, 4]}, index=["one", "two"])
    write_object(url, frame, format="json")

    output = helper.read_json_file(url)
    # The expected layout is {column: {index_label: value}}.
    expected_output = {"k1": {"one": 1, "two": 2}, "k2": {"one": 3, "two": 4}}
    assert output == expected_output


def test_write_json(tmpdir):
    """Write a dataframe as JSON to a local file and verify its content.

    The dataframe is written through a `file://` URL with
    `write_object(..., format="json")`, then the file is parsed with the
    standard `json` module and compared to the expected mapping.
    """
    # Named `frame` rather than `input` to avoid shadowing the builtin.
    frame = pd.DataFrame({"k1": [1, 2], "k2": [3, 4]}, index=["one", "two"])
    path_base = tmpdir.join("my_file.json")
    url = f"file://{path_base}"
    write_object(url, frame, format="json")

    # Read with an explicit encoding so the result does not depend on the
    # platform's locale default.
    with open(path_base, "r", encoding="utf-8") as f:
        output = json.load(f)

    # The expected layout is {column: {index_label: value}}.
    expected_output = {"k1": {"one": 1, "two": 2}, "k2": {"one": 3, "two": 4}}
    assert output == expected_output

0 comments on commit 14b9b94

Please sign in to comment.