forked from dr-leo/pandaSDMX
-
Notifications
You must be signed in to change notification settings - Fork 19
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #212 from khaeru/enh/read-pandas
Convert pandas.DataFrame as if it were SDMX-CSV
- Loading branch information
Showing
19 changed files
with
499 additions
and
153 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,126 +1,200 @@ | ||
from pathlib import Path | ||
from typing import TYPE_CHECKING, Any, Optional, Union | ||
from warnings import warn | ||
|
||
from . import csv, json, xml | ||
|
||
#: Reader classes | ||
READERS = [csv.Reader, json.Reader, xml.Reader] | ||
if TYPE_CHECKING: | ||
import io | ||
from typing import TypeVar | ||
|
||
import sdmx.message | ||
import sdmx.reader.base | ||
|
||
def _readers(): | ||
return ", ".join(map(lambda cls: cls.__name__, READERS)) | ||
T = TypeVar("T", bound=sdmx.reader.base.Converter) | ||
|
||
|
||
def detect_content_reader(content): | ||
"""Return a reader class for `content`. | ||
#: All converters. Application code **may** extend this collection with custom | ||
#: sub-classes of :class:`.Converter`. | ||
CONVERTER = [csv.DataFrameConverter, csv.Reader, json.Reader, xml.Reader] | ||
|
||
The :meth:`.BaseReader.detect` method for each class in :data:`READERS` is called; | ||
if a reader signals that it is compatible with `content`, then that class is | ||
returned. | ||
#: Only Readers for standard SDMX formats. | ||
READERS = [csv.Reader, json.Reader, xml.Reader] | ||
|
||
Raises | ||
------ | ||
ValueError | ||
If no reader class matches. | ||
|
||
def detect_content_reader(content) -> type["sdmx.reader.base.BaseReader"]: | ||
"""Return a reader class for :class:`bytes` `content`. | ||
.. deprecated:: 2.20.0 | ||
Use :func:`get_reader` instead. | ||
""" | ||
for cls in READERS: | ||
if cls.detect(content): | ||
return cls | ||
warn( | ||
"detect_content_reader(bytes); use get_reader() instead", | ||
DeprecationWarning, | ||
stacklevel=2, | ||
) | ||
return get_reader(content) | ||
|
||
raise ValueError(f"{repr(content)} not recognized by any of {_readers()}") | ||
|
||
def _get(data: Any, kwargs: Optional[dict], _classes: list[type["T"]]) -> type["T"]: | ||
for c in _classes: | ||
if c.handles(data, kwargs or {}): | ||
return c | ||
|
||
def get_reader_for_media_type(value): | ||
"""Return a reader class for HTTP content/media type `value`. | ||
raise ValueError( | ||
f"{data!r} not recognized by any of " | ||
+ ", ".join(map(lambda c: c.__name__, _classes)) | ||
) | ||
|
||
Raises | ||
------ | ||
ValueError | ||
If no reader class matches. | ||
|
||
See also | ||
-------- | ||
BaseReader.media_type | ||
""" | ||
for cls in READERS: | ||
if cls.handles_media_type(value): | ||
return cls | ||
def get_converter( | ||
data: Any, kwargs: Optional[dict] = None | ||
) -> type["sdmx.reader.base.Converter"]: | ||
"""Identify a :class:`Converter` or :class:`.Reader` for `data`. | ||
raise ValueError(f"Media type {value!r} not supported by any of {_readers()}") | ||
For each class in :data:`CONVERTER`, the :meth:`.Converter.handles` or | ||
:meth:`.BaseReader.handles` method is called with `data` and `kwargs`. | ||
`data` may include: | ||
def get_reader_for_path(path): | ||
"""Return a reader class for file `path`. | ||
- :class:`bytes` —same behaviour as deprecated :func:`.detect_content_reader`. | ||
- :class:`requests.Response` —same behaviour as deprecated | ||
:func:`.get_reader_for_media_type`. | ||
- :class:`pathlib.Path` —same behaviour as deprecated :func:`.get_reader_for_path`. | ||
…or, anything else that is handled by a class listed in :data:`CONVERTER`. | ||
Raises | ||
------ | ||
ValueError | ||
If no reader class matches. | ||
if none of the Converter classes can convert `data` and `kwargs` to SDMX. | ||
""" | ||
return _get(data, kwargs, CONVERTER) | ||
|
||
See also | ||
-------- | ||
BaseReader.suffixes | ||
|
||
def get_reader( | ||
data: Any, | ||
kwargs: Optional[dict] = None, | ||
_classes: list[type["sdmx.reader.base.BaseReader"]] = READERS, | ||
) -> type["sdmx.reader.base.BaseReader"]: | ||
"""Identify a :class:`.Reader` for `data`. | ||
Identical to :func:`.get_converter`, except only :data:`READERS` for SDMX standard | ||
formats are returned. | ||
""" | ||
suffix = Path(path).suffix | ||
for cls in READERS: | ||
if cls.supports_suffix(suffix): | ||
return cls | ||
return _get(data, kwargs, READERS) | ||
|
||
raise ValueError(f"File suffix {repr(suffix)} not supported by any of {_readers()}") | ||
|
||
def get_reader_for_media_type(value) -> type["sdmx.reader.base.BaseReader"]: | ||
"""Return a reader class for HTTP content/media type `value`. | ||
def read_sdmx(filename_or_obj, format=None, **kwargs): | ||
"""Load a SDMX-ML or SDMX-JSON message from a file or file-like object. | ||
.. deprecated:: 2.20.0 | ||
Use :func:`get_reader` instead. | ||
""" | ||
from requests import Response | ||
|
||
Parameters | ||
---------- | ||
filename_or_obj : str or :class:`~os.PathLike` or file | ||
format : 'XML' or 'JSON', optional | ||
warn( | ||
"get_reader_for_media_type(str); use get_reader(requests.Response) instead", | ||
DeprecationWarning, | ||
stacklevel=2, | ||
) | ||
|
||
Other Parameters | ||
---------------- | ||
dsd : :class:`DataStructureDefinition <.BaseDataStructureDefinition>` | ||
For “structure-specific” `format`=``XML`` messages only. | ||
# Use `value` as Content-Type header for an otherwise-empty Response | ||
resp = Response() | ||
resp.headers["content-type"] = value | ||
|
||
try: | ||
return get_reader(resp) | ||
except ValueError as e: | ||
*_, names = e.args[0].partition(" any of ") | ||
raise ValueError(f"Media type {value!r} not supported by any of {names}") | ||
|
||
|
||
def get_reader_for_path(path) -> type["sdmx.reader.base.BaseReader"]: | ||
"""Return a reader class for file `path`. | ||
.. deprecated:: 2.20.0 | ||
Use :func:`get_reader` instead. | ||
""" | ||
reader = None | ||
warn( | ||
"get_reader_for_path(…); use get_reader() instead", | ||
DeprecationWarning, | ||
stacklevel=2, | ||
) | ||
|
||
p = Path(path) | ||
try: | ||
path = Path(filename_or_obj) | ||
return get_reader(p) | ||
except ValueError as e: | ||
*_, names = e.args[0].partition(" any of ") | ||
raise ValueError(f"File suffix {p.suffix!r} not supported by any of {names}") | ||
|
||
# Open the file | ||
obj = open(path, "rb") | ||
except TypeError: | ||
# Not path-like → opened file | ||
path = None | ||
obj = filename_or_obj | ||
|
||
if path: | ||
try: | ||
# Use the file extension to guess the reader | ||
reader = get_reader_for_path(filename_or_obj) | ||
except ValueError: | ||
pass | ||
def read_sdmx( | ||
filename_or_obj: Union[bytes, str, Path, "io.IOBase", "io.BufferedReader"], | ||
format: Optional[str] = None, | ||
**kwargs, | ||
) -> "sdmx.message.Message": | ||
"""Read a :class:`.Message` from a path, file, or stream in an SDMX standard format. | ||
if not reader: | ||
try: | ||
reader = get_reader_for_path(Path(f"dummy.{format.lower()}")) | ||
except (AttributeError, ValueError): | ||
pass | ||
To identify whether `filename_or_obj` contains SDMX-CSV, SDMX-JSON, or SDMX-ML, | ||
:meth:`.BaseReader.handles` is called. | ||
if not reader: | ||
# Read a line and then return the cursor to the initial position | ||
pos = obj.tell() | ||
first_line = obj.readline().strip() | ||
obj.seek(pos) | ||
Parameters | ||
---------- | ||
filename_or_obj : | ||
may include: | ||
- :class:`str` or :class:`pathlib.Path`: path to a particular file. | ||
- :class:`bytes`: raw/binary SDMX content. | ||
- :class:`io.IOBase`: a buffer, opened file, or other I/O object containing | ||
binary SDMX content. | ||
format : 'CSV', 'XML', or 'JSON', optional | ||
force handling `filename_or_obj` as if it had the given extension, even if | ||
:meth:`~.BaseReader.handles` fails to match. | ||
Other Parameters | ||
---------------- | ||
structure : | ||
:class:`.Structure`, :class:`.StructureUsage`, or other information used by a | ||
:class:`.BaseReader` to interpret the content of `filename_or_obj`. For example, | ||
the :class:`DataStructureDefinition <.BaseDataStructureDefinition>` for a | ||
structure-specific SDMX-ML message. | ||
""" | ||
if isinstance(filename_or_obj, (str, Path)): | ||
path = Path(filename_or_obj) # Ensure Path type | ||
obj: Union[bytes, "io.IOBase"] = open(path, "rb") # Open the file | ||
else: | ||
path, obj = None, filename_or_obj | ||
|
||
# Try to identify a reader by first the path, then by the `obj` content | ||
for candidate in path, obj, Path(f"_.{(format or 'MISSING').lower()}"): | ||
try: | ||
reader = detect_content_reader(first_line) | ||
reader = get_reader(candidate, kwargs) | ||
except ValueError: | ||
pass | ||
reader = None | ||
else: | ||
break | ||
|
||
if not reader: | ||
raise RuntimeError( | ||
f"cannot infer SDMX message format from path {repr(path)}, " | ||
f"format={format}, or content '{first_line[:5].decode()}..'" | ||
f"cannot infer SDMX message format from path {path!r}, format " | ||
f"hint={format}, or content" | ||
) | ||
|
||
return reader().read_message(obj, **kwargs) | ||
return reader().convert(obj, **kwargs) | ||
|
||
|
||
def to_sdmx(data, **kwargs) -> "sdmx.message.Message": | ||
"""Convert `data` in non-SDMX formats and data structures to SDMX :class:`.Message`. | ||
Unlike :func:`.read_sdmx`, which handles only the standard SDMX formats SDMX-CSV, | ||
SDMX-JSON, and SDMX-ML, this method can process any Python data structure | ||
handled by a known :data:`CONVERTER`. | ||
""" | ||
try: | ||
converter = get_converter(data, kwargs) | ||
except ValueError: | ||
raise NotImplementedError(f"Convert {type(data)} {data!r} to SDMX") | ||
|
||
return converter().convert(data, **kwargs) |
Oops, something went wrong.