Skip to content

Commit

Permalink
Make bodo compatible with newer pandas versions to enable UDF engine …
Browse files Browse the repository at this point in the history
…support (#92)

Currently, importing bodo and running simple JIT functions in the pandas development environment does not work, because the pandas version there is newer and some modules/APIs have been removed.

SingleArrayManager, ArrayManager and _get_option have been removed in newer versions of pandas. Substitute dummy placeholders when they cannot be imported, since BlockManager/SingleBlockManager is always used in those versions.

pd.read_gbq was also removed in newer pandas versions, so it is only added to the unsupported list when it exists.
  • Loading branch information
scott-routledge2 authored Dec 27, 2024
1 parent 7c3e37e commit 80ef759
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 8 deletions.
12 changes: 9 additions & 3 deletions bodo/hiframes/pd_dataframe_ext.py
Original file line number Diff line number Diff line change
Expand Up @@ -5289,7 +5289,7 @@ def overload_union_dataframes(


# Throw BodoError for top-level unsupported functions in Pandas
pd_unsupported = (
pd_unsupported = [
# Input/output
pd.read_pickle,
pd.read_table,
Expand All @@ -5304,7 +5304,6 @@ def overload_union_dataframes(
pd.read_sas,
pd.read_spss,
pd.read_sql_query,
pd.read_gbq,
pd.read_stata,
pd.ExcelWriter,
pd.json_normalize,
Expand All @@ -5325,7 +5324,14 @@ def overload_union_dataframes(
pd.test,
# GroupBy
pd.Grouper,
)
]


# pd.read_gbq is not available in pandas releases newer than 2.2, so only
# register it as unsupported when this pandas build still exposes it.
# An explicit attribute check is used instead of a try/except around the
# whole statement so that an unrelated AttributeError (for example, from
# calling .append on a non-list) is not silently swallowed.
if hasattr(pd, "read_gbq"):
    pd_unsupported.append(pd.read_gbq)


pd_util_unsupported = (
Expand Down
12 changes: 11 additions & 1 deletion bodo/pandas/array_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,17 @@
import pandas as pd
from pandas.core.arrays import ExtensionArray
from pandas.core.arrays.arrow.array import ArrowExtensionArray
from pandas.core.internals.array_manager import ArrayManager, SingleArrayManager

try:
    from pandas.core.internals.array_manager import ArrayManager, SingleArrayManager
except ModuleNotFoundError:
    # Pandas > 2.2 ships no array_manager module (it only uses
    # BlockManager/SingleBlockManager), so define empty stand-ins to keep the
    # names importable.

    class ArrayManager:
        """Empty stand-in used when pandas has no array_manager module."""

    class SingleArrayManager:
        """Empty stand-in used when pandas has no array_manager module."""


import bodo.user_logging
from bodo.pandas.lazy_metadata import LazyMetadataMixin
Expand Down
21 changes: 17 additions & 4 deletions bodo/pandas/utils.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,25 @@
from pandas._config.config import _get_option

from bodo.pandas.array_manager import LazyArrayManager, LazySingleArrayManager
from bodo.pandas.managers import LazyBlockManager, LazySingleBlockManager


def get_data_manager_pandas() -> str:
    """Get the value of mode.data_manager from pandas config.

    Falls back to 'block' when the option cannot be read, either because the
    private ``_get_option`` helper is not importable or because the
    ``mode.data_manager`` option is not registered in this pandas build.

    Returns:
        str: The value of the mode.data_manager option or 'block'
    """
    try:
        from pandas._config.config import _get_option
    except ImportError:
        # _get_option is not available in Pandas > 2.2.
        return "block"

    try:
        return _get_option("mode.data_manager", silent=True)
    except KeyError:
        # pandas raises OptionError (a KeyError subclass) for an unknown
        # option key; treat a missing mode.data_manager like a missing helper.
        return "block"


def get_lazy_manager_class() -> type[LazyArrayManager | LazyBlockManager]:
"""Get the lazy manager class based on the pandas option mode.data_manager, suitable for DataFrame."""
data_manager = _get_option("mode.data_manager", silent=True)
data_manager = get_data_manager_pandas()
if data_manager == "block":
return LazyBlockManager
elif data_manager == "array":
Expand All @@ -20,7 +33,7 @@ def get_lazy_single_manager_class() -> (
type[LazySingleArrayManager | LazySingleBlockManager]
):
"""Get the lazy manager class based on the pandas option mode.data_manager, suitable for Series."""
data_manager = _get_option("mode.data_manager", silent=True)
data_manager = get_data_manager_pandas()
if data_manager == "block":
return LazySingleBlockManager
elif data_manager == "array":
Expand Down

0 comments on commit 80ef759

Please sign in to comment.