From 6c28f1ed4245f8bcb71148cd7472dac25a9fc3c4 Mon Sep 17 00:00:00 2001
From: bmweiner
Date: Sun, 10 Jan 2016 15:05:49 -0500
Subject: [PATCH] Alters feature 'combining' behavior, see issue #51.

---
 sklearn_pandas/dataframe_mapper.py | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/sklearn_pandas/dataframe_mapper.py b/sklearn_pandas/dataframe_mapper.py
index 9a59f6d..9162072 100644
--- a/sklearn_pandas/dataframe_mapper.py
+++ b/sklearn_pandas/dataframe_mapper.py
@@ -27,6 +27,10 @@ def _build_transformer(transformers):
     return transformers
 
 
+def _sparse_to_dense(extracted):
+    return [x.toarray() if sparse.issparse(x) else x for x in extracted]
+
+
 class DataFrameMapper(BaseEstimator, TransformerMixin):
     """
     Map Pandas data frame column subsets to their own
@@ -120,13 +124,12 @@ def transform(self, X):
 
         # If any of the extracted features is sparse, combine sparsely.
         # Otherwise, combine as normal arrays.
-        if any(sparse.issparse(fea) for fea in extracted):
-            stacked = sparse.hstack(extracted).tocsr()
-            # return a sparse matrix only if the mapper was initialized
-            # with sparse=True
-            if not self.sparse:
-                stacked = stacked.toarray()
+        if self.sparse:
+            if any(sparse.issparse(fea) for fea in extracted):
+                # fails if array in extracted has dtype=object
+                return sparse.hstack(extracted).tocsr()
+            else:
+                # convert to sparse
+                return sparse.csr_matrix(np.hstack(extracted))
         else:
-            stacked = np.hstack(extracted)
-
-        return stacked
+            return np.hstack(_sparse_to_dense(extracted))