Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(steps): add transform for polynomial features #53

Merged
merged 8 commits into from
Apr 16, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions ibisml/steps/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from ibisml.steps.common import Cast, Drop, Mutate, MutateAt
from ibisml.steps.encode import CategoricalEncode, CountEncode, OneHotEncode
from ibisml.steps.feature_engineering import PolynomialFeatures
from ibisml.steps.feature_selection import ZeroVariance
from ibisml.steps.impute import FillNA, ImputeMean, ImputeMedian, ImputeMode
from ibisml.steps.standardize import ScaleMinMax, ScaleStandard
Expand All @@ -20,6 +21,7 @@
"Mutate",
"MutateAt",
"OneHotEncode",
"PolynomialFeatures",
"ScaleMinMax",
"ScaleStandard",
"ZeroVariance",
Expand Down
81 changes: 81 additions & 0 deletions ibisml/steps/feature_engineering.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
from __future__ import annotations

import functools
import operator
from collections import Counter
from itertools import combinations_with_replacement
from typing import Any, Iterable

import ibis.expr.types as ir

from ibisml.core import Metadata, Step
from ibisml.select import SelectionType, selector


class PolynomialFeatures(Step):
    """A step for generating polynomial features.

    For every combination (with replacement) of the selected columns whose
    total degree is between 2 and ``degree``, a new column containing the
    product of the combination's members is added to the table. Each new
    column is named ``poly_`` followed by the participating column names
    joined with ``_``; a column raised to a power ``p > 1`` appears as
    ``<name>^<p>`` (for example ``poly_x^2`` or ``poly_x_y``).

    Parameters
    ----------
    inputs
        A selection of columns to generate polynomial features.
        All columns must be numeric.
    degree : int, default `2`
        The maximum degree of polynomial features to generate. Must be an
        integer greater than 1. This argument is keyword-only.

    Raises
    ------
    TypeError
        If ``degree`` is not an integer.
    ValueError
        If ``degree`` is less than 2, or (at fit time) if a selected column
        is not numeric.

    Examples
    --------
    >>> import ibisml as ml

    Generate polynomial features of degree 2 for all numeric columns.

    >>> step = ml.PolynomialFeatures(ml.numeric(), degree=2)

    Generate polynomial features for a specific set of columns.

    >>> step = ml.PolynomialFeatures(["x", "y"], degree=2)
    """

    def __init__(self, inputs: SelectionType, *, degree: int = 2):
        # Normalise to a plain int up front so that a non-integer degree
        # (e.g. 2.5) fails here with a TypeError instead of later, inside
        # ``range()`` during ``fit_table``.
        degree = operator.index(degree)
        if degree < 2:
            raise ValueError("Degree must be greater than 1")

        self.inputs = selector(inputs)
        self.degree = degree

    def _repr(self) -> Iterable[tuple[str, Any]]:
        """Yield the ``(name, value)`` pairs describing this step."""
        yield ("", self.inputs)
        yield ("degree", self.degree)

    def fit_table(self, table: ir.Table, metadata: Metadata) -> None:
        """Validate the selected columns and record the feature combinations.

        Stores in ``self.combinations_`` one ``Counter`` per feature to
        generate, mapping each participating column name to its exponent.
        """
        columns = self.inputs.select_columns(table, metadata)

        for col_name in columns:
            if not isinstance(table[col_name], ir.NumericColumn):
                raise ValueError(
                    f"Cannot calculate polynomial features of {col_name!r} - "
                    "this column is not numeric"
                )

        combinations = []
        for d in range(2, self.degree + 1):
            # Counter collapses repeated names, e.g. ("x", "x") -> {"x": 2},
            # so each combination records the exponent of every column.
            combinations.extend(
                Counter(comb) for comb in combinations_with_replacement(columns, d)
            )
        self.combinations_ = combinations

    def transform_table(self, table: ir.Table) -> ir.Table:
        """Return ``table`` with one added column per fitted combination."""
        expressions = {}
        for combination in self.combinations_:
            # Multiply the participating columns together, raising a column
            # to a power only when its exponent is greater than one.
            exp = functools.reduce(
                operator.mul,
                [
                    table[col] ** p if p > 1 else table[col]
                    for col, p in combination.items()
                ],
            )
            name = "poly_" + "_".join(
                f"{col}^{p}" if p > 1 else col for col, p in combination.items()
            )
            expressions[name] = exp

        return table.mutate(**expressions)
27 changes: 27 additions & 0 deletions tests/test_feature_engineering.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import ibis
import pandas.testing as tm
import pytest

import ibisml as ml


@pytest.fixture()
def train_table():
    """Return a 100-row in-memory table with two numeric columns and one string column."""
    n_rows = 100
    data = {
        "x": list(range(n_rows)),  # 0, 1, ..., n_rows - 1
        "y": [10] * n_rows,  # constant numeric column
        "z": ["s"] * n_rows,  # constant string column
    }
    return ibis.memtable(data)


def test_PolynomialFeatures(train_table):
    """Degree-2 features over the numeric columns yield x^2, x*y and y^2."""
    poly_step = ml.PolynomialFeatures(ml.numeric(), degree=2)
    poly_step.fit_table(train_table, ml.core.Metadata())
    result = poly_step.transform_table(train_table)

    x_col = train_table.x
    y_col = train_table.y
    expected_columns = {
        "poly_x^2": x_col**2,
        "poly_x_y": x_col * y_col,
        "poly_y^2": y_col**2,
    }
    expected = train_table.mutate(**expected_columns)

    # First compare the two table expressions themselves.
    assert expected.equals(result)
    # Check if the transformed table has the expected data
    tm.assert_frame_equal(result.execute(), expected.execute())
Loading