Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Standard Scaler fit-transform interface #179

Merged
merged 9 commits into from
Dec 14, 2023
10 changes: 2 additions & 8 deletions lib/scholar/preprocessing.ex
Original file line number Diff line number Diff line change
Expand Up @@ -133,14 +133,8 @@ defmodule Scholar.Preprocessing do
>
"""
deftransform standard_scale(tensor, opts \\ []) do
  # Validate in the transform (plain Elixir) rather than in numerical code, so
  # malformed options raise before any tensor computation is built.
  opts = NimbleOptions.validate!(opts, @general_schema)

  # The scaling itself lives in StandardScaler; this function is kept as the
  # backward-compatible entry point and only forwards the validated options.
  Scholar.Preprocessing.StandardScaler.fit_transform(tensor, opts)
end

@doc """
Expand Down
46 changes: 46 additions & 0 deletions lib/scholar/preprocessing/standard_scaler.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
defmodule Scholar.Preprocessing.StandardScaler do
  @moduledoc """
  Standardizes a tensor by subtracting its mean and dividing by its
  standard deviation.

  The statistics are computed once with `fit/2` and stored in a
  `%Scholar.Preprocessing.StandardScaler{}` struct, which can then be
  applied to any tensor with `transform/2`. `fit_transform/2` performs
  both steps on the same tensor.

  Positions where the standard deviation is zero are left unchanged:
  the stored mean is set to `0.0` there and the divisor used by
  `transform/2` is `1.0`.
  """

  import Nx.Defn

  # Deriving Nx.Container lets a fitted scaler be passed into and returned
  # from `defn` functions; both fields hold tensors.
  @derive {Nx.Container, containers: [:deviation, :mean]}
  defstruct [:deviation, :mean]

  opts_schema = [
    axes: [
      type: {:custom, Scholar.Options, :axes, []},
      doc: """
      Axes to calculate the mean and standard deviation over. By default
      they are calculated over the whole tensor.
      """
    ]
  ]

  @opts_schema NimbleOptions.new!(opts_schema)

  @doc """
  Computes the mean and standard deviation of `tensor` and returns them
  in a `%Scholar.Preprocessing.StandardScaler{}` struct.

  Both statistics are reduced with `keep_axes: true`, so they broadcast
  against the input in `transform/2`.

  ## Options

  #{NimbleOptions.docs(@opts_schema)}

  Raises `NimbleOptions.ValidationError` if `opts` does not match the
  schema.
  """
  deftransform fit(tensor, opts \\ []) do
    # Keep the validated keyword list: validate!/2 returns the normalized
    # options, which is what the numerical code should see.
    opts = NimbleOptions.validate!(opts, @opts_schema)
    {std, mean} = fit_n(tensor, opts)

    %__MODULE__{deviation: std, mean: mean}
  end

  # Numerical part of fit/2. Returns `{std, mean}`.
  defnp fit_n(tensor, opts) do
    std = Nx.standard_deviation(tensor, axes: opts[:axes], keep_axes: true)
    mean = Nx.mean(tensor, axes: opts[:axes], keep_axes: true)

    # Where there is no spread, zero out the mean so that transform/2
    # (which divides by 1.0 in that case) returns the data untouched.
    mean = Nx.select(Nx.equal(std, 0), 0.0, mean)

    {std, mean}
  end

  @doc """
  Standardizes `tensor` using the statistics stored in a fitted scaler.

  The first argument must be a `%Scholar.Preprocessing.StandardScaler{}`
  as returned by `fit/2`.
  """
  deftransform transform(%__MODULE__{deviation: std, mean: mean}, tensor) do
    scale(tensor, std, mean)
  end

  @doc """
  Fits the scaler on `tensor` and immediately standardizes that same
  tensor.

  ## Options

  #{NimbleOptions.docs(@opts_schema)}
  """
  defn fit_transform(tensor, opts \\ []) do
    tensor
    |> fit(opts)
    |> transform(tensor)
  end

  # Applies `(tensor - mean) / std`, substituting 1.0 for a zero standard
  # deviation to avoid dividing by zero.
  defnp scale(tensor, std, mean) do
    (tensor - mean) / Nx.select(Nx.equal(std, 0), 1.0, std)
  end
end
25 changes: 25 additions & 0 deletions test/scholar/preprocessing/standard_scaler_test.exs
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
defmodule StandardScalerTest do
  use Scholar.Case, async: true
  alias Scholar.Preprocessing.StandardScaler

  describe "fit_transform/2" do
    test "applies standard scaling to data" do
      data = Nx.tensor([[1, -1, 2], [2, 0, 0], [0, 1, -1]])

      expected =
        Nx.tensor([
          [0.5212860703468323, -1.3553436994552612, 1.4596009254455566],
          [1.4596009254455566, -0.4170288145542145, -0.4170288145542145],
          [-0.4170288145542145, 0.5212860703468323, -1.3553436994552612]
        ])

      assert_all_close(StandardScaler.fit_transform(data), expected)
    end

    test "leaves data as it is when variance is zero" do
      data = 42.0
      expected = Nx.tensor(data)
      actual = StandardScaler.fit_transform(data)

      # Compare values numerically instead of with `==`: struct equality on
      # tensors depends on how the active backend stores its data.
      assert Nx.shape(actual) == Nx.shape(expected)
      assert Nx.type(actual) == Nx.type(expected)
      assert_all_close(actual, expected)
    end
  end

  describe "fit/2 and transform/2" do
    test "applying a fitted scaler matches fit_transform/2" do
      data = Nx.tensor([[1, -1, 2], [2, 0, 0], [0, 1, -1]])

      scaler = StandardScaler.fit(data)

      assert_all_close(
        StandardScaler.transform(scaler, data),
        StandardScaler.fit_transform(data)
      )
    end
  end
end
Loading