Skip to content

Commit

Permalink
Fix/linear shapes (#288)
Browse files Browse the repository at this point in the history
* use dot/4 on set_intercept

* use dot/4 on linear regression predict

* add column target tests

* better name for test

* working on svm

* svm supports multioutput

* check col and regular models are the same. invalid y raises

* move y shape validation to linear_helper function

* add linear helper function for shape validation

* formatter

* clean up test

* modify validate_y_shape for isotonic regression

* add y shape validation and test to isotonic regression

* formatter

* decouple validation from flattening helper to handle multioutput options

* flatten linear input. no longer matches sklearn

* add prediction test

* linear regression always returns {n_samples} vector

* formatter

* bayesian ridge fixed

* remove wip tags

* removing wip tags

* refactor test data

* wrote polynomial regression test

* wrote tests for logistic and ridge regression

* logistic and ridge test pass

* Update lib/scholar/linear/linear_helpers.ex

Co-authored-by: José Valim <[email protected]>

* Update lib/scholar/linear/linear_helpers.ex

Co-authored-by: José Valim <[email protected]>

* fixed valid_colum_vector? name

* fixed error messages

* updated docs

* proper formatting of predict docs

* ran formatter

---------

Co-authored-by: José Valim <[email protected]>
  • Loading branch information
JoaquinIglesiasTurina and josevalim authored Jul 30, 2024
1 parent 66ec4c8 commit e8a45a3
Show file tree
Hide file tree
Showing 15 changed files with 308 additions and 116 deletions.
6 changes: 6 additions & 0 deletions lib/scholar/linear/bayesian_ridge_regression.ex
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,9 @@ defmodule Scholar.Linear.BayesianRidgeRegression do
>
"""
deftransform fit(x, y, opts \\ []) do
{n_samples, _} = Nx.shape(x)
y = LinearHelpers.validate_y_shape(y, n_samples, __MODULE__)

opts = NimbleOptions.validate!(opts, @opts_schema)

opts =
Expand Down Expand Up @@ -425,6 +428,9 @@ defmodule Scholar.Linear.BayesianRidgeRegression do

@doc """
Makes predictions with the given `model` on input `x`.
Output predictions have shape `{n_samples}` when train target is shaped either `{n_samples}` or `{n_samples, 1}`.
## Examples
iex> x = Nx.tensor([[1], [2], [6], [8], [10]])
Expand Down
10 changes: 9 additions & 1 deletion lib/scholar/linear/isotonic_regression.ex
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ defmodule Scholar.Linear.IsotonicRegression do
require Nx
import Nx.Defn, except: [transform: 2]
import Scholar.Shared
alias Scholar.Linear.LinearHelpers

@derive {
Nx.Container,
Expand Down Expand Up @@ -143,6 +144,9 @@ defmodule Scholar.Linear.IsotonicRegression do
}
"""
deftransform fit(x, y, opts \\ []) do
{n_samples} = Nx.shape(x)
y = LinearHelpers.validate_y_shape(y, n_samples, __MODULE__)

opts = NimbleOptions.validate!(opts, @opts_schema)

opts =
Expand All @@ -154,6 +158,7 @@ defmodule Scholar.Linear.IsotonicRegression do
{sample_weights, opts} = Keyword.pop(opts, :sample_weights, 1.0)
x_type = to_float_type(x)
x = to_float(x)

y = to_float(y)

sample_weights =
Expand Down Expand Up @@ -197,6 +202,9 @@ defmodule Scholar.Linear.IsotonicRegression do
@doc """
Makes predictions with the given `model` on input `x` and interpolating `function`.
Output predictions have shape `{n_samples}` when train target is shaped either `{n_samples}` or `{n_samples, 1}`.
Otherwise, predictions match train target shape.
## Examples
iex> x = Nx.tensor([1, 4, 7, 9, 10, 11])
Expand Down Expand Up @@ -520,6 +528,6 @@ defmodule Scholar.Linear.IsotonicRegression do
x = Nx.new_axis(x, -1)
y = Nx.new_axis(y, -1)
model = Scholar.Linear.LinearRegression.fit(x, y)
model.coefficients[0][0] >= 0
model.coefficients[0] >= 0
end
end
32 changes: 31 additions & 1 deletion lib/scholar/linear/linear_helpers.ex
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,36 @@ defmodule Scholar.Linear.LinearHelpers do

@moduledoc false

# Tells whether `y` is a single-column matrix with one row per sample,
# i.e. a rank-2 tensor whose shape equals `{n_samples, 1}`.
defp valid_column_vector?(y, n_samples) do
  expected_shape = {n_samples, 1}

  Nx.rank(y) == 2 and Nx.shape(y) == expected_shape
end

# Collapses a `{n_samples, 1}` column target into a flat vector with
# `Nx.flatten/1`; any other target is handed back untouched.
@doc false
def flatten_column_vector(y, n_samples) do
  case valid_column_vector?(y, n_samples) do
    true -> Nx.flatten(y)
    false -> y
  end
end

# Normalises and validates a training target.
#
# A `{n_samples, 1}` column target is flattened first. The result must then be
# a vector with exactly `n_samples` entries; otherwise an `ArgumentError` whose
# message names `module_name` is raised. Returns the (possibly flattened) `y`.
@doc false
def validate_y_shape(y, n_samples, module_name) do
  y = flatten_column_vector(y, n_samples)

  # Compare the whole shape rather than only the rank, so that a vector whose
  # length differs from the number of samples is rejected here as well.
  if Nx.shape(y) == {n_samples} do
    y
  else
    message =
      "#{inspect(module_name)} expected y to have shape {n_samples}, got tensor with shape: #{inspect(Nx.shape(y))}"

    raise ArgumentError, message
  end
end

@doc false
def build_sample_weights(x, opts) do
x_type = to_float_type(x)
Expand Down Expand Up @@ -33,7 +63,7 @@ defmodule Scholar.Linear.LinearHelpers do
@doc false
defn set_intercept(coeff, x_offset, y_offset, fit_intercept?) do
if fit_intercept? do
y_offset - Nx.dot(coeff, x_offset)
y_offset - Nx.dot(coeff, [-1], x_offset, [-1])
else
Nx.tensor(0.0, type: Nx.type(coeff))
end
Expand Down
9 changes: 8 additions & 1 deletion lib/scholar/linear/linear_regression.ex
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ defmodule Scholar.Linear.LinearRegression do
>
"""
deftransform fit(x, y, opts \\ []) do
{n_samples, _} = Nx.shape(x)
y = LinearHelpers.flatten_column_vector(y, n_samples)
opts = NimbleOptions.validate!(opts, @opts_schema)

opts =
Expand All @@ -77,6 +79,8 @@ defmodule Scholar.Linear.LinearRegression do
opts

sample_weights = LinearHelpers.build_sample_weights(x, opts)
{n_samples, _} = Nx.shape(x)
y = LinearHelpers.flatten_column_vector(y, n_samples)

fit_n(x, y, sample_weights, opts)
end
Expand Down Expand Up @@ -113,6 +117,9 @@ defmodule Scholar.Linear.LinearRegression do
@doc """
Makes predictions with the given `model` on input `x`.
Output predictions have shape `{n_samples}` when train target is shaped either `{n_samples}` or `{n_samples, 1}`.
Otherwise, predictions match train target shape.
## Examples
iex> x = Nx.tensor([[1.0, 2.0], [3.0, 2.0], [4.0, 7.0]])
Expand All @@ -124,7 +131,7 @@ defmodule Scholar.Linear.LinearRegression do
)
"""
defn predict(%__MODULE__{coefficients: coeff, intercept: intercept} = _model, x) do
Nx.dot(x, coeff) + intercept
Nx.dot(x, [-1], coeff, [-1]) + intercept
end

# Implements ordinary least-squares by estimating the
Expand Down
9 changes: 5 additions & 4 deletions lib/scholar/linear/logistic_regression.ex
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ defmodule Scholar.Linear.LogisticRegression do
"""
import Nx.Defn
import Scholar.Shared
alias Scholar.Linear.LinearHelpers

@derive {Nx.Container, containers: [:coefficients, :bias]}
defstruct [:coefficients, :bias]
Expand Down Expand Up @@ -94,10 +95,8 @@ defmodule Scholar.Linear.LogisticRegression do
"expected x to have shape {n_samples, n_features}, got tensor with shape: #{inspect(Nx.shape(x))}"
end

if Nx.rank(y) != 1 do
raise ArgumentError,
"expected y to have shape {n_samples}, got tensor with shape: #{inspect(Nx.shape(y))}"
end
{n_samples, _} = Nx.shape(x)
y = LinearHelpers.validate_y_shape(y, n_samples, __MODULE__)

opts = NimbleOptions.validate!(opts, @opts_schema)

Expand Down Expand Up @@ -205,6 +204,8 @@ defmodule Scholar.Linear.LogisticRegression do
@doc """
Makes predictions with the given `model` on inputs `x`.
Output predictions have shape `{n_samples}` when train target is shaped either `{n_samples}` or `{n_samples, 1}`.
## Examples
iex> x = Nx.tensor([[1.0, 2.0], [3.0, 2.0], [4.0, 7.0]])
Expand Down
3 changes: 3 additions & 0 deletions lib/scholar/linear/polynomial_regression.ex
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,9 @@ defmodule Scholar.Linear.PolynomialRegression do
@doc """
Makes predictions with the given `model` on input `x`.
Output predictions have shape `{n_samples}` when train target is shaped either `{n_samples}` or `{n_samples, 1}`.
Otherwise, predictions match train target shape.
## Examples
iex> x = Nx.tensor([[1.0, 2.0], [3.0, 2.0], [4.0, 7.0]])
Expand Down
6 changes: 6 additions & 0 deletions lib/scholar/linear/ridge_regression.ex
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,9 @@ defmodule Scholar.Linear.RidgeRegression do
}
"""
deftransform fit(x, y, opts \\ []) do
{n_samples, _} = Nx.shape(x)
y = LinearHelpers.flatten_column_vector(y, n_samples)

opts = NimbleOptions.validate!(opts, @opts_schema)

sample_weights? = opts[:sample_weights] != nil
Expand Down Expand Up @@ -196,6 +199,9 @@ defmodule Scholar.Linear.RidgeRegression do
@doc """
Makes predictions with the given `model` on input `x`.
Output predictions have shape `{n_samples}` when train target is shaped either `{n_samples}` or `{n_samples, 1}`.
Otherwise, predictions match train target shape.
## Examples
iex> x = Nx.tensor([[1.0, 2.0], [3.0, 2.0], [4.0, 7.0]])
Expand Down
11 changes: 6 additions & 5 deletions lib/scholar/linear/svm.ex
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ defmodule Scholar.Linear.SVM do
"""
import Nx.Defn
import Scholar.Shared
alias Scholar.Linear.LinearHelpers

@derive {Nx.Container, containers: [:coefficients, :bias]}
defstruct [:coefficients, :bias]
Expand Down Expand Up @@ -122,10 +123,8 @@ defmodule Scholar.Linear.SVM do
"expected x to have shape {n_samples, n_features}, got tensor with shape: #{inspect(Nx.shape(x))}"
end

if Nx.rank(y) != 1 do
raise ArgumentError,
"expected y to have shape {n_samples}, got tensor with shape: #{inspect(Nx.shape(y))}"
end
{n_samples, _} = Nx.shape(x)
y = LinearHelpers.validate_y_shape(y, n_samples, __MODULE__)

opts = NimbleOptions.validate!(opts, @opts_schema)

Expand Down Expand Up @@ -185,7 +184,7 @@ defmodule Scholar.Linear.SVM do
while {{coef, bias, has_converged, coef_optimizer_state, bias_optimizer_state},
{x, y, iterations, iter, eps, j = 0}},
j < num_classes do
y_j = y == j
y_j = y |> Nx.flatten() == j
coef_j = Nx.take(coef, j)
bias_j = Nx.take(bias, j)

Expand Down Expand Up @@ -252,6 +251,8 @@ defmodule Scholar.Linear.SVM do
@doc """
Makes predictions with the given model on inputs `x`.
Output predictions have shape `{n_samples}` when train target is shaped either `{n_samples}` or `{n_samples, 1}`.
## Examples
iex> x = Nx.tensor([[1.0, 2.0], [3.0, 2.0], [4.0, 7.0]])
iex> y = Nx.tensor([1, 0, 1])
Expand Down
27 changes: 27 additions & 0 deletions test/scholar/linear/bayesian_ridge_regression_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,33 @@ defmodule Scholar.Linear.BayesianRidgeRegressionTest do
assert_all_close(expected, predicted, atol: 1.0e-1)
end

# Fitting on a `{n_samples}` target and on the same target reshaped to
# `{n_samples, 1}` must yield identical models and identical predictions.
test "toy bayesian ridge with column target" do
  x = Nx.tensor([[1], [2], [6], [8], [10]])
  y = Nx.tensor([1, 2, 6, 8, 10])
  column_y = Nx.new_axis(y, -1)

  model = BayesianRidgeRegression.fit(x, y)
  pred = BayesianRidgeRegression.predict(model, x)

  col_model = BayesianRidgeRegression.fit(x, column_y)
  col_pred = BayesianRidgeRegression.predict(col_model, x)

  assert model == col_model
  assert pred == col_pred
end

# A target with two columns is not a valid single-output target, so `fit/2`
# is expected to raise with the module-qualified shape message.
test "2 column target raises" do
  x = Nx.tensor([[1], [2], [6], [8], [10]])
  column = [1, 2, 6, 8, 10] |> Nx.tensor() |> Nx.new_axis(-1)
  y = Nx.concatenate([column, column], axis: 1)

  message =
    "Scholar.Linear.BayesianRidgeRegression expected y to have shape {n_samples}, got tensor with shape: #{inspect(Nx.shape(y))}"

  assert_raise ArgumentError, message, fn -> BayesianRidgeRegression.fit(x, y) end
end

test "ridge vs bayesian ridge: parameters" do
x = Nx.tensor([[1, 1], [3, 4], [5, 7], [4, 1], [2, 6], [3, 10], [3, 2]])
y = Nx.tensor([1, 2, 3, 2, 0, 4, 5])
Expand Down
26 changes: 26 additions & 0 deletions test/scholar/linear/isotonic_regression_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,32 @@ defmodule Scholar.Linear.IsotonicRegressionTest do
assert model.preprocess == {}
end

# A `{n_samples, 1}` column target must produce the same fitted model as the
# equivalent flat `{n_samples}` target.
test "fit column target" do
  x = Nx.tensor([2.0, 2.0, 3.0, 4.0, 5.0])
  y = Nx.tensor([2.0, 3.0, 7.0, 8.0, 9.0])
  sample_weights = Nx.tensor([1, 3, 2, 7, 4])
  fit_opts = [sample_weights: sample_weights]

  model = IsotonicRegression.fit(x, y, fit_opts)
  col_model = IsotonicRegression.fit(x, Nx.new_axis(y, -1), fit_opts)

  assert model == col_model
end

# A two-column target cannot be flattened to `{n_samples}`, so `fit/3` is
# expected to raise with the module-qualified shape message.
test "fit 2 column target raises" do
  x = Nx.tensor([2.0, 2.0, 3.0, 4.0, 5.0])
  column = [2.0, 3.0, 7.0, 8.0, 9.0] |> Nx.tensor() |> Nx.new_axis(-1)
  y = Nx.concatenate([column, column], axis: 1)
  sample_weights = Nx.tensor([1, 3, 2, 7, 4])

  message =
    "Scholar.Linear.IsotonicRegression expected y to have shape {n_samples}, got tensor with shape: #{inspect(Nx.shape(y))}"

  assert_raise ArgumentError, message, fn ->
    IsotonicRegression.fit(x, y, sample_weights: sample_weights)
  end
end

test "fit with sample_weights and :increasing? set to false" do
x = Nx.tensor([2.0, 2.0, 3.0, 4.0, 5.0, 5.0, 6.0])
y = Nx.tensor([11, 12, 9, 7, 5, 4, 2])
Expand Down
Loading

0 comments on commit e8a45a3

Please sign in to comment.