Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix/linear shapes #288

Merged
merged 33 commits into from
Jul 30, 2024
Merged
Show file tree
Hide file tree
Changes from 26 commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
d7bbaec
user dot/4 on set_intercept
JoaquinIglesiasTurina Jun 17, 2024
3cf37fc
use dot/4 on linear regression predict
JoaquinIglesiasTurina Jun 17, 2024
d3ded77
add column target tests
JoaquinIglesiasTurina Jun 17, 2024
feb7e5f
better name for test
JoaquinIglesiasTurina Jun 17, 2024
c803223
working on svm
JoaquinIglesiasTurina Jun 22, 2024
4cde60b
svm supports multioutput
JoaquinIglesiasTurina Jun 22, 2024
0f1c0f7
check col and regular models are the same. invalid y raises
JoaquinIglesiasTurina Jun 30, 2024
d09fb53
move y shape validation to linear_helper function
JoaquinIglesiasTurina Jun 30, 2024
07626ea
add linear helper function for shape validation
JoaquinIglesiasTurina Jun 30, 2024
d3319fa
formatter
JoaquinIglesiasTurina Jun 30, 2024
b315c84
clean up test
JoaquinIglesiasTurina Jun 30, 2024
515453a
modify validate_y_shape for isotonic regression
JoaquinIglesiasTurina Jun 30, 2024
28bd4dd
add y shape validation and test to isotonic regression
JoaquinIglesiasTurina Jun 30, 2024
9dd1b88
formatter
JoaquinIglesiasTurina Jun 30, 2024
b5abefa
decouple validation from flattening helper to handle multioutput options
JoaquinIglesiasTurina Jul 1, 2024
45eab77
flatten linear input. no longer matches sklearn
JoaquinIglesiasTurina Jul 1, 2024
4d4aff1
add prediction test
JoaquinIglesiasTurina Jul 1, 2024
455e6b0
linear regression always returns {n_samples} vector
JoaquinIglesiasTurina Jul 1, 2024
95b8825
formatter
JoaquinIglesiasTurina Jul 10, 2024
149ead6
bayesian ridge fixed
JoaquinIglesiasTurina Jul 10, 2024
7af987c
remove wip tags
JoaquinIglesiasTurina Jul 10, 2024
0865950
removing wip tags
JoaquinIglesiasTurina Jul 10, 2024
d972a52
refactor test data
JoaquinIglesiasTurina Jul 10, 2024
c555ec9
wrote polynomial regression test
JoaquinIglesiasTurina Jul 10, 2024
4d334d3
wrote tests for logistic and ridge regression
JoaquinIglesiasTurina Jul 10, 2024
27e8cc6
logistic and ridge test pass
JoaquinIglesiasTurina Jul 10, 2024
55106de
Update lib/scholar/linear/linear_helpers.ex
JoaquinIglesiasTurina Jul 28, 2024
bde1551
Update lib/scholar/linear/linear_helpers.ex
JoaquinIglesiasTurina Jul 28, 2024
348e171
fixed valid_colum_vector? name
JoaquinIglesiasTurina Jul 28, 2024
0546cb3
fixed error messages
JoaquinIglesiasTurina Jul 28, 2024
af993d7
updated docs
JoaquinIglesiasTurina Jul 28, 2024
88066fd
proper formatting of predict docs
JoaquinIglesiasTurina Jul 28, 2024
c22d40d
ran formatter
JoaquinIglesiasTurina Jul 28, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions lib/scholar/linear/bayesian_ridge_regression.ex
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,9 @@ defmodule Scholar.Linear.BayesianRidgeRegression do
>
"""
deftransform fit(x, y, opts \\ []) do
{n_samples, _} = Nx.shape(x)
y = LinearHelpers.validate_y_shape(y, n_samples, __MODULE__)

opts = NimbleOptions.validate!(opts, @opts_schema)

opts =
Expand Down
7 changes: 6 additions & 1 deletion lib/scholar/linear/isotonic_regression.ex
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ defmodule Scholar.Linear.IsotonicRegression do
require Nx
import Nx.Defn, except: [transform: 2]
import Scholar.Shared
alias Scholar.Linear.LinearHelpers

@derive {
Nx.Container,
Expand Down Expand Up @@ -143,6 +144,9 @@ defmodule Scholar.Linear.IsotonicRegression do
}
"""
deftransform fit(x, y, opts \\ []) do
{n_samples} = Nx.shape(x)
y = LinearHelpers.validate_y_shape(y, n_samples, __MODULE__)

opts = NimbleOptions.validate!(opts, @opts_schema)

opts =
Expand All @@ -154,6 +158,7 @@ defmodule Scholar.Linear.IsotonicRegression do
{sample_weights, opts} = Keyword.pop(opts, :sample_weights, 1.0)
x_type = to_float_type(x)
x = to_float(x)

y = to_float(y)

sample_weights =
Expand Down Expand Up @@ -520,6 +525,6 @@ defmodule Scholar.Linear.IsotonicRegression do
x = Nx.new_axis(x, -1)
y = Nx.new_axis(y, -1)
model = Scholar.Linear.LinearRegression.fit(x, y)
model.coefficients[0][0] >= 0
model.coefficients[0] >= 0
end
end
33 changes: 32 additions & 1 deletion lib/scholar/linear/linear_helpers.ex
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,37 @@ defmodule Scholar.Linear.LinearHelpers do

@moduledoc false

@doc false
# Predicate: is `y` a {n_samples, 1} column vector? The exact-shape
# comparison already implies rank 2, so no separate rank check is needed.
def valid_column_vector(y, n_samples) do
  Nx.shape(y) == {n_samples, 1}
end

@doc false
# Normalizes a {n_samples, 1} column vector to a flat {n_samples} vector.
# Any tensor that is not such a column vector is returned unchanged, so
# callers can pass either layout transparently.
def flatten_column_vector(y, n_samples) do
  if valid_column_vector(y, n_samples) do
    Nx.flatten(y)
  else
    y
  end
end

@doc false
# Ensures the target `y` is — or can be flattened to — a rank-1
# {n_samples} tensor. Returns the flattened target, or raises
# ArgumentError (prefixed with `module_name`) for any other shape,
# e.g. multi-column targets.
def validate_y_shape(y, n_samples, module_name) do
  y = flatten_column_vector(y, n_samples)

  if Nx.rank(y) == 1 do
    y
  else
    raise ArgumentError,
          "#{module_name} expected y to have shape {n_samples}, got tensor with shape: #{inspect(Nx.shape(y))}"
  end
end

@doc false
def build_sample_weights(x, opts) do
x_type = to_float_type(x)
Expand Down Expand Up @@ -33,7 +64,7 @@ defmodule Scholar.Linear.LinearHelpers do
@doc false
defn set_intercept(coeff, x_offset, y_offset, fit_intercept?) do
if fit_intercept? do
y_offset - Nx.dot(coeff, x_offset)
y_offset - Nx.dot(coeff, [-1], x_offset, [-1])
else
Nx.tensor(0.0, type: Nx.type(coeff))
end
Expand Down
6 changes: 5 additions & 1 deletion lib/scholar/linear/linear_regression.ex
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ defmodule Scholar.Linear.LinearRegression do
>
"""
deftransform fit(x, y, opts \\ []) do
{n_samples, _} = Nx.shape(x)
y = LinearHelpers.flatten_column_vector(y, n_samples)
opts = NimbleOptions.validate!(opts, @opts_schema)

opts =
Expand All @@ -77,6 +79,8 @@ defmodule Scholar.Linear.LinearRegression do
opts

sample_weights = LinearHelpers.build_sample_weights(x, opts)
{n_samples, _} = Nx.shape(x)
y = LinearHelpers.flatten_column_vector(y, n_samples)

fit_n(x, y, sample_weights, opts)
end
Expand Down Expand Up @@ -124,7 +128,7 @@ defmodule Scholar.Linear.LinearRegression do
)
"""
defn predict(%__MODULE__{coefficients: coeff, intercept: intercept} = _model, x) do
Nx.dot(x, coeff) + intercept
Nx.dot(x, [-1], coeff, [-1]) + intercept
end

# Implements ordinary least-squares by estimating the
Expand Down
7 changes: 3 additions & 4 deletions lib/scholar/linear/logistic_regression.ex
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ defmodule Scholar.Linear.LogisticRegression do
"""
import Nx.Defn
import Scholar.Shared
alias Scholar.Linear.LinearHelpers

@derive {Nx.Container, containers: [:coefficients, :bias]}
defstruct [:coefficients, :bias]
Expand Down Expand Up @@ -94,10 +95,8 @@ defmodule Scholar.Linear.LogisticRegression do
"expected x to have shape {n_samples, n_features}, got tensor with shape: #{inspect(Nx.shape(x))}"
end

if Nx.rank(y) != 1 do
raise ArgumentError,
"expected y to have shape {n_samples}, got tensor with shape: #{inspect(Nx.shape(y))}"
end
{n_samples, _} = Nx.shape(x)
y = LinearHelpers.validate_y_shape(y, n_samples, __MODULE__)

opts = NimbleOptions.validate!(opts, @opts_schema)

Expand Down
3 changes: 3 additions & 0 deletions lib/scholar/linear/ridge_regression.ex
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,9 @@ defmodule Scholar.Linear.RidgeRegression do
}
"""
deftransform fit(x, y, opts \\ []) do
{n_samples, _} = Nx.shape(x)
y = LinearHelpers.flatten_column_vector(y, n_samples)

opts = NimbleOptions.validate!(opts, @opts_schema)

sample_weights? = opts[:sample_weights] != nil
Expand Down
9 changes: 4 additions & 5 deletions lib/scholar/linear/svm.ex
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ defmodule Scholar.Linear.SVM do
"""
import Nx.Defn
import Scholar.Shared
alias Scholar.Linear.LinearHelpers

@derive {Nx.Container, containers: [:coefficients, :bias]}
defstruct [:coefficients, :bias]
Expand Down Expand Up @@ -122,10 +123,8 @@ defmodule Scholar.Linear.SVM do
"expected x to have shape {n_samples, n_features}, got tensor with shape: #{inspect(Nx.shape(x))}"
end

if Nx.rank(y) != 1 do
raise ArgumentError,
"expected y to have shape {n_samples}, got tensor with shape: #{inspect(Nx.shape(y))}"
end
{n_samples, _} = Nx.shape(x)
y = LinearHelpers.validate_y_shape(y, n_samples, __MODULE__)

opts = NimbleOptions.validate!(opts, @opts_schema)

Expand Down Expand Up @@ -185,7 +184,7 @@ defmodule Scholar.Linear.SVM do
while {{coef, bias, has_converged, coef_optimizer_state, bias_optimizer_state},
{x, y, iterations, iter, eps, j = 0}},
j < num_classes do
y_j = y == j
y_j = y |> Nx.flatten() == j
coef_j = Nx.take(coef, j)
bias_j = Nx.take(bias, j)

Expand Down
27 changes: 27 additions & 0 deletions test/scholar/linear/bayesian_ridge_regression_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,33 @@ defmodule Scholar.Linear.BayesianRidgeRegressionTest do
assert_all_close(expected, predicted, atol: 1.0e-1)
end

test "toy bayesian ridge with column target" do
  # A {n_samples, 1} column target must produce the same fit and
  # predictions as the equivalent flat {n_samples} target.
  features = Nx.tensor([[1], [2], [6], [8], [10]])
  flat_target = Nx.tensor([1, 2, 6, 8, 10])
  column_target = Nx.new_axis(flat_target, -1)

  flat_model = BayesianRidgeRegression.fit(features, flat_target)
  column_model = BayesianRidgeRegression.fit(features, column_target)

  assert flat_model == column_model

  assert BayesianRidgeRegression.predict(flat_model, features) ==
           BayesianRidgeRegression.predict(column_model, features)
end

test "2 column target raises" do
  x = Nx.tensor([[1], [2], [6], [8], [10]])

  # Build an invalid {n_samples, 2} target by duplicating the column.
  column = Nx.new_axis(Nx.tensor([1, 2, 6, 8, 10]), -1)
  y = Nx.concatenate([column, column], axis: 1)

  expected_message =
    "Elixir.#{inspect(BayesianRidgeRegression)} expected y to have shape {n_samples}, got tensor with shape: #{inspect(Nx.shape(y))}"

  assert_raise ArgumentError, expected_message, fn ->
    BayesianRidgeRegression.fit(x, y)
  end
end

test "ridge vs bayesian ridge: parameters" do
x = Nx.tensor([[1, 1], [3, 4], [5, 7], [4, 1], [2, 6], [3, 10], [3, 2]])
y = Nx.tensor([1, 2, 3, 2, 0, 4, 5])
Expand Down
26 changes: 26 additions & 0 deletions test/scholar/linear/isotonic_regression_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,32 @@ defmodule Scholar.Linear.IsotonicRegressionTest do
assert model.preprocess == {}
end

test "fit column target" do
  # Fitting with a {n_samples, 1} target must be indistinguishable from
  # fitting with the flat {n_samples} target.
  x = Nx.tensor([2.0, 2.0, 3.0, 4.0, 5.0])
  y = Nx.tensor([2.0, 3.0, 7.0, 8.0, 9.0])
  weights = Nx.tensor([1, 3, 2, 7, 4])

  flat_model = IsotonicRegression.fit(x, y, sample_weights: weights)
  column_model = IsotonicRegression.fit(x, Nx.new_axis(y, -1), sample_weights: weights)

  assert flat_model == column_model
end

test "fit 2 column target raises" do
  x = Nx.tensor([2.0, 2.0, 3.0, 4.0, 5.0])
  sample_weights = Nx.tensor([1, 3, 2, 7, 4])

  # Duplicate the column to build an invalid {n_samples, 2} target.
  column = Nx.new_axis(Nx.tensor([2.0, 3.0, 7.0, 8.0, 9.0]), -1)
  y = Nx.concatenate([column, column], axis: 1)

  expected_message =
    "Elixir.#{inspect(IsotonicRegression)} expected y to have shape {n_samples}, got tensor with shape: #{inspect(Nx.shape(y))}"

  assert_raise ArgumentError, expected_message, fn ->
    IsotonicRegression.fit(x, y, sample_weights: sample_weights)
  end
end

test "fit with sample_weights and :increasing? set to false" do
x = Nx.tensor([2.0, 2.0, 3.0, 4.0, 5.0, 5.0, 6.0])
y = Nx.tensor([11, 12, 9, 7, 5, 4, 2])
Expand Down
48 changes: 30 additions & 18 deletions test/scholar/linear/linear_regression_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@ defmodule Scholar.Linear.LinearRegressionTest do
doctest LinearRegression

describe "fit" do
test "matches sklearn for shapes {1, 1}, {1, 1} and type {:f, 32}" do
test "test for shapes {1, 1}, {1, 1} and type {:f, 32}" do
a = Nx.tensor([[0.5666993856430054]])
b = Nx.tensor([[0.8904717564582825]])
expected_coeff = Nx.tensor([[0.0]])
expected_coeff = Nx.tensor([0.0])
expected_intercept = Nx.tensor([0.89047176])

%LinearRegression{coefficients: actual_coeff, intercept: actual_intercept} =
Expand All @@ -17,7 +17,7 @@ defmodule Scholar.Linear.LinearRegressionTest do
assert_all_close(expected_intercept, actual_intercept)
end

test "matches sklearn for shapes {4, 6}, {4} and type {:f, 32}" do
test "test for shapes {4, 6}, {4} and type {:f, 32}" do
a =
Nx.tensor([
[
Expand Down Expand Up @@ -76,7 +76,7 @@ defmodule Scholar.Linear.LinearRegressionTest do
assert_all_close(expected_intercept, actual_intercept, rtol: 1.0e-2, atol: 1.0e-3)
end

test "matches sklearn for shapes {6, 6}, {6, 1} and type {:f, 64}" do
test "test for shapes {6, 6}, {6, 1} and type {:f, 64}" do
a =
Nx.tensor([
[
Expand Down Expand Up @@ -141,14 +141,12 @@ defmodule Scholar.Linear.LinearRegressionTest do

expected_coeff =
Nx.tensor([
[
-0.3777002030151436,
-0.4445957357428203,
-0.14451413829286042,
0.31438593891571714,
-0.9484560114249797,
0.04914973264178196
]
-0.3777002030151436,
-0.4445957357428203,
-0.14451413829286042,
0.31438593891571714,
-0.9484560114249797,
0.04914973264178196
])

expected_intercept = Nx.tensor([1.31901913])
Expand All @@ -160,7 +158,7 @@ defmodule Scholar.Linear.LinearRegressionTest do
assert_all_close(expected_intercept, actual_intercept, rtol: 1.0e-2, atol: 1.0e-3)
end

test "matches sklearn for shapes {8, 6}, {8, 4} and type {:f, 32}" do
test "test for shapes {8, 6}, {8, 4} and type {:f, 32}" do
a =
Nx.tensor([
[
Expand Down Expand Up @@ -286,11 +284,11 @@ defmodule Scholar.Linear.LinearRegressionTest do
assert_all_close(expected_intercept, actual_intercept, rtol: 1.0e-1, atol: 1.0e-2)
end

test "matches sklearn for shapes {1, 1}, {1, 1} and type {:f, 32} and sample_weights" do
test "test for shapes {1, 1}, {1, 1} and type {:f, 32} and sample_weights" do
a = Nx.tensor([[0.3166404366493225]])
b = Nx.tensor([[0.6253954172134399]])
sample_weights = [0.2065236121416092]
expected_coeff = Nx.tensor([[0.0]])
expected_coeff = Nx.tensor([0.0])
expected_intercept = Nx.tensor([0.62539542])

%LinearRegression{coefficients: actual_coeff, intercept: actual_intercept} =
Expand Down Expand Up @@ -364,7 +362,7 @@ defmodule Scholar.Linear.LinearRegressionTest do
assert_all_close(expected_intercept, actual_intercept, rtol: 1.0e-3, atol: 1.0e-2)
end

test "matches sklearn for shapes {6, 6}, {6, 1} and type {:f, 64} and sample_weight" do
test "test for shapes {6, 6}, {6, 1} and type {:f, 64} and sample_weight" do
a =
Nx.tensor([
[
Expand Down Expand Up @@ -437,7 +435,7 @@ defmodule Scholar.Linear.LinearRegressionTest do
]

expected_coeff =
Nx.tensor([[-1.252728, 0.33221864, -0.23523702, -0.53585187, 0.00157968, -0.24489391]])
Nx.tensor([-1.252728, 0.33221864, -0.23523702, -0.53585187, 0.00157968, -0.24489391])

expected_intercept = Nx.tensor([1.52024138])

Expand All @@ -448,7 +446,7 @@ defmodule Scholar.Linear.LinearRegressionTest do
assert_all_close(expected_intercept, actual_intercept, rtol: 1.0e-2, atol: 1.0e-3)
end

test "matches sklearn for shapes {8, 6}, {8, 4} and type {:f, 32} and sample_weight" do
test "test for shapes {8, 6}, {8, 4} and type {:f, 32} and sample_weight" do
a =
Nx.tensor([
[
Expand Down Expand Up @@ -895,4 +893,18 @@ defmodule Scholar.Linear.LinearRegressionTest do
assert_all_close(expected_prediction, actual_prediction, rtol: 1.0e-3, atol: 1.0e-3)
end
end

describe "fit and predict with column target" do
  # Note: "colum" typo fixed in the describe name above.
  test "column target matches flat target" do
    x = Nx.tensor([[1], [2], [6], [8], [10]])
    y = Nx.tensor([1, 2, 6, 8, 10])

    # A {n_samples, 1} column target must yield the same model and
    # predictions as the flat {n_samples} target.
    lr = LinearRegression.fit(x, y)
    lr_column = LinearRegression.fit(x, Nx.new_axis(y, -1))
    pred = LinearRegression.predict(lr, x)
    pred_col = LinearRegression.predict(lr_column, x)
    assert lr == lr_column
    assert pred == pred_col
  end
end
end
16 changes: 15 additions & 1 deletion test/scholar/linear/logistic_regression_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,22 @@ defmodule Scholar.Linear.LogisticRegressionTest do
y = Nx.tensor([[0, 1], [1, 0]])

assert_raise ArgumentError,
"expected y to have shape {n_samples}, got tensor with shape: {2, 2}",
"Elixir.#{inspect(LogisticRegression)} expected y to have shape {n_samples}, got tensor with shape: {2, 2}",
fn -> LogisticRegression.fit(x, y, num_classes: 2) end
end
end

describe "column target tests" do
  # NOTE(review): removed a leftover `@tag :wip` — this PR's commits
  # ("remove wip tags", "removing wip tags") indicate wip tags were meant
  # to be stripped, and a lingering :wip tag can exclude the test from
  # normal runs depending on the ExUnit filter configuration.
  test "column target" do
    {x_train, _, y_train, _} = iris_data()

    # A {n_samples, 1} column target must yield the same model and
    # predictions as the flat {n_samples} target.
    model = LogisticRegression.fit(x_train, y_train, num_classes: 3)
    pred = LogisticRegression.predict(model, x_train)
    col_model = LogisticRegression.fit(x_train, Nx.new_axis(y_train, -1), num_classes: 3)
    col_pred = LogisticRegression.predict(col_model, x_train)
    assert model == col_model
    assert pred == col_pred
  end
end
end
Loading
Loading