Skip to content

Commit

Permalink
Get rid of TableRegressionModel by storing formula directly in model (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
nalimilan authored Nov 20, 2022
1 parent 9dc4d6b commit b728917
Show file tree
Hide file tree
Showing 12 changed files with 620 additions and 175 deletions.
2 changes: 2 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ StatsAPI = "82ae8749-77ed-4fe6-ae5f-f523153014b0"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c"
StatsModels = "3eaba693-59b7-5ba5-a881-562e759f1c8d"
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"

[compat]
CSV = "0.7, 0.8"
Expand All @@ -27,6 +28,7 @@ StatsAPI = "1.4"
StatsBase = "0.33.5"
StatsFuns = "0.6, 0.7, 0.8, 0.9, 1.0"
StatsModels = "0.6.23"
Tables = "1"
julia = "1"

[extras]
Expand Down
4 changes: 2 additions & 2 deletions docs/src/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ The most general approach to fitting a model is with the `fit` function, as in
julia> using Random
julia> fit(LinearModel, hcat(ones(10), 1:10), randn(MersenneTwister(12321), 10))
LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}}}:
LinearModel
Coefficients:
────────────────────────────────────────────────────────────────
Expand All @@ -41,7 +41,7 @@ This model can also be fit as
julia> using Random
julia> lm(hcat(ones(10), 1:10), randn(MersenneTwister(12321), 10))
LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}}}:
LinearModel
Coefficients:
────────────────────────────────────────────────────────────────
Expand Down
18 changes: 9 additions & 9 deletions docs/src/examples.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ julia> data = DataFrame(X=[1,2,3], Y=[2,4,7])
3 │ 3 7
julia> ols = lm(@formula(Y ~ X), data)
StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}}}, Matrix{Float64}}
LinearModel
Y ~ 1 + X
Expand Down Expand Up @@ -99,7 +99,7 @@ julia> data = DataFrame(X=[1,2,2], Y=[1,0,1])
3 │ 2 1
julia> probit = glm(@formula(Y ~ X), data, Binomial(), ProbitLink())
StatsModels.TableRegressionModel{GeneralizedLinearModel{GLM.GlmResp{Vector{Float64}, Binomial{Float64}, ProbitLink}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}}}, Matrix{Float64}}
GeneralizedLinearModel
Y ~ 1 + X
Expand Down Expand Up @@ -140,7 +140,7 @@ julia> quine = dataset("MASS", "quine")
131 rows omitted
julia> nbrmodel = glm(@formula(Days ~ Eth+Sex+Age+Lrn), quine, NegativeBinomial(2.0), LogLink())
StatsModels.TableRegressionModel{GeneralizedLinearModel{GLM.GlmResp{Vector{Float64}, NegativeBinomial{Float64}, LogLink}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}}}, Matrix{Float64}}
GeneralizedLinearModel
Days ~ 1 + Eth + Sex + Age + Lrn
Expand All @@ -158,7 +158,7 @@ Lrn: SL 0.296768 0.185934 1.60 0.1105 -0.0676559 0.661191
────────────────────────────────────────────────────────────────────────────
julia> nbrmodel = negbin(@formula(Days ~ Eth+Sex+Age+Lrn), quine, LogLink())
StatsModels.TableRegressionModel{GeneralizedLinearModel{GLM.GlmResp{Vector{Float64}, NegativeBinomial{Float64}, LogLink}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}}}, Matrix{Float64}}
GeneralizedLinearModel
Days ~ 1 + Eth + Sex + Age + Lrn
Expand All @@ -175,7 +175,7 @@ Age: F3 0.356955 0.247228 1.44 0.1488 -0.127602 0.841513
Lrn: SL 0.292138 0.18565 1.57 0.1156 -0.0717297 0.656006
────────────────────────────────────────────────────────────────────────────
julia> println("Estimated theta = ", round(nbrmodel.model.rr.d.r, digits=5))
julia> println("Estimated theta = ", round(nbrmodel.rr.d.r, digits=5))
Estimated theta = 1.27489
```
Expand Down Expand Up @@ -207,7 +207,7 @@ julia> form = dataset("datasets", "Formaldehyde")
6 │ 0.9 0.782
julia> lm1 = fit(LinearModel, @formula(OptDen ~ Carb), form)
StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}}}, Matrix{Float64}}
LinearModel
OptDen ~ 1 + Carb
Expand Down Expand Up @@ -256,7 +256,7 @@ julia> LifeCycleSavings = dataset("datasets", "LifeCycleSavings")
35 rows omitted
julia> fm2 = fit(LinearModel, @formula(SR ~ Pop15 + Pop75 + DPI + DDPI), LifeCycleSavings)
StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}}}, Matrix{Float64}}
LinearModel
SR ~ 1 + Pop15 + Pop75 + DPI + DDPI
Expand Down Expand Up @@ -364,7 +364,7 @@ julia> dobson = DataFrame(Counts = [18.,17,15,20,10,21,25,13,13],
9 │ 13.0 3 3
julia> gm1 = fit(GeneralizedLinearModel, @formula(Counts ~ Outcome + Treatment), dobson, Poisson())
StatsModels.TableRegressionModel{GeneralizedLinearModel{GLM.GlmResp{Vector{Float64}, Poisson{Float64}, LogLink}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}}}, Matrix{Float64}}
GeneralizedLinearModel
Counts ~ 1 + Outcome + Treatment
Expand Down Expand Up @@ -421,7 +421,7 @@ julia> round(optimal_bic.minimizer, digits = 5) # Optimal λ
0.40935
julia> glm(@formula(Volume ~ Height + Girth), trees, Normal(), PowerLink(optimal_bic.minimizer)) # Best model
StatsModels.TableRegressionModel{GeneralizedLinearModel{GLM.GlmResp{Vector{Float64}, Normal{Float64}, PowerLink}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}}}, Matrix{Float64}}
GeneralizedLinearModel
Volume ~ 1 + Height + Girth
Expand Down
4 changes: 2 additions & 2 deletions docs/src/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ julia> data = DataFrame(y = rand(rng, 100), x = categorical(repeat([1, 2, 3, 4],
julia> lm(@formula(y ~ x), data)
StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}}}, Matrix{Float64}}
LinearModel
y ~ 1 + x
Expand All @@ -108,7 +108,7 @@ julia> using StableRNGs
julia> data = DataFrame(y = rand(StableRNG(1), 100), x = repeat([1, 2, 3, 4], 25));
julia> lm(@formula(y ~ x), data, contrasts = Dict(:x => DummyCoding()))
StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}}}, Matrix{Float64}}
LinearModel
y ~ 1 + x
Expand Down
29 changes: 27 additions & 2 deletions src/GLM.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,14 @@ module GLM
import Base: (\), convert, show, size
import LinearAlgebra: cholesky, cholesky!
import Statistics: cor
import StatsBase: coef, coeftable, confint, deviance, nulldeviance, dof, dof_residual,
loglikelihood, nullloglikelihood, nobs, stderror, vcov, residuals, predict,
import StatsBase: coef, coeftable, coefnames, confint, deviance, nulldeviance, dof, dof_residual,
loglikelihood, nullloglikelihood, nobs, stderror, vcov,
residuals, predict, predict!,
fitted, fit, model_response, response, modelmatrix, r2, r², adjr2, adjr², PValue
import StatsFuns: xlogy
import SpecialFunctions: erfc, erfcinv, digamma, trigamma
import StatsModels: hasintercept
import Tables
export coef, coeftable, confint, deviance, nulldeviance, dof, dof_residual,
loglikelihood, nullloglikelihood, nobs, stderror, vcov, residuals, predict,
fitted, fit, fit!, model_response, response, modelmatrix, r2, r², adjr2, adjr²,
Expand Down Expand Up @@ -89,6 +91,29 @@ module GLM
pivoted_cholesky!(A; kwargs...) = cholesky!(A, RowMaximum(); kwargs...)
end

# Shared docstring fragment interpolated into the `fit`/`lm`/`glm` docstrings so the
# keyword-argument documentation stays consistent across all fitting entry points.
# NOTE: this string is user-visible documentation text — edits here change rendered docs.
const COMMON_FIT_KWARGS_DOCS = """
- `dropcollinear::Bool=true`: Controls whether or not a model matrix
less-than-full rank is accepted.
If `true` (the default) the coefficient for redundant linearly dependent columns is
`0.0` and all associated statistics are set to `NaN`.
Typically from a set of linearly-dependent columns the last ones are identified as redundant
(however, the exact selection of columns identified as redundant is not guaranteed).
- `wts::Vector=similar(y,0)`: Prior frequency (a.k.a. case) weights of observations.
Such weights are equivalent to repeating each observation a number of times equal
to its weight. Do note that this interpretation gives equal point estimates but
different standard errors from analytical (a.k.a. inverse variance) weights and
from probability (a.k.a. sampling) weights which are the default in some other
software.
Can be length 0 to indicate no weighting (default).
- `contrasts::AbstractDict{Symbol}=Dict{Symbol,Any}()`: a `Dict` mapping term names
(as `Symbol`s) to term types (e.g. `ContinuousTerm`) or contrasts
(e.g., `HelmertCoding()`, `SeqDiffCoding(; levels=["a", "b", "c"])`,
etc.). If contrasts are not provided for a variable, the appropriate
term type will be guessed based on the data type from the data column:
any numeric data is assumed to be continuous, and any non-numeric data
is assumed to be categorical (with `DummyCoding()` as the default contrast type).
"""

include("linpred.jl")
include("lm.jl")
include("glmtools.jl")
Expand Down
17 changes: 17 additions & 0 deletions src/deprecated.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,20 @@

# The positional `level` argument was replaced by a `level` keyword; these shims
# keep the old call form working while emitting a deprecation warning.
@deprecate confint(obj::LinearModel, level::Real) confint(obj, level=level)
@deprecate confint(obj::AbstractGLM, level::Real) confint(obj, level=level)

"""
    Base.getproperty(mm::LinPredModel, f::Symbol)

Deprecation shim for code written against the old `TableRegressionModel` wrapper.

- `mm.model` used to return the wrapped model; it now warns and returns `mm` itself.
- `mm.mf` used to return the `ModelFrame`; it now warns and returns a skeleton
  `ModelFrame` carrying only the formula (the other fields are `nothing`), since
  the model no longer stores a model frame. Use `formula(mm)` instead.
- Any other property falls through to the actual field via `getfield`.
"""
function Base.getproperty(mm::LinPredModel, f::Symbol)
    if f === :model
        Base.depwarn("accessing the `model` field of GLM.jl models is deprecated, " *
                     "as they are no longer wrapped in a `TableRegressionModel` " *
                     "and can be used directly now", :getproperty)
        return mm
    elseif f === :mf
        # Fix: the original concatenation lacked a space after the period,
        # producing "…`TableRegressionModel`.Use `formula(m)`…" in the warning.
        Base.depwarn("accessing the `mf` field of GLM.jl models is deprecated, " *
                     "as they are no longer wrapped in a `TableRegressionModel`. " *
                     "Use `formula(m)` to access the model formula.", :getproperty)
        form = formula(mm)
        # Skeleton ModelFrame: only the formula is meaningful; data/schema slots
        # are `nothing` because the model no longer retains them.
        return ModelFrame{Nothing, typeof(mm)}(form, nothing, nothing, typeof(mm))
    else
        return getfield(mm, f)
    end
end
5 changes: 2 additions & 3 deletions src/ftest.jl
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,7 @@ julia> model = lm(@formula(Result ~ 1 + Treatment), dat);
julia> bigmodel = lm(@formula(Result ~ 1 + Treatment + Other), dat);
julia> ftest(nullmodel.model, model.model)
julia> ftest(nullmodel, model)
F-test: 2 models fitted on 12 observations
─────────────────────────────────────────────────────────────────
DOF ΔDOF SSR ΔSSR R² ΔR² F* p(>F)
Expand All @@ -118,7 +117,7 @@ F-test: 2 models fitted on 12 observations
[2] 3 1 0.1283 -3.1008 0.9603 0.9603 241.6234 <1e-07
─────────────────────────────────────────────────────────────────
julia> ftest(nullmodel.model, model.model, bigmodel.model)
julia> ftest(nullmodel, model, bigmodel)
F-test: 3 models fitted on 12 observations
─────────────────────────────────────────────────────────────────
DOF ΔDOF SSR ΔSSR R² ΔR² F* p(>F)
Expand Down
Loading

0 comments on commit b728917

Please sign in to comment.