Skip to content

Commit

Permalink
Fixed bugs in R client utility functions.
Browse files Browse the repository at this point in the history
  • Loading branch information
wendycwong committed Oct 16, 2024
1 parent 6450082 commit 2b09a4e
Show file tree
Hide file tree
Showing 7 changed files with 65 additions and 75 deletions.
56 changes: 28 additions & 28 deletions h2o-algos/src/main/java/hex/hglm/HGLM.java
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,7 @@ void fitEM(HGLMModel model, Job job, ScoringHistory scTrain, ScoringHistory scVa
HGLMTask.ResidualLLHTask rLlh = new HGLMTask.ResidualLLHTask(_job, _parms, _dinfo, ubeta, _state.get_beta(),
engineTask); // use equation 17 of the doc
rLlh.doAll(_dinfo._adaptedFrame);
// equation 17
tauEVar2 = calTauEvarEq17(rLlh._residualSquare, _state.get_tauEVar2(), cjInv, engineTask._ArjTArj, engineTask._oneOverN);

HGLMTask.ResidualLLHTask rLlh2 = new HGLMTask.ResidualLLHTask(_job, _parms, _dinfo, ubeta, beta, engineTask);
Expand All @@ -317,8 +318,14 @@ void fitEM(HGLMModel model, Job job, ScoringHistory scTrain, ScoringHistory scVa
// check to make sure determinant of V is positive, see section II.V of the doc
if (!checkPositiveG(engineTask._numLevel2Units, tMat))
Log.info("HGLM model building is stopped due to matrix G in section II.V of the doc is no longer PSD");
double logLikelihood = calHGLMllg(_state._nobs, tMat, tauEVar, model._output._arjtarj, rLlh2._sse_fixed,
rLlh2._yMinusXTimesZ); // equation 10
double logLikelihood2 = calHGLMllg(_state._nobs, tMat, tauEVar2, model._output._arjtarj, rLlh._sse_fixed,
rLlh2._yMinusXTimesZ); // equation 17
Log.info("likelihood use tauEVar from equation 10 " + logLikelihood + ", likelihood use tauEVar" +
" from equation 17 "+logLikelihood2);
// check if stopping conditions are satisfied
if (!progress(beta, ubeta, tMat, tauEVar2, scTrain, scValid, model, rLlh2))
if (!progress(beta, ubeta, tMat, tauEVar2, scTrain, scValid, model, rLlh))
return;
}
}
Expand All @@ -332,43 +339,36 @@ public boolean progress(double[] beta, double[][] ubeta, double[][] tmat, double
_state._iter++;
double[] betaDiff = new double[beta.length];
minus(betaDiff, beta, _state.get_beta());
double maxBetaDiff = maxMag(betaDiff)/maxMag(beta);
double maxBetaDiff = maxMag(betaDiff) / maxMag(beta);
double[][] tmatDiff = new double[tmat.length][tmat[0].length];
minus(tmatDiff, tmat, _state.get_T());
double maxTmatDiff = maxMag(tmatDiff)/maxMag(tmat);
double maxTmatDiff = maxMag(tmatDiff) / maxMag(tmat);
double[][] ubetaDiff = new double[ubeta.length][ubeta[0].length];
minus(ubetaDiff, ubeta, _state.get_ubeta());
double maxUBetaDiff = maxMag(ubetaDiff)/maxMag(ubeta);
double tauEVarDiff = Math.abs(tauEVar - _state.get_tauEVar())/tauEVar;
double maxUBetaDiff = maxMag(ubetaDiff) / maxMag(ubeta);
double tauEVarDiff = Math.abs(tauEVar - _state.get_tauEVar()) / tauEVar;
// calculate log likelihood with current parameter settings, standardize if parms._standardize and vice versa
double logLikelihood = calHGLMllg(_state._nobs, tmat, tauEVar, model._output._arjtarj, rLlh2._sse_fixed,
double logLikelihood = calHGLMllg(_state._nobs, tmat, tauEVar, model._output._arjtarj, rLlh2._sse_fixed,
rLlh2._yMinusXTimesZ);
boolean converged = ((maxBetaDiff < _parms._em_epsilon) && (maxTmatDiff < _parms._em_epsilon) && (maxUBetaDiff
< _parms._em_epsilon) && (tauEVarDiff < _parms._em_epsilon) && (logLikelihood < model._output._log_likelihood));
boolean converged = ((maxBetaDiff <= _parms._em_epsilon) && (maxTmatDiff <= _parms._em_epsilon) && (maxUBetaDiff
<= _parms._em_epsilon) && (tauEVarDiff <= _parms._em_epsilon));
ComputationStateHGLM.ComputationStateSimple simpleState = new ComputationStateHGLM.ComputationStateSimple(_state.get_beta(), _state.get_ubeta(),
_state.get_T(), _state.get_tauEVar());
if (!converged) { // update values in _state
try {
_state.set_beta(beta);
_state.set_ubeta(ubeta);
_state.set_T(tmat);
_state.set_tauEVar(tauEVar);
model._output._log_likelihood = logLikelihood;
if (_parms._score_each_iteration || _parms._score_iteration_interval / _state._iter == 0) {
model._output.setModelOutputFields(_state);
scoreAndUpdateModel(model, true, scTrain); // perform scoring and updating scoring history
if (_parms.valid() != null)
scoreAndUpdateModel(model, false, scValid);
} else {
scTrain.addIterationScore(_state._iter, model._output._log_likelihood, tauEVar);
}
} catch(Exception ex) {
_state.set_beta(simpleState._beta);
_state.set_ubeta(simpleState._ubeta);
_state.set_T(simpleState._tmat);
_state.set_tauEVar(tauEVar);
return false; // stop execution when calculation of loglikelihood is bad due to matrix inverse failure
_state.set_beta(beta);
_state.set_ubeta(ubeta);
_state.set_T(tmat);
_state.set_tauEVar(tauEVar);
model._output._log_likelihood = logLikelihood;
if (_parms._score_each_iteration || _parms._score_iteration_interval / _state._iter == 0) {
model._output.setModelOutputFields(_state);
scoreAndUpdateModel(model, true, scTrain); // perform scoring and updating scoring history
if (_parms.valid() != null)
scoreAndUpdateModel(model, false, scValid);
} else {
scTrain.addIterationScore(_state._iter, model._output._log_likelihood, tauEVar);
}

}
return !stop_requested() && !converged && (_state._iter < _parms._max_iterations);
}
Expand Down
2 changes: 1 addition & 1 deletion h2o-bindings/bin/custom/R/gen_hglm.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
} else {
stop("random_columns is required.")
}
if (!missing(group_column) {
if (!missing(group_column)) {
parms$group_column <- group_column
} else {
stop("group_column is required.")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,9 +90,10 @@ def generate_dataset(family, trainData, group_column, random_columns):
startVal = [-1.4313612, 0.6795744, 1.9795154, -3.1187255, 0.2058840, -1.6596187, 0.3460812, -0.7809777,
1.6617960, -0.5174034, 1.8273497, -2.4161541, 0.9474324, 2.3616221, 0.7710148, 0.2706556, 1.0541668]
# hglm_test/gaussian_0GC_allRC_2enum2numeric_p05oise_p08T_wIntercept_standardize
# hglm_test/gaussian_0GC_allRC_2enum2numeric_2p0noise_p5T_wIntercept
m = hglm(family=family, max_iterations=0, random_columns=random_columns, group_column=group_column,
tau_u_var_init = 0.08, tau_e_var_init = 0.05, random_intercept = True, gen_syn_data=True,
seed = 12345, standardize = True, initial_fixed_effects=startVal)
tau_u_var_init = 0.5, tau_e_var_init = 2, random_intercept = True, gen_syn_data=True,
seed = 12345, standardize = False, initial_fixed_effects=startVal)
m.train(training_frame=trainData, y = "response", x =myX)
f2 = m.predict(trainData)
finalDataset = trainData[names_without_response]
Expand Down
4 changes: 2 additions & 2 deletions h2o-r/h2o-package/R/hglm.R
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ h2o.hglm <- function(x,
} else {
stop("random_columns is required.")
}
if (!missing(group_column) {
if (!missing(group_column)) {
parms$group_column <- group_column
} else {
stop("group_column is required.")
Expand Down Expand Up @@ -256,7 +256,7 @@ h2o.hglm <- function(x,
} else {
stop("random_columns is required.")
}
if (!missing(group_column) {
if (!missing(group_column)) {
parms$group_column <- group_column
} else {
stop("group_column is required.")
Expand Down
11 changes: 11 additions & 0 deletions h2o-r/h2o-package/pkgdown/_pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,10 @@ reference:
- h2o.coef
- h2o.coef_names
- h2o.coef_norm
- h2o.coef_random
- h2o.coefs_random_names
- h2o.coefs_random_names_norm
- h2o.coef_random_norm
- h2o.coef_with_p_values
- h2o.colnames
- h2o.columns_by_type
Expand Down Expand Up @@ -172,6 +176,7 @@ reference:
- h2o.hist
- h2o.hit_ratio_table
- h2o.hour
- h2o.icc
- h2o.ice_plot
- h2o.ifelse
- h2o.import_hive_table
Expand Down Expand Up @@ -199,6 +204,7 @@ reference:
- h2o.kolmogorov_smirnov
- h2o.kurtosis
- h2o.learning_curve_plot
- h2o.level_2_names
- h2o.levels
- h2o.list_all_extensions
- h2o.list_api_extensions
Expand All @@ -222,10 +228,12 @@ reference:
- h2o.make_metrics
- h2o.makeGLMModel
- h2o.match
- h2o.matrix_T
- h2o.max
- h2o.modelSelection
- h2o.mean_per_class_error
- h2o.mean_residual_deviance
- h2o.mean_residual_fixed
- h2o.mean
- h2o.median
- h2o.melt
Expand Down Expand Up @@ -288,6 +296,7 @@ reference:
- h2o.residual_deviance
- h2o.residual_dof
- h2o.residual_analysis_plot
- h2o.residual_variance
- h2o.result
- h2o.rm
- h2o.rmse
Expand All @@ -304,6 +313,8 @@ reference:
- h2o.save_frame
- h2o.scale
- h2o.scoreHistory
- h2o.scoring_history
- h2o.scoring_history_valid
- h2o.scoreHistoryGAM
- h2o.sd
- h2o.sdev
Expand Down

This file was deleted.

20 changes: 20 additions & 0 deletions h2o-r/tests/testdir_algos/hglm/runit_GH_8487_coefs_check.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
setwd(normalizePath(dirname(R.utils::commandArgs(asValues=TRUE)$"f")))
source("../../../scripts/h2o-r-test-setup.R")
##
# Build HGLM Model and check against models from model outputs.
##

test.HGLMData1 <- function() {
  # Numeric tolerance for coefficient comparisons.
  tol <- 1e-4
  # Synthetic HGLM dataset: gaussian family, all random columns,
  # 2 enum + 2 numeric predictors, noise var 2.0, T init 0.5, with intercept.
  h2odata <- h2o.importFile(locate("smalldata/hglm_test/gaussian_0GC_allRC_2enum2numeric_2p0noise_p5T_wIntercept.gz"))
  yresp <- "response"
  random_columns <- c("C2", "C3", "C10", "C20")
  group_column <- "C1"
  # NOTE(review): test body is incomplete — no model is trained and no
  # expect_* assertions are made yet; the HGLM model build and coefficient
  # checks against reference outputs still need to be added.
}

doTest("Check HGLM model building and coefficient retrieval.", test.HGLMData1)


0 comments on commit 2b09a4e

Please sign in to comment.