continue to fix metrics passing to clients
wendycwong committed Oct 3, 2024
1 parent d6a969c commit 952baff
Showing 9 changed files with 94 additions and 25 deletions.
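
This commit threads a new `gen_syn_data` flag from the Java backend (HGLM, HGLMScore) through the REST schema (HGLMV3) and the Python/R clients, alongside fixes to the coefficient tables passed back to clients. A minimal sketch of the intended client-side workflow, assuming a running H2O cluster; the file path and column names below are illustrative, only the parameter names come from this diff:

```python
import h2o
from h2o.estimators import H2OHGLMEstimator

h2o.init()
train = h2o.import_file("path/to/hglm_data.csv")   # illustrative path
train["group"] = train["group"].asfactor()         # illustrative column names

# With gen_syn_data=True the backend forces max_iterations to 0 and predict()
# returns X*beta + Z*ubeta plus a Gaussian draw scaled by tau_e_var_init,
# i.e. synthetic responses rather than fitted values.
hglm = H2OHGLMEstimator(group_column="group", random_columns=["x1"],
                        tau_u_var_init=0.08, tau_e_var_init=0.06,
                        random_intercept=False, gen_syn_data=True)
hglm.train(x=["x1", "x2"], y="response", training_frame=train)
synthetic_response = hglm.predict(train)
```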
8 changes: 7 additions & 1 deletion h2o-algos/src/main/java/hex/hglm/HGLM.java
@@ -180,6 +180,12 @@ else if (!trainFrame.vec(_parms._group_column).isCategorical())
if (!goodRandomColumns)
error("random_columns", " can only contain columns in the training frame.");
}

if (_parms._gen_syn_data) {
_parms._max_iterations = 0;
if (_parms._tau_e_var_init <= 0)
error("tau_e_var_init", "If gen_syn_data is true, tau_e_var_init must be > 0.");
}
}
}

@@ -232,7 +238,7 @@ public void computeImpl() {
}

private TwoDimTable generateSummary(HGLMModel.HGLMModelOutput modelOutput) {
String[] names = new String[]{"iteration", "loglikelihood", "loglikelihood_valid"};
String[] names = new String[]{"iteration", "loglikelihood", "loglikelihood2"};
String[] types = new String[]{"int", "double", "double"};
String[] formats = new String[]{"%d", "%.5f", "%.5f"};
TwoDimTable summary = new TwoDimTable("HGLM Model", "summary", new String[]{""}, names, types, formats, "");
21 changes: 21 additions & 0 deletions h2o-algos/src/main/java/hex/hglm/HGLMModel.java
@@ -10,6 +10,7 @@
import water.udf.CFuncRef;

import java.io.Serializable;
import java.util.Arrays;

import static hex.glm.GLMModel.GLMParameters.Family.gaussian;
import static hex.hglm.HGLMModel.HGLMParameters.Method.EM;
@@ -102,6 +103,7 @@ public static class HGLMParameters extends Model.Parameters {
public boolean _showFixedMatVecs = false; // internal parameter, if true, will show AfjTY, ArjTY, ArjTArj, AfjTAfj, AfjTArj
public int _score_iteration_interval = 5;
public boolean _score_each_iteration = false;
public boolean _gen_syn_data = false;

@Override
public String algoName() {
@@ -317,4 +319,23 @@ protected AutoBuffer writeAll_impl(AutoBuffer ab) {
protected Keyed readAll_impl(AutoBuffer ab, Futures fs) {
return super.readAll_impl(ab, fs);
}

@Override
public String toString() {
StringBuilder sb = new StringBuilder();
sb.append(super.toString());
sb.append(" loglikelihood: "+this._output._log_likelihood);
sb.append(" fixed effect coefficients: "+ Arrays.toString(this._output._beta));
int numLevel2 = this._output._ubeta.length;
for (int index=0; index<numLevel2; index++)
sb.append(" standard error of random effects for level 2 index " + index + ": "+this._output._tmat[index][index]);
/* sb.append(" standard error of residual error: "+_var_residual);
sb.append(" ICC: "+ Arrays.toString(_icc));
sb.append(" loglikelihood: "+_log_likelihood);
sb.append(" iterations taken to build model: " + _iterations);
sb.append(" coefficients for fixed effect: "+Arrays.toString(_beta));
for (int index=0; index<numLevel2; index++)
sb.append(" coefficients for random effect for level 2 index: "+index+": "+Arrays.toString(_ubeta[index]));*/
return sb.toString();
}
}
6 changes: 3 additions & 3 deletions h2o-algos/src/main/java/hex/hglm/HGLMScore.java
@@ -50,8 +50,8 @@ public HGLMScore(final Job j, final HGLMModel model, DataInfo dinfo, final Strin
_dinfo = dinfo;
_computeMetrics = computeMetrics; // can be true only if the response column is available to calculate the loglikelihood
_makePredictions = makePredictions;
_beta = model._output._beta;
_ubeta = model._output._ubeta;
_beta = model._output._beta; // non-standardized/non-normalized coefficients
_ubeta = model._output._ubeta; // non-standardized/non-normalized coefficients
_predDomains = respDomain;
_nclass = model._output.nclasses();
_parms = model._parms;
@@ -130,7 +130,7 @@ public double[] scoreRow(DataInfo.Row r, double[] preds, double[] xji, double[]
fillInRandomRowValues(r, zji, _parms, _randomCatIndices, _randomNumIndices, _randomCatArrayStartIndices,
_predStartIndexRandom, _dinfo, _randomSlopeToo, _randomIntercept);
preds[0] = innerProduct(xji, _beta) + innerProduct(zji, _ubeta[level2Index]) + r.offset;
preds[0] = _parms._max_iterations == 0 ? preds[0]+randomObj.nextGaussian() : preds[0];
preds[0] = _parms._gen_syn_data ? preds[0]+randomObj.nextGaussian()*_parms._tau_e_var_init : preds[0];
return preds;
}
}
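
The scoring change above is the core of the synthetic-data path: the row prediction is the fixed-effect inner product plus the random-effect inner product for the row's level-2 group (plus offset), and, when `gen_syn_data` is set, a standard-normal draw multiplied by `_tau_e_var_init`. A rough NumPy sketch of the same arithmetic, assuming pre-expanded design rows (H2O's DataInfo row handling is omitted):

```python
import numpy as np

def score_row(xji, beta, zji, ubeta_level2, offset=0.0,
              gen_syn_data=False, tau_e_var_init=0.0,
              rng=np.random.default_rng()):
    # fixed effects + random effects for this row's level-2 group + offset
    pred = float(np.dot(xji, beta) + np.dot(zji, ubeta_level2) + offset)
    if gen_syn_data:
        # the diff multiplies the standard-normal draw by tau_e_var_init directly
        pred += rng.standard_normal() * tau_e_var_init
    return pred
```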
21 changes: 13 additions & 8 deletions h2o-algos/src/main/java/hex/schemas/HGLMModelV3.java
@@ -5,6 +5,7 @@
import water.api.schemas3.ModelOutputSchemaV3;
import water.api.schemas3.ModelSchemaV3;
import water.api.schemas3.TwoDimTableV3;
import water.util.Log;
import water.util.TwoDimTable;

import java.util.Arrays;
@@ -63,8 +64,10 @@ public HGLMModelOutputV3 fillFromImpl(HGLMModel.HGLMModelOutput impl) {
beta_normalized = impl._beta_normalized;
ubeta = impl._ubeta;
ubeta_normalized = impl._ubeta_normalized;
fixed_coefficients_table = new TwoDimTableV3();
fixed_coefficients_table.fillFromImpl(generateCoeffTable("fixed effect coefficients",
"HGLM fixed effect coefficients", beta, beta_normalized, fixed_coefficient_names));
random_coefficients_table = new TwoDimTableV3();
random_coefficients_table.fillFromImpl(generate2DCoeffTable("random effect coefficients",
"HGLM random effect coefficients", ubeta, ubeta_normalized, random_coefficient_names,
random_coefficient_names_normalized, impl._group_column_names));
@@ -75,8 +78,8 @@ public HGLMModelOutputV3 fillFromImpl(HGLMModel.HGLMModelOutput impl) {
public static TwoDimTable generateCoeffTable(String title1, String title2, double[] coeffs, double[] coeffs_normalized,
String[] coeffNames) {
String[] colnames = new String[] {"coefficient values", "standardized coefficient values"};
String[] colTypes = new String[] {"%.5f", "%.5f"};
String[] colFormats = new String[] {"double", "double"};
String[] colFormats = new String[] {"%.5f", "%.5f"};
String[] colTypes = new String[] {"double", "double"};
TwoDimTable tdt = new TwoDimTable(title1, title2, coeffNames, colnames, colTypes, colFormats, "names");
int tableLen = coeffs.length;
for (int index=0; index<tableLen; index++) {
@@ -110,14 +113,15 @@ public static TwoDimTable generate2DCoeffTable(String title1, String title2, dou
double[] fCoeffValues = flattenArray(coeffsUsed);
double[] fCoeffValuesNormalized = flattenArray(coeffsNormalizedUsed);
String[] fCoeffNames = extendCoeffNames(coeffNamesused, numLevel2Index);
String[] fLevel2Vals = extendLevel2Ind(level2Domain, coeffsUsed.length);
String[] fLevel2Vals = extendLevel2Ind(level2Domain, coeffsUsed[0].length);

String[] colnames = new String[] {"coefficient names", "coefficient values", "standardized coefficient values"};
String[] colTypes = new String[] {"%s", "%.5f", "%.5f"};
String[] colFormats = new String[] {"string", "double", "double"};
String[] colFormats = new String[] {"%s", "%.5f", "%.5f"};
String[] colTypes = new String[] {"string", "double", "double"};
TwoDimTable tdt = new TwoDimTable(title1, title2, fLevel2Vals, colnames, colTypes, colFormats, "names");
int tableLen = fCoeffValues.length;
int tableLen = fCoeffNames.length;
for (int index=0; index<tableLen; index++) {
Log.info("index "+index);
tdt.set(index, 0, fCoeffNames[index]);
tdt.set(index, 1, fCoeffValues[index]);
tdt.set(index, 2, fCoeffValuesNormalized[index]);
@@ -128,7 +132,8 @@ public static TwoDimTable generate2DCoeffTable(String title1, String title2, dou
public static String[] extendLevel2Ind(String[] level2Domain, int numCoeff) {
int levelIndNum = level2Domain.length;
String[][] extendedDomain = new String[levelIndNum][numCoeff];
for (int index=0; index<levelIndNum; index++) {
int extendLen = extendedDomain.length;
for (int index=0; index<extendLen; index++) {
Arrays.fill(extendedDomain[index], level2Domain[index]);
}
return flattenArray(extendedDomain);
@@ -139,7 +144,7 @@ public static String[] extendCoeffNames(String[] coeffNames, int numLevel2Ind) {
String[] extendedCoeffNames = new String[numCoeff*numLevel2Ind];
int indexStart;
for (int index=0; index<numLevel2Ind; index++) {
indexStart = index*numLevel2Ind;
indexStart = index*numCoeff;
System.arraycopy(coeffNames, 0, extendedCoeffNames, indexStart, numCoeff);
}
return extendedCoeffNames;
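
The fixes above correct how per-group random coefficients are flattened into a single table: level-2 labels are repeated once per coefficient, coefficient names are repeated once per level-2 group with a copy offset of `index * numCoeff` (not `index * numLevel2Ind`), and the row count follows the flattened names. A small Python sketch of the corrected layout, with hypothetical names and groups:

```python
coeff_names = ["intercept", "x1"]               # random coefficients per group
level2_domain = ["groupA", "groupB", "groupC"]  # level-2 groups

# extendCoeffNames: one block of names per level-2 group,
# each block written at offset index * len(coeff_names)
extended_names = coeff_names * len(level2_domain)

# extendLevel2Ind: each level-2 label repeated once per coefficient
extended_levels = [lvl for lvl in level2_domain for _ in coeff_names]

# the table has one row per flattened coefficient name (the corrected tableLen)
for row, (lvl, name) in enumerate(zip(extended_levels, extended_names)):
    print(row, lvl, name)  # 0 groupA intercept, 1 groupA x1, 2 groupB intercept, ...
```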
7 changes: 6 additions & 1 deletion h2o-algos/src/main/java/hex/schemas/HGLMV3.java
@@ -41,7 +41,8 @@ public static final class HGLMParametersV3 extends ModelParametersSchemaV3<HGLMM
"method",
"em_epsilon",
"random_intercept",
"group_column"
"group_column",
"gen_syn_data"
};

@API(help = "Perform scoring for every score_iteration_interval iterations.", level = Level.secondary)
@@ -123,5 +124,9 @@

@API(help="group_column is the column that is categorical and used to generate the groups in HGLM")
public String group_column;

@API(help="if true, add gaussian noise with variance specified in parms._tau_e_var_init.",
direction=Direction.INPUT, gridable=true)
public boolean gen_syn_data;
}
}
18 changes: 9 additions & 9 deletions h2o-algos/src/test/java/hex/hglm/HGLMBasicTest.java
@@ -732,7 +732,7 @@ public void testCoeffDeNNormalizationWithRandomIntercept() {
.build();
Scope.track(ubetaFrameStandardize);

Frame fr = parseTestFile("smalldata/glm_test/semiconductor.csv");
Frame fr = parseTestFile("smalldata/hglm_test/semiconductor.csv");
fr.replace(0, fr.vec(0).toCategoricalVec()).remove();
DKV.put(fr);
Scope.track(fr);
@@ -811,7 +811,7 @@ public void testCoeffDeNNormalizationWORandomIntercept() {
.build();
Scope.track(ubetaFrameStandardize);

Frame fr = parseTestFile("smalldata/glm_test/semiconductor.csv");
Frame fr = parseTestFile("smalldata/hglm_test/semiconductor.csv");
fr.replace(0, fr.vec(0).toCategoricalVec()).remove();
DKV.put(fr);
Scope.track(fr);
@@ -892,7 +892,7 @@ public void testRandomInterceptOnly() {
public void testSemiconductor() {
try {
Scope.enter();
Frame fr = parseTestFile("smalldata/glm_test/semiconductor.csv");
Frame fr = parseTestFile("smalldata/hglm_test/semiconductor.csv");
fr.replace(0, fr.vec(0).toCategoricalVec()).remove();
DKV.put(fr);
Scope.track(fr);
@@ -947,15 +947,15 @@ public void testPredictionMetricsSumaryScoringHistoryWRIntercept() {
.build();
Scope.track(ubetaInitFrame);

Frame fr = parseTestFile("smalldata/glm_test/semiconductor.csv");
Frame validFr = parseTestFile("smalldata/glm_test/semiconductor.csv");
Frame fr = parseTestFile("smalldata/hglm_test/semiconductor.csv");
Frame validFr = parseTestFile("smalldata/hglm_test/semiconductor.csv");
fr.replace(0, fr.vec(0).toCategoricalVec()).remove();
validFr.replace(0, validFr.vec(0).toCategoricalVec()).remove();
DKV.put(fr);
DKV.put(validFr);
Scope.track(fr);
Scope.track(validFr);
SplitFrame sf = new SplitFrame(validFr, new double[]{0.5, 0.5}, new Key[]{Key.make("train.hex"), Key.make("test.hex")});
SplitFrame sf = new SplitFrame(validFr, new double[]{0.1, 0.9}, new Key[]{Key.make("train.hex"), Key.make("test.hex")});
sf.exec().get();
Key[] ksplits = sf._destination_frames;
Frame tr = DKV.get(ksplits[0]).get();
@@ -1017,8 +1017,8 @@ public void testPredictionMetricsSumaryScoringHistoryWORIntercept() {
.build();
Scope.track(ubetaInitFrame);

Frame fr = parseTestFile("smalldata/glm_test/semiconductor.csv");
Frame validFr = parseTestFile("smalldata/glm_test/semiconductor.csv");
Frame fr = parseTestFile("smalldata/hglm_test/semiconductor.csv");
Frame validFr = parseTestFile("smalldata/hglm_test/semiconductor.csv");
fr.replace(0, fr.vec(0).toCategoricalVec()).remove();
validFr.replace(0, validFr.vec(0).toCategoricalVec()).remove();
DKV.put(fr);
@@ -1110,7 +1110,7 @@ public void fillDataRows(Frame fr, int rowInd, String[] coefNames, String[] rCoe
public void testMultiChunkData(){
/* try {
Scope.enter();
Frame fr = parseTestFile("smalldata/glm_test/HGLM_5KRows_100Z.csv");
Frame fr = parseTestFile("smalldata/hglm_test/HGLM_5KRows_100Z.csv");
fr.replace(0, fr.vec(0).toCategoricalVec()).remove();
fr.replace(1, fr.vec(1).toCategoricalVec()).remove();
fr.replace(2, fr.vec(2).toCategoricalVec()).remove();
19 changes: 19 additions & 0 deletions h2o-py/h2o/estimators/hglm.py
@@ -51,6 +51,7 @@ def __init__(self,
em_epsilon=1e-06, # type: float
random_intercept=True, # type: bool
group_column=None, # type: Optional[str]
gen_syn_data=False, # type: bool
):
"""
:param model_id: Destination id for this model; auto-generated if not specified.
@@ -159,6 +160,9 @@ def __init__(self,
:param group_column: group_column is the column that is categorical and used to generate the groups in HGLM
Defaults to ``None``.
:type group_column: str, optional
:param gen_syn_data: if true, add gaussian noise with variance specified in parms._tau_e_var_init.
Defaults to ``False``.
:type gen_syn_data: bool
"""
super(H2OHGLMEstimator, self).__init__()
self._parms = {}
@@ -191,6 +195,7 @@ def __init__(self,
self.em_epsilon = em_epsilon
self.random_intercept = random_intercept
self.group_column = group_column
self.gen_syn_data = gen_syn_data

@property
def training_frame(self):
@@ -606,4 +611,18 @@ def group_column(self, group_column):
assert_is_type(group_column, None, str)
self._parms["group_column"] = group_column

@property
def gen_syn_data(self):
"""
if true, add gaussian noise with variance specified in parms._tau_e_var_init.
Type: ``bool``, defaults to ``False``.
"""
return self._parms.get("gen_syn_data")

@gen_syn_data.setter
def gen_syn_data(self, gen_syn_data):
assert_is_type(gen_syn_data, None, bool)
self._parms["gen_syn_data"] = gen_syn_data


@@ -21,7 +21,7 @@ def test_define_dataset():
family = 'gaussian' # can be any valid GLM families
nrow = 40000
nenum = 6
nreal = 6 # last one is the response
nreal = 1 # last one is the response
enum_columns = pyunit_utils.random_dataset_enums_only(nrow, nenum, factorL=8, misFrac=0.0)
real_columns = pyunit_utils.random_dataset_real_only(nrow, nreal, realR = 2, misFrac=0.0)
dataset = enum_columns.cbind(real_columns)
@@ -43,8 +43,13 @@ def generate_dataset(family, trainData, group_column, random_columns):
names_without_response = trainData.names
names_without_response.remove(myY)

startVal = [1.90118665, -1.26168122, 0.42931675, 0.98026587, 0.76808271, -0.63595311, 0.36327481, -0.30597578,
1.54715533, 1.39872770, -1.75658816, 1.31031573, -1.85003284, 1.33139505, 1.17457290, 1.84407102,
-0.07955216, 0.87777599, 0.07614022, 1.96488429, -0.52619981, -1.11206544, -0.55910850, -0.68860274,
0.61111377, 1.48083252, -0.46079518, -1.10481602, -1.32406489, 1.47580376, 0.66306257, -0.40125219,
0.70811714, -0.93184588, 1.52309741, -0.65421192, 0.83816616]
m = hglm(family=family, max_iterations=10, random_columns=random_columns, group_column=group_column,
tau_u_var_init = 0.5, tau_e_var_init = 0.6, random_intercept = True)
tau_u_var_init = 0.08, tau_e_var_init = 0.06, random_intercept = False, gen_syn_data=True, initial_fixed_effects=startVal)
m.train(training_frame=trainData, y = "response", x =myX)
f2 = m.predict(trainData)
finalDataset = trainData[names_without_response]
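
The edited test drives the generator end to end: a single real column stands in for the response, the fixed effects are pinned with `initial_fixed_effects=startVal`, `gen_syn_data=True` skips the EM iterations, and `predict()` supplies the synthetic responses in `f2`. A hedged sketch of the step that presumably follows, binding those predictions back as the response column (the frame methods are standard h2o-py calls, not taken from this diff):

```python
# f2[0] is the single prediction column, i.e. the synthetic responses
finalDataset = finalDataset.cbind(f2[0])
finalDataset.set_name(col=finalDataset.ncols - 1, name="response")
```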
10 changes: 9 additions & 1 deletion h2o-r/h2o-package/R/hglm.R
@@ -60,6 +60,8 @@
#' @param em_epsilon Converge if beta/ubeta/tmat/tauEVar changes less (using L-infinity norm) than em epsilon. ONLY applies to EM
#' method. Defaults to 1e-06.
#' @param random_intercept \code{Logical}. if true, will allow random component to the GLM coefficients. Defaults to TRUE.
#' @param gen_syn_data \code{Logical}. if true, add gaussian noise with variance specified in parms._tau_e_var_init. Defaults to
#' FALSE.
#' @examples
#' \dontrun{
#' library(h2o)
@@ -101,7 +103,8 @@ h2o.hglm <- function(x,
tau_e_var_init = 0,
method = c("EM"),
em_epsilon = 1e-06,
random_intercept = TRUE)
random_intercept = TRUE,
gen_syn_data = FALSE)
{
# Validate required training_frame first and other frame args: should be a valid key or an H2OFrame object
training_frame <- .validate.H2OFrame(training_frame, required=TRUE)
@@ -186,6 +189,8 @@ h2o.hglm <- function(x,
parms$random_intercept <- random_intercept
if (!missing(group_column))
parms$group_column <- group_column
if (!missing(gen_syn_data))
parms$gen_syn_data <- gen_syn_data

# Error check and build model
model <- .h2o.modelJob('hglm', parms, h2oRestApiVersion=3, verbose=FALSE)
@@ -219,6 +224,7 @@ h2o.hglm <- function(x,
method = c("EM"),
em_epsilon = 1e-06,
random_intercept = TRUE,
gen_syn_data = FALSE,
segment_columns = NULL,
segment_models_id = NULL,
parallelism = 1)
@@ -308,6 +314,8 @@ h2o.hglm <- function(x,
parms$random_intercept <- random_intercept
if (!missing(group_column))
parms$group_column <- group_column
if (!missing(gen_syn_data))
parms$gen_syn_data <- gen_syn_data

# Build segment-models specific parameters
segment_parms <- list()
