Skip to content

Commit

Permalink
Revert "GH-15855: core pipeline API (#16039)"
Browse files Browse the repository at this point in the history
This reverts commit c15ea1e.
  • Loading branch information
valenad1 committed Mar 8, 2024
1 parent 2f7bd43 commit 224c5df
Show file tree
Hide file tree
Showing 116 changed files with 425 additions and 4,368 deletions.
37 changes: 12 additions & 25 deletions h2o-admissibleml/src/main/java/hex/Infogram/Infogram.java
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
package hex.Infogram;

import hex.*;
import hex.Infogram.InfogramModel.InfogramModelOutput;
import hex.Infogram.InfogramModel.InfogramParameters;
import hex.ModelMetrics.MetricBuilder;
import water.*;
import water.exceptions.H2OModelBuilderIllegalArgumentException;
import water.fvec.Frame;
Expand All @@ -21,8 +18,8 @@
import static water.util.ArrayUtils.sort;
import static water.util.ArrayUtils.sum;

public class Infogram extends ModelBuilder<hex.Infogram.InfogramModel, InfogramParameters,
InfogramModelOutput> {
public class Infogram extends ModelBuilder<hex.Infogram.InfogramModel, hex.Infogram.InfogramModel.InfogramParameters,
hex.Infogram.InfogramModel.InfogramModelOutput> {
static final double NORMALIZE_ADMISSIBLE_INDEX = 1.0/Math.sqrt(2.0);
boolean _buildCore; // true to find core predictors, false to find admissible predictors
String[] _topKPredictors; // contain the names of top predictors to consider for infogram
Expand All @@ -48,14 +45,14 @@ public class Infogram extends ModelBuilder<hex.Infogram.InfogramModel, InfogramP
Model.Parameters.FoldAssignmentScheme _foldAssignmentOrig = null;
String _foldColumnOrig = null;

public Infogram(boolean startup_once) { super(new InfogramParameters(), startup_once);}
public Infogram(boolean startup_once) { super(new hex.Infogram.InfogramModel.InfogramParameters(), startup_once);}

public Infogram(InfogramParameters parms) {
public Infogram(hex.Infogram.InfogramModel.InfogramParameters parms) {
super(parms);
init(false);
}

public Infogram(InfogramParameters parms, Key<hex.Infogram.InfogramModel> key) {
public Infogram(hex.Infogram.InfogramModel.InfogramParameters parms, Key<hex.Infogram.InfogramModel> key) {
super(parms, key);
init(false);
}
Expand All @@ -74,23 +71,18 @@ protected int nModelsInParallel(int folds) {
* This is called before cross-validation is carried out
*/
@Override
protected void cv_init() {
super.cv_init();
public void computeCrossValidation() {
info("cross-validation", "cross-validation infogram information is stored in frame with key" +
" labeled as admissible_score_key_cv and the admissible features in admissible_features_cv.");
if (error_count() > 0) {
throw H2OModelBuilderIllegalArgumentException.makeFromBuilder(Infogram.this);
}
}

@Override
protected MetricBuilder makeCVMetricBuilder(ModelBuilder<InfogramModel, InfogramParameters, InfogramModelOutput> cvModelBuilder, Futures fs) {
return null; //infogram does not support scoring
super.computeCrossValidation();
}

// find the best alpha/lambda values used to build the main model moving forward by looking at the devianceValid
@Override
protected void cv_computeAndSetOptimalParameters(ModelBuilder[] cvModelBuilders) {
public void cv_computeAndSetOptimalParameters(ModelBuilder[] cvModelBuilders) {
int nBuilders = cvModelBuilders.length;
double[][] cmiRaw = new double[nBuilders][];
List<List<String>> columns = new ArrayList<>();
Expand All @@ -111,12 +103,7 @@ protected void cv_computeAndSetOptimalParameters(ModelBuilder[] cvModelBuilders)
}
_cvDone = true; // cv is done and we are going to build main model next
}

@Override
protected void cv_mainModelScores(int N, MetricBuilder[] mbs, ModelBuilder<InfogramModel, InfogramParameters, InfogramModelOutput>[] cvModelBuilders) {
//infogram does not support scoring
}


public void calculateMeanInfogramInfo(double[][] cmiRaw, List<List<String>> columns,
long[] nObs) {
int nFolds = cmiRaw.length;
Expand Down Expand Up @@ -317,7 +304,7 @@ public final void buildModel() {
try {
boolean validPresent = _parms.valid() != null;
prepareModelTrainingFrame(); // generate training frame with predictors and sensitive features (if specified)
InfogramModel model = new hex.Infogram.InfogramModel(dest(), _parms, new InfogramModelOutput(Infogram.this));
InfogramModel model = new hex.Infogram.InfogramModel(dest(), _parms, new hex.Infogram.InfogramModel.InfogramModelOutput(Infogram.this));
_model = model.delete_and_lock(_job);
_model._output._start_time = System.currentTimeMillis();
_cmiRaw = new double[_numModels];
Expand Down Expand Up @@ -372,7 +359,7 @@ public final void buildModel() {
* relevance >= relevance_threshold. Derive _admissible_index as distance from point with cmi = 1 and
* relevance = 1. In addition, all arrays are sorted on _admissible_index.
*/
private void copyCMIRelevance(InfogramModelOutput modelOutput) {
private void copyCMIRelevance(InfogramModel.InfogramModelOutput modelOutput) {
modelOutput._cmi_raw = new double[_cmi.length];
System.arraycopy(_cmiRaw, 0, modelOutput._cmi_raw, 0, modelOutput._cmi_raw.length);
modelOutput._admissible_index = new double[_cmi.length];
Expand All @@ -388,7 +375,7 @@ private void copyCMIRelevance(InfogramModelOutput modelOutput) {
modelOutput._admissible_index, modelOutput._admissible, modelOutput._all_predictor_names);
}

public void copyCMIRelevanceValid(InfogramModelOutput modelOutput) {
public void copyCMIRelevanceValid(InfogramModel.InfogramModelOutput modelOutput) {
modelOutput._cmi_raw_valid = new double[_cmiValid.length];
System.arraycopy(_cmiRawValid, 0, modelOutput._cmi_raw_valid, 0, modelOutput._cmi_raw_valid.length);
modelOutput._admissible_index_valid = new double[_cmiValid.length];
Expand Down
2 changes: 1 addition & 1 deletion h2o-algos/src/main/java/hex/deeplearning/DeepLearning.java
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ static DataInfo makeDataInfo(Frame train, Frame valid, DeepLearningParameters pa
}
}

@Override protected void cv_computeAndSetOptimalParameters(ModelBuilder[] cvModelBuilders) {
@Override public void cv_computeAndSetOptimalParameters(ModelBuilder[] cvModelBuilders) {
_parms._overwrite_with_best_model = false;

if( _parms._stopping_rounds == 0 && _parms._max_runtime_secs == 0) return; // No exciting changes to stopping conditions
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -580,7 +580,7 @@ public ModelMetrics makeModelMetrics(Frame fr, Frame adaptFrm) {
@Override
public ModelMetrics.MetricBuilder<?> getMetricBuilder() {
throw new UnsupportedOperationException("Stacked Ensemble model doesn't implement MetricBuilder infrastructure code, " +
"retrieve your metrics by calling makeModelMetrics method.");
"retrieve your metrics by calling getOrMakeMetrics method.");
}
}

Expand Down
69 changes: 54 additions & 15 deletions h2o-algos/src/main/java/hex/glm/GLM.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import hex.util.LinearAlgebraUtils;
import hex.util.LinearAlgebraUtils.BMulTask;
import hex.util.LinearAlgebraUtils.FindMaxIndex;
import jsr166y.CountedCompleter;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import water.*;
Expand Down Expand Up @@ -118,8 +119,7 @@ public boolean isSupervised() {
public ModelCategory[] can_build() {
return new ModelCategory[]{
ModelCategory.Regression,
ModelCategory.Binomial,
ModelCategory.Multinomial
ModelCategory.Binomial,
};
}

Expand Down Expand Up @@ -148,12 +148,13 @@ public ModelCategory[] can_build() {
* (builds N+1 models, all have train+validation metrics, the main model has N-fold cross-validated validation metrics)
*/
@Override
protected void cv_init() {
public void computeCrossValidation() {
// init computes global list of lambdas
init(true);
_cvRuns = true;
if (error_count() > 0)
throw H2OModelBuilderIllegalArgumentException.makeFromBuilder(GLM.this);
super.computeCrossValidation();
}


Expand Down Expand Up @@ -292,7 +293,7 @@ private double[] alignSubModelsAcrossCVModels(ModelBuilder[] cvModelBuilders) {
* 4. unlock the n-folds models (they are changed here, so the unlocking happens here)
*/
@Override
protected void cv_computeAndSetOptimalParameters(ModelBuilder[] cvModelBuilders) {
public void cv_computeAndSetOptimalParameters(ModelBuilder[] cvModelBuilders) {
setMaxRuntimeSecsForMainModel();
double bestTestDev = Double.POSITIVE_INFINITY;
double[] alphasAndLambdas = alignSubModelsAcrossCVModels(cvModelBuilders);
Expand Down Expand Up @@ -371,6 +372,12 @@ protected void cv_computeAndSetOptimalParameters(ModelBuilder[] cvModelBuilders)
break;
}
}
for (int i = 0; i < cvModelBuilders.length; ++i) {
GLM g = (GLM) cvModelBuilders[i];
if (g._toRemove != null)
for (Key k : g._toRemove)
Keyed.remove(k);
}

for (int i = 0; i < cvModelBuilders.length; ++i) {
GLM g = (GLM) cvModelBuilders[i];
Expand Down Expand Up @@ -1536,11 +1543,11 @@ private void buildModel() {

protected static final long WORK_TOTAL = 1000000;

@Override
protected void cleanUp() {
if (_parms._lambda_search && _parms._is_cv_model)
keepUntilCompletion(_dinfo.getWeightsVec()._key);
super.cleanUp();
transient Key [] _toRemove;

/**
 * Remembers the given keys in {@code _toRemove} so they can be removed at a later point.
 * On the first call the passed array itself becomes {@code _toRemove}; on later calls the
 * keys are appended to the existing array.
 *
 * @param k keys to remember for deferred removal
 * @return the same array {@code k}, so the call can be nested inside another call
 */
private Key[] removeLater(Key... k) {
  if (_toRemove == null) {
    _toRemove = k;
  } else {
    _toRemove = ArrayUtils.append(_toRemove, k);
  }
  return k;
}

@Override protected GLMDriver trainModelImpl() { return _driver = new GLMDriver(); }
Expand Down Expand Up @@ -1569,6 +1576,23 @@ public final class GLMDriver extends Driver implements ProgressMonitor {
private transient GLMTask.GLMIterationTask _gramInfluence;
private transient double[][] _cholInvInfluence;

/**
 * Best-effort cleanup invoked from {@code onCompletion} and {@code onExceptionalCompletion}.
 *
 * <ul>
 *   <li>For a CV model built with lambda search, the weights vec key is untracked from the
 *       current {@code Scope} and registered via {@code removeLater} instead of being
 *       removed here.</li>
 *   <li>For HGLM, every vec registered in {@code _toRemove} is removed immediately.</li>
 * </ul>
 *
 * Any exception is logged and swallowed so that cleanup never changes the completion outcome.
 */
private void doCleanup() {
  try {
    if (_parms._lambda_search && _parms._is_cv_model)
      Scope.untrack(removeLater(_dinfo.getWeightsVec()._key));
    // _toRemove stays null when the driver failed before any key was registered;
    // skip the loop instead of raising (and logging) a NullPointerException.
    if (_parms._HGLM && _toRemove != null) {
      for (Key vecKey : _toRemove) {
        Vec tempVec = DKV.getGet(vecKey);
        // A key that is already gone must not abort removal of the remaining vecs.
        if (tempVec != null)
          tempVec.remove();
      }
    }
  } catch (Exception e) {
    Log.err("Error while cleaning up GLM " + _result);
    Log.err(e);
  }
}

private transient Cholesky _chol;
private transient L1Solver _lslvr;

Expand Down Expand Up @@ -3540,8 +3564,9 @@ private Vec[] genGLMVectors(DataInfo dinfo, double[] nb) {
sumExp += Math.exp(nb[i * N + P] - maxRow);
}
Vec[] vecs = dinfo._adaptedFrame.anyVec().makeDoubles(2, new double[]{sumExp, maxRow});
if (_parms._lambda_search) {
track(vecs[0]); track(vecs[1]);
if (_parms._lambda_search && _parms._is_cv_model) {
Scope.untrack(vecs[0]._key, vecs[1]._key);
removeLater(vecs[0]._key, vecs[1]._key);
}
return vecs;
}
Expand Down Expand Up @@ -3823,7 +3848,7 @@ private void checkCoeffsBounds() {
* - column 2: zi, intermediate values
* - column 3: eta = X*beta, intermediate values
*/
private void addWdataZiEtaOld2Response() { // attach wdata, zi, eta to response for HGLM
public void addWdataZiEtaOld2Response() { // attach wdata, zi, eta to response for HGLM
int moreColnum = 3 + _parms._random_columns.length;
Vec[] vecs = _dinfo._adaptedFrame.anyVec().makeZeros(moreColnum);
String[] colNames = new String[moreColnum];
Expand All @@ -3836,11 +3861,25 @@ private void addWdataZiEtaOld2Response() { // attach wdata, zi, eta to response
vecs[index] = _parms.train().vec(randColIndices[index - 3]).makeCopy();
}
_dinfo.addResponse(colNames, vecs);
Frame wdataZiEta = new Frame(Key.make("wdataZiEta"+Key.rand()), colNames, vecs);
DKV.put(wdataZiEta);
track(wdataZiEta);
for (int index = 0; index < moreColnum; index++) {
Scope.untrack(vecs[index]._key);
removeLater(vecs[index]._key);
}
}

/**
 * Normal-completion hook: runs {@code doCleanup()} first and then delegates to the
 * superclass implementation.
 *
 * @param caller the completer passed through unchanged to {@code super.onCompletion}
 */
@Override
public void onCompletion(CountedCompleter caller) {
// cleanup is performed before the superclass completion logic runs
doCleanup();
super.onCompletion(caller);
}

/**
 * Exceptional-completion hook: runs {@code doCleanup()} first and then delegates to the
 * superclass implementation.
 *
 * @param t      the failure, passed through unchanged to the superclass
 * @param caller the completer, passed through unchanged to the superclass
 * @return whatever {@code super.onExceptionalCompletion(t, caller)} returns
 */
@Override
public boolean onExceptionalCompletion(Throwable t, CountedCompleter caller) {
// same cleanup as on normal completion, so it also runs when the driver fails
doCleanup();
return super.onExceptionalCompletion(t, caller);
}


@Override
public boolean progress(double[] beta, GradientInfo ginfo) {
_state._iter++;
Expand Down
2 changes: 1 addition & 1 deletion h2o-algos/src/main/java/hex/kmeans/KMeans.java
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ else if( user_points.numRows() != _parms._k)
if (expensive && error_count() == 0) checkMemoryFootPrint();
}

protected void cv_makeAggregateModelMetrics(ModelMetrics.MetricBuilder[] mbs){
public void cv_makeAggregateModelMetrics(ModelMetrics.MetricBuilder[] mbs){
super.cv_makeAggregateModelMetrics(mbs);
((ModelMetricsClustering.MetricBuilderClustering) mbs[0])._within_sumsqe = null;
((ModelMetricsClustering.MetricBuilderClustering) mbs[0])._size = null;
Expand Down
2 changes: 1 addition & 1 deletion h2o-algos/src/main/java/hex/tree/SharedTree.java
Original file line number Diff line number Diff line change
Expand Up @@ -1203,7 +1203,7 @@ public double initialValue() {
return _parms._parallel_main_model_building;
}

@Override protected void cv_computeAndSetOptimalParameters(ModelBuilder<M, P, O>[] cvModelBuilders) {
@Override public void cv_computeAndSetOptimalParameters(ModelBuilder<M, P, O>[] cvModelBuilders) {
// Extract stopping conditions from each CV model, and compute the best stopping answer
if (!cv_initStoppingParameters())
return; // No exciting changes to stopping conditions
Expand Down
5 changes: 2 additions & 3 deletions h2o-bindings/bin/custom.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,8 @@ def get_customizations_for(language, algo, property=None, default=None):
tokens = property.split('.')
value = customizations
for token in tokens:
if token in value:
value = value.get(token)
else:
value = value.get(token)
if value is None:
return default
return value
else:
Expand Down
28 changes: 0 additions & 28 deletions h2o-bindings/bin/custom/R/gen_pipeline.py

This file was deleted.

56 changes: 0 additions & 56 deletions h2o-bindings/bin/custom/python/gen_pipeline.py

This file was deleted.

4 changes: 2 additions & 2 deletions h2o-bindings/bin/gen_R.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,8 +145,8 @@ def get_schema_params(pname):
"verbose",
"destination_key"] # destination_key is only for SVD
bulk_params = list(zip(*filter(lambda t: not t[0] in bulk_pnames_skip, zip(sig_pnames, sig_params))))
bulk_pnames = list(bulk_params[0]) if bulk_params else []
sig_bulk_params = list(bulk_params[1]) if bulk_params else []
bulk_pnames = list(bulk_params[0])
sig_bulk_params = list(bulk_params[1])
sig_bulk_params.append("segment_columns = NULL")
sig_bulk_params.append("segment_models_id = NULL")
sig_bulk_params.append("parallelism = 1")
Expand Down
Loading

0 comments on commit 224c5df

Please sign in to comment.