Revert GH-15857 AutoML pipeline [nocheck] (#16117)
* Revert "GH-15857: cleanup legacy TE integration in ModelBuilder and AutoML (#16061)"

This reverts commit a8f309b.

* Revert "GH-15857: AutoML pipeline support (#16041)"

This reverts commit 17fa9ee.

* Revert "GH-15856: Grid pipeline support (#16040)"

This reverts commit b7ac670.

* Revert "GH-15855: core pipeline API (#16039)"

This reverts commit c15ea1e.
valenad1 authored Mar 11, 2024
1 parent 7617ac4 commit 5927bb1
Showing 179 changed files with 1,404 additions and 6,254 deletions.
37 changes: 12 additions & 25 deletions h2o-admissibleml/src/main/java/hex/Infogram/Infogram.java
@@ -1,9 +1,6 @@
package hex.Infogram;

import hex.*;
import hex.Infogram.InfogramModel.InfogramModelOutput;
import hex.Infogram.InfogramModel.InfogramParameters;
import hex.ModelMetrics.MetricBuilder;
import water.*;
import water.exceptions.H2OModelBuilderIllegalArgumentException;
import water.fvec.Frame;
@@ -21,8 +18,8 @@
import static water.util.ArrayUtils.sort;
import static water.util.ArrayUtils.sum;

public class Infogram extends ModelBuilder<hex.Infogram.InfogramModel, InfogramParameters,
InfogramModelOutput> {
public class Infogram extends ModelBuilder<hex.Infogram.InfogramModel, hex.Infogram.InfogramModel.InfogramParameters,
hex.Infogram.InfogramModel.InfogramModelOutput> {
static final double NORMALIZE_ADMISSIBLE_INDEX = 1.0/Math.sqrt(2.0);
boolean _buildCore; // true to find core predictors, false to find admissible predictors
String[] _topKPredictors; // contain the names of top predictors to consider for infogram
@@ -48,14 +45,14 @@ public class Infogram extends ModelBuilder<hex.Infogram.InfogramModel, InfogramP
Model.Parameters.FoldAssignmentScheme _foldAssignmentOrig = null;
String _foldColumnOrig = null;

public Infogram(boolean startup_once) { super(new InfogramParameters(), startup_once);}
public Infogram(boolean startup_once) { super(new hex.Infogram.InfogramModel.InfogramParameters(), startup_once);}

public Infogram(InfogramParameters parms) {
public Infogram(hex.Infogram.InfogramModel.InfogramParameters parms) {
super(parms);
init(false);
}

public Infogram(InfogramParameters parms, Key<hex.Infogram.InfogramModel> key) {
public Infogram(hex.Infogram.InfogramModel.InfogramParameters parms, Key<hex.Infogram.InfogramModel> key) {
super(parms, key);
init(false);
}
@@ -74,23 +71,18 @@ protected int nModelsInParallel(int folds) {
* This is called before cross-validation is carried out
*/
@Override
protected void cv_init() {
super.cv_init();
public void computeCrossValidation() {
info("cross-validation", "cross-validation infogram information is stored in frame with key" +
" labeled as admissible_score_key_cv and the admissible features in admissible_features_cv.");
if (error_count() > 0) {
throw H2OModelBuilderIllegalArgumentException.makeFromBuilder(Infogram.this);
}
}

@Override
protected MetricBuilder makeCVMetricBuilder(ModelBuilder<InfogramModel, InfogramParameters, InfogramModelOutput> cvModelBuilder, Futures fs) {
return null; //infogram does not support scoring
super.computeCrossValidation();
}

// find the best alpha/lambda values used to build the main model moving forward by looking at the devianceValid
@Override
protected void cv_computeAndSetOptimalParameters(ModelBuilder[] cvModelBuilders) {
public void cv_computeAndSetOptimalParameters(ModelBuilder[] cvModelBuilders) {
int nBuilders = cvModelBuilders.length;
double[][] cmiRaw = new double[nBuilders][];
List<List<String>> columns = new ArrayList<>();
@@ -111,12 +103,7 @@ protected void cv_computeAndSetOptimalParameters(ModelBuilder[] cvModelBuilders)
}
_cvDone = true; // cv is done and we are going to build main model next
}

@Override
protected void cv_mainModelScores(int N, MetricBuilder[] mbs, ModelBuilder<InfogramModel, InfogramParameters, InfogramModelOutput>[] cvModelBuilders) {
//infogram does not support scoring
}


public void calculateMeanInfogramInfo(double[][] cmiRaw, List<List<String>> columns,
long[] nObs) {
int nFolds = cmiRaw.length;
@@ -317,7 +304,7 @@ public final void buildModel() {
try {
boolean validPresent = _parms.valid() != null;
prepareModelTrainingFrame(); // generate training frame with predictors and sensitive features (if specified)
InfogramModel model = new hex.Infogram.InfogramModel(dest(), _parms, new InfogramModelOutput(Infogram.this));
InfogramModel model = new hex.Infogram.InfogramModel(dest(), _parms, new hex.Infogram.InfogramModel.InfogramModelOutput(Infogram.this));
_model = model.delete_and_lock(_job);
_model._output._start_time = System.currentTimeMillis();
_cmiRaw = new double[_numModels];
@@ -372,7 +359,7 @@ public final void buildModel() {
* relevance >= relevance_threshold. Derive _admissible_index as distance from point with cmi = 1 and
* relevance = 1. In addition, all arrays are sorted on _admissible_index.
*/
private void copyCMIRelevance(InfogramModelOutput modelOutput) {
private void copyCMIRelevance(InfogramModel.InfogramModelOutput modelOutput) {
modelOutput._cmi_raw = new double[_cmi.length];
System.arraycopy(_cmiRaw, 0, modelOutput._cmi_raw, 0, modelOutput._cmi_raw.length);
modelOutput._admissible_index = new double[_cmi.length];
@@ -388,7 +375,7 @@ private void copyCMIRelevance(InfogramModelOutput modelOutput) {
modelOutput._admissible_index, modelOutput._admissible, modelOutput._all_predictor_names);
}

public void copyCMIRelevanceValid(InfogramModelOutput modelOutput) {
public void copyCMIRelevanceValid(InfogramModel.InfogramModelOutput modelOutput) {
modelOutput._cmi_raw_valid = new double[_cmiValid.length];
System.arraycopy(_cmiRawValid, 0, modelOutput._cmi_raw_valid, 0, modelOutput._cmi_raw_valid.length);
modelOutput._admissible_index_valid = new double[_cmiValid.length];
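Context for the Infogram hunks above: the revert drops the pipeline-era cv_init()/makeCVMetricBuilder() hooks and restores a direct override of computeCrossValidation() that performs its own bookkeeping and then delegates to the superclass. Below is a minimal, self-contained sketch of that override pattern; the CvBuilder and InfogramLikeBuilder names are illustrative stand-ins for hex.ModelBuilder and hex.Infogram.Infogram, not the real classes.

// Sketch only: simplified stand-ins for the H2O builder hierarchy.
abstract class CvBuilder {
    // Generic driver for the N-fold cross-validation loop (stand-in for hex.ModelBuilder).
    public void computeCrossValidation() {
        System.out.println("running generic N-fold cross-validation");
    }
    protected int error_count() { return 0; }
}

class InfogramLikeBuilder extends CvBuilder {
    @Override
    public void computeCrossValidation() {
        // Algorithm-specific bookkeeping runs before the generic CV loop...
        System.out.println("storing CV results under admissible_score_key_cv / admissible_features_cv");
        if (error_count() > 0) {
            throw new IllegalArgumentException("invalid model parameters");
        }
        // ...then the generic loop is invoked by delegating to the superclass.
        super.computeCrossValidation();
    }

    public static void main(String[] args) {
        new InfogramLikeBuilder().computeCrossValidation();
    }
}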
2 changes: 1 addition & 1 deletion h2o-algos/src/main/java/hex/deeplearning/DeepLearning.java
@@ -165,7 +165,7 @@ static DataInfo makeDataInfo(Frame train, Frame valid, DeepLearningParameters pa
}
}

@Override protected void cv_computeAndSetOptimalParameters(ModelBuilder[] cvModelBuilders) {
@Override public void cv_computeAndSetOptimalParameters(ModelBuilder[] cvModelBuilders) {
_parms._overwrite_with_best_model = false;

if( _parms._stopping_rounds == 0 && _parms._max_runtime_secs == 0) return; // No exciting changes to stopping conditions
Changes in the Stacked Ensemble model class (file header not shown)
@@ -580,7 +580,7 @@ public ModelMetrics makeModelMetrics(Frame fr, Frame adaptFrm) {
@Override
public ModelMetrics.MetricBuilder<?> getMetricBuilder() {
throw new UnsupportedOperationException("Stacked Ensemble model doesn't implement MetricBuilder infrastructure code, " +
"retrieve your metrics by calling makeModelMetrics method.");
"retrieve your metrics by calling getOrMakeMetrics method.");
}
}

69 changes: 54 additions & 15 deletions h2o-algos/src/main/java/hex/glm/GLM.java
@@ -26,6 +26,7 @@
import hex.util.LinearAlgebraUtils;
import hex.util.LinearAlgebraUtils.BMulTask;
import hex.util.LinearAlgebraUtils.FindMaxIndex;
import jsr166y.CountedCompleter;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import water.*;
@@ -118,8 +119,7 @@ public boolean isSupervised() {
public ModelCategory[] can_build() {
return new ModelCategory[]{
ModelCategory.Regression,
ModelCategory.Binomial,
ModelCategory.Multinomial
ModelCategory.Binomial,
};
}

@@ -148,12 +148,13 @@ public ModelCategory[] can_build() {
* (builds N+1 models, all have train+validation metrics, the main model has N-fold cross-validated validation metrics)
*/
@Override
protected void cv_init() {
public void computeCrossValidation() {
// init computes global list of lambdas
init(true);
_cvRuns = true;
if (error_count() > 0)
throw H2OModelBuilderIllegalArgumentException.makeFromBuilder(GLM.this);
super.computeCrossValidation();
}


@@ -292,7 +293,7 @@ private double[] alignSubModelsAcrossCVModels(ModelBuilder[] cvModelBuilders) {
* 4. unlock the n-folds models (they are changed here, so the unlocking happens here)
*/
@Override
protected void cv_computeAndSetOptimalParameters(ModelBuilder[] cvModelBuilders) {
public void cv_computeAndSetOptimalParameters(ModelBuilder[] cvModelBuilders) {
setMaxRuntimeSecsForMainModel();
double bestTestDev = Double.POSITIVE_INFINITY;
double[] alphasAndLambdas = alignSubModelsAcrossCVModels(cvModelBuilders);
@@ -371,6 +372,12 @@ protected void cv_computeAndSetOptimalParameters(ModelBuilder[] cvModelBuilders)
break;
}
}
for (int i = 0; i < cvModelBuilders.length; ++i) {
GLM g = (GLM) cvModelBuilders[i];
if (g._toRemove != null)
for (Key k : g._toRemove)
Keyed.remove(k);
}

for (int i = 0; i < cvModelBuilders.length; ++i) {
GLM g = (GLM) cvModelBuilders[i];
@@ -1536,11 +1543,11 @@ private void buildModel() {

protected static final long WORK_TOTAL = 1000000;

@Override
protected void cleanUp() {
if (_parms._lambda_search && _parms._is_cv_model)
keepUntilCompletion(_dinfo.getWeightsVec()._key);
super.cleanUp();
transient Key [] _toRemove;

private Key[] removeLater(Key ...k){
_toRemove = _toRemove == null?k:ArrayUtils.append(_toRemove,k);
return k;
}

@Override protected GLMDriver trainModelImpl() { return _driver = new GLMDriver(); }
@@ -1569,6 +1576,23 @@ public final class GLMDriver extends Driver implements ProgressMonitor {
private transient GLMTask.GLMIterationTask _gramInfluence;
private transient double[][] _cholInvInfluence;

private void doCleanup() {
try {
if (_parms._lambda_search && _parms._is_cv_model)
Scope.untrack(removeLater(_dinfo.getWeightsVec()._key));
if (_parms._HGLM) {
Key[] vecKeys = _toRemove;
for (int index = 0; index < vecKeys.length; index++) {
Vec tempVec = DKV.getGet(vecKeys[index]);
tempVec.remove();
}
}
} catch (Exception e) {
Log.err("Error while cleaning up GLM " + _result);
Log.err(e);
}
}

private transient Cholesky _chol;
private transient L1Solver _lslvr;

@@ -3540,8 +3564,9 @@ private Vec[] genGLMVectors(DataInfo dinfo, double[] nb) {
sumExp += Math.exp(nb[i * N + P] - maxRow);
}
Vec[] vecs = dinfo._adaptedFrame.anyVec().makeDoubles(2, new double[]{sumExp, maxRow});
if (_parms._lambda_search) {
track(vecs[0]); track(vecs[1]);
if (_parms._lambda_search && _parms._is_cv_model) {
Scope.untrack(vecs[0]._key, vecs[1]._key);
removeLater(vecs[0]._key, vecs[1]._key);
}
return vecs;
}
@@ -3823,7 +3848,7 @@ private void checkCoeffsBounds() {
* - column 2: zi, intermediate values
* - column 3: eta = X*beta, intermediate values
*/
private void addWdataZiEtaOld2Response() { // attach wdata, zi, eta to response for HGLM
public void addWdataZiEtaOld2Response() { // attach wdata, zi, eta to response for HGLM
int moreColnum = 3 + _parms._random_columns.length;
Vec[] vecs = _dinfo._adaptedFrame.anyVec().makeZeros(moreColnum);
String[] colNames = new String[moreColnum];
@@ -3836,11 +3861,25 @@ private void addWdataZiEtaOld2Response() { // attach wdata, zi, eta to response
vecs[index] = _parms.train().vec(randColIndices[index - 3]).makeCopy();
}
_dinfo.addResponse(colNames, vecs);
Frame wdataZiEta = new Frame(Key.make("wdataZiEta"+Key.rand()), colNames, vecs);
DKV.put(wdataZiEta);
track(wdataZiEta);
for (int index = 0; index < moreColnum; index++) {
Scope.untrack(vecs[index]._key);
removeLater(vecs[index]._key);
}
}

@Override
public void onCompletion(CountedCompleter caller) {
doCleanup();
super.onCompletion(caller);
}

@Override
public boolean onExceptionalCompletion(Throwable t, CountedCompleter caller) {
doCleanup();
return super.onExceptionalCompletion(t, caller);
}


@Override
public boolean progress(double[] beta, GradientInfo ginfo) {
_state._iter++;
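Context for the GLM hunks above: the revert removes the newer cleanUp()/keepUntilCompletion() lifecycle and restores the older manual pattern in which temporary vector keys are collected with removeLater() and deleted in doCleanup(), invoked from both onCompletion() and onExceptionalCompletion(). A minimal sketch of that pattern follows, with plain Strings standing in for water.Key and without the jsr166y.CountedCompleter machinery.

import java.util.ArrayList;
import java.util.List;

// Sketch only: stand-in for the GLM driver's manual cleanup (the real code keeps
// water.Key objects in a transient _toRemove array and calls Keyed.remove from
// CountedCompleter callbacks).
class CleanupDriverSketch {
    private List<String> toRemove;

    // Remember temporary keys so they can be deleted once the driver finishes.
    String[] removeLater(String... keys) {
        if (toRemove == null) toRemove = new ArrayList<>();
        for (String k : keys) toRemove.add(k);
        return keys;
    }

    // Called from both completion paths, so temporaries are freed even when training fails.
    void doCleanup() {
        if (toRemove == null) return;
        for (String k : toRemove) {
            System.out.println("removing temporary key " + k);  // Keyed.remove(k) in H2O
        }
        toRemove = null;
    }

    void onCompletion()            { doCleanup(); }
    void onExceptionalCompletion() { doCleanup(); }

    public static void main(String[] args) {
        CleanupDriverSketch d = new CleanupDriverSketch();
        d.removeLater("weights_vec_key", "wdataZiEta_key");
        d.onCompletion();
    }
}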
12 changes: 0 additions & 12 deletions h2o-algos/src/main/java/hex/glm/GLMModel.java
@@ -7,7 +7,6 @@
import hex.genmodel.utils.DistributionFamily;
import hex.glm.GLMModel.GLMParameters.Family;
import hex.glm.GLMModel.GLMParameters.Link;
import hex.grid.Grid;
import hex.util.EffectiveParametersUtils;
import org.apache.commons.math3.distribution.NormalDistribution;
import org.apache.commons.math3.distribution.RealDistribution;
@@ -1035,17 +1034,6 @@ public DistributionFamily getDistributionFamily() {
return familyToDistribution(_family);
}

@Override
public void addSearchWarnings(Grid.SearchFailure searchFailure, Grid grid) {
super.addSearchWarnings(searchFailure, grid);
if (ArrayUtils.contains(grid.getHyperNames(), "alpha")) {
// maybe we should find a way to raise this warning at the very beginning of grid search, similar to validation in ModelBuilder#init().
searchFailure.addWarning("Adding alpha array to hyperparameter runs slower with gridsearch. "+
"This is due to the fact that the algo has to run initialization for every alpha value. "+
"Setting the alpha array as a model parameter will skip the initialization and run faster overall.");
}
}

public void updateTweedieParams(double tweedieVariancePower, double tweedieLinkPower, double dispersion){
_tweedie_variance_power = tweedieVariancePower;
_tweedie_link_power = tweedieLinkPower;
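Context for the GLMModel.java hunk above: the revert deletes the addSearchWarnings() override that warned when "alpha" appears among grid-search hyperparameters. A rough sketch of what such a hook does, using simplified stand-ins for hex.grid.Grid.SearchFailure and the hyperparameter-name lookup (the class names below are illustrative, not the real API):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

// Sketch only: simplified stand-ins for Grid.SearchFailure and the GLM parameter hook.
class SearchFailureSketch {
    final List<String> warnings = new ArrayList<>();
    void addWarning(String w) { warnings.add(w); }
}

class GlmParamsSketch {
    // Emit an algorithm-specific warning when the grid varies "alpha",
    // because GLM re-runs initialization for every alpha value.
    void addSearchWarnings(SearchFailureSketch failure, String[] hyperNames) {
        if (Arrays.asList(hyperNames).contains("alpha")) {
            failure.addWarning("Adding an alpha array to the hyperparameters makes grid search slower; "
                    + "setting alpha as a model parameter skips the repeated initialization.");
        }
    }

    public static void main(String[] args) {
        SearchFailureSketch failure = new SearchFailureSketch();
        new GlmParamsSketch().addSearchWarnings(failure, new String[]{"alpha", "lambda"});
        failure.warnings.forEach(System.out::println);
    }
}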
2 changes: 1 addition & 1 deletion h2o-algos/src/main/java/hex/kmeans/KMeans.java
@@ -109,7 +109,7 @@ else if( user_points.numRows() != _parms._k)
if (expensive && error_count() == 0) checkMemoryFootPrint();
}

protected void cv_makeAggregateModelMetrics(ModelMetrics.MetricBuilder[] mbs){
public void cv_makeAggregateModelMetrics(ModelMetrics.MetricBuilder[] mbs){
super.cv_makeAggregateModelMetrics(mbs);
((ModelMetricsClustering.MetricBuilderClustering) mbs[0])._within_sumsqe = null;
((ModelMetricsClustering.MetricBuilderClustering) mbs[0])._size = null;
2 changes: 1 addition & 1 deletion h2o-algos/src/main/java/hex/tree/SharedTree.java
@@ -1203,7 +1203,7 @@ public double initialValue() {
return _parms._parallel_main_model_building;
}

@Override protected void cv_computeAndSetOptimalParameters(ModelBuilder<M, P, O>[] cvModelBuilders) {
@Override public void cv_computeAndSetOptimalParameters(ModelBuilder<M, P, O>[] cvModelBuilders) {
// Extract stopping conditions from each CV model, and compute the best stopping answer
if (!cv_initStoppingParameters())
return; // No exciting changes to stopping conditions