diff --git a/h2o-algos/src/main/java/hex/tree/isoforextended/ExtendedIsolationForest.java b/h2o-algos/src/main/java/hex/tree/isoforextended/ExtendedIsolationForest.java index f6bcc5ca5ca5..c208c149ec2f 100644 --- a/h2o-algos/src/main/java/hex/tree/isoforextended/ExtendedIsolationForest.java +++ b/h2o-algos/src/main/java/hex/tree/isoforextended/ExtendedIsolationForest.java @@ -2,11 +2,15 @@ import hex.ModelBuilder; import hex.ModelCategory; +import hex.ModelMetrics; +import hex.ScoreKeeper; import hex.tree.isoforextended.isolationtree.CompressedIsolationTree; import hex.tree.isoforextended.isolationtree.IsolationTree; import hex.tree.isoforextended.isolationtree.IsolationTreeStats; import org.apache.log4j.Level; import org.apache.log4j.Logger; +import org.joda.time.format.DateTimeFormat; +import org.joda.time.format.DateTimeFormatter; import water.DKV; import water.H2O; import water.Job; @@ -154,6 +158,10 @@ public void computeImpl() { private void buildIsolationTreeEnsemble() { _model._output._iTreeKeys = new Key[_parms._ntrees]; + _model._output._scored_train = new ScoreKeeper[_parms._ntrees + 1]; + _model._output._scored_train[0] = new ScoreKeeper(); + _model._output._training_time_ms = new long[_parms._ntrees + 1]; + _model._output._training_time_ms[0] = System.currentTimeMillis(); int heightLimit = (int) Math.ceil(MathUtils.log2(_parms._sample_size)); @@ -171,10 +179,20 @@ private void buildIsolationTreeEnsemble() { DKV.put(compressedIsolationTree); _job.update(1); _model.update(_job); - LOG.info((tid + 1) + ". tree was built in " + timer.toString()); + _model._output._training_time_ms[tid + 1] = System.currentTimeMillis(); + LOG.info((tid + 1) + ". tree was built in " + timer); isolationTreeStats.updateBy(isolationTree); + + boolean manualInterval = _parms._score_tree_interval > 0 && (tid +1) % _parms._score_tree_interval == 0; + + _model._output._scored_train[tid + 1] = new ScoreKeeper(); + if (_parms._score_each_iteration || manualInterval) { + ModelMetrics.MetricBuilder mb = new ScoreExtendedIsolationForestTask(_model).doAll(_train).getMetricsBuilder(); + _model._output._scored_train[tid + 1].fillFrom(mb.makeModelMetrics(_model, _parms.train(), null, null)); + } } _model._output._training_metrics = new ScoreExtendedIsolationForestTask(_model).doAll(_train).getMetricsBuilder().makeModelMetrics(_model, _parms.train(), null, null); + _model._output._scoring_history = createScoringHistoryTable(); } } @@ -238,4 +256,50 @@ public TwoDimTable createModelSummaryTable() { return table; } + protected TwoDimTable createScoringHistoryTable() { + List colHeaders = new ArrayList<>(); + List colTypes = new ArrayList<>(); + List colFormat = new ArrayList<>(); + colHeaders.add("Timestamp"); colTypes.add("string"); colFormat.add("%s"); + colHeaders.add("Duration"); colTypes.add("string"); colFormat.add("%s"); + colHeaders.add("Number of Trees"); colTypes.add("long"); colFormat.add("%d"); + colHeaders.add("Mean Tree Path Length"); colTypes.add("double"); colFormat.add("%.5f"); + colHeaders.add("Mean Anomaly Score"); colTypes.add("double"); colFormat.add("%.5f"); + if (_parms._custom_metric_func != null) { + colHeaders.add("Training Custom"); colTypes.add("double"); colFormat.add("%.5f"); + } + + ScoreKeeper[] sks = _model._output._scored_train; + + int rows = 0; + for (int i = 0; i < sks.length; i++) { + if (i != 0 && Double.isNaN(sks[i]._anomaly_score)) continue; + rows++; + } + TwoDimTable table = new TwoDimTable( + "Scoring History", null, + new String[rows], + colHeaders.toArray(new String[0]), + colTypes.toArray(new String[0]), + colFormat.toArray(new String[0]), + ""); + int row = 0; + for( int i = 0; i[] _iTreeKeys;