diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6f043c1cef..3eac074040 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,7 +5,7 @@ All notable changes to this project will be documented in this file.
 
 The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
-## [YY.MM.R] - YYYY-MM-DD
+## [20.12] - 2020-12-20
 
 ### Added
 
@@ -16,10 +16,14 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - ([#998](https://github.com/catalyst-team/catalyst/pull/998))
     - ``reciprocal_rank`` metric
     - unified recsys metrics preprocessing
-- ([#1018](https://github.com/catalyst-team/catalyst/pull/1014))
+- ([#1018](https://github.com/catalyst-team/catalyst/pull/1018))
     - readme examples for all supported metrics under ``catalyst.metrics``
     - ``wrap_metric_fn_with_activation`` for model outputs wrapping with activation
     - extra tests for metrics
+- ([#1039](https://github.com/catalyst-team/catalyst/pull/1039))
+    - ``per_class=False`` option for metrics callbacks
+    - ``PrecisionCallback``, ``RecallCallback`` for multiclass problems
+    - extra docs
 
 ### Changed
 
diff --git a/README.md b/README.md
index a3ed7190ad..7e3e82d014 100644
--- a/README.md
+++ b/README.md
@@ -557,7 +557,7 @@ class CustomRunner(dl.Runner):
 
         y_hat, x_ = self.model(x_noise)
         loss_clf = F.cross_entropy(y_hat, y)
-        iou = metrics.iou(x_, x)
+        iou = metrics.iou(x_, x).mean()
         loss_iou = 1 - iou
         loss = loss_clf + loss_iou
         accuracy01, accuracy03, accuracy05 = metrics.accuracy(y_hat, y, topk=(1, 3, 5))
diff --git a/bin/teamcity/dl_cpu.sh b/bin/teamcity/dl_cpu.sh
index 4b2b89d9c7..57c23a9b57 100644
--- a/bin/teamcity/dl_cpu.sh
+++ b/bin/teamcity/dl_cpu.sh
@@ -8,4 +8,4 @@ bash ./bin/teamcity/dl_.sh
 # bash ./bin/teamcity/dl_apex.sh
 
 ################################### CPU ######################################
-USE_APEX="0" CUDA_VISIBLE_DEVICES="" bash ./bin/tests/check_dl_all.sh
+USE_AMP="0" USE_APEX="0" CUDA_VISIBLE_DEVICES="" bash ./bin/tests/check_dl_all.sh
diff --git a/bin/teamcity/dl_gpu.sh b/bin/teamcity/dl_gpu.sh
index 2dbac07b4f..6f8fc20579 100644
--- a/bin/teamcity/dl_gpu.sh
+++ b/bin/teamcity/dl_gpu.sh
@@ -8,5 +8,7 @@ bash ./bin/teamcity/dl_.sh
 bash ./bin/teamcity/dl_apex.sh
 
 ################################### GPU ######################################
-USE_APEX="0" CUDA_VISIBLE_DEVICES="0" bash ./bin/tests/check_dl_all.sh
-USE_APEX="1" CUDA_VISIBLE_DEVICES="0" bash ./bin/tests/check_dl_all.sh
+USE_AMP="0" USE_APEX="0" CUDA_VISIBLE_DEVICES="0" bash ./bin/tests/check_dl_all.sh
+USE_AMP="0" USE_APEX="1" CUDA_VISIBLE_DEVICES="0" bash ./bin/tests/check_dl_all.sh
+USE_AMP="1" USE_APEX="0" CUDA_VISIBLE_DEVICES="0" bash ./bin/tests/check_dl_all.sh
+USE_AMP="1" USE_APEX="1" CUDA_VISIBLE_DEVICES="0" bash ./bin/tests/check_dl_all.sh
diff --git a/bin/teamcity/dl_gpu2.sh b/bin/teamcity/dl_gpu2.sh
index b8b7cac77f..68ebe974b5 100644
--- a/bin/teamcity/dl_gpu2.sh
+++ b/bin/teamcity/dl_gpu2.sh
@@ -8,11 +8,19 @@ bash ./bin/teamcity/dl_.sh
 bash ./bin/teamcity/dl_apex.sh
 
 ################################### GPU2 ####################################
-USE_APEX="0" USE_DDP="0" CUDA_VISIBLE_DEVICES="0,1" \
+USE_AMP="0" USE_APEX="0" USE_DDP="0" CUDA_VISIBLE_DEVICES="0,1" \
     bash ./bin/tests/check_dl_all.sh
-USE_APEX="0" USE_DDP="1" CUDA_VISIBLE_DEVICES="0,1" \
+USE_AMP="0" USE_APEX="0" USE_DDP="1" CUDA_VISIBLE_DEVICES="0,1" \
     bash ./bin/tests/check_dl_all.sh
-USE_APEX="1" USE_DDP="0" CUDA_VISIBLE_DEVICES="0,1" \
+USE_AMP="0" USE_APEX="1" USE_DDP="0" CUDA_VISIBLE_DEVICES="0,1" \
     bash ./bin/tests/check_dl_all.sh
-USE_APEX="1" USE_DDP="1" CUDA_VISIBLE_DEVICES="0,1" \
USE_DDP="1" CUDA_VISIBLE_DEVICES="0,1" \ +USE_AMP="0" USE_APEX="1" USE_DDP="1" CUDA_VISIBLE_DEVICES="0,1" \ + bash ./bin/tests/check_dl_all.sh +USE_AMP="1" USE_APEX="0" USE_DDP="0" CUDA_VISIBLE_DEVICES="0,1" \ + bash ./bin/tests/check_dl_all.sh +USE_AMP="1" USE_APEX="0" USE_DDP="1" CUDA_VISIBLE_DEVICES="0,1" \ + bash ./bin/tests/check_dl_all.sh +USE_AMP="1" USE_APEX="1" USE_DDP="0" CUDA_VISIBLE_DEVICES="0,1" \ + bash ./bin/tests/check_dl_all.sh +USE_AMP="1" USE_APEX="1" USE_DDP="1" CUDA_VISIBLE_DEVICES="0,1" \ bash ./bin/tests/check_dl_all.sh diff --git a/catalyst/callbacks/metrics/__init__.py b/catalyst/callbacks/metrics/__init__.py index 086dd62845..c35335bfac 100644 --- a/catalyst/callbacks/metrics/__init__.py +++ b/catalyst/callbacks/metrics/__init__.py @@ -26,4 +26,8 @@ from catalyst.callbacks.metrics.ppv_tpr_f1 import ( PrecisionRecallF1ScoreCallback, ) -from catalyst.callbacks.metrics.precision import AveragePrecisionCallback +from catalyst.callbacks.metrics.precision import ( + AveragePrecisionCallback, + PrecisionCallback, +) +from catalyst.callbacks.metrics.recall import RecallCallback diff --git a/catalyst/callbacks/metrics/accuracy.py b/catalyst/callbacks/metrics/accuracy.py index 55b14b07d3..f9d55b0fba 100644 --- a/catalyst/callbacks/metrics/accuracy.py +++ b/catalyst/callbacks/metrics/accuracy.py @@ -44,8 +44,9 @@ def __init__( **kwargs: key-value params to pass to the metric .. note:: - For `**kwargs` info, please follow - `catalyst.metrics.accuracy.accuracy` docs + For ``**kwargs`` info, please follow + ``catalyst.callbacks.metric.BatchMetricCallback`` and + ``catalyst.metrics.accuracy.accuracy`` docs """ topk_args = ( topk_args or accuracy_args or get_default_topk_args(num_classes) @@ -91,8 +92,9 @@ def __init__( **kwargs: key-value params to pass to the metric .. note:: - For `**kwargs` info, please follow - `catalyst.metrics.accuracy.multilabel_accuracy` docs + For ``**kwargs`` info, please follow + ``catalyst.callbacks.metric.BatchMetricCallback`` and + ``catalyst.metrics.accuracy.multilabel_accuracy`` docs """ super().__init__( prefix=prefix, diff --git a/catalyst/callbacks/metrics/auc.py b/catalyst/callbacks/metrics/auc.py index c9fbd308ab..b4c295e277 100644 --- a/catalyst/callbacks/metrics/auc.py +++ b/catalyst/callbacks/metrics/auc.py @@ -21,6 +21,7 @@ def __init__( output_key: str = "logits", prefix: str = "auc", activation: str = "Sigmoid", + per_class: bool = False, class_args: List[str] = None, **kwargs, ): @@ -31,21 +32,25 @@ def __init__( output_key: output key to use for auc calculation; specifies our ``y_pred``. prefix: key for the metric's name - multiplier: scale factor for the metric. activation: An torch.nn activation applied to the outputs. Must be one of ``'none'``, ``'Sigmoid'``, or ``'Softmax'`` + per_class: boolean flag to log per class metrics, + or use mean/macro statistics otherwise class_args: class names to display in the logs. If None, defaults to indices for each class, starting from 0 **kwargs: key-value params to pass to the metric .. 
-            For `**kwargs` info, please follow
-            `catalyst.metrics.auc.auc` docs
+            For ``**kwargs`` info, please follow
+            ``catalyst.callbacks.metric.LoaderMetricCallback`` and
+            ``catalyst.metrics.auc.auc`` docs
         """
         metric_fn = wrap_metric_fn_with_activation(
             metric_fn=auc, activation=activation
         )
-        metric_fn = wrap_class_metric2dict(metric_fn, class_args=class_args)
+        metric_fn = wrap_class_metric2dict(
+            metric_fn, per_class=per_class, class_args=class_args
+        )
         super().__init__(
             prefix=prefix,
             metric_fn=metric_fn,
diff --git a/catalyst/callbacks/metrics/dice.py b/catalyst/callbacks/metrics/dice.py
index 7c0946aea9..6682829f0d 100644
--- a/catalyst/callbacks/metrics/dice.py
+++ b/catalyst/callbacks/metrics/dice.py
@@ -27,6 +27,7 @@ def __init__(
         output_key: str = "logits",
         prefix: str = "dice",
         activation: str = "Sigmoid",
+        per_class: bool = False,
         class_args: List[str] = None,
         **kwargs,
     ):
@@ -39,18 +40,23 @@ def __init__(
             prefix: key to store in logs
             activation: An torch.nn activation applied to the outputs.
                 Must be one of ``'none'``, ``'Sigmoid'``, or ``'Softmax'``
+            per_class: boolean flag to log per class metrics,
+                or use mean/macro statistics otherwise
             class_args: class names to display in the logs.
                 If None, defaults to indices for each class, starting from 0
             **kwargs: key-value params to pass to the metric
 
         .. note::
-            For `**kwargs` info, please follow
-            `catalyst.metrics.dice.dice` docs
+            For ``**kwargs`` info, please follow
+            ``catalyst.callbacks.metric.BatchMetricCallback`` and
+            ``catalyst.metrics.dice.dice`` docs
         """
         metric_fn = wrap_metric_fn_with_activation(
             metric_fn=dice, activation=activation
         )
-        metric_fn = wrap_class_metric2dict(metric_fn, class_args=class_args)
+        metric_fn = wrap_class_metric2dict(
+            metric_fn, per_class=per_class, class_args=class_args
+        )
         super().__init__(
             prefix=prefix,
             metric_fn=metric_fn,
diff --git a/catalyst/callbacks/metrics/f1_score.py b/catalyst/callbacks/metrics/f1_score.py
index 43a57e61f7..c6b3e087c1 100644
--- a/catalyst/callbacks/metrics/f1_score.py
+++ b/catalyst/callbacks/metrics/f1_score.py
@@ -17,6 +17,7 @@ def __init__(
         output_key: str = "logits",
         prefix: str = "f1_score",
         activation: str = "Softmax",
+        per_class: bool = False,
         class_args: List[str] = None,
         **kwargs,
     ):
@@ -29,18 +30,23 @@ def __init__(
             prefix: key for the metric's name
             activation: An torch.nn activation applied to the outputs.
                 Must be one of ``'none'``, ``'Sigmoid'``, or ``'Softmax'``
+            per_class: boolean flag to log per class metrics,
+                or use mean/macro statistics otherwise
             class_args: class names to display in the logs.
                 If None, defaults to indices for each class, starting from 0
             **kwargs: key-value params to pass to the metric
 
         .. note::
-            For `**kwargs` info, please follow
-            `catalyst.metrics.f1_score.fbeta_score` docs
+            For ``**kwargs`` info, please follow
+            ``catalyst.callbacks.metric.BatchMetricCallback`` and
+            ``catalyst.metrics.f1_score.fbeta_score`` docs
         """
         metric_fn = wrap_metric_fn_with_activation(
             metric_fn=fbeta_score, activation=activation
         )
-        metric_fn = wrap_class_metric2dict(metric_fn, class_args=class_args)
+        metric_fn = wrap_class_metric2dict(
+            metric_fn, per_class=per_class, class_args=class_args
+        )
         super().__init__(
             prefix=prefix,
             metric_fn=metric_fn,
diff --git a/catalyst/callbacks/metrics/iou.py b/catalyst/callbacks/metrics/iou.py
index 7b641aeb4b..25a205676d 100644
--- a/catalyst/callbacks/metrics/iou.py
+++ b/catalyst/callbacks/metrics/iou.py
@@ -17,6 +17,7 @@ def __init__(
         output_key: str = "logits",
         prefix: str = "iou",
         activation: str = "Sigmoid",
+        per_class: bool = False,
         class_args: List[str] = None,
         **kwargs,
     ):
@@ -31,18 +32,23 @@ def __init__(
             threshold: threshold for outputs binarization
             activation: An torch.nn activation applied to the outputs.
                 Must be one of ``'none'``, ``'Sigmoid'``, ``'Softmax'``
+            per_class: boolean flag to log per class metrics,
+                or use mean/macro statistics otherwise
             class_args: class names to display in the logs.
                 If None, defaults to indices for each class, starting from 0
             **kwargs: key-value params to pass to the metric
 
         .. note::
-            For `**kwargs` info, please follow
-            `catalyst.metrics.iou.iou` docs
+            For ``**kwargs`` info, please follow
+            ``catalyst.callbacks.metric.BatchMetricCallback`` and
+            ``catalyst.metrics.iou.iou`` docs
         """
         metric_fn = wrap_metric_fn_with_activation(
             metric_fn=iou, activation=activation
         )
-        metric_fn = wrap_class_metric2dict(metric_fn, class_args=class_args)
+        metric_fn = wrap_class_metric2dict(
+            metric_fn, per_class=per_class, class_args=class_args
+        )
         super().__init__(
             prefix=prefix,
             metric_fn=metric_fn,
diff --git a/catalyst/callbacks/metrics/precision.py b/catalyst/callbacks/metrics/precision.py
index e96b35778e..0d005ed82a 100644
--- a/catalyst/callbacks/metrics/precision.py
+++ b/catalyst/callbacks/metrics/precision.py
@@ -1,11 +1,58 @@
 from typing import List
 
-from catalyst.callbacks.metric import LoaderMetricCallback
+from catalyst.callbacks.metric import BatchMetricCallback, LoaderMetricCallback
 from catalyst.metrics.functional import (
     wrap_class_metric2dict,
     wrap_metric_fn_with_activation,
 )
-from catalyst.metrics.precision import average_precision
+from catalyst.metrics.precision import average_precision, precision
+
+
+class PrecisionCallback(BatchMetricCallback):
+    """Precision score metric callback."""
+
+    def __init__(
+        self,
+        input_key: str = "targets",
+        output_key: str = "logits",
+        prefix: str = "precision",
+        activation: str = "Softmax",
+        per_class: bool = False,
+        class_args: List[str] = None,
+        **kwargs,
+    ):
+        """
+        Args:
+            input_key: input key to use for precision calculation
+                specifies our ``y_true``
+            output_key: output key to use for precision calculation;
+                specifies our ``y_pred``
+            prefix: key for the metric's name
+            activation: A torch.nn activation applied to the outputs.
+                Must be one of ``'none'``, ``'Sigmoid'``, or ``'Softmax'``
+            per_class: boolean flag to log per class metrics,
+                or use mean/macro statistics otherwise
+            class_args: class names to display in the logs.
+                If None, defaults to indices for each class, starting from 0
+            **kwargs: key-value params to pass to the metric
+
+        .. note::
+            For ``**kwargs`` info, please follow
+            ``catalyst.callbacks.metric.BatchMetricCallback`` and
+            ``catalyst.metrics.precision.precision`` docs
+        """
+        metric_fn = wrap_metric_fn_with_activation(
+            metric_fn=precision, activation=activation
+        )
+        metric_fn = wrap_class_metric2dict(
+            metric_fn, per_class=per_class, class_args=class_args
+        )
+        super().__init__(
+            prefix=prefix,
+            metric_fn=metric_fn,
+            input_key=input_key,
+            output_key=output_key,
+            **kwargs,
+        )
 
 
 class AveragePrecisionCallback(LoaderMetricCallback):
@@ -17,6 +64,7 @@ def __init__(
         output_key: str = "logits",
         prefix: str = "average_precision",
         activation: str = "Sigmoid",
+        per_class: bool = False,
         class_args: List[str] = None,
         **kwargs,
     ):
@@ -31,18 +79,23 @@ def __init__(
             prefix: key for the metric's name
             activation: An torch.nn activation applied to the outputs.
                 Must be one of ``'none'``, ``'Sigmoid'``, or ``'Softmax'``
+            per_class: boolean flag to log per class metrics,
+                or use mean/macro statistics otherwise
             class_args: class names to display in the logs.
                 If None, defaults to indices for each class, starting from 0
             **kwargs: key-value params to pass to the metric
 
         .. note::
-            For `**kwargs` info, please follow
-            `catalyst.metrics.precision.average_precision` docs
+            For ``**kwargs`` info, please follow
+            ``catalyst.callbacks.metric.LoaderMetricCallback`` and
+            ``catalyst.metrics.precision.average_precision`` docs
         """
        metric_fn = wrap_metric_fn_with_activation(
             metric_fn=average_precision, activation=activation
         )
-        metric_fn = wrap_class_metric2dict(metric_fn, class_args=class_args)
+        metric_fn = wrap_class_metric2dict(
+            metric_fn, per_class=per_class, class_args=class_args
+        )
         super().__init__(
             prefix=prefix,
             metric_fn=metric_fn,
@@ -52,4 +105,4 @@ def __init__(
         )
 
 
-__all__ = ["AveragePrecisionCallback"]
+__all__ = ["AveragePrecisionCallback", "PrecisionCallback"]
diff --git a/catalyst/callbacks/metrics/recall.py b/catalyst/callbacks/metrics/recall.py
new file mode 100644
index 0000000000..de335ebce4
--- /dev/null
+++ b/catalyst/callbacks/metrics/recall.py
@@ -0,0 +1,59 @@
+from typing import List
+
+from catalyst.callbacks.metric import BatchMetricCallback
+from catalyst.metrics.functional import (
+    wrap_class_metric2dict,
+    wrap_metric_fn_with_activation,
+)
+from catalyst.metrics.recall import recall
+
+
+class RecallCallback(BatchMetricCallback):
+    """Recall score metric callback."""
+
+    def __init__(
+        self,
+        input_key: str = "targets",
+        output_key: str = "logits",
+        prefix: str = "recall",
+        activation: str = "Softmax",
+        per_class: bool = False,
+        class_args: List[str] = None,
+        **kwargs,
+    ):
+        """
+        Args:
+            input_key: input key to use for recall calculation
+                specifies our ``y_true``
+            output_key: output key to use for recall calculation;
+                specifies our ``y_pred``
+            prefix: key for the metric's name
+            activation: A torch.nn activation applied to the outputs.
+                Must be one of ``'none'``, ``'Sigmoid'``, or ``'Softmax'``
+            per_class: boolean flag to log per class metrics,
+                or use mean/macro statistics otherwise
+            class_args: class names to display in the logs.
+                If None, defaults to indices for each class, starting from 0
+            **kwargs: key-value params to pass to the metric
+
+        .. note::
+            For ``**kwargs`` info, please follow
+            ``catalyst.callbacks.metric.BatchMetricCallback`` and
+            ``catalyst.metrics.recall.recall`` docs
+        """
+        metric_fn = wrap_metric_fn_with_activation(
+            metric_fn=recall, activation=activation
+        )
+        metric_fn = wrap_class_metric2dict(
+            metric_fn, per_class=per_class, class_args=class_args
+        )
+        super().__init__(
+            prefix=prefix,
+            metric_fn=metric_fn,
+            input_key=input_key,
+            output_key=output_key,
+            **kwargs,
+        )
+
+
+__all__ = ["RecallCallback"]
diff --git a/catalyst/metrics/functional.py b/catalyst/metrics/functional.py
index 85003df50d..f01ec77f37 100644
--- a/catalyst/metrics/functional.py
+++ b/catalyst/metrics/functional.py
@@ -1,5 +1,6 @@
 from typing import Callable, Dict, Optional, Sequence, Tuple
 from functools import partial
+import logging
 
 import numpy as np
 
@@ -14,6 +15,8 @@
 # https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/metrics
 # as a baseline
 
+logger = logging.getLogger(__name__)
+
 
 def process_multiclass_components(
     outputs: torch.Tensor,
@@ -417,7 +420,9 @@ def wrapped_metric_fn(
 
 
 def wrap_class_metric2dict(
-    metric_fn: Callable, class_args: Sequence[str] = None
+    metric_fn: Callable,
+    per_class: bool = False,
+    class_args: Sequence[str] = None,
 ) -> Callable:
     """# noqa: D202
     Logging wrapper for metrics with torch.Tensor output
@@ -427,26 +432,44 @@
 
     Args:
         metric_fn: metric function to compute
-        class_args: class names for logging.
+        per_class: boolean flag to log per class metrics,
+            or use mean/macro statistics otherwise
+        class_args: class names for logging,
             default: None - class indexes will be used.
 
     Returns:
         wrapped metric function with List[Dict] output
     """
+    if per_class is False and class_args is not None:
+        logger.warning(
+            "``per_class`` is disabled, but ``class_args`` is not None; "
+            "check the experiment conditions."
+        )
 
-    def class_metric_with_dict_output(*args, **kwargs):
-        output = metric_fn(*args, **kwargs)
-        num_classes = len(output)
-        output_class_args = class_args or [
-            f"/class_{i:02}" for i in range(num_classes)
-        ]
-        mean_stats = torch.mean(output).item()
-        output = {
-            key: value.item() for key, value in zip(output_class_args, output)
-        }
-        output[""] = mean_stats
-        output["/mean"] = mean_stats
-        return output
+    if per_class:
+
+        def class_metric_with_dict_output(*args, **kwargs):
+            output = metric_fn(*args, **kwargs)
+            num_classes = len(output)
+            output_class_args = class_args or [
+                f"/class_{i:02}" for i in range(num_classes)
+            ]
+            mean_stats = torch.mean(output).item()
+            output = {
+                key: value.item()
+                for key, value in zip(output_class_args, output)
+            }
+            output[""] = mean_stats
+            output["/mean"] = mean_stats
+            return output
+
+    else:
+
+        def class_metric_with_dict_output(*args, **kwargs):
+            output = metric_fn(*args, **kwargs)
+            mean_stats = torch.mean(output).item()
+            output = {"": mean_stats}
+            return output
 
     return class_metric_with_dict_output
diff --git a/catalyst/metrics/tests/test_fbeta_precision_recall.py b/catalyst/metrics/tests/test_fbeta_precision_recall.py
index 2804d56168..6e8af350f3 100644
--- a/catalyst/metrics/tests/test_fbeta_precision_recall.py
+++ b/catalyst/metrics/tests/test_fbeta_precision_recall.py
@@ -61,11 +61,11 @@ def test_precision_recall_fbeta_support_binary(
     (
         precision_score,
         recall_score,
-        fbeta_score_ev,
+        fbeta_score_value,
         support,
     ) = precision_recall_fbeta_support(outputs=outputs, targets=targets)
 
     assert torch.isclose(precision_score[1], torch.tensor(precision_true))
     assert torch.isclose(recall_score[1], torch.tensor(recall_true))
-    assert torch.isclose(fbeta_score_ev[1], torch.tensor(fbeta_true))
+    assert torch.isclose(fbeta_score_value[1], torch.tensor(fbeta_true))
     assert support[1] == support_true
diff --git a/docs/api/callbacks.rst b/docs/api/callbacks.rst
index eefd8c6702..380ae1435b 100644
--- a/docs/api/callbacks.rst
+++ b/docs/api/callbacks.rst
@@ -191,16 +191,23 @@ Perplexity
     :undoc-members:
     :show-inheritance:
 
-Global precision, recall and F1-score
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-.. automodule:: catalyst.callbacks.metrics.ppv_tpr_f1
+Precision
+~~~~~~~~~~~~~~~~~~~~~~
+.. automodule:: catalyst.callbacks.metrics.precision
     :members:
     :undoc-members:
     :show-inheritance:
 
-Precision
+Recall
 ~~~~~~~~~~~~~~~~~~~~~~
-.. automodule:: catalyst.callbacks.metrics.precision
+.. automodule:: catalyst.callbacks.metrics.recall
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+Global precision, recall and F1-score
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. automodule:: catalyst.callbacks.metrics.ppv_tpr_f1
     :members:
     :undoc-members:
     :show-inheritance:
diff --git a/tests/_tests_scripts/dl_z_mvp_mnist_unet.py b/tests/_tests_scripts/dl_z_mvp_mnist_unet.py
index 977afa6aaa..d46a561c5d 100644
--- a/tests/_tests_scripts/dl_z_mvp_mnist_unet.py
+++ b/tests/_tests_scripts/dl_z_mvp_mnist_unet.py
@@ -35,7 +35,7 @@ def _handle_batch(self, batch):
 
         y_hat, x_ = self.model(x_noise)
         loss_clf = F.cross_entropy(y_hat, y)
-        iou = metrics.iou(x_, x)
+        iou = metrics.iou(x_, x).mean()
         loss_iou = 1 - iou
         loss = loss_clf + loss_iou
         accuracy01, accuracy03, accuracy05 = metrics.accuracy(