diff --git a/river/metrics/efficient_rollingprauc/__init__.py b/river/metrics/efficient_rollingprauc/__init__.py
new file mode 100644
index 0000000000..cac3a79140
--- /dev/null
+++ b/river/metrics/efficient_rollingprauc/__init__.py
@@ -0,0 +1,5 @@
+from __future__ import annotations
+
+from .efficient_rollingprauc import EfficientRollingPRAUC
+
+__all__ = ["EfficientRollingPRAUC"]
diff --git a/river/metrics/efficient_rollingprauc/cpp/RollingPRAUC.cpp b/river/metrics/efficient_rollingprauc/cpp/RollingPRAUC.cpp
new file mode 100644
index 0000000000..568d0b4bc1
--- /dev/null
+++ b/river/metrics/efficient_rollingprauc/cpp/RollingPRAUC.cpp
@@ -0,0 +1,168 @@
+#include "RollingPRAUC.hpp"
+
+#include <cmath>
+#include <iterator>
+#include <limits>
+
+namespace rollingprauc {
+
+RollingPRAUC::RollingPRAUC(): positiveLabel{1}, windowSize{1000}, positives{0} {
+}
+
+RollingPRAUC::RollingPRAUC(int positiveLabel, long unsigned windowSize):
+    positiveLabel{positiveLabel}, windowSize{windowSize}, positives{0} {
+}
+
+// Slides the window: evicts the oldest instance once the window is full,
+// then appends the new one.
+void RollingPRAUC::update(int label, double score) {
+    // A zero-sized window retains nothing. Returning early also keeps
+    // removeLast() from running on an empty window.
+    if (this->windowSize == 0)
+        return;
+
+    if (this->window.size() >= this->windowSize)
+        this->removeLast();
+
+    this->insert(label, score);
+}
+
+// Removes the most recent instance whose score and normalized label match
+// the arguments. Does nothing when no such instance is in the window.
+void RollingPRAUC::revert(int label, double score) {
+    const int normalizedLabel{label == this->positiveLabel ? 1 : 0};
+
+    // Search from the newest entry backwards so that the instance removed
+    // is the most recently inserted match.
+    std::deque<std::tuple<double, int>>::reverse_iterator rit{this->window.rbegin()};
+    for (; rit != this->window.rend(); ++rit)
+        if (std::get<0>(*rit) == score && std::get<1>(*rit) == normalizedLabel)
+            break;
+
+    if (rit == this->window.rend())
+        return;
+
+    // std::next(rit).base() is the forward iterator to the element that
+    // rit designates.
+    this->window.erase(std::next(rit).base());
+
+    if (normalizedLabel)
+        this->positives--;
+
+    // insert() stores the normalized label, so the lookup must use it too:
+    // a raw label that differs from its normalized value would miss, and
+    // erasing end() is undefined behaviour.
+    std::multiset<std::tuple<double, int>>::const_iterator itr{
+        this->orderedWindow.find(std::make_tuple(score, normalizedLabel))
+    };
+    if (itr != this->orderedWindow.end())
+        this->orderedWindow.erase(itr);
+}
+
+// Computes the area under the precision-recall curve of the current window.
+double RollingPRAUC::get() const {
+    const std::size_t total{this->window.size()};
+    const std::size_t negatives{total - this->positives};
+
+    // A window holding a single class is reported as 0; with no positives
+    // the final division below would be a division by zero.
+    if (this->positives == 0 || negatives == 0)
+        return 0;
+
+    std::size_t fp{negatives};
+    std::size_t tp{this->positives}, tpPrev{tp};
+
+    double auc{0}, scorePrev{std::numeric_limits<double>::max()};
+
+    double prec{tp / (double) (tp + fp)}, precPrev{prec};
+
+    for (const std::tuple<double, int>& instance : this->orderedWindow) {
+        const double score{std::get<0>(instance)};
+        const int label{std::get<1>(instance)};
+
+        if (score != scorePrev) {
+            prec = tp / (double) (tp + fp);
+
+            // Keep the precision monotonic: never let it fall below the
+            // value used for the previous point.
+            if (precPrev > prec)
+                prec = precPrev;
+
+            auc += this->trapzArea(tp, tpPrev, prec, precPrev);
+
+            scorePrev = score;
+            tpPrev = tp;
+            precPrev = prec;
+        }
+
+        if (label) tp--;
+        else fp--;
+    }
+
+    auc += this->trapzArea(tp, tpPrev, 1.0, precPrev);
+
+    return auc / this->positives; // Scale the x axis
+}
+
+// Appends an instance to both containers, normalizing the label to
+// 0 (negative) or 1 (positive).
+void RollingPRAUC::insert(int label, double score) {
+    int normalizedLabel = 0;
+    if (label == this->positiveLabel) {
+        normalizedLabel = 1;
+        this->positives++;
+    }
+
+    this->window.emplace_back(score, normalizedLabel);
+    this->orderedWindow.emplace(score, normalizedLabel);
+}
+
+// Evicts the oldest instance. Does nothing when the window is empty.
+void RollingPRAUC::removeLast() {
+    if (this->window.empty())
+        return;
+
+    const std::tuple<double, int> oldest{this->window.front()};
+    this->window.pop_front();
+
+    if (std::get<1>(oldest))
+        this->positives--;
+
+    // Erase through an iterator so that a single instance is removed;
+    // erasing by value would drop every equivalent instance.
+    std::multiset<std::tuple<double, int>>::iterator it{
+        this->orderedWindow.find(oldest)
+    };
+    if (it != this->orderedWindow.end())
+        this->orderedWindow.erase(it);
+}
+
+// Returns the normalized labels (0 or 1) in window order, oldest first.
+std::vector<int> RollingPRAUC::getTrueLabels() const {
+    std::vector<int> trueLabels;
+    trueLabels.reserve(this->window.size());
+
+    for (const std::tuple<double, int>& instance : this->window)
+        trueLabels.push_back(std::get<1>(instance));
+
+    return trueLabels;
+}
+
+// Returns the scores in window order, oldest first.
+std::vector<double> RollingPRAUC::getScores() const {
+    std::vector<double> scores;
+    scores.reserve(this->window.size());
+
+    for (const std::tuple<double, int>& instance : this->window)
+        scores.push_back(std::get<0>(instance));
+
+    return scores;
+}
+
+double RollingPRAUC::trapzArea(double x1, double x2, double y1, double y2) const {
+    // std::abs from <cmath> selects the floating-point overload; an
+    // unqualified abs may resolve to the integer one from the C library.
+    return std::abs(x1 - x2) * (y1 + y2) / 2;
+}
+
+} // namespace rollingprauc
diff --git a/river/metrics/efficient_rollingprauc/cpp/RollingPRAUC.hpp b/river/metrics/efficient_rollingprauc/cpp/RollingPRAUC.hpp
new file mode 100644
index 0000000000..e47e423079
--- /dev/null
+++ b/river/metrics/efficient_rollingprauc/cpp/RollingPRAUC.hpp
@@ -0,0 +1,60 @@
+#ifndef ROLLINGPRAUC_HPP
+#define ROLLINGPRAUC_HPP
+
+#include <cstddef>
+#include <deque>
+#include <set>
+#include <tuple>
+#include <vector>
+
+namespace rollingprauc {
+
+class RollingPRAUC {
+    public:
+        RollingPRAUC();
+        RollingPRAUC(const int positiveLabel, const long unsigned windowSize);
+
+        virtual ~RollingPRAUC() = default;
+
+        // Calls insert() and removeLast() if needed
+        virtual void update(const int label, const double score);
+
+        // Erase the most recent instance with content equal to params
+        virtual void revert(const int label, const double score);
+
+        // Calculates the PRAUC and returns it
+        virtual double get() const;
+
+        // Returns y_true as a vector
+        virtual std::vector<int> getTrueLabels() const;
+
+        // Returns y_score as a vector
+        virtual std::vector<double> getScores() const;
+
+    private:
+        // Insert instance based on params
+        virtual void insert(const int label, const double score);
+
+        // Remove oldest instance
+        virtual void removeLast();
+
+        // Calculates the trapezoid area
+        double trapzArea(double x1, double x2, double y1, double y2) const;
+
+        int positiveLabel;
+
+        std::size_t windowSize;
+        std::size_t positives;
+
+        // window maintains a queue of the instances to store the temporal
+        // aspect of the stream. Using deque to allow revert()
+        std::deque<std::tuple<double, int>> window;
+
+        // orderedWindow maintains a multiset (implemented as a tree)
+        // to store the ordered instances
+        std::multiset<std::tuple<double, int>> orderedWindow;
+};
+
+} // namespace rollingprauc
+
+#endif
diff --git a/river/metrics/efficient_rollingprauc/efficient_rollingprauc.pxd b/river/metrics/efficient_rollingprauc/efficient_rollingprauc.pxd
new file mode 100644
index 0000000000..c9a0d8f5c0
--- /dev/null
+++ b/river/metrics/efficient_rollingprauc/efficient_rollingprauc.pxd
@@ -0,0 +1,16 @@
+from libcpp.vector cimport vector
+
+cdef extern from "cpp/RollingPRAUC.cpp":
+    pass
+
+cdef extern from "cpp/RollingPRAUC.hpp" namespace "rollingprauc":
+    cdef cppclass RollingPRAUC:
+        # windowSize is unsigned long to match the C++ constructor
+        RollingPRAUC(int positiveLabel, unsigned long windowSize) except +
+        # "except +" turns C++ exceptions (e.g. failed allocations) into
+        # Python exceptions
+        void update(int label, double score) except +
+        void revert(int label, double score)
+        double get()
+        vector[int] getTrueLabels() except +
+        vector[double] getScores() except +
diff --git a/river/metrics/efficient_rollingprauc/efficient_rollingprauc.pyx b/river/metrics/efficient_rollingprauc/efficient_rollingprauc.pyx
new file mode 100644
index 0000000000..1c8cb4123e
--- /dev/null
+++ b/river/metrics/efficient_rollingprauc/efficient_rollingprauc.pyx
@@ -0,0 +1,71 @@
+# distutils: language = c++
+# distutils: extra_compile_args = "-std=c++11"
+
+import cython
+
+from .efficient_rollingprauc cimport RollingPRAUC as CppRollingPRAUC
+
+cdef class EfficientRollingPRAUC:
+    """Rolling-window PR AUC backed by the C++ ``RollingPRAUC`` class.
+
+    Parameters
+    ----------
+    positiveLabel
+        Label value counted as the positive class.
+    windowSize
+        Maximum number of instances kept in the window.
+    """
+
+    cdef cython.int positiveLabel
+    cdef cython.ulong windowSize
+    cdef CppRollingPRAUC* rollingprauc
+
+    def __cinit__(self, cython.int positiveLabel, cython.ulong windowSize):
+        self.positiveLabel = positiveLabel
+        self.windowSize = windowSize
+        self.rollingprauc = new CppRollingPRAUC(positiveLabel, windowSize)
+
+    def __dealloc__(self):
+        if self.rollingprauc != NULL:
+            del self.rollingprauc
+            self.rollingprauc = NULL
+
+    def update(self, label, score):
+        """Add an instance, evicting the oldest one if the window is full."""
+        self.rollingprauc.update(label, score)
+
+    def revert(self, label, score):
+        """Remove a matching instance from the window, if there is one."""
+        self.rollingprauc.revert(label, score)
+
+    def get(self):
+        """Return the PR AUC of the instances currently in the window."""
+        return self.rollingprauc.get()
+
+    def __getnewargs_ex__(self):
+        # Pickle will use this function to pass the arguments to __new__
+        return (self.positiveLabel, self.windowSize), {}
+
+    def __getstate__(self):
+        """
+        On pickling, the true labels and scores of the instances in the
+        window will be dumped
+        """
+        return (self.rollingprauc.getTrueLabels(), self.rollingprauc.getScores())
+
+    def __setstate__(self, state):
+        """
+        On unpickling, the state parameter will have the true labels
+        and scores, this function updates the rollingprauc with them
+        """
+
+        # Labels returned by __getstate__ are normalized (0 or 1)
+        labels, scores = state
+
+        # Any value other than positiveLabel counts as negative
+        negative_label = int(not self.positiveLabel)
+
+        for label, score in zip(labels, scores):
+            # If label is 1, update with the positive label defined by the constructor
+            # Else, update with a negative label
+            self.update(self.positiveLabel if label else negative_label, score)