From 0fc98f4ac4fc36eda32d91f6cafee6f1e9b126a2 Mon Sep 17 00:00:00 2001 From: Uditi Mehta <57388785+uditijmehta@users.noreply.github.com> Date: Wed, 9 Oct 2024 13:54:38 -0400 Subject: [PATCH 01/35] [ENG-6195] Fix admin confirmation link generation and handling (#10734) * Fix admin confirmation link generation and handling * Fix test failures * Refactor confirmation link logic and add unit tests * cleanup unused code --------- Co-authored-by: Uditi Mehta Co-authored-by: Uditi Mehta --- admin/users/views.py | 16 ++++++++--- admin_tests/users/test_views.py | 48 +++++++++++++++++++++++++++++++-- osf/models/user.py | 17 ++++++++++++ 3 files changed, 76 insertions(+), 5 deletions(-) diff --git a/admin/users/views.py b/admin/users/views.py index 69bfa821c5c..1e6d6e3b09a 100644 --- a/admin/users/views.py +++ b/admin/users/views.py @@ -16,6 +16,7 @@ from django.core.mail import send_mail from django.shortcuts import redirect from django.core.paginator import Paginator +from django.core.exceptions import ValidationError from osf.exceptions import UserStateError from osf.models.base import Guid @@ -456,10 +457,19 @@ def get_context_data(self, **kwargs): class GetUserConfirmationLink(GetUserLink): def get_link(self, user): + if user.is_confirmed: + return f'User {user._id} is already confirmed' + + if user.deleted or user.is_merged: + return f'User {user._id} is deleted or merged' + try: - return user.get_confirmation_url(user.username, force=True) - except KeyError as e: - return str(e) + confirmation_link = user.get_or_create_confirmation_url(user.username, force=True, renew=True) + return confirmation_link + except ValidationError: + return f'Invalid email for user {user._id}' + except KeyError: + return 'Could not generate or refresh confirmation link' def get_link_type(self): return 'User Confirmation' diff --git a/admin_tests/users/test_views.py b/admin_tests/users/test_views.py index 80da9721651..cd51459e134 100644 --- a/admin_tests/users/test_views.py +++ b/admin_tests/users/test_views.py @@ -486,10 +486,15 @@ def test_get_user_confirmation_link(self): view = views.GetUserConfirmationLink() view = setup_view(view, request, guid=user._id) + link = view.get_link(user) + + user.refresh_from_db() + user_token = list(user.email_verifications.keys())[0] + ideal_link_path = f'/confirm/{user._id}/{user_token}/' - link = view.get_link(user) - link_path = str(furl(link).path) + + link_path = str(furl(link).path).rstrip('/') + '/' assert link_path == ideal_link_path @@ -511,6 +516,45 @@ def test_get_user_confirmation_link_with_expired_token(self): assert link_path == ideal_link_path + def test_get_user_confirmation_link_generates_new_token_if_expired(self): + user = UnconfirmedUserFactory() + request = RequestFactory().get('/fake_path') + view = views.GetUserConfirmationLink() + view = setup_view(view, request, guid=user._id) + + old_user_token = list(user.email_verifications.keys())[0] + user.email_verifications[old_user_token]['expiration'] = datetime.utcnow().replace(tzinfo=pytz.utc) - timedelta(hours=24) + user.save() + + link = view.get_link(user) + user.refresh_from_db() + + new_user_token = list(user.email_verifications.keys())[0] + + assert new_user_token != old_user_token + + link_path = str(furl(link).path) + ideal_link_path = f'/confirm/{user._id}/{new_user_token}/' + assert link_path == ideal_link_path + + def test_get_user_confirmation_link_does_not_change_unexpired_token(self): + user = UnconfirmedUserFactory() + request = RequestFactory().get('/fake_path') + view = 
views.GetUserConfirmationLink()
+        view = setup_view(view, request, guid=user._id)
+
+        user_token_before = list(user.email_verifications.keys())[0]
+
+        user.email_verifications[user_token_before]['expiration'] = datetime.utcnow().replace(tzinfo=pytz.utc) + timedelta(hours=24)
+        user.save()
+
+        with mock.patch('osf.models.user.OSFUser.get_or_create_confirmation_url') as mock_method:
+            mock_method.return_value = user.get_confirmation_url(user.username, force=False, renew=False)
+            # Exercise the view inside the patch context; without this call
+            # the assertion below passes vacuously.
+            view.get_link(user)
+
+        user_token_after = list(user.email_verifications.keys())[0]
+
+        assert user_token_before == user_token_after
+
     def test_get_password_reset_link(self):
         user = UnconfirmedUserFactory()
         request = RequestFactory().get('/fake_path')

diff --git a/osf/models/user.py b/osf/models/user.py
index 29e10efa991..d0783c208aa 100644
--- a/osf/models/user.py
+++ b/osf/models/user.py
@@ -1342,6 +1342,23 @@ def get_confirmation_url(self, email,
         destination = '?{}'.format(urlencode({'destination': destination})) if destination else ''
         return f'{base}confirm/{external}{self._primary_key}/{token}/{destination}'
 
+    def get_or_create_confirmation_url(self, email, force=False, renew=False):
+        """
+        Get or create a confirmation URL for the given email.
+
+        :param email: The email to generate a confirmation URL for.
+        :param force: Force generating a new confirmation link.
+        :param renew: Renew an expired token.
+        :raises ValidationError: If email is invalid or domain is banned.
+        :return: Confirmation URL for the email.
+        """
+        try:
+            self.get_confirmation_token(email, force=force, renew=renew)
+        except KeyError:
+            self.add_unconfirmed_email(email)
+            self.save()
+        return self.get_confirmation_url(email)
+
     def register(self, username, password=None, accepted_terms_of_service=None):
         """Registers the user.
""" From 5ae3ed2da3c0882734f4408fcfdae9d527bb0f4c Mon Sep 17 00:00:00 2001 From: Uditi Mehta Date: Fri, 20 Sep 2024 13:34:49 -0400 Subject: [PATCH 02/35] Resolve issue with updating preprint fields and validation errors --- api/preprints/serializers.py | 62 ++- .../preprints/views/test_preprint_detail.py | 362 ++++++++++++++++++ osf/models/preprint.py | 50 +-- 3 files changed, 449 insertions(+), 25 deletions(-) diff --git a/api/preprints/serializers.py b/api/preprints/serializers.py index 97cc3f3fb7c..7d3ebdfaa00 100644 --- a/api/preprints/serializers.py +++ b/api/preprints/serializers.py @@ -369,7 +369,67 @@ def update(self, preprint, validated_data): preprint.custom_publication_citation = validated_data['custom_publication_citation'] or None save_preprint = True - self.handle_author_assertions(preprint, validated_data, auth) + if 'has_coi' in validated_data: + try: + preprint.update_has_coi(auth, validated_data['has_coi']) + save_preprint = True + except PreprintStateError as e: + raise exceptions.ValidationError(detail=str(e)) + + if 'conflict_of_interest_statement' in validated_data: + try: + preprint.update_conflict_of_interest_statement(auth, validated_data['conflict_of_interest_statement']) + save_preprint = True + except PreprintStateError as e: + raise exceptions.ValidationError(detail=str(e)) + + if 'has_data_links' in validated_data: + try: + preprint.update_has_data_links(auth, validated_data['has_data_links']) + save_preprint = True + except PreprintStateError as e: + raise exceptions.ValidationError(detail=str(e)) + + if 'why_no_data' in validated_data: + try: + preprint.update_why_no_data(auth, validated_data['why_no_data']) + save_preprint = True + except PreprintStateError as e: + raise exceptions.ValidationError(detail=str(e)) + + if 'data_links' in validated_data: + try: + preprint.update_data_links(auth, validated_data['data_links']) + save_preprint = True + except PreprintStateError as e: + raise exceptions.ValidationError(detail=str(e)) + + if 'has_prereg_links' in validated_data: + try: + preprint.update_has_prereg_links(auth, validated_data['has_prereg_links']) + save_preprint = True + except PreprintStateError as e: + raise exceptions.ValidationError(detail=str(e)) + + if 'why_no_prereg' in validated_data: + try: + preprint.update_why_no_prereg(auth, validated_data['why_no_prereg']) + except PreprintStateError as e: + raise exceptions.ValidationError(detail=str(e)) + + if 'prereg_links' in validated_data: + try: + preprint.update_prereg_links(auth, validated_data['prereg_links']) + save_preprint = True + except PreprintStateError as e: + raise exceptions.ValidationError(detail=str(e)) + + if 'prereg_link_info' in validated_data: + try: + preprint.update_prereg_link_info(auth, validated_data['prereg_link_info']) + save_preprint = True + except PreprintStateError as e: + raise exceptions.ValidationError(detail=str(e)) if published is not None: if not preprint.primary_file: diff --git a/api_tests/preprints/views/test_preprint_detail.py b/api_tests/preprints/views/test_preprint_detail.py index 7e3b279c406..df50db8166d 100644 --- a/api_tests/preprints/views/test_preprint_detail.py +++ b/api_tests/preprints/views/test_preprint_detail.py @@ -835,6 +835,368 @@ def test_update_preprint_task_called_on_api_update( assert mock_on_preprint_updated.called + def test_update_has_coi(self, app, user, preprint, url): + update_payload = build_preprint_update_payload( + preprint._id, + attributes={'has_coi': True} + ) + + contrib = AuthUserFactory() + preprint.add_contributor(contrib, 
READ) + res = app.patch_json_api(url, update_payload, auth=contrib.auth, expect_errors=True) + assert res.status_code == 403 + assert res.json['errors'][0]['detail'] == 'You do not have permission to perform this action.' + + res = app.patch_json_api(url, update_payload, auth=user.auth) + + assert res.status_code == 200 + assert res.json['data']['attributes']['has_coi'] + + preprint.reload() + assert preprint.has_coi + log = preprint.logs.first() + assert log.action == PreprintLog.UPDATE_HAS_COI + assert log.params == {'preprint': preprint._id, 'user': user._id, 'value': True} + + def test_update_conflict_of_interest_statement(self, app, user, preprint, url): + update_payload = build_preprint_update_payload( + preprint._id, + attributes={'conflict_of_interest_statement': 'Owns shares in Closed Science Corporation.'} + ) + contrib = AuthUserFactory() + preprint.add_contributor(contrib, READ) + res = app.patch_json_api(url, update_payload, auth=contrib.auth, expect_errors=True) + assert res.status_code == 403 + assert res.json['errors'][0]['detail'] == 'You do not have permission to perform this action.' + + preprint.has_coi = False + preprint.save() + res = app.patch_json_api(url, update_payload, auth=user.auth, expect_errors=True) + + assert res.status_code == 200 + assert res.json['data']['attributes']['conflict_of_interest_statement'] ==\ + 'Owns shares in Closed Science Corporation.' + + preprint.has_coi = True + preprint.save() + res = app.patch_json_api(url, update_payload, auth=user.auth) + + assert res.status_code == 200 + assert res.json['data']['attributes']['conflict_of_interest_statement'] ==\ + 'Owns shares in Closed Science Corporation.' + + preprint.reload() + assert preprint.conflict_of_interest_statement == 'Owns shares in Closed Science Corporation.' + log = preprint.logs.first() + assert log.action == PreprintLog.UPDATE_COI_STATEMENT + assert log.params == { + 'preprint': preprint._id, + 'user': user._id, + 'value': 'Owns shares in Closed Science Corporation.' + } + + def test_update_has_data_links(self, app, user, preprint, url): + update_payload = build_preprint_update_payload(preprint._id, attributes={'has_data_links': 'available'}) + + contrib = AuthUserFactory() + preprint.add_contributor(contrib, READ) + res = app.patch_json_api(url, update_payload, auth=contrib.auth, expect_errors=True) + assert res.status_code == 403 + assert res.json['errors'][0]['detail'] == 'You do not have permission to perform this action.' + + res = app.patch_json_api(url, update_payload, auth=user.auth) + + assert res.status_code == 200 + assert res.json['data']['attributes']['has_data_links'] == 'available' + + preprint.reload() + assert preprint.has_data_links + log = preprint.logs.first() + assert log.action == PreprintLog.UPDATE_HAS_DATA_LINKS + assert log.params == {'value': 'available', 'user': user._id, 'preprint': preprint._id} + + def test_update_why_no_data(self, app, user, preprint, url): + update_payload = build_preprint_update_payload(preprint._id, attributes={'why_no_data': 'My dog ate it.'}) + + contrib = AuthUserFactory() + preprint.add_contributor(contrib, READ) + res = app.patch_json_api(url, update_payload, auth=contrib.auth, expect_errors=True) + assert res.status_code == 403 + assert res.json['errors'][0]['detail'] == 'You do not have permission to perform this action.' + + res = app.patch_json_api(url, update_payload, auth=user.auth, expect_errors=True) + + assert res.status_code == 200 + assert res.json['data']['attributes']['why_no_data'] == 'My dog ate it.' 
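+        # NOTE: at this point in the series the serializer does not yet
+        # cross-validate why_no_data against has_data_links, so the update
+        # above succeeds; patch 03 changes this same request to a 400.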
+ + preprint.has_data_links = 'no' + preprint.save() + res = app.patch_json_api(url, update_payload, auth=user.auth) + + assert res.status_code == 200 + assert res.json['data']['attributes']['why_no_data'] == 'My dog ate it.' + + preprint.reload() + assert preprint.why_no_data + log = preprint.logs.first() + assert log.action == PreprintLog.UPDATE_WHY_NO_DATA + assert log.params == {'user': user._id, 'preprint': preprint._id} + + def test_update_data_links(self, app, user, preprint, url): + data_links = ['http://www.JasonKelce.com', 'http://www.ItsTheWholeTeam.com/'] + update_payload = build_preprint_update_payload(preprint._id, attributes={'data_links': data_links}) + + contrib = AuthUserFactory() + preprint.add_contributor(contrib, READ) + res = app.patch_json_api(url, update_payload, auth=contrib.auth, expect_errors=True) + assert res.status_code == 403 + assert res.json['errors'][0]['detail'] == 'You do not have permission to perform this action.' + + preprint.has_data_links = 'no' + preprint.save() + res = app.patch_json_api(url, update_payload, auth=user.auth, expect_errors=True) + + assert res.status_code == 200 + assert res.json['data']['attributes']['data_links'] == data_links + + preprint.has_data_links = 'available' + preprint.save() + res = app.patch_json_api(url, update_payload, auth=user.auth) + + assert res.status_code == 200 + assert res.json['data']['attributes']['data_links'] == data_links + + preprint.reload() + assert preprint.data_links == data_links + log = preprint.logs.first() + assert log.action == PreprintLog.UPDATE_DATA_LINKS + assert log.params == {'user': user._id, 'preprint': preprint._id} + + update_payload = build_preprint_update_payload(preprint._id, attributes={'data_links': 'maformed payload'}) + res = app.patch_json_api(url, update_payload, auth=user.auth, expect_errors=True) + + assert res.status_code == 400 + assert res.json['errors'][0]['detail'] == 'Expected a list of items but got type "str".' + + def test_invalid_data_links(self, app, user, preprint, url): + preprint.has_data_links = 'available' + preprint.save() + + update_payload = build_preprint_update_payload(preprint._id, attributes={'data_links': ['thisaintright']}) + + res = app.patch_json_api(url, update_payload, auth=user.auth, expect_errors=True) + assert res.status_code == 400 + assert res.json['errors'][0]['detail'] == 'Enter a valid URL.' + + def test_update_has_prereg_links(self, app, user, preprint, url): + update_payload = build_preprint_update_payload(preprint._id, attributes={'has_prereg_links': 'available'}) + + contrib = AuthUserFactory() + preprint.add_contributor(contrib, READ) + res = app.patch_json_api(url, update_payload, auth=contrib.auth, expect_errors=True) + assert res.status_code == 403 + assert res.json['errors'][0]['detail'] == 'You do not have permission to perform this action.' 
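+        # The 403 above is the generic write-permission rejection for a
+        # read-only contributor; patch 06 later tightens the assertion
+        # fields further, to admins only.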
+ + res = app.patch_json_api(url, update_payload, auth=user.auth) + + assert res.status_code == 200 + assert res.json['data']['attributes']['has_prereg_links'] == 'available' + + preprint.reload() + assert preprint.has_prereg_links + log = preprint.logs.first() + assert log.action == PreprintLog.UPDATE_HAS_PREREG_LINKS + assert log.params == {'value': 'available', 'user': user._id, 'preprint': preprint._id} + + def test_invalid_prereg_links(self, app, user, preprint, url): + preprint.has_prereg_links = 'available' + preprint.save() + + update_payload = build_preprint_update_payload(preprint._id, attributes={'prereg_links': ['thisaintright']}) + + res = app.patch_json_api(url, update_payload, auth=user.auth, expect_errors=True) + + assert res.status_code == 400 + assert res.json['errors'][0]['detail'] == 'Enter a valid URL.' + + def test_no_data_links_clears_links(self, app, user, preprint, url): + preprint.has_data_links = 'available' + preprint.data_links = ['http://www.apple.com'] + preprint.save() + + update_payload = build_preprint_update_payload(preprint._id, attributes={'has_data_links': 'no'}) + + res = app.patch_json_api(url, update_payload, auth=user.auth) + + assert res.status_code == 200 + assert res.json['data']['attributes']['has_data_links'] == 'no' + assert res.json['data']['attributes']['data_links'] == [] + + preprint.reload() + assert preprint.has_data_links == 'no' + assert preprint.data_links == [] + + def test_no_prereg_links_clears_links(self, app, user, preprint, url): + preprint.has_prereg_links = 'available' + preprint.prereg_links = ['http://example.com'] + preprint.prereg_link_info = 'prereg_analysis' + preprint.save() + + update_payload = build_preprint_update_payload(preprint._id, attributes={'has_prereg_links': 'no'}) + + res = app.patch_json_api(url, update_payload, auth=user.auth) + + assert res.status_code == 200 + assert res.json['data']['attributes']['has_prereg_links'] == 'no' + preprint.reload() + assert res.json['data']['attributes']['prereg_links'] == [] + assert not res.json['data']['attributes']['prereg_link_info'] + + assert preprint.prereg_links == [] + assert preprint.prereg_link_info is None + + def test_update_why_no_prereg(self, app, user, preprint, url): + update_payload = build_preprint_update_payload(preprint._id, attributes={'why_no_prereg': 'My dog ate it.'}) + + contrib = AuthUserFactory() + preprint.add_contributor(contrib, READ) + res = app.patch_json_api(url, update_payload, auth=contrib.auth, expect_errors=True) + assert res.status_code == 403 + assert res.json['errors'][0]['detail'] == 'You do not have permission to perform this action.' + + res = app.patch_json_api(url, update_payload, auth=user.auth, expect_errors=True) + + assert res.status_code == 200 + assert res.json['data']['attributes']['why_no_prereg'] == 'My dog ate it.' + + preprint.has_prereg_links = False + preprint.save() + res = app.patch_json_api(url, update_payload, auth=user.auth) + + assert res.status_code == 200 + assert res.json['data']['attributes']['why_no_prereg'] == 'My dog ate it.' 
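+        # NOTE: patch 04 reworks this test: updating why_no_prereg alone is
+        # rejected with a 400, and it must instead be sent together with
+        # has_prereg_links='no' in a single payload.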
+ + preprint.reload() + assert preprint.why_no_prereg + log = preprint.logs.first() + assert log.action == PreprintLog.UPDATE_WHY_NO_PREREG + assert log.params == {'user': user._id, 'preprint': preprint._id} + + def test_update_prereg_links(self, app, user, preprint, url): + + prereg_links = ['http://www.JasonKelce.com', 'http://www.ItsTheWholeTeam.com/'] + update_payload = build_preprint_update_payload(preprint._id, attributes={'prereg_links': prereg_links}) + + contrib = AuthUserFactory() + preprint.add_contributor(contrib, READ) + res = app.patch_json_api(url, update_payload, auth=contrib.auth, expect_errors=True) + assert res.status_code == 403 + assert res.json['errors'][0]['detail'] == 'You do not have permission to perform this action.' + + preprint.has_prereg_links = 'no' + preprint.save() + res = app.patch_json_api(url, update_payload, auth=user.auth, expect_errors=True) + + assert res.status_code == 200 + assert res.json['data']['attributes']['prereg_links'] == prereg_links + + preprint.has_prereg_links = 'available' + preprint.save() + res = app.patch_json_api(url, update_payload, auth=user.auth) + + assert res.status_code == 200 + assert res.json['data']['attributes']['prereg_links'] == prereg_links + + preprint.reload() + assert preprint.prereg_links == prereg_links + log = preprint.logs.first() + assert log.action == PreprintLog.UPDATE_PREREG_LINKS + assert log.params == {'user': user._id, 'preprint': preprint._id} + + update_payload = build_preprint_update_payload(preprint._id, attributes={'prereg_links': 'maformed payload'}) + res = app.patch_json_api(url, update_payload, auth=user.auth, expect_errors=True) + + assert res.status_code == 400 + assert res.json['errors'][0]['detail'] == 'Expected a list of items but got type "str".' + + def test_update_prereg_link_info(self, app, user, preprint, url): + update_payload = build_preprint_update_payload( + preprint._id, + attributes={'prereg_link_info': 'prereg_designs'} + ) + + preprint.has_prereg_links = 'no' + preprint.save() + res = app.patch_json_api(url, update_payload, auth=user.auth, expect_errors=True) + + assert res.status_code == 200 + assert res.json['data']['attributes']['prereg_link_info'] == 'prereg_designs' + + preprint.has_prereg_links = 'available' + preprint.save() + res = app.patch_json_api(url, update_payload, auth=user.auth) + + assert res.status_code == 200 + assert res.json['data']['attributes']['prereg_link_info'] == 'prereg_designs' + + preprint.reload() + assert preprint.prereg_link_info == 'prereg_designs' + log = preprint.logs.first() + assert log.action == PreprintLog.UPDATE_PREREG_LINKS_INFO + assert log.params == {'user': user._id, 'preprint': preprint._id} + + update_payload = build_preprint_update_payload( + preprint._id, + attributes={'prereg_link_info': 'maformed payload'} + ) + res = app.patch_json_api(url, update_payload, auth=user.auth, expect_errors=True) + + assert res.status_code == 400 + assert res.json['errors'][0]['detail'] == '"maformed payload" is not a valid choice.' + + def test_sloan_updates(self, app, user, preprint, url): + """ + - Tests to ensure updating a preprint with unchanged data does not create superfluous log statements. + - Tests to ensure various dependent fields can be updated in a single request. 
+ """ + preprint.has_prereg_links = 'available' + preprint.prereg_links = ['http://no-sf.io'] + preprint.prereg_link_info = 'prereg_designs' + preprint.save() + + update_payload = build_preprint_update_payload( + preprint._id, + attributes={ + 'has_prereg_links': 'available', + 'prereg_link_info': 'prereg_designs', + 'prereg_links': ['http://osf.io'], # changing here should be only non-factory created log. + } + ) + app.patch_json_api(url, update_payload, auth=user.auth, expect_errors=True) + + # Any superfluous log statements? + logs = preprint.logs.all().values_list('action', 'params') + assert logs.count() == 3 # actions should be: 'subjects_updated', 'published', 'prereg_links_updated' + assert logs.latest() == ('prereg_links_updated', {'user': user._id, 'preprint': preprint._id}) + + # Can we set `has_prereg_links` to false and update `why_no_prereg` in a single request? + update_payload = build_preprint_update_payload( + preprint._id, + attributes={ + 'has_prereg_links': 'no', + 'why_no_prereg': 'My dog ate it.' + } + ) + res = app.patch_json_api(url, update_payload, auth=user.auth, expect_errors=True) + + assert res.status_code == 200 + assert res.json['data']['attributes']['has_prereg_links'] == 'no' + assert res.json['data']['attributes']['why_no_prereg'] == 'My dog ate it.' + + preprint.refresh_from_db() + assert preprint.has_prereg_links == 'no' + assert preprint.why_no_prereg == 'My dog ate it.' + @pytest.mark.django_db class TestPreprintUpdateSubjects(UpdateSubjectsMixin): diff --git a/osf/models/preprint.py b/osf/models/preprint.py index 4ce426ccaed..87c337532cc 100644 --- a/osf/models/preprint.py +++ b/osf/models/preprint.py @@ -996,6 +996,9 @@ def update_has_coi(self, auth: Auth, has_coi: bool, log: bool = True, save: bool This method brought to you via a grant from the Alfred P Sloan Foundation. 
""" + if has_coi is None: + has_coi = False + if self.has_coi == has_coi: return @@ -1028,17 +1031,14 @@ def update_conflict_of_interest_statement(self, auth: Auth, coi_statement: str, if self.conflict_of_interest_statement == coi_statement: return - if not self.has_coi: - raise PreprintStateError('You do not have the ability to edit a conflict of interest while the has_coi field is ' - 'set to false or unanswered') - - self.conflict_of_interest_statement = coi_statement + self.conflict_of_interest_statement = coi_statement or '' if log: self.add_log( action=PreprintLog.UPDATE_COI_STATEMENT, params={ 'user': auth.user._id, + 'value': self.conflict_of_interest_statement }, auth=auth, ) @@ -1061,6 +1061,9 @@ def update_has_data_links(self, auth: Auth, has_data_links: bool, log: bool = Tr if self.has_data_links == has_data_links: return + if has_data_links == 'no': + self.data_links = [] + self.has_data_links = has_data_links if log: @@ -1072,7 +1075,7 @@ def update_has_data_links(self, auth: Auth, has_data_links: bool, log: bool = Tr }, auth=auth ) - if has_data_links != 'available': + if not has_data_links: self.update_data_links(auth, data_links=[], log=False) if save: self.save() @@ -1093,9 +1096,8 @@ def update_data_links(self, auth: Auth, data_links: list, log: bool = True, save if self.data_links == data_links: return - if not self.has_data_links == 'available' and data_links: - raise PreprintStateError('You cannot edit this statement while your data links availability is set to false' - ' or is unanswered.') + if not self.has_data_links and data_links: + self.data_links = [] self.data_links = data_links @@ -1126,11 +1128,10 @@ def update_why_no_data(self, auth: Auth, why_no_data: str, log: bool = True, sav if self.why_no_data == why_no_data: return - if not self.has_data_links == 'no': - raise PreprintStateError('You cannot edit this statement while your data links availability is set to true or' - ' is unanswered.') - else: - self.why_no_data = why_no_data + if self.has_data_links: + self.why_no_data = '' + + self.why_no_data = why_no_data if log: self.add_log( @@ -1159,6 +1160,10 @@ def update_has_prereg_links(self, auth: Auth, has_prereg_links: bool, log: bool if has_prereg_links == self.has_prereg_links: return + if has_prereg_links == 'no': + self.prereg_links = [] + self.prereg_link_info = None + self.has_prereg_links = has_prereg_links if log: @@ -1170,7 +1175,7 @@ def update_has_prereg_links(self, auth: Auth, has_prereg_links: bool, log: bool }, auth=auth ) - if has_prereg_links != 'available': + if not has_prereg_links: self.update_prereg_links(auth, prereg_links=[], log=False) self.update_prereg_link_info(auth, prereg_link_info=None, log=False) if save: @@ -1192,9 +1197,8 @@ def update_why_no_prereg(self, auth: Auth, why_no_prereg: str, log: bool = True, if why_no_prereg == self.why_no_prereg: return - if self.has_prereg_links == 'available' or self.has_prereg_links is None: - raise PreprintStateError('You cannot edit this statement while your prereg links ' - 'availability is set to true or is unanswered.') + if self.has_prereg_links or self.has_prereg_links is None: + self.why_no_prereg = '' self.why_no_prereg = why_no_prereg @@ -1225,9 +1229,8 @@ def update_prereg_links(self, auth: Auth, prereg_links: list, log: bool = True, if prereg_links == self.prereg_links: return - if not self.has_prereg_links == 'available' and prereg_links: - raise PreprintStateError('You cannot edit this field while your prereg links' - ' availability is set to false or is unanswered.') + if not 
self.has_prereg_links and prereg_links: + self.prereg_links = [] self.prereg_links = prereg_links @@ -1259,9 +1262,8 @@ def update_prereg_link_info(self, auth: Auth, prereg_link_info: str, log: bool = if self.prereg_link_info == prereg_link_info: return - if not self.has_prereg_links == 'available' and prereg_link_info: - raise PreprintStateError('You cannot edit this field while your prereg links' - ' availability is set to false or is unanswered.') + if not self.has_prereg_links and prereg_link_info: + self.prereg_link_info = None self.prereg_link_info = prereg_link_info From 008488affbfcf7ed84ff4e18f085ac2afd43884b Mon Sep 17 00:00:00 2001 From: Uditi Mehta Date: Wed, 2 Oct 2024 15:53:06 -0400 Subject: [PATCH 03/35] Fix PreprintSerializer validation to handle has_coi and has_data_links updates --- api/preprints/serializers.py | 40 +++++++++++++++++++ .../preprints/views/test_preprint_detail.py | 13 +++--- 2 files changed, 46 insertions(+), 7 deletions(-) diff --git a/api/preprints/serializers.py b/api/preprints/serializers.py index 7d3ebdfaa00..4562bb901da 100644 --- a/api/preprints/serializers.py +++ b/api/preprints/serializers.py @@ -289,6 +289,46 @@ def update(self, preprint, validated_data): if not preprint.has_permission(auth.user, osf_permissions.WRITE): raise exceptions.PermissionDenied(detail='User must have admin or write permissions to update a preprint.') + save_preprint = False + recently_published = False + + if 'has_coi' in validated_data: + try: + preprint.update_has_coi(auth, validated_data['has_coi']) + save_preprint = True + except PreprintStateError as e: + raise exceptions.ValidationError(detail=str(e)) + + conflict_statement = validated_data.get('conflict_of_interest_statement', None) + has_coi = preprint.has_coi + if has_coi is False and conflict_statement: + raise exceptions.ValidationError( + detail='Cannot provide conflict of interest statement when has_coi is set to False.', + ) + + if 'has_data_links' in validated_data: + try: + preprint.update_has_data_links(auth, validated_data['has_data_links']) + save_preprint = True + except PreprintStateError as e: + raise exceptions.ValidationError(detail=str(e)) + + why_no_data = validated_data.get('why_no_data', None) + has_data_links = preprint.has_data_links + if has_data_links != 'no' and why_no_data: + raise exceptions.ValidationError( + detail='You cannot edit this statement while your data links availability is set to true or is unanswered.', + ) + + if has_data_links == 'no': + if 'data_links' in validated_data and validated_data['data_links']: + raise exceptions.ValidationError( + detail='Cannot provide data links when has_data_links is set to "no".', + ) + if preprint.data_links: + preprint.update_data_links(auth, []) + save_preprint = True + published = validated_data.pop('is_published', None) if published and preprint.provider.is_reviewed: url = absolute_reverse( diff --git a/api_tests/preprints/views/test_preprint_detail.py b/api_tests/preprints/views/test_preprint_detail.py index df50db8166d..b283345fefd 100644 --- a/api_tests/preprints/views/test_preprint_detail.py +++ b/api_tests/preprints/views/test_preprint_detail.py @@ -873,9 +873,8 @@ def test_update_conflict_of_interest_statement(self, app, user, preprint, url): preprint.save() res = app.patch_json_api(url, update_payload, auth=user.auth, expect_errors=True) - assert res.status_code == 200 - assert res.json['data']['attributes']['conflict_of_interest_statement'] ==\ - 'Owns shares in Closed Science Corporation.' 
+ assert res.status_code == 400 + assert res.json['errors'][0]['detail'] == 'Cannot provide conflict of interest statement when has_coi is set to False.' preprint.has_coi = True preprint.save() @@ -926,8 +925,8 @@ def test_update_why_no_data(self, app, user, preprint, url): res = app.patch_json_api(url, update_payload, auth=user.auth, expect_errors=True) - assert res.status_code == 200 - assert res.json['data']['attributes']['why_no_data'] == 'My dog ate it.' + assert res.status_code == 400 + assert res.json['errors'][0]['detail'] == 'You cannot edit this statement while your data links availability is set to true or is unanswered.' preprint.has_data_links = 'no' preprint.save() @@ -956,8 +955,8 @@ def test_update_data_links(self, app, user, preprint, url): preprint.save() res = app.patch_json_api(url, update_payload, auth=user.auth, expect_errors=True) - assert res.status_code == 200 - assert res.json['data']['attributes']['data_links'] == data_links + assert res.status_code == 400 + assert res.json['errors'][0]['detail'] == 'Cannot provide data links when has_data_links is set to "no".' preprint.has_data_links = 'available' preprint.save() From 5688949ac7589b3e315c33976db132a82d20e620 Mon Sep 17 00:00:00 2001 From: Uditi Mehta Date: Thu, 3 Oct 2024 13:44:38 -0400 Subject: [PATCH 04/35] Update tests to reflect the corrected validations --- api/preprints/serializers.py | 104 +++++++++++++++--- .../preprints/views/test_preprint_detail.py | 70 ++++++++++-- 2 files changed, 145 insertions(+), 29 deletions(-) diff --git a/api/preprints/serializers.py b/api/preprints/serializers.py index 4562bb901da..cd4680e69fb 100644 --- a/api/preprints/serializers.py +++ b/api/preprints/serializers.py @@ -292,6 +292,51 @@ def update(self, preprint, validated_data): save_preprint = False recently_published = False + for field in ['conflict_of_interest_statement', 'why_no_data', 'why_no_prereg']: + if field in validated_data: + value = validated_data[field] + if isinstance(value, str) and not value.strip(): + validated_data[field] = None + + updated_has_coi = validated_data.get('has_coi', preprint.has_coi) + updated_conflict_statement = validated_data.get('conflict_of_interest_statement', preprint.conflict_of_interest_statement) + + updated_has_data_links = validated_data.get('has_data_links', preprint.has_data_links) + updated_why_no_data = validated_data.get('why_no_data', preprint.why_no_data) + + updated_has_prereg_links = validated_data.get('has_prereg_links', preprint.has_prereg_links) + updated_why_no_prereg = validated_data.get('why_no_prereg', preprint.why_no_prereg) + + if updated_has_coi is False and updated_conflict_statement: + raise exceptions.ValidationError( + detail='Cannot provide conflict of interest statement when has_coi is set to False.', + ) + + if updated_has_data_links != 'no' and updated_why_no_data: + raise exceptions.ValidationError( + detail='You cannot edit this statement while your data links availability is set to true or is unanswered.', + ) + + if updated_has_data_links == 'no' and 'data_links' in validated_data and validated_data['data_links']: + raise exceptions.ValidationError( + detail='Cannot provide data links when has_data_links is set to "no".', + ) + + if updated_has_prereg_links != 'no' and updated_why_no_prereg: + raise exceptions.ValidationError( + detail='You cannot edit this statement while your prereg links availability is set to true or is unanswered.', + ) + + if updated_has_prereg_links != 'available': + if 'prereg_links' in validated_data and 
validated_data['prereg_links']: + raise exceptions.ValidationError( + detail='You cannot edit this field while your prereg links availability is set to false or is unanswered.', + ) + if 'prereg_link_info' in validated_data and validated_data['prereg_link_info']: + raise exceptions.ValidationError( + detail='You cannot edit this field while your prereg links availability is set to false or is unanswered.', + ) + if 'has_coi' in validated_data: try: preprint.update_has_coi(auth, validated_data['has_coi']) @@ -299,12 +344,12 @@ def update(self, preprint, validated_data): except PreprintStateError as e: raise exceptions.ValidationError(detail=str(e)) - conflict_statement = validated_data.get('conflict_of_interest_statement', None) - has_coi = preprint.has_coi - if has_coi is False and conflict_statement: - raise exceptions.ValidationError( - detail='Cannot provide conflict of interest statement when has_coi is set to False.', - ) + if 'conflict_of_interest_statement' in validated_data: + try: + preprint.update_conflict_of_interest_statement(auth, validated_data['conflict_of_interest_statement']) + save_preprint = True + except PreprintStateError as e: + raise exceptions.ValidationError(detail=str(e)) if 'has_data_links' in validated_data: try: @@ -313,22 +358,45 @@ def update(self, preprint, validated_data): except PreprintStateError as e: raise exceptions.ValidationError(detail=str(e)) - why_no_data = validated_data.get('why_no_data', None) - has_data_links = preprint.has_data_links - if has_data_links != 'no' and why_no_data: - raise exceptions.ValidationError( - detail='You cannot edit this statement while your data links availability is set to true or is unanswered.', - ) + if 'why_no_data' in validated_data: + try: + preprint.update_why_no_data(auth, validated_data['why_no_data']) + save_preprint = True + except PreprintStateError as e: + raise exceptions.ValidationError(detail=str(e)) - if has_data_links == 'no': - if 'data_links' in validated_data and validated_data['data_links']: - raise exceptions.ValidationError( - detail='Cannot provide data links when has_data_links is set to "no".', - ) - if preprint.data_links: + if 'data_links' in validated_data: + try: + preprint.update_data_links(auth, validated_data['data_links']) + save_preprint = True + except PreprintStateError as e: + raise exceptions.ValidationError(detail=str(e)) + else: + if updated_has_data_links == 'no' and preprint.data_links: preprint.update_data_links(auth, []) save_preprint = True + if 'why_no_prereg' in validated_data: + try: + preprint.update_why_no_prereg(auth, validated_data['why_no_prereg']) + save_preprint = True + except PreprintStateError as e: + raise exceptions.ValidationError(detail=str(e)) + + if 'prereg_links' in validated_data: + try: + preprint.update_prereg_links(auth, validated_data['prereg_links']) + save_preprint = True + except PreprintStateError as e: + raise exceptions.ValidationError(detail=str(e)) + + if 'prereg_link_info' in validated_data: + try: + preprint.update_prereg_link_info(auth, validated_data['prereg_link_info']) + save_preprint = True + except PreprintStateError as e: + raise exceptions.ValidationError(detail=str(e)) + published = validated_data.pop('is_published', None) if published and preprint.provider.is_reviewed: url = absolute_reverse( diff --git a/api_tests/preprints/views/test_preprint_detail.py b/api_tests/preprints/views/test_preprint_detail.py index b283345fefd..5426e4023eb 100644 --- a/api_tests/preprints/views/test_preprint_detail.py +++ 
b/api_tests/preprints/views/test_preprint_detail.py @@ -1063,21 +1063,26 @@ def test_update_why_no_prereg(self, app, user, preprint, url): assert res.status_code == 403 assert res.json['errors'][0]['detail'] == 'You do not have permission to perform this action.' + preprint.has_prereg_links = 'available' + preprint.save() res = app.patch_json_api(url, update_payload, auth=user.auth, expect_errors=True) + assert res.status_code == 400 + assert res.json['errors'][0]['detail'] == 'You cannot edit this statement while your prereg links availability is set to true or is unanswered.' - assert res.status_code == 200 - assert res.json['data']['attributes']['why_no_prereg'] == 'My dog ate it.' - - preprint.has_prereg_links = False - preprint.save() + update_payload = build_preprint_update_payload(preprint._id, attributes={ + 'why_no_prereg': 'My dog ate it.', + 'has_prereg_links': 'no' + }) res = app.patch_json_api(url, update_payload, auth=user.auth) assert res.status_code == 200 assert res.json['data']['attributes']['why_no_prereg'] == 'My dog ate it.' preprint.reload() - assert preprint.why_no_prereg - log = preprint.logs.first() + assert preprint.why_no_prereg == 'My dog ate it.' + + log = preprint.logs.filter(action=PreprintLog.UPDATE_WHY_NO_PREREG).first() + assert log is not None, 'Expected log entry for why_no_prereg_updated not found.' assert log.action == PreprintLog.UPDATE_WHY_NO_PREREG assert log.params == {'user': user._id, 'preprint': preprint._id} @@ -1096,8 +1101,8 @@ def test_update_prereg_links(self, app, user, preprint, url): preprint.save() res = app.patch_json_api(url, update_payload, auth=user.auth, expect_errors=True) - assert res.status_code == 200 - assert res.json['data']['attributes']['prereg_links'] == prereg_links + assert res.status_code == 400 + assert res.json['errors'][0]['detail'] == 'You cannot edit this field while your prereg links availability is set to false or is unanswered.' preprint.has_prereg_links = 'available' preprint.save() @@ -1128,8 +1133,8 @@ def test_update_prereg_link_info(self, app, user, preprint, url): preprint.save() res = app.patch_json_api(url, update_payload, auth=user.auth, expect_errors=True) - assert res.status_code == 200 - assert res.json['data']['attributes']['prereg_link_info'] == 'prereg_designs' + assert res.status_code == 400 + assert res.json['errors'][0]['detail'] == 'You cannot edit this field while your prereg links availability is set to false or is unanswered.' preprint.has_prereg_links = 'available' preprint.save() @@ -1153,6 +1158,49 @@ def test_update_prereg_link_info(self, app, user, preprint, url): assert res.status_code == 400 assert res.json['errors'][0]['detail'] == '"maformed payload" is not a valid choice.' 
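+
+    # The two tests below cover the cross-field validation added in this
+    # patch: has_coi paired with conflict_of_interest_statement, and
+    # has_data_links paired with data_links, sent in a single payload.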
+ def test_update_has_coi_false_with_null_conflict_statement(self, app, user, preprint, url): + update_payload = build_preprint_update_payload( + preprint._id, + attributes={ + 'has_coi': False, + 'conflict_of_interest_statement': None + } + ) + + res = app.patch_json_api(url, update_payload, auth=user.auth) + + assert res.status_code == 200 + assert res.json['data']['attributes']['has_coi'] is False + assert res.json['data']['attributes']['conflict_of_interest_statement'] is None + + preprint.reload() + assert preprint.has_coi is False + assert preprint.conflict_of_interest_statement is None + + def test_update_has_data_links_no_with_data_links_provided(self, app, user, preprint, url): + update_payload = build_preprint_update_payload( + preprint._id, + attributes={ + 'has_data_links': 'no', + 'data_links': ['http://example.com/data'] + } + ) + + initial_has_data_links = preprint.has_data_links + initial_data_links = preprint.data_links + + res = app.patch_json_api(url, update_payload, auth=user.auth, expect_errors=True) + + assert res.status_code == 400 + assert res.json['errors'][0]['detail'] == 'Cannot provide data links when has_data_links is set to "no".' + + preprint.reload() + + assert preprint.has_data_links == initial_has_data_links + assert preprint.data_links == initial_data_links + + assert preprint.has_data_links != 'no' + def test_sloan_updates(self, app, user, preprint, url): """ - Tests to ensure updating a preprint with unchanged data does not create superfluous log statements. From 5c02749a97605e7afd727f10f528630727f507c4 Mon Sep 17 00:00:00 2001 From: Uditi Mehta Date: Thu, 3 Oct 2024 16:38:06 -0400 Subject: [PATCH 05/35] Add tests for data links and preregistration validation --- .../preprints/views/test_preprint_detail.py | 42 +++ .../test_preprint_detail_author_assertions.py | 300 ------------------ 2 files changed, 42 insertions(+), 300 deletions(-) delete mode 100644 api_tests/preprints/views/test_preprint_detail_author_assertions.py diff --git a/api_tests/preprints/views/test_preprint_detail.py b/api_tests/preprints/views/test_preprint_detail.py index 5426e4023eb..3106835a940 100644 --- a/api_tests/preprints/views/test_preprint_detail.py +++ b/api_tests/preprints/views/test_preprint_detail.py @@ -18,6 +18,7 @@ from osf.models import ( NodeLicense, PreprintContributor, + PreprintLog ) from osf.utils.permissions import WRITE from osf.utils.workflows import DefaultStates @@ -1201,6 +1202,47 @@ def test_update_has_data_links_no_with_data_links_provided(self, app, user, prep assert preprint.has_data_links != 'no' + def test_update_has_data_links_no_with_empty_data_links(self, app, user, preprint, url): + update_payload = build_preprint_update_payload( + preprint._id, + attributes={ + 'has_data_links': 'no', + 'data_links': [] + } + ) + + res = app.patch_json_api(url, update_payload, auth=user.auth) + + assert res.status_code == 200 + assert res.json['data']['attributes']['has_data_links'] == 'no' + assert res.json['data']['attributes']['data_links'] == [] + + preprint.reload() + assert preprint.has_data_links == 'no' + assert preprint.data_links == [] + + def test_update_has_prereg_links_no_with_empty_prereg_links(self, app, user, preprint, url): + update_payload = build_preprint_update_payload( + preprint._id, + attributes={ + 'has_prereg_links': 'no', + 'prereg_links': [], + 'prereg_link_info': '' + } + ) + + res = app.patch_json_api(url, update_payload, auth=user.auth) + + assert res.status_code == 200 + assert res.json['data']['attributes']['has_prereg_links'] == 
'no' + assert res.json['data']['attributes']['prereg_links'] == [] + assert res.json['data']['attributes']['prereg_link_info'] == '' + + preprint.reload() + assert preprint.has_prereg_links == 'no' + assert preprint.prereg_links == [] + assert preprint.prereg_link_info == '' + def test_sloan_updates(self, app, user, preprint, url): """ - Tests to ensure updating a preprint with unchanged data does not create superfluous log statements. diff --git a/api_tests/preprints/views/test_preprint_detail_author_assertions.py b/api_tests/preprints/views/test_preprint_detail_author_assertions.py deleted file mode 100644 index 63dc8696d41..00000000000 --- a/api_tests/preprints/views/test_preprint_detail_author_assertions.py +++ /dev/null @@ -1,300 +0,0 @@ -import pytest - -from osf.utils.permissions import READ, WRITE, ADMIN -from api.base.settings.defaults import API_BASE -from osf.models import PreprintLog -from osf_tests.factories import PreprintFactory, AuthUserFactory - - -def build_preprint_update_payload( - node_id, attributes=None, relationships=None, - jsonapi_type='preprints'): - payload = { - 'data': { - 'id': node_id, - 'type': jsonapi_type, - 'attributes': attributes, - 'relationships': relationships - } - } - return payload - - -@pytest.mark.django_db -@pytest.mark.enable_enqueue_task -class TestPreprintUpdateWithAuthorAssertion: - - @pytest.fixture() - def user(self): - return AuthUserFactory() - - @pytest.fixture() - def preprint(self, user): - """ - Creator is not admin permission - """ - preprint = PreprintFactory(creator=user) - admin = AuthUserFactory() - preprint.add_contributor(admin, ADMIN) - preprint.add_contributor(user, READ) - return preprint - - @pytest.fixture() - def url(self, preprint): - return f'/{API_BASE}preprints/{preprint._id}/' - - @pytest.fixture() - def read_contrib(self, preprint): - contrib = AuthUserFactory() - preprint.add_contributor(contrib, READ) - return contrib - - @pytest.fixture() - def write_contrib(self, preprint): - contrib = AuthUserFactory() - preprint.add_contributor(contrib, WRITE) - return contrib - - @pytest.fixture() - def admin_contrib(self, preprint): - contrib = AuthUserFactory() - preprint.add_contributor(contrib, ADMIN) - return contrib - - def assert_permission(self, app, url, contrib, attributes, expected_status): - update_payload = build_preprint_update_payload(node_id=contrib._id, attributes=attributes) - res = app.patch_json_api(url, update_payload, auth=contrib.auth, expect_errors=True) - assert res.status_code == expected_status - - # Testing permissions for updating has_coi - def test_update_has_coi_permission_denied(self, app, read_contrib, url): - self.assert_permission(app, url, read_contrib, {'has_coi': True}, 403) - - def test_update_has_coi_permission_granted_write(self, app, write_contrib, url): - self.assert_permission(app, url, write_contrib, {'has_coi': True}, 403) - - def test_update_has_coi_permission_granted_admin(self, app, admin_contrib, url): - self.assert_permission(app, url, admin_contrib, {'has_coi': True}, 200) - - def test_update_has_coi_permission_granted_creator(self, app, user, url): - self.assert_permission(app, url, user, {'has_coi': True}, 403) - - # Testing permissions for updating conflict_of_interest_statement - def test_update_conflict_of_interest_statement_permission_denied(self, app, read_contrib, url): - self.assert_permission(app, url, read_contrib, {'conflict_of_interest_statement': 'Test'}, 403) - - def test_update_conflict_of_interest_statement_permission_granted_write(self, app, 
write_contrib, preprint, url): - preprint.has_coi = True - preprint.save() - self.assert_permission(app, url, write_contrib, {'conflict_of_interest_statement': 'Test'}, 403) - - def test_update_conflict_of_interest_statement_permission_granted_admin(self, app, admin_contrib, preprint, url): - preprint.has_coi = True - preprint.save() - self.assert_permission(app, url, admin_contrib, {'conflict_of_interest_statement': 'Test'}, 200) - - def test_update_conflict_of_interest_statement_permission_granted_creator(self, app, user, preprint, url): - preprint.has_coi = True - preprint.save() - self.assert_permission(app, url, user, {'conflict_of_interest_statement': 'Test'}, 403) - - # Testing permissions for updating has_data_links - def test_update_has_data_links_permission_denied(self, app, read_contrib, url): - self.assert_permission(app, url, read_contrib, {'has_data_links': 'available'}, 403) - - def test_update_has_data_links_permission_granted_write(self, app, write_contrib, url): - self.assert_permission(app, url, write_contrib, {'has_data_links': 'available'}, 403) - - def test_update_has_data_links_permission_granted_admin(self, app, admin_contrib, url): - self.assert_permission(app, url, admin_contrib, {'has_data_links': 'available'}, 200) - - def test_update_has_data_links_permission_granted_creator(self, app, user, url): - self.assert_permission(app, url, user, {'has_data_links': 'available'}, 403) - - # Testing permissions for updating why_no_data - def test_update_why_no_data_permission_denied(self, app, read_contrib, url): - self.assert_permission(app, url, read_contrib, {'why_no_data': 'My dog ate it.'}, 403) - - def test_update_why_no_data_permission_granted_write(self, app, write_contrib, preprint, url): - preprint.has_data_links = 'no' - preprint.save() - self.assert_permission(app, url, write_contrib, {'why_no_data': 'My dog ate it.'}, 403) - - def test_update_why_no_data_permission_granted_admin(self, app, admin_contrib, preprint, url): - preprint.has_data_links = 'no' - preprint.save() - self.assert_permission(app, url, admin_contrib, {'why_no_data': 'My dog ate it.'}, 200) - - def test_update_why_no_data_permission_granted_creator(self, app, user, preprint, url): - preprint.has_data_links = 'no' - preprint.save() - self.assert_permission(app, url, user, {'why_no_data': 'My dog ate it.'}, 403) - - # Testing permissions for updating data_links - def test_update_data_links_permission_denied(self, app, read_contrib, url): - data_links = ['http://www.JasonKelce.com', 'http://www.ItsTheWholeTeam.com/'] - self.assert_permission(app, url, read_contrib, {'data_links': data_links}, 403) - - def test_update_data_links_permission_granted_write(self, app, write_contrib, preprint, url): - data_links = ['http://www.JasonKelce.com', 'http://www.ItsTheWholeTeam.com/'] - preprint.has_data_links = 'available' - preprint.save() - self.assert_permission(app, url, write_contrib, {'data_links': data_links}, 403) - - def test_update_data_links_permission_granted_admin(self, app, admin_contrib, preprint, url): - data_links = ['http://www.JasonKelce.com', 'http://www.ItsTheWholeTeam.com/'] - preprint.has_data_links = 'available' - preprint.save() - self.assert_permission(app, url, admin_contrib, {'data_links': data_links}, 200) - - def test_update_data_links_permission_granted_creator(self, app, user, preprint, url): - data_links = ['http://www.JasonKelce.com', 'http://www.ItsTheWholeTeam.com/'] - preprint.has_data_links = 'available' - preprint.save() - self.assert_permission(app, url, user, 
{'data_links': data_links}, 403) - - def test_update_data_links_invalid_payload(self, app, admin_contrib, url): - update_payload = build_preprint_update_payload(node_id=admin_contrib._id, attributes={'data_links': 'maformed payload'}) - res = app.patch_json_api(url, update_payload, auth=admin_contrib.auth, expect_errors=True) - assert res.status_code == 400 - assert res.json['errors'][0]['detail'] == 'Expected a list of items but got type "str".' - - def test_update_data_links_invalid_url(self, app, admin_contrib, preprint, url): - preprint.has_data_links = 'available' - preprint.save() - update_payload = build_preprint_update_payload(node_id=admin_contrib._id, attributes={'data_links': ['thisaintright']}) - res = app.patch_json_api(url, update_payload, auth=admin_contrib.auth, expect_errors=True) - assert res.status_code == 400 - assert res.json['errors'][0]['detail'] == 'Enter a valid URL.' - - # Testing permissions for updating has_prereg_links - def test_update_has_prereg_links_permission_denied(self, app, read_contrib, url): - self.assert_permission(app, url, read_contrib, {'has_prereg_links': 'available'}, 403) - - def test_update_has_prereg_links_permission_granted_write(self, app, write_contrib, url): - self.assert_permission(app, url, write_contrib, {'has_prereg_links': 'available'}, 403) - - def test_update_has_prereg_links_permission_granted_admin(self, app, admin_contrib, url): - self.assert_permission(app, url, admin_contrib, {'has_prereg_links': 'available'}, 200) - - def test_update_has_prereg_links_permission_granted_creator(self, app, user, url): - self.assert_permission(app, url, user, {'has_prereg_links': 'available'}, 403) - - # Testing permissions for updating prereg_links - def test_update_prereg_links_permission_denied(self, app, read_contrib, url): - prereg_links = ['http://www.JasonKelce.com', 'http://www.ItsTheWholeTeam.com/'] - self.assert_permission(app, url, read_contrib, {'prereg_links': prereg_links}, 403) - - def test_update_prereg_links_permission_granted_write(self, app, write_contrib, preprint, url): - prereg_links = ['http://www.JasonKelce.com', 'http://www.ItsTheWholeTeam.com/'] - preprint.has_prereg_links = 'available' - preprint.save() - self.assert_permission(app, url, write_contrib, {'prereg_links': prereg_links}, 403) - - def test_update_prereg_links_permission_granted_admin(self, app, admin_contrib, preprint, url): - prereg_links = ['http://www.JasonKelce.com', 'http://www.ItsTheWholeTeam.com/'] - preprint.has_prereg_links = 'available' - preprint.save() - self.assert_permission(app, url, admin_contrib, {'prereg_links': prereg_links}, 200) - - def test_update_prereg_links_permission_granted_creator(self, app, user, preprint, url): - prereg_links = ['http://www.JasonKelce.com', 'http://www.ItsTheWholeTeam.com/'] - preprint.has_prereg_links = 'available' - preprint.save() - self.assert_permission(app, url, user, {'prereg_links': prereg_links}, 403) - - def test_update_prereg_links_invalid_payload(self, app, admin_contrib, url): - update_payload = build_preprint_update_payload(node_id=admin_contrib._id, attributes={'prereg_links': 'maformed payload'}) - res = app.patch_json_api(url, update_payload, auth=admin_contrib.auth, expect_errors=True) - assert res.status_code == 400 - assert res.json['errors'][0]['detail'] == 'Expected a list of items but got type "str".' 
- - def test_update_prereg_links_invalid_url(self, app, admin_contrib, preprint, url): - preprint.has_prereg_links = 'available' - preprint.save() - update_payload = build_preprint_update_payload(node_id=admin_contrib._id, attributes={'prereg_links': ['thisaintright']}) - res = app.patch_json_api(url, update_payload, auth=admin_contrib.auth, expect_errors=True) - assert res.status_code == 400 - assert res.json['errors'][0]['detail'] == 'Enter a valid URL.' - - def test_update_prereg_link_info_fail_prereg_links(self, app, admin_contrib, preprint, url): - update_payload = build_preprint_update_payload(node_id=admin_contrib._id, attributes={'prereg_link_info': 'prereg_designs'}) - preprint.has_prereg_links = 'no' - preprint.save() - res = app.patch_json_api(url, update_payload, auth=admin_contrib.auth, expect_errors=True) - assert res.status_code == 400 - assert res.json['errors'][0]['detail'] == 'You cannot edit this field while your prereg links availability is set to false or is unanswered.' - - def test_update_prereg_link_info_success(self, app, admin_contrib, preprint, url): - update_payload = build_preprint_update_payload(node_id=admin_contrib._id, attributes={'prereg_link_info': 'prereg_designs'}) - preprint.has_prereg_links = 'available' - preprint.save() - res = app.patch_json_api(url, update_payload, auth=admin_contrib.auth) - assert res.status_code == 200 - assert res.json['data']['attributes']['prereg_link_info'] == 'prereg_designs' - preprint.reload() - assert preprint.prereg_link_info == 'prereg_designs' - log = preprint.logs.first() - assert log.action == PreprintLog.UPDATE_PREREG_LINKS_INFO - assert log.params == {'user': admin_contrib._id, 'preprint': preprint._id} - - def test_update_prereg_link_info_invalid_payload(self, app, admin_contrib, url): - update_payload = build_preprint_update_payload(node_id=admin_contrib._id, attributes={'prereg_link_info': 'maformed payload'}) - res = app.patch_json_api(url, update_payload, auth=admin_contrib.auth, expect_errors=True) - assert res.status_code == 400 - assert res.json['errors'][0]['detail'] == '"maformed payload" is not a valid choice.' 
- - def test_no_prereg_links_clears_links(self, app, admin_contrib, preprint, url): - preprint.has_prereg_links = 'available' - preprint.prereg_links = ['http://example.com'] - preprint.prereg_link_info = 'prereg_analysis' - preprint.save() - update_payload = build_preprint_update_payload(node_id=admin_contrib._id, attributes={'has_prereg_links': 'no'}) - res = app.patch_json_api(url, update_payload, auth=admin_contrib.auth) - assert res.status_code == 200 - assert res.json['data']['attributes']['has_prereg_links'] == 'no' - assert res.json['data']['attributes']['prereg_links'] == [] - assert not res.json['data']['attributes']['prereg_link_info'] - - def test_no_data_links_clears_links(self, app, admin_contrib, preprint, url): - preprint.has_data_links = 'available' - preprint.data_links = ['http://www.apple.com'] - preprint.save() - update_payload = build_preprint_update_payload(node_id=admin_contrib._id, attributes={'has_data_links': 'no'}) - res = app.patch_json_api(url, update_payload, auth=admin_contrib.auth) - assert res.status_code == 200 - assert res.json['data']['attributes']['has_data_links'] == 'no' - assert res.json['data']['attributes']['data_links'] == [] - - def test_sloan_updates(self, app, admin_contrib, preprint, url): - preprint.has_prereg_links = 'available' - preprint.prereg_links = ['http://no-sf.io'] - preprint.prereg_link_info = 'prereg_designs' - preprint.save() - update_payload = build_preprint_update_payload( - node_id=preprint._id, - attributes={ - 'has_prereg_links': 'available', - 'prereg_link_info': 'prereg_designs', - 'prereg_links': ['http://osf.io'], - } - ) - app.patch_json_api(url, update_payload, auth=admin_contrib.auth, expect_errors=True) - logs = preprint.logs.all().values_list('action', 'params') - assert logs.count() == 5 - assert logs.latest() == ('prereg_links_updated', {'user': admin_contrib._id, 'preprint': preprint._id}) - - update_payload = build_preprint_update_payload( - node_id=preprint._id, - attributes={ - 'has_prereg_links': 'no', - 'why_no_prereg': 'My dog ate it.' - } - ) - res = app.patch_json_api(url, update_payload, auth=admin_contrib.auth, expect_errors=True) - assert res.status_code == 200 - assert res.json['data']['attributes']['has_prereg_links'] == 'no' - assert res.json['data']['attributes']['why_no_prereg'] == 'My dog ate it.' - preprint.refresh_from_db() - assert preprint.has_prereg_links == 'no' - assert preprint.why_no_prereg == 'My dog ate it.' 
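
Patches 06 and 07 then gate every author-assertion field behind admin
permission and remove the duplicated update blocks. The shape of that check,
as a minimal standalone sketch — assuming a plain dict of validated data and
a boolean admin flag; ASSERTION_FIELDS and check_assertion_permissions are
illustrative names only, and the real serializer raises DRF's
exceptions.PermissionDenied from a require_admin_permission() helper:

ASSERTION_FIELDS = {
    'has_coi', 'conflict_of_interest_statement',
    'has_data_links', 'why_no_data', 'data_links',
    'has_prereg_links', 'why_no_prereg', 'prereg_links', 'prereg_link_info',
}

def check_assertion_permissions(validated_data, is_admin):
    # Reject the whole update if a non-admin touches any assertion field.
    touched = ASSERTION_FIELDS & validated_data.keys()
    if touched and not is_admin:
        raise PermissionError(
            'Must have admin permissions to update author assertion fields.'
        )

For example, check_assertion_permissions({'has_coi': True}, is_admin=False)
raises, while payloads that touch no assertion field pass through untouched.
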
From 5ffde1ba32541a0fe50dc55209de94c8ce25c625 Mon Sep 17 00:00:00 2001 From: Uditi Mehta Date: Wed, 9 Oct 2024 10:40:50 -0400 Subject: [PATCH 06/35] Add admin permission checks --- api/preprints/serializers.py | 11 +++++++++++ .../preprints/views/test_preprint_detail.py | 17 +++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/api/preprints/serializers.py b/api/preprints/serializers.py index cd4680e69fb..28ca617ee70 100644 --- a/api/preprints/serializers.py +++ b/api/preprints/serializers.py @@ -337,7 +337,12 @@ def update(self, preprint, validated_data): detail='You cannot edit this field while your prereg links availability is set to false or is unanswered.', ) + def require_admin_permission(): + if not preprint.has_permission(auth.user, osf_permissions.ADMIN): + raise exceptions.PermissionDenied(detail='Must have admin permissions to update author assertion fields.') + if 'has_coi' in validated_data: + require_admin_permission() try: preprint.update_has_coi(auth, validated_data['has_coi']) save_preprint = True @@ -345,6 +350,7 @@ def update(self, preprint, validated_data): raise exceptions.ValidationError(detail=str(e)) if 'conflict_of_interest_statement' in validated_data: + require_admin_permission() try: preprint.update_conflict_of_interest_statement(auth, validated_data['conflict_of_interest_statement']) save_preprint = True @@ -352,6 +358,7 @@ def update(self, preprint, validated_data): raise exceptions.ValidationError(detail=str(e)) if 'has_data_links' in validated_data: + require_admin_permission() try: preprint.update_has_data_links(auth, validated_data['has_data_links']) save_preprint = True @@ -366,6 +373,7 @@ def update(self, preprint, validated_data): raise exceptions.ValidationError(detail=str(e)) if 'data_links' in validated_data: + require_admin_permission() try: preprint.update_data_links(auth, validated_data['data_links']) save_preprint = True @@ -377,6 +385,7 @@ def update(self, preprint, validated_data): save_preprint = True if 'why_no_prereg' in validated_data: + require_admin_permission() try: preprint.update_why_no_prereg(auth, validated_data['why_no_prereg']) save_preprint = True @@ -384,6 +393,7 @@ def update(self, preprint, validated_data): raise exceptions.ValidationError(detail=str(e)) if 'prereg_links' in validated_data: + require_admin_permission() try: preprint.update_prereg_links(auth, validated_data['prereg_links']) save_preprint = True @@ -391,6 +401,7 @@ def update(self, preprint, validated_data): raise exceptions.ValidationError(detail=str(e)) if 'prereg_link_info' in validated_data: + require_admin_permission() try: preprint.update_prereg_link_info(auth, validated_data['prereg_link_info']) save_preprint = True diff --git a/api_tests/preprints/views/test_preprint_detail.py b/api_tests/preprints/views/test_preprint_detail.py index 3106835a940..ffec9722514 100644 --- a/api_tests/preprints/views/test_preprint_detail.py +++ b/api_tests/preprints/views/test_preprint_detail.py @@ -20,6 +20,7 @@ PreprintContributor, PreprintLog ) +from osf.utils import permissions as osf_permissions from osf.utils.permissions import WRITE from osf.utils.workflows import DefaultStates from osf_tests.factories import ( @@ -1243,6 +1244,22 @@ def test_update_has_prereg_links_no_with_empty_prereg_links(self, app, user, pre assert preprint.prereg_links == [] assert preprint.prereg_link_info == '' + def test_non_admin_cannot_update_has_coi(self, app, user, preprint, url): + write_contrib = AuthUserFactory() + preprint.add_contributor(write_contrib, 
permissions=osf_permissions.WRITE, auth=Auth(user), save=True)
+
+        update_payload = build_preprint_update_payload(
+            preprint._id,
+            attributes={'has_coi': True}
+        )
+
+        res = app.patch_json_api(url, update_payload, auth=write_contrib.auth, expect_errors=True)
+        assert res.status_code == 403
+        assert res.json['errors'][0]['detail'] == 'Must have admin permissions to update author assertion fields.'
+
+        preprint.reload()
+        assert preprint.has_coi is None
+
     def test_sloan_updates(self, app, user, preprint, url):
         """
         - Tests to ensure updating a preprint with unchanged data does not create superfluous log statements.

From 59fab7d0f19f15784f50dac9c4f1c4932fae8018 Mon Sep 17 00:00:00 2001
From: Uditi Mehta
Date: Thu, 10 Oct 2024 10:12:32 -0400
Subject: [PATCH 07/35] remove redundant and duplicate code

---
 api/preprints/serializers.py | 142 +++--------------------------------
 1 file changed, 10 insertions(+), 132 deletions(-)

diff --git a/api/preprints/serializers.py b/api/preprints/serializers.py
index 28ca617ee70..b8ad259aa3e 100644
--- a/api/preprints/serializers.py
+++ b/api/preprints/serializers.py
@@ -366,6 +366,7 @@ def require_admin_permission():
                 raise exceptions.ValidationError(detail=str(e))

         if 'why_no_data' in validated_data:
+            require_admin_permission()
             try:
                 preprint.update_why_no_data(auth, validated_data['why_no_data'])
                 save_preprint = True
@@ -384,6 +385,15 @@ def require_admin_permission():
             preprint.update_data_links(auth, [])
             save_preprint = True

+        if 'has_prereg_links' in validated_data:
+            require_admin_permission()
+
+            try:
+                preprint.update_has_prereg_links(auth, validated_data['has_prereg_links'])
+                save_preprint = True
+            except PreprintStateError as e:
+                raise exceptions.ValidationError(detail=str(e))
+
         if 'why_no_prereg' in validated_data:
             require_admin_permission()
             try:
@@ -488,68 +498,6 @@ def require_admin_permission():
             preprint.custom_publication_citation = validated_data['custom_publication_citation'] or None
             save_preprint = True

-        if 'has_coi' in validated_data:
-            try:
-                preprint.update_has_coi(auth, validated_data['has_coi'])
-                save_preprint = True
-            except PreprintStateError as e:
-                raise exceptions.ValidationError(detail=str(e))
-
-        if 'conflict_of_interest_statement' in validated_data:
-            try:
-                preprint.update_conflict_of_interest_statement(auth, validated_data['conflict_of_interest_statement'])
-                save_preprint = True
-            except PreprintStateError as e:
-                raise exceptions.ValidationError(detail=str(e))
-
-        if 'has_data_links' in validated_data:
-            try:
-                preprint.update_has_data_links(auth, validated_data['has_data_links'])
-                save_preprint = True
-            except PreprintStateError as e:
-                raise exceptions.ValidationError(detail=str(e))
-
-        if 'why_no_data' in validated_data:
-            try:
-                preprint.update_why_no_data(auth, validated_data['why_no_data'])
-                save_preprint = True
-            except PreprintStateError as e:
-                raise exceptions.ValidationError(detail=str(e))
-
-        if 'data_links' in validated_data:
-            try:
-                preprint.update_data_links(auth, validated_data['data_links'])
-                save_preprint = True
-            except PreprintStateError as e:
-                raise exceptions.ValidationError(detail=str(e))
-
-        if 'has_prereg_links' in validated_data:
-            try:
-                preprint.update_has_prereg_links(auth, validated_data['has_prereg_links'])
-                save_preprint = True
-            except PreprintStateError as e:
-                raise exceptions.ValidationError(detail=str(e))
-
-        if 'why_no_prereg' in validated_data:
-            try:
-                preprint.update_why_no_prereg(auth, validated_data['why_no_prereg'])
-            except PreprintStateError as e:
-                raise
exceptions.ValidationError(detail=str(e)) - - if 'prereg_links' in validated_data: - try: - preprint.update_prereg_links(auth, validated_data['prereg_links']) - save_preprint = True - except PreprintStateError as e: - raise exceptions.ValidationError(detail=str(e)) - - if 'prereg_link_info' in validated_data: - try: - preprint.update_prereg_link_info(auth, validated_data['prereg_link_info']) - save_preprint = True - except PreprintStateError as e: - raise exceptions.ValidationError(detail=str(e)) - if published is not None: if not preprint.primary_file: raise exceptions.ValidationError( @@ -575,76 +523,6 @@ def require_admin_permission(): return preprint - def handle_author_assertions(self, preprint, validated_data, auth): - author_assertions = { - 'has_coi', - 'conflict_of_interest_statement', - 'has_data_links', - 'why_no_data', - 'data_links', - 'why_no_prereg', - 'prereg_links', - 'has_prereg_links', - 'prereg_link_info', - } - if author_assertions & validated_data.keys(): - if not preprint.is_admin_contributor(auth.user): - raise exceptions.PermissionDenied('User must be admin to add author assertions') - - if 'has_coi' in validated_data: - try: - preprint.update_has_coi(auth, validated_data['has_coi']) - except PreprintStateError as e: - raise exceptions.ValidationError(detail=str(e)) - - if 'conflict_of_interest_statement' in validated_data: - try: - preprint.update_conflict_of_interest_statement(auth, validated_data['conflict_of_interest_statement']) - except PreprintStateError as e: - raise exceptions.ValidationError(detail=str(e)) - - if 'has_data_links' in validated_data: - try: - preprint.update_has_data_links(auth, validated_data['has_data_links']) - except PreprintStateError as e: - raise exceptions.ValidationError(detail=str(e)) - - if 'why_no_data' in validated_data: - try: - preprint.update_why_no_data(auth, validated_data['why_no_data']) - except PreprintStateError as e: - raise exceptions.ValidationError(detail=str(e)) - - if 'data_links' in validated_data: - try: - preprint.update_data_links(auth, validated_data['data_links']) - except PreprintStateError as e: - raise exceptions.ValidationError(detail=str(e)) - - if 'has_prereg_links' in validated_data: - try: - preprint.update_has_prereg_links(auth, validated_data['has_prereg_links']) - except PreprintStateError as e: - raise exceptions.ValidationError(detail=str(e)) - - if 'why_no_prereg' in validated_data: - try: - preprint.update_why_no_prereg(auth, validated_data['why_no_prereg']) - except PreprintStateError as e: - raise exceptions.ValidationError(detail=str(e)) - - if 'prereg_links' in validated_data: - try: - preprint.update_prereg_links(auth, validated_data['prereg_links']) - except PreprintStateError as e: - raise exceptions.ValidationError(detail=str(e)) - - if 'prereg_link_info' in validated_data: - try: - preprint.update_prereg_link_info(auth, validated_data['prereg_link_info']) - except PreprintStateError as e: - raise exceptions.ValidationError(detail=str(e)) - def set_field(self, func, val, auth, save=False): try: func(val, auth) From 53206aadfbfb64cd6bb13d5bc54abc74289a27af Mon Sep 17 00:00:00 2001 From: Longze Chen Date: Wed, 30 Oct 2024 12:44:08 -0400 Subject: [PATCH 08/35] Update changelog and bump version --- CHANGELOG | 5 +++++ package.json | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGELOG b/CHANGELOG index 205d47de13c..94705b38a79 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -2,6 +2,11 @@ We follow the CalVer (https://calver.org/) versioning scheme: YY.MINOR.MICRO. 
+24.08.0 (2024-10-30) +==================== + +- Fix admin confirmation link generation and handling + 24.07.0 (2024-09-19) ==================== diff --git a/package.json b/package.json index be5c3b44a30..8b0edd12961 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "OSF", - "version": "24.07.0", + "version": "24.08.0", "description": "Facilitating Open Science", "repository": "https://github.com/CenterForOpenScience/osf.io", "author": "Center for Open Science", From 78bc9e822a2741d4a7b2eb7fd2fb0980a4d05ca4 Mon Sep 17 00:00:00 2001 From: Matt Frazier Date: Mon, 4 Nov 2024 09:40:04 -0500 Subject: [PATCH 09/35] Handle edge case in confirmation link generation --- osf/models/user.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/osf/models/user.py b/osf/models/user.py index d0783c208aa..22bbfc5baf9 100644 --- a/osf/models/user.py +++ b/osf/models/user.py @@ -1227,12 +1227,12 @@ def update_guessed_names(self): self.family_name = parsed['family'] self.suffix = parsed['suffix'] - def add_unconfirmed_email(self, email, expiration=None, external_identity=None): + def add_unconfirmed_email(self, email, expiration=None, external_identity=None, force=False): """ Add an email verification token for a given email. :param email: the email to confirm - :param email: overwrite default expiration time + :param expiration: overwrite default expiration time :param external_identity: the user's external identity :return: a token :raises: ValueError if email already confirmed, except for login through external idp. @@ -1249,7 +1249,8 @@ def add_unconfirmed_email(self, email, expiration=None, external_identity=None): validate_email(email) if not external_identity and self.emails.filter(address=email).exists(): - raise ValueError('Email already confirmed to this user.') + if not force or self.is_confirmed: + raise ValueError('Email already confirmed to this user.') # If the unconfirmed email is already present, refresh the token if email in self.unconfirmed_emails: @@ -1304,14 +1305,14 @@ def get_confirmation_token(self, email, force=False, renew=False): # assume the token is expired expiration = info.get('expiration') if renew: - new_token = self.add_unconfirmed_email(email) + new_token = self.add_unconfirmed_email(email, force=force) self.save() return new_token if not expiration or (expiration and expiration < timezone.now()): if not force: raise ExpiredTokenError(f'Token for email "{email}" is expired') else: - new_token = self.add_unconfirmed_email(email) + new_token = self.add_unconfirmed_email(email, force=force) self.save() return new_token return token @@ -1355,7 +1356,7 @@ def get_or_create_confirmation_url(self, email, force=False, renew=False): try: self.get_confirmation_token(email, force=force, renew=renew) except KeyError: - self.add_unconfirmed_email(email) + self.add_unconfirmed_email(email, force=force) self.save() return self.get_confirmation_url(email) From b49605351e1e3a35941bfb831548f8ee460bf6e0 Mon Sep 17 00:00:00 2001 From: Matt Frazier Date: Mon, 14 Oct 2024 15:40:59 -0400 Subject: [PATCH 10/35] Add change_node_region script [ENG-5242] --- osf/management/commands/change_node_region.py | 160 ++++++++++++++++++ 1 file changed, 160 insertions(+) create mode 100644 osf/management/commands/change_node_region.py diff --git a/osf/management/commands/change_node_region.py b/osf/management/commands/change_node_region.py new file mode 100644 index 00000000000..abce28672bf --- /dev/null +++ b/osf/management/commands/change_node_region.py @@ -0,0 
+1,160 @@ +import logging +import json + +from django.core.management.base import BaseCommand +from django.db import transaction +from google.cloud.storage.client import Client +from google.oauth2.service_account import Credentials + +from osf.models import AbstractNode +from osf.utils.migrations import disable_auto_now_fields +from addons.osfstorage.models import Region + +logger = logging.getLogger(__name__) + +def _get_file_block_map(node): + file_block_map = {} + file_input_qids = node.registration_schema.schema_blocks.filter( + block_type='file-input' + ).values_list('registration_response_key', flat=True) + for schema_response in node.schema_responses.all(): + for block in schema_response.response_blocks.filter(schema_key__in=file_input_qids): + for file_response in block.response: + if file_block_map.get(file_response['file_id'], False): + file_block_map[file_response['file_id']].append(block) + else: + file_block_map[file_response['file_id']] = [block] + return file_block_map + +def _update_blocks(file_block_map, original_id, cloned_id): + for block in file_block_map[original_id]: + logger.info(f'Updating block {block._id} file info') + response = [] + for file_response in block.response: + if original_id == file_response['file_id']: + for key in file_response['file_urls'].keys(): + file_response['file_urls'][key] = file_response['file_urls'][key].replace(original_id, cloned_id) + response.append(file_response) + block.response = response + block.save() + +def _update_schema_meta(node): + logger.info('Updating legacy schema information...') + node.registration_responses = node.schema_responses.latest('-created').all_responses + node.registered_meta[node.registration_schema._id] = node.expand_registration_responses() + node.save() + logger.info('Updated legacy schema information.') + +def _copy_and_clone_versions(original_file, cloned_file, src_bucket, dest_bucket, dest_bucket_name, dest_region): + for v in original_file.versions.order_by('identifier').all(): + blob_hash = v.location['object'] + logger.info(f'Preparing to move version {blob_hash}') + # Copy each version to dest_bucket + src_blob = src_bucket.get_blob(blob_hash) + src_bucket.copy_blob(src_blob, dest_bucket) + logger.info(f'Blob {blob_hash} copied to destination, cloning version object.') + # Clone each version, update location + cloned_v = v.clone() + cloned_v.location['bucket'] = dest_bucket_name + # Set FKs + cloned_v.creator = v.creator + cloned_v.region = dest_region + # Save before M2M's can be set + cloned_v.save() + cloned_file.add_version(cloned_v) + # Retain original timestamps + cloned_v.created = v.created + cloned_v.modified = v.modified + cloned_v.save() + logger.info(f'Version {blob_hash} cloned.') + +def _clone_file(file_obj): + # Clone each file, so that the originals will be purged from src_region + cloned_f = file_obj.clone() + # Set (G)FKs + cloned_f.target = file_obj.target + cloned_f.parent = file_obj.parent + cloned_f.checkout = file_obj.checkout + cloned_f.copied_from = file_obj.copied_from + # Save before M2M's can be set, assigning both id and _id + cloned_f.save() + # Repoint Guids + assert cloned_f.id, f'Cloned file ID not assigned for {file_obj._id}' + file_obj.guids.update(object_id=cloned_f.id) + # Retain original timestamps + cloned_f.created = file_obj.created + cloned_f.modified = file_obj.modified + cloned_f.save() + return cloned_f + +def change_node_region(node, dest_region, gcs_creds): + creds = Credentials.from_service_account_info(gcs_creds) + client = Client(credentials=creds) 
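+    # High-level flow of the migration below: for each osfstorage file on the
+    # node, clone the file record, copy every version blob from the source GCS
+    # bucket to the destination bucket, clone the version rows to reference the
+    # new bucket and region, then trash the original file. For registrations,
+    # file-input schema responses are repointed at the cloned file ids.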
+    osfstorage_addon = node.get_addon('osfstorage')
+    src_region = osfstorage_addon.region
+    if src_region.id == dest_region.id:
+        logger.warning(f'Source and destination regions match: {src_region._id}. Exiting.')
+        return
+    src_bucket_name = src_region.waterbutler_settings['storage']['bucket']
+    dest_bucket_name = dest_region.waterbutler_settings['storage']['bucket']
+    src_bucket = client.get_bucket(src_bucket_name)
+    dest_bucket = client.get_bucket(dest_bucket_name)
+    response_blocks_by_file_id = {}
+    with transaction.atomic():
+        with disable_auto_now_fields():
+            if node.type == 'osf.registration':
+                response_blocks_by_file_id = _get_file_block_map(node)
+            for f in node.files.all():
+                logger.info(f'Preparing to move file {f._id}')
+                cloned_f = _clone_file(f)
+                if f._id in response_blocks_by_file_id:
+                    logger.info(f'Preparing to update ResponseBlocks for file {f._id}')
+                    _update_blocks(response_blocks_by_file_id, f._id, cloned_f._id)
+                logger.info(f'File {f._id} cloned, copying versions...')
+                _copy_and_clone_versions(f, cloned_f, src_bucket, dest_bucket, dest_bucket_name, dest_region)
+                # Trash original file
+                f.delete()
+            logger.info('All files complete.')
+            if response_blocks_by_file_id:
+                _update_schema_meta(node)
+            osfstorage_addon.region = dest_region
+            osfstorage_addon.save()
+            logger.info('Region updated. Exiting.')
+
+class Command(BaseCommand):
+
+    def add_arguments(self, parser):
+        super().add_arguments(parser)
+        parser.add_argument(
+            '-n',
+            '--node',
+            type=str,
+            action='store',
+            dest='node',
+            help='Node._id to migrate.',
+        )
+        parser.add_argument(
+            '-r',
+            '--region',
+            type=str,
+            action='store',
+            dest='region',
+            help='Region._id to migrate files to.',
+        )
+        parser.add_argument(
+            '-c',
+            '--credentials',
+            type=str,
+            action='store',
+            dest='gcs_creds',
+            help='GCS Credentials to use.
JSON string.', + ) + + def handle(self, *args, **options): + node = AbstractNode.load(options.get('node', None)) + region = Region.load(options.get('region', None)) + gcs_creds = json.loads(options.get('gcs_creds', '{}')) + assert node, 'Node not found' + assert region, 'Region not found' + assert gcs_creds, 'Credentials required' + change_node_region(node, region, gcs_creds) From be98bc0f70647048320b7d44e1cc5f46eb5decc5 Mon Sep 17 00:00:00 2001 From: mfraezz Date: Thu, 14 Nov 2024 13:49:54 -0500 Subject: [PATCH 11/35] [Feature Release][ENG-5024] Institutional Dashboard Improvements (#10797) Add support for new Institutional Dashboard gated by a waffle flag --------- Co-authored-by: abram axel booth Co-authored-by: John Tordoff Co-authored-by: John Tordoff <> --- admin/management/views.py | 7 +- api/base/elasticsearch_dsl_views.py | 172 +++++++ api/base/pagination.py | 9 +- api/base/serializers.py | 28 ++ api/base/settings/defaults.py | 3 + api/base/utils.py | 19 + api/caching/tasks.py | 64 ++- api/institutions/serializers.py | 124 ++++- api/institutions/urls.py | 4 +- api/institutions/views.py | 146 +++++- api/metrics/renderers.py | 27 +- api/share/utils.py | 124 +++-- api_tests/base/test_views.py | 4 +- .../views/test_institution_department_list.py | 2 +- .../views/test_institution_detail.py | 90 ++-- .../views/test_institution_summary_metrics.py | 256 ++++++++++- .../test_institution_user_metric_list.py | 425 ++++++++++++++++-- api_tests/metrics/test_composite_query.py | 2 +- api_tests/metrics/test_preprint_metrics.py | 2 +- api_tests/metrics/test_raw_metrics.py | 10 +- .../test_registries_moderation_metrics.py | 4 +- api_tests/share/_utils.py | 53 ++- api_tests/share/test_share_preprint.py | 2 +- conftest.py | 49 +- osf/admin.py | 18 + osf/features.yaml | 4 + .../make_dummy_pageviews_for_metrics.py | 6 +- .../commands/monthly_reporters_go.py | 13 +- osf/metadata/gather/basket.py | 19 +- osf/metadata/gather/gatherer.py | 9 +- osf/metadata/osf_gathering.py | 196 +++++++- osf/metadata/rdfutils.py | 2 + osf/metadata/serializers/turtle.py | 8 +- osf/metrics/counted_usage.py | 12 +- osf/metrics/reporters/__init__.py | 6 + osf/metrics/reporters/_base.py | 19 +- .../reporters/institution_summary_monthly.py | 105 +++++ osf/metrics/reporters/institutional_users.py | 161 +++++++ osf/metrics/reporters/public_item_usage.py | 286 ++++++++++++ osf/metrics/reporters/spam_count.py | 7 +- osf/metrics/reports.py | 157 ++++++- osf/metrics/utils.py | 41 +- ...tution_link_to_external_reports_archive.py | 18 + osf/models/institution.py | 6 + osf/models/node.py | 3 +- osf/models/user.py | 2 +- osf_tests/factories.py | 2 +- osf_tests/metadata/_utils.py | 17 +- .../expected_metadata_files/file_basic.turtle | 6 +- .../expected_metadata_files/file_full.turtle | 6 +- .../file_monthly_supplement.turtle | 13 + .../file_supplement.turtle | 1 + .../preprint_basic.turtle | 5 +- .../preprint_full.turtle | 5 +- .../preprint_monthly_supplement.turtle | 13 + .../preprint_supplement.turtle | 7 + .../project_basic.turtle | 19 +- .../project_full.turtle | 19 +- .../project_monthly_supplement.turtle | 13 + .../project_supplement.turtle | 13 + .../registration_basic.turtle | 28 +- .../registration_full.turtle | 28 +- .../registration_monthly_supplement.turtle | 13 + .../registration_supplement.turtle | 7 + .../user_monthly_supplement.turtle | 1 + .../user_supplement.turtle | 1 + osf_tests/metadata/test_basket.py | 5 +- osf_tests/metadata/test_gatherer_registry.py | 4 + osf_tests/metadata/test_osf_gathering.py | 142 +++++- 
.../metadata/test_serialized_metadata.py | 184 +++++--- .../test_institutional_summary_reporter.py | 286 ++++++++++++ .../test_institutional_users_reporter.py | 262 +++++++++++ .../test_public_item_usage_reporter.py | 238 ++++++++++ osf_tests/metrics/test_daily_report.py | 19 +- osf_tests/metrics/test_monthly_report.py | 146 ++++++ osf_tests/metrics/test_yearmonth.txt | 48 ++ osf_tests/test_management_commands.py | 2 +- 77 files changed, 3914 insertions(+), 363 deletions(-) create mode 100644 api/base/elasticsearch_dsl_views.py create mode 100644 osf/metrics/reporters/institution_summary_monthly.py create mode 100644 osf/metrics/reporters/institutional_users.py create mode 100644 osf/metrics/reporters/public_item_usage.py create mode 100644 osf/migrations/0024_institution_link_to_external_reports_archive.py create mode 100644 osf_tests/metadata/expected_metadata_files/file_monthly_supplement.turtle create mode 100644 osf_tests/metadata/expected_metadata_files/file_supplement.turtle create mode 100644 osf_tests/metadata/expected_metadata_files/preprint_monthly_supplement.turtle create mode 100644 osf_tests/metadata/expected_metadata_files/preprint_supplement.turtle create mode 100644 osf_tests/metadata/expected_metadata_files/project_monthly_supplement.turtle create mode 100644 osf_tests/metadata/expected_metadata_files/project_supplement.turtle create mode 100644 osf_tests/metadata/expected_metadata_files/registration_monthly_supplement.turtle create mode 100644 osf_tests/metadata/expected_metadata_files/registration_supplement.turtle create mode 100644 osf_tests/metadata/expected_metadata_files/user_monthly_supplement.turtle create mode 100644 osf_tests/metadata/expected_metadata_files/user_supplement.turtle create mode 100644 osf_tests/metrics/reporters/test_institutional_summary_reporter.py create mode 100644 osf_tests/metrics/reporters/test_institutional_users_reporter.py create mode 100644 osf_tests/metrics/reporters/test_public_item_usage_reporter.py create mode 100644 osf_tests/metrics/test_monthly_report.py create mode 100644 osf_tests/metrics/test_yearmonth.txt diff --git a/admin/management/views.py b/admin/management/views.py index 3bd675790dd..88548a518d1 100644 --- a/admin/management/views.py +++ b/admin/management/views.py @@ -1,4 +1,3 @@ -import datetime from dateutil.parser import isoparse from django.views.generic import TemplateView, View from django.contrib import messages @@ -120,11 +119,11 @@ def post(self, request, *args, **kwargs): if monthly_report_date: report_date = isoparse(monthly_report_date).date() else: - report_date = datetime.datetime.now().date() + report_date = None errors = monthly_reporters_go( - report_month=report_date.month, - report_year=report_date.year + report_month=getattr(report_date, 'month', None), + report_year=getattr(report_date, 'year', None) ) if errors: diff --git a/api/base/elasticsearch_dsl_views.py b/api/base/elasticsearch_dsl_views.py new file mode 100644 index 00000000000..6199fd82d0e --- /dev/null +++ b/api/base/elasticsearch_dsl_views.py @@ -0,0 +1,172 @@ +from __future__ import annotations +import abc +import datetime +import typing + +import elasticsearch_dsl as edsl +from rest_framework import generics, exceptions as drf_exceptions +from rest_framework.settings import api_settings as drf_settings +from api.base.settings.defaults import REPORT_FILENAME_FORMAT + +if typing.TYPE_CHECKING: + from rest_framework import serializers + +from api.base.filters import FilterMixin +from api.base.views import JSONAPIBaseView +from 
api.metrics.renderers import (
+    MetricsReportsCsvRenderer,
+    MetricsReportsTsvRenderer,
+    MetricsReportsJsonRenderer,
+)
+from api.base.pagination import ElasticsearchQuerySizeMaximumPagination, JSONAPIPagination
+from api.base.renderers import JSONAPIRenderer
+
+
+class ElasticsearchListView(FilterMixin, JSONAPIBaseView, generics.ListAPIView, abc.ABC):
+    '''abstract view class using `elasticsearch_dsl.Search` as a queryset-analogue
+
+    builds a `Search` based on `self.get_default_search()` and the request's
+    query parameters for filtering, sorting, and pagination -- fetches only
+    the data required for the response, just like with a queryset!
+    '''
+    serializer_class: type[serializers.BaseSerializer]  # required on subclasses
+
+    default_ordering: str | None = None  # name of a serializer field, prepended with "-" for descending sort
+    ordering_fields: frozenset[str] = frozenset()  # serializer field names
+
+    @abc.abstractmethod
+    def get_default_search(self) -> edsl.Search | None:
+        '''the base `elasticsearch_dsl.Search` for this list, based on url path
+
+        (common jsonapi query parameters will be considered automatically)
+        '''
+        ...
+
+    FILE_RENDERER_CLASSES = {
+        MetricsReportsCsvRenderer,
+        MetricsReportsTsvRenderer,
+        MetricsReportsJsonRenderer,
+    }
+
+    def set_content_disposition(self, response, renderer):
+        """Set the Content-Disposition header to prompt a file download with the appropriate filename.
+
+        Args:
+            response: The HTTP response object to modify.
+            renderer: The renderer instance used for the response, which determines the file extension.
+        """
+        current_date = datetime.datetime.now().strftime('%Y-%m')
+
+        if isinstance(renderer, JSONAPIRenderer):
+            extension = 'json'
+        else:
+            extension = getattr(renderer, 'extension', renderer.format)
+
+        filename = REPORT_FILENAME_FORMAT.format(
+            view_name=self.view_name,
+            date_created=current_date,
+            extension=extension,
+        )
+
+        response['Content-Disposition'] = f'attachment; filename="{filename}"'
+
+    def finalize_response(self, request, response, *args, **kwargs):
+        # Call the parent method to finalize the response first
+        response = super().finalize_response(request, response, *args, **kwargs)
+        # If the accepted renderer is one of the file renderer classes, set the Content-Disposition
+        # header so the response is downloaded as a file attachment with an appropriate filename
+        if isinstance(request.accepted_renderer, tuple(self.FILE_RENDERER_CLASSES)):
+            self.set_content_disposition(response, request.accepted_renderer)
+
+        return response
+
+    ###
+    # beware!
inheritance shenanigans below + + # override FilterMixin to disable all operators besides 'eq' and 'ne' + MATCHABLE_FIELDS = () + COMPARABLE_FIELDS = () + DEFAULT_OPERATOR_OVERRIDES = {} + # (if you want to add fulltext-search or range-filter support, remove the override + # and update `__add_search_filter` to handle those operators -- tho note that the + # underlying elasticsearch field mapping will need to be compatible with the query) + + # override DEFAULT_FILTER_BACKENDS rest_framework setting + # (filtering handled in-view to reuse logic from FilterMixin) + filter_backends = () + + # note: because elasticsearch_dsl.Search supports slicing and gives results when iterated on, + # it works fine with default pagination + + # override rest_framework.generics.GenericAPIView + @property + def pagination_class(self): + """ + When downloading a file assume no pagination is necessary unless the user specifies + """ + is_file_download = any( + self.request.accepted_renderer.format == renderer.format + for renderer in self.FILE_RENDERER_CLASSES + ) + # if it's a file download of the JSON respect default page size + if is_file_download: + return ElasticsearchQuerySizeMaximumPagination + return JSONAPIPagination + + def get_queryset(self): + _search = self.get_default_search() + if _search is None: + return [] + # using parsing logic from FilterMixin (oddly nested dict and all) + for _parsed_param in self.parse_query_params(self.request.query_params).values(): + for _parsed_filter in _parsed_param.values(): + _search = self.__add_search_filter( + _search, + elastic_field_name=_parsed_filter['source_field_name'], + operator=_parsed_filter['op'], + value=_parsed_filter['value'], + ) + return self.__add_sort(_search) + + ### + # private methods + + def __add_sort(self, search: edsl.Search) -> edsl.Search: + _elastic_sort = self.__get_elastic_sort() + return (search if _elastic_sort is None else search.sort(_elastic_sort)) + + def __get_elastic_sort(self) -> str | None: + _sort_param = self.request.query_params.get(drf_settings.ORDERING_PARAM, self.default_ordering) + if not _sort_param: + return None + _sort_field, _ascending = ( + (_sort_param[1:], False) + if _sort_param.startswith('-') + else (_sort_param, True) + ) + if _sort_field not in self.ordering_fields: + raise drf_exceptions.ValidationError( + f'invalid value for {drf_settings.ORDERING_PARAM} query param (valid values: {", ".join(self.ordering_fields)})', + ) + _serializer_field = self.get_serializer().fields[_sort_field] + _elastic_sort_field = _serializer_field.source + return (_elastic_sort_field if _ascending else f'-{_elastic_sort_field}') + + def __add_search_filter( + self, + search: edsl.Search, + elastic_field_name: str, + operator: str, + value: str, + ) -> edsl.Search: + match operator: # operators from FilterMixin + case 'eq': + if value == '': + return search.exclude('exists', field=elastic_field_name) + return search.filter('term', **{elastic_field_name: value}) + case 'ne': + if value == '': + return search.filter('exists', field=elastic_field_name) + return search.exclude('term', **{elastic_field_name: value}) + case _: + raise NotImplementedError(f'unsupported filter operator "{operator}"') diff --git a/api/base/pagination.py b/api/base/pagination.py index 7ed3db5f6e3..676f0baa8fb 100644 --- a/api/base/pagination.py +++ b/api/base/pagination.py @@ -10,7 +10,7 @@ replace_query_param, remove_query_param, ) from api.base.serializers import is_anonymized -from api.base.settings import MAX_PAGE_SIZE +from api.base.settings 
import MAX_PAGE_SIZE, MAX_SIZE_OF_ES_QUERY from api.base.utils import absolute_reverse from osf.models import AbstractNode, Comment, Preprint, Guid, DraftRegistration @@ -172,6 +172,13 @@ class MaxSizePagination(JSONAPIPagination): max_page_size = None page_size_query_param = None + +class ElasticsearchQuerySizeMaximumPagination(JSONAPIPagination): + page_size = MAX_SIZE_OF_ES_QUERY + max_page_size = MAX_SIZE_OF_ES_QUERY + page_size_query_param = None + + class NoMaxPageSizePagination(JSONAPIPagination): max_page_size = None diff --git a/api/base/serializers.py b/api/base/serializers.py index ac28139da97..3c8c518ea16 100644 --- a/api/base/serializers.py +++ b/api/base/serializers.py @@ -17,6 +17,7 @@ from api.base import utils from api.base.exceptions import EnumFieldMemberError +from osf.metrics.utils import YearMonth from osf.utils import permissions as osf_permissions from osf.utils import sanitize from osf.utils import functional @@ -171,6 +172,18 @@ def should_show(self, instance): return request and (request.user.is_anonymous or has_admin_scope) +class ShowIfObjectPermission(ConditionalField): + """Show the field only for users with a given object permission + """ + def __init__(self, field, *, permission: str, **kwargs): + super().__init__(field, **kwargs) + self._required_object_permission = permission + + def should_show(self, instance): + _request = self.context.get('request') + return _request.user.has_perm(self._required_object_permission, obj=instance) + + class HideIfRegistration(ConditionalField): """ If node is a registration, this field will return None. @@ -2012,3 +2025,18 @@ def to_internal_value(self, data): return self._enum_class[data.upper()].value except KeyError: raise EnumFieldMemberError(self._enum_class, data) + + +class YearmonthField(ser.Field): + def to_representation(self, value: YearMonth | None) -> str | None: + if value is None: + return None + return str(value) + + def to_internal_value(self, data: str | None) -> YearMonth | None: + if data is None: + return None + try: + return YearMonth.from_str(data) + except ValueError as e: + raise ser.ValidationError(str(e)) diff --git a/api/base/settings/defaults.py b/api/base/settings/defaults.py index 136f7f48b6b..367ca1b04f9 100644 --- a/api/base/settings/defaults.py +++ b/api/base/settings/defaults.py @@ -359,8 +359,11 @@ MAX_SIZE_OF_ES_QUERY = 10000 DEFAULT_ES_NULL_VALUE = 'N/A' +REPORT_FILENAME_FORMAT = '{view_name}_{date_created}.{extension}' CI_ENV = False CITATION_STYLES_REPO_URL = 'https://github.com/CenterForOpenScience/styles/archive/88e6ed31a91e9f5a480b486029cda97b535935d4.zip' DEFAULT_AUTO_FIELD = 'django.db.models.AutoField' + +WAFFLE_ENABLE_ADMIN_PAGES = False # instead, customized waffle admins in osf/admin.py diff --git a/api/base/utils.py b/api/base/utils.py index 9e0dcbc7e8c..1da52026d7e 100644 --- a/api/base/utils.py +++ b/api/base/utils.py @@ -2,6 +2,7 @@ from urllib.parse import urlunsplit, urlsplit, parse_qs, urlencode from packaging.version import Version from hashids import Hashids +import waffle from django.apps import apps from django.core.exceptions import ObjectDoesNotExist @@ -275,3 +276,21 @@ def __len__(self): def add_dict_as_item(self, dict): item = type('item', (object,), dict) self.append(item) + + +def toggle_view_by_flag(flag_name, old_view, new_view): + '''toggle between view implementations based on a feature flag + + returns a wrapper view function that: + - when the given flag is inactive, passes thru to `old_view` + - when the given flag is active, passes thru to 
`new_view` + ''' + def _view_by_flag(request, *args, **kwargs): + if waffle.flag_is_active(request, flag_name): + return new_view(request, *args, **kwargs) + return old_view(request, *args, **kwargs) + if hasattr(new_view, 'view_class'): + # set view_class to masquerade as a class-based view, for sake of assumptions + # in `api_tests.base.test_views` and `api.base.serializers.RelationshipField` + _view_by_flag.view_class = new_view.view_class # type: ignore[attr-defined] + return _view_by_flag diff --git a/api/caching/tasks.py b/api/caching/tasks.py index 0b7a4b6670f..b3afba02c2e 100644 --- a/api/caching/tasks.py +++ b/api/caching/tasks.py @@ -1,11 +1,11 @@ +import logging from urllib.parse import urlparse + +from django.apps import apps from django.db import connection from django.db.models import Sum - import requests -import logging -from django.apps import apps from api.caching.utils import storage_usage_cache from framework.postcommit_tasks.handlers import enqueue_postcommit_task @@ -16,6 +16,9 @@ logger = logging.getLogger(__name__) +_DEFAULT_FILEVERSION_PAGE_SIZE = 500000 + + def get_varnish_servers(): # TODO: this should get the varnish servers from HAProxy or a setting return settings.VARNISH_SERVERS @@ -111,35 +114,60 @@ def ban_url(instance): @app.task(max_retries=5, default_retry_delay=10) -def update_storage_usage_cache(target_id, target_guid, per_page=500000): +def update_storage_usage_cache(target_id, target_guid, per_page=_DEFAULT_FILEVERSION_PAGE_SIZE): if not settings.ENABLE_STORAGE_USAGE_CACHE: return + from osf.models import Guid + storage_usage_total = compute_storage_usage_total(Guid.load(target_guid).referent, per_page=per_page) + key = cache_settings.STORAGE_USAGE_KEY.format(target_id=target_guid) + storage_usage_cache.set(key, storage_usage_total, settings.STORAGE_USAGE_CACHE_TIMEOUT) + + +def compute_storage_usage_total(target_obj, per_page=_DEFAULT_FILEVERSION_PAGE_SIZE): + from django.contrib.contenttypes.models import ContentType sql = """ SELECT count(size), sum(size) from (SELECT size FROM osf_basefileversionsthrough AS obfnv LEFT JOIN osf_basefilenode file ON obfnv.basefilenode_id = file.id LEFT JOIN osf_fileversion version ON obfnv.fileversion_id = version.id - LEFT JOIN django_content_type type on file.target_content_type_id = type.id WHERE file.provider = 'osfstorage' - AND type.model = 'abstractnode' AND file.deleted_on IS NULL - AND file.target_object_id=%s + AND file.target_object_id=%(target_pk)s + AND file.target_content_type_id=%(target_content_type_pk)s ORDER BY version.id - LIMIT %s OFFSET %s) file_page + LIMIT %(per_page)s OFFSET %(offset)s + ) file_page """ - count = per_page + last_count = 1 # initialize non-zero offset = 0 storage_usage_total = 0 + content_type_pk = ContentType.objects.get_for_model(target_obj).pk with connection.cursor() as cursor: - while count: - cursor.execute(sql, [target_id, per_page, offset]) - result = cursor.fetchall() - storage_usage_total += int(result[0][1]) if result[0][1] else 0 - count = int(result[0][0]) if result[0][0] else 0 - offset += count - - key = cache_settings.STORAGE_USAGE_KEY.format(target_id=target_guid) - storage_usage_cache.set(key, storage_usage_total, settings.STORAGE_USAGE_CACHE_TIMEOUT) + while last_count: + cursor.execute( + sql, { + 'target_pk': target_obj.pk, + 'target_content_type_pk': content_type_pk, + 'per_page': per_page, + 'offset': offset, + }, + ) + this_count, size_sum = cursor.fetchall()[0] + storage_usage_total += int(size_sum or 0) + last_count = (this_count or 0) + offset += 
last_count + return storage_usage_total + + +def get_storage_usage_total(target_obj): + if not settings.ENABLE_STORAGE_USAGE_CACHE: + return compute_storage_usage_total(target_obj) + _cache_key = cache_settings.STORAGE_USAGE_KEY.format(target_id=target_obj._id) + _storage_usage_total = storage_usage_cache.get(_cache_key) + if _storage_usage_total is None: + _storage_usage_total = compute_storage_usage_total(target_obj) + storage_usage_cache.set(_cache_key, _storage_usage_total, settings.STORAGE_USAGE_CACHE_TIMEOUT) + return _storage_usage_total def update_storage_usage(target): diff --git a/api/institutions/serializers.py b/api/institutions/serializers.py index f1124d896f8..e3679b2a9c5 100644 --- a/api/institutions/serializers.py +++ b/api/institutions/serializers.py @@ -12,8 +12,10 @@ BaseAPISerializer, ShowIfVersion, IDField, + ShowIfObjectPermission, ) +from api.base.serializers import YearmonthField from api.nodes.serializers import CompoundIDField from api.base.exceptions import RelationshipPostMakesNoChanges from api.base.utils import absolute_reverse @@ -35,6 +37,10 @@ class InstitutionSerializer(JSONAPISerializer): ror_iri = ser.CharField(read_only=True, source='ror_uri') iris = ser.SerializerMethodField(read_only=True) assets = ser.SerializerMethodField(read_only=True) + link_to_external_reports_archive = ShowIfObjectPermission( + ser.CharField(read_only=True), + permission='view_institutional_metrics', + ) links = LinksField({ 'self': 'get_api_url', 'html': 'get_absolute_html_url', @@ -55,19 +61,28 @@ class InstitutionSerializer(JSONAPISerializer): related_view_kwargs={'institution_id': '<_id>'}, ) - department_metrics = RelationshipField( - related_view='institutions:institution-department-metrics', - related_view_kwargs={'institution_id': '<_id>'}, + department_metrics = ShowIfObjectPermission( + RelationshipField( + related_view='institutions:institution-department-metrics', + related_view_kwargs={'institution_id': '<_id>'}, + ), + permission='view_institutional_metrics', ) - user_metrics = RelationshipField( - related_view='institutions:institution-user-metrics', - related_view_kwargs={'institution_id': '<_id>'}, + user_metrics = ShowIfObjectPermission( + RelationshipField( + related_view='institutions:institution-user-metrics', + related_view_kwargs={'institution_id': '<_id>'}, + ), + permission='view_institutional_metrics', ) - summary_metrics = RelationshipField( - related_view='institutions:institution-summary-metrics', - related_view_kwargs={'institution_id': '<_id>'}, + summary_metrics = ShowIfObjectPermission( + RelationshipField( + related_view='institutions:institution-summary-metrics', + related_view_kwargs={'institution_id': '<_id>'}, + ), + permission='view_institutional_metrics', ) def get_api_url(self, obj): @@ -256,7 +271,12 @@ def get_absolute_url(self, obj): ) -class InstitutionUserMetricsSerializer(JSONAPISerializer): +class OldInstitutionUserMetricsSerializer(JSONAPISerializer): + '''serializer for institution-users metrics + + used only when the INSTITUTIONAL_DASHBOARD_2024 feature flag is NOT active + (and should be removed when that flag is permanently active) + ''' class Meta: type_ = 'institution-users' @@ -294,6 +314,90 @@ def get_absolute_url(self, obj): ) +class NewInstitutionUserMetricsSerializer(JSONAPISerializer): + '''serializer for institution-users metrics + + used only when the INSTITUTIONAL_DASHBOARD_2024 feature flag is active + (and should be renamed without "New" when that flag is permanently active) + ''' + + class Meta: + type_ = 
'institution-users'
+
+    filterable_fields = frozenset({
+        'department',
+        'orcid_id',
+    })
+
+    id = IDField(source='meta.id', read_only=True)
+    user_name = ser.CharField(read_only=True)
+    department = ser.CharField(read_only=True, source='department_name')
+    orcid_id = ser.CharField(read_only=True)
+    month_last_login = YearmonthField(read_only=True)
+    month_last_active = YearmonthField(read_only=True)
+    account_creation_date = YearmonthField(read_only=True)
+
+    public_projects = ser.IntegerField(read_only=True, source='public_project_count')
+    private_projects = ser.IntegerField(read_only=True, source='private_project_count')
+    public_registration_count = ser.IntegerField(read_only=True)
+    embargoed_registration_count = ser.IntegerField(read_only=True)
+    published_preprint_count = ser.IntegerField(read_only=True)
+    public_file_count = ser.IntegerField(read_only=True)
+    storage_byte_count = ser.IntegerField(read_only=True)
+
+    user = RelationshipField(
+        related_view='users:user-detail',
+        related_view_kwargs={'user_id': '<user_id>'},
+    )
+    institution = RelationshipField(
+        related_view='institutions:institution-detail',
+        related_view_kwargs={'institution_id': '<institution_id>'},
+    )
+
+    links = LinksField({})
+
+    def get_absolute_url(self):
+        return None  # there is no detail view for institution-users
+
+
+class NewInstitutionSummaryMetricsSerializer(JSONAPISerializer):
+    '''serializer for institution-summary metrics
+
+    used only when the INSTITUTIONAL_DASHBOARD_2024 feature flag is active
+    (and should be renamed without "New" when that flag is permanently active)
+    '''
+
+    class Meta:
+        type_ = 'institution-summary-metrics'
+
+    id = IDField(read_only=True)
+
+    user_count = ser.IntegerField(read_only=True)
+    public_project_count = ser.IntegerField(read_only=True)
+    private_project_count = ser.IntegerField(read_only=True)
+    public_registration_count = ser.IntegerField(read_only=True)
+    embargoed_registration_count = ser.IntegerField(read_only=True)
+    published_preprint_count = ser.IntegerField(read_only=True)
+    public_file_count = ser.IntegerField(read_only=True)
+    storage_byte_count = ser.IntegerField(read_only=True)
+    monthly_logged_in_user_count = ser.IntegerField(read_only=True)
+    monthly_active_user_count = ser.IntegerField(read_only=True)
+
+    user = RelationshipField(
+        related_view='users:user-detail',
+        related_view_kwargs={'user_id': '<user_id>'},
+    )
+    institution = RelationshipField(
+        related_view='institutions:institution-detail',
+        related_view_kwargs={'institution_id': '<institution_id>'},
+    )
+
+    links = LinksField({})
+
+    def get_absolute_url(self):
+        return None  # there is no detail view for institution-users
+
+
 class InstitutionRelated(JSONAPIRelationshipSerializer):
     id = ser.CharField(source='_id', required=False, allow_null=True)
     class Meta:
diff --git a/api/institutions/urls.py b/api/institutions/urls.py
index be4f9ca0b43..477fe8d9377 100644
--- a/api/institutions/urls.py
+++ b/api/institutions/urls.py
@@ -13,7 +13,7 @@
     re_path(r'^(?P<institution_id>\w+)/relationships/registrations/$', views.InstitutionRegistrationsRelationship.as_view(), name=views.InstitutionRegistrationsRelationship.view_name),
     re_path(r'^(?P<institution_id>\w+)/relationships/nodes/$', views.InstitutionNodesRelationship.as_view(), name=views.InstitutionNodesRelationship.view_name),
     re_path(r'^(?P<institution_id>\w+)/users/$', views.InstitutionUserList.as_view(), name=views.InstitutionUserList.view_name),
-    re_path(r'^(?P<institution_id>\w+)/metrics/summary/$', views.InstitutionSummaryMetrics.as_view(), name=views.InstitutionSummaryMetrics.view_name),
+    re_path(r'^(?P<institution_id>\w+)/metrics/summary/$',
views.institution_summary_metrics_detail_view, name=views.institution_summary_metrics_detail_view.view_name),
     re_path(r'^(?P<institution_id>\w+)/metrics/departments/$', views.InstitutionDepartmentList.as_view(), name=views.InstitutionDepartmentList.view_name),
-    re_path(r'^(?P<institution_id>\w+)/metrics/users/$', views.InstitutionUserMetricsList.as_view(), name=views.InstitutionUserMetricsList.view_name),
+    re_path(r'^(?P<institution_id>\w+)/metrics/users/$', views.institution_user_metrics_list_view, name=views.institution_user_metrics_list_view.view_name),
 ]
diff --git a/api/institutions/views.py b/api/institutions/views.py
index d21c15e0746..124e523c7e8 100644
--- a/api/institutions/views.py
+++ b/api/institutions/views.py
@@ -8,12 +8,16 @@
 from framework.auth.oauth_scopes import CoreScopes

+import osf.features
 from osf.metrics import InstitutionProjectCounts
 from osf.models import OSFUser, Node, Institution, Registration
 from osf.metrics import UserInstitutionProjectCounts
+from osf.metrics.reports import InstitutionalUserReport, InstitutionMonthlySummaryReport
+from osf.metrics.utils import YearMonth
 from osf.utils import permissions as osf_permissions

 from api.base import permissions as base_permissions
+from api.base.elasticsearch_dsl_views import ElasticsearchListView
 from api.base.filters import ListFilterMixin
 from api.base.views import JSONAPIBaseView
 from api.base.serializers import JSONAPISerializer
@@ -25,9 +29,17 @@
 )
 from api.base.settings import MAX_SIZE_OF_ES_QUERY
 from api.base.exceptions import RelationshipPostMakesNoChanges
-from api.base.utils import MockQueryset
+from api.base.utils import (
+    MockQueryset,
+    toggle_view_by_flag,
+)
 from api.base.settings import DEFAULT_ES_NULL_VALUE
 from api.metrics.permissions import IsInstitutionalMetricsUser
+from api.metrics.renderers import (
+    MetricsReportsCsvRenderer,
+    MetricsReportsTsvRenderer,
+    MetricsReportsJsonRenderer,
+)
 from api.nodes.serializers import NodeSerializer
 from api.nodes.filters import NodesFilterMixin
 from api.users.serializers import UserSerializer
@@ -40,7 +52,9 @@
     InstitutionRegistrationsRelationshipSerializer,
     InstitutionSummaryMetricSerializer,
     InstitutionDepartmentMetricsSerializer,
-    InstitutionUserMetricsSerializer,
+    NewInstitutionUserMetricsSerializer,
+    OldInstitutionUserMetricsSerializer,
+    NewInstitutionSummaryMetricsSerializer,
 )
 from api.institutions.permissions import UserIsAffiliated
 from api.institutions.renderers import InstitutionDepartmentMetricsCSVRenderer, InstitutionUserMetricsCSVRenderer, MetricsCSVRenderer
@@ -384,7 +398,7 @@ def create(self, *args, **kwargs):
         return ret

-class InstitutionSummaryMetrics(JSONAPIBaseView, generics.RetrieveAPIView, InstitutionMixin):
+class _OldInstitutionSummaryMetrics(JSONAPIBaseView, generics.RetrieveAPIView, InstitutionMixin):
     permission_classes = (
         drf_permissions.IsAuthenticatedOrReadOnly,
         base_permissions.TokenHasScope,
@@ -493,10 +507,15 @@ def get_default_queryset(self):
         return self._make_elasticsearch_results_filterable(search, id=institution._id)

-class InstitutionUserMetricsList(InstitutionImpactList):
+class _OldInstitutionUserMetricsList(InstitutionImpactList):
+    '''list view for institution-users metrics
+
+    used only when the INSTITUTIONAL_DASHBOARD_2024 feature flag is NOT active
+    (and should be removed when that flag is permanently active)
+    '''
     view_name = 'institution-user-metrics'

-    serializer_class = InstitutionUserMetricsSerializer
+    serializer_class = OldInstitutionUserMetricsSerializer
     renderer_classes = tuple(api_settings.DEFAULT_RENDERER_CLASSES) +
(InstitutionUserMetricsCSVRenderer,) ordering_fields = ('user_name', 'department') @@ -521,3 +540,120 @@ def get_default_queryset(self): institution = self.get_institution() search = UserInstitutionProjectCounts.get_current_user_metrics(institution) return self._make_elasticsearch_results_filterable(search, id=institution._id, department=DEFAULT_ES_NULL_VALUE) + + +class _NewInstitutionUserMetricsList(InstitutionMixin, ElasticsearchListView): + '''list view for institution-users metrics + + used only when the INSTITUTIONAL_DASHBOARD_2024 feature flag is active + (and should be renamed without "New" when that flag is permanently active) + ''' + permission_classes = ( + drf_permissions.IsAuthenticatedOrReadOnly, + base_permissions.TokenHasScope, + IsInstitutionalMetricsUser, + ) + + required_read_scopes = [CoreScopes.INSTITUTION_METRICS_READ] + required_write_scopes = [CoreScopes.NULL] + + view_category = 'institutions' + view_name = 'institution-user-metrics' + renderer_classes = ( + *api_settings.DEFAULT_RENDERER_CLASSES, + MetricsReportsCsvRenderer, + MetricsReportsTsvRenderer, + MetricsReportsJsonRenderer, + ) + + serializer_class = NewInstitutionUserMetricsSerializer + + default_ordering = '-storage_byte_count' + ordering_fields = frozenset(( + 'user_name', + 'department', + 'month_last_login', + 'month_last_active', + 'account_creation_date', + 'public_projects', + 'private_projects', + 'public_registration_count', + 'embargoed_registration_count', + 'published_preprint_count', + 'public_file_count', + 'storage_byte_count', + )) + + def get_default_search(self): + _yearmonth = InstitutionalUserReport.most_recent_yearmonth() + if _yearmonth is None: + return None + return ( + InstitutionalUserReport.search() + .filter('term', report_yearmonth=str(_yearmonth)) + .filter('term', institution_id=self.get_institution()._id) + ) + + +class _NewInstitutionSummaryMetricsDetail(JSONAPIBaseView, generics.RetrieveAPIView, InstitutionMixin): + '''detail view for institution-summary metrics + + used only when the INSTITUTIONAL_DASHBOARD_2024 feature flag is active + (and should be renamed without "New" when that flag is permanently active) + ''' + permission_classes = ( + drf_permissions.IsAuthenticatedOrReadOnly, + base_permissions.TokenHasScope, + IsInstitutionalMetricsUser, + ) + + required_read_scopes = [CoreScopes.INSTITUTION_METRICS_READ] + required_write_scopes = [CoreScopes.NULL] + + view_category = 'institutions' + view_name = 'institution-summary-metrics' + + serializer_class = NewInstitutionSummaryMetricsSerializer + + def get_object(self): + institution = self.get_institution() + search_object = self.get_default_search() + if search_object: + object = search_object.execute()[0] + object.id = institution._id + return object + + def get_default_search(self): + yearmonth = InstitutionMonthlySummaryReport.most_recent_yearmonth() + if report_date_str := self.request.query_params.get('report_yearmonth'): + try: + yearmonth = YearMonth.from_str(report_date_str) + except ValueError: + pass + + if yearmonth is None: + return None + + return InstitutionMonthlySummaryReport.search().filter( + 'term', + report_yearmonth=str(yearmonth), + ).filter( + 'term', + institution_id=self.get_institution()._id, + ) + + +institution_summary_metrics_detail_view = toggle_view_by_flag( + flag_name=osf.features.INSTITUTIONAL_DASHBOARD_2024, + old_view=_OldInstitutionSummaryMetrics.as_view(), + new_view=_NewInstitutionSummaryMetricsDetail.as_view(), +) +institution_summary_metrics_detail_view.view_name = 
'institution-summary-metrics' + + +institution_user_metrics_list_view = toggle_view_by_flag( + flag_name=osf.features.INSTITUTIONAL_DASHBOARD_2024, + old_view=_OldInstitutionUserMetricsList.as_view(), + new_view=_NewInstitutionUserMetricsList.as_view(), +) +institution_user_metrics_list_view.view_name = 'institution-user-metrics' diff --git a/api/metrics/renderers.py b/api/metrics/renderers.py index fd4bdc78da2..1e33515b68c 100644 --- a/api/metrics/renderers.py +++ b/api/metrics/renderers.py @@ -1,6 +1,6 @@ import csv import io - +import json from django.http import Http404 from rest_framework import renderers @@ -42,11 +42,7 @@ def get_csv_row(keys_list, report_attrs): ] -class MetricsReportsCsvRenderer(renderers.BaseRenderer): - media_type = 'text/csv' - format = 'csv' - CSV_DIALECT = csv.excel - +class MetricsReportsRenderer(renderers.BaseRenderer): def render(self, json_response, accepted_media_type=None, renderer_context=None): serialized_reports = ( jsonapi_resource['attributes'] @@ -67,7 +63,24 @@ def render(self, json_response, accepted_media_type=None, renderer_context=None) return csv_filecontent.getvalue() -class MetricsReportsTsvRenderer(MetricsReportsCsvRenderer): +class MetricsReportsCsvRenderer(MetricsReportsRenderer): + format = 'csv' + extension = 'csv' + media_type = 'text/csv' + CSV_DIALECT = csv.excel + + +class MetricsReportsTsvRenderer(MetricsReportsRenderer): format = 'tsv' + extension = 'tsv' media_type = 'text/tab-separated-values' CSV_DIALECT = csv.excel_tab + + +class MetricsReportsJsonRenderer(renderers.BaseRenderer): + format = 'json_report' + extension = 'json' + media_type = 'application/json' + + def render(self, json_response, accepted_media_type=None, renderer_context=None): + return json.dumps([item['attributes'] for item in json_response['data']]) diff --git a/api/share/utils.py b/api/share/utils.py index 34c9be4609c..4f4137dcf58 100644 --- a/api/share/utils.py +++ b/api/share/utils.py @@ -3,6 +3,7 @@ SHARE/Trove accepts metadata records as "indexcards" in turtle format: https://www.w3.org/TR/turtle/ """ from functools import partial +from http import HTTPStatus import logging import random from urllib.parse import urljoin @@ -17,7 +18,11 @@ from framework.encryption import ensure_bytes from framework.sentry import log_exception from osf import models as osf_db -from osf.metadata.tools import pls_gather_metadata_file +from osf.metadata.osf_gathering import ( + OsfmapPartition, + pls_get_magic_metadata_basket, +) +from osf.metadata.serializers import get_metadata_serializer from website import settings @@ -25,7 +30,7 @@ def shtrove_ingest_url(): - return f'{settings.SHARE_URL}api/v3/ingest' + return f'{settings.SHARE_URL}trove/ingest' def sharev2_push_url(): @@ -69,83 +74,100 @@ def _enqueue_update_share(osfresource): enqueue_task(async_update_resource_share.s(_osfguid_value)) -@celery_app.task(bind=True, max_retries=4, acks_late=True) -def task__update_share(self, guid: str, is_backfill=False): +@celery_app.task( + bind=True, + acks_late=True, + max_retries=4, + retry_backoff=True, +) +def task__update_share(self, guid: str, is_backfill=False, osfmap_partition_name='MAIN'): """ - This function updates share takes Preprints, Projects and Registrations. 
- :param self: - :param guid: - :return: + Send SHARE/trove current metadata record(s) for the osf-guid-identified object """ - resp = _do_update_share(guid, is_backfill=is_backfill) + _osfmap_partition = OsfmapPartition[osfmap_partition_name] + _osfid_instance = apps.get_model('osf.Guid').load(guid) + if _osfid_instance is None: + raise ValueError(f'unknown osfguid "{guid}"') + _resource = _osfid_instance.referent + _is_deletion = _should_delete_indexcard(_resource) + _response = ( + pls_delete_trove_record(_resource, osfmap_partition=_osfmap_partition) + if _is_deletion + else pls_send_trove_record( + _resource, + is_backfill=is_backfill, + osfmap_partition=_osfmap_partition, + ) + ) try: - resp.raise_for_status() + _response.raise_for_status() except Exception as e: - if self.request.retries == self.max_retries: - log_exception(e) - elif resp.status_code >= 500: - try: - self.retry( - exc=e, - countdown=(random.random() + 1) * min(60 + settings.CELERY_RETRY_BACKOFF_BASE ** self.request.retries, 60 * 10), + log_exception(e) + if HTTPStatus(_response.status_code).is_server_error: + raise self.retry(exc=e) + else: # success response + if not _is_deletion: + # enqueue followup task for supplementary metadata + _next_partition = _next_osfmap_partition(_osfmap_partition) + if _next_partition is not None: + task__update_share.delay( + guid, + is_backfill=is_backfill, + osfmap_partition_name=_next_partition.name, ) - except Retry as e: # Retry is only raise after > 5 retries - log_exception(e) - else: - log_exception(e) - - return resp -def pls_send_trove_indexcard(osf_item, *, is_backfill=False): +def pls_send_trove_record(osf_item, *, is_backfill: bool, osfmap_partition: OsfmapPartition): try: _iri = osf_item.get_semantic_iri() except (AttributeError, ValueError): raise ValueError(f'could not get iri for {osf_item}') - _metadata_record = pls_gather_metadata_file(osf_item, 'turtle') + _basket = pls_get_magic_metadata_basket(osf_item) + _serializer = get_metadata_serializer( + format_key='turtle', + basket=_basket, + serializer_config={'osfmap_partition': osfmap_partition}, + ) + _serialized_record = _serializer.serialize() _queryparams = { 'focus_iri': _iri, - 'record_identifier': _shtrove_record_identifier(osf_item), + 'record_identifier': _shtrove_record_identifier(osf_item, osfmap_partition), } if is_backfill: - _queryparams['nonurgent'] = True + _queryparams['nonurgent'] = '' + if osfmap_partition.is_supplementary: + _queryparams['is_supplementary'] = '' + _expiration_date = osfmap_partition.get_expiration_date(_basket) + if _expiration_date is not None: + _queryparams['expiration_date'] = str(_expiration_date) return requests.post( shtrove_ingest_url(), params=_queryparams, headers={ - 'Content-Type': _metadata_record.mediatype, + 'Content-Type': _serializer.mediatype, **_shtrove_auth_headers(osf_item), }, - data=ensure_bytes(_metadata_record.serialized_metadata), + data=ensure_bytes(_serialized_record), ) -def pls_delete_trove_indexcard(osf_item): +def pls_delete_trove_record(osf_item, osfmap_partition: OsfmapPartition): return requests.delete( shtrove_ingest_url(), params={ - 'record_identifier': _shtrove_record_identifier(osf_item), + 'record_identifier': _shtrove_record_identifier(osf_item, osfmap_partition), }, headers=_shtrove_auth_headers(osf_item), ) -def _do_update_share(osfguid: str, *, is_backfill=False): - logger.debug('%s._do_update_share("%s", is_backfill=%s)', __name__, osfguid, is_backfill) - _guid_instance = apps.get_model('osf.Guid').load(osfguid) - if _guid_instance is 
None: - raise ValueError(f'unknown osfguid "{osfguid}"') - _resource = _guid_instance.referent - _response = ( - pls_delete_trove_indexcard(_resource) - if _should_delete_indexcard(_resource) - else pls_send_trove_indexcard(_resource, is_backfill=is_backfill) +def _shtrove_record_identifier(osf_item, osfmap_partition: OsfmapPartition): + _id = osf_item.guids.values_list('_id', flat=True).first() + return ( + f'{_id}/{osfmap_partition.name}' + if osfmap_partition.is_supplementary + else _id ) - return _response - - -def _shtrove_record_identifier(osf_item): - return osf_item.guids.values_list('_id', flat=True).first() def _shtrove_auth_headers(osf_item): @@ -182,6 +204,16 @@ def _is_item_public(guid_referent) -> bool: return getattr(guid_referent, 'is_public', False) # quacks like AbstractNode +def _next_osfmap_partition(partition: OsfmapPartition) -> OsfmapPartition | None: + match partition: + case OsfmapPartition.MAIN: + return OsfmapPartition.SUPPLEMENT + case OsfmapPartition.SUPPLEMENT: + return OsfmapPartition.MONTHLY_SUPPLEMENT + case _: + return None + + ### # BEGIN soon-to-be-deleted (🤞) legacy sharev2 push # (until dust has settled on iri-centric (rdf-based) search) diff --git a/api_tests/base/test_views.py b/api_tests/base/test_views.py index 6d4a35c07e0..212ebed351a 100644 --- a/api_tests/base/test_views.py +++ b/api_tests/base/test_views.py @@ -43,9 +43,9 @@ if hasattr(patt, 'url_patterns'): # Namespaced list of patterns for subpatt in patt.url_patterns: - VIEW_CLASSES.append(subpatt.callback.cls) + VIEW_CLASSES.append(subpatt.callback.view_class) else: - VIEW_CLASSES.append(patt.callback.cls) + VIEW_CLASSES.append(patt.callback.view_class) class TestApiBaseViews(ApiTestCase): diff --git a/api_tests/institutions/views/test_institution_department_list.py b/api_tests/institutions/views/test_institution_department_list.py index 5a22d17fdff..f2a335eed85 100644 --- a/api_tests/institutions/views/test_institution_department_list.py +++ b/api_tests/institutions/views/test_institution_department_list.py @@ -10,7 +10,7 @@ from osf.metrics import UserInstitutionProjectCounts -@pytest.mark.es +@pytest.mark.es_metrics @pytest.mark.django_db class TestInstitutionDepartmentList: diff --git a/api_tests/institutions/views/test_institution_detail.py b/api_tests/institutions/views/test_institution_detail.py index e21e3a7087b..a8d81f7138f 100644 --- a/api_tests/institutions/views/test_institution_detail.py +++ b/api_tests/institutions/views/test_institution_detail.py @@ -1,6 +1,9 @@ import pytest -from osf_tests.factories import InstitutionFactory +from osf_tests.factories import ( + AuthUserFactory, + InstitutionFactory, +) from api.base.settings.defaults import API_BASE from django.core.validators import URLValidator @@ -11,6 +14,8 @@ class TestInstitutionDetail: 'nodes', 'registrations', 'users', + } + expected_metrics_relationships = { 'department_metrics', 'user_metrics', 'summary_metrics' @@ -26,34 +31,55 @@ def institution(self): def url(self, institution): return f'/{API_BASE}institutions/{institution._id}/' - def test_detail_response(self, app, institution, url): - - # 404 on wrong _id - res = app.get(f'/{institution}institutions/1PO/', expect_errors=True) - assert res.status_code == 404 - - res = app.get(url) - assert res.status_code == 200 - attrs = res.json['data']['attributes'] - assert attrs['name'] == institution.name - assert attrs['iri'] == institution.identifier_domain - assert attrs['ror_iri'] == institution.ror_uri - assert set(attrs['iris']) == { - institution.ror_uri, - 
institution.identifier_domain, - institution.absolute_url, - } - assert 'logo_path' in attrs - assert set(attrs['assets'].keys()) == {'logo', 'logo_rounded', 'banner'} - assert res.json['data']['links']['self'].endswith(url) - - relationships = res.json['data']['relationships'] - assert self.expected_relationships == set(relationships.keys()) - for relationships in list(relationships.values()): - # ↓ returns None if url is valid else throws error. - assert self.is_valid_url(relationships['links']['related']['href']) is None - - # test_return_without_logo_path - res = app.get(f'{url}?version=2.14') - assert res.status_code == 200 - assert 'logo_path' not in res.json['data']['attributes'] + @pytest.fixture() + def rando(self): + return AuthUserFactory() + + @pytest.fixture() + def institutional_admin(self, institution): + _admin_user = AuthUserFactory() + institution.get_group('institutional_admins').user_set.add(_admin_user) + return _admin_user + + def test_detail_response(self, app, institution, url, rando, institutional_admin): + + for _user in (None, rando, institutional_admin): + _auth = (None if _user is None else _user.auth) + # 404 on wrong _id + res = app.get(f'/{institution}institutions/1PO/', expect_errors=True, auth=_auth) + assert res.status_code == 404 + + res = app.get(url, auth=_auth) + assert res.status_code == 200 + attrs = res.json['data']['attributes'] + assert attrs['name'] == institution.name + assert attrs['iri'] == institution.identifier_domain + assert attrs['ror_iri'] == institution.ror_uri + assert set(attrs['iris']) == { + institution.ror_uri, + institution.identifier_domain, + institution.absolute_url, + } + assert 'logo_path' in attrs + assert set(attrs['assets'].keys()) == {'logo', 'logo_rounded', 'banner'} + if _user is institutional_admin: + assert attrs['link_to_external_reports_archive'] == institution.link_to_external_reports_archive + else: + assert 'link_to_external_reports_archive' not in attrs + assert res.json['data']['links']['self'].endswith(url) + + relationships = res.json['data']['relationships'] + _expected_relationships = ( + self.expected_relationships | self.expected_metrics_relationships + if _user is institutional_admin + else self.expected_relationships + ) + assert _expected_relationships == set(relationships.keys()) + for relationships in list(relationships.values()): + # ↓ returns None if url is valid else throws error. 
+ assert self.is_valid_url(relationships['links']['related']['href']) is None + + # test_return_without_logo_path + res = app.get(f'{url}?version=2.14', auth=_auth) + assert res.status_code == 200 + assert 'logo_path' not in res.json['data']['attributes'] diff --git a/api_tests/institutions/views/test_institution_summary_metrics.py b/api_tests/institutions/views/test_institution_summary_metrics.py index b29998d5561..d423663ea89 100644 --- a/api_tests/institutions/views/test_institution_summary_metrics.py +++ b/api_tests/institutions/views/test_institution_summary_metrics.py @@ -1,15 +1,19 @@ import pytest import datetime +from waffle.testutils import override_flag +from osf.metrics import InstitutionProjectCounts + from api.base.settings.defaults import API_BASE from osf_tests.factories import ( + InstitutionFactory, AuthUserFactory, - InstitutionFactory ) -from osf.metrics import InstitutionProjectCounts +from osf.metrics.reports import InstitutionMonthlySummaryReport +from osf import features -@pytest.mark.es +@pytest.mark.es_metrics @pytest.mark.django_db class TestInstitutionSummaryMetrics: @@ -92,3 +96,249 @@ def test_get(self, app, url, institution, user, admin): 'self': f'http://localhost:8000/v2/institutions/{institution._id}/metrics/summary/' } } + + +@pytest.mark.es_metrics +@pytest.mark.django_db +class TestNewInstitutionSummaryMetricsList: + @pytest.fixture(autouse=True) + def _waffled(self): + with override_flag(features.INSTITUTIONAL_DASHBOARD_2024, active=True): + yield + + @pytest.fixture() + def institution(self): + return InstitutionFactory() + + @pytest.fixture() + def rando(self): + return AuthUserFactory() + + @pytest.fixture() + def institutional_admin(self, institution): + admin_user = AuthUserFactory() + institution.get_group('institutional_admins').user_set.add(admin_user) + return admin_user + + @pytest.fixture() + def unshown_reports(self, institution): + # Reports that should not be shown in the results + # Report from another institution + another_institution = InstitutionFactory() + _summary_report_factory('2024-08', another_institution) + # Old report from the same institution + _summary_report_factory('2024-07', institution) + _summary_report_factory('2018-02', institution) + + @pytest.fixture() + def reports(self, institution): + return [ + _summary_report_factory( + '2024-08', institution, + user_count=100, + public_project_count=50, + private_project_count=25, + public_registration_count=10, + embargoed_registration_count=5, + published_preprint_count=15, + public_file_count=20, + storage_byte_count=5000000000, + monthly_logged_in_user_count=80, + monthly_active_user_count=60, + ), + _summary_report_factory( + '2024-08', institution, + user_count=200, + public_project_count=150, + private_project_count=125, + public_registration_count=110, + embargoed_registration_count=105, + published_preprint_count=115, + public_file_count=120, + storage_byte_count=15000000000, + monthly_logged_in_user_count=180, + monthly_active_user_count=160, + ), + ] + + @pytest.fixture() + def url(self, institution): + return f'/{API_BASE}institutions/{institution._id}/metrics/summary/' + + def test_anon(self, app, url): + resp = app.get(url, expect_errors=True) + assert resp.status_code == 401 + + def test_rando(self, app, url, rando): + resp = app.get(url, auth=rando.auth, expect_errors=True) + assert resp.status_code == 403 + + def test_get_empty(self, app, url, institutional_admin): + resp = app.get(url, auth=institutional_admin.auth) + assert resp.status_code == 200 + assert 
resp.json['meta'] == {'version': '2.0'} + + def test_get_report(self, app, url, institutional_admin, institution, reports, unshown_reports): + resp = app.get(url, auth=institutional_admin.auth) + assert resp.status_code == 200 + + data = resp.json['data'] + + assert data['id'] == institution._id + assert data['type'] == 'institution-summary-metrics' + + attributes = data['attributes'] + assert attributes['user_count'] == 200 + assert attributes['public_project_count'] == 150 + assert attributes['private_project_count'] == 125 + assert attributes['public_registration_count'] == 110 + assert attributes['embargoed_registration_count'] == 105 + assert attributes['published_preprint_count'] == 115 + assert attributes['public_file_count'] == 120 + assert attributes['storage_byte_count'] == 15000000000 + assert attributes['monthly_logged_in_user_count'] == 180 + assert attributes['monthly_active_user_count'] == 160 + + def test_get_report_with_multiple_months_and_institutions( + self, app, url, institutional_admin, institution + ): + # Create reports for multiple months and institutions + other_institution = InstitutionFactory() + _summary_report_factory( + '2024-09', institution, + user_count=250, + public_project_count=200, + private_project_count=150, + public_registration_count=120, + embargoed_registration_count=110, + published_preprint_count=130, + public_file_count=140, + storage_byte_count=20000000000, + monthly_logged_in_user_count=220, + monthly_active_user_count=200, + ) + _summary_report_factory( + '2024-08', institution, + user_count=200, + public_project_count=150, + private_project_count=125, + public_registration_count=110, + embargoed_registration_count=105, + published_preprint_count=115, + public_file_count=120, + storage_byte_count=15000000000, + monthly_logged_in_user_count=180, + monthly_active_user_count=160, + ) + _summary_report_factory( + '2024-09', other_institution, + user_count=300, + public_project_count=250, + private_project_count=200, + public_registration_count=180, + embargoed_registration_count=170, + published_preprint_count=190, + public_file_count=210, + storage_byte_count=25000000000, + monthly_logged_in_user_count=270, + monthly_active_user_count=260, + ) + + resp = app.get(url, auth=institutional_admin.auth) + assert resp.status_code == 200 + + data = resp.json['data'] + + assert data['id'] == institution._id + assert data['type'] == 'institution-summary-metrics' + + attributes = data['attributes'] + + assert attributes['user_count'] == 250 + assert attributes['public_project_count'] == 200 + assert attributes['private_project_count'] == 150 + assert attributes['public_registration_count'] == 120 + assert attributes['embargoed_registration_count'] == 110 + assert attributes['published_preprint_count'] == 130 + assert attributes['public_file_count'] == 140 + assert attributes['storage_byte_count'] == 20000000000 + assert attributes['monthly_logged_in_user_count'] == 220 + assert attributes['monthly_active_user_count'] == 200 + + def test_get_with_valid_report_dates(self, app, url, institution, institutional_admin): + _summary_report_factory( + '2024-08', + institution, + user_count=0, + ) + _summary_report_factory( + '2024-09', + institution, + user_count=999, + + ) + _summary_report_factory( + '2018-02', + institution, + user_count=4133, + ) + + resp = app.get(f'{url}?report_yearmonth=2024-08', auth=institutional_admin.auth) + assert resp.status_code == 200 + + attributes = resp.json['data']['attributes'] + assert attributes['user_count'] == 0 + + resp = 
app.get(f'{url}?report_yearmonth=2018-02', auth=institutional_admin.auth) + assert resp.status_code == 200 + + attributes = resp.json['data']['attributes'] + assert attributes['user_count'] == 4133 + + def test_get_with_invalid_report_date(self, app, url, institution, institutional_admin): + _summary_report_factory( + '2024-08', + institution, + user_count=0, + ) + _summary_report_factory( + '2024-09', + institution, + user_count=999, + ) + + # Request with an invalid report_date format + resp = app.get(f'{url}?report_yearmonth=invalid-date', auth=institutional_admin.auth) + assert resp.status_code == 200 + + # Verify it defaults to the most recent report data + attributes = resp.json['data']['attributes'] + assert attributes['user_count'] == 999 + + def test_get_without_report_date_uses_most_recent(self, app, url, institution, institutional_admin): + _summary_report_factory( + '2024-08', + institution, + user_count=0, + ) + _summary_report_factory( + '2024-09', + institution, + user_count=999, + ) + + resp = app.get(url, auth=institutional_admin.auth) + assert resp.status_code == 200 + + attributes = resp.json['data']['attributes'] + assert attributes['user_count'] == 999 + + +def _summary_report_factory(yearmonth, institution, **kwargs): + report = InstitutionMonthlySummaryReport( + report_yearmonth=yearmonth, + institution_id=institution._id, + **kwargs, + ) + report.save(refresh=True) + return report diff --git a/api_tests/institutions/views/test_institution_user_metric_list.py b/api_tests/institutions/views/test_institution_user_metric_list.py index dfee4d178f5..f83fd7fc3fa 100644 --- a/api_tests/institutions/views/test_institution_user_metric_list.py +++ b/api_tests/institutions/views/test_institution_user_metric_list.py @@ -1,22 +1,31 @@ -import pytest import datetime import csv +import json from io import StringIO from random import random -import time +from urllib.parse import urlencode + +import pytest +from waffle.testutils import override_flag -from api.base.settings.defaults import API_BASE, DEFAULT_ES_NULL_VALUE +from api.base.settings.defaults import API_BASE, DEFAULT_ES_NULL_VALUE, REPORT_FILENAME_FORMAT +import osf.features from osf_tests.factories import ( InstitutionFactory, AuthUserFactory, ) from osf.metrics import UserInstitutionProjectCounts -from api.base import settings +from osf.metrics.reports import InstitutionalUserReport -@pytest.mark.es +@pytest.mark.es_metrics @pytest.mark.django_db -class TestInstitutionUserMetricList: +class TestOldInstitutionUserMetricList: + + @pytest.fixture(autouse=True) + def _waffled(self): + with override_flag(osf.features.INSTITUTIONAL_DASHBOARD_2024, active=False): + yield # these tests apply only before institution dashboard improvements @pytest.fixture() def institution(self): @@ -52,33 +61,31 @@ def admin(self, institution): @pytest.fixture() def populate_counts(self, institution, user, user2): # Old data that shouldn't appear in responses - UserInstitutionProjectCounts.record( + UserInstitutionProjectCounts( user_id=user._id, institution_id=institution._id, department='Biology dept', public_project_count=4, private_project_count=4, timestamp=datetime.date(2019, 6, 4) - ).save() + ).save(refresh=True) # New data - UserInstitutionProjectCounts.record( + UserInstitutionProjectCounts( user_id=user._id, institution_id=institution._id, department='Biology dept', public_project_count=6, private_project_count=5, - ).save() + ).save(refresh=True) - UserInstitutionProjectCounts.record( + UserInstitutionProjectCounts( user_id=user2._id, 
institution_id=institution._id, department='Psychology dept', public_project_count=3, private_project_count=2, - ).save() - - time.sleep(10) + ).save(refresh=True) @pytest.fixture() def populate_more_counts(self, institution, user, user2, user3, populate_counts): @@ -89,34 +96,30 @@ def populate_more_counts(self, institution, user, user2, user3, populate_counts) users.append(AuthUserFactory()) for test_user in users: - UserInstitutionProjectCounts.record( + UserInstitutionProjectCounts( user_id=test_user._id, institution_id=institution._id, department='Psychology dept', public_project_count=int(10 * random()), private_project_count=int(10 * random()), - ).save() + ).save(refresh=True) - UserInstitutionProjectCounts.record( + UserInstitutionProjectCounts( user_id=user3._id, institution_id=institution._id, department='Psychology dept', public_project_count=int(10 * random()), private_project_count=int(10 * random()), - ).save() - - time.sleep(10) + ).save(refresh=True) @pytest.fixture() def populate_na_department(self, institution, user4): - UserInstitutionProjectCounts.record( + UserInstitutionProjectCounts( user_id=user4._id, institution_id=institution._id, public_project_count=1, private_project_count=1, - ).save() - - time.sleep(10) + ).save(refresh=True) @pytest.fixture() def url(self, institution): @@ -218,7 +221,6 @@ def test_filter(self, app, url, admin, populate_counts): resp = app.get(f'{url}?filter[department]=Psychology dept', auth=admin.auth) assert resp.json['data'][0]['attributes']['department'] == 'Psychology dept' - @pytest.mark.skipif(settings.CI_ENV, reason='Non-deterministic fails on CI') def test_sort_and_pagination(self, app, url, user, user2, user3, admin, populate_counts, populate_more_counts, institution): resp = app.get(f'{url}?sort=user_name&page[size]=1&page=2', auth=admin.auth) assert resp.status_code == 200 @@ -229,7 +231,6 @@ def test_sort_and_pagination(self, app, url, user, user2, user3, admin, populate assert resp.json['links']['meta']['total'] == 11 assert resp.json['data'][-1]['attributes']['user_name'] == 'Zedd' - @pytest.mark.skipif(settings.CI_ENV, reason='Non-deterministic fails on CI') def test_filter_and_pagination(self, app, user, user2, user3, url, admin, populate_counts, populate_more_counts, institution): resp = app.get(f'{url}?page=2', auth=admin.auth) assert resp.json['links']['meta']['total'] == 11 @@ -238,7 +239,6 @@ def test_filter_and_pagination(self, app, user, user2, user3, url, admin, popula assert resp.json['links']['meta']['total'] == 1 assert resp.json['data'][0]['attributes']['user_name'] == 'Zedd' - @pytest.mark.skipif(settings.CI_ENV, reason='Non-deterministic fails on CI') def test_filter_and_sort(self, app, url, user, user2, user3, admin, user4, populate_counts, populate_na_department, institution): """ Testing for bug where sorting and filtering would throw 502. 
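# --- editorial aside: illustrative sketch, not part of the patch ---
# The fixture changes in this file replace `UserInstitutionProjectCounts.record(...)`
# plus a 10-second `time.sleep` with constructing the metric document directly and
# calling `.save(refresh=True)`. In elasticsearch-dsl (which these metric documents
# appear to build on via elasticsearch-metrics), keyword arguments to `Document.save`
# are forwarded to the underlying index call, and `refresh=True` forces an index
# refresh as part of the write so the document is searchable immediately -- which is
# why the sleeps and the `CI_ENV` skip markers for non-determinism can be dropped.
# A minimal sketch of the pattern, using a hypothetical document class:

from elasticsearch_dsl import Document, Integer, Keyword, connections

connections.create_connection(hosts=['localhost:9200'])  # assumed local ES

class ExampleCount(Document):
    # hypothetical metric document, for illustration only
    user_id = Keyword()
    public_project_count = Integer()

    class Index:
        name = 'example-counts'

doc = ExampleCount(user_id='abcde', public_project_count=3)
doc.save(refresh=True)  # write + refresh in one call: searches see `doc` right away
# --- end aside ---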
@@ -265,3 +265,374 @@ def test_filter_and_sort(self, app, url, user, user2, user3, admin, user4, popul assert data[0]['attributes']['department'] == 'Biology dept' assert data[1]['attributes']['department'] == 'N/A' assert data[2]['attributes']['department'] == 'Psychology dept' + + +@pytest.mark.es_metrics +@pytest.mark.django_db +class TestNewInstitutionUserMetricList: + @pytest.fixture(autouse=True) + def _waffled(self): + with override_flag(osf.features.INSTITUTIONAL_DASHBOARD_2024, active=True): + yield # these tests apply only after institution dashboard improvements + + @pytest.fixture() + def institution(self): + return InstitutionFactory() + + @pytest.fixture() + def rando(self): + return AuthUserFactory() + + @pytest.fixture() + def institutional_admin(self, institution): + _admin_user = AuthUserFactory() + institution.get_group('institutional_admins').user_set.add(_admin_user) + return _admin_user + + @pytest.fixture() + def unshown_reports(self, institution): + # unshown because another institution + _another_institution = InstitutionFactory() + _report_factory('2024-08', _another_institution, user_id='nother_inst') + # unshown because old + _report_factory('2024-07', institution, user_id='old') + + @pytest.fixture() + def reports(self, institution): + return [ + _report_factory( + '2024-08', institution, + user_id='u_sparse', + storage_byte_count=53, + ), + _report_factory( + '2024-08', institution, + user_id='u_orc', + orcid_id='5555-4444-3333-2222', + storage_byte_count=8277, + ), + _report_factory( + '2024-08', institution, + user_id='u_blargl', + department_name='blargl', + storage_byte_count=34834834, + ), + _report_factory( + '2024-08', institution, + user_id='u_orcomma', + orcid_id='4444-3333-2222-1111', + department_name='a department, or so, that happens, incidentally, to have commas', + storage_byte_count=736662999298, + ), + ] + + @pytest.fixture() + def url(self, institution): + return f'/{API_BASE}institutions/{institution._id}/metrics/users/' + + def test_anon(self, app, url): + _resp = app.get(url, expect_errors=True) + assert _resp.status_code == 401 + + def test_rando(self, app, url, rando): + _resp = app.get(url, auth=rando.auth, expect_errors=True) + assert _resp.status_code == 403 + + def test_get_empty(self, app, url, institutional_admin): + _resp = app.get(url, auth=institutional_admin.auth) + assert _resp.status_code == 200 + assert _resp.json['data'] == [] + + def test_get_reports(self, app, url, institutional_admin, institution, reports, unshown_reports): + _resp = app.get(url, auth=institutional_admin.auth) + assert _resp.status_code == 200 + assert len(_resp.json['data']) == len(reports) + _expected_user_ids = {_report.user_id for _report in reports} + assert set(_user_ids(_resp)) == _expected_user_ids + + def test_filter_reports(self, app, url, institutional_admin, institution, reports, unshown_reports): + for _query, _expected_user_ids in ( + ({'filter[department]': 'nunavum'}, set()), + ({'filter[department]': 'incidentally'}, set()), + ({'filter[department]': 'blargl'}, {'u_blargl'}), + ({'filter[department]': 'a department, or so, that happens, incidentally, to have commas'}, {'u_orcomma'}), + ({'filter[department][eq]': 'nunavum'}, set()), + ({'filter[department][eq]': 'blargl'}, {'u_blargl'}), + ({'filter[department][eq]': 'a department, or so, that happens, incidentally, to have commas'}, {'u_orcomma'}), + ({'filter[department][ne]': 'nunavum'}, {'u_sparse', 'u_blargl', 'u_orc', 'u_orcomma'}), + + ({'filter[orcid_id][eq]': 
'5555-4444-3333-2222'}, {'u_orc'}), + ({'filter[orcid_id][ne]': ''}, {'u_orc', 'u_orcomma'}), + ({'filter[orcid_id][eq]': ''}, {'u_sparse', 'u_blargl'}), + ({ + 'filter[orcid_id]': '', + 'filter[department]': 'blargl', + }, {'u_blargl'}), + ({ + 'filter[orcid_id]': '', + 'filter[department][ne]': 'blargl', + }, {'u_sparse'}), + ({ + 'filter[orcid_id]': '5555-4444-3333-2222', + 'filter[department][ne]': 'blargl', + }, {'u_orc'}), + ({ + 'filter[orcid_id]': '5555-4444-3333-2222', + 'filter[department][ne]': '', + }, set()), + ): + _resp = app.get(f'{url}?{urlencode(_query)}', auth=institutional_admin.auth) + assert _resp.status_code == 200 + assert set(_user_ids(_resp)) == _expected_user_ids + + def test_sort_reports(self, app, url, institutional_admin, institution, reports, unshown_reports): + for _query, _expected_user_id_list in ( + ({'sort': 'storage_byte_count'}, ['u_sparse', 'u_orc', 'u_blargl', 'u_orcomma']), + ({'sort': '-storage_byte_count'}, ['u_orcomma', 'u_blargl', 'u_orc', 'u_sparse']), + ): + _resp = app.get(f'{url}?{urlencode(_query)}', auth=institutional_admin.auth) + assert _resp.status_code == 200 + assert list(_user_ids(_resp)) == _expected_user_id_list + + def test_paginate_reports(self, app, url, institutional_admin, institution, reports, unshown_reports): + for _query, _expected_user_id_list in ( + ({'sort': 'storage_byte_count', 'page[size]': 2}, ['u_sparse', 'u_orc']), + ({'sort': 'storage_byte_count', 'page[size]': 2, 'page': 2}, ['u_blargl', 'u_orcomma']), + ({'sort': '-storage_byte_count', 'page[size]': 3}, ['u_orcomma', 'u_blargl', 'u_orc']), + ({'sort': '-storage_byte_count', 'page[size]': 3, 'page': 2}, ['u_sparse']), + ): + _resp = app.get(f'{url}?{urlencode(_query)}', auth=institutional_admin.auth) + assert _resp.status_code == 200 + assert list(_user_ids(_resp)) == _expected_user_id_list + + @pytest.mark.parametrize('format_type, delimiter, content_type', [ + ('csv', ',', 'text/csv; charset=utf-8'), + ('tsv', '\t', 'text/tab-separated-values; charset=utf-8') + ]) + def test_get_report_formats_csv_tsv(self, app, url, institutional_admin, institution, format_type, delimiter, + content_type): + _report_factory( + '2024-08', + institution, + user_id='u_orcomma', + account_creation_date='2018-02', + user_name='Jason Kelce', + orcid_id='4444-3333-2222-1111', + department_name='Center, \t Greatest Ever', + storage_byte_count=736662999298, + embargoed_registration_count=1, + published_preprint_count=1, + public_registration_count=2, + public_project_count=3, + public_file_count=4, + private_project_count=5, + month_last_active='2018-02', + month_last_login='2018-02', + ) + + resp = app.get(f'{url}?format={format_type}', auth=institutional_admin.auth) + assert resp.status_code == 200 + assert resp.headers['Content-Type'] == content_type + + current_date = datetime.datetime.now().strftime('%Y-%m') + expected_filename = REPORT_FILENAME_FORMAT.format( + view_name='institution-user-metrics', + date_created=current_date, + extension=format_type + ) + assert resp.headers['Content-Disposition'] == f'attachment; filename="{expected_filename}"' + + response_body = resp.text + expected_response = [ + [ + 'account_creation_date', + 'department', + 'embargoed_registration_count', + 'month_last_active', + 'month_last_login', + 'orcid_id', + 'private_projects', + 'public_file_count', + 'public_projects', + 'public_registration_count', + 'published_preprint_count', + 'storage_byte_count', + 'user_name' + ], + [ + '2018-02', + 'Center, \t Greatest Ever', + '1', + '2018-02', + 
'2018-02', + '4444-3333-2222-1111', + '5', + '4', + '3', + '2', + '1', + '736662999298', + 'Jason Kelce' + ] + ] + + if delimiter: + with StringIO(response_body) as file: + reader = csv.reader(file, delimiter=delimiter) + response_rows = list(reader) + assert response_rows[0] == expected_response[0] + assert sorted(response_rows[1:]) == sorted(expected_response[1:]) + + @pytest.mark.parametrize('format_type, delimiter, content_type', [ + ('csv', ',', 'text/csv; charset=utf-8'), + ('tsv', '\t', 'text/tab-separated-values; charset=utf-8') + ]) + def test_csv_tsv_ignores_pagination(self, app, url, institutional_admin, institution, format_type, delimiter, + content_type): + # Create 15 records, exceeding the default page size of 10 + num_records = 15 + expected_data = [] + for i in range(num_records): + _report_factory( + '2024-08', + institution, + user_id=f'u_orcomma_{i}', + account_creation_date='2018-02', + user_name=f'Jalen Hurts #{i}', + orcid_id=f'4444-3333-2222-111{i}', + department_name='QBatman', + storage_byte_count=736662999298 + i, + embargoed_registration_count=1, + published_preprint_count=1, + public_registration_count=2, + public_project_count=3, + public_file_count=4, + private_project_count=5, + month_last_active='2018-02', + month_last_login='2018-02', + ) + expected_data.append([ + '2018-02', + 'QBatman', + '1', + '2018-02', + '2018-02', + f'4444-3333-2222-111{i}', + '5', + '4', + '3', + '2', + '1', + str(736662999298 + i), + f'Jalen Hurts #{i}', + ]) + + # Make request for CSV format with page[size]=10 + resp = app.get(f'{url}?format={format_type}', auth=institutional_admin.auth) + assert resp.status_code == 200 + assert resp.headers['Content-Type'] == content_type + + current_date = datetime.datetime.now().strftime('%Y-%m') + expected_filename = REPORT_FILENAME_FORMAT.format( + view_name='institution-user-metrics', + date_created=current_date, + extension=format_type + ) + assert resp.headers['Content-Disposition'] == f'attachment; filename="{expected_filename}"' + + # Validate the CSV content contains all 15 records, ignoring the default pagination of 10 + response_body = resp.text + rows = response_body.splitlines() + + assert len(rows) == num_records + 1 == 16 # 1 header + 15 records + + if delimiter: + with StringIO(response_body) as file: + reader = csv.reader(file, delimiter=delimiter) + response_rows = list(reader) + # Validate header row + expected_header = [ + 'account_creation_date', + 'department', + 'embargoed_registration_count', + 'month_last_active', + 'month_last_login', + 'orcid_id', + 'private_projects', + 'public_file_count', + 'public_projects', + 'public_registration_count', + 'published_preprint_count', + 'storage_byte_count', + 'user_name' + ] + assert response_rows[0] == expected_header + # Sort both expected and actual rows (ignoring the header) before comparison + assert sorted(response_rows[1:]) == sorted(expected_data) + + def test_get_report_format_table_json(self, app, url, institutional_admin, institution): + _report_factory( + '2024-08', + institution, + user_id='u_orcomma', + account_creation_date='2018-02', + user_name='Brian Dawkins', + orcid_id='4444-3333-2222-1111', + department_name='Safety "The Wolverine" Weapon X', + storage_byte_count=736662999298, + embargoed_registration_count=1, + published_preprint_count=1, + public_registration_count=2, + public_project_count=3, + public_file_count=4, + private_project_count=5, + month_last_active='2018-02', + month_last_login='2018-02', + ) + + resp = app.get(f'{url}?format=json_report', 
auth=institutional_admin.auth) + assert resp.status_code == 200 + assert resp.headers['Content-Type'] == 'application/json; charset=utf-8' + + current_date = datetime.datetime.now().strftime('%Y-%m') + expected_filename = REPORT_FILENAME_FORMAT.format( + view_name='institution-user-metrics', + date_created=current_date, + extension='json' + ) + assert resp.headers['Content-Disposition'] == f'attachment; filename="{expected_filename}"' + + # Validate JSON structure and content + response_data = json.loads(resp.body) + expected_data = [ + { + 'account_creation_date': '2018-02', + 'department': 'Safety "The Wolverine" Weapon X', + 'embargoed_registration_count': 1, + 'month_last_active': '2018-02', + 'month_last_login': '2018-02', + 'orcid_id': '4444-3333-2222-1111', + 'private_projects': 5, + 'public_file_count': 4, + 'public_projects': 3, + 'public_registration_count': 2, + 'published_preprint_count': 1, + 'storage_byte_count': 736662999298, + 'user_name': 'Brian Dawkins' + } + ] + assert response_data == expected_data + + +def _user_ids(api_response): + for _datum in api_response.json['data']: + yield _datum['relationships']['user']['data']['id'] + +def _report_factory(yearmonth, institution, **kwargs): + _report = InstitutionalUserReport( + report_yearmonth=yearmonth, + institution_id=institution._id, + **kwargs, + ) + _report.save(refresh=True) + return _report diff --git a/api_tests/metrics/test_composite_query.py b/api_tests/metrics/test_composite_query.py index fd36c0c5f24..0cd0b3bb180 100644 --- a/api_tests/metrics/test_composite_query.py +++ b/api_tests/metrics/test_composite_query.py @@ -29,7 +29,7 @@ def base_url(): return f'/{API_BASE}metrics/preprints/' -@pytest.mark.es +@pytest.mark.es_metrics @pytest.mark.django_db class TestElasticSearch: diff --git a/api_tests/metrics/test_preprint_metrics.py b/api_tests/metrics/test_preprint_metrics.py index 57e31655c40..1bde8719b75 100644 --- a/api_tests/metrics/test_preprint_metrics.py +++ b/api_tests/metrics/test_preprint_metrics.py @@ -116,7 +116,7 @@ def test_custom_metric_malformed_query(self, mock_execute, app, user, base_url): assert res.status_code == 400 assert res.json['errors'][0]['detail'] == 'Malformed elasticsearch query.' 
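# --- editorial aside: illustrative sketch, not part of the patch ---
# Across these test modules, `@pytest.mark.es` is renamed to
# `@pytest.mark.es_metrics`: the conftest fixture later in this patch keys off
# the `es_metrics` marker to build temporary, prefixed metrics indices around
# each marked test instead of wiping every index wholesale. A minimal sketch of
# how a test opts in (hypothetical test body and fixtures):

import pytest

@pytest.mark.es_metrics   # picked up by the autouse marker fixture in conftest.py
@pytest.mark.django_db
def test_example_metric_query(app, user):
    ...  # touches metrics indices; index setup/teardown is handled by the fixture
# --- end aside ---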
- @pytest.mark.es + @pytest.mark.es_metrics def test_agg_query(self, app, user, base_url): post_url = f'{base_url}downloads/' diff --git a/api_tests/metrics/test_raw_metrics.py b/api_tests/metrics/test_raw_metrics.py index c7feb69426b..6a3b9b8f8c5 100644 --- a/api_tests/metrics/test_raw_metrics.py +++ b/api_tests/metrics/test_raw_metrics.py @@ -14,7 +14,7 @@ pytestmark = pytest.mark.django_db -@pytest.mark.es +@pytest.mark.es_metrics class TestRawMetrics: @pytest.fixture(autouse=True) @@ -22,6 +22,12 @@ def enable_elasticsearch_metrics(self): with override_switch(features.ENABLE_RAW_METRICS, active=True): yield + @pytest.fixture(autouse=True) + def teardown_customer_index(self, es6_client): + es6_client.indices.delete(index='customer', ignore_unavailable=True) + yield + es6_client.indices.delete(index='customer', ignore_unavailable=True) + @pytest.fixture def user(self): user = AuthUserFactory() @@ -132,7 +138,7 @@ def test_post_and_get(self, app, user, base_url): time.sleep(3) - get_url = f'{base_url}_search?q=*' + get_url = f'{base_url}customer/_search?q=*' res = app.get(get_url, auth=user.auth) assert res.json['hits']['total'] == 1 diff --git a/api_tests/metrics/test_registries_moderation_metrics.py b/api_tests/metrics/test_registries_moderation_metrics.py index d8b78cdf5ad..93cde9f1121 100644 --- a/api_tests/metrics/test_registries_moderation_metrics.py +++ b/api_tests/metrics/test_registries_moderation_metrics.py @@ -22,7 +22,7 @@ def enable_elasticsearch_metrics(self): with override_switch(features.ELASTICSEARCH_METRICS, active=True): yield - @pytest.mark.es + @pytest.mark.es_metrics def test_record_transitions(self, registration): registration._write_registration_action( RegistrationModerationStates.INITIAL, @@ -70,7 +70,7 @@ def other_user(self): def base_url(self): return '/_/metrics/registries_moderation/transitions/' - @pytest.mark.es + @pytest.mark.es_metrics def test_registries_moderation_view(self, app, user, base_url, registration): registration._write_registration_action( RegistrationModerationStates.INITIAL, diff --git a/api_tests/share/_utils.py b/api_tests/share/_utils.py index 9595aaf1b81..a04808cac3c 100644 --- a/api_tests/share/_utils.py +++ b/api_tests/share/_utils.py @@ -12,6 +12,7 @@ ) from website import settings as website_settings from api.share.utils import shtrove_ingest_url, sharev2_push_url +from osf.metadata.osf_gathering import OsfmapPartition @contextlib.contextmanager @@ -40,36 +41,67 @@ def mock_update_share(): @contextlib.contextmanager -def expect_ingest_request(mock_share_responses, osfguid, *, token=None, delete=False, count=1): +def expect_ingest_request(mock_share_responses, osfguid, *, token=None, delete=False, count=1, error_response=False): mock_share_responses._calls.reset() yield - _double_count = count * 2 # pushing to share two ways - assert len(mock_share_responses.calls) == _double_count, ( - f'expected {_double_count} call(s), got {len(mock_share_responses.calls)}: {list(mock_share_responses.calls)}' + _legacy_count_per_item = 1 + _trove_main_count_per_item = 1 + _trove_supplementary_count_per_item = ( + 0 + if (error_response or delete) + else (len(OsfmapPartition) - 1) ) + _total_count = count * ( + _legacy_count_per_item + + _trove_main_count_per_item + + _trove_supplementary_count_per_item + ) + assert len(mock_share_responses.calls) == _total_count, ( + f'expected {_total_count} call(s), got {len(mock_share_responses.calls)}: {list(mock_share_responses.calls)}' + ) + _trove_ingest_calls = [] + _trove_supp_ingest_calls = [] + 
_legacy_push_calls = [] for _call in mock_share_responses.calls: if _call.request.url.startswith(shtrove_ingest_url()): - assert_ingest_request(_call.request, osfguid, token=token, delete=delete) + if 'is_supplementary' in _call.request.url: + _trove_supp_ingest_calls.append(_call) + else: + _trove_ingest_calls.append(_call) else: - assert _call.request.url.startswith(sharev2_push_url()) + _legacy_push_calls.append(_call) + assert len(_trove_ingest_calls) == count + assert len(_trove_supp_ingest_calls) == count * _trove_supplementary_count_per_item + assert len(_legacy_push_calls) == count + for _call in _trove_ingest_calls: + assert_ingest_request(_call.request, osfguid, token=token, delete=delete) + for _call in _trove_supp_ingest_calls: + assert_ingest_request(_call.request, osfguid, token=token, delete=delete, supp=True) + for _call in _legacy_push_calls: + assert _call.request.url.startswith(sharev2_push_url()) -def assert_ingest_request(request, expected_osfguid, *, token=None, delete=False): +def assert_ingest_request(request, expected_osfguid, *, token=None, delete=False, supp=False): _querydict = QueryDict(urlsplit(request.path_url).query) - assert _querydict['record_identifier'] == expected_osfguid + if supp: + assert _querydict['record_identifier'].startswith(expected_osfguid) + assert _querydict['record_identifier'] != expected_osfguid + else: + assert _querydict['record_identifier'] == expected_osfguid if delete: assert request.method == 'DELETE' else: assert request.method == 'POST' _focus_iri = _querydict['focus_iri'] assert _focus_iri == f'{website_settings.DOMAIN}{expected_osfguid}' - assert _focus_iri in request.body.decode('utf-8') + _request_body = request.body.decode('utf-8') + assert (_focus_iri in _request_body) or (supp and not _request_body.strip()) _token = token or website_settings.SHARE_API_TOKEN assert request.headers['Authorization'] == f'Bearer {_token}' @contextlib.contextmanager -def expect_preprint_ingest_request(mock_share_responses, preprint, *, delete=False, count=1): +def expect_preprint_ingest_request(mock_share_responses, preprint, *, delete=False, count=1, error_response=False): # same as expect_ingest_request, but with convenience for preprint specifics # and postcommit-task handling (so on_preprint_updated actually runs) with expect_ingest_request( @@ -78,6 +110,7 @@ def expect_preprint_ingest_request(mock_share_responses, preprint, *, delete=Fal token=preprint.provider.access_token, delete=delete, count=count, + error_response=error_response, ): # clear out postcommit tasks from factories postcommit_queue().clear() diff --git a/api_tests/share/test_share_preprint.py b/api_tests/share/test_share_preprint.py index aa4d769d1f7..4ab47963bc8 100644 --- a/api_tests/share/test_share_preprint.py +++ b/api_tests/share/test_share_preprint.py @@ -133,7 +133,7 @@ def test_no_call_async_update_on_400_failure(self, mock_share_responses, preprin mock_share_responses.replace(responses.POST, shtrove_ingest_url(), status=400) mock_share_responses.replace(responses.POST, sharev2_push_url(), status=400) preprint.set_published(True, auth=auth, save=True) - with expect_preprint_ingest_request(mock_share_responses, preprint, count=1): + with expect_preprint_ingest_request(mock_share_responses, preprint, count=1, error_response=True): preprint.update_search() def test_delete_from_share(self, mock_share_responses): diff --git a/conftest.py b/conftest.py index 2eb51df076e..6f870093ed4 100644 --- a/conftest.py +++ b/conftest.py @@ -1,3 +1,4 @@ +import contextlib from 
unittest import mock import logging import os @@ -5,7 +6,9 @@ from django.core.management import call_command from django.db import transaction +from elasticsearch import exceptions as es_exceptions from elasticsearch_dsl.connections import connections +from elasticsearch_metrics.registry import registry as es_metrics_registry from faker import Factory import pytest import responses @@ -133,22 +136,44 @@ def es6_client(setup_connections): @pytest.fixture(scope='function', autouse=True) -def _es_marker(request): +def _es_metrics_marker(request): """Clear out all indices and index templates before and after - tests marked with ``es``. + tests marked with `es_metrics`. """ - marker = request.node.get_closest_marker('es') + marker = request.node.get_closest_marker('es_metrics') if marker: es6_client = request.getfixturevalue('es6_client') - - def teardown_es(): - es6_client.indices.delete(index='*') - es6_client.indices.delete_template('*') - - teardown_es() - call_command('sync_metrics') - yield - teardown_es() + _temp_prefix = 'temp_metrics_' + _temp_wildcard = f'{_temp_prefix}*' + + def _teardown_es_temps(): + es6_client.indices.delete(index=_temp_wildcard) + try: + es6_client.indices.delete_template(_temp_wildcard) + except es_exceptions.NotFoundError: + pass + + @contextlib.contextmanager + def _mock_metric_names(): + with contextlib.ExitStack() as _exit: + for _metric_class in es_metrics_registry.get_metrics(): + _exit.enter_context(mock.patch.object( + _metric_class, + '_template_name', # also used to construct index names + f'{_temp_prefix}{_metric_class._template_name}', + )) + _exit.enter_context(mock.patch.object( + _metric_class, + '_template', # a wildcard string for indexes and templates + f'{_temp_prefix}{_metric_class._template}', + )) + yield + + _teardown_es_temps() + with _mock_metric_names(): + call_command('sync_metrics') + yield + _teardown_es_temps() else: yield diff --git a/osf/admin.py b/osf/admin.py index 2bfd8c2cc35..71c0ae8172b 100644 --- a/osf/admin.py +++ b/osf/admin.py @@ -6,6 +6,7 @@ from django.db.models import Q, Count from django.http import HttpResponseRedirect from django.urls import reverse +import waffle from osf.external.spam.tasks import reclassify_domain_references from osf.models import OSFUser, Node, NotableDomain, NodeLicense @@ -140,7 +141,24 @@ def get_queryset(self, request): qs = super().get_queryset(request).annotate(number_of_references=Count('domainreference')) return qs + +class _ManygroupWaffleFlagAdmin(waffle.admin.FlagAdmin): + '''customized `waffle.admin.FlagAdmin` to support many groups + + waffle assumes "there are likely not that many" groups [0], + but in osf there are, in fact, that many groups. 
+ + [0]: https://github.com/jazzband/django-waffle/commit/bf36c19ee03baf1c5850ffe0b284900a5c416f53 + ''' + raw_id_fields = (*waffle.admin.FlagAdmin.raw_id_fields, 'groups') + + admin.site.register(OSFUser, OSFUserAdmin) admin.site.register(Node, NodeAdmin) admin.site.register(NotableDomain, NotableDomainAdmin) admin.site.register(NodeLicense, LicenseAdmin) + +# waffle admins, with Flag admin override +admin.site.register(waffle.models.Flag, _ManygroupWaffleFlagAdmin) +admin.site.register(waffle.models.Sample, waffle.admin.SampleAdmin) +admin.site.register(waffle.models.Switch, waffle.admin.SwitchAdmin) diff --git a/osf/features.yaml b/osf/features.yaml index c6f02ce2994..a3f0fcc1f14 100644 --- a/osf/features.yaml +++ b/osf/features.yaml @@ -189,6 +189,10 @@ flags: note: This is not used everyone: true + - flag_name: INSTITUTIONAL_DASHBOARD_2024 + name: institutional_dashboard_2024 + note: whether to surface older or updated (in 2024) institutional metrics + switches: - flag_name: DISABLE_ENGAGEMENT_EMAILS name: disable_engagement_emails diff --git a/osf/management/commands/make_dummy_pageviews_for_metrics.py b/osf/management/commands/make_dummy_pageviews_for_metrics.py index 11ff9ca69c9..09de34bf7a8 100644 --- a/osf/management/commands/make_dummy_pageviews_for_metrics.py +++ b/osf/management/commands/make_dummy_pageviews_for_metrics.py @@ -74,6 +74,8 @@ def _generate_random_countedusage(self, n, max_age): item_guid=ITEM_GUID, session_id='freshen by key', user_is_authenticated=bool(random.randint(0, 1)), + item_public=bool(random.randint(0, 1)), + action_labels=[['view', 'download'][random.randint(0, 1)]], ) def _run_date_query(self, time_range_filter): @@ -103,8 +105,8 @@ def _run_date_query(self, time_range_filter): }, }) return { - 'min': result.aggs['min-timestamp'].value_as_string, - 'max': result.aggs['max-timestamp'].value_as_string, + 'min': result.aggs['min-timestamp'].value, + 'max': result.aggs['max-timestamp'].value, **{ str(bucket.key.date()): bucket.doc_count for bucket in result.aggs['by-date'] diff --git a/osf/management/commands/monthly_reporters_go.py b/osf/management/commands/monthly_reporters_go.py index 8f9854a722b..c467640cd15 100644 --- a/osf/management/commands/monthly_reporters_go.py +++ b/osf/management/commands/monthly_reporters_go.py @@ -44,8 +44,11 @@ def monthly_reporters_go(report_year=None, report_month=None): ) def monthly_reporter_go(task, reporter_key: str, yearmonth: str): _reporter_class = AllMonthlyReporters[reporter_key].value - _parsed_yearmonth = YearMonth.from_str(yearmonth) - _reporter_class().run_and_record_for_month(_parsed_yearmonth) + _reporter = _reporter_class(YearMonth.from_str(yearmonth)) + _reporter.run_and_record_for_month() + _followup = _reporter.followup_task() + if _followup is not None: + _followup.apply_async() class Command(BaseCommand): @@ -58,10 +61,8 @@ def add_arguments(self, parser): ) def handle(self, *args, **options): - errors = monthly_reporters_go( + monthly_reporters_go( report_year=getattr(options.get('yearmonth'), 'year', None), report_month=getattr(options.get('yearmonth'), 'month', None), ) - for error_key, error_val in errors.items(): - self.stdout.write(self.style.ERROR(f'error running {error_key}: ') + error_val) - self.stdout.write(self.style.SUCCESS('done.')) + self.stdout.write(self.style.SUCCESS('reporter tasks scheduled.')) diff --git a/osf/metadata/gather/basket.py b/osf/metadata/gather/basket.py index f28a4dee6d6..eb28a087ad3 100644 --- a/osf/metadata/gather/basket.py +++ b/osf/metadata/gather/basket.py @@ 
-19,15 +19,14 @@ class Basket: def __init__(self, focus: Focus): assert isinstance(focus, Focus) self.focus = focus - self.reset() # start with an empty basket (except the focus itself) + self.reset() # start with an empty basket def reset(self): self._gathertasks_done = set() - self._known_focus_dict = {} + self._known_focus_dict = {self.focus.iri: {self.focus}} self.gathered_metadata = rdfutils.contextualized_graph() - self._add_focus_reference(self.focus) - def pls_gather(self, predicate_map): # TODO: async + def pls_gather(self, predicate_map, *, include_defaults=True): # TODO: async '''go gatherers, go! @predicate_map: dict with rdflib.URIRef keys @@ -48,7 +47,7 @@ def pls_gather(self, predicate_map): # TODO: async }, }) ''' - self._do_gather(self.focus, predicate_map) + self._do_gather(self.focus, predicate_map, include_defaults=include_defaults) def __getitem__(self, slice_or_arg) -> typing.Iterable[rdflib.term.Node]: '''convenience for getting values from the basket @@ -98,14 +97,20 @@ def _maybe_gather_for_predicate_map(self, iri_or_focus, predicate_map): else: raise ValueError(f'expected `iri_or_focus` to be Focus or URIRef (got {iri_or_focus})') - def _do_gather(self, focus, predicate_map): + def _do_gather(self, focus, predicate_map, *, include_defaults=True): + if include_defaults: + self._add_focus_reference(focus) if not isinstance(predicate_map, dict): # allow iterable of predicates with no deeper paths predicate_map = { predicate_iri: None for predicate_iri in predicate_map } - for gatherer in get_gatherers(focus.rdftype, predicate_map.keys()): + for gatherer in get_gatherers( + focus.rdftype, + predicate_map.keys(), + include_focustype_defaults=include_defaults, + ): for (subj, pred, obj) in self._do_a_gathertask(gatherer, focus): if isinstance(obj, Focus): self._add_focus_reference(obj) diff --git a/osf/metadata/gather/gatherer.py b/osf/metadata/gather/gatherer.py index 2a8822c9d2a..0630e6d61ae 100644 --- a/osf/metadata/gather/gatherer.py +++ b/osf/metadata/gather/gatherer.py @@ -61,11 +61,16 @@ def add_gatherer(gatherer, predicate_iris, focustype_iris): ) -def get_gatherers(focustype_iri, predicate_iris): +def get_gatherers(focustype_iri, predicate_iris, *, include_focustype_defaults=True): gatherer_set = set() for focustype in (None, focustype_iri): for_focustype = __gatherer_registry.get(focustype, {}) - for predicate in (None, *predicate_iris): + _predicates = ( + (None, *predicate_iris) + if include_focustype_defaults + else predicate_iris + ) + for predicate in _predicates: gatherer_set.update(for_focustype.get(predicate, ())) return gatherer_set diff --git a/osf/metadata/osf_gathering.py b/osf/metadata/osf_gathering.py index 6e5e25c6d0b..9783f7b0879 100644 --- a/osf/metadata/osf_gathering.py +++ b/osf/metadata/osf_gathering.py @@ -1,11 +1,14 @@ '''gatherers of metadata from the osf database, in particular ''' +import datetime +import enum import logging from django.contrib.contenttypes.models import ContentType from django import db import rdflib +from api.caching.tasks import get_storage_usage_total from osf import models as osfdb from osf.metadata import gather from osf.metadata.rdfutils import ( @@ -19,6 +22,7 @@ OSF, OSFIO, OWL, + PROV, RDF, ROR, SKOS, @@ -27,7 +31,12 @@ without_namespace, smells_like_iri, ) -from osf.utils import workflows as osfworkflows +from osf.metrics.reports import PublicItemUsageReport +from osf.metrics.utils import YearMonth +from osf.utils import ( + workflows as osfworkflows, + permissions as osfpermissions, +) from 
osf.utils.outcomes import ArtifactTypes from website import settings as website_settings @@ -47,13 +56,6 @@ def pls_get_magic_metadata_basket(osf_item) -> gather.Basket: return gather.Basket(focus) -def osfmap_for_type(rdftype_iri: str): - try: - return OSFMAP[rdftype_iri] - except KeyError: - raise ValueError(f'invalid OSFMAP type! expected one of {set(OSFMAP.keys())}, got {rdftype_iri}') - - ##### END "public" api ##### @@ -88,6 +90,7 @@ def osfmap_for_type(rdftype_iri: str): OSF.isContainedBy: OSF_OBJECT_REFERENCE, OSF.fileName: None, OSF.filePath: None, + OSF.hasFileVersion: None, } OSF_OBJECT = { @@ -131,16 +134,7 @@ def osfmap_for_type(rdftype_iri: str): DCTERMS.creator: OSF_AGENT_REFERENCE, }, OWL.sameAs: None, -} - -OSF_FILEVERSION = { - DCTERMS.created: None, - DCTERMS.creator: OSF_AGENT_REFERENCE, - DCTERMS.extent: None, - DCTERMS.modified: None, - DCTERMS.requires: None, - DCTERMS['format']: None, - OSF.versionNumber: None, + PROV.qualifiedAttribution: None, } OSFMAP = { @@ -193,7 +187,7 @@ def osfmap_for_type(rdftype_iri: str): DCTERMS.modified: None, DCTERMS.title: None, DCTERMS.type: None, - OSF.hasFileVersion: OSF_FILEVERSION, + OSF.hasFileVersion: None, OSF.isContainedBy: OSF_OBJECT_REFERENCE, OSF.fileName: None, OSF.filePath: None, @@ -211,6 +205,57 @@ def osfmap_for_type(rdftype_iri: str): }, } +# metadata not included in the core record +OSFMAP_SUPPLEMENT = { + OSF.Project: { + OSF.hasOsfAddon: None, + OSF.storageByteCount: None, + OSF.storageRegion: None, + }, + OSF.ProjectComponent: { + OSF.hasOsfAddon: None, + OSF.storageByteCount: None, + OSF.storageRegion: None, + }, + OSF.Registration: { + OSF.storageByteCount: None, + OSF.storageRegion: None, + }, + OSF.RegistrationComponent: { + OSF.storageByteCount: None, + OSF.storageRegion: None, + }, + OSF.Preprint: { + OSF.storageByteCount: None, + OSF.storageRegion: None, + }, + OSF.File: { + }, +} + +# metadata not included in the core record that expires after a month +OSFMAP_MONTHLY_SUPPLEMENT = { + OSF.Project: { + OSF.usage: None, + }, + OSF.ProjectComponent: { + OSF.usage: None, + }, + OSF.Registration: { + OSF.usage: None, + }, + OSF.RegistrationComponent: { + OSF.usage: None, + }, + OSF.Preprint: { + OSF.usage: None, + }, + OSF.File: { + OSF.usage: None, + }, +} + + OSF_ARTIFACT_PREDICATES = { ArtifactTypes.ANALYTIC_CODE: OSF.hasAnalyticCodeResource, ArtifactTypes.DATA: OSF.hasDataResource, @@ -218,6 +263,11 @@ def osfmap_for_type(rdftype_iri: str): ArtifactTypes.PAPERS: OSF.hasPapersResource, ArtifactTypes.SUPPLEMENTS: OSF.hasSupplementalResource, } +OSF_CONTRIBUTOR_ROLES = { + osfpermissions.READ: OSF['readonly-contributor'], + osfpermissions.WRITE: OSF['write-contributor'], + osfpermissions.ADMIN: OSF['admin-contributor'], +} BEPRESS_SUBJECT_SCHEME_URI = 'https://bepress.com/reference_guide_dc/disciplines/' BEPRESS_SUBJECT_SCHEME_TITLE = 'bepress Digital Commons Three-Tiered Taxonomy' @@ -259,6 +309,37 @@ def osfmap_for_type(rdftype_iri: str): OSF.Registration: 'StudyRegistration', } + +class OsfmapPartition(enum.Enum): + MAIN = OSFMAP + SUPPLEMENT = OSFMAP_SUPPLEMENT + MONTHLY_SUPPLEMENT = OSFMAP_MONTHLY_SUPPLEMENT + + @property + def is_supplementary(self) -> bool: + return self is not OsfmapPartition.MAIN + + def osfmap_for_type(self, rdftype_iri: str): + try: + return self.value[rdftype_iri] + except KeyError: + if self.is_supplementary: + return {} # allow missing types for non-main partitions + raise ValueError(f'invalid OSFMAP type! 
expected one of {set(self.value.keys())}, got {rdftype_iri}') + + def get_expiration_date(self, basket: gather.Basket) -> datetime.date | None: + if self is not OsfmapPartition.MONTHLY_SUPPLEMENT: + return None + # let a monthly report expire two months after its reporting period ends + # (this allows the *next* monthly report up to a month to compute, which + # aligns with COUNTER https://www.countermetrics.org/code-of-practice/ ) + # (HACK: entangled with `gather_last_month_usage` implementation, below) + _report_yearmonth_str = next(basket[OSF.usage / DCTERMS.temporal], None) + if _report_yearmonth_str is None: + return None + _report_yearmonth = YearMonth.from_str(_report_yearmonth_str) + return _report_yearmonth.next().next().month_end().date() + ##### END osfmap ##### @@ -619,6 +700,8 @@ def _gather_fileversion(fileversion, fileversion_iri): version_sha256 = (fileversion.metadata or {}).get('sha256') if version_sha256: yield (fileversion_iri, DCTERMS.requires, checksum_iri('sha-256', version_sha256)) + if fileversion.region is not None: + yield from _storage_region_triples(fileversion.region, subject_ref=fileversion_iri) @gather.er(OSF.contains) @@ -819,11 +902,24 @@ def gather_agents(focus): # TODO: preserve order via rdflib.Seq +@gather.er(PROV.qualifiedAttribution) +def gather_qualified_attributions(focus): + _contributor_set = getattr(focus.dbmodel, 'contributor_set', None) + if _contributor_set is not None: + for _contributor in _contributor_set.filter(visible=True).select_related('user'): + _osfrole_ref = OSF_CONTRIBUTOR_ROLES.get(_contributor.permission) + if _osfrole_ref is not None: + _attribution_ref = rdflib.BNode() + yield (PROV.qualifiedAttribution, _attribution_ref) + yield (_attribution_ref, PROV.agent, OsfFocus(_contributor.user)) + yield (_attribution_ref, DCAT.hadRole, _osfrole_ref) + + @gather.er(OSF.affiliation) def gather_affiliated_institutions(focus): if hasattr(focus.dbmodel, 'get_affiliated_institutions'): # like OSFUser institution_qs = focus.dbmodel.get_affiliated_institutions() - elif hasattr(focus.dbmodel, 'affiliated_institutions'): # like AbstractNode + elif hasattr(focus.dbmodel, 'affiliated_institutions'): # like AbstractNode or Preprint institution_qs = focus.dbmodel.affiliated_institutions.all() else: institution_qs = () @@ -1029,3 +1125,63 @@ def gather_cedar_templates(focus): template_iri = rdflib.URIRef(record.get_template_semantic_iri()) yield (OSF.hasCedarTemplate, template_iri) yield (template_iri, DCTERMS.title, record.get_template_name()) + + +@gather.er(OSF.usage) +def gather_last_month_usage(focus): + _usage_report = PublicItemUsageReport.for_last_month( + item_osfid=osfguid_from_iri(focus.iri), + ) + if _usage_report is not None: + _usage_report_ref = rdflib.BNode() + yield (OSF.usage, _usage_report_ref) + yield (_usage_report_ref, DCAT.accessService, rdflib.URIRef(website_settings.DOMAIN.rstrip('/'))) + yield (_usage_report_ref, FOAF.primaryTopic, focus.iri) + yield (_usage_report_ref, DCTERMS.temporal, rdflib.Literal( + str(_usage_report.report_yearmonth), + datatype=rdflib.XSD.gYearMonth, + )) + yield (_usage_report_ref, OSF.viewCount, _usage_report.view_count) + yield (_usage_report_ref, OSF.viewSessionCount, _usage_report.view_session_count) + yield (_usage_report_ref, OSF.downloadCount, _usage_report.download_count) + yield (_usage_report_ref, OSF.downloadSessionCount, _usage_report.download_session_count) + + +@gather.er(OSF.hasOsfAddon) +def gather_addons(focus): + # note: when gravyvalet exists, use `iterate_addons_for_resource` 
+ # from osf.external.gravy_valet.request_helpers and get urls like + # "https://addons.osf.example/v1/addon-imps/..." instead of a urn + for _addon_settings in focus.dbmodel.get_addons(): + if not _addon_settings.config.added_default: # skip always-on addons + _addon_ref = rdflib.URIRef(f'urn:osf.io:addons:{_addon_settings.short_name}') + yield (OSF.hasOsfAddon, _addon_ref) + yield (_addon_ref, RDF.type, OSF.AddonImplementation) + yield (_addon_ref, DCTERMS.identifier, _addon_settings.short_name) + yield (_addon_ref, SKOS.prefLabel, _addon_settings.config.full_name) + + +@gather.er(OSF.storageRegion) +def gather_storage_region(focus): + _region = getattr(focus.dbmodel, 'osfstorage_region', None) + if _region is not None: + yield from _storage_region_triples(_region) + + +def _storage_region_triples(region, *, subject_ref=None): + _region_ref = rdflib.URIRef(region.absolute_api_v2_url) + if subject_ref is None: + yield (OSF.storageRegion, _region_ref) + else: + yield (subject_ref, OSF.storageRegion, _region_ref) + yield (_region_ref, SKOS.prefLabel, rdflib.Literal(region.name, lang='en')) + + +@gather.er( + OSF.storageByteCount, + focustype_iris=[OSF.Project, OSF.ProjectComponent, OSF.Registration, OSF.RegistrationComponent, OSF.Preprint] +) +def gather_storage_byte_count(focus): + _storage_usage_total = get_storage_usage_total(focus.dbmodel) + if _storage_usage_total is not None: + yield (OSF.storageByteCount, _storage_usage_total) diff --git a/osf/metadata/rdfutils.py b/osf/metadata/rdfutils.py index cd944169e20..d2596ad344e 100644 --- a/osf/metadata/rdfutils.py +++ b/osf/metadata/rdfutils.py @@ -23,6 +23,7 @@ RDF = rdflib.Namespace('http://www.w3.org/1999/02/22-rdf-syntax-ns#') # "resource description framework" SKOS = rdflib.Namespace('http://www.w3.org/2004/02/skos/core#') # "simple knowledge organization system" DCAT = rdflib.Namespace('http://www.w3.org/ns/dcat#') # "data catalog (vocabulary)" +PROV = rdflib.Namespace('http://www.w3.org/ns/prov#') # "provenance" # non-standard namespace for datacite terms (resolves to datacite docs) DATACITE = rdflib.Namespace('https://schema.datacite.org/meta/kernel-4/#') @@ -38,6 +39,7 @@ 'skos': SKOS, 'dcmitype': DCMITYPE, 'dcat': DCAT, + 'prov': PROV, } diff --git a/osf/metadata/serializers/turtle.py b/osf/metadata/serializers/turtle.py index 649614b0bfa..e90db45f2f6 100644 --- a/osf/metadata/serializers/turtle.py +++ b/osf/metadata/serializers/turtle.py @@ -1,4 +1,4 @@ -from osf.metadata.osf_gathering import osfmap_for_type +from osf.metadata.osf_gathering import OsfmapPartition from osf.metadata.serializers import _base @@ -9,5 +9,9 @@ def filename_for_itemid(self, itemid: str): return f'{itemid}-metadata.ttl' def serialize(self) -> str: - self.basket.pls_gather(osfmap_for_type(self.basket.focus.rdftype)) + _partition = self.serializer_config.get('osfmap_partition', OsfmapPartition.MAIN) + self.basket.pls_gather( + _partition.osfmap_for_type(self.basket.focus.rdftype), + include_defaults=(_partition is OsfmapPartition.MAIN), + ) return self.basket.gathered_metadata.serialize(format='turtle') diff --git a/osf/metrics/counted_usage.py b/osf/metrics/counted_usage.py index e6a3abf9cd5..c3c6d4cc1aa 100644 --- a/osf/metrics/counted_usage.py +++ b/osf/metrics/counted_usage.py @@ -10,7 +10,6 @@ import pytz from osf.metrics.utils import stable_key -from osf.models import Guid logger = logging.getLogger(__name__) @@ -87,6 +86,7 @@ def _autofill_fields(sender, instance, **kwargs): _fill_pageview_info(instance) item_guid = getattr(instance, 'item_guid', 
None) if item_guid: + from osf.models import Guid guid_instance = Guid.load(item_guid) if guid_instance and guid_instance.referent: _fill_osfguid_info(instance, guid_instance.referent) @@ -104,10 +104,10 @@ def _fill_pageview_info(counted_usage): def _fill_osfguid_info(counted_usage, guid_referent): counted_usage.item_public = _get_ispublic(guid_referent) - counted_usage.item_type = type(guid_referent).__name__.lower() + counted_usage.item_type = get_item_type(guid_referent) counted_usage.surrounding_guids = _get_surrounding_guids(guid_referent) if not counted_usage.provider_id: - counted_usage.provider_id = _get_provider_id(guid_referent) + counted_usage.provider_id = get_provider_id(guid_referent) def _fill_document_id(counted_usage): @@ -153,7 +153,7 @@ def _get_ispublic(guid_referent): return getattr(maybe_public, 'is_public', None) # quacks like AbstractNode -def _get_provider_id(guid_referent): +def get_provider_id(guid_referent): provider = getattr(guid_referent, 'provider', None) if isinstance(provider, str): return provider # quacks like BaseFileNode @@ -162,6 +162,10 @@ def _get_provider_id(guid_referent): return 'osf' # quacks like Node, Comment, WikiPage +def get_item_type(guid_referent): + return type(guid_referent).__name__.lower() + + def _get_immediate_wrapper(guid_referent): if hasattr(guid_referent, 'verified_publishable'): return None # quacks like Preprint diff --git a/osf/metrics/reporters/__init__.py b/osf/metrics/reporters/__init__.py index 1f8e0fba862..412b1c2bf90 100644 --- a/osf/metrics/reporters/__init__.py +++ b/osf/metrics/reporters/__init__.py @@ -4,10 +4,13 @@ from .storage_addon_usage import StorageAddonUsageReporter from .download_count import DownloadCountReporter from .institution_summary import InstitutionSummaryReporter +from .institutional_users import InstitutionalUsersReporter +from .institution_summary_monthly import InstitutionalSummaryMonthlyReporter from .new_user_domain import NewUserDomainReporter from .node_count import NodeCountReporter from .osfstorage_file_count import OsfstorageFileCountReporter from .preprint_count import PreprintCountReporter +from .public_item_usage import PublicItemUsageReporter from .user_count import UserCountReporter from .spam_count import SpamCountReporter @@ -26,3 +29,6 @@ class AllDailyReporters(enum.Enum): class AllMonthlyReporters(enum.Enum): SPAM_COUNT = SpamCountReporter + INSTITUTIONAL_USERS = InstitutionalUsersReporter + INSTITUTIONAL_SUMMARY = InstitutionalSummaryMonthlyReporter + ITEM_USAGE = PublicItemUsageReporter diff --git a/osf/metrics/reporters/_base.py b/osf/metrics/reporters/_base.py index d3bf1722523..931afe23fd0 100644 --- a/osf/metrics/reporters/_base.py +++ b/osf/metrics/reporters/_base.py @@ -1,23 +1,34 @@ +from collections import abc +import dataclasses import logging +import celery + +from osf.metrics.reports import MonthlyReport from osf.metrics.utils import YearMonth logger = logging.getLogger(__name__) +@dataclasses.dataclass class MonthlyReporter: - def report(self, report_yearmonth: YearMonth): + yearmonth: YearMonth + + def report(self) -> abc.Iterable[MonthlyReport] | abc.Iterator[MonthlyReport]: """build a report for the given month """ raise NotImplementedError(f'{self.__name__} must implement `report`') - def run_and_record_for_month(self, report_yearmonth: YearMonth): - reports = self.report(report_yearmonth) + def run_and_record_for_month(self) -> None: + reports = self.report() for report in reports: - assert report.report_yearmonth == str(report_yearmonth) + 
report.report_yearmonth = self.yearmonth report.save() + def followup_task(self) -> celery.Signature | None: + return None + class DailyReporter: def report(self, report_date): diff --git a/osf/metrics/reporters/institution_summary_monthly.py b/osf/metrics/reporters/institution_summary_monthly.py new file mode 100644 index 00000000000..998cc056298 --- /dev/null +++ b/osf/metrics/reporters/institution_summary_monthly.py @@ -0,0 +1,105 @@ +from django.contrib.contenttypes.models import ContentType +from django.db.models import Q, F, Sum, OuterRef, Exists + +from osf.models import Institution, Preprint, AbstractNode, FileVersion, NodeLog, PreprintLog +from osf.models.spam import SpamStatus +from addons.osfstorage.models import OsfStorageFile +from osf.metrics.reports import InstitutionMonthlySummaryReport +from ._base import MonthlyReporter + + +class InstitutionalSummaryMonthlyReporter(MonthlyReporter): + """Generate an InstitutionMonthlySummaryReport for each institution.""" + + def report(self): + for institution in Institution.objects.all(): + yield self.generate_report(institution) + + def generate_report(self, institution): + node_queryset = institution.nodes.filter( + deleted__isnull=True, + created__lt=self.yearmonth.month_end() + ).exclude( + spam_status=SpamStatus.SPAM, + ) + + preprint_queryset = self.get_published_preprints(institution, self.yearmonth) + + return InstitutionMonthlySummaryReport( + institution_id=institution._id, + user_count=institution.get_institution_users().count(), + private_project_count=self._get_count(node_queryset, 'osf.node', is_public=False), + public_project_count=self._get_count(node_queryset, 'osf.node', is_public=True), + public_registration_count=self._get_count(node_queryset, 'osf.registration', is_public=True), + embargoed_registration_count=self._get_count(node_queryset, 'osf.registration', is_public=False), + published_preprint_count=preprint_queryset.count(), + storage_byte_count=self.get_storage_size(node_queryset, preprint_queryset), + public_file_count=self.get_files(node_queryset, preprint_queryset, is_public=True).count(), + monthly_logged_in_user_count=self.get_monthly_logged_in_user_count(institution, self.yearmonth), + monthly_active_user_count=self.get_monthly_active_user_count(institution, self.yearmonth), + ) + + def _get_count(self, node_queryset, node_type, is_public): + return node_queryset.filter(type=node_type, is_public=is_public, root_id=F('pk')).count() + + def get_published_preprints(self, institution, yearmonth): + queryset = Preprint.objects.can_view().filter( + affiliated_institutions=institution, + created__lte=yearmonth.month_end() + ).exclude( + spam_status=SpamStatus.SPAM + ) + + return queryset + + def get_files(self, node_queryset, preprint_queryset, is_public=None): + public_kwargs = {} + if is_public: + public_kwargs = {'is_public': is_public} + + target_node_q = Q( + target_object_id__in=node_queryset.filter(**public_kwargs).values('pk'), + target_content_type=ContentType.objects.get_for_model(AbstractNode), + ) + target_preprint_q = Q( + target_object_id__in=preprint_queryset.values('pk'), + target_content_type=ContentType.objects.get_for_model(Preprint), + ) + return OsfStorageFile.objects.filter( + deleted__isnull=True, purged__isnull=True + ).filter(target_node_q | target_preprint_q) + + def get_storage_size(self, node_queryset, preprint_queryset): + files = self.get_files(node_queryset, preprint_queryset) + return FileVersion.objects.filter( + size__gt=0, + purged__isnull=True, + basefilenode__in=files + 
).aggregate(storage_bytes=Sum('size', default=0))['storage_bytes'] + + def get_monthly_logged_in_user_count(self, institution, yearmonth): + return institution.get_institution_users().filter( + date_last_login__gte=yearmonth.month_start(), + date_last_login__lt=yearmonth.month_end() + ).count() + + def get_monthly_active_user_count(self, institution, yearmonth): + start_date = yearmonth.month_start() + end_date = yearmonth.month_end() + + nodelogs = NodeLog.objects.filter( + user=OuterRef('pk'), + created__gte=start_date, + created__lt=end_date + ) + preprintlogs = PreprintLog.objects.filter( + user=OuterRef('pk'), + created__gte=start_date, + created__lt=end_date + ) + + return institution.get_institution_users().filter( + date_disabled__isnull=True + ).annotate( + has_logs=Exists(nodelogs) | Exists(preprintlogs) + ).filter(has_logs=True).count() diff --git a/osf/metrics/reporters/institutional_users.py b/osf/metrics/reporters/institutional_users.py new file mode 100644 index 00000000000..e0f7f42a156 --- /dev/null +++ b/osf/metrics/reporters/institutional_users.py @@ -0,0 +1,161 @@ +import dataclasses +import datetime + +from django.contrib.contenttypes.models import ContentType +from django.db.models import Q, F, Sum + +from osf import models as osfdb +from osf.models.spam import SpamStatus +from addons.osfstorage.models import OsfStorageFile +from osf.metrics.reports import InstitutionalUserReport +from osf.metrics.utils import YearMonth +from ._base import MonthlyReporter + + +_CHUNK_SIZE = 500 + + +class InstitutionalUsersReporter(MonthlyReporter): + '''build an InstitutionalUserReport for each institution-user affiliation + + built for the institution dashboard at ://osf.example/institutions//dashboard/, + which offers institutional admins insight into how people at their institution are + using osf, based on their explicitly-affiliated osf objects + ''' + def report(self): + _before_datetime = self.yearmonth.month_end() + for _institution in osfdb.Institution.objects.filter(created__lt=_before_datetime): + _user_qs = _institution.get_institution_users().filter(created__lt=_before_datetime) + for _user in _user_qs.iterator(chunk_size=_CHUNK_SIZE): + _helper = _InstiUserReportHelper(_institution, _user, self.yearmonth, _before_datetime) + yield _helper.report + + +# helper +@dataclasses.dataclass +class _InstiUserReportHelper: + institution: osfdb.Institution + user: osfdb.OSFUser + yearmonth: YearMonth + before_datetime: datetime.datetime + report: InstitutionalUserReport = dataclasses.field(init=False) + + def __post_init__(self): + _affiliation = self.user.get_institution_affiliation(self.institution._id) + self.report = InstitutionalUserReport( + institution_id=self.institution._id, + user_id=self.user._id, + user_name=self.user.fullname, + department_name=(_affiliation.sso_department or None), + month_last_login=( + YearMonth.from_date(self.user.date_last_login) + if self.user.date_last_login is not None + else None + ), + month_last_active=self._get_last_active(), + account_creation_date=YearMonth.from_date(self.user.created), + orcid_id=self.user.get_verified_external_id('ORCID', verified_only=True), + public_project_count=self._public_project_queryset().count(), + private_project_count=self._private_project_queryset().count(), + public_registration_count=self._public_registration_queryset().count(), + embargoed_registration_count=self._embargoed_registration_queryset().count(), + public_file_count=self._public_osfstorage_file_queryset().count(), + 
published_preprint_count=self._published_preprint_queryset().count(), + storage_byte_count=self._storage_byte_count(), + ) + + def _node_queryset(self): + _institution_node_qs = self.institution.nodes.filter( + created__lt=self.before_datetime, + is_deleted=False, + ).exclude(spam_status=SpamStatus.SPAM) + return osfdb.Node.objects.get_nodes_for_user( + user=self.user, + base_queryset=_institution_node_qs, + ) + + def _public_project_queryset(self): + return self._node_queryset().filter( + type='osf.node', # `type` field from TypedModel + is_public=True, + root_id=F('pk'), # only root nodes + ) + + def _private_project_queryset(self): + return self._node_queryset().filter( + type='osf.node', # `type` field from TypedModel + is_public=False, + root_id=F('pk'), # only root nodes + ) + + def _public_registration_queryset(self): + return self._node_queryset().filter( + type='osf.registration', # `type` field from TypedModel + is_public=True, + root_id=F('pk'), # only root nodes + ) + + def _embargoed_registration_queryset(self): + return self._node_queryset().filter( + type='osf.registration', # `type` field from TypedModel + is_public=False, + root_id=F('pk'), # only root nodes + embargo__end_date__gte=self.before_datetime, + ) + + def _published_preprint_queryset(self): + return ( + osfdb.Preprint.objects.can_view() # published/publicly-viewable + .filter( + affiliated_institutions=self.institution, + _contributors=self.user, + date_published__lt=self.before_datetime, + ) + .exclude(spam_status=SpamStatus.SPAM) + ) + + def _public_osfstorage_file_queryset(self): + _target_node_q = Q( + # any public project, registration, project component, or registration component + target_object_id__in=self._node_queryset().filter(is_public=True).values('pk'), + target_content_type=ContentType.objects.get_for_model(osfdb.AbstractNode), + ) + _target_preprint_q = Q( + target_object_id__in=self._published_preprint_queryset().values('pk'), + target_content_type=ContentType.objects.get_for_model(osfdb.Preprint), + ) + return ( + OsfStorageFile.objects + .filter( + created__lt=self.before_datetime, + deleted__isnull=True, + purged__isnull=True, + ) + .filter(_target_node_q | _target_preprint_q) + ) + + def _storage_byte_count(self): + return osfdb.FileVersion.objects.filter( + size__gt=0, + created__lt=self.before_datetime, + purged__isnull=True, + basefilenode__in=self._public_osfstorage_file_queryset(), + ).aggregate(storage_bytes=Sum('size', default=0))['storage_bytes'] + + def _get_last_active(self): + end_date = self.yearmonth.month_end() + + node_logs = self.user.logs.filter(created__lt=end_date).order_by('-created') + preprint_logs = self.user.preprint_logs.filter(created__lt=end_date).order_by('-created') + + dates = filter(bool, [ + node_logs.values_list('created', flat=True).first(), + preprint_logs.values_list('created', flat=True).first(), + ]) + + latest_activity_date = max(dates, default=None) + + if latest_activity_date: + return YearMonth.from_date(latest_activity_date) + else: + return None diff --git a/osf/metrics/reporters/public_item_usage.py b/osf/metrics/reporters/public_item_usage.py new file mode 100644 index 00000000000..ecc34a5d9c7 --- /dev/null +++ b/osf/metrics/reporters/public_item_usage.py @@ -0,0 +1,286 @@ +from __future__ import annotations +import typing + +import celery +if typing.TYPE_CHECKING: + import elasticsearch_dsl as edsl + +from osf.metadata.osf_gathering import OsfmapPartition +from osf.metrics.counted_usage import ( + CountedAuthUsage, + get_item_type, + 
get_provider_id, +) +from osf.metrics.reports import PublicItemUsageReport +from osf import models as osfdb +from website import settings as website_settings +from ._base import MonthlyReporter + + +_CHUNK_SIZE = 500 + +_MAX_CARDINALITY_PRECISION = 40000 # https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-cardinality-aggregation.html#_precision_control + + +class _SkipItem(Exception): + pass + + +class PublicItemUsageReporter(MonthlyReporter): + '''build a PublicItemUsageReport for each public item + + includes projects, project components, registrations, registration components, and preprints + ''' + + def report(self): + # use two composite aggregations in parallel to page thru every + # public item viewed or downloaded this month, counting: + # - views and downloads for each item (using `CountedAuthUsage.item_guid`) + # - views for each item's components and files (using `CountedAuthUsage.surrounding_guids`) + for _exact_bucket, _contained_views_bucket in _zip_composite_aggs( + self._exact_item_search(), 'agg_osfid', + self._contained_item_views_search(), 'agg_surrounding_osfid', + ): + try: + _report = self._report_from_buckets(_exact_bucket, _contained_views_bucket) + yield _report + except _SkipItem: + pass + + def followup_task(self): + return task__update_monthly_metadatas.signature( + args=[str(self.yearmonth)], + countdown=30, # give index time to settle + ) + + def _report_from_buckets(self, exact_bucket, contained_views_bucket): + # either exact_bucket or contained_views_bucket may be None, but not both + assert (exact_bucket is not None) or (contained_views_bucket is not None) + _report = ( + self._init_report_from_exact_bucket(exact_bucket) + if exact_bucket is not None + else self._init_report_from_osfid(contained_views_bucket.key.osfid) + ) + # view counts include views on contained items (components, files) + _report.view_count, _report.view_session_count = self._get_view_counts(_report.item_osfid) + return _report + + def _init_report_from_exact_bucket(self, exact_bucket) -> PublicItemUsageReport: + # in the (should-be common) case of an item that has been directly viewed in + # this month, the stored metrics already have the data required + _report = PublicItemUsageReport( + item_osfid=exact_bucket.key.osfid, + item_type=_agg_keys(exact_bucket.agg_item_type), + provider_id=_agg_keys(exact_bucket.agg_provider_id), + platform_iri=_agg_keys(exact_bucket.agg_platform_iri), + # default counts to zero, will be updated if non-zero + view_count=0, + view_session_count=0, + download_count=0, + download_session_count=0, + ) + for _actionbucket in exact_bucket.agg_action: + # note: view counts computed separately to avoid double-counting + if _actionbucket.key == CountedAuthUsage.ActionLabel.DOWNLOAD.value: + _report.download_count = _actionbucket.doc_count + _report.download_session_count = _actionbucket.agg_session_count.value + return _report + + def _init_report_from_osfid(self, osfid: str) -> PublicItemUsageReport: + # for the (should-be unusual) case where the components/files contained by + # an item have views in this month, but the item itself does not -- + # load necessary info via django models, instead + _osfguid = osfdb.Guid.load(osfid) + if _osfguid is None or not getattr(_osfguid.referent, 'is_public', False): + raise _SkipItem + return PublicItemUsageReport( + item_osfid=osfid, + item_type=[get_item_type(_osfguid.referent)], + provider_id=[get_provider_id(_osfguid.referent)], + platform_iri=[website_settings.DOMAIN], + # 
default counts to zero, will be updated if non-zero + view_count=0, + view_session_count=0, + download_count=0, + download_session_count=0, + ) + + def _base_usage_search(self): + return ( + CountedAuthUsage.search() + .filter('term', item_public=True) + .filter('range', timestamp={ + 'gte': self.yearmonth.month_start(), + 'lt': self.yearmonth.month_end(), + }) + .update_from_dict({'size': 0}) # only aggregations, no hits + ) + + def _exact_item_search(self) -> edsl.Search: + '''aggregate views and downloads on each osfid (not including components/files)''' + _search = self._base_usage_search() + # the main agg: use a composite aggregation to page thru *every* item + _agg_osfid = _search.aggs.bucket( + 'agg_osfid', + 'composite', + sources=[{'osfid': {'terms': {'field': 'item_guid'}}}], + size=_CHUNK_SIZE, + ) + # nested agg: for each item, get platform_iri values + _agg_osfid.bucket('agg_platform_iri', 'terms', field='platform_iri') + # nested agg: for each item, get provider_id values + _agg_osfid.bucket('agg_provider_id', 'terms', field='provider_id') + # nested agg: for each item, get item_type values + _agg_osfid.bucket('agg_item_type', 'terms', field='item_type') + # nested agg: for each item, get download count + _agg_action = _agg_osfid.bucket( + 'agg_action', + 'terms', + field='action_labels', + include=[ + CountedAuthUsage.ActionLabel.DOWNLOAD.value, + ], + ) + # nested nested agg: get download session count + _agg_action.metric( + 'agg_session_count', + 'cardinality', + field='session_id', + precision_threshold=_MAX_CARDINALITY_PRECISION, + ) + return _search + + def _contained_item_views_search(self) -> edsl.Search: + '''iterate osfids with views on contained components and files''' + _search = ( + self._base_usage_search() + .filter('term', action_labels=CountedAuthUsage.ActionLabel.VIEW.value) + ) + # the main agg: use a composite aggregation to page thru *every* item + _search.aggs.bucket( + 'agg_surrounding_osfid', + 'composite', + sources=[{'osfid': {'terms': {'field': 'surrounding_guids'}}}], + size=_CHUNK_SIZE, + ) + return _search + + def _get_view_counts(self, osfid: str) -> tuple[int, int]: + '''compute view_session_count separately to avoid double-counting + + (the same session may be represented in both the composite agg on `item_guid` + and that on `surrounding_guids`) + ''' + _search = ( + self._base_usage_search() + .query( + 'bool', + filter=[ + {'term': {'action_labels': CountedAuthUsage.ActionLabel.VIEW.value}}, + ], + should=[ + {'term': {'item_guid': osfid}}, + {'term': {'surrounding_guids': osfid}}, + ], + minimum_should_match=1, + ) + ) + _search.aggs.metric( + 'agg_session_count', + 'cardinality', + field='session_id', + precision_threshold=_MAX_CARDINALITY_PRECISION, + ) + _response = _search.execute() + _view_count = _response.hits.total + _view_session_count = _response.aggregations.agg_session_count.value + return (_view_count, _view_session_count) + + +### +# followup celery task +@celery.shared_task +def task__update_monthly_metadatas(yearmonth: str): + from api.share.utils import task__update_share + _report_search = ( + PublicItemUsageReport.search() + .filter('term', report_yearmonth=yearmonth) + .source(['item_osfid']) # return only the 'item_osfid' field + ) + for _hit in _report_search.scan(): + task__update_share.delay( + _hit.item_osfid, + is_backfill=True, + osfmap_partition_name=OsfmapPartition.MONTHLY_SUPPLEMENT.name, + ) + + +### +# local helpers + +def _agg_keys(bucket_agg_result) -> list: + return [_bucket.key for _bucket in 
bucket_agg_result] + + +def _zip_composite_aggs( + search_a: edsl.Search, + composite_agg_name_a: str, + search_b: edsl.Search, + composite_agg_name_b: str, +): + '''iterate thru two composite aggregations, yielding pairs of buckets matched by key + + the composite aggregations must have matching names in `sources` so their keys can be compared + ''' + _iter_a = _iter_composite_buckets(search_a, composite_agg_name_a) + _iter_b = _iter_composite_buckets(search_b, composite_agg_name_b) + _next_a = next(_iter_a, None) + _next_b = next(_iter_b, None) + while True: + if _next_a is None and _next_b is None: + return # both done + elif _next_a is None or _next_b is None: + # one is done but not the other -- no matching needed + yield (_next_a, _next_b) + _next_a = next(_iter_a, None) + _next_b = next(_iter_b, None) + elif _next_a.key == _next_b.key: + # match -- yield and increment both + yield (_next_a, _next_b) + _next_a = next(_iter_a, None) + _next_b = next(_iter_b, None) + elif _orderable_key(_next_a) < _orderable_key(_next_b): + # mismatch -- yield and increment a (but not b) + yield (_next_a, None) + _next_a = next(_iter_a, None) + else: + # mismatch -- yield and increment b (but not a) + yield (None, _next_b) + _next_b = next(_iter_b, None) + + +def _iter_composite_buckets(search: edsl.Search, composite_agg_name: str): + '''iterate thru *all* buckets of a composite aggregation, requesting new pages as needed + + assumes the given search has a composite aggregation of the given name + + updates the search in-place for subsequent pages + ''' + while True: + _page_response = search.execute(ignore_cache=True) # reused search object has the previous page cached + try: + _agg_result = _page_response.aggregations[composite_agg_name] + except KeyError: + return # no data; all done + yield from _agg_result.buckets + # update the search for the next page + try: + _next_after = _agg_result.after_key + except AttributeError: + return # all done + else: + search.aggs[composite_agg_name].after = _next_after + + +def _orderable_key(composite_bucket) -> list: + return sorted(composite_bucket.key.to_dict().items()) diff --git a/osf/metrics/reporters/spam_count.py b/osf/metrics/reporters/spam_count.py index 54feae8bee5..94290f96203 100644 --- a/osf/metrics/reporters/spam_count.py +++ b/osf/metrics/reporters/spam_count.py @@ -8,12 +8,11 @@ class SpamCountReporter(MonthlyReporter): - def report(self, report_yearmonth): - target_month = report_yearmonth.target_month() - next_month = report_yearmonth.next_month() + def report(self): + target_month = self.yearmonth.month_start() + next_month = self.yearmonth.month_end() report = SpamSummaryReport( - report_yearmonth=str(report_yearmonth), # Node Log entries node_confirmed_spam=NodeLog.objects.filter( action=NodeLog.CONFIRM_SPAM, diff --git a/osf/metrics/reports.py b/osf/metrics/reports.py index 609e79fc324..43bdd0fabd1 100644 --- a/osf/metrics/reports.py +++ b/osf/metrics/reports.py @@ -1,3 +1,5 @@ +from __future__ import annotations +from collections import abc import datetime from django.dispatch import receiver @@ -20,10 +22,14 @@ class DailyReport(metrics.Metric): There's something we'd like to know about every so often, so let's regularly run a report and stash the results here. """ - DAILY_UNIQUE_FIELD = None # set in subclasses that expect multiple reports per day + UNIQUE_TOGETHER_FIELDS: tuple[str, ...] 
= ('report_date',) # override in subclasses for multiple reports per day report_date = metrics.Date(format='strict_date', required=True) + def __init_subclass__(cls, **kwargs): + super().__init_subclass__(**kwargs) + assert 'report_date' in cls.UNIQUE_TOGETHER_FIELDS, f'DailyReport subclasses must have "report_date" in UNIQUE_TOGETHER_FIELDS (on {cls.__qualname__}, got {cls.UNIQUE_TOGETHER_FIELDS})' + class Meta: abstract = True dynamic = metrics.MetaField('strict') @@ -32,7 +38,7 @@ class Meta: class YearmonthField(metrics.Date): def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs, format='strict_year_month', required=True) + super().__init__(*args, **kwargs, format='strict_year_month') def deserialize(self, data): if isinstance(data, YearMonth): @@ -41,8 +47,14 @@ def deserialize(self, data): return YearMonth.from_str(data) elif isinstance(data, (datetime.datetime, datetime.date)): return YearMonth.from_date(data) + elif isinstance(data, int): + # elasticsearch stores dates in milliseconds since the unix epoch + _as_datetime = datetime.datetime.fromtimestamp(data // 1000) + return YearMonth.from_date(_as_datetime) + elif data is None: + return None else: - raise ValueError('unsure how to deserialize "{data}" (of type {type(data)}) to YearMonth') + raise ValueError(f'unsure how to deserialize "{data}" (of type {type(data)}) to YearMonth') def serialize(self, data): if isinstance(data, str): @@ -51,6 +63,8 @@ def serialize(self, data): return str(data) elif isinstance(data, (datetime.datetime, datetime.date)): return str(YearMonth.from_date(data)) + elif data is None: + return None else: raise ValueError(f'unsure how to serialize "{data}" (of type {type(data)}) as YYYY-MM') @@ -58,34 +72,57 @@ def serialize(self, data): class MonthlyReport(metrics.Metric): """MonthlyReport (abstract base for report-based metrics that run monthly) """ + UNIQUE_TOGETHER_FIELDS: tuple[str, ...] = ('report_yearmonth',) # override in subclasses for multiple reports per month - report_yearmonth = YearmonthField() + report_yearmonth = YearmonthField(required=True) class Meta: abstract = True dynamic = metrics.MetaField('strict') source = metrics.MetaField(enabled=True) + @classmethod + def most_recent_yearmonth(cls, base_search=None) -> YearMonth | None: + _search = base_search or cls.search() + _search = _search.update_from_dict({'size': 0}) # omit hits + _search.aggs.bucket( + 'agg_most_recent_yearmonth', + 'terms', + field='report_yearmonth', + order={'_key': 'desc'}, + size=1, + ) + _response = _search.execute() + if not _response.aggregations: + return None + (_bucket,) = _response.aggregations.agg_most_recent_yearmonth.buckets + return _bucket.key + + def __init_subclass__(cls, **kwargs): + super().__init_subclass__(**kwargs) + assert 'report_yearmonth' in cls.UNIQUE_TOGETHER_FIELDS, f'MonthlyReport subclasses must have "report_yearmonth" in UNIQUE_TOGETHER_FIELDS (on {cls.__qualname__}, got {cls.UNIQUE_TOGETHER_FIELDS})' + @receiver(metrics_pre_save) def set_report_id(sender, instance, **kwargs): - # Set the document id to a hash of "unique together" - # values (just `report_date` by default) to get - # "ON CONFLICT UPDATE" behavior -- if the document - # already exists, it will be updated rather than duplicated. - # Cannot detect/avoid conflicts this way, but that's ok. 
- - if issubclass(sender, DailyReport): - duf_name = instance.DAILY_UNIQUE_FIELD - if duf_name is None: - instance.meta.id = stable_key(instance.report_date) - else: - duf_value = getattr(instance, duf_name) - if not duf_value or not isinstance(duf_value, str): - raise ReportInvalid(f'{sender.__name__}.{duf_name} MUST have a non-empty string value (got {duf_value})') - instance.meta.id = stable_key(instance.report_date, duf_value) - elif issubclass(sender, MonthlyReport): - instance.meta.id = stable_key(instance.report_yearmonth) + try: + _unique_together_fields = instance.UNIQUE_TOGETHER_FIELDS + except AttributeError: + pass + else: + # Set the document id to a hash of "unique together" fields + # for "ON CONFLICT UPDATE" behavior -- if the document + # already exists, it will be updated rather than duplicated. + # Cannot detect/avoid conflicts this way, but that's ok. + _key_values = [] + for _field_name in _unique_together_fields: + _field_value = getattr(instance, _field_name) + if not _field_value or ( + isinstance(_field_value, abc.Iterable) and not isinstance(_field_value, str) + ): + raise ReportInvalid(f'because "{_field_name}" is in {sender.__name__}.UNIQUE_TOGETHER_FIELDS, {sender.__name__}.{_field_name} MUST have a non-empty scalar value (got {_field_value} of type {type(_field_value)})') + _key_values.append(_field_value) + instance.meta.id = stable_key(*_key_values) #### BEGIN reusable inner objects ##### @@ -157,7 +194,7 @@ class DownloadCountReport(DailyReport): class InstitutionSummaryReport(DailyReport): - DAILY_UNIQUE_FIELD = 'institution_id' + UNIQUE_TOGETHER_FIELDS = ('report_date', 'institution_id',) institution_id = metrics.Keyword() institution_name = metrics.Keyword() @@ -169,7 +206,7 @@ class InstitutionSummaryReport(DailyReport): class NewUserDomainReport(DailyReport): - DAILY_UNIQUE_FIELD = 'domain_name' + UNIQUE_TOGETHER_FIELDS = ('report_date', 'domain_name',) domain_name = metrics.Keyword() new_user_count = metrics.Integer() @@ -187,7 +224,7 @@ class OsfstorageFileCountReport(DailyReport): class PreprintSummaryReport(DailyReport): - DAILY_UNIQUE_FIELD = 'provider_key' + UNIQUE_TOGETHER_FIELDS = ('report_date', 'provider_key',) provider_key = metrics.Keyword() preprint_count = metrics.Integer() @@ -214,3 +251,75 @@ class SpamSummaryReport(MonthlyReport): preprint_flagged = metrics.Integer() user_marked_as_spam = metrics.Integer() user_marked_as_ham = metrics.Integer() + + +class InstitutionalUserReport(MonthlyReport): + UNIQUE_TOGETHER_FIELDS = ('report_yearmonth', 'institution_id', 'user_id',) + institution_id = metrics.Keyword() + # user info: + user_id = metrics.Keyword() + user_name = metrics.Keyword() + department_name = metrics.Keyword() + month_last_login = YearmonthField() + month_last_active = YearmonthField() + account_creation_date = YearmonthField() + orcid_id = metrics.Keyword() + # counts: + public_project_count = metrics.Integer() + private_project_count = metrics.Integer() + public_registration_count = metrics.Integer() + embargoed_registration_count = metrics.Integer() + published_preprint_count = metrics.Integer() + public_file_count = metrics.Long() + storage_byte_count = metrics.Long() + + +class InstitutionMonthlySummaryReport(MonthlyReport): + UNIQUE_TOGETHER_FIELDS = ('report_yearmonth', 'institution_id', ) + institution_id = metrics.Keyword() + user_count = metrics.Integer() + public_project_count = metrics.Integer() + private_project_count = metrics.Integer() + public_registration_count = metrics.Integer() + 
embargoed_registration_count = metrics.Integer()
+    published_preprint_count = metrics.Integer()
+    storage_byte_count = metrics.Long()
+    public_file_count = metrics.Long()
+    monthly_logged_in_user_count = metrics.Long()
+    monthly_active_user_count = metrics.Long()
+
+
+class PublicItemUsageReport(MonthlyReport):
+    UNIQUE_TOGETHER_FIELDS = ('report_yearmonth', 'item_osfid')
+
+    # where noted, fields are meant to correspond to defined terms from COUNTER
+    # https://cop5.projectcounter.org/en/5.1/appendices/a-glossary-of-terms.html
+    # https://coprd.countermetrics.org/en/1.0.1/appendices/a-glossary.html
+    item_osfid = metrics.Keyword()  # counter:Item (or Dataset)
+    item_type = metrics.Keyword(multi=True)  # counter:Data-Type
+    provider_id = metrics.Keyword(multi=True)  # counter:Database(?)
+    platform_iri = metrics.Keyword(multi=True)  # counter:Platform
+
+    # view counts include views on components or files contained by this item
+    view_count = metrics.Long()  # counter:Total Investigations
+    view_session_count = metrics.Long()  # counter:Unique Investigations
+
+    # download counts of this item only (not including contained components or files)
+    download_count = metrics.Long()  # counter:Total Requests
+    download_session_count = metrics.Long()  # counter:Unique Requests
+
+    @classmethod
+    def for_last_month(cls, item_osfid: str) -> PublicItemUsageReport | None:
+        _search = (
+            PublicItemUsageReport.search()
+            .filter('term', item_osfid=item_osfid)
+            # only last month's report
+            .filter('range', report_yearmonth={
+                'gte': 'now-2M/M',
+                'lt': 'now/M',
+            })
+            .sort('-report_yearmonth')
+            [:1]
+        )
+        _response = _search.execute()
+        return _response[0] if _response else None
diff --git a/osf/metrics/utils.py b/osf/metrics/utils.py
index 5ea397fef39..910b1f3104c 100644
--- a/osf/metrics/utils.py
+++ b/osf/metrics/utils.py
@@ -1,9 +1,10 @@
+from __future__ import annotations
+import calendar
+import dataclasses
 import re
 import datetime
-import typing
 from hashlib import sha256
-
-import pytz
+from typing import ClassVar
 
 
 def stable_key(*key_parts):
@@ -20,19 +21,22 @@ def stable_key(*key_parts):
     return sha256(bytes(plain_key, encoding='utf')).hexdigest()
 
 
-class YearMonth(typing.NamedTuple):
+@dataclasses.dataclass(frozen=True)
+class YearMonth:
+    """YearMonth: represents a specific month in a specific year"""
     year: int
    month: int
 
-    YEARMONTH_RE = re.compile(r'(?P<year>\d{4})-(?P<month>\d{2})')
+    YEARMONTH_RE: ClassVar[re.Pattern] = re.compile(r'(?P<year>\d{4})-(?P<month>\d{2})')
 
     @classmethod
-    def from_date(cls, date):
-        assert isinstance(date, (datetime.datetime, datetime.date))
+    def from_date(cls, date: datetime.date) -> YearMonth:
+        """construct a YearMonth from a `datetime.date` (or `datetime.datetime`)"""
         return cls(date.year, date.month)
 
     @classmethod
-    def from_str(cls, input_str):
+    def from_str(cls, input_str: str) -> YearMonth:
+        """construct a YearMonth from a string in "YYYY-MM" format"""
         match = cls.YEARMONTH_RE.fullmatch(input_str)
         if match:
             return cls(
@@ -43,12 +47,21 @@ def from_str(cls, input_str):
             raise ValueError(f'expected YYYY-MM format, got "{input_str}"')
 
     def __str__(self):
+        """convert to string of "YYYY-MM" format"""
         return f'{self.year}-{self.month:0>2}'
 
-    def target_month(self):
-        return datetime.datetime(self.year, self.month, 1, tzinfo=pytz.utc)
+    def next(self) -> YearMonth:
+        """get a new YearMonth for the month after this one"""
+        return (
+            YearMonth(self.year + 1, int(calendar.JANUARY))
+            if self.month == calendar.DECEMBER
+            else YearMonth(self.year, self.month + 1)
+        )
+
+    def month_start(self) 
-> datetime.datetime: + """get a datetime (in UTC timezone) when this YearMonth starts""" + return datetime.datetime(self.year, self.month, 1, tzinfo=datetime.UTC) - def next_month(self): - if self.month == 12: - return datetime.datetime(self.year + 1, 1, 1, tzinfo=pytz.utc) - return datetime.datetime(self.year, self.month + 1, 1, tzinfo=pytz.utc) + def month_end(self) -> datetime.datetime: + """get a datetime (in UTC timezone) when this YearMonth ends (the start of next month)""" + return self.next().month_start() diff --git a/osf/migrations/0024_institution_link_to_external_reports_archive.py b/osf/migrations/0024_institution_link_to_external_reports_archive.py new file mode 100644 index 00000000000..8e1a47fcffb --- /dev/null +++ b/osf/migrations/0024_institution_link_to_external_reports_archive.py @@ -0,0 +1,18 @@ +# Generated by Django 4.2.15 on 2024-08-16 15:21 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('osf', '0023_preprint_affiliated_institutions'), + ] + + operations = [ + migrations.AddField( + model_name='institution', + name='link_to_external_reports_archive', + field=models.URLField(blank=True, default='', help_text='Full URL where institutional admins can access archived metrics reports.', max_length=2048), + ), + ] diff --git a/osf/models/institution.py b/osf/models/institution.py index 0c3a9780ac2..d0ce38eacf4 100644 --- a/osf/models/institution.py +++ b/osf/models/institution.py @@ -118,6 +118,12 @@ class Institution(DirtyFieldsMixin, Loggable, ObjectIDMixin, BaseModel, Guardian blank=True, help_text='The full domain this institutions that will appear in DOI metadata.' ) + link_to_external_reports_archive = models.URLField( + max_length=2048, + blank=True, + default='', + help_text='Full URL where institutional admins can access archived metrics reports.', + ) class Meta: # custom permissions for use in the OSF Admin App diff --git a/osf/models/node.py b/osf/models/node.py index 9e342308f44..62925966e2e 100644 --- a/osf/models/node.py +++ b/osf/models/node.py @@ -80,7 +80,6 @@ from api.caching.tasks import update_storage_usage from api.caching import settings as cache_settings from api.caching.utils import storage_usage_cache -from api.share.utils import update_share logger = logging.getLogger(__name__) @@ -711,6 +710,7 @@ def should_request_identifiers(self): @classmethod def bulk_update_search(cls, nodes, index=None): + from api.share.utils import update_share for _node in nodes: update_share(_node) from website import search @@ -722,6 +722,7 @@ def bulk_update_search(cls, nodes, index=None): log_exception(e) def update_search(self): + from api.share.utils import update_share update_share(self) from website import search try: diff --git a/osf/models/user.py b/osf/models/user.py index 22bbfc5baf9..bb0f97f91a9 100644 --- a/osf/models/user.py +++ b/osf/models/user.py @@ -34,7 +34,6 @@ MergeConflictError) from framework.exceptions import PermissionsError from framework.sessions.utils import remove_sessions_for_user -from api.share.utils import update_share from osf.utils.requests import get_current_request from osf.exceptions import reraise_django_validation_errors, UserStateError from .base import BaseModel, GuidMixin, GuidMixinQuerySet @@ -1469,6 +1468,7 @@ def is_assumed_ham(self): return user_has_trusted_email def update_search(self): + from api.share.utils import update_share update_share(self) from website.search.search import update_user update_user(self) diff --git a/osf_tests/factories.py 
b/osf_tests/factories.py index 860dd967e5e..0bd1664977d 100644 --- a/osf_tests/factories.py +++ b/osf_tests/factories.py @@ -188,7 +188,7 @@ class BaseNodeFactory(DjangoModelFactory): title = factory.Faker('catch_phrase') description = factory.Faker('sentence') created = factory.LazyFunction(timezone.now) - creator = factory.SubFactory(AuthUserFactory) + creator = factory.LazyAttribute(lambda o: AuthUserFactory()) class Meta: model = models.Node diff --git a/osf_tests/metadata/_utils.py b/osf_tests/metadata/_utils.py index df5ed2b7ac7..fb23bdb16c5 100644 --- a/osf_tests/metadata/_utils.py +++ b/osf_tests/metadata/_utils.py @@ -3,23 +3,23 @@ from osf.metadata import gather from osf.metadata.rdfutils import contextualized_graph -def assert_triples(actual_triples, expected_triples): +def assert_triples(actual_triples, expected_triples, label=''): _expected_graph, _expected_focuses = _get_graph_and_focuses(expected_triples) _actual_graph, _actual_focuses = _get_graph_and_focuses(actual_triples) - assert_graphs_equal(_actual_graph, _expected_graph) + assert_graphs_equal(_actual_graph, _expected_graph, label=label) assert _expected_focuses == _actual_focuses -def assert_graphs_equal(actual_rdflib_graph, expected_rdflib_graph): +def assert_graphs_equal(actual_rdflib_graph, expected_rdflib_graph, label=''): (_overlap, _expected_but_absent, _unexpected_but_present) = rdflib.compare.graph_diff( expected_rdflib_graph, actual_rdflib_graph, ) assert not _expected_but_absent and not _unexpected_but_present, '\n\t'.join(( - 'unequal triple-sets!', + (f'unequal triplesets for "{label}"!' if label else 'unequal triple-sets!'), f'overlap size: {len(_overlap)}', - f'expected (but absent): {_friendly_graph(_expected_but_absent)}', - f'unexpected (but present): {_friendly_graph(_unexpected_but_present)}', + f'expected (but absent): {_indented_graph(_expected_but_absent)}', + f'unexpected (but present): {_indented_graph(_unexpected_but_present)}', )) @@ -35,10 +35,9 @@ def _get_graph_and_focuses(triples): return _graph, _focuses -def _friendly_graph(rdfgraph) -> str: +def _indented_graph(rdfgraph) -> str: _graph_to_print = contextualized_graph(rdfgraph) _delim = '\n\t\t' return _delim + _delim.join( - ' '.join(_term.n3() for _term in triple) - for triple in _graph_to_print + _graph_to_print.serialize(format='turtle').strip().split('\n') ) diff --git a/osf_tests/metadata/expected_metadata_files/file_basic.turtle b/osf_tests/metadata/expected_metadata_files/file_basic.turtle index 14a78c46c88..3f430b22521 100644 --- a/osf_tests/metadata/expected_metadata_files/file_basic.turtle +++ b/osf_tests/metadata/expected_metadata_files/file_basic.turtle @@ -3,6 +3,7 @@ @prefix foaf: . @prefix osf: . @prefix owl: . +@prefix skos: . a osf:File ; dcat:accessService ; @@ -33,7 +34,8 @@ dcterms:extent "0.000007 MB" ; dcterms:format "img/png" ; dcterms:modified "2123-05-04" ; - dcterms:requires ; + dcterms:requires ; + osf:storageRegion ; osf:versionNumber "1" . a dcterms:Agent, @@ -45,3 +47,5 @@ foaf:Organization ; dcterms:identifier "http://localhost:5000" ; foaf:name "OSF" . + + skos:prefLabel "United States"@en . diff --git a/osf_tests/metadata/expected_metadata_files/file_full.turtle b/osf_tests/metadata/expected_metadata_files/file_full.turtle index 37dd3c537f0..175ccfb042f 100644 --- a/osf_tests/metadata/expected_metadata_files/file_full.turtle +++ b/osf_tests/metadata/expected_metadata_files/file_full.turtle @@ -4,6 +4,7 @@ @prefix osf: . @prefix owl: . @prefix rdfs: . +@prefix skos: . 
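+# skos prefix is newly needed here for the storage region's prefLabel (added at the end of this file)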
a osf:File ; dcat:accessService ; @@ -39,7 +40,8 @@ dcterms:extent "0.000007 MB" ; dcterms:format "img/png" ; dcterms:modified "2123-05-04" ; - dcterms:requires ; + dcterms:requires ; + osf:storageRegion ; osf:versionNumber "1" . a osf:FundingAward ; @@ -76,3 +78,5 @@ foaf:name "OSF" . rdfs:label "Dataset"@en . + + skos:prefLabel "United States"@en . diff --git a/osf_tests/metadata/expected_metadata_files/file_monthly_supplement.turtle b/osf_tests/metadata/expected_metadata_files/file_monthly_supplement.turtle new file mode 100644 index 00000000000..845bd149f37 --- /dev/null +++ b/osf_tests/metadata/expected_metadata_files/file_monthly_supplement.turtle @@ -0,0 +1,13 @@ +@prefix dcat: . +@prefix dcterms: . +@prefix foaf: . +@prefix osf: . +@prefix xsd: . + + osf:usage [ dcterms:temporal "2123-05"^^xsd:gYearMonth ; + dcat:accessService ; + foaf:primaryTopic ; + osf:downloadCount 3 ; + osf:downloadSessionCount 2 ; + osf:viewCount 7 ; + osf:viewSessionCount 5 ] . diff --git a/osf_tests/metadata/expected_metadata_files/file_supplement.turtle b/osf_tests/metadata/expected_metadata_files/file_supplement.turtle new file mode 100644 index 00000000000..662c197699d --- /dev/null +++ b/osf_tests/metadata/expected_metadata_files/file_supplement.turtle @@ -0,0 +1 @@ +# correctly empty (for now) diff --git a/osf_tests/metadata/expected_metadata_files/preprint_basic.turtle b/osf_tests/metadata/expected_metadata_files/preprint_basic.turtle index f6db59e6e24..ee7e866827b 100644 --- a/osf_tests/metadata/expected_metadata_files/preprint_basic.turtle +++ b/osf_tests/metadata/expected_metadata_files/preprint_basic.turtle @@ -3,6 +3,7 @@ @prefix foaf: . @prefix osf: . @prefix owl: . +@prefix prov: . @prefix rdfs: . @prefix skos: . @@ -25,7 +26,9 @@ dcat:accessService ; osf:hostingInstitution ; osf:isSupplementedBy ; - osf:statedConflictOfInterest osf:no-conflict-of-interest . + osf:statedConflictOfInterest osf:no-conflict-of-interest ; + prov:qualifiedAttribution [ dcat:hadRole osf:admin-contributor ; + prov:agent ] . a dcterms:Agent, foaf:Organization ; diff --git a/osf_tests/metadata/expected_metadata_files/preprint_full.turtle b/osf_tests/metadata/expected_metadata_files/preprint_full.turtle index 93c69fa4e8c..cdf665fd5fe 100644 --- a/osf_tests/metadata/expected_metadata_files/preprint_full.turtle +++ b/osf_tests/metadata/expected_metadata_files/preprint_full.turtle @@ -3,6 +3,7 @@ @prefix foaf: . @prefix osf: . @prefix owl: . +@prefix prov: . @prefix rdfs: . @prefix skos: . @@ -25,7 +26,9 @@ dcat:accessService ; osf:hostingInstitution ; osf:isSupplementedBy ; - osf:statedConflictOfInterest osf:no-conflict-of-interest . + osf:statedConflictOfInterest osf:no-conflict-of-interest ; + prov:qualifiedAttribution [ dcat:hadRole osf:admin-contributor ; + prov:agent ] . a dcterms:Agent, foaf:Organization ; diff --git a/osf_tests/metadata/expected_metadata_files/preprint_monthly_supplement.turtle b/osf_tests/metadata/expected_metadata_files/preprint_monthly_supplement.turtle new file mode 100644 index 00000000000..8e6d6fb9331 --- /dev/null +++ b/osf_tests/metadata/expected_metadata_files/preprint_monthly_supplement.turtle @@ -0,0 +1,13 @@ +@prefix dcat: . +@prefix dcterms: . +@prefix foaf: . +@prefix osf: . +@prefix xsd: . + + osf:usage [ dcterms:temporal "2123-05"^^xsd:gYearMonth ; + dcat:accessService ; + foaf:primaryTopic ; + osf:downloadCount 3 ; + osf:downloadSessionCount 2 ; + osf:viewCount 7 ; + osf:viewSessionCount 5 ] . 
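A quick sketch of the YearMonth arithmetic that the monthly-supplement fixtures above rely on; this is a hypothetical usage example, assuming the patched osf/metrics/utils.py is importable (the '2123-05' literal mirrors the report_yearmonth in the fixtures):

    import datetime
    from osf.metrics.utils import YearMonth

    _ym = YearMonth.from_str('2123-05')
    assert str(_ym.next()) == '2123-06'
    # month_end() is the start of the next month, so month ranges are half-open
    assert _ym.month_end() == _ym.next().month_start()
    assert _ym.month_end() == datetime.datetime(2123, 6, 1, tzinfo=datetime.UTC)
    # OsfmapPartition.MONTHLY_SUPPLEMENT expires each report two months after its
    # reporting period ends (see get_expiration_date above), giving the *next*
    # monthly report up to a month to compute, per COUNTER
    assert _ym.next().next().month_end().date() == datetime.date(2123, 8, 1)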
diff --git a/osf_tests/metadata/expected_metadata_files/preprint_supplement.turtle b/osf_tests/metadata/expected_metadata_files/preprint_supplement.turtle new file mode 100644 index 00000000000..9ff0732a509 --- /dev/null +++ b/osf_tests/metadata/expected_metadata_files/preprint_supplement.turtle @@ -0,0 +1,7 @@ +@prefix osf: . +@prefix skos: . + + osf:storageByteCount 1337 ; + osf:storageRegion . + + skos:prefLabel "United States"@en . diff --git a/osf_tests/metadata/expected_metadata_files/project_basic.turtle b/osf_tests/metadata/expected_metadata_files/project_basic.turtle index c3846782273..aa8244da1fd 100644 --- a/osf_tests/metadata/expected_metadata_files/project_basic.turtle +++ b/osf_tests/metadata/expected_metadata_files/project_basic.turtle @@ -3,7 +3,9 @@ @prefix foaf: . @prefix osf: . @prefix owl: . +@prefix prov: . @prefix rdfs: . +@prefix skos: . a osf:Project ; dcterms:created "2123-05-04" ; @@ -23,7 +25,9 @@ dcat:accessService ; osf:contains ; osf:hostingInstitution ; - osf:supplements . + osf:supplements ; + prov:qualifiedAttribution [ dcat:hadRole osf:admin-contributor ; + prov:agent ] . a osf:Preprint ; dcterms:created "2123-05-04" ; @@ -53,8 +57,19 @@ dcterms:modified "2123-05-04" ; osf:fileName "my-file.blarg" ; osf:filePath "/my-file.blarg" ; + osf:hasFileVersion ; osf:isContainedBy . + a osf:FileVersion ; + dcterms:created "2123-05-04" ; + dcterms:creator ; + dcterms:extent "0.000007 MB" ; + dcterms:format "img/png" ; + dcterms:modified "2123-05-04" ; + dcterms:requires ; + osf:storageRegion ; + osf:versionNumber "1" . + a dcterms:Agent, foaf:Organization ; dcterms:identifier "https://cos.io/", @@ -85,3 +100,5 @@ rdfs:label "Preprint"@en . rdfs:label "StudyRegistration"@en . + + skos:prefLabel "United States"@en . diff --git a/osf_tests/metadata/expected_metadata_files/project_full.turtle b/osf_tests/metadata/expected_metadata_files/project_full.turtle index 6a84d141440..63946b2f80b 100644 --- a/osf_tests/metadata/expected_metadata_files/project_full.turtle +++ b/osf_tests/metadata/expected_metadata_files/project_full.turtle @@ -3,7 +3,9 @@ @prefix foaf: . @prefix osf: . @prefix owl: . +@prefix prov: . @prefix rdfs: . +@prefix skos: . a osf:Project ; dcterms:created "2123-05-04" ; @@ -29,7 +31,9 @@ osf:hasFunding , ; osf:hostingInstitution ; - osf:supplements . + osf:supplements ; + prov:qualifiedAttribution [ dcat:hadRole osf:admin-contributor ; + prov:agent ] . a osf:Preprint ; dcterms:created "2123-05-04" ; @@ -59,8 +63,19 @@ dcterms:modified "2123-05-04" ; osf:fileName "my-file.blarg" ; osf:filePath "/my-file.blarg" ; + osf:hasFileVersion ; osf:isContainedBy . + a osf:FileVersion ; + dcterms:created "2123-05-04" ; + dcterms:creator ; + dcterms:extent "0.000007 MB" ; + dcterms:format "img/png" ; + dcterms:modified "2123-05-04" ; + dcterms:requires ; + osf:storageRegion ; + osf:versionNumber "1" . + a osf:FundingAward ; dcterms:contributor ; dcterms:identifier "https://moneypockets.example/millions" ; @@ -116,3 +131,5 @@ rdfs:label "Dataset"@en . rdfs:label "StudyRegistration"@en . + + skos:prefLabel "United States"@en . diff --git a/osf_tests/metadata/expected_metadata_files/project_monthly_supplement.turtle b/osf_tests/metadata/expected_metadata_files/project_monthly_supplement.turtle new file mode 100644 index 00000000000..dd9c54b1f93 --- /dev/null +++ b/osf_tests/metadata/expected_metadata_files/project_monthly_supplement.turtle @@ -0,0 +1,13 @@ +@prefix dcat: . +@prefix dcterms: . +@prefix foaf: . +@prefix osf: . +@prefix xsd: . 
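+# last month's usage for this project, as gathered by gather_last_month_usage;
+# per COUNTER, viewCount ~ Total Investigations and downloadCount ~ Total Requests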
+ + osf:usage [ dcterms:temporal "2123-05"^^xsd:gYearMonth ; + dcat:accessService ; + foaf:primaryTopic ; + osf:downloadCount 3 ; + osf:downloadSessionCount 2 ; + osf:viewCount 7 ; + osf:viewSessionCount 5 ] . diff --git a/osf_tests/metadata/expected_metadata_files/project_supplement.turtle b/osf_tests/metadata/expected_metadata_files/project_supplement.turtle new file mode 100644 index 00000000000..d055e97554f --- /dev/null +++ b/osf_tests/metadata/expected_metadata_files/project_supplement.turtle @@ -0,0 +1,13 @@ +@prefix dcterms: . +@prefix osf: . +@prefix skos: . + + osf:hasOsfAddon ; + osf:storageByteCount 7 ; + osf:storageRegion . + + a osf:AddonImplementation ; + dcterms:identifier "gitlab" ; + skos:prefLabel "GitLab" . + + skos:prefLabel "United States"@en . diff --git a/osf_tests/metadata/expected_metadata_files/registration_basic.turtle b/osf_tests/metadata/expected_metadata_files/registration_basic.turtle index eae4a92336c..9601477944f 100644 --- a/osf_tests/metadata/expected_metadata_files/registration_basic.turtle +++ b/osf_tests/metadata/expected_metadata_files/registration_basic.turtle @@ -3,7 +3,9 @@ @prefix foaf: . @prefix osf: . @prefix owl: . +@prefix prov: . @prefix rdfs: . +@prefix skos: . a osf:Registration ; dcterms:conformsTo ; @@ -21,7 +23,10 @@ dcterms:title "this is a project title!" ; dcterms:type ; dcat:accessService ; - osf:hostingInstitution . + osf:contains ; + osf:hostingInstitution ; + prov:qualifiedAttribution [ dcat:hadRole osf:admin-contributor ; + prov:agent ] . a osf:Project ; dcterms:created "2123-05-04" ; @@ -36,6 +41,25 @@ dcterms:title "this is a project title!" ; owl:sameAs . + a osf:File ; + dcterms:created "2123-05-04" ; + dcterms:identifier "http://localhost:5000/w6ibb" ; + dcterms:modified "2123-05-04" ; + osf:fileName "my-reg-file.blarg" ; + osf:filePath "/my-reg-file.blarg" ; + osf:hasFileVersion ; + osf:isContainedBy . + + a osf:FileVersion ; + dcterms:created "2123-05-04" ; + dcterms:creator ; + dcterms:extent "0.000016 MB" ; + dcterms:format "img/png" ; + dcterms:modified "2123-05-04" ; + dcterms:requires ; + osf:storageRegion ; + osf:versionNumber "1" . + a dcterms:Agent, foaf:Organization ; dcterms:identifier "https://cos.io/", @@ -61,3 +85,5 @@ dcterms:title "Open-Ended Registration" . rdfs:label "StudyRegistration"@en . + + skos:prefLabel "United States"@en . diff --git a/osf_tests/metadata/expected_metadata_files/registration_full.turtle b/osf_tests/metadata/expected_metadata_files/registration_full.turtle index d30c4594bbe..4ab508c2f17 100644 --- a/osf_tests/metadata/expected_metadata_files/registration_full.turtle +++ b/osf_tests/metadata/expected_metadata_files/registration_full.turtle @@ -3,7 +3,9 @@ @prefix foaf: . @prefix osf: . @prefix owl: . +@prefix prov: . @prefix rdfs: . +@prefix skos: . a osf:Registration ; dcterms:conformsTo ; @@ -21,7 +23,10 @@ dcterms:title "this is a project title!" ; dcterms:type ; dcat:accessService ; - osf:hostingInstitution . + osf:contains ; + osf:hostingInstitution ; + prov:qualifiedAttribution [ dcat:hadRole osf:admin-contributor ; + prov:agent ] . a osf:Project ; dcterms:created "2123-05-04" ; @@ -41,6 +46,25 @@ osf:hasFunding , . + a osf:File ; + dcterms:created "2123-05-04" ; + dcterms:identifier "http://localhost:5000/w6ibb" ; + dcterms:modified "2123-05-04" ; + osf:fileName "my-reg-file.blarg" ; + osf:filePath "/my-reg-file.blarg" ; + osf:hasFileVersion ; + osf:isContainedBy . 
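+# the file-version stanza below carries its storage region (osf:storageRegion)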
+ + a osf:FileVersion ; + dcterms:created "2123-05-04" ; + dcterms:creator ; + dcterms:extent "0.000016 MB" ; + dcterms:format "img/png" ; + dcterms:modified "2123-05-04" ; + dcterms:requires ; + osf:storageRegion ; + osf:versionNumber "1" . + a osf:FundingAward ; dcterms:contributor ; dcterms:identifier "https://moneypockets.example/millions" ; @@ -91,3 +115,5 @@ dcterms:title "Open-Ended Registration" . rdfs:label "StudyRegistration"@en . + + skos:prefLabel "United States"@en . diff --git a/osf_tests/metadata/expected_metadata_files/registration_monthly_supplement.turtle b/osf_tests/metadata/expected_metadata_files/registration_monthly_supplement.turtle new file mode 100644 index 00000000000..435f7f4f921 --- /dev/null +++ b/osf_tests/metadata/expected_metadata_files/registration_monthly_supplement.turtle @@ -0,0 +1,13 @@ +@prefix dcat: . +@prefix dcterms: . +@prefix foaf: . +@prefix osf: . +@prefix xsd: . + + osf:usage [ dcterms:temporal "2123-05"^^xsd:gYearMonth ; + dcat:accessService ; + foaf:primaryTopic ; + osf:downloadCount 3 ; + osf:downloadSessionCount 2 ; + osf:viewCount 7 ; + osf:viewSessionCount 5 ] . diff --git a/osf_tests/metadata/expected_metadata_files/registration_supplement.turtle b/osf_tests/metadata/expected_metadata_files/registration_supplement.turtle new file mode 100644 index 00000000000..9e8201b7915 --- /dev/null +++ b/osf_tests/metadata/expected_metadata_files/registration_supplement.turtle @@ -0,0 +1,7 @@ +@prefix osf: . +@prefix skos: . + + osf:storageByteCount 17 ; + osf:storageRegion . + + skos:prefLabel "United States"@en . diff --git a/osf_tests/metadata/expected_metadata_files/user_monthly_supplement.turtle b/osf_tests/metadata/expected_metadata_files/user_monthly_supplement.turtle new file mode 100644 index 00000000000..662c197699d --- /dev/null +++ b/osf_tests/metadata/expected_metadata_files/user_monthly_supplement.turtle @@ -0,0 +1 @@ +# correctly empty (for now) diff --git a/osf_tests/metadata/expected_metadata_files/user_supplement.turtle b/osf_tests/metadata/expected_metadata_files/user_supplement.turtle new file mode 100644 index 00000000000..662c197699d --- /dev/null +++ b/osf_tests/metadata/expected_metadata_files/user_supplement.turtle @@ -0,0 +1 @@ +# correctly empty (for now) diff --git a/osf_tests/metadata/test_basket.py b/osf_tests/metadata/test_basket.py index 1fa8381cf08..c34ded3e2c5 100644 --- a/osf_tests/metadata/test_basket.py +++ b/osf_tests/metadata/test_basket.py @@ -34,7 +34,7 @@ def test_goodbasket(): basket = gather.Basket(focus) assert basket.focus == focus assert isinstance(basket.gathered_metadata, rdflib.Graph) - assert len(basket.gathered_metadata) == 1 + assert len(basket.gathered_metadata) == 0 assert len(basket._gathertasks_done) == 0 assert len(basket._known_focus_dict) == 1 # no repeat gathertasks: @@ -78,5 +78,6 @@ def test_goodbasket(): # reset basket.reset() - assert len(basket.gathered_metadata) == 1 + assert len(basket.gathered_metadata) == 0 assert len(basket._gathertasks_done) == 0 + assert len(basket._known_focus_dict) == 1 diff --git a/osf_tests/metadata/test_gatherer_registry.py b/osf_tests/metadata/test_gatherer_registry.py index fda28eaf680..c139946ab80 100644 --- a/osf_tests/metadata/test_gatherer_registry.py +++ b/osf_tests/metadata/test_gatherer_registry.py @@ -74,6 +74,10 @@ def gather_agent_name(focus): gather_preprint_or_project_creator, gather_special_preprint_creator, } + assert get_gatherers(BAZ.Preprint, [BAZ.creator], include_focustype_defaults=False) == { + gather_preprint_or_project_creator, + 
gather_special_preprint_creator, + } assert get_gatherers(BAZ.Agent, [FOO.name, FOO.identifier, FOO.unknown]) == { gather_agent_name, gather_identifiers, diff --git a/osf_tests/metadata/test_osf_gathering.py b/osf_tests/metadata/test_osf_gathering.py index 7bd72770aba..4c064c8a690 100644 --- a/osf_tests/metadata/test_osf_gathering.py +++ b/osf_tests/metadata/test_osf_gathering.py @@ -1,4 +1,5 @@ import datetime +from unittest import mock from django.test import TestCase import rdflib @@ -11,15 +12,19 @@ FOAF, OSF, OSFIO, + DCAT, DCTERMS, DCMITYPE, DOI, OWL, + PROV, RDF, SKOS, checksum_iri, ) from osf import models as osfdb +from osf.metrics.reports import PublicItemUsageReport +from osf.metrics.utils import YearMonth from osf.utils import permissions, workflows from osf_tests import factories from website import settings as website_settings @@ -36,12 +41,13 @@ def setUpTestData(cls): external_identity={'ORCID': {'1234-4321-5678-8765': 'VERIFIED'}}, ) cls.user__readonly = factories.UserFactory( - external_identity={'ORCID': {'1234-4321-6789-9876': 'CREATE'}}, + external_identity={'ORCID': {'1234-4321-6789-9876': 'CREATE'}}, # unverified orcid social={ 'profileWebsites': ['http://mysite.example', 'http://myothersite.example/foo'], 'baiduScholar': 'blarg', }, ) + cls.user__invisible = factories.UserFactory() # cedar metadata template cls.cedar_template = factories.CedarMetadataTemplateFactory( cedar_id='https://repo.metadatacenter.org/templates/this-is-a-cedar-id', @@ -51,8 +57,11 @@ def setUpTestData(cls): ) # project (with components): cls.project = factories.ProjectFactory(creator=cls.user__admin, is_public=True) + cls.project.add_addon('box', auth=None) + cls.project.add_addon('gitlab', auth=None) cls.project.add_contributor(cls.user__readwrite, permissions=permissions.WRITE) - cls.project.add_contributor(cls.user__readonly, permissions=permissions.READ, visible=False) + cls.project.add_contributor(cls.user__readonly, permissions=permissions.READ) + cls.project.add_contributor(cls.user__invisible, permissions=permissions.WRITE, visible=False) cls.component = factories.ProjectFactory(parent=cls.project, creator=cls.user__admin, is_public=True) cls.sibcomponent = factories.ProjectFactory(parent=cls.project, creator=cls.user__admin, is_public=True) cls.subcomponent = factories.ProjectFactory(parent=cls.component, creator=cls.user__admin, is_public=True) @@ -89,7 +98,8 @@ def setUpTestData(cls): is_public=True, ) cls.preprint.add_contributor(cls.user__readwrite, permissions=permissions.WRITE) - cls.preprint.add_contributor(cls.user__readonly, permissions=permissions.READ, visible=False) + cls.preprint.add_contributor(cls.user__readonly, permissions=permissions.READ) + cls.preprint.add_contributor(cls.user__invisible, permissions=permissions.WRITE, visible=False) cls.registration_cedar_record = factories.CedarMetadataRecordFactory( template=cls.cedar_template, is_published=True, @@ -453,6 +463,7 @@ def test_gather_versions(self): # focus: file fileversion = self.file.versions.first() fileversion_iri = URIRef(f'{self.filefocus.iri}?revision={fileversion.identifier}') + storageregion_iri = URIRef(f'{website_settings.API_DOMAIN}v2/regions/us/') assert_triples(osf_gathering.gather_versions(self.filefocus), { (self.filefocus.iri, OSF.hasFileVersion, fileversion_iri), (fileversion_iri, RDF.type, OSF.FileVersion), @@ -462,7 +473,9 @@ def test_gather_versions(self): (fileversion_iri, DCTERMS['format'], Literal(fileversion.content_type)), (fileversion_iri, DCTERMS.extent, Literal('0.118 MB')), 
(fileversion_iri, OSF.versionNumber, Literal(fileversion.identifier)), - (fileversion_iri, DCTERMS.requires, checksum_iri('sha-256', self.file_sha256)) + (fileversion_iri, DCTERMS.requires, checksum_iri('sha-256', self.file_sha256)), + (fileversion_iri, OSF.storageRegion, storageregion_iri), + (storageregion_iri, SKOS.prefLabel, Literal('United States', lang='en')), }) def test_gather_files(self): @@ -521,11 +534,19 @@ def test_gather_agents(self): assert_triples(osf_gathering.gather_agents(self.projectfocus), { (self.projectfocus.iri, DCTERMS.creator, self.userfocus__admin), (self.projectfocus.iri, DCTERMS.creator, self.userfocus__readwrite), + (self.projectfocus.iri, DCTERMS.creator, self.userfocus__readonly), }) # focus: registration assert_triples(osf_gathering.gather_agents(self.registrationfocus), { (self.registrationfocus.iri, DCTERMS.creator, self.userfocus__admin), (self.registrationfocus.iri, DCTERMS.creator, self.userfocus__readwrite), + (self.registrationfocus.iri, DCTERMS.creator, self.userfocus__readonly), + }) + # focus: preprint + assert_triples(osf_gathering.gather_agents(self.preprintfocus), { + (self.preprintfocus.iri, DCTERMS.creator, self.userfocus__admin), + (self.preprintfocus.iri, DCTERMS.creator, self.userfocus__readwrite), + (self.preprintfocus.iri, DCTERMS.creator, self.userfocus__readonly), }) # focus: file assert_triples(osf_gathering.gather_agents(self.filefocus), set()) @@ -750,3 +771,116 @@ def test_gather_cedar_templates(self): (self.filefocus.iri, OSF.hasCedarTemplate, cedar_template_iri), (cedar_template_iri, DCTERMS.title, Literal(self.cedar_template.schema_name)) }) + + def test_gather_last_month_usage(self): + # no usage report: + with mock.patch( + 'osf.metrics.reports.PublicItemUsageReport.for_last_month', + return_value=None, + ): + assert_triples(osf_gathering.gather_last_month_usage(self.projectfocus), set()) + # yes usage report: + _ym = YearMonth.from_date(datetime.datetime.now(tz=datetime.UTC)) + with mock.patch( + 'osf.metrics.reports.PublicItemUsageReport.for_last_month', + return_value=PublicItemUsageReport( + item_osfid=self.project._id, + report_yearmonth=_ym, + view_count=71, + view_session_count=13, + download_count=43, + download_session_count=11, + ), + ): + _usage_bnode = rdflib.BNode() + assert_triples(osf_gathering.gather_last_month_usage(self.projectfocus), { + (self.projectfocus.iri, OSF.usage, _usage_bnode), + (_usage_bnode, DCTERMS.temporal, Literal(str(_ym), datatype=rdflib.XSD.gYearMonth)), + (_usage_bnode, DCAT.accessService, rdflib.URIRef(website_settings.DOMAIN.rstrip('/'))), + (_usage_bnode, FOAF.primaryTopic, self.projectfocus.iri), + (_usage_bnode, OSF.viewCount, Literal(71)), + (_usage_bnode, OSF.viewSessionCount, Literal(13)), + (_usage_bnode, OSF.downloadCount, Literal(43)), + (_usage_bnode, OSF.downloadSessionCount, Literal(11)), + }) + + def test_gather_addons(self): + # registration (without non-default addon) + assert_triples(osf_gathering.gather_addons(self.registrationfocus), set()) + # project (with non-default addons) + _box_ref = rdflib.URIRef('urn:osf.io:addons:box') + _gitlab_ref = rdflib.URIRef('urn:osf.io:addons:gitlab') + assert_triples(osf_gathering.gather_addons(self.projectfocus), { + (self.projectfocus.iri, OSF.hasOsfAddon, _box_ref), + (_box_ref, RDF.type, OSF.AddonImplementation), + (_box_ref, DCTERMS.identifier, Literal('box')), + (_box_ref, SKOS.prefLabel, Literal('Box')), + (self.projectfocus.iri, OSF.hasOsfAddon, _gitlab_ref), + (_gitlab_ref, RDF.type, OSF.AddonImplementation), + (_gitlab_ref, 
DCTERMS.identifier, Literal('gitlab')), + (_gitlab_ref, SKOS.prefLabel, Literal('GitLab')), + }) + + def test_gather_storage_region(self): + _default_region_ref = rdflib.URIRef(f'{website_settings.API_DOMAIN}v2/regions/us/') + assert_triples(osf_gathering.gather_storage_region(self.projectfocus), { + (self.projectfocus.iri, OSF.storageRegion, _default_region_ref), + (_default_region_ref, SKOS.prefLabel, Literal('United States', lang='en')), + }) + assert_triples(osf_gathering.gather_storage_region(self.registrationfocus), { + (self.registrationfocus.iri, OSF.storageRegion, _default_region_ref), + (_default_region_ref, SKOS.prefLabel, Literal('United States', lang='en')), + }) + assert_triples(osf_gathering.gather_storage_region(self.preprintfocus), { + (self.preprintfocus.iri, OSF.storageRegion, _default_region_ref), + (_default_region_ref, SKOS.prefLabel, Literal('United States', lang='en')), + }) + + def test_gather_qualified_attributions(self): + _attribution_admin = rdflib.BNode() + _attribution_readwrite = rdflib.BNode() + _attribution_readonly = rdflib.BNode() + assert_triples(osf_gathering.gather_qualified_attributions(self.projectfocus), { + (self.projectfocus.iri, PROV.qualifiedAttribution, _attribution_admin), + (_attribution_admin, PROV.agent, self.userfocus__admin), + (_attribution_admin, DCAT.hadRole, OSF['admin-contributor']), + (self.projectfocus.iri, PROV.qualifiedAttribution, _attribution_readwrite), + (_attribution_readwrite, PROV.agent, self.userfocus__readwrite), + (_attribution_readwrite, DCAT.hadRole, OSF['write-contributor']), + (self.projectfocus.iri, PROV.qualifiedAttribution, _attribution_readonly), + (_attribution_readonly, PROV.agent, self.userfocus__readonly), + (_attribution_readonly, DCAT.hadRole, OSF['readonly-contributor']), + }) + assert_triples(osf_gathering.gather_qualified_attributions(self.registrationfocus), { + (self.registrationfocus.iri, PROV.qualifiedAttribution, _attribution_admin), + (_attribution_admin, PROV.agent, self.userfocus__admin), + (_attribution_admin, DCAT.hadRole, OSF['admin-contributor']), + (self.registrationfocus.iri, PROV.qualifiedAttribution, _attribution_readwrite), + (_attribution_readwrite, PROV.agent, self.userfocus__readwrite), + (_attribution_readwrite, DCAT.hadRole, OSF['write-contributor']), + (self.registrationfocus.iri, PROV.qualifiedAttribution, _attribution_readonly), + (_attribution_readonly, PROV.agent, self.userfocus__readonly), + (_attribution_readonly, DCAT.hadRole, OSF['readonly-contributor']), + }) + assert_triples(osf_gathering.gather_qualified_attributions(self.preprintfocus), { + (self.preprintfocus.iri, PROV.qualifiedAttribution, _attribution_admin), + (_attribution_admin, PROV.agent, self.userfocus__admin), + (_attribution_admin, DCAT.hadRole, OSF['admin-contributor']), + (self.preprintfocus.iri, PROV.qualifiedAttribution, _attribution_readwrite), + (_attribution_readwrite, PROV.agent, self.userfocus__readwrite), + (_attribution_readwrite, DCAT.hadRole, OSF['write-contributor']), + (self.preprintfocus.iri, PROV.qualifiedAttribution, _attribution_readonly), + (_attribution_readonly, PROV.agent, self.userfocus__readonly), + (_attribution_readonly, DCAT.hadRole, OSF['readonly-contributor']), + }) + + def test_gather_storage_byte_count(self): + assert_triples(osf_gathering.gather_storage_byte_count(self.projectfocus), { + (self.projectfocus.iri, OSF.storageByteCount, Literal(123456)), + }) + assert_triples(osf_gathering.gather_storage_byte_count(self.registrationfocus), { + (self.registrationfocus.iri, 
OSF.storageByteCount, Literal(0)), + }) + assert_triples(osf_gathering.gather_storage_byte_count(self.preprintfocus), { + (self.preprintfocus.iri, OSF.storageByteCount, Literal(1337)), + }) diff --git a/osf_tests/metadata/test_serialized_metadata.py b/osf_tests/metadata/test_serialized_metadata.py index 0c74961778a..c8a0eee95ac 100644 --- a/osf_tests/metadata/test_serialized_metadata.py +++ b/osf_tests/metadata/test_serialized_metadata.py @@ -5,8 +5,11 @@ import rdflib from osf import models as osfdb +from osf.metadata.osf_gathering import OsfmapPartition from osf.metadata.rdfutils import OSF, DCTERMS from osf.metadata.tools import pls_gather_metadata_file +from osf.metrics.reports import PublicItemUsageReport +from osf.metrics.utils import YearMonth from osf.models.licenses import NodeLicense from api_tests.utils import create_test_file from osf_tests import factories @@ -22,53 +25,103 @@ BASIC_METADATA_SCENARIO = { OSF.Project: { - 'turtle': 'project_basic.turtle', - 'datacite-xml': 'project_basic.datacite.xml', - 'datacite-json': 'project_basic.datacite.json', + OsfmapPartition.MAIN: { + 'turtle': 'project_basic.turtle', + 'datacite-xml': 'project_basic.datacite.xml', + 'datacite-json': 'project_basic.datacite.json', + }, }, OSF.Preprint: { - 'turtle': 'preprint_basic.turtle', - 'datacite-xml': 'preprint_basic.datacite.xml', - 'datacite-json': 'preprint_basic.datacite.json', + OsfmapPartition.MAIN: { + 'turtle': 'preprint_basic.turtle', + 'datacite-xml': 'preprint_basic.datacite.xml', + 'datacite-json': 'preprint_basic.datacite.json', + }, }, OSF.Registration: { - 'turtle': 'registration_basic.turtle', - 'datacite-xml': 'registration_basic.datacite.xml', - 'datacite-json': 'registration_basic.datacite.json', + OsfmapPartition.MAIN: { + 'turtle': 'registration_basic.turtle', + 'datacite-xml': 'registration_basic.datacite.xml', + 'datacite-json': 'registration_basic.datacite.json', + }, }, OSF.File: { - 'turtle': 'file_basic.turtle', - 'datacite-xml': 'file_basic.datacite.xml', - 'datacite-json': 'file_basic.datacite.json', + OsfmapPartition.MAIN: { + 'turtle': 'file_basic.turtle', + 'datacite-xml': 'file_basic.datacite.xml', + 'datacite-json': 'file_basic.datacite.json', + }, }, DCTERMS.Agent: { - 'turtle': 'user_basic.turtle', + OsfmapPartition.MAIN: { + 'turtle': 'user_basic.turtle', + }, }, } FULL_METADATA_SCENARIO = { OSF.Project: { - 'turtle': 'project_full.turtle', - 'datacite-xml': 'project_full.datacite.xml', - 'datacite-json': 'project_full.datacite.json', + OsfmapPartition.MAIN: { + 'turtle': 'project_full.turtle', + 'datacite-xml': 'project_full.datacite.xml', + 'datacite-json': 'project_full.datacite.json', + }, + OsfmapPartition.SUPPLEMENT: { + 'turtle': 'project_supplement.turtle', + }, + OsfmapPartition.MONTHLY_SUPPLEMENT: { + 'turtle': 'project_monthly_supplement.turtle', + }, }, OSF.Preprint: { - 'turtle': 'preprint_full.turtle', - 'datacite-xml': 'preprint_full.datacite.xml', - 'datacite-json': 'preprint_full.datacite.json', + OsfmapPartition.MAIN: { + 'turtle': 'preprint_full.turtle', + 'datacite-xml': 'preprint_full.datacite.xml', + 'datacite-json': 'preprint_full.datacite.json', + }, + OsfmapPartition.SUPPLEMENT: { + 'turtle': 'preprint_supplement.turtle', + }, + OsfmapPartition.MONTHLY_SUPPLEMENT: { + 'turtle': 'preprint_monthly_supplement.turtle', + }, }, OSF.Registration: { - 'turtle': 'registration_full.turtle', - 'datacite-xml': 'registration_full.datacite.xml', - 'datacite-json': 'registration_full.datacite.json', + OsfmapPartition.MAIN: { + 'turtle': 
'registration_full.turtle', + 'datacite-xml': 'registration_full.datacite.xml', + 'datacite-json': 'registration_full.datacite.json', + }, + OsfmapPartition.SUPPLEMENT: { + 'turtle': 'registration_supplement.turtle', + }, + OsfmapPartition.MONTHLY_SUPPLEMENT: { + 'turtle': 'registration_monthly_supplement.turtle', + }, }, OSF.File: { - 'turtle': 'file_full.turtle', - 'datacite-xml': 'file_full.datacite.xml', - 'datacite-json': 'file_full.datacite.json', + OsfmapPartition.MAIN: { + 'turtle': 'file_full.turtle', + 'datacite-xml': 'file_full.datacite.xml', + 'datacite-json': 'file_full.datacite.json', + }, + OsfmapPartition.SUPPLEMENT: { + 'turtle': 'file_supplement.turtle', + }, + OsfmapPartition.MONTHLY_SUPPLEMENT: { + 'turtle': 'file_monthly_supplement.turtle', + }, }, DCTERMS.Agent: { - 'turtle': 'user_full.turtle', + OsfmapPartition.MAIN: { + 'turtle': 'user_full.turtle', + }, + OsfmapPartition.SUPPLEMENT: { + 'turtle': 'user_supplement.turtle', + }, + OsfmapPartition.MONTHLY_SUPPLEMENT: { + 'turtle': 'user_monthly_supplement.turtle', + }, }, } @@ -124,8 +177,7 @@ def setUp(self): mock.patch('django.utils.timezone.now', new=forever_now), mock.patch('osf.models.metaschema.RegistrationSchema.absolute_api_v2_url', new='http://fake.example/schema/for/test'), ): - patcher.start() - self.addCleanup(patcher.stop) + self.enterContext(patcher) # build test objects self.user = factories.AuthUserFactory( fullname='Person McNamington', @@ -147,12 +199,13 @@ def setUp(self): category='doi', value=f'10.70102/FK2osf.io/{self.project._id}', ) + self.project.add_addon('gitlab', auth=None) self.file = create_test_file( self.project, self.user, filename='my-file.blarg', size=7, - sha256='6ac3c336e4094835293a3fed8a4b5fedde1b5e2626d9838fed50693bba00af0e', + sha256='shashasha', ) osf_preprint_provider = factories.PreprintProviderFactory(_id='osf') another_provider = factories.PreprintProviderFactory( @@ -208,9 +261,26 @@ def setUp(self): doi_prefix='11.rp', ), ) + self.reg_file = create_test_file( + self.registration, + self.user, + filename='my-reg-file.blarg', + size=17, + sha256='shashasha', + ) osfdb.GuidMetadataRecord.objects.for_guid(self.registration._id).update({ 'resource_type_general': 'StudyRegistration', }, auth=self.user) + self.enterContext(mock.patch( + 'osf.metrics.reports.PublicItemUsageReport.for_last_month', + return_value=PublicItemUsageReport( + report_yearmonth=YearMonth.from_date(forever_now()), + view_count=7, + view_session_count=5, + download_count=3, + download_session_count=2, + ), + )) self.guid_dict = { OSF.Project: self.project._id, OSF.Preprint: self.preprint._id, @@ -261,27 +331,37 @@ def test_serialized_metadata(self): self._assert_scenario(FULL_METADATA_SCENARIO) def _assert_scenario(self, scenario_dict): - for focus_type, expected_files in scenario_dict.items(): - for format_key, filename in expected_files.items(): - osfguid = self.guid_dict[focus_type] - gathered_file = pls_gather_metadata_file(osfguid, format_key) - with self.subTest(focus_type=focus_type, format_key=format_key, testpath='pls_gather_metadata_file'): - self.assertEqual(gathered_file.mediatype, EXPECTED_MEDIATYPE[format_key]) - # to update expected metadata, uncomment `_write_expected_file` and this - # next line (being careful not to leave it uncommented...) 
and run tests - # self._write_expected_file(filename, gathered_file.serialized_metadata) - self._assert_expected_file(filename, gathered_file.serialized_metadata) + for focus_type, by_partition in scenario_dict.items(): + for osfmap_partition, expected_files in by_partition.items(): + for format_key, filename in expected_files.items(): + self._assert_scenario_file(focus_type, osfmap_partition, format_key, filename) - with self.subTest(focus_type=focus_type, format_key=format_key, testpath='metadata download'): - resp = self.app.get(f'/{osfguid}/metadata/?format={format_key}') - assert resp.status_code == 200 - self.assertEqual(resp.status_code, 200) - self.assertEqual(resp.headers['Content-Type'], EXPECTED_MEDIATYPE[format_key]) - self.assertEqual( - resp.headers['Content-Disposition'], - f'attachment; filename={gathered_file.filename}', - ) - self._assert_expected_file(filename, resp.text) + def _assert_scenario_file( + self, + focus_type: str, + osfmap_partition: OsfmapPartition, + format_key: str, + filename: str, + ): + osfguid = self.guid_dict[focus_type] + gathered_file = pls_gather_metadata_file(osfguid, format_key, {'osfmap_partition': osfmap_partition}) + with self.subTest(focus_type=focus_type, format_key=format_key, testpath='pls_gather_metadata_file'): + self.assertEqual(gathered_file.mediatype, EXPECTED_MEDIATYPE[format_key]) + # to update expected metadata, uncomment `_write_expected_file` and this + # next line (being careful not to leave it uncommented...) and run tests + # self._write_expected_file(filename, gathered_file.serialized_metadata) + self._assert_expected_file(filename, gathered_file.serialized_metadata) + if not osfmap_partition.is_supplementary: + with self.subTest(focus_type=focus_type, format_key=format_key, testpath='metadata download'): + resp = self.app.get(f'/{osfguid}/metadata/?format={format_key}') + assert resp.status_code == 200 + self.assertEqual(resp.status_code, 200) + self.assertEqual(resp.headers['Content-Type'], EXPECTED_MEDIATYPE[format_key]) + self.assertEqual( + resp.headers['Content-Disposition'], + f'attachment; filename={gathered_file.filename}', + ) + self._assert_expected_file(filename, resp.text) def _assert_expected_file(self, filename, actual_metadata): _open_mode = ('rb' if isinstance(actual_metadata, bytes) else 'r') @@ -290,16 +370,16 @@ def _assert_expected_file(self, filename, actual_metadata): if filename.endswith('.turtle'): # HACK: because the turtle serializer may output things in different order # TODO: stable turtle serializer (or another primitive rdf serialization) - self._assert_equivalent_turtle(actual_metadata, _expected_metadata) + self._assert_equivalent_turtle(actual_metadata, _expected_metadata, filename) else: self.assertEqual(actual_metadata, _expected_metadata) - def _assert_equivalent_turtle(self, actual_turtle, expected_turtle): + def _assert_equivalent_turtle(self, actual_turtle, expected_turtle, filename): _actual = rdflib.Graph() _actual.parse(data=actual_turtle, format='turtle') _expected = rdflib.Graph() _expected.parse(data=expected_turtle, format='turtle') - assert_graphs_equal(_actual, _expected) + assert_graphs_equal(_actual, _expected, label=filename) # def _write_expected_file(self, filename, expected_metadata): # '''for updating expected metadata files from current serializers diff --git a/osf_tests/metrics/reporters/test_institutional_summary_reporter.py b/osf_tests/metrics/reporters/test_institutional_summary_reporter.py new file mode 100644 index 00000000000..715a2cd1553 --- /dev/null +++ 
b/osf_tests/metrics/reporters/test_institutional_summary_reporter.py @@ -0,0 +1,286 @@ +import time +import datetime +import logging +from django.test import TestCase +from osf.metrics.reporters import InstitutionalSummaryMonthlyReporter +from osf.metrics.utils import YearMonth +from osf_tests.factories import ( + InstitutionFactory, + ProjectFactory, + RegistrationFactory, + PreprintFactory, + AuthUserFactory, +) + + +class TestInstiSummaryMonthlyReporter(TestCase): + + @classmethod + def setUpTestData(cls): + cls._yearmonth = YearMonth(2018, 2) # February 2018 + cls._institution = InstitutionFactory() + cls._now = datetime.datetime(2018, 2, 4, tzinfo=datetime.UTC) + + # Existing data for the primary institution + cls._public_project = cls._create_affiliated_project(cls._institution, is_public=True, created=cls._now) + cls._private_project = cls._create_affiliated_project(cls._institution, is_public=False, created=cls._now) + cls._public_registration = cls._create_affiliated_registration(cls._institution, is_public=True, created=cls._now) + cls._embargoed_registration = cls._create_affiliated_registration(cls._institution, is_public=False, created=cls._now) + + cls._published_preprint = cls._create_affiliated_preprint(cls._institution, is_public=True, created=cls._now) + + cls._logged_in_user = cls._create_logged_in_user(cls._institution, date_last_login=cls._now) + cls._active_user = cls._create_active_user(cls._institution, date_confirmed=cls._now - datetime.timedelta(days=1)) + + @classmethod + def _create_affiliated_preprint(cls, institution, is_public, created): + published_preprint = PreprintFactory(is_public=is_public) + published_preprint.affiliated_institutions.add(institution) + published_preprint.created = created + published_preprint.save() + return published_preprint + + @classmethod + def _create_affiliated_project(cls, institution, is_public, created): + project = ProjectFactory(is_public=is_public) + project.affiliated_institutions.add(institution) + project.created = created + project.save() + return project + + @classmethod + def _create_affiliated_registration(cls, institution, is_public, created): + registration = RegistrationFactory(is_public=is_public) + registration.affiliated_institutions.add(institution) + registration.created = created + registration.save() + return registration + + @classmethod + def _create_logged_in_user(cls, institution, date_last_login): + user = AuthUserFactory() + user.add_or_update_affiliated_institution(institution) + user.date_last_login = date_last_login + user.save() + return user + + @classmethod + def _create_active_user(cls, institution, date_confirmed): + user = AuthUserFactory() + user.add_or_update_affiliated_institution(institution) + user.date_confirmed = date_confirmed + ProjectFactory(creator=user) # adds log to make active + log = user.logs.get() + log.created = date_confirmed + log.save() + user.save() + return user + + def test_report_generation(self): + reporter = InstitutionalSummaryMonthlyReporter(self._yearmonth) + reports = list(reporter.report()) + self.assertEqual(len(reports), 1) + + report = reports[0] + self.assertEqual(report.institution_id, self._institution._id) + self.assertEqual(report.user_count, 2) # _logged_in_user and _active_user + self.assertEqual(report.public_project_count, 1) + self.assertEqual(report.private_project_count, 1) + self.assertEqual(report.public_registration_count, 1) + self.assertEqual(report.embargoed_registration_count, 1) + self.assertEqual(report.published_preprint_count, 1) + 
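# file counts and bytes come from the published preprint's primary file;
# the projects and registrations created above have no files attached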
self.assertEqual(report.storage_byte_count, 1337) # test value for one file + self.assertEqual(report.public_file_count, 1) + self.assertEqual(report.monthly_logged_in_user_count, 1) + self.assertEqual(report.monthly_active_user_count, 1) + + def test_report_generation_multiple_institutions(self): + institution2 = InstitutionFactory() + institution3 = InstitutionFactory() + + # Set up dates for different months + last_month = datetime.datetime(2018, 1, 15, tzinfo=datetime.UTC) + next_month = datetime.datetime(2018, 3, 10, tzinfo=datetime.UTC) + + self._create_affiliated_project(institution2, is_public=True, created=self._now) + self._create_affiliated_project(institution3, is_public=True, created=last_month) + + # Create future projects for self._institution (should not be counted) + self._create_affiliated_project(self._institution, is_public=True, created=next_month) + + # Create users affiliated with different institutions + self._create_active_user(institution2, date_confirmed=self._now) + self._create_active_user(institution3, date_confirmed=last_month) + + # Run the reporter for the current month (February 2018) + reporter = InstitutionalSummaryMonthlyReporter(self._yearmonth) + reports = list(reporter.report()) + self.assertEqual(len(reports), 3) # Reports for self._institution, institution2, institution3 + + # Extract reports by institution + report_institution = next(r for r in reports if r.institution_id == self._institution._id) + report_institution2 = next(r for r in reports if r.institution_id == institution2._id) + + # Validate report for self._institution + self.assertEqual(report_institution.public_project_count, 1) + self.assertEqual(report_institution.private_project_count, 1) + self.assertEqual(report_institution.user_count, 2) + self.assertEqual(report_institution.monthly_active_user_count, 1) + self.assertEqual(report_institution.monthly_logged_in_user_count, 1) + + # Validate report for institution2 + self.assertEqual(report_institution2.public_project_count, 1) + self.assertEqual(report_institution2.private_project_count, 0) + self.assertEqual(report_institution2.user_count, 1) + self.assertEqual(report_institution2.monthly_active_user_count, 1) + self.assertEqual(report_institution2.monthly_logged_in_user_count, 0) # No logged-in users + + +class TestSummaryMonthlyReporterBenchmarker(TestCase): + + @classmethod + def setUpTestData(cls): + cls.logger = logging.getLogger(__name__) + logging.basicConfig(level=logging.INFO) + cls._yearmonth = YearMonth(2018, 2) # February 2018 + cls._institution = InstitutionFactory() + cls._now = datetime.datetime(2018, 2, 4, tzinfo=datetime.UTC) + cls.enable_benchmarking = True + + @classmethod + def _create_affiliated_preprint(cls, institution, is_public, created, creator=None): + published_preprint = PreprintFactory(is_public=is_public, creator=creator) + published_preprint.affiliated_institutions.add(institution) + published_preprint.created = created + published_preprint.save() + return published_preprint + + @classmethod + def _create_affiliated_project(cls, institution, is_public, created, creator=None): + project = ProjectFactory(is_public=is_public, creator=creator) + project.affiliated_institutions.add(institution) + project.created = created + project.save() + return project + + @classmethod + def _create_affiliated_registration(cls, institution, is_public, created, creator=None): + registration = RegistrationFactory(is_public=is_public, creator=creator) + registration.affiliated_institutions.add(institution) + 
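# overwrite the factory-stamped `created` so the object falls inside the
# month being reported on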
registration.created = created + registration.save() + return registration + + @classmethod + def _create_logged_in_user(cls, institution, date_last_login): + user = AuthUserFactory() + user.add_or_update_affiliated_institution(institution) + user.date_last_login = date_last_login + user.save() + return user + + @classmethod + def _create_active_user(cls, institution, date_confirmed): + user = AuthUserFactory() + user.add_or_update_affiliated_institution(institution) + user.date_confirmed = date_confirmed + ProjectFactory(creator=user) # adds log to make active + log = user.logs.get() + log.created = date_confirmed + log.save() + user.save() + return user + + def test_high_counts_multiple_institutions(self): + """ + Test the report generation with configurable high counts for institutions, users, and their objects. + Benchmarking can be enabled by setting the 'enable_benchmarking' attribute to True. + """ + # Check if benchmarking is enabled + enable_benchmarking = self.enable_benchmarking + + # Configure counts (adjust these numbers as needed) + additional_institution_count = 1 # Number of institutions (adjust as needed) + users_per_institution = 3 # Number of users per institution (adjust as needed) + objects_per_user = 3 # Number of objects per user (adjust as needed) + + # Timing variables + if enable_benchmarking: + total_start_time = time.time() + data_creation_start_time = time.time() + + # Create institutions + institutions = [self._institution] + institutions += [InstitutionFactory() for _ in range(additional_institution_count)] + + if enable_benchmarking: + institutions_creation_time = time.time() + self.logger.info( + f"Time taken to create {additional_institution_count + 1} institutions: {institutions_creation_time - data_creation_start_time:.2f} seconds") + + # Generate data for each institution + if enable_benchmarking: + users_creation_start_time = time.time() + institution_users = {} + for institution in institutions: + # Create users for the institution + users = [] + for _ in range(users_per_institution): + user = AuthUserFactory() + user.add_or_update_affiliated_institution(institution) + user.date_last_login = self._now + user.date_confirmed = self._now - datetime.timedelta(days=1) + user.save() + users.append(user) + institution_users[institution] = users + + if enable_benchmarking: + users_creation_time = time.time() + self.logger.info(f"Time taken to create users: {users_creation_time - users_creation_start_time:.2f} seconds") + + # Create projects, registrations, and preprints for each user + if enable_benchmarking: + objects_creation_start_time = time.time() + for institution in institutions: + users = institution_users[institution] + for user in users: + for _ in range(objects_per_user): + self._create_affiliated_project(institution, is_public=True, created=self._now, creator=user) + self._create_affiliated_project(institution, is_public=False, created=self._now, creator=user) + self._create_affiliated_registration(institution, is_public=True, created=self._now, creator=user) + self._create_affiliated_registration(institution, is_public=False, created=self._now, creator=user) + self._create_affiliated_preprint(institution, is_public=True, created=self._now, creator=user) + + if enable_benchmarking: + objects_creation_time = time.time() + self.logger.info( + f"Time taken to create objects: {objects_creation_time - objects_creation_start_time:.2f} seconds") + data_creation_end_time = time.time() + self.logger.info( + f"Total time taken to create data: 
{data_creation_end_time - data_creation_start_time:.2f} seconds") + + # Run the reporter + if enable_benchmarking: + reporter_start_time = time.time() + reporter = InstitutionalSummaryMonthlyReporter(self._yearmonth) + reports = list(reporter.report()) + assert len(reports) == additional_institution_count + 1 + + if enable_benchmarking: + reporter_end_time = time.time() + self.logger.info(f"Time taken to run the reporter: {reporter_end_time - reporter_start_time:.2f} seconds") + total_end_time = time.time() + self.logger.info(f"Total test execution time: {total_end_time - total_start_time:.2f} seconds") + + self.assertEqual(len(reports), additional_institution_count + 1) + + # Validate counts for each institution + expected_count = users_per_institution * objects_per_user + for report in reports: + self.assertEqual(report.public_project_count, expected_count) + self.assertEqual(report.private_project_count, expected_count) + self.assertEqual(report.public_registration_count, expected_count) + self.assertEqual(report.embargoed_registration_count, expected_count) + self.assertEqual(report.published_preprint_count, expected_count) + self.assertEqual(report.user_count, users_per_institution) + self.assertEqual(report.monthly_logged_in_user_count, users_per_institution) diff --git a/osf_tests/metrics/reporters/test_institutional_users_reporter.py b/osf_tests/metrics/reporters/test_institutional_users_reporter.py new file mode 100644 index 00000000000..876fd08cf9b --- /dev/null +++ b/osf_tests/metrics/reporters/test_institutional_users_reporter.py @@ -0,0 +1,262 @@ +from __future__ import annotations +import dataclasses +import datetime +import unittest + +from django.test import TestCase + +from api_tests.utils import create_test_file +from osf import models as osfdb +from osf.metrics.reports import InstitutionalUserReport +from osf.metrics.reporters import InstitutionalUsersReporter +from osf.metrics.utils import YearMonth +from osf_tests.factories import ( + InstitutionFactory, + PreprintFactory, + ProjectFactory, + RegistrationFactory, + UserFactory, + EmbargoFactory, +) + + +def _patch_now(fakenow: datetime.datetime): + return unittest.mock.patch('django.utils.timezone.now', return_value=fakenow) + + +class TestInstiUsersReporter(TestCase): + @classmethod + def setUpTestData(cls): + cls._yearmonth = YearMonth(2012, 7) + cls._now = datetime.datetime( + cls._yearmonth.year, + cls._yearmonth.month, + 13, # just some day in the month + tzinfo=datetime.UTC, + ) + with _patch_now(cls._now): + cls._institution = InstitutionFactory() + cls._user_setup_with_nothing = _InstiUserSetup(0, 0, 0, 0, 0, cls._institution, cls._now) + cls._user_setup_with_ones = _InstiUserSetup(1, 1, 1, 1, 1, cls._institution, cls._now) + cls._user_setup_with_stuff = _InstiUserSetup( + 2, 3, 5, 3, 2, cls._institution, cls._now, + orcid_id='1111-2222-3333-4444', + department_name='blargl studies', + ) + cls._user_setup_with_stuff.fill_uncounted_objects() + + def _assert_report_matches_setup(self, report: InstitutionalUserReport, setup: _InstiUserSetup): + self.assertEqual(report.institution_id, setup.institution._id) + # user info: + self.assertEqual(report.user_id, setup.user._id) + self.assertEqual(report.user_name, setup.user.fullname) + self.assertEqual(report.department_name, setup.department_name) + self.assertEqual(report.month_last_login, YearMonth.from_date(setup.user.date_last_login)) + if setup.month_last_active: + self.assertEqual(report.month_last_active, YearMonth.from_date(setup.month_last_active)) + else: + 
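# month_last_active stays None for users with no node or preprint logs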
self.assertEqual(report.month_last_active, setup.month_last_active) + + self.assertEqual(report.account_creation_date, YearMonth.from_date(setup.user.created)) + self.assertEqual(report.orcid_id, setup.orcid_id) + # counts (NOTE: report.public_file_count and report.storage_byte_count tested separately) + self.assertEqual(report.public_project_count, setup.public_project_count) + self.assertEqual(report.private_project_count, setup.private_project_count) + self.assertEqual(report.public_registration_count, setup.public_registration_count) + self.assertEqual(report.embargoed_registration_count, setup.embargoed_registration_count) + self.assertEqual(report.published_preprint_count, setup.published_preprint_count) + + def test_no_users(self): + _actual_reports = list(InstitutionalUsersReporter(self._yearmonth).report()) + self.assertEqual(_actual_reports, []) + + def test_one_user_with_nothing(self): + self._user_setup_with_nothing.affiliate_user() + _reports = list(InstitutionalUsersReporter(self._yearmonth).report()) + self.assertEqual(len(_reports), 1) + self._assert_report_matches_setup(_reports[0], self._user_setup_with_nothing) + + def test_one_user_with_ones(self): + self._user_setup_with_ones.affiliate_user() + _reports = list(InstitutionalUsersReporter(self._yearmonth).report()) + self.assertEqual(len(_reports), 1) + self._assert_report_matches_setup(_reports[0], self._user_setup_with_ones) + + def test_one_user_with_stuff_and_no_files(self): + self._user_setup_with_stuff.affiliate_user() + _reports = list(InstitutionalUsersReporter(self._yearmonth).report()) + self.assertEqual(len(_reports), 1) + self._assert_report_matches_setup(_reports[0], self._user_setup_with_stuff) + self.assertEqual(_reports[0].public_file_count, 2) # preprint 2 files + self.assertEqual(_reports[0].storage_byte_count, 2674) # preprint bytes + + def test_one_user_with_stuff_and_a_file(self): + self._user_setup_with_stuff.affiliate_user() + _user = self._user_setup_with_stuff.user + _project = _user.nodes.first() + with _patch_now(self._now): + create_test_file(target=_project, user=_user, size=37) + (_report,) = InstitutionalUsersReporter(self._yearmonth).report() + self._assert_report_matches_setup(_report, self._user_setup_with_stuff) + self.assertEqual(_report.public_file_count, 3) # 2 preprint files + self.assertEqual(_report.storage_byte_count, 2711) # 2 preprint files + + def test_one_user_with_stuff_and_multiple_files(self): + self._user_setup_with_stuff.affiliate_user() + _user = self._user_setup_with_stuff.user + _project = _user.nodes.first() + with _patch_now(self._now): + create_test_file(target=_project, user=_user, size=37, filename='b') + create_test_file(target=_project, user=_user, size=73, filename='bl') + _component = ProjectFactory(parent=_project, creator=_user, is_public=True) + _component.affiliated_institutions.add(self._institution) + create_test_file(target=_component, user=_user, size=53, filename='bla') + create_test_file(target=_component, user=_user, size=51, filename='blar') + create_test_file(target=_component, user=_user, size=47, filename='blarg') + (_report,) = InstitutionalUsersReporter(self._yearmonth).report() + self._assert_report_matches_setup(_report, self._user_setup_with_stuff) + self.assertEqual(_report.public_file_count, 7) # 2 preprint files + self.assertEqual(_report.storage_byte_count, 2935) # 2 preprint files + 37 + 73 + 53 + 51 + 47 + + def test_several_users(self): + _setups = [ + self._user_setup_with_nothing, + self._user_setup_with_ones, + 
self._user_setup_with_stuff, + ] + for _setup in _setups: + _setup.affiliate_user() + _setup_by_userid = { + _setup.user._id: _setup + for _setup in _setups + } + _reports = list(InstitutionalUsersReporter(self._yearmonth).report()) + self.assertEqual(len(_reports), len(_setup_by_userid)) + for _actual_report in _reports: + _setup = _setup_by_userid[_actual_report.user_id] + self._assert_report_matches_setup(_actual_report, _setup) + + +@dataclasses.dataclass +class _InstiUserSetup: + '''helper class to simplify database setup for a test-case + + (note: public_file_count and storage_byte_count set up separately) + ''' + public_project_count: int + private_project_count: int + public_registration_count: int + embargoed_registration_count: int + published_preprint_count: int + institution: osfdb.Institution + now: datetime.datetime + department_name: str | None = None + orcid_id: str | None = None + user: osfdb.OSFUser = dataclasses.field(init=False) + month_last_active: datetime.datetime | None = dataclasses.field(init=False) + + def __post_init__(self): + self.user = UserFactory( + date_last_login=self.now, + external_identity=( + {'ORCID': {self.orcid_id: 'VERIFIED'}} + if self.orcid_id + else {} + ), + ) + self._add_affiliations(self._generate_counted_objects()) + node_logs = self.user.logs.order_by('-created') + preprint_logs = self.user.preprint_logs.order_by('-created') + + dates = filter(bool, [ + node_logs.values_list('created', flat=True).first(), + preprint_logs.values_list('created', flat=True).first(), + ]) + + self.month_last_active = max(dates, default=None) + + def affiliate_user(self): + self.user.add_or_update_affiliated_institution( + self.institution, + sso_department=self.department_name, + ) + + @property + def future_timestamp(self): + return self.now + datetime.timedelta(days=123) + + def fill_uncounted_objects(self): + # uncounted because not affiliated: + self._add_public_project() + self._add_private_project() + self._add_public_registration() + self._add_embargoed_registration() + self._add_published_preprint() + # uncounted because affiliated with another institution: + self._add_affiliations(( + self._add_public_project(), + self._add_private_project(), + self._add_public_registration(), + self._add_embargoed_registration(), + self._add_published_preprint(), + ), institution=InstitutionFactory()) + # uncounted because created after the report's time range: + with _patch_now(self.future_timestamp): + self._add_affiliations(( + self._add_public_project(), + self._add_private_project(), + self._add_public_registration(), + self._add_embargoed_registration(), + self._add_published_preprint(), + )) + + def _add_affiliations(self, objs, institution=None): + for _obj in objs: + if _obj is not None: + _obj.affiliated_institutions.add(institution or self.institution) + + def _generate_counted_objects(self): + for _ in range(self.public_project_count): + yield self._add_public_project() + for _ in range(self.private_project_count): + yield self._add_private_project() + for _ in range(self.public_registration_count): + yield self._add_public_registration() + for _ in range(self.embargoed_registration_count): + yield self._add_embargoed_registration() + for _ in range(self.published_preprint_count): + yield self._add_published_preprint() + + def _add_public_project(self) -> osfdb.Node: + return ProjectFactory( + creator=self.user, + is_public=True, + ) + + def _add_private_project(self) -> osfdb.Node: + return ProjectFactory( + creator=self.user, + is_public=False, + ) + + 
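# registration helpers: public ones count toward public_registration_count;
# embargoed ones are private with an embargo ending in the future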
def _add_public_registration(self) -> osfdb.Registration: + return RegistrationFactory( + creator=self.user, + is_public=True, + ) + + def _add_embargoed_registration(self) -> osfdb.Registration: + return RegistrationFactory( + creator=self.user, + is_public=False, + embargo=EmbargoFactory( + user=self.user, + end_date=self.future_timestamp, + ), + ) + + def _add_published_preprint(self) -> osfdb.Preprint | None: + return PreprintFactory( + creator=self.user, + is_public=True, + ) diff --git a/osf_tests/metrics/reporters/test_public_item_usage_reporter.py b/osf_tests/metrics/reporters/test_public_item_usage_reporter.py new file mode 100644 index 00000000000..454b8d6700d --- /dev/null +++ b/osf_tests/metrics/reporters/test_public_item_usage_reporter.py @@ -0,0 +1,238 @@ +from datetime import timedelta +from operator import attrgetter +from unittest import mock + +import pytest + +from osf.metrics.counted_usage import CountedAuthUsage +from osf.metrics.reporters.public_item_usage import PublicItemUsageReporter +from osf.metrics.reports import PublicItemUsageReport +from osf.metrics.utils import YearMonth + + +@pytest.mark.es_metrics +class TestPublicItemUsageReporter: + @pytest.fixture(autouse=True) + def _mocks(self): + with ( + # set a tiny page size to force aggregation pagination: + mock.patch('osf.metrics.reporters.public_item_usage._CHUNK_SIZE', 1), + # HACK: skip auto-filling fields from the database: + mock.patch('osf.models.base.Guid.load', return_value=None), + ): + yield + + @pytest.fixture + def ym_empty(self) -> YearMonth: + return YearMonth(2012, 7) + + @pytest.fixture + def ym_sparse(self) -> YearMonth: + return YearMonth(2017, 7) + + @pytest.fixture + def ym_busy(self) -> YearMonth: + return YearMonth(2023, 7) + + @pytest.fixture + def sparse_month_usage(self, ym_sparse): + # "sparse" month: + # item0: 3 views, 0 downloads, 2 sessions + # item1: 1 views, 1 download, 1 session (plus 1 view from child item2) + # item2: 1 views, 0 downloads, 1 session + _month_start = ym_sparse.month_start() + _save_usage( + timestamp=_month_start, + item_guid='item0', + session_id='sesh0', + action_labels=['view'], + ) + _save_usage( + timestamp=_month_start + timedelta(minutes=2), + item_guid='item0', + session_id='sesh0', + action_labels=['view'], + ) + _save_usage( + timestamp=_month_start + timedelta(minutes=3), + item_guid='item1', + session_id='sesh0', + action_labels=['download'], + ) + _save_usage( + timestamp=_month_start + timedelta(days=17), + item_guid='item0', + session_id='sesh1', + action_labels=['view'], + ) + _save_usage( + timestamp=_month_start + timedelta(days=17, minutes=3), + item_guid='item1', + session_id='sesh1', + action_labels=['view'], + ) + _save_usage( + timestamp=_month_start + timedelta(days=17, minutes=5), + item_guid='item2', + surrounding_guids=['item1'], + session_id='sesh1', + action_labels=['view'], + ) + _save_usage( + timestamp=_month_start + timedelta(days=17, minutes=11), + item_guid='item2', + surrounding_guids=['item1'], + session_id='sesh1', + action_labels=['download'], + ) + + @pytest.fixture + def busy_month_item0(self, ym_busy): + # item0: 4 sessions, 4*7 views, 4*5 downloads + _month_start = ym_busy.month_start() + for _sesh in range(0, 4): + _sesh_start = _month_start + timedelta(days=_sesh) + for _minute in range(0, 7): + _save_usage( + timestamp=_sesh_start + timedelta(minutes=_minute), + item_guid='item0', + session_id=f'sesh0{_sesh}', + action_labels=['view'], + ) + for _minute in range(10, 15): + _save_usage( + timestamp=_sesh_start + 
timedelta(minutes=_minute), + item_guid='item0', + session_id=f'sesh0{_sesh}', + action_labels=['download'], + ) + + @pytest.fixture + def busy_month_item1(self, ym_busy): + # item1: 10 sessions, 6*9 views, 5*7 downloads, 2 providers + # (plus 11 views in 11 sessions from child item2) + _month_start = ym_busy.month_start() + for _sesh in range(0, 6): + _sesh_start = _month_start + timedelta(days=_sesh) + for _minute in range(0, 9): + _save_usage( + timestamp=_sesh_start + timedelta(minutes=_minute), + item_guid='item1', + session_id=f'sesh1{_sesh}', + action_labels=['view'], + ) + for _sesh in range(5, 10): + _sesh_start = _month_start + timedelta(days=_sesh) + for _minute in range(10, 17): + _save_usage( + timestamp=_sesh_start + timedelta(minutes=_minute), + item_guid='item1', + session_id=f'sesh1{_sesh}', + action_labels=['download'], + provider_id='prov1', # additional provider_id + ) + + @pytest.fixture + def busy_month_item2(self, ym_busy): + # item2: 11 sessions, 11 views, 11 downloads (child of item1) + _month_start = ym_busy.month_start() + for _sesh in range(1, 12): + _save_usage( + timestamp=_month_start + timedelta(days=_sesh), + item_guid='item2', + surrounding_guids=['item1'], + session_id=f'sesh2{_sesh}', + action_labels=['view'], + ) + _save_usage( + timestamp=_month_start + timedelta(days=_sesh, hours=_sesh), + item_guid='item2', + surrounding_guids=['item1'], + session_id=f'sesh2{_sesh}', + action_labels=['download'], + ) + + def test_no_data(self, ym_empty): + _reporter = PublicItemUsageReporter(ym_empty) + _empty = list(_reporter.report()) + assert _empty == [] + + def test_reporter(self, ym_empty, ym_sparse, ym_busy, sparse_month_usage, busy_month_item0, busy_month_item1, busy_month_item2): + _empty = list(PublicItemUsageReporter(ym_empty).report()) + _sparse = list(PublicItemUsageReporter(ym_sparse).report()) + _busy = list(PublicItemUsageReporter(ym_busy).report()) + + # empty month: + assert _empty == [] + + # sparse month: + assert len(_sparse) == 3 + _sparse_item0, _sparse_item1, _sparse_item2 = sorted(_sparse, key=attrgetter('item_osfid')) + # sparse-month item0 + assert isinstance(_sparse_item0, PublicItemUsageReport) + assert _sparse_item0.item_osfid == 'item0' + assert _sparse_item0.provider_id == ['prov0'] + assert _sparse_item0.platform_iri == ['http://osf.example'] + assert _sparse_item0.view_count == 3 + assert _sparse_item0.view_session_count == 2 + assert _sparse_item0.download_count == 0 + assert _sparse_item0.download_session_count == 0 + # sparse-month item1 + assert isinstance(_sparse_item1, PublicItemUsageReport) + assert _sparse_item1.item_osfid == 'item1' + assert _sparse_item1.provider_id == ['prov0'] + assert _sparse_item1.platform_iri == ['http://osf.example'] + assert _sparse_item1.view_count == 2 # including item2 + assert _sparse_item1.view_session_count == 1 # including item2 + assert _sparse_item1.download_count == 1 # NOT including item2 + assert _sparse_item1.download_session_count == 1 # NOT including item2 + # sparse-month item2 + assert isinstance(_sparse_item1, PublicItemUsageReport) + assert _sparse_item2.item_osfid == 'item2' + assert _sparse_item2.provider_id == ['prov0'] + assert _sparse_item2.platform_iri == ['http://osf.example'] + assert _sparse_item2.view_count == 1 + assert _sparse_item2.view_session_count == 1 + assert _sparse_item2.download_count == 1 + assert _sparse_item2.download_session_count == 1 + + # busy month: + assert len(_busy) == 3 + _busy_item0, _busy_item1, _busy_item2 = sorted(_busy, 
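# reporter output order isn't guaranteed, so sort by osfid before unpacking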
key=attrgetter('item_osfid')) + # busy-month item0 + assert isinstance(_busy_item0, PublicItemUsageReport) + assert _busy_item0.item_osfid == 'item0' + assert _busy_item0.provider_id == ['prov0'] + assert _busy_item0.platform_iri == ['http://osf.example'] + assert _busy_item0.view_count == 4 * 7 + assert _busy_item0.view_session_count == 4 + assert _busy_item0.download_count == 4 * 5 + assert _busy_item0.download_session_count == 4 + # busy-month item1 + assert isinstance(_busy_item1, PublicItemUsageReport) + assert _busy_item1.item_osfid == 'item1' + assert _busy_item1.provider_id == ['prov0', 'prov1'] + assert _busy_item1.platform_iri == ['http://osf.example'] + assert _busy_item1.view_count == 6 * 9 + 11 + assert _busy_item1.view_session_count == 6 + 11 + assert _busy_item1.download_count == 5 * 7 + assert _busy_item1.download_session_count == 5 + # busy-month item2 + assert isinstance(_busy_item2, PublicItemUsageReport) + assert _busy_item2.item_osfid == 'item2' + assert _busy_item2.provider_id == ['prov0'] + assert _busy_item2.platform_iri == ['http://osf.example'] + assert _busy_item2.view_count == 11 + assert _busy_item2.view_session_count == 11 + assert _busy_item2.download_count == 11 + assert _busy_item2.download_session_count == 11 + + +def _save_usage(**kwargs): + _kwargs = { # overridable defaults: + 'platform_iri': 'http://osf.example', + 'item_public': True, + 'provider_id': 'prov0', + **kwargs, + } + CountedAuthUsage(**_kwargs).save(refresh=True) diff --git a/osf_tests/metrics/test_daily_report.py b/osf_tests/metrics/test_daily_report.py index 2089e7279c9..3840f5dba21 100644 --- a/osf_tests/metrics/test_daily_report.py +++ b/osf_tests/metrics/test_daily_report.py @@ -37,11 +37,11 @@ class Meta: assert report.meta.id == expected_key mock_save.reset_mock() - def test_with_duf(self, mock_save): + def test_with_unique_together(self, mock_save): # multiple reports of this type per day, unique by given field class UniqueByDateAndField(DailyReport): - DAILY_UNIQUE_FIELD = 'duf' - duf = metrics.Keyword() + UNIQUE_TOGETHER_FIELDS = ('report_date', 'uniquefield',) + uniquefield = metrics.Keyword() class Meta: app_label = 'osf' @@ -49,7 +49,7 @@ class Meta: today = date(2022, 5, 18) expected_blah = 'dca57e6cde89b19274ea24bc713971dab137a896b8e06d43a11a3f437cd1d151' - blah_report = UniqueByDateAndField(report_date=today, duf='blah') + blah_report = UniqueByDateAndField(report_date=today, uniquefield='blah') blah_report.save() assert mock_save.call_count == 1 assert mock_save.call_args[0][0] is blah_report @@ -57,13 +57,16 @@ class Meta: mock_save.reset_mock() expected_fleh = 'e7dd5ff6b087807efcfa958077dc713878f21c65af79b3ccdb5dc2409bf5ad99' - fleh_report = UniqueByDateAndField(report_date=today, duf='fleh') + fleh_report = UniqueByDateAndField(report_date=today, uniquefield='fleh') fleh_report.save() assert mock_save.call_count == 1 assert mock_save.call_args[0][0] is fleh_report assert fleh_report.meta.id == expected_fleh mock_save.reset_mock() - bad_report = UniqueByDateAndField(report_date=today) - with pytest.raises(ReportInvalid): - bad_report.save() + for _bad_report in ( + UniqueByDateAndField(report_date=today), + UniqueByDateAndField(report_date=today, uniquefield=['list', 'of', 'things']), + ): + with pytest.raises(ReportInvalid): + _bad_report.save() diff --git a/osf_tests/metrics/test_monthly_report.py b/osf_tests/metrics/test_monthly_report.py new file mode 100644 index 00000000000..23546eb1fb3 --- /dev/null +++ b/osf_tests/metrics/test_monthly_report.py @@ -0,0 +1,146 @@ 
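The expected report keys in these tests are stable sha-256 hex digests. A hedged sketch of how such a deterministic document id might be derived from a report's UNIQUE_TOGETHER_FIELDS values — the hashing scheme and field encoding here are illustrative assumptions, not the actual osf.metrics implementation:

import hashlib

def stable_report_key(report_type_name: str, *unique_values: str) -> str:
    # join the report type with its unique-together values and hash them,
    # yielding the same elasticsearch doc id for the same logical report
    _blob = '|'.join([report_type_name, *unique_values])
    return hashlib.sha256(_blob.encode()).hexdigest()

# e.g. repeated saves of UniqueByMonthAndField(report_yearmonth=..., uniquefield='blah')
# would map onto a single document id:
# stable_report_key('UniqueByMonthAndField', '2022-05', 'blah')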
+import datetime +from unittest import mock + +import pytest +from elasticsearch_metrics import metrics + +from osf.metrics.reports import MonthlyReport, ReportInvalid, PublicItemUsageReport +from osf.metrics.utils import YearMonth + + +class TestMonthlyReportKey: + @pytest.fixture + def mock_save(self): + with mock.patch('elasticsearch6_dsl.Document.save', autospec=True) as mock_save: + yield mock_save + + def test_default(self, mock_save): + # only one of this type of report per month + class UniqueByMonth(MonthlyReport): + blah = metrics.Keyword() + + class Meta: + app_label = 'osf' + + yearmonth = YearMonth(2022, 5) + + reports = [ + UniqueByMonth(report_yearmonth=yearmonth), + UniqueByMonth(report_yearmonth=yearmonth, blah='blah'), + UniqueByMonth(report_yearmonth=yearmonth, blah='fleh'), + ] + expected_key = '8463aac67c1e5a038049196781d8f100f069225352d1829651892cf3fbfc50e2' + + for report in reports: + report.save() + assert mock_save.call_count == 1 + assert mock_save.call_args[0][0] is report + assert report.meta.id == expected_key + mock_save.reset_mock() + + def test_with_unique_together(self, mock_save): + # multiple reports of this type per day, unique by given field + class UniqueByMonthAndField(MonthlyReport): + UNIQUE_TOGETHER_FIELDS = ('report_yearmonth', 'uniquefield',) + uniquefield = metrics.Keyword() + + class Meta: + app_label = 'osf' + + yearmonth = YearMonth(2022, 5) + + expected_blah = '62ebf38317cd8402e27a50ce99f836d1734b3f545adf7d144d0e1cf37a0d9d08' + blah_report = UniqueByMonthAndField(report_yearmonth=yearmonth, uniquefield='blah') + blah_report.save() + assert mock_save.call_count == 1 + assert mock_save.call_args[0][0] is blah_report + assert blah_report.meta.id == expected_blah + mock_save.reset_mock() + + expected_fleh = '385700db282f6d6089a0d21836db5ee8423f548615e515b6e034bcc90a14500f' + fleh_report = UniqueByMonthAndField(report_yearmonth=yearmonth, uniquefield='fleh') + fleh_report.save() + assert mock_save.call_count == 1 + assert mock_save.call_args[0][0] is fleh_report + assert fleh_report.meta.id == expected_fleh + mock_save.reset_mock() + + for _bad_report in ( + UniqueByMonthAndField(report_yearmonth=yearmonth), + UniqueByMonthAndField(report_yearmonth=yearmonth, uniquefield=['list']), + ): + with pytest.raises(ReportInvalid): + _bad_report.save() + + +@pytest.mark.es_metrics +class TestLastMonthReport: + @pytest.fixture + def osfid(self): + return 'abced' + + @pytest.fixture + def this_month(self): + return YearMonth.from_date(datetime.date.today()) + + @pytest.fixture + def last_month(self, this_month): + return _prior_yearmonth(this_month) + + @pytest.fixture + def two_months_back(self, last_month): + return _prior_yearmonth(last_month) + + @pytest.fixture + def three_months_back(self, two_months_back): + return _prior_yearmonth(two_months_back) + + @pytest.fixture + def this_month_report(self, osfid, this_month): + return _item_usage_report(this_month, osfid, view_count=77) + + @pytest.fixture + def last_month_report(self, osfid, last_month): + return _item_usage_report(last_month, osfid, view_count=57) + + @pytest.fixture + def diff_last_month_report(self, last_month): + return _item_usage_report(last_month, 'zyxvt', view_count=17) + + @pytest.fixture + def two_months_back_report(self, osfid, two_months_back): + return _item_usage_report(two_months_back, osfid, view_count=27) + + @pytest.fixture + def three_months_back_report(self, osfid, three_months_back): + return _item_usage_report(three_months_back, osfid, view_count=37) + + def 
test_with_none(self, osfid): + assert PublicItemUsageReport.for_last_month(osfid) is None + + def test_with_others(self, osfid, this_month_report, three_months_back_report, diff_last_month_report): + assert PublicItemUsageReport.for_last_month(osfid) is None + + def test_with_prior_month(self, osfid, this_month_report, two_months_back_report, three_months_back_report, diff_last_month_report): + assert PublicItemUsageReport.for_last_month(osfid) == two_months_back_report + + def test_with_last_month(self, osfid, this_month_report, last_month_report, two_months_back_report, three_months_back_report, diff_last_month_report): + assert PublicItemUsageReport.for_last_month(osfid) == last_month_report + + +def _prior_yearmonth(ym: YearMonth) -> YearMonth: + return ( + YearMonth(ym.year - 1, 1) + if ym.month == 1 + else YearMonth(ym.year, ym.month - 1) + ) + + +def _item_usage_report(ym: YearMonth, osfid: str, **kwargs): + _report = PublicItemUsageReport( + report_yearmonth=ym, + item_osfid=osfid, + **kwargs + ) + _report.save(refresh=True) + return _report diff --git a/osf_tests/metrics/test_yearmonth.txt b/osf_tests/metrics/test_yearmonth.txt new file mode 100644 index 00000000000..646c73c42f9 --- /dev/null +++ b/osf_tests/metrics/test_yearmonth.txt @@ -0,0 +1,48 @@ +YearMonth tests +(doctest-style, in a way pytest will run; see https://docs.pytest.org/en/stable/how-to/doctest.html ) +>>> from osf.metrics.utils import YearMonth + +basic dataclass behavior: +>>> YearMonth(2000, 2) +YearMonth(year=2000, month=2) +>>> YearMonth(1999, 9) +YearMonth(year=1999, month=9) +>>> ym = YearMonth(2050, 2) +>>> ym.year +2050 +>>> ym.month +2 + +`from_date` constructor, accepts either `datetime.date` or `datetime.datetime`: +>>> import datetime +>>> YearMonth.from_date(datetime.date(1973, 1, 1)) +YearMonth(year=1973, month=1) +>>> YearMonth.from_date(datetime.datetime(1974, 3, 2)) +YearMonth(year=1974, month=3) + +`from_str` constructor, accepts "YYYY-MM" format: +>>> YearMonth.from_str('2000-12') +YearMonth(year=2000, month=12) + +`__str__` method gives "YYYY-MM" format: +>>> str(YearMonth(1491, 7)) +'1491-07' + +`next` method gives the next year-month: +>>> ym = YearMonth(1491, 11) +>>> ym.next() +YearMonth(year=1491, month=12) +>>> ym.next().next() +YearMonth(year=1492, month=1) + +`month_start` method: +>>> YearMonth(3333, 3).month_start() +datetime.datetime(3333, 3, 1, 0, 0, tzinfo=datetime.timezone.utc) +>>> YearMonth(1999, 12).month_start().isoformat() +'1999-12-01T00:00:00+00:00' + +`month_end` method: +>>> YearMonth(3333, 3).month_end() +datetime.datetime(3333, 4, 1, 0, 0, tzinfo=datetime.timezone.utc) +>>> YearMonth(1999, 12).month_end().isoformat() +'2000-01-01T00:00:00+00:00' diff --git a/osf_tests/test_management_commands.py b/osf_tests/test_management_commands.py index 8f29e72bc93..26e34601648 100644 --- a/osf_tests/test_management_commands.py +++ b/osf_tests/test_management_commands.py @@ -265,7 +265,7 @@ def test_data_storage_usage_command(self): assert (key, expected_summary_data[key]) == (key, actual_summary_data[key]) -@pytest.mark.es +@pytest.mark.es_metrics @pytest.mark.django_db class TestInstitutionMetricsUpdate: From d8e34ab08913b63fa32de79923b9e07b8cbf4199 Mon Sep 17 00:00:00 2001 From: Matt Frazier Date: Thu, 14 Nov 2024 13:51:43 -0500 Subject: [PATCH 12/35] Update CHANGELOG, bump version --- CHANGELOG | 5 +++++ package.json | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGELOG b/CHANGELOG index 94705b38a79..32a02066ce0 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ 
-2,6 +2,11 @@ We follow the CalVer (https://calver.org/) versioning scheme: YY.MINOR.MICRO. +24.09.0 (2024-11-14) +==================== + +- Institutional Dashboard Project BE Release + 24.08.0 (2024-10-30) ==================== diff --git a/package.json b/package.json index 8b0edd12961..7fcf0590044 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "OSF", - "version": "24.08.0", + "version": "24.09.0", "description": "Facilitating Open Science", "repository": "https://github.com/CenterForOpenScience/osf.io", "author": "Center for Open Science", From 30b2df79de142490bd796d5fa755351e8c4ff8be Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Thu, 14 Nov 2024 13:54:52 -0500 Subject: [PATCH 13/35] skip deleted/private items in recatalog by default add `--also-decatalog` param to decatalog all deleted/private items --- osf/management/commands/recatalog_metadata.py | 18 ++++++ .../test_recatalog_metadata.py | 60 +++++++++++++++---- 2 files changed, 68 insertions(+), 10 deletions(-) diff --git a/osf/management/commands/recatalog_metadata.py b/osf/management/commands/recatalog_metadata.py index 43c647e5861..d46face0ba6 100644 --- a/osf/management/commands/recatalog_metadata.py +++ b/osf/management/commands/recatalog_metadata.py @@ -144,6 +144,11 @@ def add_arguments(self, parser): default=int(9e9), help='maximum number of chunks (default all/enough/lots)', ) + parser.add_argument( + '--also-decatalog', + action='store_true', + help='also remove private and deleted items from the catalog', + ) def handle(self, *args, **options): pls_all_types = options['all_types'] @@ -157,6 +162,7 @@ def handle(self, *args, **options): chunk_size = options['chunk_size'] chunk_count = options['chunk_count'] datacite_custom_types = options['datacite_custom_types'] + also_decatalog = options['also_decatalog'] if datacite_custom_types: # temporary arg for datacite 4.5 migration assert not start_id, 'oh no, cannot resume with `--datacite-custom-types`' @@ -185,4 +191,16 @@ def handle(self, *args, **options): _queryset = _queryset.filter( provider__in=AbstractProvider.objects.filter(_id__in=provider_ids), ) + if not also_decatalog: + if provided_model is OsfStorageFile: + _queryset = _queryset.filter(deleted__isnull=True) + elif provided_model is OSFUser: + _queryset = _queryset.filter( + deleted__isnull=True, + is_active=True, + ).exclude(allow_indexing=False) + elif provided_model is Preprint: + _queryset = _queryset.filter(is_public=True, is_published=True, deleted__isnull=True) + else: + _queryset = _queryset.filter(is_public=True, deleted__isnull=True) recatalog(_queryset, start_id, chunk_count, chunk_size) diff --git a/osf_tests/management_commands/test_recatalog_metadata.py b/osf_tests/management_commands/test_recatalog_metadata.py index 85742b76094..4354a54048e 100644 --- a/osf_tests/management_commands/test_recatalog_metadata.py +++ b/osf_tests/management_commands/test_recatalog_metadata.py @@ -1,3 +1,4 @@ +import datetime import pytest from unittest import mock from operator import attrgetter @@ -12,6 +13,7 @@ ProjectFactory, RegistrationProviderFactory, RegistrationFactory, + UserFactory, ) @@ -41,18 +43,15 @@ def registration_provider(self): @pytest.fixture def registrations(self, registration_provider): return sorted_by_id([ - RegistrationFactory(provider=registration_provider) + RegistrationFactory(provider=registration_provider, is_public=True) for _ in range(7) ]) @pytest.fixture def projects(self, registrations): return sorted_by_id([ - ProjectFactory() + 
ProjectFactory(is_public=True) for _ in range(7) - ] + [ - registration.registered_from - for registration in registrations ]) @pytest.fixture @@ -93,6 +92,23 @@ def items_with_custom_datacite_type(self, preprints, registrations, projects, fi *_nonpreprint_sample, } + @pytest.fixture + def decatalog_items(self, registrations): + _user = UserFactory(allow_indexing=False) + _registration = RegistrationFactory(is_public=False, creator=_user) + _implicit_projects = [ + _registration.registered_from, + *(_reg.registered_from for _reg in registrations), + ] + return [ + _user, + _registration, + *_implicit_projects, + PreprintFactory(is_published=False, creator=_user), + ProjectFactory(is_public=False, creator=_user), + ProjectFactory(deleted=datetime.datetime.now(), creator=_user), + ] + def test_recatalog_metadata( self, mock_update_share_task, @@ -104,7 +120,14 @@ def test_recatalog_metadata( files, users, items_with_custom_datacite_type, + decatalog_items, ): + def _actual_osfids() -> set[str]: + return { + _call[-1]['kwargs']['guid'] + for _call in mock_update_share_task.apply_async.mock_calls + } + # test preprints call_command( 'recatalog_metadata', @@ -189,11 +212,28 @@ def test_recatalog_metadata( '--datacite-custom-types', ) _expected_osfids = set(_iter_osfids(items_with_custom_datacite_type)) - _actual_osfids = { - _call[-1]['kwargs']['guid'] - for _call in mock_update_share_task.apply_async.mock_calls - } - assert _expected_osfids == _actual_osfids + assert _expected_osfids == _actual_osfids() + + mock_update_share_task.reset_mock() + + # all types + _all_public_items = [*preprints, *registrations, *projects, *files, *users] + call_command( + 'recatalog_metadata', + '--all-types', + ) + _expected_osfids = set(_iter_osfids(_all_public_items)) + assert _expected_osfids == _actual_osfids() + + # also decatalog private/deleted items + _all_items = [*_all_public_items, *decatalog_items] + call_command( + 'recatalog_metadata', + '--all-types', + '--also-decatalog', + ) + _expected_osfids = set(_iter_osfids(_all_items)) + assert _expected_osfids == _actual_osfids() ### From a57467fc2bf328311107546d3ac10f074e11229e Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Thu, 14 Nov 2024 13:57:14 -0500 Subject: [PATCH 14/35] remove 'temporary arg' --- osf/management/commands/recatalog_metadata.py | 30 ------------------- .../test_recatalog_metadata.py | 28 ----------------- 2 files changed, 58 deletions(-) diff --git a/osf/management/commands/recatalog_metadata.py b/osf/management/commands/recatalog_metadata.py index d46face0ba6..be52e9b0a0e 100644 --- a/osf/management/commands/recatalog_metadata.py +++ b/osf/management/commands/recatalog_metadata.py @@ -55,21 +55,6 @@ def _recatalog_all(queryset, chunk_size): recatalog(queryset, start_id=0, chunk_count=int(9e9), chunk_size=chunk_size) -def _recatalog_datacite_custom_types(chunk_size): - logger.info('recataloguing items with datacite custom type...') - # all preprints - _recatalog_all(Preprint.objects, chunk_size) - # objects with custom resource_type_general - for _model in {Registration, Node, OsfStorageFile}: - _queryset = ( - _model.objects - .exclude(guids__metadata_record__isnull=True) - .exclude(guids__metadata_record__resource_type_general='') - ) - _recatalog_all(_queryset, chunk_size) - logger.info('done recataloguing items with datacite custom type!') - - class Command(BaseCommand): def add_arguments(self, parser): type_group = parser.add_mutually_exclusive_group(required=True) @@ -103,14 +88,6 @@ def add_arguments(self, parser): 
action='store_true', help='recatalog metadata for users', ) - type_group.add_argument( - '--datacite-custom-types', - action='store_true', - help='''recatalog metadata for items with a specific datacite type, - including all preprints and items with custom resource_type_general - (may be slow for lack of database indexes) - ''', - ) provider_group = parser.add_mutually_exclusive_group() provider_group.add_argument( @@ -161,15 +138,8 @@ def handle(self, *args, **options): start_id = options['start_id'] chunk_size = options['chunk_size'] chunk_count = options['chunk_count'] - datacite_custom_types = options['datacite_custom_types'] also_decatalog = options['also_decatalog'] - if datacite_custom_types: # temporary arg for datacite 4.5 migration - assert not start_id, 'oh no, cannot resume with `--datacite-custom-types`' - assert not provider_ids, 'oh no, cannot filter providers with `--datacite-custom-types`' - _recatalog_datacite_custom_types(chunk_size) - return # end - if pls_all_types: assert not start_id, 'choose a specific type to resume with --start-id' provided_models = [Preprint, Registration, Node, OSFUser, OsfStorageFile] diff --git a/osf_tests/management_commands/test_recatalog_metadata.py b/osf_tests/management_commands/test_recatalog_metadata.py index 4354a54048e..550f06e4d13 100644 --- a/osf_tests/management_commands/test_recatalog_metadata.py +++ b/osf_tests/management_commands/test_recatalog_metadata.py @@ -2,11 +2,9 @@ import pytest from unittest import mock from operator import attrgetter -import random from django.core.management import call_command -from osf.models.metadata import GuidMetadataRecord from osf_tests.factories import ( PreprintProviderFactory, PreprintFactory, @@ -77,21 +75,6 @@ def users(self, preprints, registrations, projects): for preprint in preprints ]))) - @pytest.fixture - def items_with_custom_datacite_type(self, preprints, registrations, projects, files): - _nonpreprint_sample = [ - random.choice(_items) - for _items in (registrations, projects, files) - ] - for _item in _nonpreprint_sample: - _guid_record = GuidMetadataRecord.objects.for_guid(_item) - _guid_record.resource_type_general = 'BookChapter' # datacite resourceTypeGeneral value - _guid_record.save() - return { - *preprints, # every preprint has datacite type "Preprint" - *_nonpreprint_sample, - } - @pytest.fixture def decatalog_items(self, registrations): _user = UserFactory(allow_indexing=False) @@ -119,7 +102,6 @@ def test_recatalog_metadata( projects, files, users, - items_with_custom_datacite_type, decatalog_items, ): def _actual_osfids() -> set[str]: @@ -206,16 +188,6 @@ def _actual_osfids() -> set[str]: mock_update_share_task.reset_mock() - # datacite custom types - call_command( - 'recatalog_metadata', - '--datacite-custom-types', - ) - _expected_osfids = set(_iter_osfids(items_with_custom_datacite_type)) - assert _expected_osfids == _actual_osfids() - - mock_update_share_task.reset_mock() - # all types _all_public_items = [*preprints, *registrations, *projects, *files, *users] call_command( From 292dca292a0c0a5558cd0c4509db145719bd8d59 Mon Sep 17 00:00:00 2001 From: John Tordoff Date: Tue, 5 Nov 2024 10:41:31 -0500 Subject: [PATCH 15/35] [ENG-6364] Migrate Preprint Affilations (#10787) * add management command to migrate preprint affiliations * make sure migrations uses primary instead of replica, improve tests * remove redundant permission --------- Co-authored-by: John Tordoff <> --- .../commands/migrate_preprint_affiliation.py | 113 +++++++++++++++++ 
.../test_migrate_preprint_affiliations.py | 115 ++++++++++++++++++ 2 files changed, 228 insertions(+) create mode 100644 osf/management/commands/migrate_preprint_affiliation.py create mode 100644 osf_tests/management_commands/test_migrate_preprint_affiliations.py diff --git a/osf/management/commands/migrate_preprint_affiliation.py b/osf/management/commands/migrate_preprint_affiliation.py new file mode 100644 index 00000000000..78e7b2786ff --- /dev/null +++ b/osf/management/commands/migrate_preprint_affiliation.py @@ -0,0 +1,113 @@ +import datetime +import logging + +from django.core.management.base import BaseCommand +from django.db import transaction +from django.db.models import F, Exists, OuterRef + +from osf.models import PreprintContributor, InstitutionAffiliation + +logger = logging.getLogger(__name__) + + +class Command(BaseCommand): + """Assign affiliations from users to preprints where they have write or admin permissions, with optional exclusion by user GUIDs.""" + + help = 'Assign affiliations from users to preprints where they have write or admin permissions.' + + def add_arguments(self, parser): + parser.add_argument( + '--exclude-guids', + nargs='+', + dest='exclude_guids', + help='List of user GUIDs to exclude from affiliation assignment' + ) + parser.add_argument( + '--dry-run', + action='store_true', + dest='dry_run', + help='If true, performs a dry run without making changes' + ) + parser.add_argument( + '--batch-size', + type=int, + default=1000, + dest='batch_size', + help='Number of contributors to process in each batch' + ) + + def handle(self, *args, **options): + start_time = datetime.datetime.now() + logger.info(f'Script started at: {start_time}') + + exclude_guids = set(options.get('exclude_guids') or []) + dry_run = options.get('dry_run', False) + batch_size = options.get('batch_size', 1000) + + if dry_run: + logger.info('Dry run mode activated.') + + processed_count, updated_count = assign_affiliations_to_preprints( + exclude_guids=exclude_guids, + dry_run=dry_run, + batch_size=batch_size + ) + + finish_time = datetime.datetime.now() + logger.info(f'Script finished at: {finish_time}') + logger.info(f'Total processed: {processed_count}, Updated: {updated_count}') + logger.info(f'Total run time: {finish_time - start_time}') + + +def assign_affiliations_to_preprints(exclude_guids=None, dry_run=True, batch_size=1000): + exclude_guids = exclude_guids or set() + processed_count = updated_count = 0 + + # Subquery to check if the user has any affiliated institutions + user_has_affiliations = Exists( + InstitutionAffiliation.objects.filter( + user=OuterRef('user') + ) + ) + + contributors_qs = PreprintContributor.objects.filter( + preprint__preprintgroupobjectpermission__permission__codename__in=['write_preprint'], + preprint__preprintgroupobjectpermission__group__user=F('user'), + ).filter( + user_has_affiliations + ).select_related( + 'user', + 'preprint' + ).exclude( + user__guids___id__in=exclude_guids + ).order_by('pk') # Ensure consistent ordering for batching + + total_contributors = contributors_qs.count() + logger.info(f'Total contributors to process: {total_contributors}') + + # Process contributors in batches + with transaction.atomic(): + for offset in range(0, total_contributors, batch_size): + # Use select_for_update() to ensure query hits the primary database + batch_contributors = contributors_qs[offset:offset + batch_size].select_for_update() + + logger.info(f'Processing contributors {offset + 1} to {min(offset + batch_size, total_contributors)}') + + 
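+            # note: slicing first keeps each locked batch small, and
+            # select_for_update() both routes this read to the primary database
+            # (not a replica) and row-locks the batch until the enclosing
+            # transaction.atomic() block commits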
for contributor in batch_contributors: + user = contributor.user + preprint = contributor.preprint + + user_institutions = user.get_affiliated_institutions() + processed_count += 1 + if not dry_run: + preprint.affiliated_institutions.add(*user_institutions) + updated_count += 1 + logger.info( + f'Assigned {len(user_institutions)} affiliations from user <{user._id}> to preprint <{preprint._id}>.' + ) + else: + logger.info( + f'Dry run: Would assign {len(user_institutions)} affiliations from user <{user._id}> to preprint <{preprint._id}>.' + ) + + return processed_count, updated_count diff --git a/osf_tests/management_commands/test_migrate_preprint_affiliations.py b/osf_tests/management_commands/test_migrate_preprint_affiliations.py new file mode 100644 index 00000000000..701638251f5 --- /dev/null +++ b/osf_tests/management_commands/test_migrate_preprint_affiliations.py @@ -0,0 +1,115 @@ +import pytest +from osf.management.commands.migrate_preprint_affiliation import assign_affiliations_to_preprints +from osf_tests.factories import ( + PreprintFactory, + InstitutionFactory, + AuthUserFactory, +) + + +@pytest.mark.django_db +class TestAssignAffiliationsToPreprints: + + @pytest.fixture() + def institution(self): + return InstitutionFactory() + + @pytest.fixture() + def user_with_affiliation(self, institution): + user = AuthUserFactory() + user.add_or_update_affiliated_institution(institution) + user.save() + return user + + @pytest.fixture() + def user_without_affiliation(self): + return AuthUserFactory() + + @pytest.fixture() + def preprint_with_affiliated_contributor(self, user_with_affiliation): + preprint = PreprintFactory() + preprint.add_contributor( + user_with_affiliation, + permissions='admin', + visible=True + ) + return preprint + + @pytest.fixture() + def preprint_with_non_affiliated_contributor(self, user_without_affiliation): + preprint = PreprintFactory() + preprint.add_contributor( + user_without_affiliation, + permissions='admin', + visible=True + ) + return preprint + + @pytest.mark.parametrize('dry_run', [True, False]) + def test_assign_affiliations_with_affiliated_contributor(self, preprint_with_affiliated_contributor, institution, dry_run): + preprint = preprint_with_affiliated_contributor + preprint.affiliated_institutions.clear() + preprint.save() + + assign_affiliations_to_preprints(dry_run=dry_run) + + if dry_run: + assert not preprint.affiliated_institutions.exists() + else: + assert institution in preprint.affiliated_institutions.all() + + @pytest.mark.parametrize('dry_run', [True, False]) + def test_no_affiliations_for_non_affiliated_contributor(self, preprint_with_non_affiliated_contributor, dry_run): + preprint = preprint_with_non_affiliated_contributor + preprint.affiliated_institutions.clear() + preprint.save() + + assign_affiliations_to_preprints(dry_run=dry_run) + + assert not preprint.affiliated_institutions.exists() + + @pytest.mark.parametrize('dry_run', [True, False]) + def test_exclude_contributor_by_guid(self, preprint_with_affiliated_contributor, user_with_affiliation, institution, dry_run): + preprint = preprint_with_affiliated_contributor + preprint.affiliated_institutions.clear() + preprint.save() + + assert user_with_affiliation.get_affiliated_institutions() + assert user_with_affiliation in preprint.contributors.all() + exclude_guids = {user._id for user in preprint.contributors.all()} + + assign_affiliations_to_preprints(exclude_guids=exclude_guids, dry_run=dry_run) + + assert not preprint.affiliated_institutions.exists() + + 
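+    # note: assign_affiliations_to_preprints filters on the 'write_preprint'
+    # codename alone; admin contributors also hold that permission (hence the
+    # "remove redundant permission" note in this commit), while read-only
+    # contributors do not, as the test below verifies for all three roles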
@pytest.mark.parametrize('dry_run', [True, False]) + def test_affiliations_from_multiple_contributors(self, institution, dry_run): + institution_not_include = InstitutionFactory() + read_contrib = AuthUserFactory() + read_contrib.add_or_update_affiliated_institution(institution_not_include) + read_contrib.save() + + write_contrib = AuthUserFactory() + write_contrib.add_or_update_affiliated_institution(institution) + write_contrib.save() + + admin_contrib = AuthUserFactory() + institution2 = InstitutionFactory() + admin_contrib.add_or_update_affiliated_institution(institution2) + admin_contrib.save() + + preprint = PreprintFactory() + preprint.affiliated_institutions.clear() + preprint.add_contributor(read_contrib, permissions='read', visible=True) + preprint.add_contributor(write_contrib, permissions='write', visible=True) + preprint.add_contributor(admin_contrib, permissions='admin', visible=True) + preprint.save() + + assign_affiliations_to_preprints(dry_run=dry_run) + + if dry_run: + assert not preprint.affiliated_institutions.exists() + else: + affiliations = set(preprint.affiliated_institutions.all()) + assert affiliations == {institution, institution2} + assert institution_not_include not in affiliations From f832e5ed7ff36e2ac854789894cac3fc84e596a0 Mon Sep 17 00:00:00 2001 From: Uditi Mehta <57388785+uditijmehta@users.noreply.github.com> Date: Tue, 5 Nov 2024 14:59:44 -0500 Subject: [PATCH 16/35] [ENG-4438] Add OOPSpam and Akismet metrics to spam report (#10783) * Add OOPSpam and Akismet metrics to spam report * Add spam/ham metrics for OOPSpam and Akismet with FK join fix, test cleanup * Add private spam metrics report with preprint inclusion * Validate category; add PrivateSpamMetricsReporter to monthly reports --------- Co-authored-by: Uditi Mehta --- osf/external/askismet/client.py | 34 +++++++++++++++++ osf/external/oopspam/client.py | 34 +++++++++++++++++ osf/metrics/reporters/__init__.py | 2 + osf/metrics/reporters/private_spam_metrics.py | 28 ++++++++++++++ osf/metrics/reporters/spam_count.py | 1 - osf_tests/external/akismet/test_akismet.py | 36 ++++++++++++++++++ osf_tests/external/oopspam/test_oopspam.py | 36 ++++++++++++++++++ osf_tests/metrics/test_spam_count_reporter.py | 38 +++++++++++++++++++ 8 files changed, 208 insertions(+), 1 deletion(-) create mode 100644 osf/metrics/reporters/private_spam_metrics.py create mode 100644 osf_tests/metrics/test_spam_count_reporter.py diff --git a/osf/external/askismet/client.py b/osf/external/askismet/client.py index 877f7ec4c23..db57b1d3cfa 100644 --- a/osf/external/askismet/client.py +++ b/osf/external/askismet/client.py @@ -133,3 +133,37 @@ def submit_ham(self, user_ip, user_agent, **kwargs): ) if res.status_code != requests.codes.ok: raise AkismetClientError(reason=res.text) + + def get_flagged_count(self, start_date, end_date, category='node'): + from osf.models import NodeLog, PreprintLog + + if category not in ['node', 'preprint']: + raise ValueError(f"Invalid category '{category}'. Expected 'node' or 'preprint'.") + + log_model = NodeLog if category == 'node' else PreprintLog + + flagged_count = log_model.objects.filter( + action=log_model.FLAG_SPAM, + created__gt=start_date, + created__lt=end_date, + **{f'{category}__spam_data__who_flagged__in': ['akismet', 'both']} + ).count() + + return flagged_count + + def get_hammed_count(self, start_date, end_date, category='node'): + from osf.models import NodeLog, PreprintLog + + if category not in ['node', 'preprint']: + raise ValueError(f"Invalid category '{category}'. 
Expected 'node' or 'preprint'.") + + log_model = NodeLog if category == 'node' else PreprintLog + + hammed_count = log_model.objects.filter( + action=log_model.CONFIRM_HAM, + created__gt=start_date, + created__lt=end_date, + **{f'{category}__spam_data__who_flagged__in': ['akismet', 'both']} + ).count() + + return hammed_count diff --git a/osf/external/oopspam/client.py b/osf/external/oopspam/client.py index ef22864a43d..0abdfdd021f 100644 --- a/osf/external/oopspam/client.py +++ b/osf/external/oopspam/client.py @@ -45,3 +45,37 @@ def check_content(self, user_ip, content, **kwargs): # OOPSpam returns a spam score out of 6. 3 or higher indicates spam return spam_score >= settings.OOPSPAM_SPAM_LEVEL, resp_json + + def get_flagged_count(self, start_date, end_date, category='node'): + from osf.models import NodeLog, PreprintLog + + if category not in ['node', 'preprint']: + raise ValueError(f"Invalid category '{category}'. Expected 'node' or 'preprint'.") + + log_model = NodeLog if category == 'node' else PreprintLog + + flagged_count = log_model.objects.filter( + action=log_model.FLAG_SPAM, + created__gt=start_date, + created__lt=end_date, + **{f'{category}__spam_data__who_flagged__in': ['oopspam', 'both']} + ).count() + + return flagged_count + + def get_hammed_count(self, start_date, end_date, category='node'): + from osf.models import NodeLog, PreprintLog + + if category not in ['node', 'preprint']: + raise ValueError(f"Invalid category '{category}'. Expected 'node' or 'preprint'.") + + log_model = NodeLog if category == 'node' else PreprintLog + + hammed_count = log_model.objects.filter( + action=log_model.CONFIRM_HAM, + created__gt=start_date, + created__lt=end_date, + **{f'{category}__spam_data__who_flagged__in': ['oopspam', 'both']} + ).count() + + return hammed_count diff --git a/osf/metrics/reporters/__init__.py b/osf/metrics/reporters/__init__.py index 412b1c2bf90..e6966414c3c 100644 --- a/osf/metrics/reporters/__init__.py +++ b/osf/metrics/reporters/__init__.py @@ -13,6 +13,7 @@ from .public_item_usage import PublicItemUsageReporter from .user_count import UserCountReporter from .spam_count import SpamCountReporter +from .private_spam_metrics import PrivateSpamMetricsReporter class AllDailyReporters(enum.Enum): @@ -32,3 +33,4 @@ class AllMonthlyReporters(enum.Enum): INSTITUTIONAL_USERS = InstitutionalUsersReporter INSTITUTIONAL_SUMMARY = InstitutionalSummaryMonthlyReporter ITEM_USAGE = PublicItemUsageReporter + PRIVATE_SPAM_METRICS = PrivateSpamMetricsReporter diff --git a/osf/metrics/reporters/private_spam_metrics.py b/osf/metrics/reporters/private_spam_metrics.py new file mode 100644 index 00000000000..d6beba3a804 --- /dev/null +++ b/osf/metrics/reporters/private_spam_metrics.py @@ -0,0 +1,28 @@ +from osf.metrics.reports import SpamSummaryReport +from osf.external.oopspam.client import OOPSpamClient +from osf.external.askismet.client import AkismetClient +from ._base import MonthlyReporter + +class PrivateSpamMetricsReporter(MonthlyReporter): + report_name = 'Private Spam Metrics' + + def report(self, report_yearmonth): + target_month = report_yearmonth.target_month() + next_month = report_yearmonth.next_month() + + oopspam_client = OOPSpamClient() + akismet_client = AkismetClient() + + report = SpamSummaryReport( + report_yearmonth=str(report_yearmonth), + node_oopspam_flagged=oopspam_client.get_flagged_count(target_month, next_month, category='node'), + node_oopspam_hammed=oopspam_client.get_hammed_count(target_month, next_month, category='node'), + 
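+            # (each count below is a NodeLog/PreprintLog query keyed on
+            # spam_data['who_flagged'], via the client methods added above)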
node_akismet_flagged=akismet_client.get_flagged_count(target_month, next_month, category='node'), + node_akismet_hammed=akismet_client.get_hammed_count(target_month, next_month, category='node'), + preprint_oopspam_flagged=oopspam_client.get_flagged_count(target_month, next_month, category='preprint'), + preprint_oopspam_hammed=oopspam_client.get_hammed_count(target_month, next_month, category='preprint'), + preprint_akismet_flagged=akismet_client.get_flagged_count(target_month, next_month, category='preprint'), + preprint_akismet_hammed=akismet_client.get_hammed_count(target_month, next_month, category='preprint') + ) + + return [report] diff --git a/osf/metrics/reporters/spam_count.py b/osf/metrics/reporters/spam_count.py index 94290f96203..56fc03f8d32 100644 --- a/osf/metrics/reporters/spam_count.py +++ b/osf/metrics/reporters/spam_count.py @@ -5,7 +5,6 @@ from osf.models import PreprintLog, NodeLog from osf.models.spam import SpamStatus - class SpamCountReporter(MonthlyReporter): def report(self): diff --git a/osf_tests/external/akismet/test_akismet.py b/osf_tests/external/akismet/test_akismet.py index db3c5d0d584..46729e485e8 100644 --- a/osf_tests/external/akismet/test_akismet.py +++ b/osf_tests/external/akismet/test_akismet.py @@ -237,3 +237,39 @@ def test_meetings_skip_spam_check(self, mock_akismet, user, node_in_conference, node.check_spam(user, {'title'}, request_headers) node.refresh_from_db() assert node.spam_status == SpamStatus.FLAGGED + + @mock.patch('osf.models.NodeLog.objects.filter') + def test_get_flagged_count(self, mock_filter, user): + from osf.external.askismet.client import AkismetClient + from datetime import datetime + + client = AkismetClient() + start_date = datetime(2024, 10, 1) + end_date = datetime(2024, 10, 31) + + client.get_flagged_count(start_date, end_date) + + mock_filter.assert_called_with( + action='flag_spam', + created__gt=start_date, + created__lt=end_date, + node__spam_data__who_flagged__in=['akismet', 'both'] + ) + + @mock.patch('osf.models.NodeLog.objects.filter') + def test_get_hammed_count(self, mock_filter, user): + from osf.external.askismet.client import AkismetClient + from datetime import datetime + + client = AkismetClient() + start_date = datetime(2024, 10, 1) + end_date = datetime(2024, 10, 31) + + client.get_hammed_count(start_date, end_date) + + mock_filter.assert_called_with( + action='confirm_ham', + created__gt=start_date, + created__lt=end_date, + node__spam_data__who_flagged__in=['akismet', 'both'] + ) diff --git a/osf_tests/external/oopspam/test_oopspam.py b/osf_tests/external/oopspam/test_oopspam.py index 36740148116..96656ecc6da 100644 --- a/osf_tests/external/oopspam/test_oopspam.py +++ b/osf_tests/external/oopspam/test_oopspam.py @@ -125,3 +125,39 @@ def test_do_spam_check_false(self, mock_oopspam, user, request_headers): ) assert user.spam_status == SpamStatus.UNKNOWN + + @mock.patch('osf.models.NodeLog.objects.filter') + def test_get_flagged_count(self, mock_filter, user): + from osf.external.oopspam.client import OOPSpamClient + from datetime import datetime + + client = OOPSpamClient() + start_date = datetime(2024, 10, 1) + end_date = datetime(2024, 10, 31) + + client.get_flagged_count(start_date, end_date) + + mock_filter.assert_called_with( + action='flag_spam', + created__gt=start_date, + created__lt=end_date, + node__spam_data__who_flagged__in=['oopspam', 'both'] + ) + + @mock.patch('osf.models.NodeLog.objects.filter') + def test_get_hammed_count(self, mock_filter, user): + from osf.external.oopspam.client import 
OOPSpamClient + from datetime import datetime + + client = OOPSpamClient() + start_date = datetime(2024, 10, 1) + end_date = datetime(2024, 10, 31) + + client.get_hammed_count(start_date, end_date) + + mock_filter.assert_called_with( + action='confirm_ham', + created__gt=start_date, + created__lt=end_date, + node__spam_data__who_flagged__in=['oopspam', 'both'] + ) diff --git a/osf_tests/metrics/test_spam_count_reporter.py b/osf_tests/metrics/test_spam_count_reporter.py new file mode 100644 index 00000000000..db44dc848ff --- /dev/null +++ b/osf_tests/metrics/test_spam_count_reporter.py @@ -0,0 +1,38 @@ +import pytest +from datetime import datetime +from osf.metrics.reporters.private_spam_metrics import PrivateSpamMetricsReporter +from osf.metrics.utils import YearMonth +from osf_tests.factories import NodeLogFactory, NodeFactory +from unittest.mock import patch + +@pytest.mark.django_db +def test_private_spam_metrics_reporter(): + start_date = datetime(2024, 10, 1) + + oopspam_node = NodeFactory(spam_data={'who_flagged': 'oopspam'}) + akismet_node = NodeFactory(spam_data={'who_flagged': 'akismet'}) + + NodeLogFactory.create_batch(10, action='flag_spam', created=start_date, node=oopspam_node) + NodeLogFactory.create_batch(5, action='confirm_ham', created=start_date, node=oopspam_node) + NodeLogFactory.create_batch(20, action='flag_spam', created=start_date, node=akismet_node) + NodeLogFactory.create_batch(10, action='confirm_ham', created=start_date, node=akismet_node) + + report_yearmonth = YearMonth(2024, 10) + + with patch('osf.external.oopspam.client.OOPSpamClient.get_flagged_count') as mock_oopspam_get_flagged_count, \ + patch('osf.external.oopspam.client.OOPSpamClient.get_hammed_count') as mock_oopspam_get_hammed_count, \ + patch('osf.external.askismet.client.AkismetClient.get_flagged_count') as mock_akismet_get_flagged_count, \ + patch('osf.external.askismet.client.AkismetClient.get_hammed_count') as mock_akismet_get_hammed_count: + + mock_oopspam_get_flagged_count.return_value = 10 + mock_oopspam_get_hammed_count.return_value = 5 + mock_akismet_get_flagged_count.return_value = 20 + mock_akismet_get_hammed_count.return_value = 10 + + reporter = PrivateSpamMetricsReporter() + report = reporter.report(report_yearmonth)[0] + + assert report.node_oopspam_flagged == 10, f"Expected 10, got {report.node_oopspam_flagged}" + assert report.node_oopspam_hammed == 5, f"Expected 5, got {report.node_oopspam_hammed}" + assert report.node_akismet_flagged == 20, f"Expected 20, got {report.node_akismet_flagged}" + assert report.node_akismet_hammed == 10, f"Expected 10, got {report.node_akismet_hammed}" From f67b86facec647cef3a930aad14117e33eed6fdf Mon Sep 17 00:00:00 2001 From: mfraezz Date: Thu, 7 Nov 2024 15:13:07 -0500 Subject: [PATCH 17/35] Add PrivateSpamMetricsReport (#10791) --- osf/metrics/reporters/private_spam_metrics.py | 4 ++-- osf/metrics/reports.py | 11 +++++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/osf/metrics/reporters/private_spam_metrics.py b/osf/metrics/reporters/private_spam_metrics.py index d6beba3a804..39b5fb16cb7 100644 --- a/osf/metrics/reporters/private_spam_metrics.py +++ b/osf/metrics/reporters/private_spam_metrics.py @@ -1,4 +1,4 @@ -from osf.metrics.reports import SpamSummaryReport +from osf.metrics.reports import PrivateSpamMetricsReport from osf.external.oopspam.client import OOPSpamClient from osf.external.askismet.client import AkismetClient from ._base import MonthlyReporter @@ -13,7 +13,7 @@ def report(self, report_yearmonth): 
oopspam_client = OOPSpamClient() akismet_client = AkismetClient() - report = SpamSummaryReport( + report = PrivateSpamMetricsReport( report_yearmonth=str(report_yearmonth), node_oopspam_flagged=oopspam_client.get_flagged_count(target_month, next_month, category='node'), node_oopspam_hammed=oopspam_client.get_hammed_count(target_month, next_month, category='node'), diff --git a/osf/metrics/reports.py b/osf/metrics/reports.py index 43bdd0fabd1..d1e21db9c45 100644 --- a/osf/metrics/reports.py +++ b/osf/metrics/reports.py @@ -323,3 +323,14 @@ def for_last_month(cls, item_osfid: str) -> PublicItemUsageReport | None: ) _response = _search.execute() return _response[0] if _response else None + + +class PrivateSpamMetricsReport(MonthlyReport): + node_oopspam_flagged = metrics.Integer() + node_oopspam_hammed = metrics.Integer() + node_akismet_flagged = metrics.Integer() + node_akismet_hammed = metrics.Integer() + preprint_oopspam_flagged = metrics.Integer() + preprint_oopspam_hammed = metrics.Integer() + preprint_akismet_flagged = metrics.Integer() + preprint_akismet_hammed = metrics.Integer() From eadb41fd8bc5b2d7b06b2721131002a435b2b14c Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Fri, 15 Nov 2024 13:54:45 -0500 Subject: [PATCH 18/35] [ENG-6435] Fix: duplicate reports when run for past years (#10800) --- osf/metrics/reports.py | 27 +++++++++++++++++------- osf/metrics/utils.py | 10 +++++++++ osf_tests/metrics/test_daily_report.py | 21 +++++++++++++++--- osf_tests/metrics/test_monthly_report.py | 5 +++++ osf_tests/metrics/test_yearmonth.txt | 18 ++++++++++++++++ 5 files changed, 70 insertions(+), 11 deletions(-) diff --git a/osf/metrics/reports.py b/osf/metrics/reports.py index d1e21db9c45..28ca6cdb964 100644 --- a/osf/metrics/reports.py +++ b/osf/metrics/reports.py @@ -30,6 +30,16 @@ def __init_subclass__(cls, **kwargs): super().__init_subclass__(**kwargs) assert 'report_date' in cls.UNIQUE_TOGETHER_FIELDS, f'DailyReport subclasses must have "report_date" in UNIQUE_TOGETHER_FIELDS (on {cls.__qualname__}, got {cls.UNIQUE_TOGETHER_FIELDS})' + def save(self, *args, **kwargs): + if self.timestamp is None: + self.timestamp = datetime.datetime( + self.report_date.year, + self.report_date.month, + self.report_date.day, + tzinfo=datetime.UTC, + ) + super().save(*args, **kwargs) + class Meta: abstract = True dynamic = metrics.MetaField('strict') @@ -41,19 +51,15 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs, format='strict_year_month') def deserialize(self, data): - if isinstance(data, YearMonth): - return data - elif isinstance(data, str): - return YearMonth.from_str(data) - elif isinstance(data, (datetime.datetime, datetime.date)): - return YearMonth.from_date(data) - elif isinstance(data, int): + if isinstance(data, int): # elasticsearch stores dates in milliseconds since the unix epoch _as_datetime = datetime.datetime.fromtimestamp(data // 1000) return YearMonth.from_date(_as_datetime) elif data is None: return None - else: + try: + return YearMonth.from_any(data) + except ValueError: raise ValueError(f'unsure how to deserialize "{data}" (of type {type(data)}) to YearMonth') def serialize(self, data): @@ -102,6 +108,11 @@ def __init_subclass__(cls, **kwargs): super().__init_subclass__(**kwargs) assert 'report_yearmonth' in cls.UNIQUE_TOGETHER_FIELDS, f'MonthlyReport subclasses must have "report_yearmonth" in UNIQUE_TOGETHER_FIELDS (on {cls.__qualname__}, got {cls.UNIQUE_TOGETHER_FIELDS})' + def save(self, *args, **kwargs): + if self.timestamp is None: + 
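+            # note: anchoring the default timestamp to the reported month,
+            # rather than "now", is part of the duplicate-reports fix this
+            # commit describes for re-runs over past months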
self.timestamp = YearMonth.from_any(self.report_yearmonth).month_start() + super().save(*args, **kwargs) + @receiver(metrics_pre_save) def set_report_id(sender, instance, **kwargs): diff --git a/osf/metrics/utils.py b/osf/metrics/utils.py index 910b1f3104c..7c9fed2c6fb 100644 --- a/osf/metrics/utils.py +++ b/osf/metrics/utils.py @@ -46,6 +46,16 @@ def from_str(cls, input_str: str) -> YearMonth: else: raise ValueError(f'expected YYYY-MM format, got "{input_str}"') + @classmethod + def from_any(cls, data) -> YearMonth: + if isinstance(data, YearMonth): + return data + elif isinstance(data, str): + return YearMonth.from_str(data) + elif isinstance(data, (datetime.datetime, datetime.date)): + return YearMonth.from_date(data) + raise ValueError(f'cannot coerce {data} into YearMonth') + def __str__(self): """convert to string of "YYYY-MM" format""" return f'{self.year}-{self.month:0>2}' diff --git a/osf_tests/metrics/test_daily_report.py b/osf_tests/metrics/test_daily_report.py index 3840f5dba21..46375184f95 100644 --- a/osf_tests/metrics/test_daily_report.py +++ b/osf_tests/metrics/test_daily_report.py @@ -1,4 +1,4 @@ -from datetime import date +import datetime from unittest import mock import pytest @@ -21,7 +21,13 @@ class UniqueByDate(DailyReport): class Meta: app_label = 'osf' - today = date(2022, 5, 18) + today = datetime.date(2022, 5, 18) + expected_timestamp = datetime.datetime( + today.year, + today.month, + today.day, + tzinfo=datetime.UTC, + ) reports = [ UniqueByDate(report_date=today), @@ -35,6 +41,7 @@ class Meta: assert mock_save.call_count == 1 assert mock_save.call_args[0][0] is report assert report.meta.id == expected_key + assert report.timestamp == expected_timestamp mock_save.reset_mock() def test_with_unique_together(self, mock_save): @@ -46,7 +53,13 @@ class UniqueByDateAndField(DailyReport): class Meta: app_label = 'osf' - today = date(2022, 5, 18) + today = datetime.date(2022, 5, 18) + expected_timestamp = datetime.datetime( + today.year, + today.month, + today.day, + tzinfo=datetime.UTC, + ) expected_blah = 'dca57e6cde89b19274ea24bc713971dab137a896b8e06d43a11a3f437cd1d151' blah_report = UniqueByDateAndField(report_date=today, uniquefield='blah') @@ -54,6 +67,7 @@ class Meta: assert mock_save.call_count == 1 assert mock_save.call_args[0][0] is blah_report assert blah_report.meta.id == expected_blah + assert blah_report.timestamp == expected_timestamp mock_save.reset_mock() expected_fleh = 'e7dd5ff6b087807efcfa958077dc713878f21c65af79b3ccdb5dc2409bf5ad99' @@ -62,6 +76,7 @@ class Meta: assert mock_save.call_count == 1 assert mock_save.call_args[0][0] is fleh_report assert fleh_report.meta.id == expected_fleh + assert fleh_report.timestamp == expected_timestamp mock_save.reset_mock() for _bad_report in ( diff --git a/osf_tests/metrics/test_monthly_report.py b/osf_tests/metrics/test_monthly_report.py index 23546eb1fb3..0c0302a7f08 100644 --- a/osf_tests/metrics/test_monthly_report.py +++ b/osf_tests/metrics/test_monthly_report.py @@ -23,6 +23,7 @@ class Meta: app_label = 'osf' yearmonth = YearMonth(2022, 5) + expected_timestamp = datetime.datetime(yearmonth.year, yearmonth.month, 1, tzinfo=datetime.UTC) reports = [ UniqueByMonth(report_yearmonth=yearmonth), @@ -36,6 +37,7 @@ class Meta: assert mock_save.call_count == 1 assert mock_save.call_args[0][0] is report assert report.meta.id == expected_key + assert report.timestamp == expected_timestamp mock_save.reset_mock() def test_with_unique_together(self, mock_save): @@ -48,6 +50,7 @@ class Meta: app_label = 'osf' yearmonth = 
YearMonth(2022, 5) + expected_timestamp = datetime.datetime(yearmonth.year, yearmonth.month, 1, tzinfo=datetime.UTC) expected_blah = '62ebf38317cd8402e27a50ce99f836d1734b3f545adf7d144d0e1cf37a0d9d08' blah_report = UniqueByMonthAndField(report_yearmonth=yearmonth, uniquefield='blah') @@ -55,6 +58,7 @@ class Meta: assert mock_save.call_count == 1 assert mock_save.call_args[0][0] is blah_report assert blah_report.meta.id == expected_blah + assert blah_report.timestamp == expected_timestamp mock_save.reset_mock() expected_fleh = '385700db282f6d6089a0d21836db5ee8423f548615e515b6e034bcc90a14500f' @@ -63,6 +67,7 @@ class Meta: assert mock_save.call_count == 1 assert mock_save.call_args[0][0] is fleh_report assert fleh_report.meta.id == expected_fleh + assert fleh_report.timestamp == expected_timestamp mock_save.reset_mock() for _bad_report in ( diff --git a/osf_tests/metrics/test_yearmonth.txt b/osf_tests/metrics/test_yearmonth.txt index 646c73c42f9..17d847f689b 100644 --- a/osf_tests/metrics/test_yearmonth.txt +++ b/osf_tests/metrics/test_yearmonth.txt @@ -24,6 +24,24 @@ YearMonth(year=1974, month=3) >>> YearMonth.from_str('2000-12') YearMonth(year=2000, month=12) +`from_any` constructor, accepts YearMonth, "YYYY-MM", or date/datetime +>>> YearMonth.from_any('2000-12') +YearMonth(year=2000, month=12) +>>> YearMonth.from_any(_) is _ +True +>>> YearMonth.from_any(datetime.date(1973, 1, 1)) +YearMonth(year=1973, month=1) +>>> YearMonth.from_any(datetime.datetime(1974, 3, 2)) +YearMonth(year=1974, month=3) +>>> YearMonth.from_any(None) +Traceback (most recent call last): + ... +ValueError: cannot coerce None into YearMonth +>>> YearMonth.from_any(7) +Traceback (most recent call last): + ... +ValueError: cannot coerce 7 into YearMonth + `__str__` method gives "YYYY-MM" format: >>> str(YearMonth(1491, 7)) '1491-07' From 913889d200a258b15dd06a0c0eba5838d6ac3e3b Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Fri, 15 Nov 2024 13:55:16 -0500 Subject: [PATCH 19/35] [ENG-6506] Fix: counted-usage clobbers (#10799) prevent counted-usages with different `action_labels` from overwriting each other -- deduplicate only when `action_labels` match exactly --- api_tests/metrics/test_counted_usage.py | 16 ++++++++-------- osf/metrics/counted_usage.py | 1 + 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/api_tests/metrics/test_counted_usage.py b/api_tests/metrics/test_counted_usage.py index 9e20f2c0238..568d663be9e 100644 --- a/api_tests/metrics/test_counted_usage.py +++ b/api_tests/metrics/test_counted_usage.py @@ -99,8 +99,8 @@ def test_by_client_session_id(self, app, mock_save, user): assert resp.status_code == 201 assert_saved_with( mock_save, - # doc_id: sha256(b'http://example.foo/|http://example.foo/blahblah/blee|5b7c8b0a740a5b23712258a9d1164d2af008df02a8e3d339f16ead1d19595b34|1981-01-01|3').hexdigest() - expected_doc_id='55fffffdc0d674d15a5e8763d14e4ae90f658fbfb6fbf94f88a5d24978f02e72', + # doc_id: sha256(b'http://example.foo/|http://example.foo/blahblah/blee|5b7c8b0a740a5b23712258a9d1164d2af008df02a8e3d339f16ead1d19595b34|1981-01-01|3|api,view').hexdigest() + expected_doc_id='3239044c7462dd318edd0522a0ed7d84b9c6502ef16cb40dfcae6c1f456d57a2', expected_attrs={ 'platform_iri': 'http://example.foo/', 'item_guid': 'zyxwv', @@ -132,8 +132,8 @@ def test_by_client_session_id_anon(self, app, mock_save): assert resp.status_code == 201 assert_saved_with( mock_save, - # doc_id: 
sha256(b'http://example.foo/|http://example.foo/bliz/|5b7c8b0a740a5b23712258a9d1164d2af008df02a8e3d339f16ead1d19595b34|1981-01-01|3').hexdigest() - expected_doc_id='e559ffbc4bd3e3e69252d34c273f0e771ec89ee455ec9b60fbbadf3944e4af4e', + # doc_id: sha256(b'http://example.foo/|http://example.foo/bliz/|5b7c8b0a740a5b23712258a9d1164d2af008df02a8e3d339f16ead1d19595b34|1981-01-01|3|view,web').hexdigest() + expected_doc_id='d01759e963893f9dc9b2ccf016a5ef29135673779802b5578f31449543677e82', expected_attrs={ 'platform_iri': 'http://example.foo/', 'item_guid': 'zyxwv', @@ -166,8 +166,8 @@ def test_by_user_auth(self, app, mock_save, user): assert resp.status_code == 201 assert_saved_with( mock_save, - # doc_id: sha256(b'http://example.foo/|http://osf.io/mst3k|ec768abb16c3411570af99b9d635c2c32d1ca31d1b25eec8ee73759e7242e74a|1981-01-01|3').hexdigest() - expected_doc_id='743494d8a55079b91e202da1dbdfce5aea72e310c57a34b36df2c2af5ed4d362', + # doc_id: sha256(b'http://example.foo/|http://osf.io/mst3k|ec768abb16c3411570af99b9d635c2c32d1ca31d1b25eec8ee73759e7242e74a|1981-01-01|3|view,web').hexdigest() + expected_doc_id='7b8bc27c6d90fb45aa5bbd02deceba9f7384ed61b9a6e7253317c262020b94c2', expected_attrs={ 'platform_iri': 'http://example.foo/', 'item_guid': 'yxwvu', @@ -196,8 +196,8 @@ def test_by_useragent_header(self, app, mock_save): assert resp.status_code == 201 assert_saved_with( mock_save, - # doc_id: sha256(b'http://example.foo/|yxwvu|97098dd3f7cd26053c0d0264d1c84eaeea8e08d2c55ca34017ffbe53c749ba5a|1981-01-01|3').hexdigest() - expected_doc_id='a50ac1b2dc1c918cdea7be50b005117fdb6ee00ea069ca3aa4aaf03c0f905fa0', + # doc_id: sha256(b'http://example.foo/|yxwvu|97098dd3f7cd26053c0d0264d1c84eaeea8e08d2c55ca34017ffbe53c749ba5a|1981-01-01|3|api,view').hexdigest() + expected_doc_id='d669528b30f443ffe506e183537af9624ef290090e90a200ecce7b7ca19c77f7', expected_attrs={ 'platform_iri': 'http://example.foo/', 'item_guid': 'yxwvu', diff --git a/osf/metrics/counted_usage.py b/osf/metrics/counted_usage.py index c3c6d4cc1aa..39b3b74129b 100644 --- a/osf/metrics/counted_usage.py +++ b/osf/metrics/counted_usage.py @@ -142,6 +142,7 @@ def _fill_document_id(counted_usage): counted_usage.session_id, counted_usage.timestamp.date(), time_window, + ','.join(sorted(counted_usage.action_labels)), ) From 674231ed2225d740edd25409bd2726fbde129989 Mon Sep 17 00:00:00 2001 From: Matt Frazier Date: Wed, 13 Nov 2024 11:38:24 -0500 Subject: [PATCH 20/35] Add undated AGU conference campaign for annual use --- framework/auth/campaigns.py | 9 +++++++ framework/auth/views.py | 2 +- tests/test_campaigns.py | 1 + tests/test_views.py | 6 ++--- website/mails/mails.py | 4 +++ .../emails/confirm_agu_conference.html.mako | 26 +++++++++++++++++++ website/util/metrics.py | 1 + 7 files changed, 45 insertions(+), 4 deletions(-) create mode 100644 website/templates/emails/confirm_agu_conference.html.mako diff --git a/framework/auth/campaigns.py b/framework/auth/campaigns.py index 8a902245817..a47b3cf637b 100644 --- a/framework/auth/campaigns.py +++ b/framework/auth/campaigns.py @@ -100,6 +100,15 @@ def get_campaigns(): } }) + newest_campaigns.update({ + 'agu_conference': { + 'system_tag': CampaignSourceTags.AguConference.value, + 'redirect_url': furl(DOMAIN).add(path='dashboard/').url, + 'confirmation_email_template': mails.CONFIRM_EMAIL_AGU_CONFERENCE, + 'login_type': 'native', + } + }) + CAMPAIGNS = newest_campaigns CAMPAIGNS_LAST_REFRESHED = timezone.now() diff --git a/framework/auth/views.py b/framework/auth/views.py index e398a6db0a5..5f999aaaca6 100644 
--- a/framework/auth/views.py +++ b/framework/auth/views.py @@ -944,7 +944,7 @@ def register_user(**kwargs): ) if settings.CONFIRM_REGISTRATIONS_BY_EMAIL: - send_confirm_email_async(user, email=user.username) + send_confirm_email(user, email=user.username) message = language.REGISTRATION_SUCCESS.format(email=user.username) return {'message': message} else: diff --git a/tests/test_campaigns.py b/tests/test_campaigns.py index 587aaaa82d8..1df6a32169a 100644 --- a/tests/test_campaigns.py +++ b/tests/test_campaigns.py @@ -46,6 +46,7 @@ def setUp(self): 'osf-registries', 'osf-registered-reports', 'agu_conference_2023', + 'agu_conference', ] self.refresh = timezone.now() campaigns.CAMPAIGNS = None # force campaign refresh now that preprint providers are populated diff --git a/tests/test_views.py b/tests/test_views.py index f1dbaa3285d..d78e7760c17 100644 --- a/tests/test_views.py +++ b/tests/test_views.py @@ -3438,8 +3438,8 @@ def test_register_after_being_invited_as_unreg_contributor(self, mock_update_sea assert new_user.check_password(password) assert new_user.fullname == real_name - @mock.patch('framework.auth.views.send_confirm_email_async') - def test_register_sends_user_registered_signal(self, mock_send_confirm_email_async): + @mock.patch('framework.auth.views.send_confirm_email') + def test_register_sends_user_registered_signal(self, mock_send_confirm_email): url = api_url_for('register_user') name, email, password = fake.name(), fake_email(), 'underpressure' with capture_signals() as mock_signals: @@ -3453,7 +3453,7 @@ def test_register_sends_user_registered_signal(self, mock_send_confirm_email_asy } ) assert mock_signals.signals_sent() == {auth.signals.user_registered, auth.signals.unconfirmed_user_created} - assert mock_send_confirm_email_async.called + assert mock_send_confirm_email.called @mock.patch('framework.auth.views.mails.send_mail') def test_resend_confirmation(self, send_mail: MagicMock): diff --git a/website/mails/mails.py b/website/mails/mails.py index da66ad8d083..afca9e78f03 100644 --- a/website/mails/mails.py +++ b/website/mails/mails.py @@ -191,6 +191,10 @@ def get_english_article(word): 'confirm_agu_conference_2023', subject='OSF Account Verification, from the American Geophysical Union Conference' ) +CONFIRM_EMAIL_AGU_CONFERENCE = Mail( + 'confirm_agu_conference', + subject='OSF Account Verification, from the American Geophysical Union Conference' +) CONFIRM_EMAIL_PREPRINTS = lambda name, provider: Mail( f'confirm_preprints_{name}', subject=f'OSF Account Verification, {provider}' diff --git a/website/templates/emails/confirm_agu_conference.html.mako b/website/templates/emails/confirm_agu_conference.html.mako new file mode 100644 index 00000000000..603e2c39e8d --- /dev/null +++ b/website/templates/emails/confirm_agu_conference.html.mako @@ -0,0 +1,26 @@ +<%inherit file="notify_base.mako" /> + +<%def name="content()"> + + + Hello ${user.fullname},
+
+
+ Thank you for joining us at the AGU Open Science Pavilion, and welcome to the Open Science Framework (OSF).
+
+ We are pleased to offer AGU attendees a special, exclusive 1:1 consultation to continue our conversation and to help
+ you get oriented on the OSF. This is an opportunity for us to show you useful OSF features and talk about
+ open science in Earth and space sciences, and for you to ask any questions you may have.
+ You can sign up to participate by completing this form, and a member of our team will be in touch to
+ determine your availability:
+
+ https://docs.google.com/forms/d/e/1FAIpQLSeJ23YPaEMdbLY1OqbcP85Tt6rhLpFoOtH0Yg4vY_wSKULRcw/viewform?usp=sf_link +

+ To confirm your OSF account, please verify your email address by visiting this link:
+
+ ${confirmation_url}
+
+ From the team at the Center for Open Science
+ + + diff --git a/website/util/metrics.py b/website/util/metrics.py index 7324a410138..c76adb89f5a 100644 --- a/website/util/metrics.py +++ b/website/util/metrics.py @@ -57,6 +57,7 @@ class CampaignSourceTags(Enum): OsfRegisteredReports = campaign_source_tag('osf_registered_reports') Osf4m = campaign_source_tag('osf4m') AguConference2023 = campaign_source_tag('agu_conference_2023') + AguConference = campaign_source_tag('agu_conference') class OsfClaimedTags(Enum): From cb0c07844eb87a9b7344777f1986c23861a6adb1 Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Fri, 22 Nov 2024 06:54:43 -0500 Subject: [PATCH 21/35] [ENG-6590] Fix: Monthly Usage Data - update monthly reporters with `iter_report_kwargs` (mostly affects `PublicItemUsageReporter`, which was badly optimized to generate many reports at once) - add `schedule_monthly_reporter` task that schedules tasks from `iter_report_kwargs` results - change `MonthlyReporter.followup_task()` to run per-report --- admin/management/views.py | 8 +- osf/features.yaml | 5 + .../commands/monthly_reporters_go.py | 127 ++++-- osf/metrics/preprint_metrics.py | 4 +- osf/metrics/reporters/_base.py | 19 +- .../reporters/institution_summary_monthly.py | 13 +- osf/metrics/reporters/institutional_users.py | 33 +- osf/metrics/reporters/public_item_usage.py | 378 ++++++++++-------- osf/metrics/reporters/spam_count.py | 7 +- osf/metrics/utils.py | 8 + osf_tests/metrics/reporters/__init__.py | 0 osf_tests/metrics/reporters/_testutils.py | 10 + .../test_institutional_summary_reporter.py | 7 +- .../test_institutional_users_reporter.py | 15 +- .../test_public_item_usage_reporter.py | 148 ++++--- osf_tests/metrics/test_yearmonth.txt | 7 + 16 files changed, 485 insertions(+), 304 deletions(-) create mode 100644 osf_tests/metrics/reporters/__init__.py create mode 100644 osf_tests/metrics/reporters/_testutils.py diff --git a/admin/management/views.py b/admin/management/views.py index 88548a518d1..bb7065c1062 100644 --- a/admin/management/views.py +++ b/admin/management/views.py @@ -12,6 +12,7 @@ from scripts.find_spammy_content import manage_spammy_content from django.urls import reverse from django.shortcuts import redirect +from osf.metrics.utils import YearMonth from osf.models import Preprint, Node, Registration @@ -122,8 +123,11 @@ def post(self, request, *args, **kwargs): report_date = None errors = monthly_reporters_go( - report_month=getattr(report_date, 'month', None), - report_year=getattr(report_date, 'year', None) + yearmonth=( + str(YearMonth.from_date(report_date)) + if report_date is not None + else '' + ), ) if errors: diff --git a/osf/features.yaml b/osf/features.yaml index a3f0fcc1f14..1b41e4b2cdc 100644 --- a/osf/features.yaml +++ b/osf/features.yaml @@ -221,3 +221,8 @@ switches: - flag_name: ENABLE_INACTIVE_SCHEMAS name: enable_inactive_schemas note: This is no longer used + + - flag_name: COUNTEDUSAGE_UNIFIED_METRICS_2024 + name: countedusage_unified_metrics_2024 + note: use only `osf.metrics.counted_usage`-based metrics where possible; un-use PageCounter, PreprintView, PreprintDownload, etc + active: false diff --git a/osf/management/commands/monthly_reporters_go.py b/osf/management/commands/monthly_reporters_go.py index c467640cd15..7ab7b843434 100644 --- a/osf/management/commands/monthly_reporters_go.py +++ b/osf/management/commands/monthly_reporters_go.py @@ -1,68 +1,125 @@ +import datetime import logging from django.core.management.base import BaseCommand -from django.db.utils import OperationalError -from django.utils import timezone +from 
django.db import OperationalError as DjangoOperationalError +from elasticsearch.exceptions import ConnectionError as ElasticConnectionError +from psycopg2 import OperationalError as PostgresOperationalError from framework.celery_tasks import app as celery_app +import framework.sentry from osf.metrics.reporters import AllMonthlyReporters from osf.metrics.utils import YearMonth -from website.app import init_app logger = logging.getLogger(__name__) -MAXMONTH = 12 - +_CONTINUE_AFTER_ERRORS = ( + DjangoOperationalError, + ElasticConnectionError, + PostgresOperationalError, +) @celery_app.task(name='management.commands.monthly_reporters_go') -def monthly_reporters_go(report_year=None, report_month=None): - init_app() # OSF-specific setup - - if report_year and report_month: - report_yearmonth = YearMonth(report_year, report_month) - else: # default to last month if year and month not provided - today = timezone.now().date() - report_yearmonth = YearMonth( - year=today.year if today.month > 1 else today.year - 1, - month=today.month - 1 or MAXMONTH, - ) - for _reporter_key in AllMonthlyReporters.__members__.keys(): - monthly_reporter_go.apply_async(kwargs={ +def monthly_reporters_go(yearmonth: str = '', reporter_key: str = ''): + _yearmonth = ( + YearMonth.from_str(yearmonth) + if yearmonth + else YearMonth.from_date(datetime.date.today()).prior() # default last month + ) + _reporter_keys = ( + [reporter_key] + if reporter_key + else _enum_names(AllMonthlyReporters) + ) + for _reporter_key in _reporter_keys: + schedule_monthly_reporter.apply_async(kwargs={ + 'yearmonth': str(_yearmonth), 'reporter_key': _reporter_key, - 'yearmonth': str(report_yearmonth), }) +@celery_app.task(name='management.commands.schedule_monthly_reporter') +def schedule_monthly_reporter( + yearmonth: str, + reporter_key: str, + continue_after: dict | None = None, +): + _reporter = _get_reporter(reporter_key, yearmonth) + _last_kwargs = None + try: + for _kwargs in _reporter.iter_report_kwargs(continue_after=continue_after): + monthly_reporter_do.apply_async(kwargs={ + 'yearmonth': yearmonth, + 'reporter_key': reporter_key, + 'report_kwargs': _kwargs, + }) + _last_kwargs = _kwargs + except _CONTINUE_AFTER_ERRORS as _error: + # let the celery task succeed but log the error + framework.sentry.log_exception(_error) + # schedule another task to continue scheduling + if _last_kwargs is not None: + schedule_monthly_reporter.apply_async(kwargs={ + 'yearmonth': yearmonth, + 'reporter_key': reporter_key, + 'continue_after': _last_kwargs, + }) + + @celery_app.task( - name='management.commands.monthly_reporter_go', - autoretry_for=(OperationalError,), + name='management.commands.monthly_reporter_do', + autoretry_for=( + DjangoOperationalError, + ElasticConnectionError, + PostgresOperationalError, + ), max_retries=5, retry_backoff=True, - bind=True, ) -def monthly_reporter_go(task, reporter_key: str, yearmonth: str): - _reporter_class = AllMonthlyReporters[reporter_key].value - _reporter = _reporter_class(YearMonth.from_str(yearmonth)) - _reporter.run_and_record_for_month() - _followup = _reporter.followup_task() - if _followup is not None: - _followup.apply_async() +def monthly_reporter_do(reporter_key: str, yearmonth: str, report_kwargs: dict): + _reporter = _get_reporter(reporter_key, yearmonth) + _report = _reporter.report(**report_kwargs) + if _report is not None: + _report.report_yearmonth = _reporter.yearmonth + _report.save() + _followup_task = _reporter.followup_task(_report) + if _followup_task is not None: + 
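+        # per this patch, followup_task() now runs per saved report; the
+        # celery Signature it returns is queued right after the report saves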
_followup_task.apply_async() class Command(BaseCommand): def add_arguments(self, parser): parser.add_argument( 'yearmonth', - type=YearMonth.from_str, - default={'year': None, 'month': None}, + type=str, help='year and month (YYYY-MM)', ) + parser.add_argument( + '-r', '--reporter', + type=str, + choices={_name.lower() for _name in _enum_names(AllMonthlyReporters)}, + default='', + help='name of the reporter to run (default all)', + ) - def handle(self, *args, **options): + def handle(self, *args, **kwargs): monthly_reporters_go( - report_year=getattr(options.get('yearmonth'), 'year', None), - report_month=getattr(options.get('yearmonth'), 'month', None), + yearmonth=kwargs['yearmonth'], + reporter_key=kwargs['reporter'].upper(), ) - self.stdout.write(self.style.SUCCESS('reporter tasks scheduled.')) + self.stdout.write(self.style.SUCCESS( + f'scheduling tasks for monthly reporter "{kwargs['reporter']}"...' + if kwargs['reporter'] + else 'scheduling tasks for all monthly reporters...' + )) + + +def _get_reporter(reporter_key: str, yearmonth: str): + _reporter_class = AllMonthlyReporters[reporter_key].value + return _reporter_class(YearMonth.from_str(yearmonth)) + + +def _enum_names(enum_cls) -> list[str]: + return list(enum_cls.__members__.keys()) diff --git a/osf/metrics/preprint_metrics.py b/osf/metrics/preprint_metrics.py index 472cd01f698..4b64398a5c6 100644 --- a/osf/metrics/preprint_metrics.py +++ b/osf/metrics/preprint_metrics.py @@ -37,8 +37,8 @@ def record_for_preprint(cls, preprint, user=None, **kwargs): ) @classmethod - def get_count_for_preprint(cls, preprint, after=None, before=None, index=None): - search = cls.search(after=after, before=before, index=index).filter('match', preprint_id=preprint._id) + def get_count_for_preprint(cls, preprint, after=None, before=None, index=None) -> int: + search = cls.search(index=index).filter('term', preprint_id=preprint._id) timestamp = {} if after: timestamp['gte'] = after diff --git a/osf/metrics/reporters/_base.py b/osf/metrics/reporters/_base.py index 931afe23fd0..707e869522b 100644 --- a/osf/metrics/reporters/_base.py +++ b/osf/metrics/reporters/_base.py @@ -15,18 +15,17 @@ class MonthlyReporter: yearmonth: YearMonth - def report(self) -> abc.Iterable[MonthlyReport] | abc.Iterator[MonthlyReport]: + def iter_report_kwargs(self, continue_after: dict | None = None) -> abc.Iterator[dict]: + # override for multiple reports per month + if continue_after is None: + yield {} # by default, calls `.report()` once with no kwargs + + def report(self, **report_kwargs) -> MonthlyReport | None: """build a report for the given month """ - raise NotImplementedError(f'{self.__name__} must implement `report`') - - def run_and_record_for_month(self) -> None: - reports = self.report() - for report in reports: - report.report_yearmonth = self.yearmonth - report.save() + raise NotImplementedError(f'{self.__class__.__name__} must implement `report`') - def followup_task(self) -> celery.Signature | None: + def followup_task(self, report) -> celery.Signature | None: return None @@ -36,7 +35,7 @@ def report(self, report_date): return an iterable of DailyReport (unsaved) """ - raise NotImplementedError(f'{self.__name__} must implement `report`') + raise NotImplementedError(f'{self.__class__.__name__} must implement `report`') def run_and_record_for_date(self, report_date): reports = self.report(report_date) diff --git a/osf/metrics/reporters/institution_summary_monthly.py b/osf/metrics/reporters/institution_summary_monthly.py index 998cc056298..4748860db32 100644 
--- a/osf/metrics/reporters/institution_summary_monthly.py +++ b/osf/metrics/reporters/institution_summary_monthly.py @@ -11,9 +11,16 @@ class InstitutionalSummaryMonthlyReporter(MonthlyReporter): """Generate an InstitutionMonthlySummaryReport for each institution.""" - def report(self): - for institution in Institution.objects.all(): - yield self.generate_report(institution) + def iter_report_kwargs(self, continue_after: dict | None = None): + _inst_qs = Institution.objects.order_by('pk') + if continue_after: + _inst_qs = _inst_qs.filter(pk__gt=continue_after['institution_pk']) + for _pk in _inst_qs.values_list('pk', flat=True): + yield {'institution_pk': _pk} + + def report(self, **report_kwargs): + _institution = Institution.objects.get(pk=report_kwargs['institution_pk']) + return self.generate_report(_institution) def generate_report(self, institution): node_queryset = institution.nodes.filter( diff --git a/osf/metrics/reporters/institutional_users.py b/osf/metrics/reporters/institutional_users.py index e0f7f42a156..e34875d4b28 100644 --- a/osf/metrics/reporters/institutional_users.py +++ b/osf/metrics/reporters/institutional_users.py @@ -1,5 +1,4 @@ import dataclasses -import datetime from django.contrib.contenttypes.models import ContentType from django.db.models import Q, F, Sum @@ -12,9 +11,6 @@ from ._base import MonthlyReporter -_CHUNK_SIZE = 500 - - class InstitutionalUsersReporter(MonthlyReporter): '''build an InstitutionalUserReport for each institution-user affiliation @@ -22,13 +18,27 @@ class InstitutionalUsersReporter(MonthlyReporter): which offers institutional admins insight into how people at their institution are using osf, based on their explicitly-affiliated osf objects ''' - def report(self): + def iter_report_kwargs(self, continue_after: dict | None = None): _before_datetime = self.yearmonth.month_end() - for _institution in osfdb.Institution.objects.filter(created__lt=_before_datetime): + _inst_qs = ( + osfdb.Institution.objects + .filter(created__lt=_before_datetime) + .order_by('pk') + ) + if continue_after: + _inst_qs = _inst_qs.filter(pk__gte=continue_after['institution_pk']) + for _institution in _inst_qs: _user_qs = _institution.get_institution_users().filter(created__lt=_before_datetime) - for _user in _user_qs.iterator(chunk_size=_CHUNK_SIZE): - _helper = _InstiUserReportHelper(_institution, _user, self.yearmonth, _before_datetime) - yield _helper.report + if continue_after and (_institution.pk == continue_after['institution_pk']): + _user_qs = _user_qs.filter(pk__gt=continue_after['user_pk']) + for _user_pk in _user_qs.values_list('pk', flat=True): + yield {'institution_pk': _institution.pk, 'user_pk': _user_pk} + + def report(self, **report_kwargs): + _institution = osfdb.Institution.objects.get(pk=report_kwargs['institution_pk']) + _user = osfdb.OSFUser.objects.get(pk=report_kwargs['user_pk']) + _helper = _InstiUserReportHelper(_institution, _user, self.yearmonth) + return _helper.report # helper @@ -37,7 +47,6 @@ class _InstiUserReportHelper: institution: osfdb.Institution user: osfdb.OSFUser yearmonth: YearMonth - before_datetime: datetime.datetime report: InstitutionalUserReport = dataclasses.field(init=False) def __post_init__(self): @@ -64,6 +73,10 @@ def __post_init__(self): storage_byte_count=self._storage_byte_count(), ) + @property + def before_datetime(self): + return self.yearmonth.month_end() + def _node_queryset(self): _institution_node_qs = self.institution.nodes.filter( created__lt=self.before_datetime, diff --git 
a/osf/metrics/reporters/public_item_usage.py b/osf/metrics/reporters/public_item_usage.py index ecc34a5d9c7..cc401d50bd7 100644 --- a/osf/metrics/reporters/public_item_usage.py +++ b/osf/metrics/reporters/public_item_usage.py @@ -1,17 +1,24 @@ from __future__ import annotations +import datetime import typing -import celery +import waffle if typing.TYPE_CHECKING: import elasticsearch_dsl as edsl +import osf.features from osf.metadata.osf_gathering import OsfmapPartition from osf.metrics.counted_usage import ( CountedAuthUsage, get_item_type, get_provider_id, ) +from osf.metrics.preprint_metrics import ( + PreprintDownload, + PreprintView, +) from osf.metrics.reports import PublicItemUsageReport +from osf.metrics.utils import YearMonth from osf import models as osfdb from website import settings as website_settings from ._base import MonthlyReporter @@ -31,80 +38,128 @@ class PublicItemUsageReporter(MonthlyReporter): includes projects, project components, registrations, registration components, and preprints ''' - - def report(self): - # use two composite aggregations in parallel to page thru every - # public item viewed or downloaded this month, counting: - # - views and downloads for each item (using `CountedAuthUsage.item_guid`) - # - views for each item's components and files (using `CountedAuthUsage.surrounding_guids`) - for _exact_bucket, _contained_views_bucket in _zip_composite_aggs( - self._exact_item_search(), 'agg_osfid', - self._contained_item_views_search(), 'agg_surrounding_osfid', + def iter_report_kwargs(self, continue_after: dict | None = None): + _after_osfid = continue_after['osfid'] if continue_after else None + for _osfid in _zip_sorted( + self._countedusage_osfids(_after_osfid), + self._preprintview_osfids(_after_osfid), + self._preprintdownload_osfids(_after_osfid), ): - try: - _report = self._report_from_buckets(_exact_bucket, _contained_views_bucket) - yield _report - except _SkipItem: - pass + yield {'osfid': _osfid} + + def report(self, **report_kwargs): + _osfid = report_kwargs['osfid'] + # get usage metrics from several sources: + # - osf.metrics.counted_usage: + # - views and downloads for each item (using `CountedAuthUsage.item_guid`) + # - views for each item's components and files (using `CountedAuthUsage.surrounding_guids`) + # - osf.metrics.preprint_metrics: + # - preprint views and downloads + # - PageCounter? 
(no) + try: + _guid = osfdb.Guid.load(_osfid) + if _guid is None or _guid.referent is None: + raise _SkipItem + _obj = _guid.referent + _report = self._init_report(_obj) + self._fill_report_counts(_report, _obj) + if not any(( + _report.view_count, + _report.view_session_count, + _report.download_count, + _report.download_session_count, + )): + raise _SkipItem + return _report + except _SkipItem: + return None + + def followup_task(self, report): + _is_last_month = report.report_yearmonth.next() == YearMonth.from_date(datetime.date.today()) + if _is_last_month: + from api.share.utils import task__update_share + return task__update_share.signature( + args=(report.item_osfid,), + kwargs={ + 'is_backfill': True, + 'osfmap_partition_name': OsfmapPartition.MONTHLY_SUPPLEMENT.name, + }, + countdown=30, # give index time to settle + ) - def followup_task(self): - return task__update_monthly_metadatas.signature( - args=[str(self.yearmonth)], - countdown=30, # give index time to settle + def _countedusage_osfids(self, after_osfid: str | None) -> typing.Iterator[str]: + _search = self._base_usage_search() + _search.aggs.bucket( + 'agg_osfid', + 'composite', + sources=[{'osfid': {'terms': {'field': 'item_guid'}}}], + size=_CHUNK_SIZE, ) + return _iter_composite_bucket_keys(_search, 'agg_osfid', 'osfid', after=after_osfid) - def _report_from_buckets(self, exact_bucket, contained_views_bucket): - # either exact_bucket or contained_views_bucket may be None, but not both - assert (exact_bucket is not None) or (contained_views_bucket is not None) - _report = ( - self._init_report_from_exact_bucket(exact_bucket) - if exact_bucket is not None - else self._init_report_from_osfid(contained_views_bucket.key.osfid) + def _preprintview_osfids(self, after_osfid: str | None) -> typing.Iterator[str]: + _search = ( + PreprintView.search() + .filter('range', timestamp={ + 'gte': self.yearmonth.month_start(), + 'lt': self.yearmonth.month_end(), + }) + .extra(size=0) # only aggregations, no hits + ) + _search.aggs.bucket( + 'agg_osfid', + 'composite', + sources=[{'osfid': {'terms': {'field': 'preprint_id'}}}], + size=_CHUNK_SIZE, ) - # view counts include views on contained items (components, files) - _report.view_count, _report.view_session_count = self._get_view_counts(_report.item_osfid) - return _report + return _iter_composite_bucket_keys(_search, 'agg_osfid', 'osfid', after=after_osfid) - def _init_report_from_exact_bucket(self, exact_bucket) -> PublicItemUsageReport: - # in the (should-be common) case of an item that has been directly viewed in - # this month, the stored metrics already have the data required - _report = PublicItemUsageReport( - item_osfid=exact_bucket.key.osfid, - item_type=_agg_keys(exact_bucket.agg_item_type), - provider_id=_agg_keys(exact_bucket.agg_provider_id), - platform_iri=_agg_keys(exact_bucket.agg_platform_iri), - # default counts to zero, will be updated if non-zero - view_count=0, - view_session_count=0, - download_count=0, - download_session_count=0, + def _preprintdownload_osfids(self, after_osfid: str | None) -> typing.Iterator[str]: + _search = ( + PreprintDownload.search() + .filter('range', timestamp={ + 'gte': self.yearmonth.month_start(), + 'lt': self.yearmonth.month_end(), + }) + .extra(size=0) # only aggregations, no hits ) - for _actionbucket in exact_bucket.agg_action: - # note: view counts computed separately to avoid double-counting - if _actionbucket.key == CountedAuthUsage.ActionLabel.DOWNLOAD.value: - _report.download_count = _actionbucket.doc_count - 
_report.download_session_count = _actionbucket.agg_session_count.value - return _report + _search.aggs.bucket( + 'agg_osfid', + 'composite', + sources=[{'osfid': {'terms': {'field': 'preprint_id'}}}], + size=_CHUNK_SIZE, + ) + return _iter_composite_bucket_keys(_search, 'agg_osfid', 'osfid', after=after_osfid) - def _init_report_from_osfid(self, osfid: str) -> PublicItemUsageReport: - # for the (should-be unusual) case where the components/files contained by - # an item have views in this month, but the item itself does not -- - # load necessary info via django models, instead - _osfguid = osfdb.Guid.load(osfid) - if _osfguid is None or not getattr(_osfguid.referent, 'is_public', False): + def _init_report(self, osf_obj) -> PublicItemUsageReport: + if not _is_item_public(osf_obj): raise _SkipItem return PublicItemUsageReport( - item_osfid=osfid, - item_type=[get_item_type(_osfguid.referent)], - provider_id=[get_provider_id(_osfguid.referent)], + item_osfid=osf_obj._id, + item_type=[get_item_type(osf_obj)], + provider_id=[get_provider_id(osf_obj)], platform_iri=[website_settings.DOMAIN], - # default counts to zero, will be updated if non-zero - view_count=0, - view_session_count=0, - download_count=0, - download_session_count=0, + # leave counts null; will be set if there's data ) + def _fill_report_counts(self, report, osf_obj): + if ( + isinstance(osf_obj, osfdb.Preprint) + and not waffle.switch_is_active(osf.features.COUNTEDUSAGE_UNIFIED_METRICS_2024) # type: ignore[attr-defined] + ): + # note: no session-count info in preprint metrics + report.view_count = self._preprint_views(osf_obj) + report.download_count = self._preprint_downloads(osf_obj) + else: + ( + report.view_count, + report.view_session_count, + ) = self._countedusage_view_counts(osf_obj) + ( + report.download_count, + report.download_session_count, + ) = self._countedusage_download_counts(osf_obj) + def _base_usage_search(self): return ( CountedAuthUsage.search() @@ -113,59 +168,10 @@ def _base_usage_search(self): 'gte': self.yearmonth.month_start(), 'lt': self.yearmonth.month_end(), }) - .update_from_dict({'size': 0}) # only aggregations, no hits + .extra(size=0) # only aggregations, no hits ) - def _exact_item_search(self) -> edsl.Search: - '''aggregate views and downloads on each osfid (not including components/files)''' - _search = self._base_usage_search() - # the main agg: use a composite aggregation to page thru *every* item - _agg_osfid = _search.aggs.bucket( - 'agg_osfid', - 'composite', - sources=[{'osfid': {'terms': {'field': 'item_guid'}}}], - size=_CHUNK_SIZE, - ) - # nested agg: for each item, get platform_iri values - _agg_osfid.bucket('agg_platform_iri', 'terms', field='platform_iri') - # nested agg: for each item, get provider_id values - _agg_osfid.bucket('agg_provider_id', 'terms', field='provider_id') - # nested agg: for each item, get item_type values - _agg_osfid.bucket('agg_item_type', 'terms', field='item_type') - # nested agg: for each item, get download count - _agg_action = _agg_osfid.bucket( - 'agg_action', - 'terms', - field='action_labels', - include=[ - CountedAuthUsage.ActionLabel.DOWNLOAD.value, - ], - ) - # nested nested agg: get download session count - _agg_action.metric( - 'agg_session_count', - 'cardinality', - field='session_id', - precision_threshold=_MAX_CARDINALITY_PRECISION, - ) - return _search - - def _contained_item_views_search(self) -> edsl.Search: - '''iterate osfids with views on contained components and files''' - _search = ( - self._base_usage_search() - .filter('term', 
action_labels=CountedAuthUsage.ActionLabel.VIEW.value) - ) - # the main agg: use a composite aggregation to page thru *every* item - _search.aggs.bucket( - 'agg_surrounding_osfid', - 'composite', - sources=[{'osfid': {'terms': {'field': 'surrounding_guids'}}}], - size=_CHUNK_SIZE, - ) - return _search - - def _get_view_counts(self, osfid: str) -> tuple[int, int]: + def _countedusage_view_counts(self, osf_obj) -> tuple[int, int]: '''compute view_session_count separately to avoid double-counting (the same session may be represented in both the composite agg on `item_guid` @@ -179,8 +185,8 @@ def _get_view_counts(self, osfid: str) -> tuple[int, int]: {'term': {'action_labels': CountedAuthUsage.ActionLabel.VIEW.value}}, ], should=[ - {'term': {'item_guid': osfid}}, - {'term': {'surrounding_guids': osfid}}, + {'term': {'item_guid': osf_obj._id}}, + {'term': {'surrounding_guids': osf_obj._id}}, ], minimum_should_match=1, ) @@ -193,86 +199,108 @@ def _get_view_counts(self, osfid: str) -> tuple[int, int]: ) _response = _search.execute() _view_count = _response.hits.total - _view_session_count = _response.aggregations.agg_session_count.value + _view_session_count = ( + _response.aggregations.agg_session_count.value + if 'agg_session_count' in _response.aggregations + else 0 + ) return (_view_count, _view_session_count) - -### -# followup celery task -@celery.shared_task -def task__update_monthly_metadatas(yearmonth: str): - from api.share.utils import task__update_share - _report_search = ( - PublicItemUsageReport.search() - .filter('term', report_yearmonth=yearmonth) - .source(['item_osfid']) # return only the 'item_osfid' field - ) - for _hit in _report_search.scan(): - task__update_share.delay( - _hit.item_osfid, - is_backfill=True, - osfmap_partition_name=OsfmapPartition.MONTHLY_SUPPLEMENT.name, + def _countedusage_download_counts(self, osf_obj) -> tuple[int, int]: + '''aggregate downloads on each osfid (not including components/files)''' + _search = ( + self._base_usage_search() + .filter('term', item_guid=osf_obj._id) + .filter('term', action_labels=CountedAuthUsage.ActionLabel.DOWNLOAD.value) + ) + # agg: get download session count + _search.aggs.metric( + 'agg_session_count', + 'cardinality', + field='session_id', + precision_threshold=_MAX_CARDINALITY_PRECISION, + ) + _response = _search.execute() + _download_count = _response.hits.total + _download_session_count = ( + _response.aggregations.agg_session_count.value + if 'agg_session_count' in _response.aggregations + else 0 + ) + return (_download_count, _download_session_count) + + def _preprint_views(self, preprint: osfdb.Preprint) -> int: + '''aggregate views on each preprint''' + return PreprintView.get_count_for_preprint( + preprint=preprint, + after=self.yearmonth.month_start(), + before=self.yearmonth.month_end(), ) + def _preprint_downloads(self, preprint: osfdb.Preprint) -> int: + '''aggregate downloads on each preprint''' + return PreprintDownload.get_count_for_preprint( + preprint=preprint, + after=self.yearmonth.month_start(), + before=self.yearmonth.month_end(), + ) -### -# local helpers - -def _agg_keys(bucket_agg_result) -> list: - return [_bucket.key for _bucket in bucket_agg_result] +def _is_item_public(osfid_referent) -> bool: + if isinstance(osfid_referent, osfdb.Preprint): + return bool(osfid_referent.verified_publishable) # quacks like Preprint + return getattr(osfid_referent, 'is_public', False) # quacks like AbstractNode -def _zip_composite_aggs( - search_a: edsl.Search, - composite_agg_name_a: str, - search_b: 
edsl.Search, - composite_agg_name_b: str, -): - '''iterate thru two composite aggregations, yielding pairs of buckets matched by key - the composite aggregations must have matching names in `sources` so their keys can be compared +def _zip_sorted( + *iterators: typing.Iterator[str], +) -> typing.Iterator[str]: + '''loop thru multiple iterators on sorted (ascending) sequences of strings ''' - _iter_a = _iter_composite_buckets(search_a, composite_agg_name_a) - _iter_b = _iter_composite_buckets(search_b, composite_agg_name_b) - _next_a = next(_iter_a, None) - _next_b = next(_iter_b, None) + _nexts = { # holds the next value from each iterator, or None + _i: next(_iter, None) + for _i, _iter in enumerate(iterators) + } while True: - if _next_a is None and _next_b is None: - return # both done - elif _next_a is None or _next_b is None: - # one is done but not the other -- no matching needed - yield (_next_a, _next_b) - _next_a = next(_iter_a, None) - _next_b = next(_iter_b, None) - elif _next_a.key == _next_b.key: - # match -- yield and increment both - yield (_next_a, _next_b) - _next_a = next(_iter_a, None) - _next_b = next(_iter_b, None) - elif _orderable_key(_next_a) < _orderable_key(_next_b): - # mismatch -- yield and increment a (but not b) - yield (_next_a, None) - _next_a = next(_iter_a, None) - else: - # mismatch -- yield and increment b (but not a) - yield (None, _next_b) - _next_b = next(_iter_b, None) - - -def _iter_composite_buckets(search: edsl.Search, composite_agg_name: str): + _nonnull_nexts = [ + _next + for _next in _nexts.values() + if _next is not None + ] + if not _nonnull_nexts: + return # all done + _value = min(_nonnull_nexts) + yield _value + for _i, _iter in enumerate(iterators): + if _nexts[_i] == _value: + _nexts[_i] = next(_iter, None) + + +def _iter_composite_bucket_keys( + search: edsl.Search, + composite_agg_name: str, + composite_source_name: str, + after: str | None = None, +) -> typing.Iterator[str]: '''iterate thru *all* buckets of a composite aggregation, requesting new pages as needed assumes the given search has a composite aggregation of the given name + with a single value source of the given name updates the search in-place for subsequent pages ''' + if after is not None: + search.aggs[composite_agg_name].after = {composite_source_name: after} while True: _page_response = search.execute(ignore_cache=True) # reused search object has the previous page cached try: _agg_result = _page_response.aggregations[composite_agg_name] except KeyError: return # no data; all done - yield from _agg_result.buckets + for _bucket in _agg_result.buckets: + _key = _bucket.key.to_dict() + assert set(_key.keys()) == {composite_source_name}, f'expected only one key ("{composite_source_name}") in {_bucket.key}' + yield _key[composite_source_name] # update the search for the next page try: _next_after = _agg_result.after_key @@ -280,7 +308,3 @@ def _iter_composite_buckets(search: edsl.Search, composite_agg_name: str): return # all done else: search.aggs[composite_agg_name].after = _next_after - - -def _orderable_key(composite_bucket) -> list: - return sorted(composite_bucket.key.to_dict().items()) diff --git a/osf/metrics/reporters/spam_count.py b/osf/metrics/reporters/spam_count.py index 94290f96203..cb1c3eeb641 100644 --- a/osf/metrics/reporters/spam_count.py +++ b/osf/metrics/reporters/spam_count.py @@ -8,11 +8,12 @@ class SpamCountReporter(MonthlyReporter): - def report(self): + def report(self, **report_kwargs): + assert not report_kwargs target_month = 
self.yearmonth.month_start() next_month = self.yearmonth.month_end() - report = SpamSummaryReport( + return SpamSummaryReport( # Node Log entries node_confirmed_spam=NodeLog.objects.filter( action=NodeLog.CONFIRM_SPAM, @@ -79,5 +80,3 @@ def report(self): created__lt=next_month, ).count() ) - - return [report] diff --git a/osf/metrics/utils.py b/osf/metrics/utils.py index 910b1f3104c..febfd24d6d2 100644 --- a/osf/metrics/utils.py +++ b/osf/metrics/utils.py @@ -58,6 +58,14 @@ def next(self) -> YearMonth: else YearMonth(self.year, self.month + 1) ) + def prior(self) -> YearMonth: + """get a new YearMonth for the month before this one""" + return ( + YearMonth(self.year - 1, int(calendar.DECEMBER)) + if self.month == calendar.JANUARY + else YearMonth(self.year, self.month - 1) + ) + def month_start(self) -> datetime.datetime: """get a datetime (in UTC timezone) when this YearMonth starts""" return datetime.datetime(self.year, self.month, 1, tzinfo=datetime.UTC) diff --git a/osf_tests/metrics/reporters/__init__.py b/osf_tests/metrics/reporters/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/osf_tests/metrics/reporters/_testutils.py b/osf_tests/metrics/reporters/_testutils.py new file mode 100644 index 00000000000..0d18f3bcac9 --- /dev/null +++ b/osf_tests/metrics/reporters/_testutils.py @@ -0,0 +1,10 @@ +from osf.metrics.reporters._base import MonthlyReporter +from osf.metrics.reports import MonthlyReport + + +def list_monthly_reports(reporter: MonthlyReporter) -> list[MonthlyReport]: + _reports = ( + reporter.report(**_kwargs) + for _kwargs in reporter.iter_report_kwargs() + ) + return [_report for _report in _reports if (_report is not None)] diff --git a/osf_tests/metrics/reporters/test_institutional_summary_reporter.py b/osf_tests/metrics/reporters/test_institutional_summary_reporter.py index 715a2cd1553..05baa4d38e7 100644 --- a/osf_tests/metrics/reporters/test_institutional_summary_reporter.py +++ b/osf_tests/metrics/reporters/test_institutional_summary_reporter.py @@ -11,6 +11,7 @@ PreprintFactory, AuthUserFactory, ) +from ._testutils import list_monthly_reports class TestInstiSummaryMonthlyReporter(TestCase): @@ -78,7 +79,7 @@ def _create_active_user(cls, institution, date_confirmed): def test_report_generation(self): reporter = InstitutionalSummaryMonthlyReporter(self._yearmonth) - reports = list(reporter.report()) + reports = list_monthly_reports(reporter) self.assertEqual(len(reports), 1) report = reports[0] @@ -114,7 +115,7 @@ def test_report_generation_multiple_institutions(self): # Run the reporter for the current month (February 2018) reporter = InstitutionalSummaryMonthlyReporter(self._yearmonth) - reports = list(reporter.report()) + reports = list_monthly_reports(reporter) self.assertEqual(len(reports), 3) # Reports for self._institution, institution2, institution3 # Extract reports by institution @@ -263,7 +264,7 @@ def test_high_counts_multiple_institutions(self): if enable_benchmarking: reporter_start_time = time.time() reporter = InstitutionalSummaryMonthlyReporter(self._yearmonth) - reports = list(reporter.report()) + reports = list_monthly_reports(reporter) assert len(reports) == additional_institution_count + 1 if enable_benchmarking: diff --git a/osf_tests/metrics/reporters/test_institutional_users_reporter.py b/osf_tests/metrics/reporters/test_institutional_users_reporter.py index 876fd08cf9b..275fcb1e8a1 100644 --- a/osf_tests/metrics/reporters/test_institutional_users_reporter.py +++ 
b/osf_tests/metrics/reporters/test_institutional_users_reporter.py @@ -18,6 +18,7 @@ UserFactory, EmbargoFactory, ) +from ._testutils import list_monthly_reports def _patch_now(fakenow: datetime.datetime): @@ -67,24 +68,24 @@ def _assert_report_matches_setup(self, report: InstitutionalUserReport, setup: _ self.assertEqual(report.published_preprint_count, setup.published_preprint_count) def test_no_users(self): - _actual_reports = list(InstitutionalUsersReporter(self._yearmonth).report()) + _actual_reports = list_monthly_reports(InstitutionalUsersReporter(self._yearmonth)) self.assertEqual(_actual_reports, []) def test_one_user_with_nothing(self): self._user_setup_with_nothing.affiliate_user() - _reports = list(InstitutionalUsersReporter(self._yearmonth).report()) + _reports = list_monthly_reports(InstitutionalUsersReporter(self._yearmonth)) self.assertEqual(len(_reports), 1) self._assert_report_matches_setup(_reports[0], self._user_setup_with_nothing) def test_one_user_with_ones(self): self._user_setup_with_ones.affiliate_user() - _reports = list(InstitutionalUsersReporter(self._yearmonth).report()) + _reports = list_monthly_reports(InstitutionalUsersReporter(self._yearmonth)) self.assertEqual(len(_reports), 1) self._assert_report_matches_setup(_reports[0], self._user_setup_with_ones) def test_one_user_with_stuff_and_no_files(self): self._user_setup_with_stuff.affiliate_user() - _reports = list(InstitutionalUsersReporter(self._yearmonth).report()) + _reports = list_monthly_reports(InstitutionalUsersReporter(self._yearmonth)) self.assertEqual(len(_reports), 1) self._assert_report_matches_setup(_reports[0], self._user_setup_with_stuff) self.assertEqual(_reports[0].public_file_count, 2) # preprint 2 files @@ -96,7 +97,7 @@ def test_one_user_with_stuff_and_a_file(self): _project = _user.nodes.first() with _patch_now(self._now): create_test_file(target=_project, user=_user, size=37) - (_report,) = InstitutionalUsersReporter(self._yearmonth).report() + (_report,) = list_monthly_reports(InstitutionalUsersReporter(self._yearmonth)) self._assert_report_matches_setup(_report, self._user_setup_with_stuff) self.assertEqual(_report.public_file_count, 3) # 2 preprint files self.assertEqual(_report.storage_byte_count, 2711) # 2 preprint files @@ -113,7 +114,7 @@ def test_one_user_with_stuff_and_multiple_files(self): create_test_file(target=_component, user=_user, size=53, filename='bla') create_test_file(target=_component, user=_user, size=51, filename='blar') create_test_file(target=_component, user=_user, size=47, filename='blarg') - (_report,) = InstitutionalUsersReporter(self._yearmonth).report() + (_report,) = list_monthly_reports(InstitutionalUsersReporter(self._yearmonth)) self._assert_report_matches_setup(_report, self._user_setup_with_stuff) self.assertEqual(_report.public_file_count, 7) # 2 preprint files self.assertEqual(_report.storage_byte_count, 2935) # 2 preprint files + 37 + 73 + 53 + 51 + 47 @@ -130,7 +131,7 @@ def test_several_users(self): _setup.user._id: _setup for _setup in _setups } - _reports = list(InstitutionalUsersReporter(self._yearmonth).report()) + _reports = list_monthly_reports(InstitutionalUsersReporter(self._yearmonth)) self.assertEqual(len(_reports), len(_setup_by_userid)) for _actual_report in _reports: _setup = _setup_by_userid[_actual_report.user_id] diff --git a/osf_tests/metrics/reporters/test_public_item_usage_reporter.py b/osf_tests/metrics/reporters/test_public_item_usage_reporter.py index 454b8d6700d..b75c420b1a2 100644 --- 
a/osf_tests/metrics/reporters/test_public_item_usage_reporter.py +++ b/osf_tests/metrics/reporters/test_public_item_usage_reporter.py @@ -1,27 +1,48 @@ -from datetime import timedelta +from datetime import datetime, timedelta from operator import attrgetter from unittest import mock import pytest from osf.metrics.counted_usage import CountedAuthUsage +from osf.metrics.preprint_metrics import ( + PreprintDownload, + PreprintView, +) from osf.metrics.reporters.public_item_usage import PublicItemUsageReporter from osf.metrics.reports import PublicItemUsageReport from osf.metrics.utils import YearMonth +from osf import models as osfdb +from osf_tests import factories +from ._testutils import list_monthly_reports @pytest.mark.es_metrics +@pytest.mark.django_db class TestPublicItemUsageReporter: @pytest.fixture(autouse=True) - def _mocks(self): - with ( - # set a tiny page size to force aggregation pagination: - mock.patch('osf.metrics.reporters.public_item_usage._CHUNK_SIZE', 1), - # HACK: skip auto-filling fields from the database: - mock.patch('osf.models.base.Guid.load', return_value=None), - ): + def _patch_settings(self): + with mock.patch('website.settings.DOMAIN', 'http://osf.example'): yield + @pytest.fixture + def item0(self): + _item0 = factories.PreprintFactory(is_public=True) + _item0._id = 'item0' + return _item0 + + @pytest.fixture + def item1(self): + _item1 = factories.ProjectFactory(is_public=True) + _item1._id = 'item1' + return _item1 + + @pytest.fixture + def item2(self, item1): + _item2 = factories.ProjectFactory(is_public=True, parent=item1) + _item2._id = 'item2' + return _item2 + @pytest.fixture def ym_empty(self) -> YearMonth: return YearMonth(2012, 7) @@ -35,89 +56,87 @@ def ym_busy(self) -> YearMonth: return YearMonth(2023, 7) @pytest.fixture - def sparse_month_usage(self, ym_sparse): + def sparse_month_usage(self, ym_sparse, item0, item1, item2): # "sparse" month: # item0: 3 views, 0 downloads, 2 sessions # item1: 1 views, 1 download, 1 session (plus 1 view from child item2) # item2: 1 views, 0 downloads, 1 session _month_start = ym_sparse.month_start() _save_usage( + item0, timestamp=_month_start, - item_guid='item0', session_id='sesh0', action_labels=['view'], ) _save_usage( + item0, timestamp=_month_start + timedelta(minutes=2), - item_guid='item0', session_id='sesh0', action_labels=['view'], ) _save_usage( + item1, timestamp=_month_start + timedelta(minutes=3), - item_guid='item1', session_id='sesh0', action_labels=['download'], ) _save_usage( + item0, timestamp=_month_start + timedelta(days=17), - item_guid='item0', session_id='sesh1', action_labels=['view'], ) _save_usage( + item1, timestamp=_month_start + timedelta(days=17, minutes=3), - item_guid='item1', session_id='sesh1', action_labels=['view'], ) _save_usage( + item2, timestamp=_month_start + timedelta(days=17, minutes=5), - item_guid='item2', - surrounding_guids=['item1'], session_id='sesh1', action_labels=['view'], ) _save_usage( + item2, timestamp=_month_start + timedelta(days=17, minutes=11), - item_guid='item2', - surrounding_guids=['item1'], session_id='sesh1', action_labels=['download'], ) @pytest.fixture - def busy_month_item0(self, ym_busy): + def busy_month_item0(self, ym_busy, item0): # item0: 4 sessions, 4*7 views, 4*5 downloads _month_start = ym_busy.month_start() for _sesh in range(0, 4): _sesh_start = _month_start + timedelta(days=_sesh) for _minute in range(0, 7): _save_usage( + item0, timestamp=_sesh_start + timedelta(minutes=_minute), - item_guid='item0', session_id=f'sesh0{_sesh}', 
action_labels=['view'], ) for _minute in range(10, 15): _save_usage( + item0, timestamp=_sesh_start + timedelta(minutes=_minute), - item_guid='item0', session_id=f'sesh0{_sesh}', action_labels=['download'], ) @pytest.fixture - def busy_month_item1(self, ym_busy): - # item1: 10 sessions, 6*9 views, 5*7 downloads, 2 providers + def busy_month_item1(self, ym_busy, item1): + # item1: 10 sessions, 6*9 views, 5*7 downloads # (plus 11 views in 11 sessions from child item2) _month_start = ym_busy.month_start() for _sesh in range(0, 6): _sesh_start = _month_start + timedelta(days=_sesh) for _minute in range(0, 9): _save_usage( + item1, timestamp=_sesh_start + timedelta(minutes=_minute), - item_guid='item1', session_id=f'sesh1{_sesh}', action_labels=['view'], ) @@ -125,42 +144,39 @@ def busy_month_item1(self, ym_busy): _sesh_start = _month_start + timedelta(days=_sesh) for _minute in range(10, 17): _save_usage( + item1, timestamp=_sesh_start + timedelta(minutes=_minute), - item_guid='item1', session_id=f'sesh1{_sesh}', action_labels=['download'], - provider_id='prov1', # additional provider_id ) @pytest.fixture - def busy_month_item2(self, ym_busy): + def busy_month_item2(self, ym_busy, item2): # item2: 11 sessions, 11 views, 11 downloads (child of item1) _month_start = ym_busy.month_start() for _sesh in range(1, 12): _save_usage( + item2, timestamp=_month_start + timedelta(days=_sesh), - item_guid='item2', - surrounding_guids=['item1'], session_id=f'sesh2{_sesh}', action_labels=['view'], ) _save_usage( + item2, timestamp=_month_start + timedelta(days=_sesh, hours=_sesh), - item_guid='item2', - surrounding_guids=['item1'], session_id=f'sesh2{_sesh}', action_labels=['download'], ) def test_no_data(self, ym_empty): _reporter = PublicItemUsageReporter(ym_empty) - _empty = list(_reporter.report()) + _empty = list_monthly_reports(_reporter) assert _empty == [] - def test_reporter(self, ym_empty, ym_sparse, ym_busy, sparse_month_usage, busy_month_item0, busy_month_item1, busy_month_item2): - _empty = list(PublicItemUsageReporter(ym_empty).report()) - _sparse = list(PublicItemUsageReporter(ym_sparse).report()) - _busy = list(PublicItemUsageReporter(ym_busy).report()) + def test_reporter(self, ym_empty, ym_sparse, ym_busy, sparse_month_usage, busy_month_item0, busy_month_item1, busy_month_item2, item0): + _empty = list_monthly_reports(PublicItemUsageReporter(ym_empty)) + _sparse = list_monthly_reports(PublicItemUsageReporter(ym_sparse)) + _busy = list_monthly_reports(PublicItemUsageReporter(ym_busy)) # empty month: assert _empty == [] @@ -171,16 +187,16 @@ def test_reporter(self, ym_empty, ym_sparse, ym_busy, sparse_month_usage, busy_m # sparse-month item0 assert isinstance(_sparse_item0, PublicItemUsageReport) assert _sparse_item0.item_osfid == 'item0' - assert _sparse_item0.provider_id == ['prov0'] + assert _sparse_item0.provider_id == [item0.provider._id] assert _sparse_item0.platform_iri == ['http://osf.example'] assert _sparse_item0.view_count == 3 - assert _sparse_item0.view_session_count == 2 + assert _sparse_item0.view_session_count is None # no session count for preprints assert _sparse_item0.download_count == 0 - assert _sparse_item0.download_session_count == 0 + assert _sparse_item0.download_session_count is None # no session count for preprints # sparse-month item1 assert isinstance(_sparse_item1, PublicItemUsageReport) assert _sparse_item1.item_osfid == 'item1' - assert _sparse_item1.provider_id == ['prov0'] + assert _sparse_item1.provider_id == ['osf'] assert _sparse_item1.platform_iri == 
['http://osf.example'] assert _sparse_item1.view_count == 2 # including item2 assert _sparse_item1.view_session_count == 1 # including item2 @@ -189,7 +205,7 @@ def test_reporter(self, ym_empty, ym_sparse, ym_busy, sparse_month_usage, busy_m # sparse-month item2 assert isinstance(_sparse_item1, PublicItemUsageReport) assert _sparse_item2.item_osfid == 'item2' - assert _sparse_item2.provider_id == ['prov0'] + assert _sparse_item2.provider_id == ['osf'] assert _sparse_item2.platform_iri == ['http://osf.example'] assert _sparse_item2.view_count == 1 assert _sparse_item2.view_session_count == 1 @@ -202,16 +218,16 @@ def test_reporter(self, ym_empty, ym_sparse, ym_busy, sparse_month_usage, busy_m # busy-month item0 assert isinstance(_busy_item0, PublicItemUsageReport) assert _busy_item0.item_osfid == 'item0' - assert _busy_item0.provider_id == ['prov0'] + assert _busy_item0.provider_id == [item0.provider._id] assert _busy_item0.platform_iri == ['http://osf.example'] assert _busy_item0.view_count == 4 * 7 - assert _busy_item0.view_session_count == 4 + assert _busy_item0.view_session_count is None # no session count for preprints assert _busy_item0.download_count == 4 * 5 - assert _busy_item0.download_session_count == 4 + assert _busy_item0.download_session_count is None # no session count for preprints # busy-month item1 assert isinstance(_busy_item1, PublicItemUsageReport) assert _busy_item1.item_osfid == 'item1' - assert _busy_item1.provider_id == ['prov0', 'prov1'] + assert _busy_item1.provider_id == ['osf'] assert _busy_item1.platform_iri == ['http://osf.example'] assert _busy_item1.view_count == 6 * 9 + 11 assert _busy_item1.view_session_count == 6 + 11 @@ -220,7 +236,7 @@ def test_reporter(self, ym_empty, ym_sparse, ym_busy, sparse_month_usage, busy_m # busy-month item2 assert isinstance(_busy_item2, PublicItemUsageReport) assert _busy_item2.item_osfid == 'item2' - assert _busy_item2.provider_id == ['prov0'] + assert _busy_item2.provider_id == ['osf'] assert _busy_item2.platform_iri == ['http://osf.example'] assert _busy_item2.view_count == 11 assert _busy_item2.view_session_count == 11 @@ -228,11 +244,41 @@ def test_reporter(self, ym_empty, ym_sparse, ym_busy, sparse_month_usage, busy_m assert _busy_item2.download_session_count == 11 -def _save_usage(**kwargs): - _kwargs = { # overridable defaults: +def _save_usage( + item, + *, + timestamp: datetime, + action_labels: list[str], + **kwargs, +): + _countedusage_kwargs = { + 'timestamp': timestamp, + 'item_guid': item._id, + 'action_labels': action_labels, 'platform_iri': 'http://osf.example', - 'item_public': True, - 'provider_id': 'prov0', **kwargs, } - CountedAuthUsage(**_kwargs).save(refresh=True) + CountedAuthUsage(**_countedusage_kwargs).save(refresh=True) + if isinstance(item, osfdb.Preprint): + if 'view' in action_labels: + _save_preprint_view(item, timestamp) + if 'download' in action_labels: + _save_preprint_download(item, timestamp) + + +def _save_preprint_view(preprint, timestamp): + PreprintView( + timestamp=timestamp, + count=1, + preprint_id=preprint._id, + provider_id=preprint.provider._id, + ).save(refresh=True) + + +def _save_preprint_download(preprint, timestamp): + PreprintDownload( + timestamp=timestamp, + count=1, + preprint_id=preprint._id, + provider_id=preprint.provider._id, + ).save(refresh=True) diff --git a/osf_tests/metrics/test_yearmonth.txt b/osf_tests/metrics/test_yearmonth.txt index 646c73c42f9..fae6b990c36 100644 --- a/osf_tests/metrics/test_yearmonth.txt +++ b/osf_tests/metrics/test_yearmonth.txt @@ -35,6 
+35,13 @@ YearMonth(year=1491, month=12) >>> ym.next().next() YearMonth(year=1492, month=1) +`prior` method gives the prior year-month: +>>> ym = YearMonth(1492, 2) +>>> ym.prior() +YearMonth(year=1492, month=1) +>>> ym.prior().prior() +YearMonth(year=1491, month=12) + `month_start` method: >>> YearMonth(3333, 3).month_start() datetime.datetime(3333, 3, 1, 0, 0, tzinfo=datetime.timezone.utc) From 0ec9101d1bbcc4df8e163fc7283ee9b5b1e7da2b Mon Sep 17 00:00:00 2001 From: Matt Frazier Date: Mon, 2 Dec 2024 15:45:02 -0500 Subject: [PATCH 22/35] Avoid Sequence Scans on BFN --- osf/metrics/reporters/institutional_users.py | 37 ++++++++++++++------ 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/osf/metrics/reporters/institutional_users.py b/osf/metrics/reporters/institutional_users.py index e34875d4b28..512472a3d96 100644 --- a/osf/metrics/reporters/institutional_users.py +++ b/osf/metrics/reporters/institutional_users.py @@ -68,7 +68,7 @@ def __post_init__(self): private_project_count=self._private_project_queryset().count(), public_registration_count=self._public_registration_queryset().count(), embargoed_registration_count=self._embargoed_registration_queryset().count(), - public_file_count=self._public_osfstorage_file_queryset().count(), + public_file_count=self._public_osfstorage_file_count(), published_preprint_count=self._published_preprint_queryset().count(), storage_byte_count=self._storage_byte_count(), ) @@ -127,7 +127,7 @@ def _published_preprint_queryset(self): .exclude(spam_status=SpamStatus.SPAM) ) - def _public_osfstorage_file_queryset(self): + def _public_osfstorage_file_querysets(self): _target_node_q = Q( # any public project, registration, project component, or registration component target_object_id__in=self._node_queryset().filter(is_public=True).values('pk'), @@ -137,23 +137,40 @@ def _public_osfstorage_file_queryset(self): target_object_id__in=self._published_preprint_queryset().values('pk'), target_content_type=ContentType.objects.get_for_model(osfdb.Preprint), ) - return ( + return ( # split into two queries to avoid a parallel sequence scan on BFN + OsfStorageFile.objects + .filter( + created__lt=self.before_datetime, + deleted__isnull=True, + purged__isnull=True, + ) + .filter(_target_node_q), OsfStorageFile.objects .filter( created__lt=self.before_datetime, deleted__isnull=True, purged__isnull=True, ) - .filter(_target_node_q | _target_preprint_q) + .filter(_target_preprint_q) + ) + + def _public_osfstorage_file_count(self): + return sum( + _target_queryset.count() for _target_queryset + in self._public_osfstorage_file_querysets() ) def _storage_byte_count(self): - return osfdb.FileVersion.objects.filter( - size__gt=0, - created__lt=self.before_datetime, - purged__isnull=True, - basefilenode__in=self._public_osfstorage_file_queryset(), - ).aggregate(storage_bytes=Sum('size', default=0))['storage_bytes'] + return sum( + osfdb.FileVersion.objects.filter( + size__gt=0, + created__lt=self.before_datetime, + purged__isnull=True, + basefilenode__in=_target_queryset, + ).aggregate(storage_bytes=Sum('size', default=0))['storage_bytes'] + for _target_queryset + in self._public_osfstorage_file_querysets() + ) def _get_last_active(self): end_date = self.yearmonth.month_end() From 0a510f5cf477a8018a2c1886cdda3cdbb5a1ccf6 Mon Sep 17 00:00:00 2001 From: Matt Frazier Date: Mon, 2 Dec 2024 15:47:55 -0500 Subject: [PATCH 23/35] Use low queue for metric reporters - h/t @aaxelb --- website/settings/defaults.py | 1 + 1 file changed, 1 insertion(+) diff --git 
a/website/settings/defaults.py b/website/settings/defaults.py index 0467ef3c166..91e3c1bacc6 100644 --- a/website/settings/defaults.py +++ b/website/settings/defaults.py @@ -446,6 +446,7 @@ class CeleryConfig: 'osf.management.commands.daily_reporters_go', 'osf.management.commands.monthly_reporters_go', 'osf.management.commands.ingest_cedar_metadata_templates', + 'osf.metrics.reporters', } med_pri_modules = { From d34cac037c9435b4dfdd3358f53d3d1c3a5eaf68 Mon Sep 17 00:00:00 2001 From: Longze Chen Date: Wed, 4 Dec 2024 16:11:57 -0500 Subject: [PATCH 24/35] Fix failures caused by base class MonthlyReporter update --- osf/metrics/reporters/private_spam_metrics.py | 8 ++++---- osf_tests/metrics/test_spam_count_reporter.py | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/osf/metrics/reporters/private_spam_metrics.py b/osf/metrics/reporters/private_spam_metrics.py index 39b5fb16cb7..6e92f7f279b 100644 --- a/osf/metrics/reporters/private_spam_metrics.py +++ b/osf/metrics/reporters/private_spam_metrics.py @@ -6,15 +6,15 @@ class PrivateSpamMetricsReporter(MonthlyReporter): report_name = 'Private Spam Metrics' - def report(self, report_yearmonth): - target_month = report_yearmonth.target_month() - next_month = report_yearmonth.next_month() + def report(self): + target_month = self.yearmonth.target_month() + next_month = self.yearmonth.next_month() oopspam_client = OOPSpamClient() akismet_client = AkismetClient() report = PrivateSpamMetricsReport( - report_yearmonth=str(report_yearmonth), + report_yearmonth=str(self.yearmonth), node_oopspam_flagged=oopspam_client.get_flagged_count(target_month, next_month, category='node'), node_oopspam_hammed=oopspam_client.get_hammed_count(target_month, next_month, category='node'), node_akismet_flagged=akismet_client.get_flagged_count(target_month, next_month, category='node'), diff --git a/osf_tests/metrics/test_spam_count_reporter.py b/osf_tests/metrics/test_spam_count_reporter.py index db44dc848ff..30d53cd4c1b 100644 --- a/osf_tests/metrics/test_spam_count_reporter.py +++ b/osf_tests/metrics/test_spam_count_reporter.py @@ -29,8 +29,8 @@ def test_private_spam_metrics_reporter(): mock_akismet_get_flagged_count.return_value = 20 mock_akismet_get_hammed_count.return_value = 10 - reporter = PrivateSpamMetricsReporter() - report = reporter.report(report_yearmonth)[0] + reporter = PrivateSpamMetricsReporter(report_yearmonth) + report = reporter.report()[0] assert report.node_oopspam_flagged == 10, f"Expected 10, got {report.node_oopspam_flagged}" assert report.node_oopspam_hammed == 5, f"Expected 5, got {report.node_oopspam_hammed}" From 8997814196694447ddb2184acc580bb9cc460bd6 Mon Sep 17 00:00:00 2001 From: Longze Chen Date: Thu, 5 Dec 2024 10:20:22 -0500 Subject: [PATCH 25/35] Follow-up fix for target/next (start/end) month --- osf/metrics/reporters/private_spam_metrics.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/osf/metrics/reporters/private_spam_metrics.py b/osf/metrics/reporters/private_spam_metrics.py index 6e92f7f279b..32ed9ac57d3 100644 --- a/osf/metrics/reporters/private_spam_metrics.py +++ b/osf/metrics/reporters/private_spam_metrics.py @@ -7,8 +7,8 @@ class PrivateSpamMetricsReporter(MonthlyReporter): report_name = 'Private Spam Metrics' def report(self): - target_month = self.yearmonth.target_month() - next_month = self.yearmonth.next_month() + target_month = self.yearmonth.month_start() + next_month = self.yearmonth.month_end() oopspam_client = OOPSpamClient() akismet_client = AkismetClient() From 
40e7f269346a7b09ff470d40e52470a9d8056295 Mon Sep 17 00:00:00 2001 From: Longze Chen Date: Thu, 5 Dec 2024 11:23:05 -0500 Subject: [PATCH 26/35] Update changelog and bump versions --- CHANGELOG | 10 ++++++++++ package.json | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/CHANGELOG b/CHANGELOG index 32a02066ce0..082f35c282f 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -2,6 +2,16 @@ We follow the CalVer (https://calver.org/) versioning scheme: YY.MINOR.MICRO. +24.10.0 (2024-12-05) +==================== + +- Migrate Preprint Affilations +- Add OOPSpam and Akismet metrics to spam report +- Add PrivateSpamMetricsReport +- Update PrivateSpamMetricsReporter to work with refactored MonthlyReporter +- Fix duplicate reports when run for past years +- Fix counted-usage clobbers + 24.09.0 (2024-11-14) ==================== diff --git a/package.json b/package.json index 7fcf0590044..904ec2be4d2 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "OSF", - "version": "24.09.0", + "version": "24.10.0", "description": "Facilitating Open Science", "repository": "https://github.com/CenterForOpenScience/osf.io", "author": "Center for Open Science", From d9b459805d3185cc649ee15f804a028ab9994252 Mon Sep 17 00:00:00 2001 From: Matt Frazier Date: Fri, 6 Dec 2024 10:19:17 -0500 Subject: [PATCH 27/35] Fix backfill, report --- .../commands/migrate_preprint_affiliation.py | 5 +++ osf/metrics/reporters/private_spam_metrics.py | 2 +- .../test_migrate_preprint_affiliations.py | 38 ++++++++++++++++++- osf_tests/metrics/test_spam_count_reporter.py | 2 +- 4 files changed, 44 insertions(+), 3 deletions(-) diff --git a/osf/management/commands/migrate_preprint_affiliation.py b/osf/management/commands/migrate_preprint_affiliation.py index 78e7b2786ff..e34c6dc6b27 100644 --- a/osf/management/commands/migrate_preprint_affiliation.py +++ b/osf/management/commands/migrate_preprint_affiliation.py @@ -9,6 +9,8 @@ logger = logging.getLogger(__name__) +AFFILIATION_TARGET_DATE = datetime.datetime(2024, 9, 19, 14, 37, 48, tzinfo=datetime.timezone.utc) + class Command(BaseCommand): """Assign affiliations from users to preprints where they have write or admin permissions, with optional exclusion by user GUIDs.""" @@ -97,6 +99,9 @@ def assign_affiliations_to_preprints(exclude_guids=None, dry_run=True, batch_siz user = contributor.user preprint = contributor.preprint + if preprint.created > AFFILIATION_TARGET_DATE: + continue + user_institutions = user.get_affiliated_institutions() processed_count += 1 if not dry_run: diff --git a/osf/metrics/reporters/private_spam_metrics.py b/osf/metrics/reporters/private_spam_metrics.py index 32ed9ac57d3..40f259af325 100644 --- a/osf/metrics/reporters/private_spam_metrics.py +++ b/osf/metrics/reporters/private_spam_metrics.py @@ -25,4 +25,4 @@ def report(self): preprint_akismet_hammed=akismet_client.get_hammed_count(target_month, next_month, category='preprint') ) - return [report] + return report diff --git a/osf_tests/management_commands/test_migrate_preprint_affiliations.py b/osf_tests/management_commands/test_migrate_preprint_affiliations.py index 701638251f5..8c80737b3dd 100644 --- a/osf_tests/management_commands/test_migrate_preprint_affiliations.py +++ b/osf_tests/management_commands/test_migrate_preprint_affiliations.py @@ -1,5 +1,6 @@ import pytest -from osf.management.commands.migrate_preprint_affiliation import assign_affiliations_to_preprints +from datetime import timedelta +from osf.management.commands.migrate_preprint_affiliation import 
AFFILIATION_TARGET_DATE, assign_affiliations_to_preprints from osf_tests.factories import ( PreprintFactory, InstitutionFactory, @@ -33,6 +34,8 @@ def preprint_with_affiliated_contributor(self, user_with_affiliation): permissions='admin', visible=True ) + preprint.created = AFFILIATION_TARGET_DATE - timedelta(days=1) + preprint.save() return preprint @pytest.fixture() @@ -43,6 +46,20 @@ def preprint_with_non_affiliated_contributor(self, user_without_affiliation): permissions='admin', visible=True ) + preprint.created = AFFILIATION_TARGET_DATE - timedelta(days=1) + preprint.save() + return preprint + + @pytest.fixture() + def preprint_past_target_date_with_affiliated_contributor(self, user_with_affiliation): + preprint = PreprintFactory() + preprint.add_contributor( + user_with_affiliation, + permissions='admin', + visible=True + ) + preprint.created = AFFILIATION_TARGET_DATE + timedelta(days=1) + preprint.save() return preprint @pytest.mark.parametrize('dry_run', [True, False]) @@ -100,6 +117,7 @@ def test_affiliations_from_multiple_contributors(self, institution, dry_run): preprint = PreprintFactory() preprint.affiliated_institutions.clear() + preprint.created = AFFILIATION_TARGET_DATE - timedelta(days=1) preprint.add_contributor(read_contrib, permissions='read', visible=True) preprint.add_contributor(write_contrib, permissions='write', visible=True) preprint.add_contributor(admin_contrib, permissions='admin', visible=True) @@ -113,3 +131,21 @@ def test_affiliations_from_multiple_contributors(self, institution, dry_run): affiliations = set(preprint.affiliated_institutions.all()) assert affiliations == {institution, institution2} assert institution_not_include not in affiliations + + @pytest.mark.parametrize('dry_run', [True, False]) + def test_exclude_recent_preprints(self, preprint_past_target_date_with_affiliated_contributor, preprint_with_affiliated_contributor, institution, dry_run): + new_preprint = preprint_past_target_date_with_affiliated_contributor + new_preprint.affiliated_institutions.clear() + new_preprint.save() + + old_preprint = preprint_with_affiliated_contributor + old_preprint.affiliated_institutions.clear() + old_preprint.save() + + assign_affiliations_to_preprints(dry_run=dry_run) + + assert not new_preprint.affiliated_institutions.exists() + if dry_run: + assert not old_preprint.affiliated_institutions.exists() + else: + assert institution in old_preprint.affiliated_institutions.all() diff --git a/osf_tests/metrics/test_spam_count_reporter.py b/osf_tests/metrics/test_spam_count_reporter.py index 30d53cd4c1b..0e7ba6956bf 100644 --- a/osf_tests/metrics/test_spam_count_reporter.py +++ b/osf_tests/metrics/test_spam_count_reporter.py @@ -30,7 +30,7 @@ def test_private_spam_metrics_reporter(): mock_akismet_get_hammed_count.return_value = 10 reporter = PrivateSpamMetricsReporter(report_yearmonth) - report = reporter.report()[0] + report = reporter.report() assert report.node_oopspam_flagged == 10, f"Expected 10, got {report.node_oopspam_flagged}" assert report.node_oopspam_hammed == 5, f"Expected 5, got {report.node_oopspam_hammed}" From c966face6faa251419d64fcedc4da2e80fbf70aa Mon Sep 17 00:00:00 2001 From: mfraezz Date: Wed, 11 Dec 2024 13:07:18 -0500 Subject: [PATCH 28/35] [Feature] Dashboard B&I (#10843) Co-authored-by: abram axel booth --- api/institutions/serializers.py | 2 ++ .../institutions/views/test_institution_summary_metrics.py | 2 ++ .../institutions/views/test_institution_user_metric_list.py | 5 +++++ 3 files changed, 9 insertions(+) diff --git 
a/api/institutions/serializers.py b/api/institutions/serializers.py index e3679b2a9c5..1d1e0761715 100644 --- a/api/institutions/serializers.py +++ b/api/institutions/serializers.py @@ -330,6 +330,7 @@ class Meta: }) id = IDField(source='meta.id', read_only=True) + report_yearmonth = YearmonthField(read_only=True) user_name = ser.CharField(read_only=True) department = ser.CharField(read_only=True, source='department_name') orcid_id = ser.CharField(read_only=True) @@ -372,6 +373,7 @@ class Meta: id = IDField(read_only=True) + report_yearmonth = YearmonthField(read_only=True) user_count = ser.IntegerField(read_only=True) public_project_count = ser.IntegerField(read_only=True) private_project_count = ser.IntegerField(read_only=True) diff --git a/api_tests/institutions/views/test_institution_summary_metrics.py b/api_tests/institutions/views/test_institution_summary_metrics.py index d423663ea89..f1641ea923c 100644 --- a/api_tests/institutions/views/test_institution_summary_metrics.py +++ b/api_tests/institutions/views/test_institution_summary_metrics.py @@ -188,6 +188,7 @@ def test_get_report(self, app, url, institutional_admin, institution, reports, u assert data['type'] == 'institution-summary-metrics' attributes = data['attributes'] + assert attributes['report_yearmonth'] == '2024-08' assert attributes['user_count'] == 200 assert attributes['public_project_count'] == 150 assert attributes['private_project_count'] == 125 @@ -254,6 +255,7 @@ def test_get_report_with_multiple_months_and_institutions( attributes = data['attributes'] + assert attributes['report_yearmonth'] == '2024-09' assert attributes['user_count'] == 250 assert attributes['public_project_count'] == 200 assert attributes['private_project_count'] == 150 diff --git a/api_tests/institutions/views/test_institution_user_metric_list.py b/api_tests/institutions/views/test_institution_user_metric_list.py index f83fd7fc3fa..b1bf3490788 100644 --- a/api_tests/institutions/views/test_institution_user_metric_list.py +++ b/api_tests/institutions/views/test_institution_user_metric_list.py @@ -445,6 +445,7 @@ def test_get_report_formats_csv_tsv(self, app, url, institutional_admin, institu response_body = resp.text expected_response = [ [ + 'report_yearmonth', 'account_creation_date', 'department', 'embargoed_registration_count', @@ -460,6 +461,7 @@ def test_get_report_formats_csv_tsv(self, app, url, institutional_admin, institu 'user_name' ], [ + '2024-08', '2018-02', 'Center, \t Greatest Ever', '1', @@ -512,6 +514,7 @@ def test_csv_tsv_ignores_pagination(self, app, url, institutional_admin, institu month_last_login='2018-02', ) expected_data.append([ + '2024-08', '2018-02', 'QBatman', '1', @@ -552,6 +555,7 @@ def test_csv_tsv_ignores_pagination(self, app, url, institutional_admin, institu response_rows = list(reader) # Validate header row expected_header = [ + 'report_yearmonth', 'account_creation_date', 'department', 'embargoed_registration_count', @@ -606,6 +610,7 @@ def test_get_report_format_table_json(self, app, url, institutional_admin, insti response_data = json.loads(resp.body) expected_data = [ { + 'report_yearmonth': '2024-08', 'account_creation_date': '2018-02', 'department': 'Safety "The Wolverine" Weapon X', 'embargoed_registration_count': 1, From 4d1708f966e325dc017c2d47326c04c9878110cd Mon Sep 17 00:00:00 2001 From: Matt Frazier Date: Wed, 11 Dec 2024 13:09:28 -0500 Subject: [PATCH 29/35] Update CHANGELOG, bump version --- CHANGELOG | 4 ++++ package.json | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git 
a/CHANGELOG b/CHANGELOG index 082f35c282f..be32693ee55 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -2,6 +2,10 @@ We follow the CalVer (https://calver.org/) versioning scheme: YY.MINOR.MICRO. +24.11.0 (2024-12-11) +==================== +- Institutional Dashboard Project Bugfix Release + 24.10.0 (2024-12-05) ==================== diff --git a/package.json b/package.json index 904ec2be4d2..fba6f9fe0b7 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "OSF", - "version": "24.10.0", + "version": "24.11.0", "description": "Facilitating Open Science", "repository": "https://github.com/CenterForOpenScience/osf.io", "author": "Center for Open Science", From 6dce520bf30f138824d3eceedbfe42027c844307 Mon Sep 17 00:00:00 2001 From: Matt Frazier Date: Tue, 3 Dec 2024 15:23:38 -0500 Subject: [PATCH 30/35] Assume default for global_ notifications --- website/notifications/emails.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/notifications/emails.py b/website/notifications/emails.py index 245baf9f0af..d26d43351d5 100644 --- a/website/notifications/emails.py +++ b/website/notifications/emails.py @@ -176,7 +176,7 @@ def get_user_subscriptions(user, event): if user_subscription: return {key: list(getattr(user_subscription, key).all().values_list('guids___id', flat=True)) for key in constants.NOTIFICATION_TYPES} else: - return {key: [] for key in constants.NOTIFICATION_TYPES} + return {key: [user._id] if (event in constants.USER_SUBSCRIPTIONS_AVAILABLE and key == 'email_transactional') else [] for key in constants.NOTIFICATION_TYPES} def get_node_lineage(node): From 1f3be808830ae8e81d2501505102645d609029fa Mon Sep 17 00:00:00 2001 From: Matt Frazier Date: Thu, 19 Dec 2024 05:35:33 -0500 Subject: [PATCH 31/35] Avoid superfluous PrivateLink query --- website/routes.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/website/routes.py b/website/routes.py index 61d6c96c9aa..2acd71db1a6 100644 --- a/website/routes.py +++ b/website/routes.py @@ -176,8 +176,11 @@ def get_globals(): def is_private_link_anonymous_view(): # Avoid circular import from osf.models import PrivateLink + view_only = request.args.get('view_only') + if not view_only: + return False try: - return PrivateLink.objects.filter(key=request.args.get('view_only')).values_list('anonymous', flat=True).get() + return PrivateLink.objects.filter(key=view_only).values_list('anonymous', flat=True).get() except PrivateLink.DoesNotExist: return False From bf3c7d8b22f98355aed46a4c668ff97236dfde66 Mon Sep 17 00:00:00 2001 From: Matt Frazier Date: Thu, 19 Dec 2024 12:51:28 -0500 Subject: [PATCH 32/35] Improve script resumability, update template --- osf/management/commands/email_all_users.py | 19 ++++++++++--------- .../test_email_all_users.py | 8 ++++---- website/templates/emails/tou_notif.html.mako | 8 ++++---- 3 files changed, 18 insertions(+), 17 deletions(-) diff --git a/osf/management/commands/email_all_users.py b/osf/management/commands/email_all_users.py index 334ad58933b..f5cbd677fb7 100644 --- a/osf/management/commands/email_all_users.py +++ b/osf/management/commands/email_all_users.py @@ -19,13 +19,13 @@ OFFSET = 500000 -def email_all_users(email_template, dry_run=False, ids=None, run=0, offset=OFFSET): +def email_all_users(email_template, dry_run=False, ids=None, start_id=0, offset=OFFSET): if ids: active_users = OSFUser.objects.filter(id__in=ids) else: - lower_bound = run * offset - upper_bound = (run + 1) * offset + lower_bound = start_id + upper_bound = start_id + offset base_query 
= OSFUser.objects.filter(date_confirmed__isnull=False, deleted=None).exclude(date_disabled__isnull=False).exclude(is_active=False) active_users = base_query.filter(id__gt=lower_bound, id__lte=upper_bound).order_by('id') @@ -42,11 +42,12 @@ def email_all_users(email_template, dry_run=False, ids=None, run=0, offset=OFFSE total_sent = 0 for user in active_users.iterator(): + logger.info(f'Sending email to {user.id}') try: mails.send_mail( to_addr=user.email, mail=template, - fullname=user.fullname, + given_name=user.given_name or user.fullname, ) except Exception as e: logger.error(f'Exception encountered sending email to {user.id}') @@ -80,11 +81,11 @@ def add_arguments(self, parser): ) parser.add_argument( - '--r', + '--start-id', type=int, - dest='run', + dest='start_id', default=0, - help='Specify which run this is' + help='Specify id to start from.' ) parser.add_argument( @@ -105,9 +106,9 @@ def add_arguments(self, parser): def handle(self, *args, **options): dry_run = options.get('dry_run', False) template = options.get('template') - run = options.get('run') + start_id = options.get('start_id') ids = options.get('ids') offset = options.get('offset', OFFSET) - email_all_users(template, dry_run, run=run, ids=ids, offset=offset) + email_all_users(template, dry_run, start_id=start_id, ids=ids, offset=offset) if dry_run: raise RuntimeError('Dry run, only superusers emailed') diff --git a/osf_tests/management_commands/test_email_all_users.py b/osf_tests/management_commands/test_email_all_users.py index 3392e77a470..c10c84b49d1 100644 --- a/osf_tests/management_commands/test_email_all_users.py +++ b/osf_tests/management_commands/test_email_all_users.py @@ -49,7 +49,7 @@ def test_email_all_users_dry(self, mock_email, superuser): mock_email.assert_called_with( to_addr=superuser.email, mail=mails.TOU_NOTIF, - fullname=superuser.fullname + given_name=superuser.given_name ) @pytest.mark.django_db @@ -64,10 +64,10 @@ def test_dont_email_inactive_users( @pytest.mark.django_db @mock.patch('website.mails.send_mail') def test_email_all_users_offset(self, mock_email, user, user2): - email_all_users('TOU_NOTIF', offset=1, run=0) + email_all_users('TOU_NOTIF', offset=1, start_id=0) - email_all_users('TOU_NOTIF', offset=1, run=1) + email_all_users('TOU_NOTIF', offset=1, start_id=1) - email_all_users('TOU_NOTIF', offset=1, run=2) + email_all_users('TOU_NOTIF', offset=1, start_id=2) assert mock_email.call_count == 2 diff --git a/website/templates/emails/tou_notif.html.mako b/website/templates/emails/tou_notif.html.mako index 1da8c0cbc07..56130626668 100644 --- a/website/templates/emails/tou_notif.html.mako +++ b/website/templates/emails/tou_notif.html.mako @@ -3,12 +3,12 @@ <%def name="content()"> - Hi ${fullname},
+ Hi ${given_name},

- On August 10, 2020 the COS Websites and Services Terms of Use will change. The updates to the Terms are necessary to support continued use of the Websites and Services by the public.
- To better understand what has changed, go here.
+ On Friday, January 10, 2025, the COS Websites and Services Terms of Use and Privacy Policy will change. The updates to the Terms are necessary to support continued use of the Websites and Services by the public.
+ To better understand what has changed, see the Terms of Use change summary and Privacy Policy change summary.

- If you have any questions email support@osf.io.
+ You do not need to take any action to acknowledge these updates. If you have any questions, please email support@osf.io.

Regards,

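Aside from the flag rename, the heart of this patch is the id-window query that makes reruns cheap to resume. A minimal sketch of that selection logic, assuming `from osf.models import OSFUser`; the helper name `active_users_window` is illustrative (the patch inlines this logic in `email_all_users`):

    from osf.models import OSFUser

    OFFSET = 500000

    def active_users_window(start_id, offset=OFFSET):
        # Users with id in (start_id, start_id + offset], ordered by id, so the
        # last "Sending email to {id}" log line is a valid --start-id for the
        # next invocation.
        return (
            OSFUser.objects
            .filter(date_confirmed__isnull=False, deleted=None)
            .exclude(date_disabled__isnull=False)
            .exclude(is_active=False)
            .filter(id__gt=start_id, id__lte=start_id + offset)
            .order_by('id')
        )
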
From 3bf4fc1861b35f535a4399467eea2c0bd61add77 Mon Sep 17 00:00:00 2001 From: Matt Frazier Date: Mon, 9 Dec 2024 15:17:49 -0500 Subject: [PATCH 33/35] Add internal policy views - Pull latest policy versions on build --- Dockerfile | 4 ++++ website/policies/views.py | 19 +++++++++++++++++++ website/routes.py | 13 +++++++++++++ website/settings/defaults.py | 11 ++++++++--- .../templates/policies/generic_policy.mako | 16 ++++++++++++++++ 5 files changed, 60 insertions(+), 3 deletions(-) create mode 100644 website/policies/views.py create mode 100644 website/templates/policies/generic_policy.mako diff --git a/Dockerfile b/Dockerfile index 189b0e998b9..d5dce5b303a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -52,6 +52,10 @@ RUN set -ex \ libffi-dev WORKDIR /code + +# Policies +ADD https://github.com/CenterForOpenScience/cos.io.git#master ./COS_POLICIES/ + COPY pyproject.toml . COPY poetry.lock . # Fix: https://github.com/CenterForOpenScience/osf.io/pull/6783 diff --git a/website/policies/views.py b/website/policies/views.py new file mode 100644 index 00000000000..c13ad197dae --- /dev/null +++ b/website/policies/views.py @@ -0,0 +1,19 @@ +import markdown + +from website.settings import \ + PRIVACY_POLICY_PATH, PRIVACY_POLICY_GITHUB_LINK, \ + TERMS_POLICY_PATH, TERMS_POLICY_GITHUB_LINK + +def privacy_policy(): + with open(PRIVACY_POLICY_PATH, 'r') as policy_file: + return { + 'policy_content': markdown.markdown(policy_file.read(), extensions=['toc']), + 'POLICY_GITHUB_LINK': PRIVACY_POLICY_GITHUB_LINK + } + +def terms_policy(): + with open(TERMS_POLICY_PATH, 'r') as policy_file: + return { + 'policy_content': markdown.markdown(policy_file.read(), extensions=['toc']), + 'POLICY_GITHUB_LINK': TERMS_POLICY_GITHUB_LINK + } diff --git a/website/routes.py b/website/routes.py index 2acd71db1a6..ce328c3dcd7 100644 --- a/website/routes.py +++ b/website/routes.py @@ -53,6 +53,7 @@ from addons.base import views as addon_views from website.discovery import views as discovery_views from website.conferences import views as conference_views +from website.policies import views as policy_views from website.preprints import views as preprint_views from website.registries import views as registries_views from website.reviews import views as reviews_views @@ -1145,6 +1146,18 @@ def make_url_map(app): Rule('/goodbye/', 'get', goodbye, notemplate), + Rule( + '/privacy_policy/', + 'get', + policy_views.privacy_policy, + OsfWebRenderer('policies/generic_policy.mako', trust=True) + ), + Rule( + '/terms_of_use/', + 'get', + policy_views.terms_policy, + OsfWebRenderer('policies/generic_policy.mako', trust=True) + ), Rule( [ '/project//', diff --git a/website/settings/defaults.py b/website/settings/defaults.py index 91e3c1bacc6..ee667f4130e 100644 --- a/website/settings/defaults.py +++ b/website/settings/defaults.py @@ -26,6 +26,9 @@ def parent_dir(path): STATIC_FOLDER = os.path.join(BASE_PATH, 'static') STATIC_URL_PATH = '/static' ASSET_HASH_PATH = os.path.join(APP_PATH, 'webpack-assets.json') +POLICY_PATH = os.path.join(APP_PATH, 'COS_POLICIES') +PRIVACY_POLICY_PATH = os.path.join(POLICY_PATH, 'PRIVACY_POLICY.md') +TERMS_POLICY_PATH = os.path.join(POLICY_PATH, 'TERMS_OF_USE.md') ROOT = os.path.join(BASE_PATH, '..') BCRYPT_LOG_ROUNDS = 12 LOG_LEVEL = logging.INFO @@ -2048,10 +2051,12 @@ class CeleryConfig: OSF_REGISTRIES_LOGO = 'osf_registries' OSF_LOGO_LIST = [OSF_LOGO, OSF_PREPRINTS_LOGO, OSF_MEETINGS_LOGO, OSF_PREREG_LOGO, OSF_REGISTRIES_LOGO] +PRIVACY_POLICY_GITHUB_LINK = 
'https://github.com/CenterForOpenScience/centerforopenscience.org/blob/master/PRIVACY_POLICY.md' +TERMS_POLICY_GITHUB_LINK = 'https://github.com/CenterForOpenScience/centerforopenscience.org/blob/master/TERMS_OF_USE.md' FOOTER_LINKS = { - 'terms': 'https://github.com/CenterForOpenScience/centerforopenscience.org/blob/master/TERMS_OF_USE.md', - 'privacyPolicy': 'https://github.com/CenterForOpenScience/centerforopenscience.org/blob/master/PRIVACY_POLICY.md', - 'cookies': 'https://github.com/CenterForOpenScience/centerforopenscience.org/blob/master/PRIVACY_POLICY.md#f-cookies', + 'terms': 'https://osf.io/terms_of_use/', + 'privacyPolicy': 'https://osf.io/privacy_policy/', + 'cookies': 'https://osf.io/privacy_policy/#f-cookies', 'cos': 'https://cos.io', 'statusPage': 'https://status.cos.io/', 'apiDocs': 'https://developer.osf.io/', diff --git a/website/templates/policies/generic_policy.mako b/website/templates/policies/generic_policy.mako new file mode 100644 index 00000000000..6ae8581d350 --- /dev/null +++ b/website/templates/policies/generic_policy.mako @@ -0,0 +1,16 @@ +<%inherit file="base.mako"/> + +<%def name="content()"> +
+    <div class="row">
+        <div class="col-md-12">
+            ${policy_content}
+        </div>
+    </div>
+    <div class="row">
+        <div class="col-md-12">
+            Version history for this policy is available <a href="${POLICY_GITHUB_LINK}">here</a>
+        </div>
+    </div>
+</div>
+</%def>

From d053a6289a17eb7cc34e53e1986c1659b1bca21e Mon Sep 17 00:00:00 2001
From: Bohdan Odintsov
Date: Tue, 7 Jan 2025 15:55:34 +0200
Subject: [PATCH 34/35] fixed yearmonth method

---
 osf_tests/metrics/test_monthly_report.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/osf_tests/metrics/test_monthly_report.py b/osf_tests/metrics/test_monthly_report.py
index 0c0302a7f08..3c841e6555c 100644
--- a/osf_tests/metrics/test_monthly_report.py
+++ b/osf_tests/metrics/test_monthly_report.py
@@ -135,7 +135,7 @@ def test_with_last_month(self, osfid, this_month_report, last_month_report, two_

 def _prior_yearmonth(ym: YearMonth) -> YearMonth:
     return (
-        YearMonth(ym.year - 1, 1)
+        YearMonth(ym.year - 1, 12)
         if ym.month == 1
         else YearMonth(ym.year, ym.month - 1)
     )

From a42ee32fe08f98765b8e6e2b3722a6fbd1bb3dee Mon Sep 17 00:00:00 2001
From: Matt Frazier
Date: Thu, 9 Jan 2025 10:36:41 -0500
Subject: [PATCH 35/35] Add view, form to update moderation state

---
 admin/nodes/urls.py             |  1 +
 admin/nodes/views.py            | 11 +++++++++++
 admin/templates/nodes/node.html |  7 ++++++-
 3 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/admin/nodes/urls.py b/admin/nodes/urls.py
index 5036b9dd06d..d081b544d61 100644
--- a/admin/nodes/urls.py
+++ b/admin/nodes/urls.py
@@ -38,4 +38,5 @@
     re_path(r'^(?P<guid>[a-z0-9]+)/make_private/$', views.NodeMakePrivate.as_view(), name='make-private'),
     re_path(r'^(?P<guid>[a-z0-9]+)/make_public/$', views.NodeMakePublic.as_view(), name='make-public'),
     re_path(r'^(?P<guid>[a-z0-9]+)/remove_notifications/$', views.NodeRemoveNotificationView.as_view(), name='node-remove-notifications'),
+    re_path(r'^(?P<guid>[a-z0-9]+)/update_moderation_state/$', views.NodeUpdateModerationStateView.as_view(), name='node-update-mod-state'),
 ]

diff --git a/admin/nodes/views.py b/admin/nodes/views.py
index 74b6b08feae..f1e90d72c09 100644
--- a/admin/nodes/views.py
+++ b/admin/nodes/views.py
@@ -118,6 +118,17 @@ def post(self, request, *args, **kwargs):

         return redirect('nodes:node', guid=kwargs.get('guid'))


+class NodeUpdateModerationStateView(View):
+    def post(self, request, *args, **kwargs):
+        guid = kwargs.get('guid')
+        node = AbstractNode.load(guid)
+        node.update_moderation_state()
+        messages.success(request, 'Moderation state successfully updated.')
+
+        return redirect('nodes:node', guid=kwargs.get('guid'))
+
+
 class NodeSearchView(PermissionRequiredMixin, FormView):
     """ Allows authorized users to search for a node by it's guid.
     """

diff --git a/admin/templates/nodes/node.html b/admin/templates/nodes/node.html
index 6ec71e2dfdc..cb12f49c375 100644
--- a/admin/templates/nodes/node.html
+++ b/admin/templates/nodes/node.html
@@ -64,7 +64,12 @@

 {{ node.type|cut:'osf.'|title }}: {{ node.title }}
+<form method="post" action="{% url 'nodes:node-update-mod-state' guid=node.guid %}">
+    {% csrf_token %}
+    <button type="submit" class="btn btn-default">Update Moderation State</button>
+</form>
+
 Creator
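For reference on PATCH 34/35 above, the corrected `_prior_yearmonth` rollover behaves as in this sketch (the `YearMonth` import path is an assumption; the concrete values are illustrative):

    from osf.metrics.utils import YearMonth  # assumed import path

    # January now rolls back to December of the previous year
    # (previously YearMonth(ym.year - 1, 1), i.e. January of the prior year).
    assert _prior_yearmonth(YearMonth(2025, 1)) == YearMonth(2024, 12)

    # Any other month simply decrements within the same year.
    assert _prior_yearmonth(YearMonth(2025, 7)) == YearMonth(2025, 6)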