diff --git a/froide/foirequest/forms/request.py b/froide/foirequest/forms/request.py
index 9f4140ebf..0e9f80d70 100644
--- a/froide/foirequest/forms/request.py
+++ b/froide/foirequest/forms/request.py
@@ -490,5 +490,6 @@ def save(self, request: HttpRequest, foirequest: FoiRequest):
         )
 
         foirequest.description_redacted = redacted_description
+        foirequest.clear_render_cache()
         foirequest.save()
 
diff --git a/froide/foirequest/management/commands/cache_text_redactions.py b/froide/foirequest/management/commands/cache_text_redactions.py
index 9f000a4fe..3b9a98077 100644
--- a/froide/foirequest/management/commands/cache_text_redactions.py
+++ b/froide/foirequest/management/commands/cache_text_redactions.py
@@ -4,6 +4,9 @@ from django.db.models.functions import Length
 from django.utils import translation
 
+from froide.foirequest.models.request import FoiRequest
+from froide.helper.text_diff import CONTENT_CACHE_THRESHOLD
+
 
 class Command(BaseCommand):
     help = "Pre-calculate redaction diffs and markup for long texts."
 
@@ -13,6 +16,7 @@ def handle(self, *args, **options):
         from froide.foirequest.models import FoiMessage
         from froide.foirequest.templatetags.foirequest_tags import (
             render_message_content,
+            render_request_description,
         )
 
         needs_calculation = (
@@ -24,7 +28,7 @@ def handle(self, *args, **options):
 
         msgs: QuerySet[FoiMessage] = (
             FoiMessage.objects.annotate(plaintext_length=Length("plaintext"))
-            .filter(plaintext_length__gt=FoiMessage.CONTENT_CACHE_THRESHOLD)
+            .filter(plaintext_length__gt=CONTENT_CACHE_THRESHOLD)
             .filter(needs_calculation)
         )
 
@@ -36,3 +40,19 @@ def handle(self, *args, **options):
             # Cache the rendered message content for the foi request page
             render_message_content(message, True)
             render_message_content(message, False)
+
+        reqs: QuerySet[FoiRequest] = (
+            FoiRequest.objects.annotate(description_length=Length("description"))
+            .filter(description_length__gt=CONTENT_CACHE_THRESHOLD)
+            .filter(
+                Q(redacted_description_auth__isnull=True)
+                | Q(redacted_description_anon__isnull=True)
+                | Q(rendered_description_auth__isnull=True)
+                | Q(rendered_description_anon__isnull=True)
+            )
+        )
+        for req in reqs:
+            req.get_redacted_description(True)
+            req.get_redacted_description(False)
+            render_request_description(req, True)
+            render_request_description(req, False)
diff --git a/froide/foirequest/migrations/0070_foirequest_redacted_description_anon_and_more.py b/froide/foirequest/migrations/0070_foirequest_redacted_description_anon_and_more.py
new file mode 100644
index 000000000..ac3cbddd9
--- /dev/null
+++ b/froide/foirequest/migrations/0070_foirequest_redacted_description_anon_and_more.py
@@ -0,0 +1,33 @@
+# Generated by Django 4.2.16 on 2025-01-22 14:28
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ("foirequest", "0069_alter_foimessage_plaintext"),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name="foirequest",
+            name="redacted_description_anon",
+            field=models.JSONField(blank=True, null=True),
+        ),
+        migrations.AddField(
+            model_name="foirequest",
+            name="redacted_description_auth",
+            field=models.JSONField(blank=True, null=True),
+        ),
+        migrations.AddField(
+            model_name="foirequest",
+            name="rendered_description_anon",
+            field=models.TextField(blank=True, null=True),
+        ),
+        migrations.AddField(
+            model_name="foirequest",
+            name="rendered_description_auth",
+            field=models.TextField(blank=True, null=True),
+        ),
+    ]
diff --git a/froide/foirequest/models/message.py b/froide/foirequest/models/message.py
index 69eca030b..1f6443547 100644
--- a/froide/foirequest/models/message.py
+++ b/froide/foirequest/models/message.py
@@ -15,7 +15,7 @@
 from taggit.models import TagBase, TaggedItemBase
 
 from froide.helper.email_utils import make_address
-from froide.helper.text_diff import get_differences
+from froide.helper.text_diff import CONTENT_CACHE_THRESHOLD, get_differences
 from froide.helper.text_utils import quote_text, redact_plaintext, redact_subject
 from froide.publicbody.models import PublicBody
 
@@ -86,8 +86,6 @@ class MessageKind(models.TextChoices):
 
 
 class FoiMessage(models.Model):
-    CONTENT_CACHE_THRESHOLD = 5000
-
     request = models.ForeignKey(
         FoiRequest,
         verbose_name=_("Freedom of Information Request"),
@@ -728,7 +726,7 @@ def get_cached_rendered_content(self, authenticated_read):
             return self.content_rendered_anon
 
     def set_cached_rendered_content(self, authenticated_read, content):
-        needs_caching = len(self.content) > self.CONTENT_CACHE_THRESHOLD
+        needs_caching = len(self.content) > CONTENT_CACHE_THRESHOLD
         if needs_caching:
             if authenticated_read:
                 update = {"content_rendered_auth": content}
diff --git a/froide/foirequest/models/request.py b/froide/foirequest/models/request.py
index ad80713f9..44c4bde6d 100644
--- a/froide/foirequest/models/request.py
+++ b/froide/foirequest/models/request.py
@@ -22,7 +22,7 @@
 
 from froide.campaign.models import Campaign
 from froide.helper.email_utils import make_address
-from froide.helper.text_diff import get_differences
+from froide.helper.text_diff import CONTENT_CACHE_THRESHOLD, get_differences
 from froide.helper.text_utils import redact_plaintext
 from froide.publicbody.models import FoiLaw, Jurisdiction, PublicBody
 from froide.team.models import Team
@@ -300,6 +300,11 @@ class FoiRequest(models.Model):
     slug = models.SlugField(_("Slug"), max_length=255, unique=True)
     description = models.TextField(_("Description"), blank=True)
     description_redacted = models.TextField(_("Redacted Description"), blank=True)
+    redacted_description_auth = models.JSONField(blank=True, null=True)
+    redacted_description_anon = models.JSONField(blank=True, null=True)
+    rendered_description_auth = models.TextField(blank=True, null=True)
+    rendered_description_anon = models.TextField(blank=True, null=True)
+
     summary = models.TextField(_("Summary"), blank=True)
 
     public_body = models.ForeignKey(
@@ -651,25 +656,64 @@ def get_description(self):
             self.description_redacted = redact_plaintext(
                 self.description, user_replacements=user_replacements
             )
+            self.clear_render_cache()
             if (
                 self.description_redacted
             ):  # description might be empty, if so, don't save again
-                self.save(update_fields=["description_redacted"])
+                self.save()
         return self.description_redacted
 
     def get_redacted_description(self, auth: bool) -> List[Tuple[bool, str]]:
         if auth:
-            show, hide = (
+            show, hide, cache_field = (
                 self.description,
                 self.get_description(),
+                "redacted_description_auth",
             )
         else:
-            show, hide = (
+            show, hide, cache_field = (
                 self.get_description(),
                 self.description,
+                "redacted_description_anon",
+            )
+
+        if getattr(self, cache_field) is None:
+            redacted_content = [list(x) for x in get_differences(show, hide)]
+            setattr(self, cache_field, redacted_content)
+            FoiRequest.objects.filter(id=self.pk).update(
+                **{cache_field: redacted_content}
             )
+        return getattr(self, cache_field)
+
+    def clear_render_cache(self):
+        """Reset every cached redaction diff and rendered description.
+
+        Only the in-memory attributes are reset; callers have to save the
+        instance afterwards to persist the invalidation.
+        """
+        self.redacted_description_anon = None
+        self.redacted_description_auth = None
+        # The rendered markup is derived from the redacted description too,
+        # so it has to be dropped or stale (possibly unredacted) text is served.
+        self.rendered_description_anon = None
+        self.rendered_description_auth = None
 
-        return list(get_differences(show, hide))
+    def get_cached_rendered_description(self, authenticated_read):
+        """Return the stored rendered description for this read mode (or None)."""
+        if authenticated_read:
+            return self.rendered_description_auth
+        else:
+            return self.rendered_description_anon
+
+    def set_cached_rendered_description(self, authenticated_read, description):
+        """Store rendered markup, but only for descriptions above the threshold."""
+        needs_caching = len(self.description) > CONTENT_CACHE_THRESHOLD
+        if needs_caching:
+            if authenticated_read:
+                update = {"rendered_description_auth": description}
+            else:
+                update = {"rendered_description_anon": description}
+            FoiRequest.objects.filter(id=self.pk).update(**update)
 
     def response_messages(self):
         return list(filter(lambda m: m.is_response, self.messages))
diff --git a/froide/foirequest/templatetags/foirequest_tags.py b/froide/foirequest/templatetags/foirequest_tags.py
index 9697af38c..0dc72da00 100644
--- a/froide/foirequest/templatetags/foirequest_tags.py
+++ b/froide/foirequest/templatetags/foirequest_tags.py
@@ -62,12 +62,7 @@ def highlight_request(message, request):
     real_description = unify(message.request.description)
     redacted_description = unify(message.request.get_description())
 
-    description_with_markup = markup_redacted_content(
-        real_description,
-        redacted_description,
-        authenticated_read=auth_read,
-        message_id=message.id,
-    )
+    description_with_markup = render_request_description(message.request, auth_read)
 
     if auth_read:
         content = real_content
@@ -159,6 +154,16 @@ def redact_request_description(
     foirequest: FoiRequest, request: HttpRequest
 ) -> SafeString:
     authenticated_read = can_read_foirequest_authenticated(foirequest, request)
+    return render_request_description(foirequest, authenticated_read)
+
+
+def render_request_description(
+    foirequest: FoiRequest, authenticated_read: bool
+) -> SafeString:
+    """Return the request description markup, using the cached rendering if any."""
+    cached_content = foirequest.get_cached_rendered_description(authenticated_read)
+    if cached_content is not None:
+        return mark_safe(cached_content)
 
     real_content = unify(foirequest.description)
     redacted_content = unify(foirequest.get_description())
@@ -169,6 +174,10 @@ def redact_request_description(
         authenticated_read=authenticated_read,
     )
 
+    foirequest.set_cached_rendered_description(
+        authenticated_read=authenticated_read, description=content
+    )
+
     return content
 
 
diff --git a/froide/foirequest/tests/test_misc.py b/froide/foirequest/tests/test_misc.py
index 68c8a9474..f853c90c8 100644
--- a/froide/foirequest/tests/test_misc.py
+++ b/froide/foirequest/tests/test_misc.py
@@ -28,6 +28,7 @@
 )
 from froide.foirequest.tests import factories
 from froide.foirequest.utils import MailAttachmentSizeChecker
+from froide.helper.text_diff import CONTENT_CACHE_THRESHOLD
 
 
 class TemplateTagTest(TestCase):
@@ -360,7 +361,7 @@ def test_redacted_content_cache(foi_message_factory, django_assert_num_queries,
 def test_cached_rendered_content(
     foi_message_factory, django_assert_num_queries, auth, faker
 ):
-    req_text = faker.text(max_nb_chars=FoiMessage.CONTENT_CACHE_THRESHOLD)
+    req_text = faker.text(max_nb_chars=CONTENT_CACHE_THRESHOLD)
     redacted_foi_message = foi_message_factory(
         plaintext=f"Dear Mx. Example,\n\nPlease send me the following documents:\n{req_text}\n\nGreetings,\nAlex Example",
         plaintext_redacted=f"Dear <>,\n\nPlease send me the following documents:\n{req_text}\n\nGreetings,\n<>",
diff --git a/froide/helper/text_diff.py b/froide/helper/text_diff.py
index 8a90f040c..63fcd3b9e 100644
--- a/froide/helper/text_diff.py
+++ b/froide/helper/text_diff.py
@@ -8,6 +8,8 @@
 SPLITTER = r"([\u0000-\u002C\u003B-\u003F\u005B-\u005e\u0060\u007B-\u007E])"
 SPLITTER_RE = re.compile(SPLITTER)
 SPLITTER_MATCH_RE = re.compile("^%s$" % SPLITTER)
+# Texts longer than this many characters get their rendered markup cached.
+CONTENT_CACHE_THRESHOLD = 5000
 
 
 def get_diff_chunks(content: str) -> List[str]: