diff --git a/.app-template/urls.py-tpl b/.app-template/urls.py-tpl
index 71d3b4d5..02eab9de 100644
--- a/.app-template/urls.py-tpl
+++ b/.app-template/urls.py-tpl
@@ -1,8 +1,6 @@
-from django.urls import path
-
-from . import views
+from django.urls import URLPattern
app_name = "{{ app_name }}"
-urlpatterns = [
+urlpatterns: list[URLPattern] = [
]
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index d722332c..e01513fe 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -79,12 +79,14 @@ repos:
rev: v1.36.1
hooks:
- id: djlint-reformat-django
+ exclude: "ninja/swagger.html"
- id: djlint-django
- repo: https://github.com/shellcheck-py/shellcheck-py
rev: v0.10.0.1
hooks:
- id: shellcheck
+ exclude: ".envrc"
args: [-e, SC1091]
- repo: https://github.com/thibaudcolas/pre-commit-stylelint
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6e081e82..87d9d205 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,14 @@
# Changelog
+## 24.11.1
+
+- Reduced the allowed times in which daily updates are run. We still support bi-hourly cron runs.
+- Display a contact email to all authenticated users.
+- Add an API:
+ - The documentation is available at `/api/docs/`.
+ - You can manage application tokens in your profile.
+ - You can get auth tokens from these applications tokens to use the API.
+
## 24.10.3
- Correct display of titles with HTML entities when adding an article.
diff --git a/config/api.py b/config/api.py
new file mode 100644
index 00000000..4595f8e8
--- /dev/null
+++ b/config/api.py
@@ -0,0 +1,11 @@
+from ninja import NinjaAPI
+from ninja.security import django_auth
+
+from legadilo.feeds.api import feeds_api_router
+from legadilo.reading.api import reading_api_router
+from legadilo.users.api import AuthBearer, users_api_router
+
+api = NinjaAPI(title="Legadilo API", auth=[django_auth, AuthBearer()], docs_url="/docs/")
+api.add_router("reading/", reading_api_router)
+api.add_router("feeds/", feeds_api_router)
+api.add_router("users/", users_api_router)
diff --git a/config/settings.py b/config/settings.py
index 65306ed4..3690d7af 100644
--- a/config/settings.py
+++ b/config/settings.py
@@ -2,6 +2,7 @@
import concurrent
import warnings
+from datetime import timedelta
from pathlib import Path
import asgiref
@@ -113,6 +114,7 @@
"django.forms",
]
THIRD_PARTY_APPS = [
+ "ninja",
"django_version_checks",
"extra_checks",
"anymail",
@@ -607,8 +609,18 @@ def before_send_to_sentry(event, hint):
print("Failed to import sentry_sdk") # noqa: T201 print found
+# django-ninja
+# ------------------------------------------------------------------------------
+# See https://django-ninja.dev/reference/settings/
+NINJA_PAGINATION_MAX_LIMIT = 500
+NINJA_PAGINATION_CLASS = "legadilo.utils.pagination.LimitOffsetPagination"
+
+
# Your stuff...
# ------------------------------------------------------------------------------
ARTICLE_FETCH_TIMEOUT = env.int("LEGADILO_ARTICLE_FETCH_TIMEOUT", default=50)
RSS_FETCH_TIMEOUT = env.int("LEGADILO_RSS_FETCH_TIMEOUT", default=300)
CONTACT_EMAIL = env.str("LEGADILO_CONTACT_EMAIL", default=None)
+TOKEN_LENGTH = 50
+JWT_ALGORITHM = "HS256"
+JWT_MAX_AGE = timedelta(hours=4)
diff --git a/config/urls.py b/config/urls.py
index 239f7a1d..9dccc3e9 100644
--- a/config/urls.py
+++ b/config/urls.py
@@ -4,6 +4,8 @@
from django.urls import include, path
from django.views import defaults as default_views
+from config.api import api
+
def _correct_admin_url(path: str) -> str:
path = path.removeprefix("/")
@@ -26,6 +28,7 @@ def _correct_admin_url(path: str) -> str:
path("feeds/", include("legadilo.feeds.urls", namespace="feeds")),
path("reading/", include("legadilo.reading.urls", namespace="reading")),
path("import-export/", include("legadilo.import_export.urls", namespace="import_export")),
+ path("api/", api.urls),
] + static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT)
diff --git a/docs/adrs/0007-api.md b/docs/adrs/0007-api.md
new file mode 100644
index 00000000..feb91991
--- /dev/null
+++ b/docs/adrs/0007-api.md
@@ -0,0 +1,103 @@
+# 7 - API
+
+* **Date:** 2024-10-26
+* **Status:** Accepted
+
+## Context
+
+I’d like to build a browser extension to save articles and subscribe to feeds more easily directly on the page we are on.
+I think it’s a pretty common feature of feed aggregators and link savers (mostly link savers to be honest).
+We have two options:
+* Build a proper API and make the browser extension use this API:
+ * We have several possibilities to help us build the backend.
+ DRF and Django Ninja being the two real pretenders.
+ Since we have only a simple feature set and little time, I think Django Ninja is a better fit here: it’s easier to use and should allow us to develop the API faster.
+ It’s also async compatible out of the box and based on Pydantic (a package I use at work) which we can reuse for other validation!
+ * We will have to develop the API and dig a bit into how to do it properly with Django Ninja.
+ * It will unlock other possibilities in the long run in how to use the app (mobile app, integration with 3rd parties…).
+ * It should be easier to authenticate to the app: we can manage authentication differently and let the user configure the extension with an application token.
+ We could let the cookies be transmitted and rely on that (not sure how it will behave though).
+ And it makes the extension very tied to the connection to the app in the browser.
+ Whereas normal flow in this case is to never be disconnected.
+ Handling connection with MFA might be a problem too: we can’t display much in the extension and may have to redirect the user to the app anyway to login.
+ That would be a very poor UX.
+ * It should also be easier to post and retrieve data to manipulate it as part of the browser extension.
+* Call the standard views (i.e. the views that power the app):
+ * We will have to adapt how we manage CSRF tokens to be able to supply them to our views.
+ It’s doable, I’ve done it in the past, but I always disliked it.
+ * We will have to post data as form data.
+ Not the cleanest way, but manageable.
+ Having a view that accepts both JSON and form is too much of a hassle in bare Django for me to do that.
+ And if I’m not building an API, there isn’t really a point in bringing in a package just for that.
+ * We will manipulate HTML.
+ It may ease display (but I don’t think we will have anything fancy to display) at the cost of harder manipulations.
+ And we won’t be able to use the "normal" templates since we won’t have the same display.
+ This implies to make some views or templates even more complex.
+
+I think it’s worth trying to develop the API and see how it goes.
+If it’s not too much work, I think we should commit to it.
+
+See:
+* https://github.com/Jenselme/legadilo/issues/318
+* https://github.com/Jenselme/legadilo/issues/320
+* https://github.com/Jenselme/legadilo/issues/156
+
+
+## Decisions
+
+The test is a success and I think I achieved something good.
+Let’s commit the API with Ninja!
+
+### Tokens and API protection
+
+Auth: Django Ninja doesn’t provide anything out of the box to build an auth with tokens.
+It does however allow us to protect endpoints, routers or the full API with authentication.
+It also gives us the basic tooling to fetch a token from the `Authorization` header and validate it.
+If it’s valid, access is granted to the endpoint, if not the user gets an error message.
+Django Ninja also allows us to have a list of authentication methods to use, so we can use token based auth for the extension and cookie auth as usual to try stuff in the browser (and in tests).
+
+How to create tokens to access the API?
+* We could create them from username and password.
+ But as part of the extension, this would involve storing them as part of the configuration.
+ I don’t think it’s desirable.
+ It would also make working with MFA harder.
+ And if the password needs to be changed, it would impact all consumers of the API.
+* I think it’s safer to have the user create application "passwords" like in many other apps and create the access tokens from that.
+ These applications passwords would act as a refresh token.
+* These passwords won’t be transmitted much over the network: we will transmit them only to get an access token.
+ They can be revoked easily from the user profile without impacting anything else.
+ We should be able to give them an expiration date just in case.
+* They may be transmitted a bit too much and I may be relying too much on my knowledge to build this.
+ Not a problem right now, but I’ll have to recheck all this when I have more time.
+ Right now, I think it’s safe enough for myself.
+
+### Summary
+
+* We will build the API with Django Ninja.
+* We will authenticate to the API with application tokens that will be used to create an actual access token usable to access the API.
+ They will:
+ * Be a long random string stored in the database generated with the `secrets` module.
+ * Be visible to the user only at creation to prevent stealing.
+ * Have an optional expiration date.
+ If unset, they are valid until manually deleted.
+* The access tokens:
+ * Will be relatively short-lived to prevent attacks if it leaks.
+ * Will be in the JWT format and will contain the name of the application token used to generate it (mostly for debug purpose) and will contain the id of the user to use.
+ This will enable us to store and validate the token using a well-known format with well-known libraries.
+
+
+## Consequences
+
+* Let’s use Pydantic instead of JSON schemas to validate our JSON model fields and the data we read from external sources.
+ This will help us limit the number of libraries we use.
+ At this stage, I think Pydantic is a safe choice: it’s well known and maintained.
+ It’s also used a lot in the community nowadays and has become very popular.
+* The API won’t allow all capabilities at first, to save time.
+ We will develop first and foremost what we need for the extension.
+* We already unlock some API usage for everybody!
+ We will improve it later as part of https://github.com/Jenselme/legadilo/issues/320.
+* The API will be documented more or less automatically and browsable thanks to Swagger.
+* We should dig further to make sure our model is secured.
+ This is not a problem *right now* since we don’t have users, but it can become one in the future.
+ I’m mostly thinking of [Wallabag](https://doc.wallabag.org/en/developer/api/oauth) which has a different way to handle tokens.
+ This is logged here: https://github.com/Jenselme/legadilo/issues/325
diff --git a/legadilo/core/forms/widgets.py b/legadilo/core/forms/widgets.py
index ac25c072..9f78fb2c 100644
--- a/legadilo/core/forms/widgets.py
+++ b/legadilo/core/forms/widgets.py
@@ -55,3 +55,7 @@ def format_value(self, value):
return value
except (JSONDecodeError, ValueError, TypeError):
return super().format_value(value)
+
+
+class DateTimeWidget(widgets.DateTimeInput):
+ input_type = "datetime-local"
diff --git a/legadilo/core/models/timezone.py b/legadilo/core/models/timezone.py
index 7e554e56..7f9d4e8b 100644
--- a/legadilo/core/models/timezone.py
+++ b/legadilo/core/models/timezone.py
@@ -15,6 +15,7 @@
# along with this program. If not, see .
from typing import TYPE_CHECKING
+from zoneinfo import ZoneInfo
from django.db import models
@@ -45,3 +46,7 @@ def __str__(self):
def __repr__(self):
return f"Timezone(name={self.name})"
+
+ @property
+ def zone_info(self) -> ZoneInfo:
+ return ZoneInfo(self.name)
diff --git a/legadilo/feeds/api.py b/legadilo/feeds/api.py
new file mode 100644
index 00000000..f085ca98
--- /dev/null
+++ b/legadilo/feeds/api.py
@@ -0,0 +1,289 @@
+# Legadilo
+# Copyright (C) 2023-2024 by Legadilo contributors.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see .
+
+from datetime import datetime
+from http import HTTPStatus
+from operator import xor
+from typing import Annotated, Self
+
+from asgiref.sync import sync_to_async
+from django.shortcuts import aget_object_or_404
+from ninja import ModelSchema, PatchDict, Router, Schema
+from ninja.errors import ValidationError as NinjaValidationError
+from ninja.pagination import paginate
+from pydantic import model_validator
+
+from legadilo.feeds import constants
+from legadilo.feeds.models import Feed, FeedCategory, FeedTag
+from legadilo.feeds.services.feed_parsing import (
+ FeedFileTooBigError,
+ MultipleFeedFoundError,
+ NoFeedUrlFoundError,
+ get_feed_data,
+)
+from legadilo.reading.models import Tag
+from legadilo.users.models import User
+from legadilo.users.user_types import AuthenticatedApiRequest
+from legadilo.utils.api import ApiError, update_model_from_patch_dict
+from legadilo.utils.http_utils import get_rss_async_client
+from legadilo.utils.validators import (
+ CleanedString,
+ FullSanitizeValidator,
+ ValidUrlValidator,
+ remove_falsy_items,
+)
+
+feeds_api_router = Router(tags=["feeds"])
+
+
+class OutFeedCategorySchema(ModelSchema):
+ class Meta:
+ model = FeedCategory
+ exclude = ("user", "created_at", "updated_at")
+
+
+class OutFeedSchema(ModelSchema):
+ category: OutFeedCategorySchema | None
+
+ class Meta:
+ model = Feed
+ exclude = ("user", "created_at", "updated_at", "articles")
+
+
+@feeds_api_router.get(
+ "", response=list[OutFeedSchema], url_name="list_feeds", summary="List all you feeds"
+)
+@paginate
+async def list_feeds_view(request: AuthenticatedApiRequest): # noqa: RUF029 paginate is async!
+ return Feed.objects.get_queryset().for_user(request.auth).select_related("category")
+
+
+class FeedSubscription(Schema):
+ feed_url: Annotated[str, ValidUrlValidator]
+ refresh_delay: constants.FeedRefreshDelays = constants.FeedRefreshDelays.DAILY_AT_NOON
+ article_retention_time: int = 0
+ category_id: int | None = None
+ tags: Annotated[tuple[CleanedString, ...], remove_falsy_items(tuple)] = ()
+ open_original_link_by_default: bool = False
+
+
+@feeds_api_router.post(
+ "",
+ response={
+ HTTPStatus.CREATED: OutFeedSchema,
+ HTTPStatus.CONFLICT: ApiError,
+ HTTPStatus.NOT_ACCEPTABLE: ApiError,
+ },
+ url_name="subscribe_to_feed",
+ summary="Subscribe to feed from its link",
+)
+async def subscribe_to_feed_view(request: AuthenticatedApiRequest, payload: FeedSubscription):
+ """Many parameters of the feed can be customized directly at creation."""
+ category = await _get_category(request.auth, payload.category_id)
+
+ try:
+ async with get_rss_async_client() as client:
+ feed_medata = await get_feed_data(payload.feed_url, client=client)
+ tags = await sync_to_async(Tag.objects.get_or_create_from_list)(request.auth, payload.tags)
+ feed, created = await sync_to_async(Feed.objects.create_from_metadata)(
+ feed_medata,
+ request.auth,
+ payload.refresh_delay,
+ payload.article_retention_time,
+ tags,
+ category,
+ open_original_link_by_default=payload.open_original_link_by_default,
+ )
+ except (NoFeedUrlFoundError, MultipleFeedFoundError):
+ return HTTPStatus.NOT_ACCEPTABLE, {
+ "detail": "We failed to find a feed at the supplied URL."
+ }
+ except FeedFileTooBigError:
+ return HTTPStatus.NOT_ACCEPTABLE, {"detail": "The feed is too big."}
+ except Exception: # noqa: BLE001 Do not catch blind exception: `Exception`
+ # That's the catch of weird validation, parsing and network errors.
+ return HTTPStatus.NOT_ACCEPTABLE, {
+ "detail": "We failed to access or parse the feed you supplied. Please make sure it is "
+ "accessible and valid."
+ }
+
+ if not created:
+ return HTTPStatus.CONFLICT, {"detail": "You are already subscribed to this feed"}
+
+ return HTTPStatus.CREATED, feed
+
+
+async def _get_category(user: User, category_id: int | None) -> FeedCategory | None:
+ if category_id is None:
+ return None
+
+ try:
+ return await FeedCategory.objects.aget(id=category_id, user=user)
+ except FeedCategory.DoesNotExist as e:
+ raise NinjaValidationError([
+ {"category_id": f"We failed to find the category with id: {category_id}"}
+ ]) from e
+
+
+@feeds_api_router.get(
+ "/{int:feed_id}/",
+ response=OutFeedSchema,
+ url_name="get_feed",
+ summary="View the details of a specific feed",
+)
+async def get_feed_view(request: AuthenticatedApiRequest, feed_id: int):
+ return await aget_object_or_404(
+ Feed.objects.get_queryset().select_related("category"), id=feed_id, user=request.auth
+ )
+
+
+class FeedUpdate(Schema):
+ disabled_reason: Annotated[str, FullSanitizeValidator] = ""
+ disabled_at: datetime | None = None
+ category_id: int | None = None
+ tags: Annotated[tuple[CleanedString, ...], remove_falsy_items(tuple)] = ()
+ refresh_delay: constants.FeedRefreshDelays
+ article_retention_time: int
+ open_original_link_by_default: bool
+
+ @model_validator(mode="after")
+ def check_disabled(self) -> Self:
+ if xor(bool(self.disabled_reason), bool(self.disabled_at)):
+ raise ValueError(
+ "You must supply none of disabled_reason and disabled_at or both of them"
+ )
+
+ if self.disabled_reason is None:
+ self.disabled_reason = ""
+
+ return self
+
+
+@feeds_api_router.patch(
+ "/{int:feed_id}/", response=OutFeedSchema, url_name="update_feed", summary="Update a feed"
+)
+async def update_feed_view(
+ request: AuthenticatedApiRequest,
+ feed_id: int,
+ payload: PatchDict[FeedUpdate], # type: ignore[type-arg]
+):
+ qs = Feed.objects.get_queryset().select_related("category")
+ feed = await aget_object_or_404(qs, id=feed_id, user=request.auth)
+
+ if (tags := payload.pop("tags", None)) is not None:
+ await _update_feed_tags(request.auth, feed, tags)
+
+ # We must refresh to update generated fields & tags.
+ await update_model_from_patch_dict(feed, payload, must_refresh=True, refresh_qs=qs)
+
+ return feed
+
+
+async def _update_feed_tags(user: User, feed: Feed, new_tags: tuple[str, ...]):
+ tags = await sync_to_async(Tag.objects.get_or_create_from_list)(user, new_tags)
+ await sync_to_async(FeedTag.objects.associate_feed_with_tag_slugs)(
+ feed, [tag.slug for tag in tags], clear_existing=True
+ )
+
+
+@feeds_api_router.delete(
+ "/{int:feed_id}/",
+ response={HTTPStatus.NO_CONTENT: None},
+ url_name="delete_feed",
+ summary="Delete a feed",
+)
+async def delete_feed_view(request: AuthenticatedApiRequest, feed_id: int):
+ feed = await aget_object_or_404(Feed, id=feed_id, user=request.auth)
+
+ await feed.adelete()
+
+ return HTTPStatus.NO_CONTENT, None
+
+
+@feeds_api_router.get(
+ "/categories/",
+ response=list[OutFeedCategorySchema],
+ url_name="list_feed_categories",
+ summary="List all your feed categories",
+)
+@paginate
+async def list_categories_view(request: AuthenticatedApiRequest): # noqa: RUF029 paginate is async!
+ return FeedCategory.objects.get_queryset().for_user(request.auth)
+
+
+class FeedCategoryPayload(Schema):
+ title: str
+
+
+@feeds_api_router.post(
+ "/categories/",
+ response={HTTPStatus.CREATED: OutFeedCategorySchema, HTTPStatus.CONFLICT: ApiError},
+ url_name="create_feed_category",
+ summary="Create a feed category",
+)
+async def create_category_view(request: AuthenticatedApiRequest, payload: FeedCategoryPayload):
+ # For some reason, I always get a 400 error with a useless HTML body if I do a try/catch with
+ # the IntegrityError. Let's use aget_or_create to prevent this.
+ category, created = await FeedCategory.objects.aget_or_create(
+ title=payload.title, user=request.auth
+ )
+
+ if not created:
+ return HTTPStatus.CONFLICT, {"detail": "A category with this title already exists."}
+
+ return HTTPStatus.CREATED, category
+
+
+@feeds_api_router.get(
+ "/categories/{int:category_id}",
+ response=OutFeedCategorySchema,
+ url_name="get_feed_category",
+ summary="View a specific feed category",
+)
+async def get_category_view(request: AuthenticatedApiRequest, category_id: int):
+ return await aget_object_or_404(FeedCategory, id=category_id, user=request.auth)
+
+
+@feeds_api_router.patch(
+ "/categories/{int:category_id}/",
+ response=OutFeedCategorySchema,
+ url_name="update_feed_category",
+ summary="Update a feed category",
+)
+async def update_category_view(
+ request: AuthenticatedApiRequest,
+ category_id: int,
+ payload: PatchDict[FeedCategoryPayload], # type: ignore[type-arg]
+) -> FeedCategory:
+ category = await aget_object_or_404(FeedCategory, id=category_id, user=request.auth)
+
+ await update_model_from_patch_dict(category, payload)
+
+ return category
+
+
+@feeds_api_router.delete(
+ "/categories/{int:category_id}/",
+ url_name="delete_feed_category",
+ response={HTTPStatus.NO_CONTENT: None},
+ summary="Delete a feed category",
+)
+async def delete_category_view(request: AuthenticatedApiRequest, category_id: int):
+ category = await aget_object_or_404(FeedCategory, id=category_id, user=request.auth)
+
+ await category.adelete()
+
+ return HTTPStatus.NO_CONTENT, None
diff --git a/legadilo/feeds/migrations/0009_feedupdate_ignored_article_links_and_more.py b/legadilo/feeds/migrations/0009_feedupdate_ignored_article_links_and_more.py
index c25de1ca..97d66606 100644
--- a/legadilo/feeds/migrations/0009_feedupdate_ignored_article_links_and_more.py
+++ b/legadilo/feeds/migrations/0009_feedupdate_ignored_article_links_and_more.py
@@ -35,12 +35,7 @@ class Migration(migrations.Migration):
field=models.JSONField(
blank=True,
default=list,
- validators=[
- legadilo.utils.validators.JsonSchemaValidator({
- "items": {"type": "string"},
- "type": "array",
- })
- ],
+ validators=[legadilo.utils.validators.list_of_strings_validator],
),
),
migrations.AlterField(
diff --git a/legadilo/feeds/models/feed.py b/legadilo/feeds/models/feed.py
index 473b31b0..82a228af 100644
--- a/legadilo/feeds/models/feed.py
+++ b/legadilo/feeds/models/feed.py
@@ -208,7 +208,7 @@ def get_by_categories(self, user: User) -> dict[str | None, list[Feed]]:
self.get_queryset()
.for_user(user)
.select_related("category")
- .order_by("category__title")
+ .order_by("category__title", "id")
):
category_title = feed.category.title if feed.category else None
feeds_by_categories.setdefault(category_title, []).append(feed)
diff --git a/legadilo/feeds/models/feed_update.py b/legadilo/feeds/models/feed_update.py
index 6ef20a7b..954d5da8 100644
--- a/legadilo/feeds/models/feed_update.py
+++ b/legadilo/feeds/models/feed_update.py
@@ -22,7 +22,7 @@
from django.db import models
from ...utils.time_utils import utcnow
-from ...utils.validators import list_of_strings_json_schema_validator
+from ...utils.validators import list_of_strings_validator
from .. import constants
if TYPE_CHECKING:
@@ -115,7 +115,7 @@ def _get_feed_deactivation_error_time_window(self, feed: Feed) -> relativedelta:
class FeedUpdate(models.Model):
status = models.CharField(choices=constants.FeedUpdateStatus.choices, max_length=100)
ignored_article_links = models.JSONField(
- validators=[list_of_strings_json_schema_validator], blank=True, default=list
+ validators=[list_of_strings_validator], blank=True, default=list
)
error_message = models.TextField(blank=True)
technical_debug_data = models.JSONField(blank=True, null=True)
diff --git a/legadilo/feeds/services/feed_parsing.py b/legadilo/feeds/services/feed_parsing.py
index 0fc8a181..12807629 100644
--- a/legadilo/feeds/services/feed_parsing.py
+++ b/legadilo/feeds/services/feed_parsing.py
@@ -18,66 +18,50 @@
import re
import sys
import time
-from dataclasses import dataclass
from datetime import UTC, datetime
from html import unescape
from itertools import chain
+from typing import Annotated
from urllib.parse import parse_qs, urlparse
import httpx
from bs4 import BeautifulSoup
from feedparser import FeedParserDict
from feedparser import parse as parse_feed
+from pydantic import BaseModel as BaseSchema
from legadilo.reading.services.article_fetching import (
ArticleData,
- build_article_data,
parse_tags_list,
)
from legadilo.utils.security import full_sanitize
from ...utils.time_utils import dt_to_http_date
-from ...utils.validators import normalize_url
+from ...utils.validators import (
+ FullSanitizeValidator,
+ ValidUrlValidator,
+ default_frozen_model_config,
+ normalize_url,
+ truncate,
+)
from .. import constants
logger = logging.getLogger(__name__)
-@dataclass(frozen=True)
-class FeedData:
- feed_url: str
- site_url: str
- title: str
- description: str
+class FeedData(BaseSchema):
+ model_config = default_frozen_model_config
+
+ feed_url: Annotated[str, ValidUrlValidator]
+ site_url: Annotated[str, ValidUrlValidator]
+ title: Annotated[str, FullSanitizeValidator, truncate(constants.FEED_TITLE_MAX_LENGTH)]
+ description: Annotated[str, FullSanitizeValidator]
feed_type: constants.SupportedFeedType
etag: str
last_modified: datetime | None
articles: list[ArticleData]
-def build_feed_data( # noqa: PLR0913 too many arguments
- *,
- feed_url: str,
- site_url: str,
- title: str,
- description: str,
- feed_type: constants.SupportedFeedType,
- etag: str,
- last_modified: datetime | None,
- articles: list[ArticleData],
-) -> FeedData:
- return FeedData(
- feed_url=feed_url,
- site_url=site_url,
- title=full_sanitize(title)[: constants.FEED_TITLE_MAX_LENGTH],
- description=full_sanitize(description),
- feed_type=feed_type,
- articles=articles,
- etag=etag,
- last_modified=last_modified,
- )
-
-
class NoFeedUrlFoundError(Exception):
pass
@@ -153,8 +137,9 @@ def _find_youtube_rss_feed_link(url: str) -> str:
def build_feed_data_from_parsed_feed(parsed_feed: FeedParserDict, resolved_url: str) -> FeedData:
- feed_title = full_sanitize(parsed_feed.feed.get("title", ""))
- return build_feed_data(
+ feed_title = parsed_feed.feed.get("title", "")
+
+ return FeedData(
feed_url=resolved_url,
site_url=_normalize_found_link(parsed_feed.feed.get("link", resolved_url)),
title=feed_title,
@@ -241,7 +226,7 @@ def _parse_articles_in_feed(
article_link = _get_article_link(feed_url, entry)
content = _get_article_content(entry)
articles_data.append(
- build_article_data(
+ ArticleData(
external_article_id=entry.get("id", ""),
title=entry.title,
summary=_get_summary(article_link, entry),
diff --git a/legadilo/feeds/tests/factories.py b/legadilo/feeds/tests/factories.py
index 0cb445e0..9f82cd5b 100644
--- a/legadilo/feeds/tests/factories.py
+++ b/legadilo/feeds/tests/factories.py
@@ -23,6 +23,7 @@
from .. import constants
from ..models import Feed, FeedCategory, FeedDeletedArticle
+from ..services.feed_parsing import FeedData
class FeedCategoryFactory(DjangoModelFactory):
@@ -64,3 +65,17 @@ class FeedDeletedArticleFactory(DjangoModelFactory):
class Meta:
model = FeedDeletedArticle
+
+
+class FeedDataFactory(factory.DictFactory):
+ feed_url = factory.Sequence(lambda n: f"https://example.com/feeds-{n}.rss")
+ site_url = "https://example.com"
+ title = factory.Sequence(lambda n: f"Feed {n}")
+ description = "Some feed description"
+ feed_type = SupportedFeedType.rss
+ etag = ""
+ last_modified = None
+ articles = factory.ListFactory()
+
+ class Meta:
+ model = FeedData
diff --git a/legadilo/feeds/tests/snapshots/test_api/test_get/feed.json b/legadilo/feeds/tests/snapshots/test_api/test_get/feed.json
new file mode 100644
index 00000000..8b5c5942
--- /dev/null
+++ b/legadilo/feeds/tests/snapshots/test_api/test_get/feed.json
@@ -0,0 +1,16 @@
+{
+ "article_retention_time": 0,
+ "category": null,
+ "description": "",
+ "disabled_at": null,
+ "disabled_reason": "",
+ "enabled": true,
+ "feed_type": "rss",
+ "feed_url": "https://example.com/feed.rss",
+ "id": 1,
+ "open_original_link_by_default": false,
+ "refresh_delay": "DAILY_AT_NOON",
+ "site_url": "https://example.com",
+ "slug": "feed-slug",
+ "title": "Feed title"
+}
\ No newline at end of file
diff --git a/legadilo/feeds/tests/snapshots/test_api/test_list/feeds.json b/legadilo/feeds/tests/snapshots/test_api/test_list/feeds.json
new file mode 100644
index 00000000..f236fc17
--- /dev/null
+++ b/legadilo/feeds/tests/snapshots/test_api/test_list/feeds.json
@@ -0,0 +1,21 @@
+{
+ "count": 1,
+ "items": [
+ {
+ "article_retention_time": 0,
+ "category": null,
+ "description": "",
+ "disabled_at": null,
+ "disabled_reason": "",
+ "enabled": true,
+ "feed_type": "rss",
+ "feed_url": "https://example.com/feed.rss",
+ "id": 1,
+ "open_original_link_by_default": false,
+ "refresh_delay": "DAILY_AT_NOON",
+ "site_url": "https://example.com",
+ "slug": "feed-slug",
+ "title": "Feed title"
+ }
+ ]
+}
\ No newline at end of file
diff --git a/legadilo/feeds/tests/snapshots/test_api/test_subscribe_to_feed/feed.json b/legadilo/feeds/tests/snapshots/test_api/test_subscribe_to_feed/feed.json
new file mode 100644
index 00000000..67b9fdb5
--- /dev/null
+++ b/legadilo/feeds/tests/snapshots/test_api/test_subscribe_to_feed/feed.json
@@ -0,0 +1,20 @@
+{
+ "article_retention_time": 100,
+ "category": {
+ "id": 10,
+ "slug": "category-slug",
+ "title": "Category title"
+ },
+ "description": "Some feed description",
+ "disabled_at": null,
+ "disabled_reason": "",
+ "enabled": true,
+ "feed_type": "rss",
+ "feed_url": "https://example.com/feed.rss",
+ "id": 1,
+ "open_original_link_by_default": true,
+ "refresh_delay": "HOURLY",
+ "site_url": "https://example.com",
+ "slug": "feed-slug",
+ "title": "Feed title"
+}
\ No newline at end of file
diff --git a/legadilo/feeds/tests/snapshots/test_api/test_subscribe_to_feed_with_just_url/feed.json b/legadilo/feeds/tests/snapshots/test_api/test_subscribe_to_feed_with_just_url/feed.json
new file mode 100644
index 00000000..7f09ee52
--- /dev/null
+++ b/legadilo/feeds/tests/snapshots/test_api/test_subscribe_to_feed_with_just_url/feed.json
@@ -0,0 +1,16 @@
+{
+ "article_retention_time": 0,
+ "category": null,
+ "description": "Some feed description",
+ "disabled_at": null,
+ "disabled_reason": "",
+ "enabled": true,
+ "feed_type": "rss",
+ "feed_url": "https://example.com/feed.rss",
+ "id": 1,
+ "open_original_link_by_default": false,
+ "refresh_delay": "DAILY_AT_NOON",
+ "site_url": "https://example.com",
+ "slug": "feed-slug",
+ "title": "Feed title"
+}
\ No newline at end of file
diff --git a/legadilo/feeds/tests/snapshots/test_api/test_update/feed.json b/legadilo/feeds/tests/snapshots/test_api/test_update/feed.json
new file mode 100644
index 00000000..0aaca195
--- /dev/null
+++ b/legadilo/feeds/tests/snapshots/test_api/test_update/feed.json
@@ -0,0 +1,20 @@
+{
+ "article_retention_time": 600,
+ "category": {
+ "id": 10,
+ "slug": "category-slug",
+ "title": "Category title"
+ },
+ "description": "",
+ "disabled_at": null,
+ "disabled_reason": "",
+ "enabled": true,
+ "feed_type": "rss",
+ "feed_url": "https://example.com/feed.rss",
+ "id": 1,
+ "open_original_link_by_default": false,
+ "refresh_delay": "TWICE_A_WEEK",
+ "site_url": "https://example.com",
+ "slug": "feed-slug",
+ "title": "Feed title"
+}
\ No newline at end of file
diff --git a/legadilo/feeds/tests/snapshots/test_api/test_update_tags/feed.json b/legadilo/feeds/tests/snapshots/test_api/test_update_tags/feed.json
new file mode 100644
index 00000000..e8e38208
--- /dev/null
+++ b/legadilo/feeds/tests/snapshots/test_api/test_update_tags/feed.json
@@ -0,0 +1,20 @@
+{
+ "article_retention_time": 0,
+ "category": {
+ "id": 10,
+ "slug": "category-slug",
+ "title": "Category title"
+ },
+ "description": "",
+ "disabled_at": null,
+ "disabled_reason": "",
+ "enabled": true,
+ "feed_type": "rss",
+ "feed_url": "https://example.com/feed.rss",
+ "id": 1,
+ "open_original_link_by_default": false,
+ "refresh_delay": "DAILY_AT_NOON",
+ "site_url": "https://example.com",
+ "slug": "feed-slug",
+ "title": "Feed title"
+}
\ No newline at end of file
diff --git a/legadilo/feeds/tests/test_api.py b/legadilo/feeds/tests/test_api.py
new file mode 100644
index 00000000..3eabf749
--- /dev/null
+++ b/legadilo/feeds/tests/test_api.py
@@ -0,0 +1,572 @@
+# Legadilo
+# Copyright (C) 2023-2024 by Legadilo contributors.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+from http import HTTPStatus
+from typing import Any
+
+import httpx
+import pytest
+from django.urls import reverse
+
+from legadilo.feeds import constants
+from legadilo.feeds.models import Feed, FeedCategory
+from legadilo.feeds.tests.factories import FeedCategoryFactory, FeedDataFactory, FeedFactory
+from legadilo.reading.tests.factories import TagFactory
+from legadilo.utils.testing import serialize_for_snapshot
+from legadilo.utils.time_utils import utcdt
+
+
+def _prepare_feed_for_snapshot(data: dict[str, Any], feed: Feed) -> dict[str, Any]:
+ data = data.copy()
+ assert data["id"] == feed.id
+ assert data["slug"] == feed.slug
+ assert data["title"] == feed.title
+ assert data["feed_url"] == feed.feed_url
+ assert (feed.category_id is None and data["category"] is None) or (
+ feed.category_id == data["category"]["id"]
+ )
+
+ data["id"] = 1
+ data["slug"] = "feed-slug"
+ data["title"] = "Feed title"
+ data["feed_url"] = "https://example.com/feed.rss"
+ if data.get("category"):
+ data["category"]["id"] = 10
+ data["category"]["title"] = "Category title"
+ data["category"]["slug"] = "category-slug"
+
+ return data
+
+
+@pytest.mark.django_db
+class TestListCategoriesView:
+ @pytest.fixture(autouse=True)
+ def _setup_data(self, user):
+ self.url = reverse("api-1.0.0:list_feed_categories")
+ self.feed_category = FeedCategoryFactory(user=user)
+
+ def test_not_logged_in(self, client):
+ response = client.get(self.url)
+
+ assert response.status_code == HTTPStatus.UNAUTHORIZED
+
+ def test_list_other_user(self, logged_in_other_user_sync_client):
+ response = logged_in_other_user_sync_client.get(self.url)
+
+ assert response.status_code == HTTPStatus.OK
+ assert response.json() == {"count": 0, "items": []}
+
+ def test_list(self, logged_in_sync_client, django_assert_num_queries):
+ with django_assert_num_queries(7):
+ response = logged_in_sync_client.get(self.url)
+
+ assert response.status_code == HTTPStatus.OK
+ assert response.json() == {
+ "count": 1,
+ "items": [
+ {
+ "id": self.feed_category.id,
+ "slug": self.feed_category.slug,
+ "title": self.feed_category.title,
+ }
+ ],
+ }
+
+
+@pytest.mark.django_db
+class TestCreateCategoryView:
+ @pytest.fixture(autouse=True)
+ def _setup_data(self):
+ self.url = reverse("api-1.0.0:create_feed_category")
+
+ def test_not_logged_in(self, client):
+ response = client.get(self.url)
+
+ assert response.status_code == HTTPStatus.UNAUTHORIZED
+
+ def test_create(self, logged_in_sync_client, user, django_assert_num_queries):
+ with django_assert_num_queries(9):
+ response = logged_in_sync_client.post(
+ self.url, {"title": "Test category"}, content_type="application/json"
+ )
+
+ assert response.status_code == HTTPStatus.CREATED
+ assert FeedCategory.objects.count() == 1
+ feed_category = FeedCategory.objects.get()
+ assert feed_category.title == "Test category"
+ assert feed_category.user == user
+ assert response.json() == {
+ "id": feed_category.id,
+ "slug": feed_category.slug,
+ "title": feed_category.title,
+ }
+
+ def test_create_duplicate(self, user, logged_in_sync_client):
+ feed_category = FeedCategoryFactory(user=user)
+
+ response = logged_in_sync_client.post(
+ self.url, {"title": feed_category.title}, content_type="application/json"
+ )
+
+ assert response.status_code == HTTPStatus.CONFLICT
+ assert response.json() == {"detail": "A category with this title already exists."}
+ assert FeedCategory.objects.count() == 1
+
+
+@pytest.mark.django_db
+class TestGetCategoryView:
+ @pytest.fixture(autouse=True)
+ def _setup_data(self, user):
+ self.feed_category = FeedCategoryFactory(user=user)
+ self.url = reverse(
+ "api-1.0.0:get_feed_category", kwargs={"category_id": self.feed_category.id}
+ )
+
+ def test_not_logged_in(self, client):
+ response = client.get(self.url)
+
+ assert response.status_code == HTTPStatus.UNAUTHORIZED
+
+ def test_get_other_user(self, logged_in_other_user_sync_client):
+ response = logged_in_other_user_sync_client.get(self.url)
+
+ assert response.status_code == HTTPStatus.NOT_FOUND
+
+ def test_get(self, logged_in_sync_client, django_assert_num_queries):
+ with django_assert_num_queries(6):
+ response = logged_in_sync_client.get(self.url)
+
+ assert response.status_code == HTTPStatus.OK
+ assert response.json() == {
+ "id": self.feed_category.id,
+ "slug": self.feed_category.slug,
+ "title": self.feed_category.title,
+ }
+
+
+@pytest.mark.django_db
+class TestUpdateCategoryView:
+ @pytest.fixture(autouse=True)
+ def _setup_data(self, user):
+ self.feed_category = FeedCategoryFactory(user=user)
+ self.url = reverse(
+ "api-1.0.0:update_feed_category", kwargs={"category_id": self.feed_category.id}
+ )
+
+ def test_not_logged_in(self, client):
+ response = client.patch(self.url)
+
+ assert response.status_code == HTTPStatus.UNAUTHORIZED
+
+ def test_update_other_user(self, logged_in_other_user_sync_client):
+ response = logged_in_other_user_sync_client.patch(
+ self.url, {"title": "New title"}, content_type="application/json"
+ )
+
+ assert response.status_code == HTTPStatus.NOT_FOUND
+
+ def test_update(self, logged_in_sync_client, django_assert_num_queries):
+ with django_assert_num_queries(7):
+ response = logged_in_sync_client.patch(
+ self.url, {"title": "New title"}, content_type="application/json"
+ )
+
+ assert response.status_code == HTTPStatus.OK
+ self.feed_category.refresh_from_db()
+ assert self.feed_category.title == "New title"
+ assert response.json() == {
+ "id": self.feed_category.id,
+ "slug": self.feed_category.slug,
+ "title": "New title",
+ }
+
+
+@pytest.mark.django_db
+class TestDeleteCategoryView:
+ @pytest.fixture(autouse=True)
+ def _setup_data(self, user):
+ self.feed_category = FeedCategoryFactory(user=user)
+ self.url = reverse(
+ "api-1.0.0:delete_feed_category", kwargs={"category_id": self.feed_category.id}
+ )
+
+ def test_not_logged_in(self, client):
+ response = client.delete(self.url)
+
+ assert response.status_code == HTTPStatus.UNAUTHORIZED
+
+ def test_delete_other_user(self, logged_in_other_user_sync_client):
+ response = logged_in_other_user_sync_client.delete(self.url)
+
+ assert response.status_code == HTTPStatus.NOT_FOUND
+
+ def test_delete(self, logged_in_sync_client, django_assert_num_queries):
+ with django_assert_num_queries(8):
+ response = logged_in_sync_client.delete(self.url)
+
+ assert response.status_code == HTTPStatus.NO_CONTENT
+ assert FeedCategory.objects.count() == 0
+
+
+@pytest.mark.django_db
+class TestListFeedsView:
+ @pytest.fixture(autouse=True)
+ def _setup_data(self, user):
+ self.feed = FeedFactory(user=user)
+ self.url = reverse("api-1.0.0:list_feeds")
+
+ def test_not_logged_in(self, client):
+ response = client.get(self.url)
+
+ assert response.status_code == HTTPStatus.UNAUTHORIZED
+
+ def test_list_other_user(self, logged_in_other_user_sync_client):
+ response = logged_in_other_user_sync_client.get(self.url)
+
+ assert response.status_code == HTTPStatus.OK
+ assert response.json() == {"count": 0, "items": []}
+
+ def test_list(self, logged_in_sync_client, django_assert_num_queries, snapshot):
+ with django_assert_num_queries(7):
+ response = logged_in_sync_client.get(self.url)
+
+ assert response.status_code == HTTPStatus.OK
+ data = response.json()
+ assert len(data["items"]) == 1
+ data["items"][0] = _prepare_feed_for_snapshot(data["items"][0], self.feed)
+ snapshot.assert_match(serialize_for_snapshot(data), "feeds.json")
+
+
+@pytest.mark.django_db
+class TestSubscribeToFeedView:
+ @pytest.fixture(autouse=True)
+ def _setup_data(self):
+ self.url = reverse("api-1.0.0:subscribe_to_feed")
+
+ def test_not_logged_in(self, client):
+ response = client.post(self.url)
+
+ assert response.status_code == HTTPStatus.UNAUTHORIZED
+
+ def test_subscribe_to_feed_invalid_url(self, logged_in_other_user_sync_client):
+ response = logged_in_other_user_sync_client.post(
+ self.url, {"feed_url": "toto"}, content_type="application/json"
+ )
+
+ assert response.status_code == HTTPStatus.UNPROCESSABLE_ENTITY
+ assert response.json() == {
+ "detail": [
+ {
+ "ctx": {"error": "toto is not a valid url"},
+ "loc": ["body", "payload", "feed_url"],
+ "msg": "Value error, toto is not a valid url",
+ "type": "value_error",
+ }
+ ]
+ }
+
+ def test_subscribe_to_feed_with_just_url(
+ self, user, logged_in_sync_client, mocker, django_assert_num_queries, snapshot
+ ):
+ feed_url = "https://example.com/feed.rss"
+ mocker.patch(
+ "legadilo.feeds.api.get_feed_data", return_value=FeedDataFactory(feed_url=feed_url)
+ )
+
+ with django_assert_num_queries(19):
+ response = logged_in_sync_client.post(
+ self.url, {"feed_url": feed_url}, content_type="application/json"
+ )
+
+ assert response.status_code == HTTPStatus.CREATED
+ assert Feed.objects.count() == 1
+ feed = Feed.objects.get()
+ assert feed.feed_url == feed_url
+ assert feed.category is None
+ assert feed.user == user
+ snapshot.assert_match(
+ serialize_for_snapshot(_prepare_feed_for_snapshot(response.json(), feed)), "feed.json"
+ )
+
+ def test_subscribe_to_feed(
+ self, user, logged_in_sync_client, mocker, django_assert_num_queries, snapshot
+ ):
+ feed_url = "https://example.com/feed.rss"
+ mocker.patch(
+ "legadilo.feeds.api.get_feed_data", return_value=FeedDataFactory(feed_url=feed_url)
+ )
+ category = FeedCategoryFactory(user=user)
+ existing_tag = TagFactory(user=user)
+
+ with django_assert_num_queries(23):
+ response = logged_in_sync_client.post(
+ self.url,
+ {
+ "feed_url": feed_url,
+ "refresh_delay": constants.FeedRefreshDelays.HOURLY.value,
+ "article_retention_time": 100,
+ "category_id": category.id,
+ "tags": ["", "
",
"table_of_content": [],
"title": "Article 7",
- "updated_at": "2020-10-04T19:00:19.463831+00:00",
+ "updated_at": "2020-10-04T19:00:19.463Z",
"was_opened": false
}
]
\ No newline at end of file
diff --git a/legadilo/import_export/tests/services/snapshots/test_custom_csv/test_import_custom_csv/feeds.json b/legadilo/import_export/tests/services/snapshots/test_custom_csv/test_import_custom_csv/feeds.json
index 3c3f050a..0ab495fc 100644
--- a/legadilo/import_export/tests/services/snapshots/test_custom_csv/test_import_custom_csv/feeds.json
+++ b/legadilo/import_export/tests/services/snapshots/test_custom_csv/test_import_custom_csv/feeds.json
@@ -48,7 +48,7 @@
"article_retention_time": 0,
"category__title": null,
"description": "",
- "disabled_at": "2024-05-17T13:00:00+00:00",
+ "disabled_at": "2024-05-17T13:00:00Z",
"disabled_reason": "Failed to reach feed URL while importing from custom CSV.",
"enabled": false,
"feed_type": "rss",
diff --git a/legadilo/import_export/tests/services/test_wallabag.py b/legadilo/import_export/tests/services/test_wallabag.py
index bbed58ee..322fbe38 100644
--- a/legadilo/import_export/tests/services/test_wallabag.py
+++ b/legadilo/import_export/tests/services/test_wallabag.py
@@ -15,7 +15,7 @@
# along with this program. If not, see <https://www.gnu.org/licenses/>.
import pytest
-from jsonschema import ValidationError as JsonSchemaValidationError
+from pydantic import ValidationError as PydanticValidationError
from legadilo.import_export.services.wallabag import _import_wallabag_data
from legadilo.reading import constants as reading_constants
@@ -24,7 +24,7 @@
def test_import_invalid_data(user):
- with pytest.raises(JsonSchemaValidationError):
+ with pytest.raises(PydanticValidationError):
_import_wallabag_data(user, [{"key": "value"}])
diff --git a/legadilo/import_export/tests/views/snapshots/test_import_export_articles_views/test_import_valid_file/walabag_articles.json b/legadilo/import_export/tests/views/snapshots/test_import_export_articles_views/test_import_valid_file/walabag_articles.json
index a6ba305d..d442b035 100644
--- a/legadilo/import_export/tests/views/snapshots/test_import_export_articles_views/test_import_valid_file/walabag_articles.json
+++ b/legadilo/import_export/tests/views/snapshots/test_import_export_articles_views/test_import_valid_file/walabag_articles.json
@@ -16,14 +16,14 @@
"opened_at": null,
"preview_picture_alt": "",
"preview_picture_url": "https://examplec.com/preview.png",
- "published_at": "2024-04-19T17:18:29+00:00",
+ "published_at": "2024-04-19T17:18:29Z",
"read_at": null,
"reading_time": 0,
"slug": "refactoring-with-ai",
"summary": "
Some data
",
"table_of_content": [],
"title": "Refactoring with AI",
- "updated_at": "2024-04-20T17:17:54+00:00",
+ "updated_at": "2024-04-20T17:17:54Z",
"was_opened": false
}
]
\ No newline at end of file
diff --git a/legadilo/import_export/views/import_export_articles_views.py b/legadilo/import_export/views/import_export_articles_views.py
index 917764bf..f9df7e97 100644
--- a/legadilo/import_export/views/import_export_articles_views.py
+++ b/legadilo/import_export/views/import_export_articles_views.py
@@ -26,8 +26,7 @@
from django.template.response import TemplateResponse
from django.utils.translation import gettext_lazy as _
from django.views.decorators.http import require_GET, require_http_methods
-from jsonschema import ValidationError as JsonSchemaValidationError
-from jsonschema.exceptions import ValidationError as JsonValidationError
+from pydantic import ValidationError as PydanticValidationError
from legadilo.import_export.services.exceptions import DataImportError
from legadilo.users.models import User
@@ -102,7 +101,7 @@ async def _import_custom_csv(request: AuthenticatedHttpRequest):
nb_imported_feeds,
nb_imported_categories,
) = await import_custom_csv_file(await request.auser(), file_path)
- except (JsonSchemaValidationError, DataImportError, UnicodeDecodeError):
+ except (DataImportError, UnicodeDecodeError, PydanticValidationError):
status = HTTPStatus.BAD_REQUEST
messages.error(request, _("The file you supplied is not valid."))
else:
@@ -127,7 +126,7 @@ async def _import_wallabag(request: AuthenticatedHttpRequest):
nb_imported_articles = await sync_to_async(import_wallabag_file)(
await request.auser(), import_wallabag_form.cleaned_data["wallabag_file"]
)
- except (JSONDecodeError, UnicodeDecodeError, JsonValidationError):
+ except (JSONDecodeError, UnicodeDecodeError, PydanticValidationError):
status = HTTPStatus.BAD_REQUEST
messages.error(request, _("The file you supplied is not valid."))
else:
diff --git a/legadilo/reading/api.py b/legadilo/reading/api.py
new file mode 100644
index 00000000..0859c94f
--- /dev/null
+++ b/legadilo/reading/api.py
@@ -0,0 +1,161 @@
+# Legadilo
+# Copyright (C) 2023-2024 by Legadilo contributors.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+from datetime import datetime
+from http import HTTPStatus
+from operator import xor
+from typing import Annotated, Self
+
+from asgiref.sync import sync_to_async
+from django.shortcuts import aget_object_or_404
+from ninja import ModelSchema, PatchDict, Router, Schema
+from pydantic import model_validator
+
+from legadilo.reading import constants
+from legadilo.reading.models import Article, ArticleTag, Tag
+from legadilo.reading.services.article_fetching import (
+ build_article_data_from_content,
+ get_article_from_url,
+)
+from legadilo.users.models import User
+from legadilo.users.user_types import AuthenticatedApiRequest
+from legadilo.utils.api import update_model_from_patch_dict
+from legadilo.utils.validators import (
+ CleanedString,
+ FullSanitizeValidator,
+ ValidUrlValidator,
+ remove_falsy_items,
+)
+
+reading_api_router = Router(tags=["reading"])
+
+
+class OutArticleSchema(ModelSchema):
+ class Meta:
+ model = Article
+ exclude = ("user", "obj_created_at", "obj_updated_at")
+
+
+class ArticleCreation(Schema):
+ link: Annotated[str, ValidUrlValidator]
+ title: Annotated[str, FullSanitizeValidator] = ""
+ # We must not sanitize this yet: we need the raw content when building the article to fetch some
+ # data (like authors, canonicals…). It will be sanitized later when we extract the actual
+ # content of the article.
+ content: str = ""
+ tags: Annotated[tuple[CleanedString, ...], remove_falsy_items(tuple)] = ()
+
+ @model_validator(mode="after")
+ def check_title_and_content(self) -> Self:
+ if xor(len(self.title) > 0, len(self.content) > 0):
+ raise ValueError("You must supply either both title and content or none of them")
+
+ return self
+
+ @property
+ def has_data(self) -> bool:
+ return bool(self.title) and bool(self.content)
+
+
+@reading_api_router.post(
+ "/articles/",
+ response={HTTPStatus.CREATED: OutArticleSchema},
+ url_name="create_article",
+ summary="Create a new article",
+)
+async def create_article_view(request: AuthenticatedApiRequest, payload: ArticleCreation):
+ """Create an article either just with a link or with a link, a title and some content."""
+ if payload.has_data:
+ article_data = build_article_data_from_content(
+ url=payload.link, title=payload.title, content=payload.content
+ )
+ else:
+ article_data = await get_article_from_url(payload.link)
+
+ # Tags specified in article data are the raw tags used in feeds, they are not used to link an
+ # article to tag objects.
+ tags = await sync_to_async(Tag.objects.get_or_create_from_list)(request.auth, payload.tags)
+ article_data = article_data.model_copy(update={"tags": ()})
+
+ articles = await sync_to_async(Article.objects.update_or_create_from_articles_list)(
+ request.auth, [article_data], tags, source_type=constants.ArticleSourceType.MANUAL
+ )
+ return HTTPStatus.CREATED, articles[0]
+
+
+@reading_api_router.get(
+ "/articles/{int:article_id}/",
+ url_name="get_article",
+ response=OutArticleSchema,
+ summary="View the details of a specific article",
+)
+async def get_article_view(request: AuthenticatedApiRequest, article_id: int) -> Article:
+ return await aget_object_or_404(Article, id=article_id, user=request.auth)
+
+
+class ArticleUpdate(Schema):
+ title: Annotated[str, FullSanitizeValidator]
+ tags: Annotated[tuple[CleanedString, ...], remove_falsy_items(tuple)] = ()
+ read_at: datetime
+ is_favorite: bool
+ is_for_later: bool
+ reading_time: int
+
+
+@reading_api_router.patch(
+ "/articles/{int:article_id}/",
+ response=OutArticleSchema,
+ url_name="update_article",
+ summary="Update an article",
+)
+async def update_article_view(
+ request: AuthenticatedApiRequest,
+ article_id: int,
+ payload: PatchDict[ArticleUpdate], # type: ignore[type-arg]
+) -> Article:
+ article = await aget_object_or_404(Article, id=article_id, user=request.auth)
+
+ if (tags := payload.pop("tags", None)) is not None:
+ await _update_article_tags(request.auth, article, tags)
+
+ # Required to update tags and generated fields
+ await update_model_from_patch_dict(article, payload, must_refresh=True)
+
+ return article
+
+
+async def _update_article_tags(user: User, article: Article, new_tags: tuple[str, ...]):
+ tags = await sync_to_async(Tag.objects.get_or_create_from_list)(user, new_tags)
+ await sync_to_async(ArticleTag.objects.associate_articles_with_tags)(
+ [article],
+ tags,
+ tagging_reason=constants.TaggingReason.ADDED_MANUALLY,
+ readd_deleted=True,
+ )
+ await sync_to_async(ArticleTag.objects.dissociate_article_with_tags_not_in_list)(article, tags)
+
+
+@reading_api_router.delete(
+ "/articles/{int:article_id}/",
+ url_name="delete_article",
+ response={HTTPStatus.NO_CONTENT: None},
+ summary="Delete an article",
+)
+async def delete_article_view(request: AuthenticatedApiRequest, article_id: int):
+ article = await aget_object_or_404(Article, id=article_id, user=request.auth)
+
+ await article.adelete()
+
+ return HTTPStatus.NO_CONTENT, None
diff --git a/legadilo/reading/migrations/0001_initial.py b/legadilo/reading/migrations/0001_initial.py
index 1c34548c..659455cf 100644
--- a/legadilo/reading/migrations/0001_initial.py
+++ b/legadilo/reading/migrations/0001_initial.py
@@ -57,12 +57,7 @@ class Migration(migrations.Migration):
models.JSONField(
blank=True,
default=list,
- validators=[
- legadilo.utils.validators.JsonSchemaValidator({
- "items": {"type": "string"},
- "type": "array",
- })
- ],
+ validators=[legadilo.utils.validators.list_of_strings_validator],
),
),
(
@@ -70,12 +65,7 @@ class Migration(migrations.Migration):
models.JSONField(
blank=True,
default=list,
- validators=[
- legadilo.utils.validators.JsonSchemaValidator({
- "items": {"type": "string"},
- "type": "array",
- })
- ],
+ validators=[legadilo.utils.validators.list_of_strings_validator],
),
),
("link", models.URLField(max_length=1024)),
@@ -87,12 +77,7 @@ class Migration(migrations.Migration):
blank=True,
default=list,
help_text="Tags of the article from the its source",
- validators=[
- legadilo.utils.validators.JsonSchemaValidator({
- "items": {"type": "string"},
- "type": "array",
- })
- ],
+ validators=[legadilo.utils.validators.list_of_strings_validator],
),
),
(
diff --git a/legadilo/reading/migrations/0007_article_table_of_content.py b/legadilo/reading/migrations/0007_article_table_of_content.py
index 767ffb8f..4f4d3974 100644
--- a/legadilo/reading/migrations/0007_article_table_of_content.py
+++ b/legadilo/reading/migrations/0007_article_table_of_content.py
@@ -47,34 +47,7 @@ class Migration(migrations.Migration):
blank=True,
default=list,
help_text="The table of content of the article.",
- validators=[
- legadilo.utils.validators.JsonSchemaValidator({
- "items": {
- "additionalProperties": False,
- "properties": {
- "children": {
- "items": {
- "additionalProperties": False,
- "properties": {
- "id": {"type": "string"},
- "level": {"type": "integer"},
- "text": {"type": "string"},
- },
- "required": ["id", "text", "level"],
- "type": "object",
- },
- "type": "array",
- },
- "id": {"type": "string"},
- "level": {"type": "integer"},
- "text": {"type": "string"},
- },
- "required": ["id", "text", "level"],
- "type": "object",
- },
- "type": "array",
- })
- ],
+ validators=[legadilo.utils.validators.table_of_content_validator],
),
),
migrations.RunPython(build_toc, reverse_code=migrations.RunPython.noop),
diff --git a/legadilo/reading/migrations/0010_alter_article_table_of_content.py b/legadilo/reading/migrations/0010_alter_article_table_of_content.py
new file mode 100644
index 00000000..fe9d13f3
--- /dev/null
+++ b/legadilo/reading/migrations/0010_alter_article_table_of_content.py
@@ -0,0 +1,42 @@
+# Legadilo
+# Copyright (C) 2023-2024 by Legadilo contributors.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+# Generated by Django 5.1.3 on 2024-11-23 16:49
+
+from django.db import migrations, models
+
+import legadilo.utils.collections_utils
+import legadilo.utils.validators
+
+
+class Migration(migrations.Migration):
+ dependencies = [
+ ("reading", "0009_comment"),
+ ]
+
+ operations = [
+ migrations.AlterField(
+ model_name="article",
+ name="table_of_content",
+ field=models.JSONField(
+ blank=True,
+ default=list,
+ encoder=legadilo.utils.collections_utils.CustomJsonEncoder,
+ help_text="The table of content of the article.",
+ validators=[legadilo.utils.validators.table_of_content_validator],
+ ),
+ ),
+ ]
diff --git a/legadilo/reading/models/article.py b/legadilo/reading/models/article.py
index 3271d163..1081b53c 100644
--- a/legadilo/reading/models/article.py
+++ b/legadilo/reading/models/article.py
@@ -21,6 +21,7 @@
import math
from collections.abc import Iterable
from dataclasses import dataclass
+from itertools import chain
from typing import TYPE_CHECKING, Literal, Self, assert_never
from urllib.parse import urlparse
@@ -35,14 +36,14 @@
from legadilo.reading import constants
from legadilo.reading.models.tag import ArticleTag
-from legadilo.utils.collections_utils import max_or_none, min_or_none
+from legadilo.utils.collections_utils import CustomJsonEncoder, max_or_none, min_or_none
from legadilo.utils.security import full_sanitize
from legadilo.utils.text import get_nb_words_from_html
from legadilo.utils.time_utils import utcnow
from legadilo.utils.validators import (
language_code_validator,
- list_of_strings_json_schema_validator,
- table_of_content_json_schema_validator,
+ list_of_strings_validator,
+ table_of_content_validator,
)
from .article_fetch_error import ArticleFetchError
@@ -696,17 +697,15 @@ class Article(models.Model):
"we will use 0."
),
)
- authors = models.JSONField(
- validators=[list_of_strings_json_schema_validator], blank=True, default=list
- )
+ authors = models.JSONField(validators=[list_of_strings_validator], blank=True, default=list)
contributors = models.JSONField(
- validators=[list_of_strings_json_schema_validator], blank=True, default=list
+ validators=[list_of_strings_validator], blank=True, default=list
)
link = models.URLField(max_length=1_024)
preview_picture_url = models.URLField(blank=True, max_length=1_024)
preview_picture_alt = models.TextField(blank=True)
external_tags = models.JSONField(
- validators=[list_of_strings_json_schema_validator],
+ validators=[list_of_strings_validator],
blank=True,
default=list,
help_text=_("Tags of the article from the its source"),
@@ -733,10 +732,11 @@ class Article(models.Model):
validators=[language_code_validator],
)
table_of_content = models.JSONField(
- validators=[table_of_content_json_schema_validator],
+ validators=[table_of_content_validator],
blank=True,
default=list,
help_text=_("The table of content of the article."),
+ encoder=CustomJsonEncoder,
)
read_at = models.DateTimeField(null=True, blank=True)
@@ -846,9 +846,13 @@ def update_article_from_data(
) or self.reading_time
self.preview_picture_url = article_data.preview_picture_url or self.preview_picture_alt
self.preview_picture_alt = article_data.preview_picture_alt or self.preview_picture_alt
- self.authors = list(dict.fromkeys(self.authors + article_data.authors))
- self.contributors = list(dict.fromkeys(self.contributors + article_data.contributors))
- self.external_tags = list(dict.fromkeys(self.external_tags + article_data.tags))
+ # We create the deduplicated list with dict.fromkeys and not sets to preserve the
+ # initial order. We chain the iterable since they don't have the same type.
+ self.authors = list(dict.fromkeys(chain(self.authors, article_data.authors)))
+ self.contributors = list(
+ dict.fromkeys(chain(self.contributors, article_data.contributors))
+ )
+ self.external_tags = list(dict.fromkeys(chain(self.external_tags, article_data.tags)))
self.updated_at = max_or_none([article_data.updated_at, self.updated_at])
self.published_at = min_or_none([article_data.published_at, self.published_at])
elif has_content_unlike_saved:
diff --git a/legadilo/reading/models/tag.py b/legadilo/reading/models/tag.py
index e4304041..f9bf69ce 100644
--- a/legadilo/reading/models/tag.py
+++ b/legadilo/reading/models/tag.py
@@ -153,7 +153,7 @@ def get_slugs_to_ids(self, user: User, slugs: Iterable[str]) -> dict[str, int]:
}
@transaction.atomic()
- def get_or_create_from_list(self, user: User, titles_or_slugs: list[str]) -> list[Tag]:
+ def get_or_create_from_list(self, user: User, titles_or_slugs: Iterable[str]) -> list[Tag]:
existing_tags = list(
Tag.objects.get_queryset()
.for_user(user)
diff --git a/legadilo/reading/services/article_fetching.py b/legadilo/reading/services/article_fetching.py
index bfcd163d..f90d7467 100644
--- a/legadilo/reading/services/article_fetching.py
+++ b/legadilo/reading/services/article_fetching.py
@@ -13,17 +13,18 @@
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
+from __future__ import annotations
import logging
import sys
-from dataclasses import dataclass, field
from datetime import datetime
-from typing import TypedDict
+from typing import Annotated, Any, Literal
from urllib.parse import urlparse
from bs4 import BeautifulSoup
-from django.core.exceptions import ValidationError
from django.template.defaultfilters import truncatewords_html
+from pydantic import BaseModel as BaseSchema
+from pydantic import model_validator
from slugify import slugify
from legadilo.reading import constants
@@ -33,117 +34,100 @@
sanitize_keep_safe_tags,
)
from legadilo.utils.time_utils import safe_datetime_parse
-from legadilo.utils.validators import is_url_valid, language_code_validator, normalize_url
+from legadilo.utils.validators import (
+ CleanedString,
+ FullSanitizeValidator,
+ LanguageCodeValidatorOrDefault,
+ TableOfContentItem,
+ TableOfContentTopItem,
+ ValidUrlValidator,
+ default_frozen_model_config,
+ is_url_valid,
+ none_to_value,
+ normalize_url,
+ remove_falsy_items,
+ sanitize_keep_safe_tags_validator,
+ truncate,
+)
logger = logging.getLogger(__name__)
-class TocItem(TypedDict):
- id: str
- text: str
- level: int
-
-
-class TocTopItem(TocItem):
- children: list[TocItem]
-
-
-@dataclass(frozen=True)
-class ArticleData:
- external_article_id: str
- source_title: str
- title: str
- summary: str
- content: str
- table_of_content: list[TocTopItem]
- authors: list[str]
- contributors: list[str]
- tags: list[str]
- link: str
- preview_picture_url: str
- preview_picture_alt: str
- published_at: datetime | None
- updated_at: datetime | None
- language: str
- annotations: list[str] | tuple[str] = field(default_factory=list)
+Language = Annotated[
+ str,
+ FullSanitizeValidator,
+ truncate(constants.LANGUAGE_CODE_MAX_LENGTH),
+ LanguageCodeValidatorOrDefault,
+ none_to_value(""),
+]
+OptionalUrl = Literal[""] | Annotated[str, ValidUrlValidator]
+
+
+class ArticleData(BaseSchema):
+ model_config = default_frozen_model_config
+
+ external_article_id: Annotated[
+ str, FullSanitizeValidator, truncate(constants.EXTERNAL_ARTICLE_ID_MAX_LENGTH)
+ ]
+ source_title: Annotated[
+ str, FullSanitizeValidator, truncate(constants.ARTICLE_SOURCE_TITLE_MAX_LENGTH)
+ ]
+ title: Annotated[str, FullSanitizeValidator]
+ summary: Annotated[
+ str,
+ sanitize_keep_safe_tags_validator(constants.EXTRA_TAGS_TO_REMOVE_FROM_SUMMARY),
+ ]
+ content: Annotated[str, sanitize_keep_safe_tags_validator()]
+ table_of_content: tuple[TableOfContentTopItem, ...] = ()
+ authors: Annotated[tuple[CleanedString, ...], remove_falsy_items(tuple)] = ()
+ contributors: Annotated[tuple[CleanedString, ...], remove_falsy_items(tuple)] = ()
+ tags: Annotated[tuple[CleanedString, ...], remove_falsy_items(tuple)] = ()
+ link: Annotated[str, ValidUrlValidator]
+ preview_picture_url: OptionalUrl = ""
+ preview_picture_alt: Annotated[str, FullSanitizeValidator, none_to_value("")] = ""
+ published_at: datetime | None = None
+ updated_at: datetime | None = None
+ language: Language
+ annotations: tuple[str, ...] = ()
read_at: datetime | None = None
is_favorite: bool = False
+ @model_validator(mode="before")
+ @staticmethod
+ def prepare_values(
+ values: dict[str, Any],
+ ) -> dict[str, Any]:
+ summary = values.get("summary", "")
+ content = values.get("content", "")
+ title = values.get("title", "")
+ source_title = values.get("source_title", "")
+ link = values.get("link")
-def build_article_data( # noqa: PLR0913 too many arguments
- *,
- external_article_id: str,
- source_title: str,
- title: str,
- summary: str,
- content: str,
- authors: list[str],
- contributors: list[str],
- tags: list[str],
- link: str,
- preview_picture_url: str,
- preview_picture_alt: str,
- published_at: datetime | None,
- updated_at: datetime | None,
- language: str,
- annotations: list[str] | tuple[str] = (), # type: ignore[assignment]
- read_at: datetime | None = None,
- is_favorite: bool = False,
-) -> ArticleData:
- summary = _resolve_relative_links(link, summary)
- content = _resolve_relative_links(link, content)
- content, toc = _build_table_of_content(content)
- if not summary and content:
- summary = _get_fallback_summary_from_content(content)
-
- try:
- language = full_sanitize(language)[: constants.LANGUAGE_CODE_MAX_LENGTH]
- language_code_validator(language)
- except (ValidationError, TypeError):
- language = ""
+ # Treat link as optional here to satisfy mypy. It's mandatory anyway, so validation
+ # will fail later if it's missing.
+ if link:
+ summary = _resolve_relative_links(link, summary)
+ content = _resolve_relative_links(link, content)
- title = full_sanitize(title)[: constants.ARTICLE_TITLE_MAX_LENGTH]
- if not title:
- title = urlparse(link).netloc
+ content, table_of_content = _build_table_of_content(content)
- source_title = full_sanitize(source_title)[: constants.ARTICLE_SOURCE_TITLE_MAX_LENGTH]
- if not source_title:
- source_title = urlparse(link).netloc
+ if not summary and content:
+ summary = _get_fallback_summary_from_content(content)
- return ArticleData(
- external_article_id=full_sanitize(external_article_id)[
- : constants.EXTERNAL_ARTICLE_ID_MAX_LENGTH
- ],
- source_title=source_title,
- title=title,
- summary=sanitize_keep_safe_tags(
- summary, extra_tags_to_cleanup=constants.EXTRA_TAGS_TO_REMOVE_FROM_SUMMARY
- ),
- content=sanitize_keep_safe_tags(content),
- table_of_content=toc,
- authors=_sanitize_lists(authors),
- contributors=_sanitize_lists(contributors),
- tags=_sanitize_lists(tags),
- link=link,
- preview_picture_url=preview_picture_url,
- preview_picture_alt=full_sanitize(preview_picture_alt),
- published_at=published_at,
- updated_at=updated_at,
- language=language,
- annotations=annotations,
- read_at=read_at,
- is_favorite=is_favorite,
- )
+ if not title:
+ title = urlparse(values.get("link")).netloc
+ if not source_title:
+ source_title = urlparse(values.get("link")).netloc
-def _sanitize_lists(alist: list[str]) -> list[str]:
- cleaned_list = []
- for item in alist:
- cleaned_item = full_sanitize(item.strip())
- if cleaned_item:
- cleaned_list.append(cleaned_item)
-
- return cleaned_list
+ return {
+ **values,
+ "summary": summary,
+ "content": content,
+ "title": title,
+ "source_title": source_title,
+ "table_of_content": table_of_content,
+ }
def _resolve_relative_links(article_link: str, content: str) -> str:
@@ -181,10 +165,16 @@ async def get_article_from_url(url: str) -> ArticleData:
return _build_article_data_from_soup(
url,
soup,
- content_language,
+ content_language=content_language,
)
+def build_article_data_from_content(*, url: str, title: str, content: str) -> ArticleData:
+ soup = BeautifulSoup(content, "html.parser")
+
+ return _build_article_data_from_soup(url, soup, forced_title=title)
+
+
async def _get_page_content(url: str) -> tuple[str, BeautifulSoup, str | None]:
async with get_async_client() as client:
# We can have HTTP redirects with the meta http-equiv tag. Let's follow them up to 10 times
@@ -226,19 +216,23 @@ def _parse_http_equiv_refresh(value: str) -> str | None:
def _build_article_data_from_soup(
- fetched_url: str, soup: BeautifulSoup, content_language: str | None
+ fetched_url: str,
+ soup: BeautifulSoup,
+ *,
+ content_language: str | None = None,
+ forced_title: str | None = None,
) -> ArticleData:
content = _get_content(soup)
- return build_article_data(
+ return ArticleData(
external_article_id="",
source_title=_get_site_title(fetched_url, soup),
- title=_get_title(soup),
+ title=forced_title or _get_title(soup),
summary=_get_summary(soup, content),
content=content,
- authors=_get_authors(soup),
- contributors=[],
- tags=_get_tags(soup),
+ authors=tuple(_get_authors(soup)),
+ contributors=(),
+ tags=tuple(_get_tags(soup)),
link=_get_link(fetched_url, soup),
preview_picture_url=_get_preview_picture_url(fetched_url, soup),
preview_picture_alt="",
@@ -465,10 +459,10 @@ def _get_lang(soup: BeautifulSoup, content_language: str | None) -> str:
return language
-def _build_table_of_content(content: str) -> tuple[str, list[TocTopItem]]:
+def _build_table_of_content(content: str) -> tuple[str, list[TableOfContentTopItem]]:
soup = BeautifulSoup(content, "html.parser")
toc = []
- toc_item_top_level: TocTopItem | None = None
+ toc_item_top_level: TableOfContentTopItem | None = None
for header in soup.find_all(["h1", "h2", "h3", "h4", "h5", "h6"]):
text = full_sanitize(header.text)
@@ -477,11 +471,11 @@ def _build_table_of_content(content: str) -> tuple[str, list[TocTopItem]]:
level = int(header.name.replace("h", ""))
# If the content is well-structured, all top-level titles will be at the same level.
# Since we don't know, we allow for a first h2 to be followed by an h1.
- if toc_item_top_level is None or level <= toc_item_top_level["level"]:
- toc_item_top_level = TocTopItem(id=id_, text=text, level=level, children=[])
+ if toc_item_top_level is None or level <= toc_item_top_level.level:
+ toc_item_top_level = TableOfContentTopItem(id=id_, text=text, level=level)
toc.append(toc_item_top_level)
# We only allow one level in the TOC. It's enough.
- elif level == toc_item_top_level["level"] + 1:
- toc_item_top_level["children"].append(TocItem(id=id_, text=text, level=level))
+ elif level == toc_item_top_level.level + 1:
+ toc_item_top_level.children.append(TableOfContentItem(id=id_, text=text, level=level))
return str(soup), toc
diff --git a/legadilo/reading/tests/factories.py b/legadilo/reading/tests/factories.py
index e9ba0712..fb02cf3a 100644
--- a/legadilo/reading/tests/factories.py
+++ b/legadilo/reading/tests/factories.py
@@ -22,6 +22,7 @@
from legadilo.users.tests.factories import UserFactory
from ..models import Article, ArticleFetchError, Comment, ReadingList, Tag
+from ..services.article_fetching import ArticleData
class ArticleFactory(DjangoModelFactory):
@@ -75,3 +76,16 @@ class CommentFactory(DjangoModelFactory):
class Meta:
model = Comment
+
+
+class ArticleDataFactory(factory.DictFactory):
+ external_article_id = factory.Sequence(lambda n: f"external-id-{n}")
+ source_title = factory.Sequence(lambda n: f"Source {n}")
+ title = factory.Sequence(lambda n: f"Article {n}")
+ summary = ""
+ content = ""
+ link = factory.Sequence(lambda n: f"https://example.com/article-{n}.html")
+ language = "en"
+
+ class Meta:
+ model = ArticleData
diff --git a/legadilo/reading/tests/snapshots/test_api/test_create_article_from_data/article.json b/legadilo/reading/tests/snapshots/test_api/test_create_article_from_data/article.json
new file mode 100644
index 00000000..0f97e3c6
--- /dev/null
+++ b/legadilo/reading/tests/snapshots/test_api/test_create_article_from_data/article.json
@@ -0,0 +1,30 @@
+{
+ "annotations": [],
+ "authors": [
+ "Alexandre Dumas"
+ ],
+ "content": "\n
\n
\n Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc porttitor dolor in justo pharetra suscipit. Vestibulum hendrerit felis id ex gravida egestas. Sed tempus placerat nibh. Proin faucibus bibendum magna in ultricies. Fusce feugiat sagittis odio a gravida. Pellentesque dignissim lorem quis eros placerat ullamcorper nec ut quam. Curabitur non tortor a justo hendrerit vehicula in a neque. Mauris vitae mi ante. Aenean et efficitur massa. Donec nec scelerisque lectus, eu malesuada urna. Aenean at dignissim purus. Praesent et tellus non ligula mollis commodo id sed felis. Phasellus fringilla non libero vitae efficitur.\n
\n
\n Vivamus eu ornare ligula. Sed ac justo eget metus tempus venenatis. Aenean ante arcu, dignissim sed bibendum nec, commodo ut tellus. Donec rhoncus leo a enim volutpat, ut porttitor risus sodales. Proin sit amet sapien vitae felis mollis luctus. Morbi malesuada nec quam sed facilisis. Vivamus urna quam, sagittis at eros vitae, porta eleifend orci. Aliquam nec velit enim. Suspendisse egestas pulvinar volutpat. Pellentesque nec sem eget nunc facilisis porta. Ut eleifend mi sed laoreet sollicitudin. Sed sagittis nibh eget quam luctus facilisis.\n
\n
\n Vestibulum eu nibh ullamcorper, luctus tortor eget, semper arcu. Curabitur id cursus urna, eu accumsan mi. Curabitur ornare elit vitae quam tempor egestas. Maecenas viverra malesuada sapien non blandit. Sed luctus pellentesque nulla eu pretium. Cras iaculis interdum interdum. Ut in metus purus. Aliquam id pretium velit, eu tempus tellus.\n
Updated content",
+ table_of_content=(
+ TableOfContentTopItem(id="header", text="My title", level=2, children=[]),
+ ),
+ authors=("Author 2", "Author 3"),
+ contributors=("Contributor 2", "Contributor 3"),
+ tags=("Some tag", "Updated tag"),
+ link="https://example.com/article/1",
preview_picture_url="https://example.com/preview.png",
preview_picture_alt="Some image alt",
published_at=utcdt(2024, 4, 20),
@@ -1633,11 +1642,11 @@ def test_update_article_from_data_article_data_is_missing_some_data(self, user):
title="Updated title",
summary="",
content="",
- table_of_content=[],
- authors=["Author"],
- contributors=[],
- tags=[],
- link="https//example.com/article/1",
+ table_of_content=(),
+ authors=("Author",),
+ contributors=(),
+ tags=(),
+ link="https://example.com/article/1",
preview_picture_url="",
preview_picture_alt="",
published_at=utcdt(2024, 4, 20),
diff --git a/legadilo/reading/tests/test_services/snapshots/test_article_fetching/test_build_article_data/with-headers/article_data.json b/legadilo/reading/tests/test_services/snapshots/test_article_fetching/test_build_article_data/with-headers/article_data.json
index dabf711d..06ff7a69 100644
--- a/legadilo/reading/tests/test_services/snapshots/test_article_fetching/test_build_article_data/with-headers/article_data.json
+++ b/legadilo/reading/tests/test_services/snapshots/test_article_fetching/test_build_article_data/with-headers/article_data.json
@@ -28,7 +28,7 @@
{
"id": "this-one-has-html-in",
"level": 2,
- "text": "This one has HTML in "
+ "text": "This one has HTML in"
}
],
"id": "some-header",
diff --git a/legadilo/reading/tests/test_services/snapshots/test_article_fetching/test_get_article_from_url/article_data.json b/legadilo/reading/tests/test_services/snapshots/test_article_fetching/test_get_article_from_url/article_data.json
index 08bf4eca..f103aa07 100644
--- a/legadilo/reading/tests/test_services/snapshots/test_article_fetching/test_get_article_from_url/article_data.json
+++ b/legadilo/reading/tests/test_services/snapshots/test_article_fetching/test_get_article_from_url/article_data.json
@@ -11,7 +11,7 @@
"link": "https://www.example.com/posts/en/1-super-article/",
"preview_picture_alt": "",
"preview_picture_url": "https://www.example.com/images/profile.png",
- "published_at": "2024-02-26T23:00:00+00:00",
+ "published_at": "2024-02-26T23:00:00Z",
"read_at": null,
"source_title": "Super blog",
"summary": "I just wrote a new book, I\u2019ll hope you will like it! Here are some thoughts on it.",
@@ -20,5 +20,5 @@
"Musketeers"
],
"title": "On the 3 musketeers",
- "updated_at": "2024-03-08T23:00:00+00:00"
+ "updated_at": "2024-03-08T23:00:00Z"
}
\ No newline at end of file
diff --git a/legadilo/reading/tests/test_services/snapshots/test_article_fetching/test_get_article_from_url_process_fixture/no-article-tag/article_data.json b/legadilo/reading/tests/test_services/snapshots/test_article_fetching/test_get_article_from_url_process_fixture/no-article-tag/article_data.json
index 08bf4eca..f103aa07 100644
--- a/legadilo/reading/tests/test_services/snapshots/test_article_fetching/test_get_article_from_url_process_fixture/no-article-tag/article_data.json
+++ b/legadilo/reading/tests/test_services/snapshots/test_article_fetching/test_get_article_from_url_process_fixture/no-article-tag/article_data.json
@@ -11,7 +11,7 @@
"link": "https://www.example.com/posts/en/1-super-article/",
"preview_picture_alt": "",
"preview_picture_url": "https://www.example.com/images/profile.png",
- "published_at": "2024-02-26T23:00:00+00:00",
+ "published_at": "2024-02-26T23:00:00Z",
"read_at": null,
"source_title": "Super blog",
"summary": "I just wrote a new book, I\u2019ll hope you will like it! Here are some thoughts on it.",
@@ -20,5 +20,5 @@
"Musketeers"
],
"title": "On the 3 musketeers",
- "updated_at": "2024-03-08T23:00:00+00:00"
+ "updated_at": "2024-03-08T23:00:00Z"
}
\ No newline at end of file
diff --git a/legadilo/reading/tests/test_services/test_article_fetching.py b/legadilo/reading/tests/test_services/test_article_fetching.py
index dabb64bd..362fc526 100644
--- a/legadilo/reading/tests/test_services/test_article_fetching.py
+++ b/legadilo/reading/tests/test_services/test_article_fetching.py
@@ -19,7 +19,7 @@
import pytest
-from legadilo.reading.services.article_fetching import build_article_data, get_article_from_url
+from legadilo.reading.services.article_fetching import ArticleData, get_article_from_url
from legadilo.reading.tests.fixtures import get_article_fixture_content
from legadilo.utils.testing import serialize_for_snapshot
@@ -216,6 +216,6 @@ async def test_get_article_from_url_process_fixture(
],
)
def test_build_article_data(parameters: dict[str, Any], snapshot):
- article_data = build_article_data(**parameters)
+ article_data = ArticleData(**parameters)
snapshot.assert_match(serialize_for_snapshot(article_data), "article_data.json")
diff --git a/legadilo/reading/views/fetch_article_views.py b/legadilo/reading/views/fetch_article_views.py
index bd022f9e..77045923 100644
--- a/legadilo/reading/views/fetch_article_views.py
+++ b/legadilo/reading/views/fetch_article_views.py
@@ -29,6 +29,7 @@
from django.utils.safestring import mark_safe
from django.utils.translation import gettext_lazy as _
from django.views.decorators.http import require_http_methods
+from pydantic import ValidationError as PydanticValidationError
from legadilo.core.forms.fields import MultipleTagsField
from legadilo.reading import constants
@@ -154,7 +155,7 @@ async def _handle_save(
force_update=force_update,
)
)[0]
- except (httpx.HTTPError, ArticleTooBigError) as e:
+ except (httpx.HTTPError, ArticleTooBigError, PydanticValidationError) as e:
article, created = await sync_to_async(Article.objects.create_invalid_article)(
request.user,
article_link,
diff --git a/legadilo/templates/ninja/swagger.html b/legadilo/templates/ninja/swagger.html
new file mode 100644
index 00000000..1a1b7832
--- /dev/null
+++ b/legadilo/templates/ninja/swagger.html
@@ -0,0 +1,28 @@
+{% load static %}
+
+
+
+
+
+
+ {{ api.title }}
+
+
+
+
+
+
+
+
diff --git a/legadilo/templates/users/manage_tokens.html b/legadilo/templates/users/manage_tokens.html
new file mode 100644
index 00000000..9a4a37c3
--- /dev/null
+++ b/legadilo/templates/users/manage_tokens.html
@@ -0,0 +1,67 @@
+{% extends "base.html" %}
+
+{% load i18n static crispy_forms_tags %}
+
+{% block title %}
+ {% translate "Manage API tokens" %}
+{% endblock title %}
+{% block page_js %}
+
+{% endblock page_js %}
+{% block content %}
+
{% translate "Manage API tokens" %}
+ {% if new_application_token %}
+
+ {% blocktranslate with token_title=new_application_token.title %}
+ Successfully created token {{ token_title }}. Copy the token below, you
+ won’t be able to get it back.
+ {% endblocktranslate %}
+
{{ new_application_token.token }}
+
+ {% endif %}
+
{% translate "List of tokens" %}
+
+ {% for token in tokens %}
+
+ {{ token.title }}
+
+ {% blocktranslate with created_at=token.created_at|date:"SHORT_DATETIME_FORMAT" %}
+ Created on {{ created_at }}
+ {% endblocktranslate %}
+
+ {% if token.validity_end %}
+
+ {% blocktranslate with validity_end=token.validity_end|date:"SHORT_DATETIME_FORMAT" %}
+ Valid until {{ validity_end }}
+ {% endblocktranslate %}
+
+ {% endif %}
+ {% if token.last_used_at %}
+
+ {% blocktranslate with last_used_at=token.last_used_at|date:"SHORT_DATETIME_FORMAT" %}
+ Last used {{ last_used_at }}
+ {% endblocktranslate %}
+
+ {% endif %}
+
+