From da02352201c1dfce0e838c8a25ae7d626eeba8b3 Mon Sep 17 00:00:00 2001
From: Frans de Jonge
Date: Fri, 24 Jan 2025 22:44:52 +0100
Subject: [PATCH] NewsDownloader: add a cache to reduce wasted bandwidth

Closes #13061.
---
 .../epubdownloadbackend.lua              | 31 ++++++++++--
 plugins/newsdownloader.koplugin/main.lua | 47 +++++++++++++++++--
 2 files changed, 69 insertions(+), 9 deletions(-)

diff --git a/plugins/newsdownloader.koplugin/epubdownloadbackend.lua b/plugins/newsdownloader.koplugin/epubdownloadbackend.lua
index 5cc8c654cf1f..fc2e7bb8be5a 100644
--- a/plugins/newsdownloader.koplugin/epubdownloadbackend.lua
+++ b/plugins/newsdownloader.koplugin/epubdownloadbackend.lua
@@ -1,3 +1,5 @@
+local CacheSQLite = require("cachesqlite")
+local DataStorage = require("datastorage")
 local Version = require("version")
 local ffiutil = require("ffi/util")
 local http = require("socket.http")
@@ -22,6 +24,12 @@
 }
 local max_redirects = 5; --prevent infinite redirects
 
+local FeedCache = CacheSQLite:new{
+    slots = 500,
+    db_path = DataStorage:getDataDir() .. "/cache/newsdownloader.sqlite",
+    size = 1024 * 1024 * 10, -- 10MB
+}
+
 -- filter HTML using CSS selector
 local function filter(text, element)
     local htmlparser = require("htmlparser")
@@ -150,8 +158,8 @@ local function build_cookies(cookies)
 end
 
 -- Get URL content
-local function getUrlContent(url, cookies, timeout, maxtime, redirectCount)
-    logger.dbg("getUrlContent(", url, ",", cookies, ", ", timeout, ",", maxtime, ",", redirectCount, ")")
+local function getUrlContent(url, cookies, timeout, maxtime, redirectCount, add_to_cache)
+    logger.dbg("getUrlContent(", url, ",", cookies, ", ", timeout, ",", maxtime, ",", redirectCount, ",", add_to_cache, ")")
     if not redirectCount then
         redirectCount = 0
     elseif redirectCount == max_redirects then
@@ -209,7 +217,7 @@
             redirected_url = socket_url.build(parsed_redirect_location)
         end
         logger.dbg("getUrlContent: Redirecting to url: ", redirected_url)
-        return getUrlContent(redirected_url, timeout, maxtime, redirectCount + 1)
+        return getUrlContent(redirected_url, cookies, timeout, maxtime, redirectCount + 1, add_to_cache)
     else
         error("EpubDownloadBackend: Don't know how to handle HTTP response status:", status or code)
     end
@@ -223,10 +231,23 @@
             return false, "Incomplete content received"
         end
     end
+
+    if add_to_cache then
+        logger.dbg("Adding to cache", url)
+        FeedCache:insert(url, {
+            headers = headers,
+            content = content,
+        })
+    end
+
     logger.dbg("Returning content ok")
     return true, content
 end
 
+function EpubDownloadBackend:getCache()
+    return FeedCache
+end
+
 function EpubDownloadBackend:getConnectionCookies(url, credentials)
 
     local body = ""
@@ -258,9 +279,9 @@
     return cookies
 end
 
-function EpubDownloadBackend:getResponseAsString(url, cookies)
+function EpubDownloadBackend:getResponseAsString(url, cookies, add_to_cache)
     logger.dbg("EpubDownloadBackend:getResponseAsString(", url, ")")
-    local success, content = getUrlContent(url, cookies)
+    local success, content = getUrlContent(url, cookies, nil, nil, nil, add_to_cache)
     if (success) then
         return content
     else
diff --git a/plugins/newsdownloader.koplugin/main.lua b/plugins/newsdownloader.koplugin/main.lua
index de559b189c81..8daefcffcb0d 100644
--- a/plugins/newsdownloader.koplugin/main.lua
+++ b/plugins/newsdownloader.koplugin/main.lua
@@ -16,8 +16,11 @@ local NetworkMgr = require("ui/network/manager")
 local Persist = require("persist")
 local WidgetContainer = require("ui/widget/container/widgetcontainer")
 local dateparser = require("lib.dateparser")
+local http = require("socket.http")
 local lfs = require("libs/libkoreader-lfs")
+local ltn12 = require("ltn12")
 local logger = require("logger")
+local socket = require("socket")
 local util = require("util")
 local _ = require("gettext")
 local T = FFIUtil.template
@@ -341,16 +344,52 @@ function NewsDownloader:loadConfigAndProcessFeedsWithUI(touchmenu_instance)
 end
 
 function NewsDownloader:processFeedSource(url, credentials, limit, unsupported_feeds_urls, download_full_article, include_images, message, enable_filter, filter_element)
+    -- Check if we have a cached response first
+    local cache = DownloadBackend:getCache()
+    local cached_response = cache:check(url)
+    local ok, response
     local cookies = nil
     if credentials ~= nil then
-        logger.dbg("Auth Cookies from ", cookies)
+        logger.dbg("Auth Cookies from ", credentials.url)
         cookies = DownloadBackend:getConnectionCookies(credentials.url, credentials.auth)
     end
 
-    local ok, response = pcall(function()
-        return DownloadBackend:getResponseAsString(url, cookies)
-    end)
+    if cached_response then
+        logger.dbg("NewsDownloader: Using cached response for ", url)
+        local headers_cached = cached_response.headers
+        logger.dbg("NewsDownloader: Cached response headers", headers_cached)
+
+        -- Use Last-Modified from the cached headers to send an If-Modified-Since request
+        local last_modified = headers_cached["last-modified"]
+        if last_modified then
+            logger.dbg("NewsDownloader: sending If-Modified-Since", last_modified, url)
+            local response_body = {}
+            local headers = {
+                ["If-Modified-Since"] = last_modified
+            }
+            if cookies then
+                headers["Cookie"] = cookies
+            end
+            local code, response_headers = socket.skip(1, http.request{
+                url = url,
+                headers = headers,
+                sink = ltn12.sink.table(response_body)
+            })
+            ok = (code == 304)
+            logger.dbg("NewsDownloader: If-Modified-Since response", code, response_headers)
+            if ok then
+                response = cached_response.content
+            end
+        end
+    end
+
+    if not response then
+        ok, response = pcall(function()
+            return DownloadBackend:getResponseAsString(url, cookies, true)
+        end)
+    end
+
    local feeds
 
     -- Check to see if a response is available to deserialize.
     if ok then
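
The second hunk of main.lua implements a conditional GET: the Last-Modified
value stored with the cached response is replayed as an If-Modified-Since
request header, and a 304 Not Modified answer lets the plugin reuse the
cached body instead of re-downloading the feed. The standalone Lua sketch
below illustrates the same flow with plain LuaSocket. It is an illustration
only: the in-memory `cache` table with its `check`/`insert` methods is a
hypothetical stand-in for the plugin's SQLite-backed FeedCache, and the
`fetch` helper is not part of the patch.

    -- Conditional GET against a simple in-memory cache (sketch).
    local http = require("socket.http")
    local ltn12 = require("ltn12")
    local socket = require("socket")

    local cache = {
        store = {},
        check = function(self, key) return self.store[key] end,
        insert = function(self, key, value) self.store[key] = value end,
    }

    local function fetch(url)
        local cached = cache:check(url)
        local request_headers = {}
        if cached and cached.headers["last-modified"] then
            -- Ask the server whether the resource changed since we cached it.
            request_headers["If-Modified-Since"] = cached.headers["last-modified"]
        end
        local body = {}
        -- http.request returns 1, code, headers, status; skip(1, ...) drops the 1.
        local code, headers = socket.skip(1, http.request{
            url = url,
            headers = request_headers,
            sink = ltn12.sink.table(body),
        })
        if code == 304 and cached then
            -- Not modified: reuse the cached body; no payload was transferred.
            return cached.content
        elseif code == 200 then
            local content = table.concat(body)
            cache:insert(url, { headers = headers, content = content })
            return content
        end
        return nil, "unexpected HTTP status: " .. tostring(code)
    end

    print(fetch("http://example.com/feed.xml"))

One design consequence worth noting: in the patch itself, a 200 answer to the
If-Modified-Since probe discards the freshly downloaded body (`ok` stays
false, `response` stays nil) and falls through to a full
getResponseAsString() call with add_to_cache enabled, so a changed feed is
transferred twice. The sketch above reuses the probe's body instead, at the
cost of bypassing the backend's redirect and completeness handling.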