NewsDownloader: add a cache to reduce wasted bandwidth
Frenzie committed Jan 31, 2025
1 parent 03500bc commit da02352
Showing 2 changed files with 69 additions and 9 deletions.
31 changes: 26 additions & 5 deletions plugins/newsdownloader.koplugin/epubdownloadbackend.lua
@@ -1,3 +1,5 @@
+local CacheSQLite = require("cachesqlite")
+local DataStorage = require("datastorage")
 local Version = require("version")
 local ffiutil = require("ffi/util")
 local http = require("socket.http")
@@ -22,6 +24,12 @@ local EpubDownloadBackend = {
 }
 local max_redirects = 5; --prevent infinite redirects
 
+local FeedCache = CacheSQLite:new{
+    slots = 500,
+    db_path = DataStorage:getDataDir() .. "/cache/newsdownloader.sqlite",
+    size = 1024 * 1024 * 10, -- 10MB
+}
+
 -- filter HTML using CSS selector
 local function filter(text, element)
     local htmlparser = require("htmlparser")
@@ -150,8 +158,8 @@ local function build_cookies(cookies)
 end
 
 -- Get URL content
-local function getUrlContent(url, cookies, timeout, maxtime, redirectCount)
-    logger.dbg("getUrlContent(", url, ",", cookies, ", ", timeout, ",", maxtime, ",", redirectCount, ")")
+local function getUrlContent(url, cookies, timeout, maxtime, redirectCount, add_to_cache)
+    logger.dbg("getUrlContent(", url, ",", cookies, ", ", timeout, ",", maxtime, ",", redirectCount, ",", add_to_cache, ")")
     if not redirectCount then
         redirectCount = 0
     elseif redirectCount == max_redirects then
@@ -209,7 +217,7 @@ local function getUrlContent(url, cookies, timeout, maxtime, redirectCount)
             redirected_url = socket_url.build(parsed_redirect_location)
         end
         logger.dbg("getUrlContent: Redirecting to url: ", redirected_url)
-        return getUrlContent(redirected_url, timeout, maxtime, redirectCount + 1)
+        return getUrlContent(redirected_url, timeout, maxtime, redirectCount + 1, add_to_cache)
     else
         error("EpubDownloadBackend: Don't know how to handle HTTP response status:", status or code)
     end
@@ -223,10 +231,23 @@ local function getUrlContent(url, cookies, timeout, maxtime, redirectCount)
             return false, "Incomplete content received"
         end
     end
+
+    if add_to_cache then
+        logger.dbg("Adding to cache", url)
+        FeedCache:insert(url, {
+            headers = headers,
+            content = content,
+        })
+    end
+
     logger.dbg("Returning content ok")
     return true, content
 end
 
+function EpubDownloadBackend:getCache()
+    return FeedCache
+end
+
 function EpubDownloadBackend:getConnectionCookies(url, credentials)
 
     local body = ""
@@ -258,9 +279,9 @@ function EpubDownloadBackend:getConnectionCookies(url, credentials)
     return cookies
 end
 
-function EpubDownloadBackend:getResponseAsString(url, cookies)
+function EpubDownloadBackend:getResponseAsString(url, cookies, add_to_cache)
     logger.dbg("EpubDownloadBackend:getResponseAsString(", url, ")")
-    local success, content = getUrlContent(url, cookies)
+    local success, content = getUrlContent(url, cookies, nil, nil, nil, add_to_cache)
     if (success) then
         return content
     else
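The backend half of the change is self-contained: a CacheSQLite store keyed by URL, filled by getUrlContent() whenever add_to_cache is set, and exposed to the plugin through the new getCache() accessor. Below is a minimal sketch of that round-trip, assuming only the CacheSQLite check/insert API exactly as the diff uses it; the URL, header value, and body are made up for illustration.

    local CacheSQLite = require("cachesqlite")
    local DataStorage = require("datastorage")

    local FeedCache = CacheSQLite:new{
        slots = 500, -- number of cache slots
        db_path = DataStorage:getDataDir() .. "/cache/newsdownloader.sqlite",
        size = 1024 * 1024 * 10, -- 10 MB on disk
    }

    local url = "https://example.com/feed.xml" -- hypothetical feed URL
    -- After a successful fetch, store headers and body together under the URL.
    FeedCache:insert(url, {
        headers = { ["last-modified"] = "Wed, 29 Jan 2025 10:00:00 GMT" }, -- made-up value
        content = "<rss>...</rss>", -- made-up body
    })

    -- A later run retrieves both without touching the network.
    local cached = FeedCache:check(url)
    if cached then
        print(cached.headers["last-modified"], #cached.content)
    end

Storing the headers alongside the body is what enables the second file's change: main.lua needs the original Last-Modified value to revalidate a cache entry cheaply.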
47 changes: 43 additions & 4 deletions plugins/newsdownloader.koplugin/main.lua
@@ -16,8 +16,11 @@ local NetworkMgr = require("ui/network/manager")
 local Persist = require("persist")
 local WidgetContainer = require("ui/widget/container/widgetcontainer")
 local dateparser = require("lib.dateparser")
+local http = require("socket.http")
 local lfs = require("libs/libkoreader-lfs")
+local ltn12 = require("ltn12")
 local logger = require("logger")
+local socket = require("socket")
 local util = require("util")
 local _ = require("gettext")
 local T = FFIUtil.template
@@ -341,16 +344,52 @@ function NewsDownloader:loadConfigAndProcessFeedsWithUI(touchmenu_instance)
 end
 
 function NewsDownloader:processFeedSource(url, credentials, limit, unsupported_feeds_urls, download_full_article, include_images, message, enable_filter, filter_element)
+    -- Check if we have a cached response first
+    local cache = DownloadBackend:getCache()
+    local cached_response = cache:check(url)
+    local ok, response
+
     local cookies = nil
     if credentials ~= nil then
-        logger.dbg("Auth Cookies from ", cookies)
+        logger.dbg("Auth Cookies from ", credentials.url)
         cookies = DownloadBackend:getConnectionCookies(credentials.url, credentials.auth)
     end
 
-    local ok, response = pcall(function()
-        return DownloadBackend:getResponseAsString(url, cookies)
-    end)
+    if cached_response then
+        logger.dbg("NewsDownloader: Using cached response for ", url)
+        local headers_cached = cached_response.headers
+        logger.dbg("NewsDownloader: Cached response headers", headers_cached)
+
+        -- use last-modified from headers to send a modified since request
+        local last_modified = headers_cached["last-modified"]
+        if last_modified then
+            logger.dbg("NewsDownloader: sending If-Modified-Since", last_modified, url)
+            local response_body = {}
+            local headers = {
+                ["If-Modified-Since"] = last_modified
+            }
+            if cookies then
+                headers["Cookie"] = cookies
+            end
+            local code, response_headers = socket.skip(1, http.request{
+                url = url,
+                headers = headers,
+                sink = ltn12.sink.table(response_body)
+            })
+            ok = (code == 304)
+            logger.dbg("NewsDownloader: If-Modified-Since response", code, response_headers)
+            if ok then
+                response = cached_response.content
+            end
+        end
+    end
+
+    if not response then
+        ok, response = pcall(function()
+            return DownloadBackend:getResponseAsString(url, cookies, true)
+        end)
+    end
+
     local feeds
     -- Check to see if a response is available to deserialize.
     if ok then
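On the plugin side, a cache hit does not skip the network entirely: the entry is revalidated with a conditional GET, replaying the cached Last-Modified value as If-Modified-Since and treating 304 Not Modified as "the cached copy is still good". A standalone sketch of that step, assuming plain LuaSocket as imported above; the URL and date below are hypothetical, where the real code takes both from the cached headers.

    local http = require("socket.http")
    local ltn12 = require("ltn12")
    local socket = require("socket")

    -- Ask the server whether the resource changed since we cached it.
    local function fetch_if_modified(url, last_modified)
        local response_body = {}
        local code = socket.skip(1, http.request{
            url = url,
            headers = { ["If-Modified-Since"] = last_modified },
            sink = ltn12.sink.table(response_body),
        })
        if code == 304 then
            return nil, true -- 304 Not Modified: caller reuses the cached content
        end
        return table.concat(response_body), false
    end

    local body, unchanged = fetch_if_modified(
        "https://example.com/feed.xml", -- hypothetical
        "Wed, 29 Jan 2025 10:00:00 GMT" -- hypothetical Last-Modified
    )

Only when the entry is missing, lacks a Last-Modified header, or the server answers with something other than 304 does the plugin fall back to a full download, now passing add_to_cache = true so the fresh response repopulates the cache.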
