Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Notify PkgServer.jl about nginx cache hits #200

Merged
merged 1 commit into from
Dec 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions deployment/conf/pkgserver.nginx.conf
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,12 @@ real_ip_header X-Real-IP;
# This proxy cache provides Julia download caching
proxy_cache_path /caches/s3 levels=1:2 keys_zone=s3cache:1m max_size=10g min_free=3g inactive=7d use_temp_path=off;

# Only set $original_uri for internal redirects to /notify
map $uri $original_uri {
default '';
/notify $request_uri;
}

server {
# This block of code templated in from ${LISTEN_BLOCK_SRC}
${LISTEN_BLOCK}
Expand All @@ -24,13 +30,23 @@ ${LISTEN_BLOCK}

root /caches/pkgserver/cache;
try_files $uri @pkgserver;

# Notify PkgServer about this cache hit
mirror /notify;
mirror_request_body off;
Comment on lines +34 to +36
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I verified that nginx only send the mirror request if this block was selected (i.e. if try_files was succesful).

}
location /registries {
add_header Content-Type text/plain;
root /caches/pkgserver/static;
try_files $uri @pkgserver;
}

# Internal block for notifying PkgServer about cache hits.
location = /notify {
internal;
try_files /dev/null @pkgserver;
}

# Our default mode is to proxy things off to the `@pkgserver`. This `try_files` directive
# is a simple hack to redirect to the shared location below with no performance penalty.
# See https://serverfault.com/a/965779/46426 for more details and the appropriate amount
Expand All @@ -46,6 +62,7 @@ ${LISTEN_BLOCK}
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Request-ID $http_x_request_id;
proxy_set_header X-Original-URI $original_uri;
}

# We provide an S3 mirror for each bucket defined in S3_MIRROR_BUCKET_LIST
Expand Down
37 changes: 36 additions & 1 deletion src/PkgServer.jl
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ using Sockets: InetAddr
using Dates
using Tar
using Gzip_jll
using URIs: URIs


include("metrics.jl")
Expand Down Expand Up @@ -195,8 +196,42 @@ function start(;kwargs...)
end

function handle_request(http::HTTP.Stream)
resource = http.message.target
method = http.message.method
uri = URIs.URI(http.message.target)
resource = uri.path
request_id = HTTP.header(http, "X-Request-ID", "")

# Internal endpoint used by nginx to send a notification whenever it serves a file from
# the file cache. The original request uri for the resource is given in the
# `X-Original-URI` header. If nginx managed to serve the resource it means i) it is
# available on disk and ii) `config.cache` knows about this resource (theoretically it
# could have been removed in the very small window between nginx opening the file and we
# receiving the request here which is why we have some extra guards up).
if resource == "/notify" && method == "GET"
original_uri = HTTP.header(http, "X-Original-URI")
# This should always be true as long as the request comes from nginx
if !occursin(resource_re, original_uri)
HTTP.setstatus(http, 404)
HTTP.startwrite(http)
return
end
HTTP.setstatus(http, 204)
HTTP.startwrite(http)
# TODO: Locking of the cache should be upstreamed to FilesystemDatastructures.jl so
# that all operations on the cache are safe by default.
@lock CACHE_LOCK begin
cache_key = original_uri[2:end] # strip the leading /
# Notify the cache about this access
FilesystemDatastructures.hit!(config.cache, cache_key)
# The cache also knows about the size of the resource which can be used to keep
# track of number of bytes sent by nginx
cache_entry = get(config.cache.entries, cache_key, nothing)
if cache_entry !== nothing
Prometheus.inc(NGINX_BYTES_TRANSMITTED, cache_entry.size)
end
end
return
end
# If the user is asking for `/meta`, generate the requisite JSON object and send it back
if resource == "/meta"
serve_meta(http)
Expand Down
2 changes: 1 addition & 1 deletion src/meta.jl
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ function serve_data(http::HTTP.Stream, msg, content_type)
startwrite(http)
if http.message.method == "GET"
written_bytes = write(http, msg)
Prometheus.inc(BYTES_SENT, written_bytes)
Prometheus.inc(BYTES_TRANSMITTED, written_bytes)
end
return
end
Expand Down
12 changes: 9 additions & 3 deletions src/metrics.jl
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,15 @@ const BYTES_RECEIVED = Prometheus.Counter(
registry = COLLECTOR_REGISTRY,
)

const BYTES_SENT = Prometheus.Counter(
"pkgserver_sent_bytes_total",
"Total number of bytes sent to clients";
const BYTES_TRANSMITTED = Prometheus.Counter(
"pkgserver_transmitted_bytes_total",
"Total number of bytes transmitted to clients";
registry = COLLECTOR_REGISTRY,
)

const NGINX_BYTES_TRANSMITTED = Prometheus.Counter(
"nginx_pkgserver_transmitted_bytes_total",
"Total number of bytes transmitted to clients by nginx";
registry = COLLECTOR_REGISTRY,
)

Expand Down
2 changes: 1 addition & 1 deletion src/resource.jl
Original file line number Diff line number Diff line change
Expand Up @@ -578,7 +578,7 @@ function serve_file(http::HTTP.Stream,
if http.message.method == "GET"
transmitted = stream_file(io, startbyte, content_length, dl_task, http, buffer)
global payload_bytes_transmitted += transmitted
Prometheus.inc(BYTES_SENT, transmitted)
Prometheus.inc(BYTES_TRANSMITTED, transmitted)
if transmitted != content_length
@error("file size mismatch", content_length, actual=transmitted)
end
Expand Down
Loading