nginx: change etags for statically compressed files served from store

Per RFC 9110, [section 8.8.1][1], different representations of the same
resource should have different Etags:

> A strong validator is unique across all versions of all
> representations associated with a particular resource over time.
> However, there is no implication of uniqueness across representations
> of different resources (i.e., the same strong validator might be in
> use for representations of multiple resources at the same time and
> does not imply that those representations are equivalent)

When serving statically compressed files (i.e., when there is an existing
corresponding .gz/.br/etc. file on disk), Nginx sends the Etag marked
as strong. These tags should be different for each compressed format
(as shown in an explicit example in section [8.8.3.3][2] of the RFC).
Upstream Etags are composed of the file modification timestamp and
content length, and the latter generally changes between these
representations.

The previous implementation of Nix-specific Etags for things served from
store used the store hash. This is fine to share between different
files, but it becomes a problem for statically compressed versions of
the same file, as it means Nginx was serving different representations
of the same resource with the same Etag, marked as strong.

This patch addresses this by imitating the upstream Nginx behavior, and
appending the value of content length to the store hash.

[1]: https://www.rfc-editor.org/rfc/rfc9110.html#name-validator-fields
[2]:
https://www.rfc-editor.org/rfc/rfc9110.html#name-example-entity-tags-varying
This commit is contained in:
Dee Anzorge 2024-01-02 20:29:15 +01:00
parent 3cb442f494
commit f124c73686
5 changed files with 80 additions and 13 deletions

View File

@ -8,4 +8,4 @@ HTTP has a couple of different mechanisms for caching to prevent clients from ha
Fortunately, HTTP supports an alternative (and more effective) caching mechanism: the [`ETag`](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/ETag) response header. The value of the `ETag` header specifies some identifier for the particular content that the server is sending (e.g., a hash). When a client makes a second request for the same resource, it sends that value back in an `If-None-Match` header. If the ETag value is unchanged, then the server does not need to resend the content.
As of NixOS 19.09, the nginx package in Nixpkgs is patched such that when nginx serves a file out of `/nix/store`, the hash in the store path is used as the `ETag` header in the HTTP response, thus providing proper caching functionality. This happens automatically; you do not need to do modify any configuration to get this behavior.
As of NixOS 19.09, the nginx package in Nixpkgs is patched such that when nginx serves a file out of `/nix/store`, the hash in the store path is used as the `ETag` header in the HTTP response, thus providing proper caching functionality. With NixOS 24.05 and later, the `ETag` additionally includes the response content length, to ensure files served with static compression do not share `ETag`s with their uncompressed version. This `ETag` functionality is enabled automatically; you do not need to modify any configuration to get this behavior.

View File

@ -583,6 +583,7 @@ in {
nginx = handleTest ./nginx.nix {};
nginx-auth = handleTest ./nginx-auth.nix {};
nginx-etag = handleTest ./nginx-etag.nix {};
nginx-etag-compression = handleTest ./nginx-etag-compression.nix {};
nginx-globalredirect = handleTest ./nginx-globalredirect.nix {};
nginx-http3 = handleTest ./nginx-http3.nix {};
nginx-modsecurity = handleTest ./nginx-modsecurity.nix {};

View File

@ -0,0 +1,45 @@
# NixOS VM test: requests the same file from nginx with and without
# `Accept-encoding: gzip` and checks that the two responses carry different
# ETags, and that each ETag is identical across repeated requests.
import ./make-test-python.nix {
  name = "nginx-etag-compression";

  nodes.machine = { pkgs, lib, ... }: {
    services.nginx = {
      enable = true;
      # NOTE(review): presumably this is what makes nginx serve the
      # pre-compressed index.html.gz for gzip-capable clients -- confirm
      # against the nginx module's recommendedGzipSettings definition.
      recommendedGzipSettings = true;
      virtualHosts.default = {
        # Document root built into the Nix store: index.html plus a gzip
        # copy next to it (`gzip -k` keeps the uncompressed original).
        root = pkgs.runCommandLocal "testdir" {} ''
          mkdir "$out"
          cat > "$out/index.html" <<EOF
          Hello, world!
          Hello, world!
          Hello, world!
          Hello, world!
          Hello, world!
          Hello, world!
          Hello, world!
          Hello, world!
          EOF
          ${pkgs.gzip}/bin/gzip -k "$out/index.html"
        '';
      };
    };
  };

  # Python test driver script. The bodies of the `with subtest(...)` blocks
  # must stay indented relative to their header, otherwise the script does
  # not parse.
  testScript = { nodes, ... }: ''
    machine.wait_for_unit("nginx")
    machine.wait_for_open_port(80)

    etag_plain = machine.succeed("curl -s -w'%header{etag}' -o/dev/null -H 'Accept-encoding:' http://127.0.0.1/")
    etag_gzip = machine.succeed("curl -s -w'%header{etag}' -o/dev/null -H 'Accept-encoding:gzip' http://127.0.0.1/")

    with subtest("both responses carry an etag"):
        assert etag_plain and etag_gzip, f"expected an etag on both responses: plain={etag_plain!r} gzip={etag_gzip!r}"

    with subtest("different representations have different etags"):
        assert etag_plain != etag_gzip, f"etags should differ: {etag_plain} == {etag_gzip}"

    with subtest("etag for uncompressed response is reproducible"):
        etag_plain_repeat = machine.succeed("curl -s -w'%header{etag}' -o/dev/null -H 'Accept-encoding:' http://127.0.0.1/")
        assert etag_plain == etag_plain_repeat, f"etags should be the same: {etag_plain} != {etag_plain_repeat}"

    with subtest("etag for compressed response is reproducible"):
        etag_gzip_repeat = machine.succeed("curl -s -w'%header{etag}' -o/dev/null -H 'Accept-encoding:gzip' http://127.0.0.1/")
        assert etag_gzip == etag_gzip_repeat, f"etags should be the same: {etag_gzip} != {etag_gzip_repeat}"
  '';
}

View File

@ -192,7 +192,7 @@ stdenv.mkDerivation {
passthru = {
inherit modules;
tests = {
inherit (nixosTests) nginx nginx-auth nginx-etag nginx-globalredirect nginx-http3 nginx-proxyprotocol nginx-pubhtml nginx-sso nginx-status-page nginx-unix-socket;
inherit (nixosTests) nginx nginx-auth nginx-etag nginx-etag-compression nginx-globalredirect nginx-http3 nginx-proxyprotocol nginx-pubhtml nginx-sso nginx-status-page nginx-unix-socket;
variants = lib.recurseIntoAttrs nixosTests.nginx-variants;
acme-integration = nixosTests.acme;
} // passthru.tests;

View File

@ -2,10 +2,10 @@ This patch makes it possible to serve static content from Nix store paths, by
using the hash of the store path for the ETag header.
diff --git a/src/http/ngx_http_core_module.c b/src/http/ngx_http_core_module.c
index cb49ef74..7b456993 100644
index 97a91aee2..2d07d71e6 100644
--- a/src/http/ngx_http_core_module.c
+++ b/src/http/ngx_http_core_module.c
@@ -1583,6 +1583,8 @@ ngx_http_set_etag(ngx_http_request_t *r)
@@ -1676,6 +1676,8 @@ ngx_http_set_etag(ngx_http_request_t *r)
{
ngx_table_elt_t *etag;
ngx_http_core_loc_conf_t *clcf;
@ -14,14 +14,25 @@ index cb49ef74..7b456993 100644
clcf = ngx_http_get_module_loc_conf(r, ngx_http_core_module);
@@ -1598,16 +1600,60 @@ ngx_http_set_etag(ngx_http_request_t *r)
etag->hash = 1;
@@ -1692,16 +1694,82 @@ ngx_http_set_etag(ngx_http_request_t *r)
etag->next = NULL;
ngx_str_set(&etag->key, "ETag");
- etag->value.data = ngx_pnalloc(r->pool, NGX_OFF_T_LEN + NGX_TIME_T_LEN + 3);
- if (etag->value.data == NULL) {
- etag->hash = 0;
- return NGX_ERROR;
+ // Upstream nginx uses file mod timestamp and content-length for Etag, but
+ // files in the Nix store have their timestamps reset, so that doesn't work.
+ // Instead, when serving from the Nix store, we use the hash from the store
+ // path and content-length.
+ //
+ // Every file under the given store path will share the same store path
+ // hash. It is fine to serve different resources with the same Etag, but
+ // different representations of the same resource (eg the same file, but
+ // gzip-compressed) should have different Etags. Thus, we also append
+ // content-length, which should be different when the response is compressed
+
+ err = ngx_errno;
+ real = ngx_realpath(clcf->root.data, NULL);
+ ngx_set_errno(err);
@ -35,8 +46,10 @@ index cb49ef74..7b456993 100644
+ && real[NIX_STORE_LEN] == '/'
+ && real[NIX_STORE_LEN + 1] != '\0')
+ {
+ ptr1 = real + NIX_STORE_LEN;
+ *ptr1 = '"';
+ // extract the hash from a path formatted like
+ // /nix/store/hashhere1234-pname-1.0.0
+ // +1 to skip the leading /
+ ptr1 = real + NIX_STORE_LEN + 1;
+
+ ptr2 = (u_char *) ngx_strchr(ptr1, '-');
+
@ -46,11 +59,11 @@ index cb49ef74..7b456993 100644
+ return NGX_ERROR;
+ }
+
+ *ptr2++ = '"';
+ *ptr2 = '\0';
+
+ etag->value.len = ngx_strlen(ptr1);
+ etag->value.data = ngx_pnalloc(r->pool, etag->value.len);
+ // hash + content-length + quotes and hyphen. Note that the
+ // content-length part of the string can vary in length.
+ etag->value.data = ngx_pnalloc(r->pool, ngx_strlen(ptr1) + NGX_OFF_T_LEN + 3);
+
+ if (etag->value.data == NULL) {
+ ngx_free(real);
@ -58,9 +71,18 @@ index cb49ef74..7b456993 100644
+ return NGX_ERROR;
+ }
+
+ ngx_memcpy(etag->value.data, ptr1, etag->value.len);
+
+ // set value.data content to "{hash}-{content-length}" (including quote
+ // marks), and set value.len to the length of the resulting string
+ etag->value.len = ngx_sprintf(etag->value.data, "\"\%s-%xO\"",
+ ptr1,
+ r->headers_out.content_length_n)
+ - etag->value.data;
+
+ ngx_http_clear_last_modified(r);
+ } else {
+ // outside of Nix store, use the upstream Nginx logic for etags
+
+ etag->value.data = ngx_pnalloc(r->pool, NGX_OFF_T_LEN + NGX_TIME_T_LEN + 3);
+
+ if (etag->value.data == NULL) {
@ -82,4 +104,3 @@ index cb49ef74..7b456993 100644
+ ngx_free(real);
r->headers_out.etag = etag;